X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/f783ed4680c6862a1b7543237e89d1221334bae0..635188886cb9b98f353a1c3869dee898f9b73b8b:/examples/msg/chord/chord.c diff --git a/examples/msg/chord/chord.c b/examples/msg/chord/chord.c index 15d3423466..a3b7830071 100644 --- a/examples/msg/chord/chord.c +++ b/examples/msg/chord/chord.c @@ -9,11 +9,19 @@ #include "msg/msg.h" #include "xbt/log.h" #include "xbt/asserts.h" -#include "mc/modelchecker.h" -#include "mc/mc.h" -#include "xbt/xbt_os_time.h" +#include "simgrid/modelchecker.h" -XBT_LOG_NEW_DEFAULT_CATEGORY(msg_chord, +/** @addtogroup MSG_examples + * + * - chord/chord.c: Classical Chord P2P protocol + * This example implements the well known Chord P2P protocol. Its + * main advantage is that it constitute a fully working non-trivial + * example. In addition, its implementation is rather efficient, as + * demonstrated in http://hal.inria.fr/inria-00602216/ + */ + + + XBT_LOG_NEW_DEFAULT_CATEGORY(msg_chord, "Messages specific for this msg example"); #define COMM_SIZE 10 @@ -31,18 +39,18 @@ static int periodic_lookup_delay = 10; extern long int smx_total_comms; -/** +/* * Finger element. */ -typedef struct finger { +typedef struct s_finger { int id; char mailbox[MAILBOX_NAME_SIZE]; // string representation of the id } s_finger_t, *finger_t; -/** +/* * Node data. */ -typedef struct node { +typedef struct s_node { int id; // my id char mailbox[MAILBOX_NAME_SIZE]; // my mailbox name (string representation of the id) s_finger_t *fingers; // finger table, of size nb_bits (fingers[0] is my successor) @@ -66,10 +74,10 @@ typedef enum { TASK_PREDECESSOR_LEAVING } e_task_type_t; -/** +/* * Data attached with the tasks sent and received */ -typedef struct task_data { +typedef struct s_task_data { e_task_type_t type; // type of task int request_id; // id paramater (used by some types of tasks) int request_finger; // finger parameter (used by some types of tasks) @@ -82,6 +90,7 @@ static int *powers2; // utility functions static void chord_initialize(void); +static void chord_exit(void); static int normalize(int id); static int is_in_interval(int id, int start, int end); static void get_mailbox(int host_id, char* mailbox); @@ -92,7 +101,7 @@ static void set_predecessor(node_t node, int predecessor_id); // process functions static int node(int argc, char *argv[]); -static void handle_task(node_t node, m_task_t task); +static void handle_task(node_t node, msg_task_t task); // Chord core static void create(node_t node); @@ -108,7 +117,7 @@ static void remote_notify(node_t node, int notify_to, int predecessor_candidate_ static void fix_fingers(node_t node); static void check_predecessor(node_t node); static void random_lookup(node_t); -static void quit_notify(node_t node, int to); +static void quit_notify(node_t node); /** * \brief Global initialization of the Chord simulation. @@ -127,6 +136,11 @@ static void chord_initialize(void) XBT_DEBUG("Sets nb_keys to %d", nb_keys); } +static void chord_exit(void) +{ + xbt_free(powers2); +} + /** * \brief Turns an id into an equivalent id in [0, nb_keys). * \param id an id @@ -139,7 +153,7 @@ static int normalize(int id) } /** - * \brief Returns whether a id belongs to the interval [start, end]. + * \brief Returns whether an id belongs to the interval [start, end]. * * The parameters are noramlized to make sure they are between 0 and nb_keys - 1). * 1 belongs to [62, 3] @@ -256,14 +270,14 @@ static void set_predecessor(node_t node, int predecessor_id) int node(int argc, char *argv[]) { /* Reduce the run size for the MC */ - if(MC_IS_ENABLED){ + if(MC_is_active()){ periodic_stabilize_delay = 8; periodic_fix_fingers_delay = 8; periodic_check_predecessor_delay = 8; } double init_time = MSG_get_clock(); - m_task_t task_received = NULL; + msg_task_t task_received = NULL; int i; int join_success = 0; double deadline; @@ -309,8 +323,8 @@ int node(int argc, char *argv[]) if (join_success) { while (MSG_get_clock() < init_time + deadline -// && MSG_get_clock() < node.last_change_date + 1000 - && MSG_get_clock() < max_simulation_time) { +// && MSG_get_clock() < node.last_change_date + 1000 + && MSG_get_clock() < max_simulation_time) { if (node.comm_receive == NULL) { task_received = NULL; @@ -333,22 +347,22 @@ int node(int argc, char *argv[]) check_predecessor(&node); next_check_predecessor_date = MSG_get_clock() + periodic_check_predecessor_delay; } - else if (MSG_get_clock() >= next_lookup_date) { - random_lookup(&node); - next_lookup_date = MSG_get_clock() + periodic_lookup_delay; - } + else if (MSG_get_clock() >= next_lookup_date) { + random_lookup(&node); + next_lookup_date = MSG_get_clock() + periodic_lookup_delay; + } else { // nothing to do: sleep for a while MSG_process_sleep(5); } - } - else { + } else { // a transfer has occured - MSG_error_t status = MSG_comm_get_status(node.comm_receive); + msg_error_t status = MSG_comm_get_status(node.comm_receive); if (status != MSG_OK) { XBT_DEBUG("Failed to receive a task. Nevermind."); + MSG_comm_destroy(node.comm_receive); node.comm_receive = NULL; } else { @@ -358,35 +372,12 @@ int node(int argc, char *argv[]) handle_task(&node, task_received); } } - - // see if some communications are finished - /* - while ((index = MSG_comm_testany(node.comms)) != -1) { - comm_send = xbt_dynar_get_as(node.comms, index, msg_comm_t); - MSG_error_t status = MSG_comm_get_status(comm_send); - xbt_dynar_remove_at(node.comms, index, &comm_send); - XBT_DEBUG("Communication %p is finished with status %d, dynar size is now %lu", - comm_send, status, xbt_dynar_length(node.comms)); - m_task_t task = MSG_comm_get_task(comm_send); - MSG_comm_destroy(comm_send); - if (status != MSG_OK) { - task_data_destroy(MSG_task_get_data(task)); - MSG_task_destroy(task); - } - } - */ } - // clean unfinished comms sent - /* unsigned int cursor; - xbt_dynar_foreach(node.comms, cursor, comm_send) { - m_task_t task = MSG_comm_get_task(comm_send); - MSG_task_cancel(task); - task_data_destroy(MSG_task_get_data(task)); - MSG_task_destroy(task); - MSG_comm_destroy(comm_send); - // FIXME: the task is actually not destroyed because MSG thinks that the other side (whose process is dead) is still using it - }*/ + if (node.comm_receive) { + MSG_comm_destroy(node.comm_receive); + node.comm_receive = NULL; + } // leave the ring leave(&node); @@ -403,7 +394,7 @@ int node(int argc, char *argv[]) * \param task the task to handle (don't touch it then: * it will be destroyed, reused or forwarded) */ -static void handle_task(node_t node, m_task_t task) { +static void handle_task(node_t node, msg_task_t task) { XBT_DEBUG("Handling task %p", task); char mailbox[MAILBOX_NAME_SIZE]; @@ -421,7 +412,7 @@ static void handle_task(node_t node, m_task_t task) { task_data->answer_id = node->fingers[0].id; XBT_DEBUG("Sending back a 'Find Successor Answer' to %s (mailbox %s): the successor of %d is %d", task_data->issuer_host_name, - task_data->answer_to, + task_data->answer_to, task_data->request_id, task_data->answer_id); MSG_task_dsend(task, task_data->answer_to, task_free); } @@ -477,7 +468,7 @@ static void handle_task(node_t node, m_task_t task) { case TASK_FIND_SUCCESSOR_ANSWER: case TASK_GET_PREDECESSOR_ANSWER: - XBT_DEBUG("Ignoring unexpected task of type %d (%p)", type, task); + XBT_DEBUG("Ignoring unexpected task of type %d (%p)", (int)type, task); task_free(task); break; } @@ -532,51 +523,41 @@ static int join(node_t node, int known_id) static void leave(node_t node) { XBT_DEBUG("Well Guys! I Think it's time for me to quit ;)"); - quit_notify(node, 1); // notify to my successor ( >>> 1 ); - quit_notify(node, -1); // notify my predecessor ( >>> -1); - // TODO ... + quit_notify(node); } /* - * \brief Notifies the successor or the predecessor of the current node + * \brief Notifies the successor and the predecessor of the current node * of the departure * \param node the current node - * \param to 1 to notify the successor, -1 to notify the predecessor - * FIXME: notify both nodes with only one call */ -static void quit_notify(node_t node, int to) +static void quit_notify(node_t node) { - /* TODO - task_data_t req_data = xbt_new0(s_task_data_t, 1); - req_data->request_id = node->id; - req_data->successor_id = node->fingers[0].id; - req_data->pred_id = node->pred_id; + char mailbox[MAILBOX_NAME_SIZE]; + //send the PREDECESSOR_LEAVING to our successor + task_data_t req_data = xbt_new0(s_task_data_t,1); + req_data->type = TASK_PREDECESSOR_LEAVING; + req_data->request_id = node->pred_id; + get_mailbox(node->id, req_data->answer_to); req_data->issuer_host_name = MSG_host_get_name(MSG_host_self()); - req_data->answer_to = NULL; - const char* task_name = NULL; - const char* to_mailbox = NULL; - if (to == 1) { // notify my successor - to_mailbox = node->fingers[0].mailbox; - XBT_INFO("Telling my Successor %d about my departure via mailbox %s", - node->fingers[0].id, to_mailbox); - req_data->type = TASK_PREDECESSOR_LEAVING; - } - else if (to == -1) { // notify my predecessor - if (node->pred_id == -1) { - return; - } + msg_task_t task_sent = MSG_task_create(NULL, COMP_SIZE, COMM_SIZE, req_data); + XBT_DEBUG("Sending a 'PREDECESSOR_LEAVING' to my successor %d",node->fingers[0].id); + MSG_task_send_with_timeout(task_sent, node->fingers[0].mailbox, timeout); + + //send the SUCCESSOR_LEAVING to our predecessor + get_mailbox(node->pred_id, mailbox); + task_data_t req_data_s = xbt_new0(s_task_data_t,1); + req_data_s->type = TASK_SUCCESSOR_LEAVING; + req_data_s->request_id = node->fingers[0].id; + req_data_s->request_id = node->pred_id; + get_mailbox(node->id, req_data_s->answer_to); + req_data_s->issuer_host_name = MSG_host_get_name(MSG_host_self()); + + msg_task_t task_sent_s = MSG_task_create(NULL, COMP_SIZE, COMM_SIZE, req_data_s); + XBT_DEBUG("Sending a 'SUCCESSOR_LEAVING' to my predecessor %d",node->pred_id); + MSG_task_send_with_timeout(task_sent_s, mailbox, timeout); - to_mailbox = node->pred_mailbox; - XBT_INFO("Telling my Predecessor %d about my departure via mailbox %s", - node->pred_id, to_mailbox); - req_data->type = TASK_SUCCESSOR_LEAVING; - } - m_task_t task = MSG_task_create(NULL, COMP_SIZE, COMM_SIZE, req_data); - //char* mailbox = get_mailbox(to_mailbox); - msg_comm_t comm = MSG_task_isend(task, to_mailbox); - xbt_dynar_push(node->comms, &comm); - */ } /** @@ -617,9 +598,9 @@ static int remote_find_successor(node_t node, int ask_to, int id) req_data->issuer_host_name = MSG_host_get_name(MSG_host_self()); // send a "Find Successor" request to ask_to_id - m_task_t task_sent = MSG_task_create(NULL, COMP_SIZE, COMM_SIZE, req_data); + msg_task_t task_sent = MSG_task_create(NULL, COMP_SIZE, COMM_SIZE, req_data); XBT_DEBUG("Sending a 'Find Successor' request (task %p) to %d for id %d", task_sent, ask_to, id); - MSG_error_t res = MSG_task_send_with_timeout(task_sent, mailbox, timeout); + msg_error_t res = MSG_task_send_with_timeout(task_sent, mailbox, timeout); if (res != MSG_OK) { XBT_DEBUG("Failed to send the 'Find Successor' request (task %p) to %d for id %d", @@ -634,7 +615,7 @@ static int remote_find_successor(node_t node, int ask_to, int id) do { if (node->comm_receive == NULL) { - m_task_t task_received = NULL; + msg_task_t task_received = NULL; node->comm_receive = MSG_task_irecv(&task_received, node->mailbox); } @@ -642,24 +623,34 @@ static int remote_find_successor(node_t node, int ask_to, int id) if (res != MSG_OK) { XBT_DEBUG("Failed to receive the answer to my 'Find Successor' request (task %p): %d", - task_sent, res); + task_sent, (int)res); stop = 1; - MSG_comm_destroy(node->comm_receive); - node->comm_receive = NULL; + MSG_comm_destroy(node->comm_receive); + node->comm_receive = NULL; } else { - m_task_t task_received = MSG_comm_get_task(node->comm_receive); + msg_task_t task_received = MSG_comm_get_task(node->comm_receive); XBT_DEBUG("Received a task (%p)", task_received); task_data_t ans_data = MSG_task_get_data(task_received); - if (MC_IS_ENABLED) { - MC_assert(task_received == task_sent); - } + // Once upon a time, our code assumed that here, task_received != task_sent all the time + // + // This assumption is wrong (as messages from differing round can interleave), leading to a bug in our code. + // We failed to find this bug directly, as it only occured on large platforms, leading to hardly usable traces. + // Instead, we used the model-checker to track down the issue by adding the following test here in the code: + // if (MC_is_active()) { + // MC_assert(task_received == task_sent); + // } + // That explained the bug in a snap, with a very cool example and everything. + // + // This MC_assert is now desactivated as the case is now properly handled in our code and we don't want the + // MC to fail any further under that condition, but this comment is here to as a memorial for this first + // brillant victory of the model-checking in the SimGrid community :) if (task_received != task_sent) { // this is not the expected answer - MSG_comm_destroy(node->comm_receive); - node->comm_receive = NULL; + MSG_comm_destroy(node->comm_receive); + node->comm_receive = NULL; handle_task(node, task_received); } else { @@ -668,8 +659,8 @@ static int remote_find_successor(node_t node, int ask_to, int id) ans_data->request_id, task_received, id, ans_data->answer_id); successor = ans_data->answer_id; stop = 1; - MSG_comm_destroy(node->comm_receive); - node->comm_receive = NULL; + MSG_comm_destroy(node->comm_receive); + node->comm_receive = NULL; task_free(task_received); } } @@ -699,8 +690,8 @@ static int remote_get_predecessor(node_t node, int ask_to) // send a "Get Predecessor" request to ask_to_id XBT_DEBUG("Sending a 'Get Predecessor' request to %d", ask_to); - m_task_t task_sent = MSG_task_create(NULL, COMP_SIZE, COMM_SIZE, req_data); - MSG_error_t res = MSG_task_send_with_timeout(task_sent, mailbox, timeout); + msg_task_t task_sent = MSG_task_create(NULL, COMP_SIZE, COMM_SIZE, req_data); + msg_error_t res = MSG_task_send_with_timeout(task_sent, mailbox, timeout); if (res != MSG_OK) { XBT_DEBUG("Failed to send the 'Get Predecessor' request (task %p) to %d", @@ -715,7 +706,7 @@ static int remote_get_predecessor(node_t node, int ask_to) do { if (node->comm_receive == NULL) { // FIXME simplify this - m_task_t task_received = NULL; + msg_task_t task_received = NULL; node->comm_receive = MSG_task_irecv(&task_received, node->mailbox); } @@ -723,22 +714,22 @@ static int remote_get_predecessor(node_t node, int ask_to) if (res != MSG_OK) { XBT_DEBUG("Failed to receive the answer to my 'Get Predecessor' request (task %p): %d", - task_sent, res); + task_sent, (int)res); stop = 1; - MSG_comm_destroy(node->comm_receive); - node->comm_receive = NULL; + MSG_comm_destroy(node->comm_receive); + node->comm_receive = NULL; } else { - m_task_t task_received = MSG_comm_get_task(node->comm_receive); + msg_task_t task_received = MSG_comm_get_task(node->comm_receive); task_data_t ans_data = MSG_task_get_data(task_received); - if (MC_IS_ENABLED) { - MC_assert(task_received == task_sent); - } + if (MC_is_active()) { + MC_assert(task_received == task_sent); + } if (task_received != task_sent) { - MSG_comm_destroy(node->comm_receive); - node->comm_receive = NULL; + MSG_comm_destroy(node->comm_receive); + node->comm_receive = NULL; handle_task(node, task_received); } else { @@ -746,8 +737,8 @@ static int remote_get_predecessor(node_t node, int ask_to) task_received, ask_to, ans_data->answer_id); predecessor_id = ans_data->answer_id; stop = 1; - MSG_comm_destroy(node->comm_receive); - node->comm_receive = NULL; + MSG_comm_destroy(node->comm_receive); + node->comm_receive = NULL; task_free(task_received); } } @@ -830,18 +821,18 @@ static void notify(node_t node, int predecessor_candidate_id) { */ static void remote_notify(node_t node, int notify_id, int predecessor_candidate_id) { - task_data_t req_data = xbt_new0(s_task_data_t, 1); - req_data->type = TASK_NOTIFY; - req_data->request_id = predecessor_candidate_id; - req_data->issuer_host_name = MSG_host_get_name(MSG_host_self()); - - // send a "Notify" request to notify_id - m_task_t task = MSG_task_create(NULL, COMP_SIZE, COMM_SIZE, req_data); - XBT_DEBUG("Sending a 'Notify' request (task %p) to %d", task, notify_id); - char mailbox[MAILBOX_NAME_SIZE]; - get_mailbox(notify_id, mailbox); - MSG_task_dsend(task, mailbox, task_free); -} + task_data_t req_data = xbt_new0(s_task_data_t, 1); + req_data->type = TASK_NOTIFY; + req_data->request_id = predecessor_candidate_id; + req_data->issuer_host_name = MSG_host_get_name(MSG_host_self()); + + // send a "Notify" request to notify_id + msg_task_t task = MSG_task_create(NULL, COMP_SIZE, COMM_SIZE, req_data); + XBT_DEBUG("Sending a 'Notify' request (task %p) to %d", task, notify_id); + char mailbox[MAILBOX_NAME_SIZE]; + get_mailbox(notify_id, mailbox); + MSG_task_dsend(task, mailbox, task_free); + } /** * \brief This function is called periodically. @@ -890,14 +881,13 @@ static void random_lookup(node_t node) */ int main(int argc, char *argv[]) { + MSG_init(&argc, argv); if (argc < 3) { printf("Usage: %s [-nb_bits=n] [-timeout=t] platform_file deployment_file\n", argv[0]); printf("example: %s ../msg_platform.xml chord.xml\n", argv[0]); exit(1); } - MSG_global_init(&argc, argv); - char **options = &argv[1]; while (!strncmp(options[0], "-", 1)) { @@ -910,11 +900,11 @@ int main(int argc, char *argv[]) length = strlen("-timeout="); if (!strncmp(options[0], "-timeout=", length) && strlen(options[0]) > length) { - timeout = atoi(options[0] + length); - XBT_DEBUG("Set timeout to %d", timeout); + timeout = atoi(options[0] + length); + XBT_DEBUG("Set timeout to %d", timeout); } else { - xbt_die("Invalid chord option '%s'", options[0]); + xbt_die("Invalid chord option '%s'", options[0]); } } options++; @@ -925,17 +915,16 @@ int main(int argc, char *argv[]) chord_initialize(); - MSG_set_channel_number(0); MSG_create_environment(platform_file); MSG_function_register("node", node); MSG_launch_application(application_file); - MSG_error_t res = MSG_main(); + msg_error_t res = MSG_main(); XBT_CRITICAL("Messages created: %ld", smx_total_comms); XBT_INFO("Simulated time: %g", MSG_get_clock()); - MSG_clean(); + chord_exit(); if (res == MSG_OK) return 0;