X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/3c673d7d868285e9c026b0428a7f821a1760189f..635188886cb9b98f353a1c3869dee898f9b73b8b:/examples/msg/chord/chord.c diff --git a/examples/msg/chord/chord.c b/examples/msg/chord/chord.c index d9f9995584..a3b7830071 100644 --- a/examples/msg/chord/chord.c +++ b/examples/msg/chord/chord.c @@ -39,7 +39,7 @@ static int periodic_lookup_delay = 10; extern long int smx_total_comms; -/** +/* * Finger element. */ typedef struct s_finger { @@ -47,7 +47,7 @@ typedef struct s_finger { char mailbox[MAILBOX_NAME_SIZE]; // string representation of the id } s_finger_t, *finger_t; -/** +/* * Node data. */ typedef struct s_node { @@ -74,7 +74,7 @@ typedef enum { TASK_PREDECESSOR_LEAVING } e_task_type_t; -/** +/* * Data attached with the tasks sent and received */ typedef struct s_task_data { @@ -101,7 +101,7 @@ static void set_predecessor(node_t node, int predecessor_id); // process functions static int node(int argc, char *argv[]); -static void handle_task(node_t node, m_task_t task); +static void handle_task(node_t node, msg_task_t task); // Chord core static void create(node_t node); @@ -117,7 +117,7 @@ static void remote_notify(node_t node, int notify_to, int predecessor_candidate_ static void fix_fingers(node_t node); static void check_predecessor(node_t node); static void random_lookup(node_t); -static void quit_notify(node_t node, int to); +static void quit_notify(node_t node); /** * \brief Global initialization of the Chord simulation. 
@@ -270,14 +270,14 @@ static void set_predecessor(node_t node, int predecessor_id) int node(int argc, char *argv[]) { /* Reduce the run size for the MC */ - if(MC_IS_ENABLED){ + if(MC_is_active()){ periodic_stabilize_delay = 8; periodic_fix_fingers_delay = 8; periodic_check_predecessor_delay = 8; } double init_time = MSG_get_clock(); - m_task_t task_received = NULL; + msg_task_t task_received = NULL; int i; int join_success = 0; double deadline; @@ -355,13 +355,10 @@ int node(int argc, char *argv[]) // nothing to do: sleep for a while MSG_process_sleep(5); } - } - - if (node.comm_receive && MSG_comm_test(node.comm_receive)) { - + } else { // a transfer has occured - MSG_error_t status = MSG_comm_get_status(node.comm_receive); + msg_error_t status = MSG_comm_get_status(node.comm_receive); if (status != MSG_OK) { XBT_DEBUG("Failed to receive a task. Nevermind."); @@ -397,7 +394,7 @@ int node(int argc, char *argv[]) * \param task the task to handle (don't touch it then: * it will be destroyed, reused or forwarded) */ -static void handle_task(node_t node, m_task_t task) { +static void handle_task(node_t node, msg_task_t task) { XBT_DEBUG("Handling task %p", task); char mailbox[MAILBOX_NAME_SIZE]; @@ -526,51 +523,41 @@ static int join(node_t node, int known_id) static void leave(node_t node) { XBT_DEBUG("Well Guys! I Think it's time for me to quit ;)"); - quit_notify(node, 1); // notify to my successor ( >>> 1 ); - quit_notify(node, -1); // notify my predecessor ( >>> -1); - // TODO ... 
+ quit_notify(node); } /* - * \brief Notifies the successor or the predecessor of the current node + * \brief Notifies the successor and the predecessor of the current node * of the departure * \param node the current node - * \param to 1 to notify the successor, -1 to notify the predecessor - * FIXME: notify both nodes with only one call */ -static void quit_notify(node_t node, int to) +static void quit_notify(node_t node) { - /* TODO - task_data_t req_data = xbt_new0(s_task_data_t, 1); - req_data->request_id = node->id; - req_data->successor_id = node->fingers[0].id; - req_data->pred_id = node->pred_id; + char mailbox[MAILBOX_NAME_SIZE]; + //send the PREDECESSOR_LEAVING to our successor + task_data_t req_data = xbt_new0(s_task_data_t,1); + req_data->type = TASK_PREDECESSOR_LEAVING; + req_data->request_id = node->pred_id; + get_mailbox(node->id, req_data->answer_to); req_data->issuer_host_name = MSG_host_get_name(MSG_host_self()); - req_data->answer_to = NULL; - const char* task_name = NULL; - const char* to_mailbox = NULL; - if (to == 1) { // notify my successor - to_mailbox = node->fingers[0].mailbox; - XBT_INFO("Telling my Successor %d about my departure via mailbox %s", - node->fingers[0].id, to_mailbox); - req_data->type = TASK_PREDECESSOR_LEAVING; - } - else if (to == -1) { // notify my predecessor - if (node->pred_id == -1) { - return; - } + msg_task_t task_sent = MSG_task_create(NULL, COMP_SIZE, COMM_SIZE, req_data); + XBT_DEBUG("Sending a 'PREDECESSOR_LEAVING' to my successor %d",node->fingers[0].id); + MSG_task_send_with_timeout(task_sent, node->fingers[0].mailbox, timeout); + + //send the SUCCESSOR_LEAVING to our predecessor + get_mailbox(node->pred_id, mailbox); + task_data_t req_data_s = xbt_new0(s_task_data_t,1); + req_data_s->type = TASK_SUCCESSOR_LEAVING; + req_data_s->request_id = node->fingers[0].id; + req_data_s->request_id = node->pred_id; + get_mailbox(node->id, req_data_s->answer_to); + req_data_s->issuer_host_name = 
MSG_host_get_name(MSG_host_self()); + + msg_task_t task_sent_s = MSG_task_create(NULL, COMP_SIZE, COMM_SIZE, req_data_s); + XBT_DEBUG("Sending a 'SUCCESSOR_LEAVING' to my predecessor %d",node->pred_id); + MSG_task_send_with_timeout(task_sent_s, mailbox, timeout); - to_mailbox = node->pred_mailbox; - XBT_INFO("Telling my Predecessor %d about my departure via mailbox %s", - node->pred_id, to_mailbox); - req_data->type = TASK_SUCCESSOR_LEAVING; - } - m_task_t task = MSG_task_create(NULL, COMP_SIZE, COMM_SIZE, req_data); - //char* mailbox = get_mailbox(to_mailbox); - msg_comm_t comm = MSG_task_isend(task, to_mailbox); - xbt_dynar_push(node->comms, &comm); - */ } /** @@ -611,9 +598,9 @@ static int remote_find_successor(node_t node, int ask_to, int id) req_data->issuer_host_name = MSG_host_get_name(MSG_host_self()); // send a "Find Successor" request to ask_to_id - m_task_t task_sent = MSG_task_create(NULL, COMP_SIZE, COMM_SIZE, req_data); + msg_task_t task_sent = MSG_task_create(NULL, COMP_SIZE, COMM_SIZE, req_data); XBT_DEBUG("Sending a 'Find Successor' request (task %p) to %d for id %d", task_sent, ask_to, id); - MSG_error_t res = MSG_task_send_with_timeout(task_sent, mailbox, timeout); + msg_error_t res = MSG_task_send_with_timeout(task_sent, mailbox, timeout); if (res != MSG_OK) { XBT_DEBUG("Failed to send the 'Find Successor' request (task %p) to %d for id %d", @@ -628,7 +615,7 @@ static int remote_find_successor(node_t node, int ask_to, int id) do { if (node->comm_receive == NULL) { - m_task_t task_received = NULL; + msg_task_t task_received = NULL; node->comm_receive = MSG_task_irecv(&task_received, node->mailbox); } @@ -642,13 +629,23 @@ static int remote_find_successor(node_t node, int ask_to, int id) node->comm_receive = NULL; } else { - m_task_t task_received = MSG_comm_get_task(node->comm_receive); + msg_task_t task_received = MSG_comm_get_task(node->comm_receive); XBT_DEBUG("Received a task (%p)", task_received); task_data_t ans_data = 
MSG_task_get_data(task_received); - if (MC_IS_ENABLED) { - MC_assert(task_received == task_sent); - } + // Once upon a time, our code assumed that here, task_received == task_sent all the time + // + // This assumption is wrong (as messages from different rounds can interleave), leading to a bug in our code. + // We failed to find this bug directly, as it only occurred on large platforms, leading to hardly usable traces. + // Instead, we used the model-checker to track down the issue by adding the following test here in the code: + // if (MC_is_active()) { + // MC_assert(task_received == task_sent); + // } + // That explained the bug in a snap, with a very cool example and everything. + // + // This MC_assert is now deactivated as the case is now properly handled in our code and we don't want the + // MC to fail any further under that condition, but this comment is here as a memorial to this first + // brilliant victory of the model-checking in the SimGrid community :) if (task_received != task_sent) { // this is not the expected answer @@ -693,8 +690,8 @@ static int remote_get_predecessor(node_t node, int ask_to) // send a "Get Predecessor" request to ask_to_id XBT_DEBUG("Sending a 'Get Predecessor' request to %d", ask_to); - m_task_t task_sent = MSG_task_create(NULL, COMP_SIZE, COMM_SIZE, req_data); - MSG_error_t res = MSG_task_send_with_timeout(task_sent, mailbox, timeout); + msg_task_t task_sent = MSG_task_create(NULL, COMP_SIZE, COMM_SIZE, req_data); + msg_error_t res = MSG_task_send_with_timeout(task_sent, mailbox, timeout); if (res != MSG_OK) { XBT_DEBUG("Failed to send the 'Get Predecessor' request (task %p) to %d", @@ -709,7 +706,7 @@ static int remote_get_predecessor(node_t node, int ask_to) do { if (node->comm_receive == NULL) { // FIXME simplify this - m_task_t task_received = NULL; + msg_task_t task_received = NULL; node->comm_receive = MSG_task_irecv(&task_received, node->mailbox); } @@ -723,10 +720,10 @@ static int remote_get_predecessor(node_t 
node, int ask_to) node->comm_receive = NULL; } else { - m_task_t task_received = MSG_comm_get_task(node->comm_receive); + msg_task_t task_received = MSG_comm_get_task(node->comm_receive); task_data_t ans_data = MSG_task_get_data(task_received); - if (MC_IS_ENABLED) { + if (MC_is_active()) { MC_assert(task_received == task_sent); } @@ -824,18 +821,18 @@ static void notify(node_t node, int predecessor_candidate_id) { */ static void remote_notify(node_t node, int notify_id, int predecessor_candidate_id) { - task_data_t req_data = xbt_new0(s_task_data_t, 1); - req_data->type = TASK_NOTIFY; - req_data->request_id = predecessor_candidate_id; - req_data->issuer_host_name = MSG_host_get_name(MSG_host_self()); - - // send a "Notify" request to notify_id - m_task_t task = MSG_task_create(NULL, COMP_SIZE, COMM_SIZE, req_data); - XBT_DEBUG("Sending a 'Notify' request (task %p) to %d", task, notify_id); - char mailbox[MAILBOX_NAME_SIZE]; - get_mailbox(notify_id, mailbox); - MSG_task_dsend(task, mailbox, task_free); -} + task_data_t req_data = xbt_new0(s_task_data_t, 1); + req_data->type = TASK_NOTIFY; + req_data->request_id = predecessor_candidate_id; + req_data->issuer_host_name = MSG_host_get_name(MSG_host_self()); + + // send a "Notify" request to notify_id + msg_task_t task = MSG_task_create(NULL, COMP_SIZE, COMM_SIZE, req_data); + XBT_DEBUG("Sending a 'Notify' request (task %p) to %d", task, notify_id); + char mailbox[MAILBOX_NAME_SIZE]; + get_mailbox(notify_id, mailbox); + MSG_task_dsend(task, mailbox, task_free); + } /** * \brief This function is called periodically. 
@@ -884,7 +881,7 @@ static void random_lookup(node_t node) */ int main(int argc, char *argv[]) { - MSG_global_init(&argc, argv); + MSG_init(&argc, argv); if (argc < 3) { printf("Usage: %s [-nb_bits=n] [-timeout=t] platform_file deployment_file\n", argv[0]); printf("example: %s ../msg_platform.xml chord.xml\n", argv[0]); @@ -923,11 +920,10 @@ int main(int argc, char *argv[]) MSG_function_register("node", node); MSG_launch_application(application_file); - MSG_error_t res = MSG_main(); + msg_error_t res = MSG_main(); XBT_CRITICAL("Messages created: %ld", smx_total_comms); XBT_INFO("Simulated time: %g", MSG_get_clock()); - MSG_clean(); chord_exit(); if (res == MSG_OK)