From: Gabriel Corona Date: Mon, 13 Apr 2015 09:48:49 +0000 (+0200) Subject: [mc] Fix comm_determinism to work in split MCer/MCed X-Git-Tag: v3_12~732^2~62 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/da7ddf47cca7f83884a15a05778d3909fd0e4cfd [mc] Fix comm_determinism to work in split MCer/MCed --- diff --git a/examples/smpi/mc/only_send_deterministic.tesh b/examples/smpi/mc/only_send_deterministic.tesh index a9c854e33d..0ebb4ec890 100644 --- a/examples/smpi/mc/only_send_deterministic.tesh +++ b/examples/smpi/mc/only_send_deterministic.tesh @@ -2,8 +2,8 @@ ! timeout 60 $ ../../../smpi_script/bin/smpirun -wrapper "${bindir:=.}/../../../bin/simgrid-mc" --log=xbt_cfg.thresh:warning -hostfile ${srcdir:=.}/hostfile_only_send_deterministic -platform ${srcdir:=.}/../../platforms/cluster.xml --cfg=model-check:1 --cfg=model-check/communications_determinism:1 --cfg=smpi/send_is_detached_thres:0 --cfg=smpi/running_power:1e9 ./smpi_only_send_deterministic -> [0.000000] [surf_config/INFO] Switching workstation model to compound since you changed the network and/or cpu model(s) > [0.000000] [mc_global/INFO] Check communication determinism +> [0.000000] [surf_config/INFO] Switching workstation model to compound since you changed the network and/or cpu model(s) > [0.000000] [mc_global/INFO] ****************************************************** > [0.000000] [mc_global/INFO] **** Only-send-deterministic communication pattern **** > [0.000000] [mc_global/INFO] ****************************************************** diff --git a/src/mc/mc_comm_determinism.c b/src/mc/mc_comm_determinism.c index 37009cfcd7..2ae9d1c9d1 100644 --- a/src/mc/mc_comm_determinism.c +++ b/src/mc/mc_comm_determinism.c @@ -86,41 +86,51 @@ static char* print_determinism_result(e_mc_comm_pattern_difference_t diff, int p return res; } -static void update_comm_pattern(mc_comm_pattern_t comm_pattern, smx_synchro_t comm) +static void update_comm_pattern(mc_comm_pattern_t comm_pattern, smx_synchro_t comm_addr) { - smx_process_t src_proc = MC_smx_resolve_process(comm->comm.src_proc); - smx_process_t dst_proc = MC_smx_resolve_process(comm->comm.dst_proc); + s_smx_synchro_t comm; + MC_process_read_simple(&mc_model_checker->process, + &comm, comm_addr, sizeof(comm)); + + smx_process_t src_proc = MC_smx_resolve_process(comm.comm.src_proc); + smx_process_t dst_proc = MC_smx_resolve_process(comm.comm.dst_proc); comm_pattern->src_proc = src_proc->pid; comm_pattern->dst_proc = dst_proc->pid; comm_pattern->src_host = MC_smx_process_get_host_name(src_proc); comm_pattern->dst_host = MC_smx_process_get_host_name(dst_proc); - if (comm_pattern->data_size == -1 && comm->comm.src_buff != NULL) { - comm_pattern->data_size = *(comm->comm.dst_buff_size); + if (comm_pattern->data_size == -1 && comm.comm.src_buff != NULL) { + size_t buff_size; + MC_process_read_simple(&mc_model_checker->process, + &buff_size, comm.comm.dst_buff_size, sizeof(buff_size)); + comm_pattern->data_size = buff_size; comm_pattern->data = xbt_malloc0(comm_pattern->data_size); MC_process_read_simple(&mc_model_checker->process, - comm_pattern->data, comm->comm.src_buff, comm_pattern->data_size); + comm_pattern->data, comm.comm.src_buff, comm_pattern->data_size); } } static void deterministic_comm_pattern(int process, mc_comm_pattern_t comm, int backtracking) { - mc_list_comm_pattern_t list_comm_pattern = (mc_list_comm_pattern_t)xbt_dynar_get_as(initial_communications_pattern, process, mc_list_comm_pattern_t); + mc_list_comm_pattern_t list = + xbt_dynar_get_as(initial_communications_pattern, process, mc_list_comm_pattern_t); if(!backtracking){ - mc_comm_pattern_t initial_comm = xbt_dynar_get_as(list_comm_pattern->list, list_comm_pattern->index_comm, mc_comm_pattern_t); - e_mc_comm_pattern_difference_t diff; - - if((diff = compare_comm_pattern(initial_comm, comm)) != NONE_DIFF){ + mc_comm_pattern_t initial_comm = + xbt_dynar_get_as(list->list, list->index_comm, mc_comm_pattern_t); + e_mc_comm_pattern_difference_t diff = + compare_comm_pattern(initial_comm, comm); + + if (diff != NONE_DIFF) { if (comm->type == SIMIX_COMM_SEND){ initial_global_state->send_deterministic = 0; if(initial_global_state->send_diff != NULL) xbt_free(initial_global_state->send_diff); - initial_global_state->send_diff = print_determinism_result(diff, process, comm, list_comm_pattern->index_comm + 1); + initial_global_state->send_diff = print_determinism_result(diff, process, comm, list->index_comm + 1); }else{ initial_global_state->recv_deterministic = 0; if(initial_global_state->recv_diff != NULL) xbt_free(initial_global_state->recv_diff); - initial_global_state->recv_diff = print_determinism_result(diff, process, comm, list_comm_pattern->index_comm + 1); + initial_global_state->recv_diff = print_determinism_result(diff, process, comm, list->index_comm + 1); } if(_sg_mc_send_determinism && !initial_global_state->send_deterministic){ XBT_INFO("*********************************************************"); @@ -155,28 +165,26 @@ static void deterministic_comm_pattern(int process, mc_comm_pattern_t comm, int void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type_t call_type, int backtracking) { + const smx_process_t issuer = MC_smx_simcall_get_issuer(request); + mc_list_comm_pattern_t initial_pattern = xbt_dynar_get_as( + initial_communications_pattern, issuer->pid, mc_list_comm_pattern_t); + xbt_dynar_t incomplete_pattern = xbt_dynar_get_as( + incomplete_communications_pattern, issuer->pid, xbt_dynar_t); + mc_comm_pattern_t pattern = xbt_new0(s_mc_comm_pattern_t, 1); pattern->data_size = -1; pattern->data = NULL; - - // Fill initial_pattern->index_comm: - const smx_process_t issuer = MC_smx_simcall_get_issuer(request); - mc_list_comm_pattern_t initial_pattern = - (mc_list_comm_pattern_t) xbt_dynar_get_as(initial_communications_pattern, issuer->pid, mc_list_comm_pattern_t); - xbt_dynar_t incomplete_pattern = - (xbt_dynar_t) xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t); pattern->index = initial_pattern->index_comm + xbt_dynar_length(incomplete_pattern); - if (call_type == MC_CALL_TYPE_SEND) { /* Create comm pattern */ pattern->type = SIMIX_COMM_SEND; - pattern->comm = simcall_comm_isend__get__result(request); + pattern->comm_addr = simcall_comm_isend__get__result(request); s_smx_synchro_t synchro; MC_process_read_simple(&mc_model_checker->process, - &synchro, pattern->comm, sizeof(synchro)); + &synchro, pattern->comm_addr, sizeof(synchro)); char* remote_name; MC_process_read_simple(&mc_model_checker->process, &remote_name, @@ -202,17 +210,23 @@ void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type if(mpi_request.detached){ if (!initial_global_state->initial_communications_pattern_done) { /* Store comm pattern */ - xbt_dynar_push(((mc_list_comm_pattern_t)xbt_dynar_get_as(initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t))->list, &pattern); + xbt_dynar_push( + xbt_dynar_get_as( + initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t + )->list, + &pattern); } else { /* Evaluate comm determinism */ deterministic_comm_pattern(pattern->src_proc, pattern, backtracking); - ((mc_list_comm_pattern_t)xbt_dynar_get_as(initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t))->index_comm++; + xbt_dynar_get_as( + initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t + )->index_comm++; } return; } } else if (call_type == MC_CALL_TYPE_RECV) { pattern->type = SIMIX_COMM_RECEIVE; - pattern->comm = simcall_comm_irecv__get__result(request); + pattern->comm_addr = simcall_comm_irecv__get__result(request); struct s_smpi_mpi_request mpi_request; MC_process_read_simple(&mc_model_checker->process, @@ -222,7 +236,7 @@ void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type s_smx_synchro_t synchro; MC_process_read_simple(&mc_model_checker->process, - &synchro, pattern->comm, sizeof(synchro)); + &synchro, pattern->comm_addr, sizeof(synchro)); char* remote_name; MC_process_read_simple(&mc_model_checker->process, &remote_name, @@ -236,23 +250,27 @@ void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type xbt_die("Unexpected call_type %i", (int) call_type); } - xbt_dynar_push((xbt_dynar_t)xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t), &pattern); + xbt_dynar_push( + xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t), + &pattern); XBT_DEBUG("Insert incomplete comm pattern %p for process %lu", pattern, issuer->pid); } -void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm, unsigned int issuer, int backtracking) { +void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm_addr, unsigned int issuer, int backtracking) { mc_comm_pattern_t current_comm_pattern; unsigned int cursor = 0; mc_comm_pattern_t comm_pattern; int completed = 0; /* Complete comm pattern */ - xbt_dynar_foreach((xbt_dynar_t)xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, current_comm_pattern) { - if (current_comm_pattern-> comm == comm) { - update_comm_pattern(current_comm_pattern, comm); + xbt_dynar_foreach(xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, current_comm_pattern) { + if (current_comm_pattern->comm_addr == comm_addr) { + update_comm_pattern(current_comm_pattern, comm_addr); completed = 1; - xbt_dynar_remove_at((xbt_dynar_t)xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, &comm_pattern); + xbt_dynar_remove_at( + xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), + cursor, &comm_pattern); XBT_DEBUG("Remove incomplete comm pattern for process %u at cursor %u", issuer, cursor); break; } @@ -260,13 +278,16 @@ void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm, unsigned int if(!completed) xbt_die("Corresponding communication not found!"); + mc_list_comm_pattern_t pattern = xbt_dynar_get_as( + initial_communications_pattern, issuer, mc_list_comm_pattern_t); + if (!initial_global_state->initial_communications_pattern_done) { /* Store comm pattern */ - xbt_dynar_push(((mc_list_comm_pattern_t)xbt_dynar_get_as(initial_communications_pattern, issuer, mc_list_comm_pattern_t))->list, &comm_pattern); + xbt_dynar_push(pattern->list, &comm_pattern); } else { /* Evaluate comm determinism */ deterministic_comm_pattern(issuer, comm_pattern, backtracking); - ((mc_list_comm_pattern_t)xbt_dynar_get_as(initial_communications_pattern, issuer, mc_list_comm_pattern_t))->index_comm++; + pattern->index_comm++; } } @@ -282,19 +303,23 @@ static void MC_pre_modelcheck_comm_determinism(void) mc_state_t initial_state = NULL; smx_process_t process; int i; + const int maxpid = MC_smx_get_maxpid(); if (_sg_mc_visited > 0) visited_states = xbt_dynar_new(sizeof(mc_visited_state_t), visited_state_free_voidp); + // Create initial_communications_pattern elements: initial_communications_pattern = xbt_dynar_new(sizeof(mc_list_comm_pattern_t), MC_list_comm_pattern_free_voidp); - for (i=0; i < MC_smx_get_maxpid(); i++){ + for (i=0; i < maxpid; i++){ mc_list_comm_pattern_t process_list_pattern = xbt_new0(s_mc_list_comm_pattern_t, 1); process_list_pattern->list = xbt_dynar_new(sizeof(mc_comm_pattern_t), MC_comm_pattern_free_voidp); process_list_pattern->index_comm = 0; xbt_dynar_insert_at(initial_communications_pattern, i, &process_list_pattern); } + + // Create incomplete_communications_pattern elements: incomplete_communications_pattern = xbt_dynar_new(sizeof(xbt_dynar_t), xbt_dynar_free_voidp); - for (i=0; i < MC_smx_get_maxpid(); i++){ + for (i=0; i < maxpid; i++){ xbt_dynar_t process_pattern = xbt_dynar_new(sizeof(mc_comm_pattern_t), NULL); xbt_dynar_insert_at(incomplete_communications_pattern, i, &process_pattern); } @@ -469,7 +494,7 @@ static void MC_modelcheck_comm_determinism_main(void) MC_print_statistics(mc_stats); MC_SET_STD_HEAP; - return; + exit(0); } void MC_modelcheck_comm_determinism(void) diff --git a/src/mc/mc_comm_pattern.c b/src/mc/mc_comm_pattern.c index cef2575847..37a90b3d69 100644 --- a/src/mc/mc_comm_pattern.c +++ b/src/mc/mc_comm_pattern.c @@ -10,13 +10,17 @@ #include #include "mc_comm_pattern.h" +#include "mc_smx.h" + +XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_comm_pattern, mc, + "Logging specific to MC communication patterns"); mc_comm_pattern_t MC_comm_pattern_dup(mc_comm_pattern_t comm) { mc_comm_pattern_t res = xbt_new0(s_mc_comm_pattern_t, 1); res->index = comm->index; res->type = comm->type; - res->comm = comm->comm; + res->comm_addr = comm->comm_addr; res->rdv = strdup(comm->rdv); res->data_size = -1; res->data = NULL; @@ -49,6 +53,18 @@ xbt_dynar_t MC_comm_patterns_dup(xbt_dynar_t patterns) return res; } +static void MC_patterns_copy(xbt_dynar_t dest, xbt_dynar_t source) +{ + xbt_dynar_reset(dest); + + unsigned int cursor; + mc_comm_pattern_t comm; + xbt_dynar_foreach(source, cursor, comm) { + mc_comm_pattern_t copy_comm = MC_comm_pattern_dup(comm); + xbt_dynar_push(dest, ©_comm); + } +} + void MC_restore_communications_pattern(mc_state_t state) { mc_list_comm_pattern_t list_process_comm; @@ -59,16 +75,10 @@ void MC_restore_communications_pattern(mc_state_t state) } for (int i = 0; i < MC_smx_get_maxpid(); i++) { - xbt_dynar_t initial_incomplete_process_comms = xbt_dynar_get_as(incomplete_communications_pattern, i, xbt_dynar_t); - xbt_dynar_reset(initial_incomplete_process_comms); - xbt_dynar_t incomplete_process_comms = xbt_dynar_get_as(state->incomplete_comm_pattern, i, xbt_dynar_t); - - mc_comm_pattern_t comm; - xbt_dynar_foreach(incomplete_process_comms, cursor, comm) { - mc_comm_pattern_t copy_comm = MC_comm_pattern_dup(comm); - xbt_dynar_push(initial_incomplete_process_comms, ©_comm); - } - + MC_patterns_copy( + xbt_dynar_get_as(incomplete_communications_pattern, i, xbt_dynar_t), + xbt_dynar_get_as(state->incomplete_comm_pattern, i, xbt_dynar_t) + ); } } @@ -94,6 +104,38 @@ void MC_state_copy_index_communications_pattern(mc_state_t state) } } +void MC_handle_comm_pattern( + e_mc_call_type_t call_type, smx_simcall_t req, + int value, xbt_dynar_t pattern, int backtracking) +{ + + switch(call_type) { + case MC_CALL_TYPE_NONE: + break; + case MC_CALL_TYPE_SEND: + case MC_CALL_TYPE_RECV: + MC_get_comm_pattern(pattern, req, call_type, backtracking); + break; + case MC_CALL_TYPE_WAIT: + case MC_CALL_TYPE_WAITANY: + { + smx_synchro_t comm_addr = NULL; + if (call_type == MC_CALL_TYPE_WAIT) + comm_addr = simcall_comm_wait__get__comm(req); + else + // comm_addr = REMOTE(xbt_dynar_get_as(simcall_comm_waitany__get__comms(req), value, smx_synchro_t)): + MC_process_read_dynar_element(&mc_model_checker->process, &comm_addr, + simcall_comm_waitany__get__comms(req), value, sizeof(comm_addr)); + MC_complete_comm_pattern(pattern, comm_addr, + MC_smx_simcall_get_issuer(req)->pid, backtracking); + } + break; + default: + xbt_die("Unexpected call type %i", (int)call_type); + } + +} + void MC_comm_pattern_free(mc_comm_pattern_t p) { xbt_free(p->rdv); diff --git a/src/mc/mc_comm_pattern.h b/src/mc/mc_comm_pattern.h index 9c73038bb3..3a4a0a6d01 100644 --- a/src/mc/mc_comm_pattern.h +++ b/src/mc/mc_comm_pattern.h @@ -22,7 +22,7 @@ SG_BEGIN_DECL() typedef struct s_mc_comm_pattern{ int num; - smx_synchro_t comm; + smx_synchro_t comm_addr; e_smx_comm_type_t type; unsigned long src_proc; unsigned long dst_proc; @@ -40,7 +40,14 @@ typedef struct s_mc_list_comm_pattern{ xbt_dynar_t list; }s_mc_list_comm_pattern_t, *mc_list_comm_pattern_t; +/** + * Type: `xbt_dynar_t` + */ extern xbt_dynar_t initial_communications_pattern; + +/** + * Type: `xbt_dynar_t>` + */ extern xbt_dynar_t incomplete_communications_pattern; typedef enum { @@ -82,7 +89,7 @@ void MC_get_comm_pattern(xbt_dynar_t communications_pattern, smx_simcall_t reque void MC_handle_comm_pattern(e_mc_call_type_t call_type, smx_simcall_t request, int value, xbt_dynar_t current_pattern, int backtracking); void MC_comm_pattern_free_voidp(void *p); void MC_list_comm_pattern_free_voidp(void *p); -void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm, unsigned int issuer, int backtracking); +void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm_addr, unsigned int issuer, int backtracking); void MC_modelcheck_comm_determinism(void); void MC_restore_communications_pattern(mc_state_t state); diff --git a/src/mc/mc_global.c b/src/mc/mc_global.c index 7ca082ea0f..8d498deacb 100644 --- a/src/mc/mc_global.c +++ b/src/mc/mc_global.c @@ -4,6 +4,7 @@ /* This program is free software; you can redistribute it and/or modify it * under the terms of the license (GNU LGPL) which comes with this package. */ +#include #include #include @@ -125,9 +126,10 @@ void MC_init_pid(pid_t pid, int socket) iteration of the model-checker (in RAW memory) */ xbt_mheap_t heap = mmalloc_set_current_heap(mc_heap); - mc_model_checker = MC_model_checker_new(pid, socket); - if (mc_mode == MC_MODE_SERVER) { + + // It's not useful anymore: + if (0 && mc_mode == MC_MODE_SERVER) { unsigned long maxpid; MC_process_read_variable(&mc_model_checker->process, "simix_process_maxpid", &maxpid, sizeof(maxpid)); @@ -224,7 +226,9 @@ void MC_do_the_modelcheck_for_real() MC_client_main_loop(); return; case MC_MODE_SERVER: + break; case MC_MODE_STANDALONE: + MC_init(); break; } @@ -306,32 +310,6 @@ int MC_deadlock_check() return deadlock; } -void MC_handle_comm_pattern(e_mc_call_type_t call_type, smx_simcall_t req, int value, xbt_dynar_t pattern, int backtracking) { - - switch(call_type) { - case MC_CALL_TYPE_NONE: - break; - case MC_CALL_TYPE_SEND: - case MC_CALL_TYPE_RECV: - MC_get_comm_pattern(pattern, req, call_type, backtracking); - break; - case MC_CALL_TYPE_WAIT: - case MC_CALL_TYPE_WAITANY: - { - smx_synchro_t current_comm = NULL; - if (call_type == MC_CALL_TYPE_WAIT) - current_comm = simcall_comm_wait__get__comm(req); - else - current_comm = xbt_dynar_get_as(simcall_comm_waitany__get__comms(req), value, smx_synchro_t); - MC_complete_comm_pattern(pattern, current_comm, req->issuer->pid, backtracking); - } - break; - default: - xbt_die("Unexpected call type %i", (int)call_type); - } - -} - /** * \brief Re-executes from the state at position start all the transitions indicated by * a given model-checker stack. @@ -375,9 +353,13 @@ void MC_replay(xbt_fifo_t stack) MC_SET_MC_HEAP; if (_sg_mc_comms_determinism || _sg_mc_send_determinism) { - for (j=0; j < simix_process_maxpid; j++) { + // int n = xbt_dynar_length(incomplete_communications_pattern); + int n = MC_smx_get_maxpid(); + assert(n == xbt_dynar_length(incomplete_communications_pattern)); + assert(n == xbt_dynar_length(initial_communications_pattern)); + for (j=0; j < n ; j++) { xbt_dynar_reset((xbt_dynar_t)xbt_dynar_get_as(incomplete_communications_pattern, j, xbt_dynar_t)); - ((mc_list_comm_pattern_t)xbt_dynar_get_as(initial_communications_pattern, j, mc_list_comm_pattern_t))->index_comm = 0; + xbt_dynar_get_as(initial_communications_pattern, j, mc_list_comm_pattern_t)->index_comm = 0; } } @@ -416,7 +398,7 @@ void MC_replay(xbt_fifo_t stack) if (_sg_mc_comms_determinism || _sg_mc_send_determinism) MC_handle_comm_pattern(call, req, value, NULL, 1); MC_SET_STD_HEAP; - + MC_wait_for_requests(); count++; diff --git a/src/mc/mc_liveness.c b/src/mc/mc_liveness.c index cfd565aa53..8505daf83a 100644 --- a/src/mc/mc_liveness.c +++ b/src/mc/mc_liveness.c @@ -404,7 +404,6 @@ static void MC_modelcheck_liveness_main(void) void MC_modelcheck_liveness(void) { XBT_DEBUG("Starting the liveness algorithm"); - xbt_assert(mc_mode == MC_MODE_SERVER); _sg_mc_liveness = 1; xbt_mheap_t heap = mmalloc_set_current_heap(mc_heap); diff --git a/src/mc/mc_safety.c b/src/mc/mc_safety.c index 01c1d6b071..2efe90a2e2 100644 --- a/src/mc/mc_safety.c +++ b/src/mc/mc_safety.c @@ -271,7 +271,6 @@ static void MC_modelcheck_safety_main(void) void MC_modelcheck_safety(void) { XBT_DEBUG("Starting the safety algorithm"); - xbt_assert(mc_mode == MC_MODE_SERVER); _sg_mc_safety = 1;