Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Merge branch 'master' of git+ssh://scm.gforge.inria.fr//gitroot/simgrid/simgrid
[simgrid.git] / src / mc / mc_comm_determinism.cpp
1 /* Copyright (c) 2008-2015. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include "src/mc/mc_state.h"
8 #include "src/mc/mc_comm_pattern.h"
9 #include "src/mc/mc_request.h"
10 #include "src/mc/mc_safety.h"
11 #include "src/mc/mc_private.h"
12 #include "src/mc/mc_record.h"
13 #include "src/mc/mc_smx.h"
14 #include "src/mc/mc_client.h"
15 #include "src/mc/mc_exit.h"
16
17 using simgrid::mc::remote;
18
19 extern "C" {
20
21 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_comm_determinism, mc,
22                                 "Logging specific to MC communication determinism detection");
23
24 /********** Global variables **********/
25
26 xbt_dynar_t initial_communications_pattern;
27 xbt_dynar_t incomplete_communications_pattern;
28
29 /********** Static functions ***********/
30
31 static e_mc_comm_pattern_difference_t compare_comm_pattern(mc_comm_pattern_t comm1, mc_comm_pattern_t comm2) {
32   if(comm1->type != comm2->type)
33     return TYPE_DIFF;
34   if (strcmp(comm1->rdv, comm2->rdv) != 0)
35     return RDV_DIFF;
36   if (comm1->src_proc != comm2->src_proc)
37     return SRC_PROC_DIFF;
38   if (comm1->dst_proc != comm2->dst_proc)
39     return DST_PROC_DIFF;
40   if (comm1->tag != comm2->tag)
41     return TAG_DIFF;
42   if (comm1->data_size != comm2->data_size)
43     return DATA_SIZE_DIFF;
44   if(comm1->data == NULL && comm2->data == NULL)
45     return NONE_DIFF;
46   if(comm1->data != NULL && comm2->data !=NULL) {
47     if (!memcmp(comm1->data, comm2->data, comm1->data_size))
48       return NONE_DIFF;
49     return DATA_DIFF;
50   }else{
51     return DATA_DIFF;
52   }
53   return NONE_DIFF;
54 }
55
56 static char* print_determinism_result(e_mc_comm_pattern_difference_t diff, int process, mc_comm_pattern_t comm, unsigned int cursor) {
57   char *type, *res;
58
59   if(comm->type == SIMIX_COMM_SEND)
60     type = bprintf("The send communications pattern of the process %d is different!", process - 1);
61   else
62     type = bprintf("The recv communications pattern of the process %d is different!", process - 1);
63
64   switch(diff) {
65   case TYPE_DIFF:
66     res = bprintf("%s Different type for communication #%d", type, cursor);
67     break;
68   case RDV_DIFF:
69     res = bprintf("%s Different rdv for communication #%d", type, cursor);
70     break;
71   case TAG_DIFF:
72     res = bprintf("%s Different tag for communication #%d", type, cursor);
73     break;
74   case SRC_PROC_DIFF:
75       res = bprintf("%s Different source for communication #%d", type, cursor);
76     break;
77   case DST_PROC_DIFF:
78       res = bprintf("%s Different destination for communication #%d", type, cursor);
79     break;
80   case DATA_SIZE_DIFF:
81     res = bprintf("%s\n Different data size for communication #%d", type, cursor);
82     break;
83   case DATA_DIFF:
84     res = bprintf("%s\n Different data for communication #%d", type, cursor);
85     break;
86   default:
87     res = NULL;
88     break;
89   }
90
91   return res;
92 }
93
94 static void update_comm_pattern(mc_comm_pattern_t comm_pattern, smx_synchro_t comm_addr)
95 {
96   s_smx_synchro_t comm;
97   mc_model_checker->process().read(&comm, remote(comm_addr));
98
99   smx_process_t src_proc = MC_smx_resolve_process(comm.comm.src_proc);
100   smx_process_t dst_proc = MC_smx_resolve_process(comm.comm.dst_proc);
101   comm_pattern->src_proc = src_proc->pid;
102   comm_pattern->dst_proc = dst_proc->pid;
103   comm_pattern->src_host = MC_smx_process_get_host_name(src_proc);
104   comm_pattern->dst_host = MC_smx_process_get_host_name(dst_proc);
105   if (comm_pattern->data_size == -1 && comm.comm.src_buff != NULL) {
106     size_t buff_size;
107     mc_model_checker->process().read(
108       &buff_size, remote(comm.comm.dst_buff_size));
109     comm_pattern->data_size = buff_size;
110     comm_pattern->data = xbt_malloc0(comm_pattern->data_size);
111     mc_model_checker->process().read_bytes(
112       comm_pattern->data, comm_pattern->data_size,
113       remote(comm.comm.src_buff));
114   }
115 }
116
117 static void deterministic_comm_pattern(int process, mc_comm_pattern_t comm, int backtracking) {
118
119   mc_list_comm_pattern_t list =
120     xbt_dynar_get_as(initial_communications_pattern, process, mc_list_comm_pattern_t);
121
122   if(!backtracking){
123     mc_comm_pattern_t initial_comm =
124       xbt_dynar_get_as(list->list, list->index_comm, mc_comm_pattern_t);
125     e_mc_comm_pattern_difference_t diff =
126       compare_comm_pattern(initial_comm, comm);
127
128     if (diff != NONE_DIFF) {
129       if (comm->type == SIMIX_COMM_SEND){
130         initial_global_state->send_deterministic = 0;
131         if(initial_global_state->send_diff != NULL)
132           xbt_free(initial_global_state->send_diff);
133         initial_global_state->send_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
134       }else{
135         initial_global_state->recv_deterministic = 0;
136         if(initial_global_state->recv_diff != NULL)
137           xbt_free(initial_global_state->recv_diff);
138         initial_global_state->recv_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
139       }
140       if(_sg_mc_send_determinism && !initial_global_state->send_deterministic){
141         XBT_INFO("*********************************************************");
142         XBT_INFO("***** Non-send-deterministic communications pattern *****");
143         XBT_INFO("*********************************************************");
144         XBT_INFO("%s", initial_global_state->send_diff);
145         xbt_free(initial_global_state->send_diff);
146         initial_global_state->send_diff = NULL;
147         MC_print_statistics(mc_stats);
148         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
149       }else if(_sg_mc_comms_determinism && (!initial_global_state->send_deterministic && !initial_global_state->recv_deterministic)) {
150         XBT_INFO("****************************************************");
151         XBT_INFO("***** Non-deterministic communications pattern *****");
152         XBT_INFO("****************************************************");
153         XBT_INFO("%s", initial_global_state->send_diff);
154         XBT_INFO("%s", initial_global_state->recv_diff);
155         xbt_free(initial_global_state->send_diff);
156         initial_global_state->send_diff = NULL;
157         xbt_free(initial_global_state->recv_diff);
158         initial_global_state->recv_diff = NULL;
159         MC_print_statistics(mc_stats);
160         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
161       } 
162     }
163   }
164     
165   MC_comm_pattern_free(comm);
166
167 }
168
169 /********** Non Static functions ***********/
170
171 void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type_t call_type, int backtracking)
172 {
173   const smx_process_t issuer = MC_smx_simcall_get_issuer(request);
174   mc_list_comm_pattern_t initial_pattern = xbt_dynar_get_as(
175     initial_communications_pattern, issuer->pid, mc_list_comm_pattern_t);
176   xbt_dynar_t incomplete_pattern = xbt_dynar_get_as(
177     incomplete_communications_pattern, issuer->pid, xbt_dynar_t);
178
179   mc_comm_pattern_t pattern = xbt_new0(s_mc_comm_pattern_t, 1);
180   pattern->data_size = -1;
181   pattern->data = NULL;
182   pattern->index =
183     initial_pattern->index_comm + xbt_dynar_length(incomplete_pattern);
184
185   if (call_type == MC_CALL_TYPE_SEND) {
186     /* Create comm pattern */
187     pattern->type = SIMIX_COMM_SEND;
188     pattern->comm_addr = simcall_comm_isend__get__result(request);
189
190     s_smx_synchro_t synchro = mc_model_checker->process().read<s_smx_synchro_t>(
191       (std::uint64_t) pattern->comm_addr);
192
193     char* remote_name = mc_model_checker->process().read<char*>(
194       (std::uint64_t)(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
195     pattern->rdv = mc_model_checker->process().read_string(remote_name);
196     pattern->src_proc = MC_smx_resolve_process(synchro.comm.src_proc)->pid;
197     pattern->src_host = MC_smx_process_get_host_name(issuer);
198
199     struct s_smpi_mpi_request mpi_request =
200       mc_model_checker->process().read<s_smpi_mpi_request>(
201         (std::uint64_t) simcall_comm_isend__get__data(request));
202     pattern->tag = mpi_request.tag;
203
204     if(synchro.comm.src_buff != NULL){
205       pattern->data_size = synchro.comm.src_buff_size;
206       pattern->data = xbt_malloc0(pattern->data_size);
207       mc_model_checker->process().read_bytes(
208         pattern->data, pattern->data_size, remote(synchro.comm.src_buff));
209     }
210     if(mpi_request.detached){
211       if (!initial_global_state->initial_communications_pattern_done) {
212         /* Store comm pattern */
213         xbt_dynar_push(
214           xbt_dynar_get_as(
215             initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
216           )->list,
217           &pattern);
218       } else {
219         /* Evaluate comm determinism */
220         deterministic_comm_pattern(pattern->src_proc, pattern, backtracking);
221         xbt_dynar_get_as(
222           initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
223         )->index_comm++;
224       }
225       return;
226     }
227   } else if (call_type == MC_CALL_TYPE_RECV) {                      
228     pattern->type = SIMIX_COMM_RECEIVE;
229     pattern->comm_addr = simcall_comm_irecv__get__result(request);
230
231     struct s_smpi_mpi_request mpi_request;
232     mc_model_checker->process().read(
233       &mpi_request, remote((struct s_smpi_mpi_request*)simcall_comm_irecv__get__data(request)));
234     pattern->tag = mpi_request.tag;
235
236     s_smx_synchro_t synchro;
237     mc_model_checker->process().read(&synchro, remote(pattern->comm_addr));
238
239     char* remote_name;
240     mc_model_checker->process().read(&remote_name,
241       remote(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
242     pattern->rdv = mc_model_checker->process().read_string(remote_name);
243     pattern->dst_proc = MC_smx_resolve_process(synchro.comm.dst_proc)->pid;
244     pattern->dst_host = MC_smx_process_get_host_name(issuer);
245   } else {
246     xbt_die("Unexpected call_type %i", (int) call_type);
247   }
248
249   xbt_dynar_push(
250     xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t),
251     &pattern);
252
253   XBT_DEBUG("Insert incomplete comm pattern %p for process %lu", pattern, issuer->pid);
254 }
255
256 void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm_addr, unsigned int issuer, int backtracking) {
257   mc_comm_pattern_t current_comm_pattern;
258   unsigned int cursor = 0;
259   mc_comm_pattern_t comm_pattern;
260   int completed = 0;
261
262   /* Complete comm pattern */
263   xbt_dynar_foreach(xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, current_comm_pattern) {
264     if (current_comm_pattern->comm_addr == comm_addr) {
265       update_comm_pattern(current_comm_pattern, comm_addr);
266       completed = 1;
267       xbt_dynar_remove_at(
268         xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t),
269         cursor, &comm_pattern);
270       XBT_DEBUG("Remove incomplete comm pattern for process %u at cursor %u", issuer, cursor);
271       break;
272     }
273   }
274   if(!completed)
275     xbt_die("Corresponding communication not found!");
276
277   mc_list_comm_pattern_t pattern = xbt_dynar_get_as(
278     initial_communications_pattern, issuer, mc_list_comm_pattern_t);
279
280   if (!initial_global_state->initial_communications_pattern_done) {
281     /* Store comm pattern */
282     xbt_dynar_push(pattern->list, &comm_pattern);
283   } else {
284     /* Evaluate comm determinism */
285     deterministic_comm_pattern(issuer, comm_pattern, backtracking);
286     pattern->index_comm++;
287   }
288 }
289
290
291 /************************ Main algorithm ************************/
292
293 static int MC_modelcheck_comm_determinism_main(void);
294
295 static void MC_pre_modelcheck_comm_determinism(void)
296 {
297   mc_state_t initial_state = NULL;
298   smx_process_t process;
299   int i;
300   const int maxpid = MC_smx_get_maxpid();
301
302   if (_sg_mc_visited > 0)
303     visited_states = xbt_dynar_new(sizeof(mc_visited_state_t), visited_state_free_voidp);
304  
305   // Create initial_communications_pattern elements:
306   initial_communications_pattern = xbt_dynar_new(sizeof(mc_list_comm_pattern_t), MC_list_comm_pattern_free_voidp);
307   for (i=0; i < maxpid; i++){
308     mc_list_comm_pattern_t process_list_pattern = xbt_new0(s_mc_list_comm_pattern_t, 1);
309     process_list_pattern->list = xbt_dynar_new(sizeof(mc_comm_pattern_t), MC_comm_pattern_free_voidp);
310     process_list_pattern->index_comm = 0;
311     xbt_dynar_insert_at(initial_communications_pattern, i, &process_list_pattern);
312   }
313
314   // Create incomplete_communications_pattern elements:
315   incomplete_communications_pattern = xbt_dynar_new(sizeof(xbt_dynar_t), xbt_dynar_free_voidp);
316   for (i=0; i < maxpid; i++){
317     xbt_dynar_t process_pattern = xbt_dynar_new(sizeof(mc_comm_pattern_t), NULL);
318     xbt_dynar_insert_at(incomplete_communications_pattern, i, &process_pattern);
319   }
320
321   initial_state = MC_state_new();
322   
323   XBT_DEBUG("********* Start communication determinism verification *********");
324
325   /* Wait for requests (schedules processes) */
326   mc_model_checker->wait_for_requests();
327
328   /* Get an enabled process and insert it in the interleave set of the initial state */
329   MC_EACH_SIMIX_PROCESS(process,
330     if (MC_process_is_enabled(process)) {
331       MC_state_interleave_process(initial_state, process);
332     }
333   );
334
335   xbt_fifo_unshift(mc_stack, initial_state);
336 }
337
338 static int MC_modelcheck_comm_determinism_main(void)
339 {
340
341   char *req_str = NULL;
342   int value;
343   mc_visited_state_t visited_state = NULL;
344   smx_simcall_t req = NULL;
345   smx_process_t process = NULL;
346   mc_state_t state = NULL, next_state = NULL;
347
348   while (xbt_fifo_size(mc_stack) > 0) {
349
350     /* Get current state */
351     state = (mc_state_t) xbt_fifo_get_item_content(xbt_fifo_get_first_item(mc_stack));
352
353     XBT_DEBUG("**************************************************");
354     XBT_DEBUG("Exploration depth = %d (state = %d, interleaved processes = %d)",
355               xbt_fifo_size(mc_stack), state->num,
356               MC_state_interleave_size(state));
357
358     /* Update statistics */
359     mc_stats->visited_states++;
360
361     if ((xbt_fifo_size(mc_stack) <= _sg_mc_max_depth)
362         && (req = MC_state_get_request(state, &value))
363         && (visited_state == NULL)) {
364
365       req_str = MC_request_to_string(req, value, MC_REQUEST_SIMIX);
366       XBT_DEBUG("Execute: %s", req_str);
367       xbt_free(req_str);
368       
369       if (dot_output != NULL) {
370         req_str = MC_request_get_dot_output(req, value);
371       }
372
373       MC_state_set_executed_request(state, req, value);
374       mc_stats->executed_transitions++;
375
376       /* TODO : handle test and testany simcalls */
377       e_mc_call_type_t call = MC_CALL_TYPE_NONE;
378       if (_sg_mc_comms_determinism || _sg_mc_send_determinism) {
379         call = MC_get_call_type(req);
380       }
381
382       /* Answer the request */
383       MC_simcall_handle(req, value);    /* After this call req is no longer useful */
384
385       if(!initial_global_state->initial_communications_pattern_done)
386         MC_handle_comm_pattern(call, req, value, initial_communications_pattern, 0);
387       else
388         MC_handle_comm_pattern(call, req, value, NULL, 0);
389
390       /* Wait for requests (schedules processes) */
391       mc_model_checker->wait_for_requests();
392
393       /* Create the new expanded state */
394       next_state = MC_state_new();
395
396       if ((visited_state = is_visited_state(next_state)) == NULL) {
397
398         /* Get enabled processes and insert them in the interleave set of the next state */
399         MC_EACH_SIMIX_PROCESS(process,
400           if (MC_process_is_enabled(process)) {
401             MC_state_interleave_process(next_state, process);
402           }
403         );
404
405         if (dot_output != NULL)
406           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n", state->num,  next_state->num, req_str);
407
408       } else {
409
410         if (dot_output != NULL)
411           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n", state->num, visited_state->other_num == -1 ? visited_state->num : visited_state->other_num, req_str);
412
413       }
414
415       xbt_fifo_unshift(mc_stack, next_state);
416
417       if (dot_output != NULL)
418         xbt_free(req_str);
419
420     } else {
421
422       if (xbt_fifo_size(mc_stack) > _sg_mc_max_depth) {
423         XBT_WARN("/!\\ Max depth reached ! /!\\ ");
424       } else if (visited_state != NULL) {
425         XBT_DEBUG("State already visited (equal to state %d), exploration stopped on this path.", visited_state->other_num == -1 ? visited_state->num : visited_state->other_num);
426       } else {
427         XBT_DEBUG("There are no more processes to interleave. (depth %d)", xbt_fifo_size(mc_stack));
428       }
429
430       if (!initial_global_state->initial_communications_pattern_done) 
431         initial_global_state->initial_communications_pattern_done = 1;
432
433       /* Trash the current state, no longer needed */
434       xbt_fifo_shift(mc_stack);
435       MC_state_delete(state, !state->in_visited_states ? 1 : 0);
436       XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
437
438       visited_state = NULL;
439
440       /* Check for deadlocks */
441       if (MC_deadlock_check()) {
442         MC_show_deadlock(NULL);
443         return SIMGRID_MC_EXIT_DEADLOCK;
444       }
445
446       while ((state = (mc_state_t) xbt_fifo_shift(mc_stack)) != NULL) {
447         if (MC_state_interleave_size(state) && xbt_fifo_size(mc_stack) < _sg_mc_max_depth) {
448           /* We found a back-tracking point, let's loop */
449           XBT_DEBUG("Back-tracking to state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
450           xbt_fifo_unshift(mc_stack, state);
451
452           MC_replay(mc_stack);
453
454           XBT_DEBUG("Back-tracking to state %d at depth %d done", state->num, xbt_fifo_size(mc_stack));
455
456           break;
457         } else {
458           XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
459           MC_state_delete(state, !state->in_visited_states ? 1 : 0);
460         }
461       }
462     }
463   }
464
465   MC_print_statistics(mc_stats);
466   return SIMGRID_MC_EXIT_SUCCESS;
467 }
468
469 int MC_modelcheck_comm_determinism(void)
470 {
471   XBT_INFO("Check communication determinism");
472   mc_reduce_kind = e_mc_reduce_none;
473   mc_model_checker->wait_for_requests();
474
475   if (mc_mode == MC_MODE_CLIENT) {
476     // This will move somehwere else:
477     MC_client_handle_messages();
478   }
479
480   /* Create exploration stack */
481   mc_stack = xbt_fifo_new();
482
483   MC_pre_modelcheck_comm_determinism();
484
485   initial_global_state = xbt_new0(s_mc_global_t, 1);
486   initial_global_state->snapshot = simgrid::mc::take_snapshot(0);
487   initial_global_state->initial_communications_pattern_done = 0;
488   initial_global_state->recv_deterministic = 1;
489   initial_global_state->send_deterministic = 1;
490   initial_global_state->recv_diff = NULL;
491   initial_global_state->send_diff = NULL;
492
493   return MC_modelcheck_comm_determinism_main();
494 }
495
496 }