Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
[mc] Cross-process MC/safety implementation
[simgrid.git] / src / mc / mc_comm_determinism.c
1 /* Copyright (c) 2008-2014. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include "mc_state.h"
8 #include "mc_comm_pattern.h"
9 #include "mc_request.h"
10 #include "mc_safety.h"
11 #include "mc_private.h"
12 #include "mc_record.h"
13 #include "mc_smx.h"
14
15 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_comm_determinism, mc,
16                                 "Logging specific to MC communication determinism detection");
17
18 /********** Global variables **********/
19
/* Reference communication patterns, one mc_list_comm_pattern_t slot per pid
 * (slots 0 .. MC_smx_get_maxpid()-1 are created by
 * MC_pre_modelcheck_comm_determinism()). Each slot holds the list of
 * patterns recorded so far and the index (index_comm) of the next one
 * to compare against. */
20 xbt_dynar_t initial_communications_pattern;
/* Patterns whose communication is still pending, one dynar of
 * mc_comm_pattern_t per pid: MC_get_comm_pattern() pushes into it and
 * MC_complete_comm_pattern() removes the matching entry. */
21 xbt_dynar_t incomplete_communications_pattern;
22
23 /********** Static functions ***********/
24
25 static e_mc_comm_pattern_difference_t compare_comm_pattern(mc_comm_pattern_t comm1, mc_comm_pattern_t comm2) {
26   if(comm1->type != comm2->type)
27     return TYPE_DIFF;
28   if (strcmp(comm1->rdv, comm2->rdv) != 0)
29     return RDV_DIFF;
30   if (comm1->src_proc != comm2->src_proc)
31     return SRC_PROC_DIFF;
32   if (comm1->dst_proc != comm2->dst_proc)
33     return DST_PROC_DIFF;
34   if (comm1->tag != comm2->tag)
35     return TAG_DIFF;
36   if (comm1->data_size != comm2->data_size)
37     return DATA_SIZE_DIFF;
38   if(comm1->data == NULL && comm2->data == NULL)
39     return 0;
40   if(comm1->data != NULL && comm2->data !=NULL) {
41     if (!memcmp(comm1->data, comm2->data, comm1->data_size))
42       return 0;
43     return DATA_DIFF;
44   }else{
45     return DATA_DIFF;
46   }
47   return 0;
48 }
49
50 static char* print_determinism_result(e_mc_comm_pattern_difference_t diff, int process, mc_comm_pattern_t comm, unsigned int cursor) {
51   char *type, *res;
52
53   if(comm->type == SIMIX_COMM_SEND)
54     type = bprintf("The send communications pattern of the process %d is different!", process - 1);
55   else
56     type = bprintf("The recv communications pattern of the process %d is different!", process - 1);
57
58   switch(diff) {
59   case TYPE_DIFF:
60     res = bprintf("%s Different type for communication #%d", type, cursor);
61     break;
62   case RDV_DIFF:
63     res = bprintf("%s Different rdv for communication #%d", type, cursor);
64     break;
65   case TAG_DIFF:
66     res = bprintf("%s Different tag for communication #%d", type, cursor);
67     break;
68   case SRC_PROC_DIFF:
69       res = bprintf("%s Different source for communication #%d", type, cursor);
70     break;
71   case DST_PROC_DIFF:
72       res = bprintf("%s Different destination for communication #%d", type, cursor);
73     break;
74   case DATA_SIZE_DIFF:
75     res = bprintf("%s\n Different data size for communication #%d", type, cursor);
76     break;
77   case DATA_DIFF:
78     res = bprintf("%s\n Different data for communication #%d", type, cursor);
79     break;
80   default:
81     res = NULL;
82     break;
83   }
84
85   return res;
86 }
87
88 static void update_comm_pattern(mc_comm_pattern_t comm_pattern, smx_synchro_t comm)
89 {
90   smx_process_t src_proc = MC_smx_resolve_process(comm->comm.src_proc);
91   smx_process_t dst_proc = MC_smx_resolve_process(comm->comm.dst_proc);
92   comm_pattern->src_proc = src_proc->pid;
93   comm_pattern->dst_proc = dst_proc->pid;
94   comm_pattern->src_host = MC_smx_process_get_host_name(src_proc);
95   comm_pattern->dst_host = MC_smx_process_get_host_name(dst_proc);
96   if (comm_pattern->data_size == -1 && comm->comm.src_buff != NULL) {
97     comm_pattern->data_size = *(comm->comm.dst_buff_size);
98     comm_pattern->data = xbt_malloc0(comm_pattern->data_size);
99     MC_process_read_simple(&mc_model_checker->process,
100       comm_pattern->data, comm->comm.src_buff, comm_pattern->data_size);
101   }
102 }
103
104 static void deterministic_comm_pattern(int process, mc_comm_pattern_t comm, int backtracking) {
105
106   mc_list_comm_pattern_t list_comm_pattern = (mc_list_comm_pattern_t)xbt_dynar_get_as(initial_communications_pattern, process, mc_list_comm_pattern_t);
107
108   if(!backtracking){
109     mc_comm_pattern_t initial_comm = xbt_dynar_get_as(list_comm_pattern->list, list_comm_pattern->index_comm, mc_comm_pattern_t);
110     e_mc_comm_pattern_difference_t diff;
111     
112     if((diff = compare_comm_pattern(initial_comm, comm)) != NONE_DIFF){
113       if (comm->type == SIMIX_COMM_SEND){
114         initial_global_state->send_deterministic = 0;
115         if(initial_global_state->send_diff != NULL)
116           xbt_free(initial_global_state->send_diff);
117         initial_global_state->send_diff = print_determinism_result(diff, process, comm, list_comm_pattern->index_comm + 1);
118       }else{
119         initial_global_state->recv_deterministic = 0;
120         if(initial_global_state->recv_diff != NULL)
121           xbt_free(initial_global_state->recv_diff);
122         initial_global_state->recv_diff = print_determinism_result(diff, process, comm, list_comm_pattern->index_comm + 1);
123       }
124       if(_sg_mc_send_determinism && !initial_global_state->send_deterministic){
125         XBT_INFO("*********************************************************");
126         XBT_INFO("***** Non-send-deterministic communications pattern *****");
127         XBT_INFO("*********************************************************");
128         XBT_INFO("%s", initial_global_state->send_diff);
129         xbt_free(initial_global_state->send_diff);
130         initial_global_state->send_diff = NULL;
131         MC_print_statistics(mc_stats);
132         xbt_abort(); 
133       }else if(_sg_mc_comms_determinism && (!initial_global_state->send_deterministic && !initial_global_state->recv_deterministic)) {
134         XBT_INFO("****************************************************");
135         XBT_INFO("***** Non-deterministic communications pattern *****");
136         XBT_INFO("****************************************************");
137         XBT_INFO("%s", initial_global_state->send_diff);
138         XBT_INFO("%s", initial_global_state->recv_diff);
139         xbt_free(initial_global_state->send_diff);
140         initial_global_state->send_diff = NULL;
141         xbt_free(initial_global_state->recv_diff);
142         initial_global_state->recv_diff = NULL;
143         MC_print_statistics(mc_stats);
144         xbt_abort();
145       } 
146     }
147   }
148     
149   MC_comm_pattern_free(comm);
150
151 }
152
153 /********** Non Static functions ***********/
154
155 void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type_t call_type, int backtracking)
156 {
157   mc_comm_pattern_t pattern = xbt_new0(s_mc_comm_pattern_t, 1);
158   pattern->data_size = -1;
159   pattern->data = NULL;
160
161   // Fill initial_pattern->index_comm:
162   const smx_process_t issuer = MC_smx_simcall_get_issuer(request);
163   mc_list_comm_pattern_t initial_pattern =
164     (mc_list_comm_pattern_t) xbt_dynar_get_as(initial_communications_pattern, issuer->pid, mc_list_comm_pattern_t);
165   xbt_dynar_t incomplete_pattern =
166     (xbt_dynar_t) xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t);
167   pattern->index =
168     initial_pattern->index_comm + xbt_dynar_length(incomplete_pattern);
169
170   
171   if (call_type == MC_CALL_TYPE_SEND) {
172     /* Create comm pattern */
173     pattern->type = SIMIX_COMM_SEND;
174     pattern->comm = simcall_comm_isend__get__result(request);
175
176     s_smx_synchro_t synchro;
177     MC_process_read_simple(&mc_model_checker->process,
178       &synchro, pattern->comm, sizeof(synchro));
179
180     char* remote_name;
181     MC_process_read_simple(&mc_model_checker->process, &remote_name,
182       synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name,
183       sizeof(remote_name));
184     pattern->rdv =
185       MC_process_read_string(&mc_model_checker->process, remote_name);
186     pattern->src_proc = MC_smx_resolve_process(synchro.comm.src_proc)->pid;
187     pattern->src_host = MC_smx_process_get_host_name(issuer);
188
189     struct s_smpi_mpi_request mpi_request;
190     MC_process_read_simple(&mc_model_checker->process,
191       &mpi_request, (MPI_Request) simcall_comm_isend__get__data(request),
192       sizeof(mpi_request));
193     pattern->tag = mpi_request.tag;
194
195     if(synchro.comm.src_buff != NULL){
196       pattern->data_size = synchro.comm.src_buff_size;
197       pattern->data = xbt_malloc0(pattern->data_size);
198       MC_process_read_simple(&mc_model_checker->process,
199         pattern->data, synchro.comm.src_buff, pattern->data_size);
200     }
201     if(mpi_request.detached){
202       if (!initial_global_state->initial_communications_pattern_done) {
203         /* Store comm pattern */
204         xbt_dynar_push(((mc_list_comm_pattern_t)xbt_dynar_get_as(initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t))->list, &pattern);
205       } else {
206         /* Evaluate comm determinism */
207         deterministic_comm_pattern(pattern->src_proc, pattern, backtracking);
208         ((mc_list_comm_pattern_t)xbt_dynar_get_as(initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t))->index_comm++;
209       }
210       return;
211     }
212   } else if (call_type == MC_CALL_TYPE_RECV) {                      
213     pattern->type = SIMIX_COMM_RECEIVE;
214     pattern->comm = simcall_comm_irecv__get__result(request);
215
216     struct s_smpi_mpi_request mpi_request;
217     MC_process_read_simple(&mc_model_checker->process,
218       &mpi_request, (MPI_Request) simcall_comm_irecv__get__data(request),
219       sizeof(mpi_request));
220     pattern->tag = mpi_request.tag;
221
222     s_smx_synchro_t synchro;
223     MC_process_read_simple(&mc_model_checker->process,
224       &synchro, pattern->comm, sizeof(synchro));
225
226     char* remote_name;
227     MC_process_read_simple(&mc_model_checker->process, &remote_name,
228       synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name,
229       sizeof(remote_name));
230     pattern->rdv =
231       MC_process_read_string(&mc_model_checker->process, remote_name);
232     pattern->dst_proc = MC_smx_resolve_process(synchro.comm.dst_proc)->pid;
233     pattern->dst_host = MC_smx_process_get_host_name(issuer);
234   } else {
235     xbt_die("Unexpected call_type %i", (int) call_type);
236   }
237
238   xbt_dynar_push((xbt_dynar_t)xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t), &pattern);
239
240   XBT_DEBUG("Insert incomplete comm pattern %p for process %lu", pattern, issuer->pid);
241 }
242
243 void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm, unsigned int issuer, int backtracking) {
244   mc_comm_pattern_t current_comm_pattern;
245   unsigned int cursor = 0;
246   mc_comm_pattern_t comm_pattern;
247   int completed = 0;
248
249   /* Complete comm pattern */
250   xbt_dynar_foreach((xbt_dynar_t)xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, current_comm_pattern) {
251     if (current_comm_pattern-> comm == comm) {
252       update_comm_pattern(current_comm_pattern, comm);
253       completed = 1;
254       xbt_dynar_remove_at((xbt_dynar_t)xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, &comm_pattern);
255       XBT_DEBUG("Remove incomplete comm pattern for process %u at cursor %u", issuer, cursor);
256       break;
257     }
258   }
259   if(!completed)
260     xbt_die("Corresponding communication not found!");
261
262   if (!initial_global_state->initial_communications_pattern_done) {
263     /* Store comm pattern */
264     xbt_dynar_push(((mc_list_comm_pattern_t)xbt_dynar_get_as(initial_communications_pattern, issuer, mc_list_comm_pattern_t))->list, &comm_pattern);
265   } else {
266     /* Evaluate comm determinism */
267     deterministic_comm_pattern(issuer, comm_pattern, backtracking);
268     ((mc_list_comm_pattern_t)xbt_dynar_get_as(initial_communications_pattern, issuer, mc_list_comm_pattern_t))->index_comm++;
269   }
270 }
271
272
273 /************************ Main algorithm ************************/
274
275 void MC_pre_modelcheck_comm_determinism(void)
276 {
277   MC_SET_MC_HEAP;
278
279   mc_state_t initial_state = NULL;
280   smx_process_t process;
281   int i;
282
283   if (_sg_mc_visited > 0)
284     visited_states = xbt_dynar_new(sizeof(mc_visited_state_t), visited_state_free_voidp);
285  
286   initial_communications_pattern = xbt_dynar_new(sizeof(mc_list_comm_pattern_t), MC_list_comm_pattern_free_voidp);
287   for (i=0; i < MC_smx_get_maxpid(); i++){
288     mc_list_comm_pattern_t process_list_pattern = xbt_new0(s_mc_list_comm_pattern_t, 1);
289     process_list_pattern->list = xbt_dynar_new(sizeof(mc_comm_pattern_t), MC_comm_pattern_free_voidp);
290     process_list_pattern->index_comm = 0;
291     xbt_dynar_insert_at(initial_communications_pattern, i, &process_list_pattern);
292   }
293   incomplete_communications_pattern = xbt_dynar_new(sizeof(xbt_dynar_t), xbt_dynar_free_voidp);
294   for (i=0; i < MC_smx_get_maxpid(); i++){
295     xbt_dynar_t process_pattern = xbt_dynar_new(sizeof(mc_comm_pattern_t), NULL);
296     xbt_dynar_insert_at(incomplete_communications_pattern, i, &process_pattern);
297   }
298
299   initial_state = MC_state_new();
300   MC_SET_STD_HEAP;
301   
302   XBT_DEBUG("********* Start communication determinism verification *********");
303
304   /* Wait for requests (schedules processes) */
305   MC_wait_for_requests();
306
307   MC_SET_MC_HEAP;
308
309   /* Get an enabled process and insert it in the interleave set of the initial state */
310   MC_EACH_SIMIX_PROCESS(process,
311     if (MC_process_is_enabled(process)) {
312       MC_state_interleave_process(initial_state, process);
313     }
314   );
315
316   xbt_fifo_unshift(mc_stack, initial_state);
317
318   MC_SET_STD_HEAP;
319
320 }
321
322 void MC_modelcheck_comm_determinism(void)
323 {
324
325   char *req_str = NULL;
326   int value;
327   mc_visited_state_t visited_state = NULL;
328   smx_simcall_t req = NULL;
329   smx_process_t process = NULL;
330   mc_state_t state = NULL, next_state = NULL;
331
332   while (xbt_fifo_size(mc_stack) > 0) {
333
334     /* Get current state */
335     state = (mc_state_t) xbt_fifo_get_item_content(xbt_fifo_get_first_item(mc_stack));
336
337     XBT_DEBUG("**************************************************");
338     XBT_DEBUG("Exploration depth = %d (state = %d, interleaved processes = %d)",
339               xbt_fifo_size(mc_stack), state->num,
340               MC_state_interleave_size(state));
341
342     /* Update statistics */
343     mc_stats->visited_states++;
344
345     if ((xbt_fifo_size(mc_stack) <= _sg_mc_max_depth)
346         && (req = MC_state_get_request(state, &value))
347         && (visited_state == NULL)) {
348
349       req_str = MC_request_to_string(req, value, MC_REQUEST_SIMIX);
350       XBT_DEBUG("Execute: %s", req_str);
351       xbt_free(req_str);
352       
353       if (dot_output != NULL) {
354         MC_SET_MC_HEAP;
355         req_str = MC_request_get_dot_output(req, value);
356         MC_SET_STD_HEAP;
357       }
358
359       MC_state_set_executed_request(state, req, value);
360       mc_stats->executed_transitions++;
361
362       /* TODO : handle test and testany simcalls */
363       e_mc_call_type_t call = MC_CALL_TYPE_NONE;
364       if (_sg_mc_comms_determinism || _sg_mc_send_determinism) {
365         call = MC_get_call_type(req);
366       }
367
368       /* Answer the request */
369       MC_simcall_handle(req, value);    /* After this call req is no longer useful */
370
371       MC_SET_MC_HEAP;
372       if(!initial_global_state->initial_communications_pattern_done)
373         MC_handle_comm_pattern(call, req, value, initial_communications_pattern, 0);
374       else
375         MC_handle_comm_pattern(call, req, value, NULL, 0);
376       MC_SET_STD_HEAP;
377
378       /* Wait for requests (schedules processes) */
379       MC_wait_for_requests();
380
381       /* Create the new expanded state */
382       MC_SET_MC_HEAP;
383
384       next_state = MC_state_new();
385
386       if ((visited_state = is_visited_state(next_state)) == NULL) {
387
388         /* Get enabled processes and insert them in the interleave set of the next state */
389         MC_EACH_SIMIX_PROCESS(process,
390           if (MC_process_is_enabled(process)) {
391             MC_state_interleave_process(next_state, process);
392           }
393         );
394
395         if (dot_output != NULL)
396           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n", state->num,  next_state->num, req_str);
397
398       } else {
399
400         if (dot_output != NULL)
401           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n", state->num, visited_state->other_num == -1 ? visited_state->num : visited_state->other_num, req_str);
402
403       }
404
405       xbt_fifo_unshift(mc_stack, next_state);
406
407       if (dot_output != NULL)
408         xbt_free(req_str);
409
410       MC_SET_STD_HEAP;
411
412     } else {
413
414       if (xbt_fifo_size(mc_stack) > _sg_mc_max_depth) {
415         XBT_WARN("/!\\ Max depth reached ! /!\\ ");
416       } else if (visited_state != NULL) {
417         XBT_DEBUG("State already visited (equal to state %d), exploration stopped on this path.", visited_state->other_num == -1 ? visited_state->num : visited_state->other_num);
418       } else {
419         XBT_DEBUG("There are no more processes to interleave. (depth %d)", xbt_fifo_size(mc_stack));
420       }
421
422       MC_SET_MC_HEAP;
423
424       if (!initial_global_state->initial_communications_pattern_done) 
425         initial_global_state->initial_communications_pattern_done = 1;
426
427       /* Trash the current state, no longer needed */
428       xbt_fifo_shift(mc_stack);
429       MC_state_delete(state, !state->in_visited_states ? 1 : 0);
430       XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
431
432       MC_SET_STD_HEAP;
433
434       visited_state = NULL;
435
436       /* Check for deadlocks */
437       if (MC_deadlock_check()) {
438         MC_show_deadlock(NULL);
439         return;
440       }
441
442       MC_SET_MC_HEAP;
443
444       while ((state = xbt_fifo_shift(mc_stack)) != NULL) {
445         if (MC_state_interleave_size(state) && xbt_fifo_size(mc_stack) < _sg_mc_max_depth) {
446           /* We found a back-tracking point, let's loop */
447           XBT_DEBUG("Back-tracking to state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
448           xbt_fifo_unshift(mc_stack, state);
449           MC_SET_STD_HEAP;
450
451           MC_replay(mc_stack);
452
453           XBT_DEBUG("Back-tracking to state %d at depth %d done", state->num, xbt_fifo_size(mc_stack));
454
455           break;
456         } else {
457           XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
458           MC_state_delete(state, !state->in_visited_states ? 1 : 0);
459         }
460       }
461
462       MC_SET_STD_HEAP;
463     }
464   }
465
466   MC_print_statistics(mc_stats);
467   MC_SET_STD_HEAP;
468
469   return;
470 }