Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
6bb42f7b6e1ddd5e573d46c4c9c28e17b028f183
[simgrid.git] / src / mc / mc_comm_determinism.cpp
1 /* Copyright (c) 2008-2014. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include "mc_state.h"
8 #include "mc_comm_pattern.h"
9 #include "mc_request.h"
10 #include "mc_safety.h"
11 #include "mc_private.h"
12 #include "mc_record.h"
13 #include "mc_smx.h"
14 #include "mc_client.h"
15
16 using simgrid::mc::remote;
17
18 extern "C" {
19
20 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_comm_determinism, mc,
21                                 "Logging specific to MC communication determinism detection");
22
23 /********** Global variables **********/
24
25 xbt_dynar_t initial_communications_pattern;
26 xbt_dynar_t incomplete_communications_pattern;
27
28 /********** Static functions ***********/
29
30 static e_mc_comm_pattern_difference_t compare_comm_pattern(mc_comm_pattern_t comm1, mc_comm_pattern_t comm2) {
31   if(comm1->type != comm2->type)
32     return TYPE_DIFF;
33   if (strcmp(comm1->rdv, comm2->rdv) != 0)
34     return RDV_DIFF;
35   if (comm1->src_proc != comm2->src_proc)
36     return SRC_PROC_DIFF;
37   if (comm1->dst_proc != comm2->dst_proc)
38     return DST_PROC_DIFF;
39   if (comm1->tag != comm2->tag)
40     return TAG_DIFF;
41   if (comm1->data_size != comm2->data_size)
42     return DATA_SIZE_DIFF;
43   if(comm1->data == NULL && comm2->data == NULL)
44     return NONE_DIFF;
45   if(comm1->data != NULL && comm2->data !=NULL) {
46     if (!memcmp(comm1->data, comm2->data, comm1->data_size))
47       return NONE_DIFF;
48     return DATA_DIFF;
49   }else{
50     return DATA_DIFF;
51   }
52   return NONE_DIFF;
53 }
54
55 static char* print_determinism_result(e_mc_comm_pattern_difference_t diff, int process, mc_comm_pattern_t comm, unsigned int cursor) {
56   char *type, *res;
57
58   if(comm->type == SIMIX_COMM_SEND)
59     type = bprintf("The send communications pattern of the process %d is different!", process - 1);
60   else
61     type = bprintf("The recv communications pattern of the process %d is different!", process - 1);
62
63   switch(diff) {
64   case TYPE_DIFF:
65     res = bprintf("%s Different type for communication #%d", type, cursor);
66     break;
67   case RDV_DIFF:
68     res = bprintf("%s Different rdv for communication #%d", type, cursor);
69     break;
70   case TAG_DIFF:
71     res = bprintf("%s Different tag for communication #%d", type, cursor);
72     break;
73   case SRC_PROC_DIFF:
74       res = bprintf("%s Different source for communication #%d", type, cursor);
75     break;
76   case DST_PROC_DIFF:
77       res = bprintf("%s Different destination for communication #%d", type, cursor);
78     break;
79   case DATA_SIZE_DIFF:
80     res = bprintf("%s\n Different data size for communication #%d", type, cursor);
81     break;
82   case DATA_DIFF:
83     res = bprintf("%s\n Different data for communication #%d", type, cursor);
84     break;
85   default:
86     res = NULL;
87     break;
88   }
89
90   return res;
91 }
92
93 static void update_comm_pattern(mc_comm_pattern_t comm_pattern, smx_synchro_t comm_addr)
94 {
95   s_smx_synchro_t comm;
96   mc_model_checker->process().read(&comm, remote(comm_addr));
97
98   smx_process_t src_proc = MC_smx_resolve_process(comm.comm.src_proc);
99   smx_process_t dst_proc = MC_smx_resolve_process(comm.comm.dst_proc);
100   comm_pattern->src_proc = src_proc->pid;
101   comm_pattern->dst_proc = dst_proc->pid;
102   comm_pattern->src_host = MC_smx_process_get_host_name(src_proc);
103   comm_pattern->dst_host = MC_smx_process_get_host_name(dst_proc);
104   if (comm_pattern->data_size == -1 && comm.comm.src_buff != NULL) {
105     size_t buff_size;
106     mc_model_checker->process().read(
107       &buff_size, remote(comm.comm.dst_buff_size));
108     comm_pattern->data_size = buff_size;
109     comm_pattern->data = xbt_malloc0(comm_pattern->data_size);
110     mc_model_checker->process().read_bytes(
111       comm_pattern->data, comm_pattern->data_size,
112       remote(comm.comm.src_buff));
113   }
114 }
115
116 static void deterministic_comm_pattern(int process, mc_comm_pattern_t comm, int backtracking) {
117
118   mc_list_comm_pattern_t list =
119     xbt_dynar_get_as(initial_communications_pattern, process, mc_list_comm_pattern_t);
120
121   if(!backtracking){
122     mc_comm_pattern_t initial_comm =
123       xbt_dynar_get_as(list->list, list->index_comm, mc_comm_pattern_t);
124     e_mc_comm_pattern_difference_t diff =
125       compare_comm_pattern(initial_comm, comm);
126
127     if (diff != NONE_DIFF) {
128       if (comm->type == SIMIX_COMM_SEND){
129         initial_global_state->send_deterministic = 0;
130         if(initial_global_state->send_diff != NULL)
131           xbt_free(initial_global_state->send_diff);
132         initial_global_state->send_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
133       }else{
134         initial_global_state->recv_deterministic = 0;
135         if(initial_global_state->recv_diff != NULL)
136           xbt_free(initial_global_state->recv_diff);
137         initial_global_state->recv_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
138       }
139       if(_sg_mc_send_determinism && !initial_global_state->send_deterministic){
140         XBT_INFO("*********************************************************");
141         XBT_INFO("***** Non-send-deterministic communications pattern *****");
142         XBT_INFO("*********************************************************");
143         XBT_INFO("%s", initial_global_state->send_diff);
144         xbt_free(initial_global_state->send_diff);
145         initial_global_state->send_diff = NULL;
146         MC_print_statistics(mc_stats);
147         xbt_abort(); 
148       }else if(_sg_mc_comms_determinism && (!initial_global_state->send_deterministic && !initial_global_state->recv_deterministic)) {
149         XBT_INFO("****************************************************");
150         XBT_INFO("***** Non-deterministic communications pattern *****");
151         XBT_INFO("****************************************************");
152         XBT_INFO("%s", initial_global_state->send_diff);
153         XBT_INFO("%s", initial_global_state->recv_diff);
154         xbt_free(initial_global_state->send_diff);
155         initial_global_state->send_diff = NULL;
156         xbt_free(initial_global_state->recv_diff);
157         initial_global_state->recv_diff = NULL;
158         MC_print_statistics(mc_stats);
159         xbt_abort();
160       } 
161     }
162   }
163     
164   MC_comm_pattern_free(comm);
165
166 }
167
168 /********** Non Static functions ***********/
169
170 void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type_t call_type, int backtracking)
171 {
172   const smx_process_t issuer = MC_smx_simcall_get_issuer(request);
173   mc_list_comm_pattern_t initial_pattern = xbt_dynar_get_as(
174     initial_communications_pattern, issuer->pid, mc_list_comm_pattern_t);
175   xbt_dynar_t incomplete_pattern = xbt_dynar_get_as(
176     incomplete_communications_pattern, issuer->pid, xbt_dynar_t);
177
178   mc_comm_pattern_t pattern = xbt_new0(s_mc_comm_pattern_t, 1);
179   pattern->data_size = -1;
180   pattern->data = NULL;
181   pattern->index =
182     initial_pattern->index_comm + xbt_dynar_length(incomplete_pattern);
183
184   if (call_type == MC_CALL_TYPE_SEND) {
185     /* Create comm pattern */
186     pattern->type = SIMIX_COMM_SEND;
187     pattern->comm_addr = simcall_comm_isend__get__result(request);
188
189     s_smx_synchro_t synchro = mc_model_checker->process().read<s_smx_synchro_t>(
190       (std::uint64_t) pattern->comm_addr);
191
192     char* remote_name = mc_model_checker->process().read<char*>(
193       (std::uint64_t)(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
194     pattern->rdv =
195       MC_process_read_string(&mc_model_checker->process(), remote_name);
196     pattern->src_proc = MC_smx_resolve_process(synchro.comm.src_proc)->pid;
197     pattern->src_host = MC_smx_process_get_host_name(issuer);
198
199     struct s_smpi_mpi_request mpi_request =
200       mc_model_checker->process().read<s_smpi_mpi_request>(
201         (std::uint64_t) simcall_comm_isend__get__data(request));
202     pattern->tag = mpi_request.tag;
203
204     if(synchro.comm.src_buff != NULL){
205       pattern->data_size = synchro.comm.src_buff_size;
206       pattern->data = xbt_malloc0(pattern->data_size);
207       mc_model_checker->process().read_bytes(
208         pattern->data, pattern->data_size, remote(synchro.comm.src_buff));
209     }
210     if(mpi_request.detached){
211       if (!initial_global_state->initial_communications_pattern_done) {
212         /* Store comm pattern */
213         xbt_dynar_push(
214           xbt_dynar_get_as(
215             initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
216           )->list,
217           &pattern);
218       } else {
219         /* Evaluate comm determinism */
220         deterministic_comm_pattern(pattern->src_proc, pattern, backtracking);
221         xbt_dynar_get_as(
222           initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
223         )->index_comm++;
224       }
225       return;
226     }
227   } else if (call_type == MC_CALL_TYPE_RECV) {                      
228     pattern->type = SIMIX_COMM_RECEIVE;
229     pattern->comm_addr = simcall_comm_irecv__get__result(request);
230
231     struct s_smpi_mpi_request mpi_request;
232     mc_model_checker->process().read(
233       &mpi_request, remote((struct s_smpi_mpi_request*)simcall_comm_irecv__get__data(request)));
234     pattern->tag = mpi_request.tag;
235
236     s_smx_synchro_t synchro;
237     mc_model_checker->process().read(&synchro, remote(pattern->comm_addr));
238
239     char* remote_name;
240     mc_model_checker->process().read(&remote_name,
241       remote(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
242     pattern->rdv =
243       MC_process_read_string(&mc_model_checker->process(), remote_name);
244     pattern->dst_proc = MC_smx_resolve_process(synchro.comm.dst_proc)->pid;
245     pattern->dst_host = MC_smx_process_get_host_name(issuer);
246   } else {
247     xbt_die("Unexpected call_type %i", (int) call_type);
248   }
249
250   xbt_dynar_push(
251     xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t),
252     &pattern);
253
254   XBT_DEBUG("Insert incomplete comm pattern %p for process %lu", pattern, issuer->pid);
255 }
256
257 void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm_addr, unsigned int issuer, int backtracking) {
258   mc_comm_pattern_t current_comm_pattern;
259   unsigned int cursor = 0;
260   mc_comm_pattern_t comm_pattern;
261   int completed = 0;
262
263   /* Complete comm pattern */
264   xbt_dynar_foreach(xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, current_comm_pattern) {
265     if (current_comm_pattern->comm_addr == comm_addr) {
266       update_comm_pattern(current_comm_pattern, comm_addr);
267       completed = 1;
268       xbt_dynar_remove_at(
269         xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t),
270         cursor, &comm_pattern);
271       XBT_DEBUG("Remove incomplete comm pattern for process %u at cursor %u", issuer, cursor);
272       break;
273     }
274   }
275   if(!completed)
276     xbt_die("Corresponding communication not found!");
277
278   mc_list_comm_pattern_t pattern = xbt_dynar_get_as(
279     initial_communications_pattern, issuer, mc_list_comm_pattern_t);
280
281   if (!initial_global_state->initial_communications_pattern_done) {
282     /* Store comm pattern */
283     xbt_dynar_push(pattern->list, &comm_pattern);
284   } else {
285     /* Evaluate comm determinism */
286     deterministic_comm_pattern(issuer, comm_pattern, backtracking);
287     pattern->index_comm++;
288   }
289 }
290
291
292 /************************ Main algorithm ************************/
293
294 static void MC_modelcheck_comm_determinism_main(void);
295
296 static void MC_pre_modelcheck_comm_determinism(void)
297 {
298   mc_state_t initial_state = NULL;
299   smx_process_t process;
300   int i;
301   const int maxpid = MC_smx_get_maxpid();
302
303   if (_sg_mc_visited > 0)
304     visited_states = xbt_dynar_new(sizeof(mc_visited_state_t), visited_state_free_voidp);
305  
306   // Create initial_communications_pattern elements:
307   initial_communications_pattern = xbt_dynar_new(sizeof(mc_list_comm_pattern_t), MC_list_comm_pattern_free_voidp);
308   for (i=0; i < maxpid; i++){
309     mc_list_comm_pattern_t process_list_pattern = xbt_new0(s_mc_list_comm_pattern_t, 1);
310     process_list_pattern->list = xbt_dynar_new(sizeof(mc_comm_pattern_t), MC_comm_pattern_free_voidp);
311     process_list_pattern->index_comm = 0;
312     xbt_dynar_insert_at(initial_communications_pattern, i, &process_list_pattern);
313   }
314
315   // Create incomplete_communications_pattern elements:
316   incomplete_communications_pattern = xbt_dynar_new(sizeof(xbt_dynar_t), xbt_dynar_free_voidp);
317   for (i=0; i < maxpid; i++){
318     xbt_dynar_t process_pattern = xbt_dynar_new(sizeof(mc_comm_pattern_t), NULL);
319     xbt_dynar_insert_at(incomplete_communications_pattern, i, &process_pattern);
320   }
321
322   initial_state = MC_state_new();
323   
324   XBT_DEBUG("********* Start communication determinism verification *********");
325
326   /* Wait for requests (schedules processes) */
327   MC_wait_for_requests();
328
329   /* Get an enabled process and insert it in the interleave set of the initial state */
330   MC_EACH_SIMIX_PROCESS(process,
331     if (MC_process_is_enabled(process)) {
332       MC_state_interleave_process(initial_state, process);
333     }
334   );
335
336   xbt_fifo_unshift(mc_stack, initial_state);
337 }
338
339 static void MC_modelcheck_comm_determinism_main(void)
340 {
341
342   char *req_str = NULL;
343   int value;
344   mc_visited_state_t visited_state = NULL;
345   smx_simcall_t req = NULL;
346   smx_process_t process = NULL;
347   mc_state_t state = NULL, next_state = NULL;
348
349   while (xbt_fifo_size(mc_stack) > 0) {
350
351     /* Get current state */
352     state = (mc_state_t) xbt_fifo_get_item_content(xbt_fifo_get_first_item(mc_stack));
353
354     XBT_DEBUG("**************************************************");
355     XBT_DEBUG("Exploration depth = %d (state = %d, interleaved processes = %d)",
356               xbt_fifo_size(mc_stack), state->num,
357               MC_state_interleave_size(state));
358
359     /* Update statistics */
360     mc_stats->visited_states++;
361
362     if ((xbt_fifo_size(mc_stack) <= _sg_mc_max_depth)
363         && (req = MC_state_get_request(state, &value))
364         && (visited_state == NULL)) {
365
366       req_str = MC_request_to_string(req, value, MC_REQUEST_SIMIX);
367       XBT_DEBUG("Execute: %s", req_str);
368       xbt_free(req_str);
369       
370       if (dot_output != NULL) {
371         req_str = MC_request_get_dot_output(req, value);
372       }
373
374       MC_state_set_executed_request(state, req, value);
375       mc_stats->executed_transitions++;
376
377       /* TODO : handle test and testany simcalls */
378       e_mc_call_type_t call = MC_CALL_TYPE_NONE;
379       if (_sg_mc_comms_determinism || _sg_mc_send_determinism) {
380         call = MC_get_call_type(req);
381       }
382
383       /* Answer the request */
384       MC_simcall_handle(req, value);    /* After this call req is no longer useful */
385
386       if(!initial_global_state->initial_communications_pattern_done)
387         MC_handle_comm_pattern(call, req, value, initial_communications_pattern, 0);
388       else
389         MC_handle_comm_pattern(call, req, value, NULL, 0);
390
391       /* Wait for requests (schedules processes) */
392       MC_wait_for_requests();
393
394       /* Create the new expanded state */
395       next_state = MC_state_new();
396
397       if ((visited_state = is_visited_state(next_state)) == NULL) {
398
399         /* Get enabled processes and insert them in the interleave set of the next state */
400         MC_EACH_SIMIX_PROCESS(process,
401           if (MC_process_is_enabled(process)) {
402             MC_state_interleave_process(next_state, process);
403           }
404         );
405
406         if (dot_output != NULL)
407           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n", state->num,  next_state->num, req_str);
408
409       } else {
410
411         if (dot_output != NULL)
412           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n", state->num, visited_state->other_num == -1 ? visited_state->num : visited_state->other_num, req_str);
413
414       }
415
416       xbt_fifo_unshift(mc_stack, next_state);
417
418       if (dot_output != NULL)
419         xbt_free(req_str);
420
421     } else {
422
423       if (xbt_fifo_size(mc_stack) > _sg_mc_max_depth) {
424         XBT_WARN("/!\\ Max depth reached ! /!\\ ");
425       } else if (visited_state != NULL) {
426         XBT_DEBUG("State already visited (equal to state %d), exploration stopped on this path.", visited_state->other_num == -1 ? visited_state->num : visited_state->other_num);
427       } else {
428         XBT_DEBUG("There are no more processes to interleave. (depth %d)", xbt_fifo_size(mc_stack));
429       }
430
431       if (!initial_global_state->initial_communications_pattern_done) 
432         initial_global_state->initial_communications_pattern_done = 1;
433
434       /* Trash the current state, no longer needed */
435       xbt_fifo_shift(mc_stack);
436       MC_state_delete(state, !state->in_visited_states ? 1 : 0);
437       XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
438
439       visited_state = NULL;
440
441       /* Check for deadlocks */
442       if (MC_deadlock_check()) {
443         MC_show_deadlock(NULL);
444         return;
445       }
446
447       while ((state = (mc_state_t) xbt_fifo_shift(mc_stack)) != NULL) {
448         if (MC_state_interleave_size(state) && xbt_fifo_size(mc_stack) < _sg_mc_max_depth) {
449           /* We found a back-tracking point, let's loop */
450           XBT_DEBUG("Back-tracking to state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
451           xbt_fifo_unshift(mc_stack, state);
452
453           MC_replay(mc_stack);
454
455           XBT_DEBUG("Back-tracking to state %d at depth %d done", state->num, xbt_fifo_size(mc_stack));
456
457           break;
458         } else {
459           XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
460           MC_state_delete(state, !state->in_visited_states ? 1 : 0);
461         }
462       }
463     }
464   }
465
466   MC_print_statistics(mc_stats);
467   exit(0);
468 }
469
470 void MC_modelcheck_comm_determinism(void)
471 {
472   XBT_INFO("Check communication determinism");
473   mc_reduce_kind = e_mc_reduce_none;
474   MC_wait_for_requests();
475
476   if (mc_mode == MC_MODE_CLIENT) {
477     // This will move somehwere else:
478     MC_client_handle_messages();
479   }
480
481   /* Create exploration stack */
482   mc_stack = xbt_fifo_new();
483
484   MC_pre_modelcheck_comm_determinism();
485
486   initial_global_state = xbt_new0(s_mc_global_t, 1);
487   initial_global_state->snapshot = MC_take_snapshot(0);
488   initial_global_state->initial_communications_pattern_done = 0;
489   initial_global_state->recv_deterministic = 1;
490   initial_global_state->send_deterministic = 1;
491   initial_global_state->recv_diff = NULL;
492   initial_global_state->send_diff = NULL;
493
494   MC_modelcheck_comm_determinism_main();
495 }
496
497 }