Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
20236c1170356bc179e2c4e945fff3495497283a
[simgrid.git] / src / mc / mc_comm_determinism.cpp
1 /* Copyright (c) 2008-2014. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include "mc_state.h"
8 #include "mc_comm_pattern.h"
9 #include "mc_request.h"
10 #include "mc_safety.h"
11 #include "mc_private.h"
12 #include "mc_record.h"
13 #include "mc_smx.h"
14 #include "mc_client.h"
15
16 extern "C" {
17
18 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_comm_determinism, mc,
19                                 "Logging specific to MC communication determinism detection");
20
21 /********** Global variables **********/
22
23 xbt_dynar_t initial_communications_pattern;
24 xbt_dynar_t incomplete_communications_pattern;
25
26 /********** Static functions ***********/
27
28 static e_mc_comm_pattern_difference_t compare_comm_pattern(mc_comm_pattern_t comm1, mc_comm_pattern_t comm2) {
29   if(comm1->type != comm2->type)
30     return TYPE_DIFF;
31   if (strcmp(comm1->rdv, comm2->rdv) != 0)
32     return RDV_DIFF;
33   if (comm1->src_proc != comm2->src_proc)
34     return SRC_PROC_DIFF;
35   if (comm1->dst_proc != comm2->dst_proc)
36     return DST_PROC_DIFF;
37   if (comm1->tag != comm2->tag)
38     return TAG_DIFF;
39   if (comm1->data_size != comm2->data_size)
40     return DATA_SIZE_DIFF;
41   if(comm1->data == NULL && comm2->data == NULL)
42     return NONE_DIFF;
43   if(comm1->data != NULL && comm2->data !=NULL) {
44     if (!memcmp(comm1->data, comm2->data, comm1->data_size))
45       return NONE_DIFF;
46     return DATA_DIFF;
47   }else{
48     return DATA_DIFF;
49   }
50   return NONE_DIFF;
51 }
52
53 static char* print_determinism_result(e_mc_comm_pattern_difference_t diff, int process, mc_comm_pattern_t comm, unsigned int cursor) {
54   char *type, *res;
55
56   if(comm->type == SIMIX_COMM_SEND)
57     type = bprintf("The send communications pattern of the process %d is different!", process - 1);
58   else
59     type = bprintf("The recv communications pattern of the process %d is different!", process - 1);
60
61   switch(diff) {
62   case TYPE_DIFF:
63     res = bprintf("%s Different type for communication #%d", type, cursor);
64     break;
65   case RDV_DIFF:
66     res = bprintf("%s Different rdv for communication #%d", type, cursor);
67     break;
68   case TAG_DIFF:
69     res = bprintf("%s Different tag for communication #%d", type, cursor);
70     break;
71   case SRC_PROC_DIFF:
72       res = bprintf("%s Different source for communication #%d", type, cursor);
73     break;
74   case DST_PROC_DIFF:
75       res = bprintf("%s Different destination for communication #%d", type, cursor);
76     break;
77   case DATA_SIZE_DIFF:
78     res = bprintf("%s\n Different data size for communication #%d", type, cursor);
79     break;
80   case DATA_DIFF:
81     res = bprintf("%s\n Different data for communication #%d", type, cursor);
82     break;
83   default:
84     res = NULL;
85     break;
86   }
87
88   return res;
89 }
90
91 static void update_comm_pattern(mc_comm_pattern_t comm_pattern, smx_synchro_t comm_addr)
92 {
93   s_smx_synchro_t comm;
94   mc_model_checker->process().read_bytes(
95     &comm, sizeof(comm), (std::uint64_t)comm_addr);
96
97   smx_process_t src_proc = MC_smx_resolve_process(comm.comm.src_proc);
98   smx_process_t dst_proc = MC_smx_resolve_process(comm.comm.dst_proc);
99   comm_pattern->src_proc = src_proc->pid;
100   comm_pattern->dst_proc = dst_proc->pid;
101   comm_pattern->src_host = MC_smx_process_get_host_name(src_proc);
102   comm_pattern->dst_host = MC_smx_process_get_host_name(dst_proc);
103   if (comm_pattern->data_size == -1 && comm.comm.src_buff != NULL) {
104     size_t buff_size;
105     mc_model_checker->process().read_bytes(
106       &buff_size, sizeof(buff_size), (std::uint64_t)comm.comm.dst_buff_size);
107     comm_pattern->data_size = buff_size;
108     comm_pattern->data = xbt_malloc0(comm_pattern->data_size);
109     mc_model_checker->process().read_bytes(
110       comm_pattern->data, comm_pattern->data_size,
111       (std::uint64_t) comm.comm.src_buff);
112   }
113 }
114
115 static void deterministic_comm_pattern(int process, mc_comm_pattern_t comm, int backtracking) {
116
117   mc_list_comm_pattern_t list =
118     xbt_dynar_get_as(initial_communications_pattern, process, mc_list_comm_pattern_t);
119
120   if(!backtracking){
121     mc_comm_pattern_t initial_comm =
122       xbt_dynar_get_as(list->list, list->index_comm, mc_comm_pattern_t);
123     e_mc_comm_pattern_difference_t diff =
124       compare_comm_pattern(initial_comm, comm);
125
126     if (diff != NONE_DIFF) {
127       if (comm->type == SIMIX_COMM_SEND){
128         initial_global_state->send_deterministic = 0;
129         if(initial_global_state->send_diff != NULL)
130           xbt_free(initial_global_state->send_diff);
131         initial_global_state->send_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
132       }else{
133         initial_global_state->recv_deterministic = 0;
134         if(initial_global_state->recv_diff != NULL)
135           xbt_free(initial_global_state->recv_diff);
136         initial_global_state->recv_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
137       }
138       if(_sg_mc_send_determinism && !initial_global_state->send_deterministic){
139         XBT_INFO("*********************************************************");
140         XBT_INFO("***** Non-send-deterministic communications pattern *****");
141         XBT_INFO("*********************************************************");
142         XBT_INFO("%s", initial_global_state->send_diff);
143         xbt_free(initial_global_state->send_diff);
144         initial_global_state->send_diff = NULL;
145         MC_print_statistics(mc_stats);
146         xbt_abort(); 
147       }else if(_sg_mc_comms_determinism && (!initial_global_state->send_deterministic && !initial_global_state->recv_deterministic)) {
148         XBT_INFO("****************************************************");
149         XBT_INFO("***** Non-deterministic communications pattern *****");
150         XBT_INFO("****************************************************");
151         XBT_INFO("%s", initial_global_state->send_diff);
152         XBT_INFO("%s", initial_global_state->recv_diff);
153         xbt_free(initial_global_state->send_diff);
154         initial_global_state->send_diff = NULL;
155         xbt_free(initial_global_state->recv_diff);
156         initial_global_state->recv_diff = NULL;
157         MC_print_statistics(mc_stats);
158         xbt_abort();
159       } 
160     }
161   }
162     
163   MC_comm_pattern_free(comm);
164
165 }
166
167 /********** Non Static functions ***********/
168
169 void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type_t call_type, int backtracking)
170 {
171   const smx_process_t issuer = MC_smx_simcall_get_issuer(request);
172   mc_list_comm_pattern_t initial_pattern = xbt_dynar_get_as(
173     initial_communications_pattern, issuer->pid, mc_list_comm_pattern_t);
174   xbt_dynar_t incomplete_pattern = xbt_dynar_get_as(
175     incomplete_communications_pattern, issuer->pid, xbt_dynar_t);
176
177   mc_comm_pattern_t pattern = xbt_new0(s_mc_comm_pattern_t, 1);
178   pattern->data_size = -1;
179   pattern->data = NULL;
180   pattern->index =
181     initial_pattern->index_comm + xbt_dynar_length(incomplete_pattern);
182
183   if (call_type == MC_CALL_TYPE_SEND) {
184     /* Create comm pattern */
185     pattern->type = SIMIX_COMM_SEND;
186     pattern->comm_addr = simcall_comm_isend__get__result(request);
187
188     s_smx_synchro_t synchro = mc_model_checker->process().read<s_smx_synchro_t>(
189       (std::uint64_t) pattern->comm_addr);
190
191     char* remote_name = mc_model_checker->process().read<char*>(
192       (std::uint64_t)(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
193     pattern->rdv =
194       MC_process_read_string(&mc_model_checker->process(), remote_name);
195     pattern->src_proc = MC_smx_resolve_process(synchro.comm.src_proc)->pid;
196     pattern->src_host = MC_smx_process_get_host_name(issuer);
197
198     struct s_smpi_mpi_request mpi_request =
199       mc_model_checker->process().read<s_smpi_mpi_request>(
200         (std::uint64_t) simcall_comm_isend__get__data(request));
201     pattern->tag = mpi_request.tag;
202
203     if(synchro.comm.src_buff != NULL){
204       pattern->data_size = synchro.comm.src_buff_size;
205       pattern->data = xbt_malloc0(pattern->data_size);
206       mc_model_checker->process().read_bytes(
207         pattern->data, pattern->data_size, (std::uint64_t)synchro.comm.src_buff);
208     }
209     if(mpi_request.detached){
210       if (!initial_global_state->initial_communications_pattern_done) {
211         /* Store comm pattern */
212         xbt_dynar_push(
213           xbt_dynar_get_as(
214             initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
215           )->list,
216           &pattern);
217       } else {
218         /* Evaluate comm determinism */
219         deterministic_comm_pattern(pattern->src_proc, pattern, backtracking);
220         xbt_dynar_get_as(
221           initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
222         )->index_comm++;
223       }
224       return;
225     }
226   } else if (call_type == MC_CALL_TYPE_RECV) {                      
227     pattern->type = SIMIX_COMM_RECEIVE;
228     pattern->comm_addr = simcall_comm_irecv__get__result(request);
229
230     struct s_smpi_mpi_request mpi_request;
231     mc_model_checker->process().read_bytes(
232       &mpi_request, sizeof(mpi_request),
233       (std::uint64_t) simcall_comm_irecv__get__data(request));
234     pattern->tag = mpi_request.tag;
235
236     s_smx_synchro_t synchro;
237     mc_model_checker->process().read_bytes(
238       &synchro, sizeof(synchro), (std::uint64_t)pattern->comm_addr);
239
240     char* remote_name;
241     mc_model_checker->process().read_bytes(
242       &remote_name, sizeof(remote_name),
243       (std::uint64_t)(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
244     pattern->rdv =
245       MC_process_read_string(&mc_model_checker->process(), remote_name);
246     pattern->dst_proc = MC_smx_resolve_process(synchro.comm.dst_proc)->pid;
247     pattern->dst_host = MC_smx_process_get_host_name(issuer);
248   } else {
249     xbt_die("Unexpected call_type %i", (int) call_type);
250   }
251
252   xbt_dynar_push(
253     xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t),
254     &pattern);
255
256   XBT_DEBUG("Insert incomplete comm pattern %p for process %lu", pattern, issuer->pid);
257 }
258
259 void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm_addr, unsigned int issuer, int backtracking) {
260   mc_comm_pattern_t current_comm_pattern;
261   unsigned int cursor = 0;
262   mc_comm_pattern_t comm_pattern;
263   int completed = 0;
264
265   /* Complete comm pattern */
266   xbt_dynar_foreach(xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, current_comm_pattern) {
267     if (current_comm_pattern->comm_addr == comm_addr) {
268       update_comm_pattern(current_comm_pattern, comm_addr);
269       completed = 1;
270       xbt_dynar_remove_at(
271         xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t),
272         cursor, &comm_pattern);
273       XBT_DEBUG("Remove incomplete comm pattern for process %u at cursor %u", issuer, cursor);
274       break;
275     }
276   }
277   if(!completed)
278     xbt_die("Corresponding communication not found!");
279
280   mc_list_comm_pattern_t pattern = xbt_dynar_get_as(
281     initial_communications_pattern, issuer, mc_list_comm_pattern_t);
282
283   if (!initial_global_state->initial_communications_pattern_done) {
284     /* Store comm pattern */
285     xbt_dynar_push(pattern->list, &comm_pattern);
286   } else {
287     /* Evaluate comm determinism */
288     deterministic_comm_pattern(issuer, comm_pattern, backtracking);
289     pattern->index_comm++;
290   }
291 }
292
293
294 /************************ Main algorithm ************************/
295
296 static void MC_modelcheck_comm_determinism_main(void);
297
298 static void MC_pre_modelcheck_comm_determinism(void)
299 {
300   mc_state_t initial_state = NULL;
301   smx_process_t process;
302   int i;
303   const int maxpid = MC_smx_get_maxpid();
304
305   if (_sg_mc_visited > 0)
306     visited_states = xbt_dynar_new(sizeof(mc_visited_state_t), visited_state_free_voidp);
307  
308   // Create initial_communications_pattern elements:
309   initial_communications_pattern = xbt_dynar_new(sizeof(mc_list_comm_pattern_t), MC_list_comm_pattern_free_voidp);
310   for (i=0; i < maxpid; i++){
311     mc_list_comm_pattern_t process_list_pattern = xbt_new0(s_mc_list_comm_pattern_t, 1);
312     process_list_pattern->list = xbt_dynar_new(sizeof(mc_comm_pattern_t), MC_comm_pattern_free_voidp);
313     process_list_pattern->index_comm = 0;
314     xbt_dynar_insert_at(initial_communications_pattern, i, &process_list_pattern);
315   }
316
317   // Create incomplete_communications_pattern elements:
318   incomplete_communications_pattern = xbt_dynar_new(sizeof(xbt_dynar_t), xbt_dynar_free_voidp);
319   for (i=0; i < maxpid; i++){
320     xbt_dynar_t process_pattern = xbt_dynar_new(sizeof(mc_comm_pattern_t), NULL);
321     xbt_dynar_insert_at(incomplete_communications_pattern, i, &process_pattern);
322   }
323
324   initial_state = MC_state_new();
325   
326   XBT_DEBUG("********* Start communication determinism verification *********");
327
328   /* Wait for requests (schedules processes) */
329   MC_wait_for_requests();
330
331   /* Get an enabled process and insert it in the interleave set of the initial state */
332   MC_EACH_SIMIX_PROCESS(process,
333     if (MC_process_is_enabled(process)) {
334       MC_state_interleave_process(initial_state, process);
335     }
336   );
337
338   xbt_fifo_unshift(mc_stack, initial_state);
339 }
340
341 static void MC_modelcheck_comm_determinism_main(void)
342 {
343
344   char *req_str = NULL;
345   int value;
346   mc_visited_state_t visited_state = NULL;
347   smx_simcall_t req = NULL;
348   smx_process_t process = NULL;
349   mc_state_t state = NULL, next_state = NULL;
350
351   while (xbt_fifo_size(mc_stack) > 0) {
352
353     /* Get current state */
354     state = (mc_state_t) xbt_fifo_get_item_content(xbt_fifo_get_first_item(mc_stack));
355
356     XBT_DEBUG("**************************************************");
357     XBT_DEBUG("Exploration depth = %d (state = %d, interleaved processes = %d)",
358               xbt_fifo_size(mc_stack), state->num,
359               MC_state_interleave_size(state));
360
361     /* Update statistics */
362     mc_stats->visited_states++;
363
364     if ((xbt_fifo_size(mc_stack) <= _sg_mc_max_depth)
365         && (req = MC_state_get_request(state, &value))
366         && (visited_state == NULL)) {
367
368       req_str = MC_request_to_string(req, value, MC_REQUEST_SIMIX);
369       XBT_DEBUG("Execute: %s", req_str);
370       xbt_free(req_str);
371       
372       if (dot_output != NULL) {
373         req_str = MC_request_get_dot_output(req, value);
374       }
375
376       MC_state_set_executed_request(state, req, value);
377       mc_stats->executed_transitions++;
378
379       /* TODO : handle test and testany simcalls */
380       e_mc_call_type_t call = MC_CALL_TYPE_NONE;
381       if (_sg_mc_comms_determinism || _sg_mc_send_determinism) {
382         call = MC_get_call_type(req);
383       }
384
385       /* Answer the request */
386       MC_simcall_handle(req, value);    /* After this call req is no longer useful */
387
388       if(!initial_global_state->initial_communications_pattern_done)
389         MC_handle_comm_pattern(call, req, value, initial_communications_pattern, 0);
390       else
391         MC_handle_comm_pattern(call, req, value, NULL, 0);
392
393       /* Wait for requests (schedules processes) */
394       MC_wait_for_requests();
395
396       /* Create the new expanded state */
397       next_state = MC_state_new();
398
399       if ((visited_state = is_visited_state(next_state)) == NULL) {
400
401         /* Get enabled processes and insert them in the interleave set of the next state */
402         MC_EACH_SIMIX_PROCESS(process,
403           if (MC_process_is_enabled(process)) {
404             MC_state_interleave_process(next_state, process);
405           }
406         );
407
408         if (dot_output != NULL)
409           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n", state->num,  next_state->num, req_str);
410
411       } else {
412
413         if (dot_output != NULL)
414           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n", state->num, visited_state->other_num == -1 ? visited_state->num : visited_state->other_num, req_str);
415
416       }
417
418       xbt_fifo_unshift(mc_stack, next_state);
419
420       if (dot_output != NULL)
421         xbt_free(req_str);
422
423     } else {
424
425       if (xbt_fifo_size(mc_stack) > _sg_mc_max_depth) {
426         XBT_WARN("/!\\ Max depth reached ! /!\\ ");
427       } else if (visited_state != NULL) {
428         XBT_DEBUG("State already visited (equal to state %d), exploration stopped on this path.", visited_state->other_num == -1 ? visited_state->num : visited_state->other_num);
429       } else {
430         XBT_DEBUG("There are no more processes to interleave. (depth %d)", xbt_fifo_size(mc_stack));
431       }
432
433       if (!initial_global_state->initial_communications_pattern_done) 
434         initial_global_state->initial_communications_pattern_done = 1;
435
436       /* Trash the current state, no longer needed */
437       xbt_fifo_shift(mc_stack);
438       MC_state_delete(state, !state->in_visited_states ? 1 : 0);
439       XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
440
441       visited_state = NULL;
442
443       /* Check for deadlocks */
444       if (MC_deadlock_check()) {
445         MC_show_deadlock(NULL);
446         return;
447       }
448
449       while ((state = (mc_state_t) xbt_fifo_shift(mc_stack)) != NULL) {
450         if (MC_state_interleave_size(state) && xbt_fifo_size(mc_stack) < _sg_mc_max_depth) {
451           /* We found a back-tracking point, let's loop */
452           XBT_DEBUG("Back-tracking to state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
453           xbt_fifo_unshift(mc_stack, state);
454
455           MC_replay(mc_stack);
456
457           XBT_DEBUG("Back-tracking to state %d at depth %d done", state->num, xbt_fifo_size(mc_stack));
458
459           break;
460         } else {
461           XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
462           MC_state_delete(state, !state->in_visited_states ? 1 : 0);
463         }
464       }
465     }
466   }
467
468   MC_print_statistics(mc_stats);
469   exit(0);
470 }
471
472 void MC_modelcheck_comm_determinism(void)
473 {
474   XBT_INFO("Check communication determinism");
475   mc_reduce_kind = e_mc_reduce_none;
476   MC_wait_for_requests();
477
478   if (mc_mode == MC_MODE_CLIENT) {
479     // This will move somehwere else:
480     MC_client_handle_messages();
481   }
482
483   /* Create exploration stack */
484   mc_stack = xbt_fifo_new();
485
486   MC_pre_modelcheck_comm_determinism();
487
488   initial_global_state = xbt_new0(s_mc_global_t, 1);
489   initial_global_state->snapshot = MC_take_snapshot(0);
490   initial_global_state->initial_communications_pattern_done = 0;
491   initial_global_state->recv_deterministic = 1;
492   initial_global_state->send_deterministic = 1;
493   initial_global_state->recv_diff = NULL;
494   initial_global_state->send_diff = NULL;
495
496   MC_modelcheck_comm_determinism_main();
497 }
498
499 }