Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
623365919ebc05c31de2a53ca8614264df1aa2f9
[simgrid.git] / src / mc / mc_comm_determinism.cpp
1 /* Copyright (c) 2008-2015. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include <cstdint>
8
9 #include <xbt/dynar.h>
10 #include <xbt/fifo.h>
11 #include <xbt/log.h>
12 #include <xbt/sysdep.h>
13
14 #include "src/mc/mc_state.h"
15 #include "src/mc/mc_comm_pattern.h"
16 #include "src/mc/mc_request.h"
17 #include "src/mc/mc_safety.h"
18 #include "src/mc/mc_private.h"
19 #include "src/mc/mc_record.h"
20 #include "src/mc/mc_smx.h"
21 #include "src/mc/mc_client.h"
22 #include "src/mc/mc_exit.h"
23
24 using simgrid::mc::remote;
25
26 extern "C" {
27
28 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_comm_determinism, mc,
29                                 "Logging specific to MC communication determinism detection");
30
31 /********** Global variables **********/
32
33 xbt_dynar_t initial_communications_pattern;
34 xbt_dynar_t incomplete_communications_pattern;
35
36 /********** Static functions ***********/
37
38 static e_mc_comm_pattern_difference_t compare_comm_pattern(mc_comm_pattern_t comm1, mc_comm_pattern_t comm2) {
39   if(comm1->type != comm2->type)
40     return TYPE_DIFF;
41   if (strcmp(comm1->rdv, comm2->rdv) != 0)
42     return RDV_DIFF;
43   if (comm1->src_proc != comm2->src_proc)
44     return SRC_PROC_DIFF;
45   if (comm1->dst_proc != comm2->dst_proc)
46     return DST_PROC_DIFF;
47   if (comm1->tag != comm2->tag)
48     return TAG_DIFF;
49   if (comm1->data_size != comm2->data_size)
50     return DATA_SIZE_DIFF;
51   if(comm1->data == nullptr && comm2->data == NULL)
52     return NONE_DIFF;
53   if(comm1->data != nullptr && comm2->data !=NULL) {
54     if (!memcmp(comm1->data, comm2->data, comm1->data_size))
55       return NONE_DIFF;
56     return DATA_DIFF;
57   } else
58     return DATA_DIFF;
59   return NONE_DIFF;
60 }
61
62 static char* print_determinism_result(e_mc_comm_pattern_difference_t diff, int process, mc_comm_pattern_t comm, unsigned int cursor) {
63   char *type, *res;
64
65   if(comm->type == SIMIX_COMM_SEND)
66     type = bprintf("The send communications pattern of the process %d is different!", process - 1);
67   else
68     type = bprintf("The recv communications pattern of the process %d is different!", process - 1);
69
70   switch(diff) {
71   case TYPE_DIFF:
72     res = bprintf("%s Different type for communication #%d", type, cursor);
73     break;
74   case RDV_DIFF:
75     res = bprintf("%s Different rdv for communication #%d", type, cursor);
76     break;
77   case TAG_DIFF:
78     res = bprintf("%s Different tag for communication #%d", type, cursor);
79     break;
80   case SRC_PROC_DIFF:
81       res = bprintf("%s Different source for communication #%d", type, cursor);
82     break;
83   case DST_PROC_DIFF:
84       res = bprintf("%s Different destination for communication #%d", type, cursor);
85     break;
86   case DATA_SIZE_DIFF:
87     res = bprintf("%s\n Different data size for communication #%d", type, cursor);
88     break;
89   case DATA_DIFF:
90     res = bprintf("%s\n Different data for communication #%d", type, cursor);
91     break;
92   default:
93     res = nullptr;
94     break;
95   }
96
97   return res;
98 }
99
100 static void update_comm_pattern(mc_comm_pattern_t comm_pattern, smx_synchro_t comm_addr)
101 {
102   s_smx_synchro_t comm;
103   mc_model_checker->process().read(&comm, remote(comm_addr));
104
105   smx_process_t src_proc = MC_smx_resolve_process(comm.comm.src_proc);
106   smx_process_t dst_proc = MC_smx_resolve_process(comm.comm.dst_proc);
107   comm_pattern->src_proc = src_proc->pid;
108   comm_pattern->dst_proc = dst_proc->pid;
109   comm_pattern->src_host = MC_smx_process_get_host_name(src_proc);
110   comm_pattern->dst_host = MC_smx_process_get_host_name(dst_proc);
111   if (comm_pattern->data_size == -1 && comm.comm.src_buff != nullptr) {
112     size_t buff_size;
113     mc_model_checker->process().read(
114       &buff_size, remote(comm.comm.dst_buff_size));
115     comm_pattern->data_size = buff_size;
116     comm_pattern->data = xbt_malloc0(comm_pattern->data_size);
117     mc_model_checker->process().read_bytes(
118       comm_pattern->data, comm_pattern->data_size,
119       remote(comm.comm.src_buff));
120   }
121 }
122
123 static void deterministic_comm_pattern(int process, mc_comm_pattern_t comm, int backtracking) {
124
125   mc_list_comm_pattern_t list =
126     xbt_dynar_get_as(initial_communications_pattern, process, mc_list_comm_pattern_t);
127
128   if(!backtracking){
129     mc_comm_pattern_t initial_comm =
130       xbt_dynar_get_as(list->list, list->index_comm, mc_comm_pattern_t);
131     e_mc_comm_pattern_difference_t diff =
132       compare_comm_pattern(initial_comm, comm);
133
134     if (diff != NONE_DIFF) {
135       if (comm->type == SIMIX_COMM_SEND){
136         initial_global_state->send_deterministic = 0;
137         if(initial_global_state->send_diff != nullptr)
138           xbt_free(initial_global_state->send_diff);
139         initial_global_state->send_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
140       }else{
141         initial_global_state->recv_deterministic = 0;
142         if(initial_global_state->recv_diff != nullptr)
143           xbt_free(initial_global_state->recv_diff);
144         initial_global_state->recv_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
145       }
146       if(_sg_mc_send_determinism && !initial_global_state->send_deterministic){
147         XBT_INFO("*********************************************************");
148         XBT_INFO("***** Non-send-deterministic communications pattern *****");
149         XBT_INFO("*********************************************************");
150         XBT_INFO("%s", initial_global_state->send_diff);
151         xbt_free(initial_global_state->send_diff);
152         initial_global_state->send_diff = nullptr;
153         MC_print_statistics(mc_stats);
154         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
155       }else if(_sg_mc_comms_determinism && (!initial_global_state->send_deterministic && !initial_global_state->recv_deterministic)) {
156         XBT_INFO("****************************************************");
157         XBT_INFO("***** Non-deterministic communications pattern *****");
158         XBT_INFO("****************************************************");
159         XBT_INFO("%s", initial_global_state->send_diff);
160         XBT_INFO("%s", initial_global_state->recv_diff);
161         xbt_free(initial_global_state->send_diff);
162         initial_global_state->send_diff = nullptr;
163         xbt_free(initial_global_state->recv_diff);
164         initial_global_state->recv_diff = nullptr;
165         MC_print_statistics(mc_stats);
166         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
167       } 
168     }
169   }
170     
171   MC_comm_pattern_free(comm);
172
173 }
174
175 /********** Non Static functions ***********/
176
177 void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type_t call_type, int backtracking)
178 {
179   const smx_process_t issuer = MC_smx_simcall_get_issuer(request);
180   mc_list_comm_pattern_t initial_pattern = xbt_dynar_get_as(
181     initial_communications_pattern, issuer->pid, mc_list_comm_pattern_t);
182   xbt_dynar_t incomplete_pattern = xbt_dynar_get_as(
183     incomplete_communications_pattern, issuer->pid, xbt_dynar_t);
184
185   mc_comm_pattern_t pattern = xbt_new0(s_mc_comm_pattern_t, 1);
186   pattern->data_size = -1;
187   pattern->data = nullptr;
188   pattern->index =
189     initial_pattern->index_comm + xbt_dynar_length(incomplete_pattern);
190
191   if (call_type == MC_CALL_TYPE_SEND) {
192     /* Create comm pattern */
193     pattern->type = SIMIX_COMM_SEND;
194     pattern->comm_addr = simcall_comm_isend__get__result(request);
195
196     s_smx_synchro_t synchro = mc_model_checker->process().read<s_smx_synchro_t>(
197       (std::uint64_t) pattern->comm_addr);
198
199     char* remote_name = mc_model_checker->process().read<char*>(
200       (std::uint64_t)(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
201     pattern->rdv = mc_model_checker->process().read_string(remote_name);
202     pattern->src_proc = MC_smx_resolve_process(synchro.comm.src_proc)->pid;
203     pattern->src_host = MC_smx_process_get_host_name(issuer);
204
205     struct s_smpi_mpi_request mpi_request =
206       mc_model_checker->process().read<s_smpi_mpi_request>(
207         (std::uint64_t) simcall_comm_isend__get__data(request));
208     pattern->tag = mpi_request.tag;
209
210     if(synchro.comm.src_buff != nullptr){
211       pattern->data_size = synchro.comm.src_buff_size;
212       pattern->data = xbt_malloc0(pattern->data_size);
213       mc_model_checker->process().read_bytes(
214         pattern->data, pattern->data_size, remote(synchro.comm.src_buff));
215     }
216     if(mpi_request.detached){
217       if (!initial_global_state->initial_communications_pattern_done) {
218         /* Store comm pattern */
219         xbt_dynar_push(
220           xbt_dynar_get_as(
221             initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
222           )->list,
223           &pattern);
224       } else {
225         /* Evaluate comm determinism */
226         deterministic_comm_pattern(pattern->src_proc, pattern, backtracking);
227         xbt_dynar_get_as(
228           initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
229         )->index_comm++;
230       }
231       return;
232     }
233   } else if (call_type == MC_CALL_TYPE_RECV) {                      
234     pattern->type = SIMIX_COMM_RECEIVE;
235     pattern->comm_addr = simcall_comm_irecv__get__result(request);
236
237     struct s_smpi_mpi_request mpi_request;
238     mc_model_checker->process().read(
239       &mpi_request, remote((struct s_smpi_mpi_request*)simcall_comm_irecv__get__data(request)));
240     pattern->tag = mpi_request.tag;
241
242     s_smx_synchro_t synchro;
243     mc_model_checker->process().read(&synchro, remote(pattern->comm_addr));
244
245     char* remote_name;
246     mc_model_checker->process().read(&remote_name,
247       remote(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
248     pattern->rdv = mc_model_checker->process().read_string(remote_name);
249     pattern->dst_proc = MC_smx_resolve_process(synchro.comm.dst_proc)->pid;
250     pattern->dst_host = MC_smx_process_get_host_name(issuer);
251   } else
252     xbt_die("Unexpected call_type %i", (int) call_type);
253
254   xbt_dynar_push(
255     xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t),
256     &pattern);
257
258   XBT_DEBUG("Insert incomplete comm pattern %p for process %lu", pattern, issuer->pid);
259 }
260
261 void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm_addr, unsigned int issuer, int backtracking) {
262   mc_comm_pattern_t current_comm_pattern;
263   unsigned int cursor = 0;
264   mc_comm_pattern_t comm_pattern;
265   int completed = 0;
266
267   /* Complete comm pattern */
268   xbt_dynar_foreach(xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, current_comm_pattern)
269     if (current_comm_pattern->comm_addr == comm_addr) {
270       update_comm_pattern(current_comm_pattern, comm_addr);
271       completed = 1;
272       xbt_dynar_remove_at(
273         xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t),
274         cursor, &comm_pattern);
275       XBT_DEBUG("Remove incomplete comm pattern for process %u at cursor %u", issuer, cursor);
276       break;
277     }
278
279   if(!completed)
280     xbt_die("Corresponding communication not found!");
281
282   mc_list_comm_pattern_t pattern = xbt_dynar_get_as(
283     initial_communications_pattern, issuer, mc_list_comm_pattern_t);
284
285   if (!initial_global_state->initial_communications_pattern_done)
286     /* Store comm pattern */
287     xbt_dynar_push(pattern->list, &comm_pattern);
288   else {
289     /* Evaluate comm determinism */
290     deterministic_comm_pattern(issuer, comm_pattern, backtracking);
291     pattern->index_comm++;
292   }
293 }
294
295
296 /************************ Main algorithm ************************/
297
298 static int MC_modelcheck_comm_determinism_main(void);
299
300 static void MC_pre_modelcheck_comm_determinism(void)
301 {
302   mc_state_t initial_state = nullptr;
303   int i;
304   const int maxpid = MC_smx_get_maxpid();
305
306   if (_sg_mc_visited > 0)
307     visited_states = xbt_dynar_new(sizeof(mc_visited_state_t), visited_state_free_voidp);
308  
309   // Create initial_communications_pattern elements:
310   initial_communications_pattern = xbt_dynar_new(sizeof(mc_list_comm_pattern_t), MC_list_comm_pattern_free_voidp);
311   for (i=0; i < maxpid; i++){
312     mc_list_comm_pattern_t process_list_pattern = xbt_new0(s_mc_list_comm_pattern_t, 1);
313     process_list_pattern->list = xbt_dynar_new(sizeof(mc_comm_pattern_t), MC_comm_pattern_free_voidp);
314     process_list_pattern->index_comm = 0;
315     xbt_dynar_insert_at(initial_communications_pattern, i, &process_list_pattern);
316   }
317
318   // Create incomplete_communications_pattern elements:
319   incomplete_communications_pattern = xbt_dynar_new(sizeof(xbt_dynar_t), xbt_dynar_free_voidp);
320   for (i=0; i < maxpid; i++){
321     xbt_dynar_t process_pattern = xbt_dynar_new(sizeof(mc_comm_pattern_t), nullptr);
322     xbt_dynar_insert_at(incomplete_communications_pattern, i, &process_pattern);
323   }
324
325   initial_state = MC_state_new();
326   
327   XBT_DEBUG("********* Start communication determinism verification *********");
328
329   /* Wait for requests (schedules processes) */
330   mc_model_checker->wait_for_requests();
331
332   /* Get an enabled process and insert it in the interleave set of the initial state */
333   for (auto& p : mc_model_checker->process().simix_processes())
334     if (MC_process_is_enabled(&p.copy))
335       MC_state_interleave_process(initial_state, &p.copy);
336
337   xbt_fifo_unshift(mc_stack, initial_state);
338 }
339
340 static int MC_modelcheck_comm_determinism_main(void)
341 {
342
343   char *req_str = nullptr;
344   int value;
345   mc_visited_state_t visited_state = nullptr;
346   smx_simcall_t req = nullptr;
347   mc_state_t state = nullptr, next_state = NULL;
348
349   while (xbt_fifo_size(mc_stack) > 0) {
350
351     /* Get current state */
352     state = (mc_state_t) xbt_fifo_get_item_content(xbt_fifo_get_first_item(mc_stack));
353
354     XBT_DEBUG("**************************************************");
355     XBT_DEBUG("Exploration depth = %d (state = %d, interleaved processes = %d)",
356               xbt_fifo_size(mc_stack), state->num,
357               MC_state_interleave_size(state));
358
359     /* Update statistics */
360     mc_stats->visited_states++;
361
362     if ((xbt_fifo_size(mc_stack) <= _sg_mc_max_depth)
363         && (req = MC_state_get_request(state, &value))
364         && (visited_state == nullptr)) {
365
366       req_str = MC_request_to_string(req, value, MC_REQUEST_SIMIX);
367       XBT_DEBUG("Execute: %s", req_str);
368       xbt_free(req_str);
369       
370       if (dot_output != nullptr)
371         req_str = MC_request_get_dot_output(req, value);
372
373       MC_state_set_executed_request(state, req, value);
374       mc_stats->executed_transitions++;
375
376       /* TODO : handle test and testany simcalls */
377       e_mc_call_type_t call = MC_CALL_TYPE_NONE;
378       if (_sg_mc_comms_determinism || _sg_mc_send_determinism)
379         call = MC_get_call_type(req);
380
381       /* Answer the request */
382       MC_simcall_handle(req, value);    /* After this call req is no longer useful */
383
384       if(!initial_global_state->initial_communications_pattern_done)
385         MC_handle_comm_pattern(call, req, value, initial_communications_pattern, 0);
386       else
387         MC_handle_comm_pattern(call, req, value, nullptr, 0);
388
389       /* Wait for requests (schedules processes) */
390       mc_model_checker->wait_for_requests();
391
392       /* Create the new expanded state */
393       next_state = MC_state_new();
394
395       if ((visited_state = is_visited_state(next_state)) == nullptr) {
396
397         /* Get enabled processes and insert them in the interleave set of the next state */
398         for (auto& p : mc_model_checker->process().simix_processes())
399           if (MC_process_is_enabled(&p.copy))
400             MC_state_interleave_process(next_state, &p.copy);
401
402         if (dot_output != nullptr)
403           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n", state->num,  next_state->num, req_str);
404
405       } else if (dot_output != nullptr)
406         fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n",
407           state->num, visited_state->other_num == -1 ? visited_state->num : visited_state->other_num, req_str);
408
409       xbt_fifo_unshift(mc_stack, next_state);
410
411       if (dot_output != nullptr)
412         xbt_free(req_str);
413
414     } else {
415
416       if (xbt_fifo_size(mc_stack) > _sg_mc_max_depth)
417         XBT_WARN("/!\\ Max depth reached ! /!\\ ");
418       else if (visited_state != nullptr)
419         XBT_DEBUG("State already visited (equal to state %d), exploration stopped on this path.", visited_state->other_num == -1 ? visited_state->num : visited_state->other_num);
420       else
421         XBT_DEBUG("There are no more processes to interleave. (depth %d)", xbt_fifo_size(mc_stack));
422
423       if (!initial_global_state->initial_communications_pattern_done) 
424         initial_global_state->initial_communications_pattern_done = 1;
425
426       /* Trash the current state, no longer needed */
427       xbt_fifo_shift(mc_stack);
428       MC_state_delete(state, !state->in_visited_states ? 1 : 0);
429       XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
430
431       visited_state = nullptr;
432
433       /* Check for deadlocks */
434       if (MC_deadlock_check()) {
435         MC_show_deadlock(nullptr);
436         return SIMGRID_MC_EXIT_DEADLOCK;
437       }
438
439       while ((state = (mc_state_t) xbt_fifo_shift(mc_stack)) != nullptr)
440         if (MC_state_interleave_size(state) && xbt_fifo_size(mc_stack) < _sg_mc_max_depth) {
441           /* We found a back-tracking point, let's loop */
442           XBT_DEBUG("Back-tracking to state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
443           xbt_fifo_unshift(mc_stack, state);
444
445           MC_replay(mc_stack);
446
447           XBT_DEBUG("Back-tracking to state %d at depth %d done", state->num, xbt_fifo_size(mc_stack));
448
449           break;
450         } else {
451           XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
452           MC_state_delete(state, !state->in_visited_states ? 1 : 0);
453         }
454
455     }
456   }
457
458   MC_print_statistics(mc_stats);
459   return SIMGRID_MC_EXIT_SUCCESS;
460 }
461
462 int MC_modelcheck_comm_determinism(void)
463 {
464   XBT_INFO("Check communication determinism");
465   mc_reduce_kind = e_mc_reduce_none;
466   mc_model_checker->wait_for_requests();
467
468   if (mc_mode == MC_MODE_CLIENT)
469     // This will move somehwere else:
470     MC_client_handle_messages();
471
472   /* Create exploration stack */
473   mc_stack = xbt_fifo_new();
474
475   MC_pre_modelcheck_comm_determinism();
476
477   initial_global_state = xbt_new0(s_mc_global_t, 1);
478   initial_global_state->snapshot = simgrid::mc::take_snapshot(0);
479   initial_global_state->initial_communications_pattern_done = 0;
480   initial_global_state->recv_deterministic = 1;
481   initial_global_state->send_deterministic = 1;
482   initial_global_state->recv_diff = nullptr;
483   initial_global_state->send_diff = nullptr;
484
485   return MC_modelcheck_comm_determinism_main();
486 }
487
488 }