Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Merge branch 'master' of git+ssh://scm.gforge.inria.fr//gitroot/simgrid/simgrid
[simgrid.git] / src / mc / mc_comm_determinism.cpp
1 /* Copyright (c) 2008-2015. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include <cstdint>
8
9 #include <xbt/dynar.h>
10 #include <xbt/dynar.hpp>
11 #include <xbt/fifo.h>
12 #include <xbt/log.h>
13 #include <xbt/sysdep.h>
14
15 #include "src/mc/mc_state.h"
16 #include "src/mc/mc_comm_pattern.h"
17 #include "src/mc/mc_request.h"
18 #include "src/mc/mc_safety.h"
19 #include "src/mc/mc_private.h"
20 #include "src/mc/mc_record.h"
21 #include "src/mc/mc_smx.h"
22 #include "src/mc/Client.hpp"
23 #include "src/mc/mc_exit.h"
24
25 using simgrid::mc::remote;
26
27 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_comm_determinism, mc,
28                                 "Logging specific to MC communication determinism detection");
29
30 extern "C" {
31
32 /********** Global variables **********/
33
34 xbt_dynar_t initial_communications_pattern;
35 xbt_dynar_t incomplete_communications_pattern;
36
37 /********** Static functions ***********/
38
39 static e_mc_comm_pattern_difference_t compare_comm_pattern(mc_comm_pattern_t comm1, mc_comm_pattern_t comm2) {
40   if(comm1->type != comm2->type)
41     return TYPE_DIFF;
42   if (strcmp(comm1->rdv, comm2->rdv) != 0)
43     return RDV_DIFF;
44   if (comm1->src_proc != comm2->src_proc)
45     return SRC_PROC_DIFF;
46   if (comm1->dst_proc != comm2->dst_proc)
47     return DST_PROC_DIFF;
48   if (comm1->tag != comm2->tag)
49     return TAG_DIFF;
50   if (comm1->data_size != comm2->data_size)
51     return DATA_SIZE_DIFF;
52   if(comm1->data == nullptr && comm2->data == NULL)
53     return NONE_DIFF;
54   if(comm1->data != nullptr && comm2->data !=NULL) {
55     if (!memcmp(comm1->data, comm2->data, comm1->data_size))
56       return NONE_DIFF;
57     return DATA_DIFF;
58   } else
59     return DATA_DIFF;
60   return NONE_DIFF;
61 }
62
63 static char* print_determinism_result(e_mc_comm_pattern_difference_t diff, int process, mc_comm_pattern_t comm, unsigned int cursor) {
64   char *type, *res;
65
66   if(comm->type == SIMIX_COMM_SEND)
67     type = bprintf("The send communications pattern of the process %d is different!", process - 1);
68   else
69     type = bprintf("The recv communications pattern of the process %d is different!", process - 1);
70
71   switch(diff) {
72   case TYPE_DIFF:
73     res = bprintf("%s Different type for communication #%d", type, cursor);
74     break;
75   case RDV_DIFF:
76     res = bprintf("%s Different rdv for communication #%d", type, cursor);
77     break;
78   case TAG_DIFF:
79     res = bprintf("%s Different tag for communication #%d", type, cursor);
80     break;
81   case SRC_PROC_DIFF:
82       res = bprintf("%s Different source for communication #%d", type, cursor);
83     break;
84   case DST_PROC_DIFF:
85       res = bprintf("%s Different destination for communication #%d", type, cursor);
86     break;
87   case DATA_SIZE_DIFF:
88     res = bprintf("%s\n Different data size for communication #%d", type, cursor);
89     break;
90   case DATA_DIFF:
91     res = bprintf("%s\n Different data for communication #%d", type, cursor);
92     break;
93   default:
94     res = nullptr;
95     break;
96   }
97
98   return res;
99 }
100
101 static void update_comm_pattern(mc_comm_pattern_t comm_pattern, smx_synchro_t comm_addr)
102 {
103   s_smx_synchro_t comm;
104   mc_model_checker->process().read(&comm, remote(comm_addr));
105
106   smx_process_t src_proc = MC_smx_resolve_process(comm.comm.src_proc);
107   smx_process_t dst_proc = MC_smx_resolve_process(comm.comm.dst_proc);
108   comm_pattern->src_proc = src_proc->pid;
109   comm_pattern->dst_proc = dst_proc->pid;
110   comm_pattern->src_host = MC_smx_process_get_host_name(src_proc);
111   comm_pattern->dst_host = MC_smx_process_get_host_name(dst_proc);
112   if (comm_pattern->data_size == -1 && comm.comm.src_buff != nullptr) {
113     size_t buff_size;
114     mc_model_checker->process().read(
115       &buff_size, remote(comm.comm.dst_buff_size));
116     comm_pattern->data_size = buff_size;
117     comm_pattern->data = xbt_malloc0(comm_pattern->data_size);
118     mc_model_checker->process().read_bytes(
119       comm_pattern->data, comm_pattern->data_size,
120       remote(comm.comm.src_buff));
121   }
122 }
123
124 static void deterministic_comm_pattern(int process, mc_comm_pattern_t comm, int backtracking) {
125
126   mc_list_comm_pattern_t list =
127     xbt_dynar_get_as(initial_communications_pattern, process, mc_list_comm_pattern_t);
128
129   if(!backtracking){
130     mc_comm_pattern_t initial_comm =
131       xbt_dynar_get_as(list->list, list->index_comm, mc_comm_pattern_t);
132     e_mc_comm_pattern_difference_t diff =
133       compare_comm_pattern(initial_comm, comm);
134
135     if (diff != NONE_DIFF) {
136       if (comm->type == SIMIX_COMM_SEND){
137         initial_global_state->send_deterministic = 0;
138         if(initial_global_state->send_diff != nullptr)
139           xbt_free(initial_global_state->send_diff);
140         initial_global_state->send_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
141       }else{
142         initial_global_state->recv_deterministic = 0;
143         if(initial_global_state->recv_diff != nullptr)
144           xbt_free(initial_global_state->recv_diff);
145         initial_global_state->recv_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
146       }
147       if(_sg_mc_send_determinism && !initial_global_state->send_deterministic){
148         XBT_INFO("*********************************************************");
149         XBT_INFO("***** Non-send-deterministic communications pattern *****");
150         XBT_INFO("*********************************************************");
151         XBT_INFO("%s", initial_global_state->send_diff);
152         xbt_free(initial_global_state->send_diff);
153         initial_global_state->send_diff = nullptr;
154         MC_print_statistics(mc_stats);
155         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
156       }else if(_sg_mc_comms_determinism && (!initial_global_state->send_deterministic && !initial_global_state->recv_deterministic)) {
157         XBT_INFO("****************************************************");
158         XBT_INFO("***** Non-deterministic communications pattern *****");
159         XBT_INFO("****************************************************");
160         XBT_INFO("%s", initial_global_state->send_diff);
161         XBT_INFO("%s", initial_global_state->recv_diff);
162         xbt_free(initial_global_state->send_diff);
163         initial_global_state->send_diff = nullptr;
164         xbt_free(initial_global_state->recv_diff);
165         initial_global_state->recv_diff = nullptr;
166         MC_print_statistics(mc_stats);
167         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
168       } 
169     }
170   }
171     
172   MC_comm_pattern_free(comm);
173
174 }
175
176 /********** Non Static functions ***********/
177
178 void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type_t call_type, int backtracking)
179 {
180   const smx_process_t issuer = MC_smx_simcall_get_issuer(request);
181   mc_list_comm_pattern_t initial_pattern = xbt_dynar_get_as(
182     initial_communications_pattern, issuer->pid, mc_list_comm_pattern_t);
183   xbt_dynar_t incomplete_pattern = xbt_dynar_get_as(
184     incomplete_communications_pattern, issuer->pid, xbt_dynar_t);
185
186   mc_comm_pattern_t pattern = xbt_new0(s_mc_comm_pattern_t, 1);
187   pattern->data_size = -1;
188   pattern->data = nullptr;
189   pattern->index =
190     initial_pattern->index_comm + xbt_dynar_length(incomplete_pattern);
191
192   if (call_type == MC_CALL_TYPE_SEND) {
193     /* Create comm pattern */
194     pattern->type = SIMIX_COMM_SEND;
195     pattern->comm_addr = simcall_comm_isend__get__result(request);
196
197     s_smx_synchro_t synchro = mc_model_checker->process().read<s_smx_synchro_t>(
198       (std::uint64_t) pattern->comm_addr);
199
200     char* remote_name = mc_model_checker->process().read<char*>(
201       (std::uint64_t)(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
202     pattern->rdv = mc_model_checker->process().read_string(remote_name);
203     pattern->src_proc = MC_smx_resolve_process(synchro.comm.src_proc)->pid;
204     pattern->src_host = MC_smx_process_get_host_name(issuer);
205
206     struct s_smpi_mpi_request mpi_request =
207       mc_model_checker->process().read<s_smpi_mpi_request>(
208         (std::uint64_t) simcall_comm_isend__get__data(request));
209     pattern->tag = mpi_request.tag;
210
211     if(synchro.comm.src_buff != nullptr){
212       pattern->data_size = synchro.comm.src_buff_size;
213       pattern->data = xbt_malloc0(pattern->data_size);
214       mc_model_checker->process().read_bytes(
215         pattern->data, pattern->data_size, remote(synchro.comm.src_buff));
216     }
217     if(mpi_request.detached){
218       if (!initial_global_state->initial_communications_pattern_done) {
219         /* Store comm pattern */
220         xbt_dynar_push(
221           xbt_dynar_get_as(
222             initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
223           )->list,
224           &pattern);
225       } else {
226         /* Evaluate comm determinism */
227         deterministic_comm_pattern(pattern->src_proc, pattern, backtracking);
228         xbt_dynar_get_as(
229           initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
230         )->index_comm++;
231       }
232       return;
233     }
234   } else if (call_type == MC_CALL_TYPE_RECV) {                      
235     pattern->type = SIMIX_COMM_RECEIVE;
236     pattern->comm_addr = simcall_comm_irecv__get__result(request);
237
238     struct s_smpi_mpi_request mpi_request;
239     mc_model_checker->process().read(
240       &mpi_request, remote((struct s_smpi_mpi_request*)simcall_comm_irecv__get__data(request)));
241     pattern->tag = mpi_request.tag;
242
243     s_smx_synchro_t synchro;
244     mc_model_checker->process().read(&synchro, remote(pattern->comm_addr));
245
246     char* remote_name;
247     mc_model_checker->process().read(&remote_name,
248       remote(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
249     pattern->rdv = mc_model_checker->process().read_string(remote_name);
250     pattern->dst_proc = MC_smx_resolve_process(synchro.comm.dst_proc)->pid;
251     pattern->dst_host = MC_smx_process_get_host_name(issuer);
252   } else
253     xbt_die("Unexpected call_type %i", (int) call_type);
254
255   xbt_dynar_push(
256     xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t),
257     &pattern);
258
259   XBT_DEBUG("Insert incomplete comm pattern %p for process %lu", pattern, issuer->pid);
260 }
261
262 void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm_addr, unsigned int issuer, int backtracking) {
263   mc_comm_pattern_t current_comm_pattern;
264   unsigned int cursor = 0;
265   mc_comm_pattern_t comm_pattern;
266   int completed = 0;
267
268   /* Complete comm pattern */
269   xbt_dynar_foreach(xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, current_comm_pattern)
270     if (current_comm_pattern->comm_addr == comm_addr) {
271       update_comm_pattern(current_comm_pattern, comm_addr);
272       completed = 1;
273       xbt_dynar_remove_at(
274         xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t),
275         cursor, &comm_pattern);
276       XBT_DEBUG("Remove incomplete comm pattern for process %u at cursor %u", issuer, cursor);
277       break;
278     }
279
280   if(!completed)
281     xbt_die("Corresponding communication not found!");
282
283   mc_list_comm_pattern_t pattern = xbt_dynar_get_as(
284     initial_communications_pattern, issuer, mc_list_comm_pattern_t);
285
286   if (!initial_global_state->initial_communications_pattern_done)
287     /* Store comm pattern */
288     xbt_dynar_push(pattern->list, &comm_pattern);
289   else {
290     /* Evaluate comm determinism */
291     deterministic_comm_pattern(issuer, comm_pattern, backtracking);
292     pattern->index_comm++;
293   }
294 }
295
296
297 /************************ Main algorithm ************************/
298
299 static int MC_modelcheck_comm_determinism_main(void);
300
301 static void MC_pre_modelcheck_comm_determinism(void)
302 {
303   mc_state_t initial_state = nullptr;
304   int i;
305   const int maxpid = MC_smx_get_maxpid();
306
307   simgrid::mc::visited_states.clear();
308  
309   // Create initial_communications_pattern elements:
310   initial_communications_pattern = xbt_dynar_new(sizeof(mc_list_comm_pattern_t), MC_list_comm_pattern_free_voidp);
311   for (i=0; i < maxpid; i++){
312     mc_list_comm_pattern_t process_list_pattern = xbt_new0(s_mc_list_comm_pattern_t, 1);
313     process_list_pattern->list = xbt_dynar_new(sizeof(mc_comm_pattern_t), MC_comm_pattern_free_voidp);
314     process_list_pattern->index_comm = 0;
315     xbt_dynar_insert_at(initial_communications_pattern, i, &process_list_pattern);
316   }
317
318   // Create incomplete_communications_pattern elements:
319   incomplete_communications_pattern = xbt_dynar_new(sizeof(xbt_dynar_t), xbt_dynar_free_voidp);
320   for (i=0; i < maxpid; i++){
321     xbt_dynar_t process_pattern = xbt_dynar_new(sizeof(mc_comm_pattern_t), nullptr);
322     xbt_dynar_insert_at(incomplete_communications_pattern, i, &process_pattern);
323   }
324
325   initial_state = MC_state_new();
326   
327   XBT_DEBUG("********* Start communication determinism verification *********");
328
329   /* Wait for requests (schedules processes) */
330   mc_model_checker->wait_for_requests();
331
332   /* Get an enabled process and insert it in the interleave set of the initial state */
333   for (auto& p : mc_model_checker->process().simix_processes())
334     if (simgrid::mc::process_is_enabled(&p.copy))
335       MC_state_interleave_process(initial_state, &p.copy);
336
337   xbt_fifo_unshift(mc_stack, initial_state);
338 }
339
340 static inline
341 bool all_communications_are_finished()
342 {
343   for (size_t current_process = 1; current_process < MC_smx_get_maxpid(); current_process++) {
344     xbt_dynar_t pattern = xbt_dynar_get_as(
345       incomplete_communications_pattern, current_process, xbt_dynar_t);
346     if (!xbt_dynar_is_empty(pattern)) {
347       XBT_DEBUG("Some communications are not finished, cannot stop the exploration ! State not visited.");
348       return false;
349     }
350   }
351   return true;
352 }
353
354 static int MC_modelcheck_comm_determinism_main(void)
355 {
356
357   char *req_str = nullptr;
358   int value;
359   std::unique_ptr<simgrid::mc::VisitedState> visited_state = nullptr;
360   smx_simcall_t req = nullptr;
361   mc_state_t state = nullptr, next_state = NULL;
362
363   while (xbt_fifo_size(mc_stack) > 0) {
364
365     /* Get current state */
366     state = (mc_state_t) xbt_fifo_get_item_content(xbt_fifo_get_first_item(mc_stack));
367
368     XBT_DEBUG("**************************************************");
369     XBT_DEBUG("Exploration depth = %d (state = %d, interleaved processes = %d)",
370               xbt_fifo_size(mc_stack), state->num,
371               MC_state_interleave_size(state));
372
373     /* Update statistics */
374     mc_stats->visited_states++;
375
376     if ((xbt_fifo_size(mc_stack) <= _sg_mc_max_depth)
377         && (req = MC_state_get_request(state, &value))
378         && (visited_state == nullptr)) {
379
380       req_str = simgrid::mc::request_to_string(req, value, simgrid::mc::RequestType::simix);
381       XBT_DEBUG("Execute: %s", req_str);
382       xbt_free(req_str);
383       
384       if (dot_output != nullptr)
385         req_str = simgrid::mc::request_get_dot_output(req, value);
386
387       MC_state_set_executed_request(state, req, value);
388       mc_stats->executed_transitions++;
389
390       /* TODO : handle test and testany simcalls */
391       e_mc_call_type_t call = MC_CALL_TYPE_NONE;
392       if (_sg_mc_comms_determinism || _sg_mc_send_determinism)
393         call = MC_get_call_type(req);
394
395       /* Answer the request */
396       simgrid::mc::handle_simcall(req, value);    /* After this call req is no longer useful */
397
398       if(!initial_global_state->initial_communications_pattern_done)
399         MC_handle_comm_pattern(call, req, value, initial_communications_pattern, 0);
400       else
401         MC_handle_comm_pattern(call, req, value, nullptr, 0);
402
403       /* Wait for requests (schedules processes) */
404       mc_model_checker->wait_for_requests();
405
406       /* Create the new expanded state */
407       next_state = MC_state_new();
408
409       /* If comm determinism verification, we cannot stop the exploration if
410          some communications are not finished (at least, data are transfered).
411          These communications  are incomplete and they cannot be analyzed and
412          compared with the initial pattern. */
413       bool compare_snapshots = all_communications_are_finished()
414         && initial_global_state->initial_communications_pattern_done;
415
416       if (_sg_mc_visited == 0 || (visited_state = simgrid::mc::is_visited_state(next_state, compare_snapshots)) == nullptr) {
417
418         /* Get enabled processes and insert them in the interleave set of the next state */
419         for (auto& p : mc_model_checker->process().simix_processes())
420           if (simgrid::mc::process_is_enabled(&p.copy))
421             MC_state_interleave_process(next_state, &p.copy);
422
423         if (dot_output != nullptr)
424           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n", state->num,  next_state->num, req_str);
425
426       } else if (dot_output != nullptr)
427         fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n",
428           state->num, visited_state->other_num == -1 ? visited_state->num : visited_state->other_num, req_str);
429
430       xbt_fifo_unshift(mc_stack, next_state);
431
432       if (dot_output != nullptr)
433         xbt_free(req_str);
434
435     } else {
436
437       if (xbt_fifo_size(mc_stack) > _sg_mc_max_depth)
438         XBT_WARN("/!\\ Max depth reached ! /!\\ ");
439       else if (visited_state != nullptr)
440         XBT_DEBUG("State already visited (equal to state %d), exploration stopped on this path.", visited_state->other_num == -1 ? visited_state->num : visited_state->other_num);
441       else
442         XBT_DEBUG("There are no more processes to interleave. (depth %d)", xbt_fifo_size(mc_stack));
443
444       if (!initial_global_state->initial_communications_pattern_done) 
445         initial_global_state->initial_communications_pattern_done = 1;
446
447       /* Trash the current state, no longer needed */
448       xbt_fifo_shift(mc_stack);
449       MC_state_delete(state, !state->in_visited_states ? 1 : 0);
450       XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
451
452       visited_state = nullptr;
453
454       /* Check for deadlocks */
455       if (mc_model_checker->checkDeadlock()) {
456         MC_show_deadlock(nullptr);
457         return SIMGRID_MC_EXIT_DEADLOCK;
458       }
459
460       while ((state = (mc_state_t) xbt_fifo_shift(mc_stack)) != nullptr)
461         if (MC_state_interleave_size(state) && xbt_fifo_size(mc_stack) < _sg_mc_max_depth) {
462           /* We found a back-tracking point, let's loop */
463           XBT_DEBUG("Back-tracking to state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
464           xbt_fifo_unshift(mc_stack, state);
465
466           MC_replay(mc_stack);
467
468           XBT_DEBUG("Back-tracking to state %d at depth %d done", state->num, xbt_fifo_size(mc_stack));
469
470           break;
471         } else {
472           XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
473           MC_state_delete(state, !state->in_visited_states ? 1 : 0);
474         }
475
476     }
477   }
478
479   MC_print_statistics(mc_stats);
480   return SIMGRID_MC_EXIT_SUCCESS;
481 }
482
483 int MC_modelcheck_comm_determinism(void)
484 {
485   XBT_INFO("Check communication determinism");
486   mc_model_checker->wait_for_requests();
487
488   if (mc_mode == MC_MODE_CLIENT)
489     // This will move somehwere else:
490     simgrid::mc::Client::get()->handleMessages();
491
492   /* Create exploration stack */
493   mc_stack = xbt_fifo_new();
494
495   MC_pre_modelcheck_comm_determinism();
496
497   initial_global_state = xbt_new0(s_mc_global_t, 1);
498   initial_global_state->snapshot = simgrid::mc::take_snapshot(0);
499   initial_global_state->initial_communications_pattern_done = 0;
500   initial_global_state->recv_deterministic = 1;
501   initial_global_state->send_deterministic = 1;
502   initial_global_state->recv_diff = nullptr;
503   initial_global_state->send_diff = nullptr;
504
505   return MC_modelcheck_comm_determinism_main();
506 }
507
508 }