Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Merge branch 'master' of scm.gforge.inria.fr:/gitroot/simgrid/simgrid
[simgrid.git] / src / mc / CommunicationDeterminismChecker.cpp
1 /* Copyright (c) 2008-2015. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include <cstdint>
8
9 #include <xbt/dynar.h>
10 #include <xbt/dynar.hpp>
11 #include <xbt/log.h>
12 #include <xbt/sysdep.h>
13
14 #include "src/mc/mc_state.h"
15 #include "src/mc/mc_comm_pattern.h"
16 #include "src/mc/mc_request.h"
17 #include "src/mc/mc_safety.h"
18 #include "src/mc/mc_private.h"
19 #include "src/mc/mc_record.h"
20 #include "src/mc/mc_smx.h"
21 #include "src/mc/Client.hpp"
22 #include "src/mc/CommunicationDeterminismChecker.hpp"
23 #include "src/mc/mc_exit.h"
24 #include "src/mc/VisitedState.hpp"
25
26 using simgrid::mc::remote;
27
28 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_comm_determinism, mc,
29                                 "Logging specific to MC communication determinism detection");
30
31 /********** Global variables **********/
32
33 xbt_dynar_t initial_communications_pattern;
34 xbt_dynar_t incomplete_communications_pattern;
35
36 /********** Static functions ***********/
37
38 static e_mc_comm_pattern_difference_t compare_comm_pattern(mc_comm_pattern_t comm1, mc_comm_pattern_t comm2) {
39   if(comm1->type != comm2->type)
40     return TYPE_DIFF;
41   if (strcmp(comm1->rdv, comm2->rdv) != 0)
42     return RDV_DIFF;
43   if (comm1->src_proc != comm2->src_proc)
44     return SRC_PROC_DIFF;
45   if (comm1->dst_proc != comm2->dst_proc)
46     return DST_PROC_DIFF;
47   if (comm1->tag != comm2->tag)
48     return TAG_DIFF;
49   if (comm1->data_size != comm2->data_size)
50     return DATA_SIZE_DIFF;
51   if(comm1->data == nullptr && comm2->data == NULL)
52     return NONE_DIFF;
53   if(comm1->data != nullptr && comm2->data !=NULL) {
54     if (!memcmp(comm1->data, comm2->data, comm1->data_size))
55       return NONE_DIFF;
56     return DATA_DIFF;
57   } else
58     return DATA_DIFF;
59   return NONE_DIFF;
60 }
61
62 static char* print_determinism_result(e_mc_comm_pattern_difference_t diff, int process, mc_comm_pattern_t comm, unsigned int cursor) {
63   char *type, *res;
64
65   if(comm->type == SIMIX_COMM_SEND)
66     type = bprintf("The send communications pattern of the process %d is different!", process - 1);
67   else
68     type = bprintf("The recv communications pattern of the process %d is different!", process - 1);
69
70   switch(diff) {
71   case TYPE_DIFF:
72     res = bprintf("%s Different type for communication #%d", type, cursor);
73     break;
74   case RDV_DIFF:
75     res = bprintf("%s Different rdv for communication #%d", type, cursor);
76     break;
77   case TAG_DIFF:
78     res = bprintf("%s Different tag for communication #%d", type, cursor);
79     break;
80   case SRC_PROC_DIFF:
81       res = bprintf("%s Different source for communication #%d", type, cursor);
82     break;
83   case DST_PROC_DIFF:
84       res = bprintf("%s Different destination for communication #%d", type, cursor);
85     break;
86   case DATA_SIZE_DIFF:
87     res = bprintf("%s\n Different data size for communication #%d", type, cursor);
88     break;
89   case DATA_DIFF:
90     res = bprintf("%s\n Different data for communication #%d", type, cursor);
91     break;
92   default:
93     res = nullptr;
94     break;
95   }
96
97   return res;
98 }
99
100 static void update_comm_pattern(mc_comm_pattern_t comm_pattern, smx_synchro_t comm_addr)
101 {
102   s_smx_synchro_t comm;
103   mc_model_checker->process().read(&comm, remote(comm_addr));
104
105   smx_process_t src_proc = MC_smx_resolve_process(comm.comm.src_proc);
106   smx_process_t dst_proc = MC_smx_resolve_process(comm.comm.dst_proc);
107   comm_pattern->src_proc = src_proc->pid;
108   comm_pattern->dst_proc = dst_proc->pid;
109   comm_pattern->src_host = MC_smx_process_get_host_name(src_proc);
110   comm_pattern->dst_host = MC_smx_process_get_host_name(dst_proc);
111   if (comm_pattern->data_size == -1 && comm.comm.src_buff != nullptr) {
112     size_t buff_size;
113     mc_model_checker->process().read(
114       &buff_size, remote(comm.comm.dst_buff_size));
115     comm_pattern->data_size = buff_size;
116     comm_pattern->data = xbt_malloc0(comm_pattern->data_size);
117     mc_model_checker->process().read_bytes(
118       comm_pattern->data, comm_pattern->data_size,
119       remote(comm.comm.src_buff));
120   }
121 }
122
123 static void deterministic_comm_pattern(int process, mc_comm_pattern_t comm, int backtracking) {
124
125   mc_list_comm_pattern_t list =
126     xbt_dynar_get_as(initial_communications_pattern, process, mc_list_comm_pattern_t);
127
128   if(!backtracking){
129     mc_comm_pattern_t initial_comm =
130       xbt_dynar_get_as(list->list, list->index_comm, mc_comm_pattern_t);
131     e_mc_comm_pattern_difference_t diff =
132       compare_comm_pattern(initial_comm, comm);
133
134     if (diff != NONE_DIFF) {
135       if (comm->type == SIMIX_COMM_SEND){
136         simgrid::mc::initial_global_state->send_deterministic = 0;
137         if(simgrid::mc::initial_global_state->send_diff != nullptr)
138           xbt_free(simgrid::mc::initial_global_state->send_diff);
139         simgrid::mc::initial_global_state->send_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
140       }else{
141         simgrid::mc::initial_global_state->recv_deterministic = 0;
142         if(simgrid::mc::initial_global_state->recv_diff != nullptr)
143           xbt_free(simgrid::mc::initial_global_state->recv_diff);
144         simgrid::mc::initial_global_state->recv_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
145       }
146       if(_sg_mc_send_determinism && !simgrid::mc::initial_global_state->send_deterministic){
147         XBT_INFO("*********************************************************");
148         XBT_INFO("***** Non-send-deterministic communications pattern *****");
149         XBT_INFO("*********************************************************");
150         XBT_INFO("%s", simgrid::mc::initial_global_state->send_diff);
151         xbt_free(simgrid::mc::initial_global_state->send_diff);
152         simgrid::mc::initial_global_state->send_diff = nullptr;
153         MC_print_statistics(mc_stats);
154         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
155       }else if(_sg_mc_comms_determinism
156           && (!simgrid::mc::initial_global_state->send_deterministic
157             && !simgrid::mc::initial_global_state->recv_deterministic)) {
158         XBT_INFO("****************************************************");
159         XBT_INFO("***** Non-deterministic communications pattern *****");
160         XBT_INFO("****************************************************");
161         XBT_INFO("%s", simgrid::mc::initial_global_state->send_diff);
162         XBT_INFO("%s", simgrid::mc::initial_global_state->recv_diff);
163         xbt_free(simgrid::mc::initial_global_state->send_diff);
164         simgrid::mc::initial_global_state->send_diff = nullptr;
165         xbt_free(simgrid::mc::initial_global_state->recv_diff);
166         simgrid::mc::initial_global_state->recv_diff = nullptr;
167         MC_print_statistics(mc_stats);
168         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
169       }
170     }
171   }
172
173   MC_comm_pattern_free(comm);
174
175 }
176
177 /********** Non Static functions ***********/
178
179 void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type_t call_type, int backtracking)
180 {
181   const smx_process_t issuer = MC_smx_simcall_get_issuer(request);
182   mc_list_comm_pattern_t initial_pattern = xbt_dynar_get_as(
183     initial_communications_pattern, issuer->pid, mc_list_comm_pattern_t);
184   xbt_dynar_t incomplete_pattern = xbt_dynar_get_as(
185     incomplete_communications_pattern, issuer->pid, xbt_dynar_t);
186
187   mc_comm_pattern_t pattern = xbt_new0(s_mc_comm_pattern_t, 1);
188   pattern->data_size = -1;
189   pattern->data = nullptr;
190   pattern->index =
191     initial_pattern->index_comm + xbt_dynar_length(incomplete_pattern);
192
193   if (call_type == MC_CALL_TYPE_SEND) {
194     /* Create comm pattern */
195     pattern->type = SIMIX_COMM_SEND;
196     pattern->comm_addr = simcall_comm_isend__get__result(request);
197
198     s_smx_synchro_t synchro = mc_model_checker->process().read<s_smx_synchro_t>(
199       (std::uint64_t) pattern->comm_addr);
200
201     char* remote_name = mc_model_checker->process().read<char*>(
202       (std::uint64_t)(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
203     pattern->rdv = mc_model_checker->process().read_string(remote_name);
204     pattern->src_proc = MC_smx_resolve_process(synchro.comm.src_proc)->pid;
205     pattern->src_host = MC_smx_process_get_host_name(issuer);
206
207     struct s_smpi_mpi_request mpi_request =
208       mc_model_checker->process().read<s_smpi_mpi_request>(
209         (std::uint64_t) simcall_comm_isend__get__data(request));
210     pattern->tag = mpi_request.tag;
211
212     if(synchro.comm.src_buff != nullptr){
213       pattern->data_size = synchro.comm.src_buff_size;
214       pattern->data = xbt_malloc0(pattern->data_size);
215       mc_model_checker->process().read_bytes(
216         pattern->data, pattern->data_size, remote(synchro.comm.src_buff));
217     }
218     if(mpi_request.detached){
219       if (!simgrid::mc::initial_global_state->initial_communications_pattern_done) {
220         /* Store comm pattern */
221         xbt_dynar_push(
222           xbt_dynar_get_as(
223             initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
224           )->list,
225           &pattern);
226       } else {
227         /* Evaluate comm determinism */
228         deterministic_comm_pattern(pattern->src_proc, pattern, backtracking);
229         xbt_dynar_get_as(
230           initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
231         )->index_comm++;
232       }
233       return;
234     }
235   } else if (call_type == MC_CALL_TYPE_RECV) {
236     pattern->type = SIMIX_COMM_RECEIVE;
237     pattern->comm_addr = simcall_comm_irecv__get__result(request);
238
239     struct s_smpi_mpi_request mpi_request;
240     mc_model_checker->process().read(
241       &mpi_request, remote((struct s_smpi_mpi_request*)simcall_comm_irecv__get__data(request)));
242     pattern->tag = mpi_request.tag;
243
244     s_smx_synchro_t synchro;
245     mc_model_checker->process().read(&synchro, remote(pattern->comm_addr));
246
247     char* remote_name;
248     mc_model_checker->process().read(&remote_name,
249       remote(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
250     pattern->rdv = mc_model_checker->process().read_string(remote_name);
251     pattern->dst_proc = MC_smx_resolve_process(synchro.comm.dst_proc)->pid;
252     pattern->dst_host = MC_smx_process_get_host_name(issuer);
253   } else
254     xbt_die("Unexpected call_type %i", (int) call_type);
255
256   xbt_dynar_push(
257     xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t),
258     &pattern);
259
260   XBT_DEBUG("Insert incomplete comm pattern %p for process %lu", pattern, issuer->pid);
261 }
262
263 void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm_addr, unsigned int issuer, int backtracking) {
264   mc_comm_pattern_t current_comm_pattern;
265   unsigned int cursor = 0;
266   mc_comm_pattern_t comm_pattern;
267   int completed = 0;
268
269   /* Complete comm pattern */
270   xbt_dynar_foreach(xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, current_comm_pattern)
271     if (current_comm_pattern->comm_addr == comm_addr) {
272       update_comm_pattern(current_comm_pattern, comm_addr);
273       completed = 1;
274       xbt_dynar_remove_at(
275         xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t),
276         cursor, &comm_pattern);
277       XBT_DEBUG("Remove incomplete comm pattern for process %u at cursor %u", issuer, cursor);
278       break;
279     }
280
281   if(!completed)
282     xbt_die("Corresponding communication not found!");
283
284   mc_list_comm_pattern_t pattern = xbt_dynar_get_as(
285     initial_communications_pattern, issuer, mc_list_comm_pattern_t);
286
287   if (!simgrid::mc::initial_global_state->initial_communications_pattern_done)
288     /* Store comm pattern */
289     xbt_dynar_push(pattern->list, &comm_pattern);
290   else {
291     /* Evaluate comm determinism */
292     deterministic_comm_pattern(issuer, comm_pattern, backtracking);
293     pattern->index_comm++;
294   }
295 }
296
297
298 /************************ Main algorithm ************************/
299
300 namespace simgrid {
301 namespace mc {
302
303 CommunicationDeterminismChecker::CommunicationDeterminismChecker(Session& session)
304   : Checker(session)
305 {
306
307 }
308
309 CommunicationDeterminismChecker::~CommunicationDeterminismChecker()
310 {
311
312 }
313
314 // TODO, deduplicate with SafetyChecker
315 RecordTrace CommunicationDeterminismChecker::getRecordTrace() // override
316 {
317   RecordTrace res;
318   for (auto const& state : stack_) {
319     int value = 0;
320     smx_simcall_t saved_req = MC_state_get_executed_request(state.get(), &value);
321     const smx_process_t issuer = MC_smx_simcall_get_issuer(saved_req);
322     const int pid = issuer->pid;
323     res.push_back(RecordTraceElement(pid, value));
324   }
325   return res;
326 }
327
328 // TODO, deduplicate with SafetyChecker
329 std::vector<std::string> CommunicationDeterminismChecker::getTextualTrace() // override
330 {
331   std::vector<std::string> trace;
332   for (auto const& state : stack_) {
333     int value;
334     smx_simcall_t req = MC_state_get_executed_request(state.get(), &value);
335     if (req) {
336       char* req_str = simgrid::mc::request_to_string(
337         req, value, simgrid::mc::RequestType::executed);
338       trace.push_back(req_str);
339       xbt_free(req_str);
340     }
341   }
342   return trace;
343 }
344
345 void CommunicationDeterminismChecker::prepare()
346 {
347
348   int i;
349   const int maxpid = MC_smx_get_maxpid();
350
351   // Create initial_communications_pattern elements:
352   initial_communications_pattern = xbt_dynar_new(sizeof(mc_list_comm_pattern_t), MC_list_comm_pattern_free_voidp);
353   for (i=0; i < maxpid; i++){
354     mc_list_comm_pattern_t process_list_pattern = xbt_new0(s_mc_list_comm_pattern_t, 1);
355     process_list_pattern->list = xbt_dynar_new(sizeof(mc_comm_pattern_t), MC_comm_pattern_free_voidp);
356     process_list_pattern->index_comm = 0;
357     xbt_dynar_insert_at(initial_communications_pattern, i, &process_list_pattern);
358   }
359
360   // Create incomplete_communications_pattern elements:
361   incomplete_communications_pattern = xbt_dynar_new(sizeof(xbt_dynar_t), xbt_dynar_free_voidp);
362   for (i=0; i < maxpid; i++){
363     xbt_dynar_t process_pattern = xbt_dynar_new(sizeof(mc_comm_pattern_t), nullptr);
364     xbt_dynar_insert_at(incomplete_communications_pattern, i, &process_pattern);
365   }
366
367   std::unique_ptr<simgrid::mc::State> initial_state =
368     std::unique_ptr<simgrid::mc::State>(MC_state_new());
369
370   XBT_DEBUG("********* Start communication determinism verification *********");
371
372   /* Wait for requests (schedules processes) */
373   mc_model_checker->wait_for_requests();
374
375   /* Get an enabled process and insert it in the interleave set of the initial state */
376   for (auto& p : mc_model_checker->process().simix_processes())
377     if (simgrid::mc::process_is_enabled(&p.copy))
378       MC_state_interleave_process(initial_state.get(), &p.copy);
379
380   stack_.push_back(std::move(initial_state));
381 }
382
383 static inline
384 bool all_communications_are_finished()
385 {
386   for (size_t current_process = 1; current_process < MC_smx_get_maxpid(); current_process++) {
387     xbt_dynar_t pattern = xbt_dynar_get_as(
388       incomplete_communications_pattern, current_process, xbt_dynar_t);
389     if (!xbt_dynar_is_empty(pattern)) {
390       XBT_DEBUG("Some communications are not finished, cannot stop the exploration ! State not visited.");
391       return false;
392     }
393   }
394   return true;
395 }
396
397 int CommunicationDeterminismChecker::main(void)
398 {
399
400   char *req_str = nullptr;
401   int value;
402   std::unique_ptr<simgrid::mc::VisitedState> visited_state = nullptr;
403   smx_simcall_t req = nullptr;
404
405   while (!stack_.empty()) {
406
407     /* Get current state */
408     simgrid::mc::State* state = stack_.back().get();
409
410     XBT_DEBUG("**************************************************");
411     XBT_DEBUG("Exploration depth = %zi (state = %d, interleaved processes = %d)",
412               stack_.size(), state->num,
413               MC_state_interleave_size(state));
414
415     /* Update statistics */
416     mc_stats->visited_states++;
417
418     if (stack_.size() <= (std::size_t) _sg_mc_max_depth
419         && (req = MC_state_get_request(state, &value))
420         && (visited_state == nullptr)) {
421
422       req_str = simgrid::mc::request_to_string(req, value, simgrid::mc::RequestType::simix);
423       XBT_DEBUG("Execute: %s", req_str);
424       xbt_free(req_str);
425
426       if (dot_output != nullptr)
427         req_str = simgrid::mc::request_get_dot_output(req, value);
428
429       MC_state_set_executed_request(state, req, value);
430       mc_stats->executed_transitions++;
431
432       /* TODO : handle test and testany simcalls */
433       e_mc_call_type_t call = MC_CALL_TYPE_NONE;
434       if (_sg_mc_comms_determinism || _sg_mc_send_determinism)
435         call = MC_get_call_type(req);
436
437       /* Answer the request */
438       simgrid::mc::handle_simcall(req, value);    /* After this call req is no longer useful */
439
440       if(!initial_global_state->initial_communications_pattern_done)
441         MC_handle_comm_pattern(call, req, value, initial_communications_pattern, 0);
442       else
443         MC_handle_comm_pattern(call, req, value, nullptr, 0);
444
445       /* Wait for requests (schedules processes) */
446       mc_model_checker->wait_for_requests();
447
448       /* Create the new expanded state */
449       std::unique_ptr<simgrid::mc::State> next_state =
450         std::unique_ptr<simgrid::mc::State>(MC_state_new());
451
452       /* If comm determinism verification, we cannot stop the exploration if
453          some communications are not finished (at least, data are transfered).
454          These communications  are incomplete and they cannot be analyzed and
455          compared with the initial pattern. */
456       bool compare_snapshots = all_communications_are_finished()
457         && initial_global_state->initial_communications_pattern_done;
458
459       if (_sg_mc_visited == 0
460           || (visited_state = visitedStates_.addVisitedState(next_state.get(), compare_snapshots)) == nullptr) {
461
462         /* Get enabled processes and insert them in the interleave set of the next state */
463         for (auto& p : mc_model_checker->process().simix_processes())
464           if (simgrid::mc::process_is_enabled(&p.copy))
465             MC_state_interleave_process(next_state.get(), &p.copy);
466
467         if (dot_output != nullptr)
468           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n", state->num,  next_state->num, req_str);
469
470       } else if (dot_output != nullptr)
471         fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n",
472           state->num, visited_state->other_num == -1 ? visited_state->num : visited_state->other_num, req_str);
473
474       stack_.push_back(std::move(next_state));
475
476       if (dot_output != nullptr)
477         xbt_free(req_str);
478
479     } else {
480
481       if (stack_.size() > (std::size_t) _sg_mc_max_depth)
482         XBT_WARN("/!\\ Max depth reached ! /!\\ ");
483       else if (visited_state != nullptr)
484         XBT_DEBUG("State already visited (equal to state %d), exploration stopped on this path.", visited_state->other_num == -1 ? visited_state->num : visited_state->other_num);
485       else
486         XBT_DEBUG("There are no more processes to interleave. (depth %zi)",
487           stack_.size());
488
489       if (!initial_global_state->initial_communications_pattern_done)
490         initial_global_state->initial_communications_pattern_done = 1;
491
492       /* Trash the current state, no longer needed */
493       XBT_DEBUG("Delete state %d at depth %zi",
494         state->num, stack_.size());
495       stack_.pop_back();
496
497       visited_state = nullptr;
498
499       /* Check for deadlocks */
500       if (mc_model_checker->checkDeadlock()) {
501         MC_show_deadlock();
502         return SIMGRID_MC_EXIT_DEADLOCK;
503       }
504
505       while (!stack_.empty()) {
506         std::unique_ptr<simgrid::mc::State> state = std::move(stack_.back());
507         stack_.pop_back();
508         if (MC_state_interleave_size(state.get())
509             && stack_.size() < (std::size_t) _sg_mc_max_depth) {
510           /* We found a back-tracking point, let's loop */
511           XBT_DEBUG("Back-tracking to state %d at depth %zi",
512             state->num, stack_.size() + 1);
513           stack_.push_back(std::move(state));
514
515           simgrid::mc::replay(stack_);
516
517           XBT_DEBUG("Back-tracking to state %d at depth %zi done",
518             stack_.back()->num, stack_.size());
519
520           break;
521         } else {
522           XBT_DEBUG("Delete state %d at depth %zi",
523             state->num, stack_.size() + 1);
524         }
525       }
526     }
527   }
528
529   MC_print_statistics(mc_stats);
530   return SIMGRID_MC_EXIT_SUCCESS;
531 }
532
533 int CommunicationDeterminismChecker::run()
534 {
535   XBT_INFO("Check communication determinism");
536   mc_model_checker->wait_for_requests();
537
538   if (mc_mode == MC_MODE_CLIENT)
539     // This will move somehwere else:
540     simgrid::mc::Client::get()->handleMessages();
541
542   this->prepare();
543
544   initial_global_state = std::unique_ptr<s_mc_global_t>(new s_mc_global_t());
545   initial_global_state->snapshot = simgrid::mc::take_snapshot(0);
546   initial_global_state->initial_communications_pattern_done = 0;
547   initial_global_state->recv_deterministic = 1;
548   initial_global_state->send_deterministic = 1;
549   initial_global_state->recv_diff = nullptr;
550   initial_global_state->send_diff = nullptr;
551
552   int res = this->main();
553   initial_global_state = nullptr;
554   return res;
555 }
556
557 Checker* createCommunicationDeterminismChecker(Session& session)
558 {
559   return new CommunicationDeterminismChecker(session);
560 }
561
562 }
563 }