Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
simgrid::config::flag, a more declarative way to describe CLI flags
[simgrid.git] / src / mc / CommunicationDeterminismChecker.cpp
1 /* Copyright (c) 2008-2015. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include <cstdint>
8
9 #include <xbt/dynar.h>
10 #include <xbt/dynar.hpp>
11 #include <xbt/log.h>
12 #include <xbt/sysdep.h>
13
14 #include "src/mc/mc_state.h"
15 #include "src/mc/mc_comm_pattern.h"
16 #include "src/mc/mc_request.h"
17 #include "src/mc/mc_safety.h"
18 #include "src/mc/mc_private.h"
19 #include "src/mc/mc_record.h"
20 #include "src/mc/mc_smx.h"
21 #include "src/mc/Client.hpp"
22 #include "src/mc/CommunicationDeterminismChecker.hpp"
23 #include "src/mc/mc_exit.h"
24 #include "src/mc/VisitedState.hpp"
25 #include "src/mc/Transition.hpp"
26
27 using simgrid::mc::remote;
28
29 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_comm_determinism, mc,
30                                 "Logging specific to MC communication determinism detection");
31
32 /********** Global variables **********/
33
34 xbt_dynar_t initial_communications_pattern;
35 xbt_dynar_t incomplete_communications_pattern;
36
37 /********** Static functions ***********/
38
39 static e_mc_comm_pattern_difference_t compare_comm_pattern(simgrid::mc::PatternCommunication* comm1, simgrid::mc::PatternCommunication* comm2) {
40   if(comm1->type != comm2->type)
41     return TYPE_DIFF;
42   if (comm1->rdv != comm2->rdv)
43     return RDV_DIFF;
44   if (comm1->src_proc != comm2->src_proc)
45     return SRC_PROC_DIFF;
46   if (comm1->dst_proc != comm2->dst_proc)
47     return DST_PROC_DIFF;
48   if (comm1->tag != comm2->tag)
49     return TAG_DIFF;
50   if (comm1->data.size() != comm2->data.size())
51     return DATA_SIZE_DIFF;
52   if (comm1->data != comm2->data)
53     return DATA_DIFF;
54   return NONE_DIFF;
55 }
56
57 static char* print_determinism_result(e_mc_comm_pattern_difference_t diff, int process, simgrid::mc::PatternCommunication* comm, unsigned int cursor) {
58   char *type, *res;
59
60   if(comm->type == SIMIX_COMM_SEND)
61     type = bprintf("The send communications pattern of the process %d is different!", process - 1);
62   else
63     type = bprintf("The recv communications pattern of the process %d is different!", process - 1);
64
65   switch(diff) {
66   case TYPE_DIFF:
67     res = bprintf("%s Different type for communication #%d", type, cursor);
68     break;
69   case RDV_DIFF:
70     res = bprintf("%s Different rdv for communication #%d", type, cursor);
71     break;
72   case TAG_DIFF:
73     res = bprintf("%s Different tag for communication #%d", type, cursor);
74     break;
75   case SRC_PROC_DIFF:
76       res = bprintf("%s Different source for communication #%d", type, cursor);
77     break;
78   case DST_PROC_DIFF:
79       res = bprintf("%s Different destination for communication #%d", type, cursor);
80     break;
81   case DATA_SIZE_DIFF:
82     res = bprintf("%s\n Different data size for communication #%d", type, cursor);
83     break;
84   case DATA_DIFF:
85     res = bprintf("%s\n Different data for communication #%d", type, cursor);
86     break;
87   default:
88     res = nullptr;
89     break;
90   }
91
92   return res;
93 }
94
95 static void update_comm_pattern(simgrid::mc::PatternCommunication* comm_pattern, smx_synchro_t comm_addr)
96 {
97   s_smx_synchro_t comm;
98   mc_model_checker->process().read(&comm, remote(comm_addr));
99
100   smx_process_t src_proc = mc_model_checker->process().resolveProcess(
101     simgrid::mc::remote(comm.comm.src_proc));
102   smx_process_t dst_proc = mc_model_checker->process().resolveProcess(
103     simgrid::mc::remote(comm.comm.dst_proc));
104   comm_pattern->src_proc = src_proc->pid;
105   comm_pattern->dst_proc = dst_proc->pid;
106   comm_pattern->src_host = MC_smx_process_get_host_name(src_proc);
107   comm_pattern->dst_host = MC_smx_process_get_host_name(dst_proc);
108   if (comm_pattern->data.size() == 0 && comm.comm.src_buff != nullptr) {
109     size_t buff_size;
110     mc_model_checker->process().read(
111       &buff_size, remote(comm.comm.dst_buff_size));
112     comm_pattern->data.resize(buff_size);
113     mc_model_checker->process().read_bytes(
114       comm_pattern->data.data(), comm_pattern->data.size(),
115       remote(comm.comm.src_buff));
116   }
117 }
118
119 static void deterministic_comm_pattern(int process, simgrid::mc::PatternCommunication* comm, int backtracking) {
120
121   simgrid::mc::PatternCommunicationList* list =
122     xbt_dynar_get_as(initial_communications_pattern, process, simgrid::mc::PatternCommunicationList*);
123
124   if(!backtracking){
125     e_mc_comm_pattern_difference_t diff =
126       compare_comm_pattern(list->list[list->index_comm].get(), comm);
127
128     if (diff != NONE_DIFF) {
129       if (comm->type == SIMIX_COMM_SEND){
130         simgrid::mc::initial_global_state->send_deterministic = 0;
131         if(simgrid::mc::initial_global_state->send_diff != nullptr)
132           xbt_free(simgrid::mc::initial_global_state->send_diff);
133         simgrid::mc::initial_global_state->send_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
134       }else{
135         simgrid::mc::initial_global_state->recv_deterministic = 0;
136         if(simgrid::mc::initial_global_state->recv_diff != nullptr)
137           xbt_free(simgrid::mc::initial_global_state->recv_diff);
138         simgrid::mc::initial_global_state->recv_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
139       }
140       if(_sg_mc_send_determinism && !simgrid::mc::initial_global_state->send_deterministic){
141         XBT_INFO("*********************************************************");
142         XBT_INFO("***** Non-send-deterministic communications pattern *****");
143         XBT_INFO("*********************************************************");
144         XBT_INFO("%s", simgrid::mc::initial_global_state->send_diff);
145         xbt_free(simgrid::mc::initial_global_state->send_diff);
146         simgrid::mc::initial_global_state->send_diff = nullptr;
147         MC_print_statistics(mc_stats);
148         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
149       }else if(_sg_mc_comms_determinism
150           && (!simgrid::mc::initial_global_state->send_deterministic
151             && !simgrid::mc::initial_global_state->recv_deterministic)) {
152         XBT_INFO("****************************************************");
153         XBT_INFO("***** Non-deterministic communications pattern *****");
154         XBT_INFO("****************************************************");
155         XBT_INFO("%s", simgrid::mc::initial_global_state->send_diff);
156         XBT_INFO("%s", simgrid::mc::initial_global_state->recv_diff);
157         xbt_free(simgrid::mc::initial_global_state->send_diff);
158         simgrid::mc::initial_global_state->send_diff = nullptr;
159         xbt_free(simgrid::mc::initial_global_state->recv_diff);
160         simgrid::mc::initial_global_state->recv_diff = nullptr;
161         MC_print_statistics(mc_stats);
162         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
163       }
164     }
165   }
166 }
167
168 /********** Non Static functions ***********/
169
170 void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type_t call_type, int backtracking)
171 {
172   const smx_process_t issuer = MC_smx_simcall_get_issuer(request);
173   simgrid::mc::PatternCommunicationList* initial_pattern = xbt_dynar_get_as(
174     initial_communications_pattern, issuer->pid, simgrid::mc::PatternCommunicationList*);
175   xbt_dynar_t incomplete_pattern = xbt_dynar_get_as(
176     incomplete_communications_pattern, issuer->pid, xbt_dynar_t);
177
178   std::unique_ptr<simgrid::mc::PatternCommunication> pattern =
179     std::unique_ptr<simgrid::mc::PatternCommunication>(
180       new simgrid::mc::PatternCommunication());
181   pattern->index =
182     initial_pattern->index_comm + xbt_dynar_length(incomplete_pattern);
183
184   if (call_type == MC_CALL_TYPE_SEND) {
185     /* Create comm pattern */
186     pattern->type = SIMIX_COMM_SEND;
187     pattern->comm_addr = simcall_comm_isend__get__result(request);
188
189     s_smx_synchro_t synchro = mc_model_checker->process().read<s_smx_synchro_t>(
190       (std::uint64_t) pattern->comm_addr);
191
192     char* remote_name = mc_model_checker->process().read<char*>(
193       (std::uint64_t)(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
194     pattern->rdv = mc_model_checker->process().read_string(remote_name);
195     pattern->src_proc = mc_model_checker->process().resolveProcess(
196       simgrid::mc::remote(synchro.comm.src_proc))->pid;
197     pattern->src_host = MC_smx_process_get_host_name(issuer);
198
199     struct s_smpi_mpi_request mpi_request =
200       mc_model_checker->process().read<s_smpi_mpi_request>(
201         (std::uint64_t) simcall_comm_isend__get__data(request));
202     pattern->tag = mpi_request.tag;
203
204     if(synchro.comm.src_buff != nullptr){
205       pattern->data.resize(synchro.comm.src_buff_size);
206       mc_model_checker->process().read_bytes(
207         pattern->data.data(), pattern->data.size(),
208         remote(synchro.comm.src_buff));
209     }
210     if(mpi_request.detached){
211       if (!simgrid::mc::initial_global_state->initial_communications_pattern_done) {
212         /* Store comm pattern */
213         simgrid::mc::PatternCommunicationList* list = xbt_dynar_get_as(
214           initial_communications_pattern, pattern->src_proc,
215           simgrid::mc::PatternCommunicationList*);
216         list->list.push_back(std::move(pattern));
217       } else {
218         /* Evaluate comm determinism */
219         deterministic_comm_pattern(pattern->src_proc, pattern.get(), backtracking);
220         xbt_dynar_get_as(
221           initial_communications_pattern, pattern->src_proc, simgrid::mc::PatternCommunicationList*
222         )->index_comm++;
223       }
224       return;
225     }
226   } else if (call_type == MC_CALL_TYPE_RECV) {
227     pattern->type = SIMIX_COMM_RECEIVE;
228     pattern->comm_addr = simcall_comm_irecv__get__result(request);
229
230     struct s_smpi_mpi_request mpi_request;
231     mc_model_checker->process().read(
232       &mpi_request, remote((struct s_smpi_mpi_request*)simcall_comm_irecv__get__data(request)));
233     pattern->tag = mpi_request.tag;
234
235     s_smx_synchro_t synchro;
236     mc_model_checker->process().read(&synchro, remote(pattern->comm_addr));
237
238     char* remote_name;
239     mc_model_checker->process().read(&remote_name,
240       remote(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
241     pattern->rdv = mc_model_checker->process().read_string(remote_name);
242     pattern->dst_proc = mc_model_checker->process().resolveProcess(
243       simgrid::mc::remote(synchro.comm.dst_proc))->pid;
244     pattern->dst_host = MC_smx_process_get_host_name(issuer);
245   } else
246     xbt_die("Unexpected call_type %i", (int) call_type);
247
248   XBT_DEBUG("Insert incomplete comm pattern %p for process %lu",
249     pattern.get(), issuer->pid);
250   xbt_dynar_t dynar =
251     xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t);
252   simgrid::mc::PatternCommunication* pattern2 = pattern.release();
253   xbt_dynar_push(dynar, &pattern2);
254 }
255
256 void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm_addr, unsigned int issuer, int backtracking) {
257   simgrid::mc::PatternCommunication* current_comm_pattern;
258   unsigned int cursor = 0;
259   std::unique_ptr<simgrid::mc::PatternCommunication> comm_pattern;
260   int completed = 0;
261
262   /* Complete comm pattern */
263   xbt_dynar_foreach(xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, current_comm_pattern)
264     if (current_comm_pattern->comm_addr == comm_addr) {
265       update_comm_pattern(current_comm_pattern, comm_addr);
266       completed = 1;
267       simgrid::mc::PatternCommunication* temp;
268       xbt_dynar_remove_at(
269         xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t),
270         cursor, &temp);
271       comm_pattern = std::unique_ptr<simgrid::mc::PatternCommunication>(temp);
272       XBT_DEBUG("Remove incomplete comm pattern for process %u at cursor %u", issuer, cursor);
273       break;
274     }
275
276   if(!completed)
277     xbt_die("Corresponding communication not found!");
278
279   simgrid::mc::PatternCommunicationList* pattern = xbt_dynar_get_as(
280     initial_communications_pattern, issuer, simgrid::mc::PatternCommunicationList*);
281
282   if (!simgrid::mc::initial_global_state->initial_communications_pattern_done)
283     /* Store comm pattern */
284     pattern->list.push_back(std::move(comm_pattern));
285   else {
286     /* Evaluate comm determinism */
287     deterministic_comm_pattern(issuer, comm_pattern.get(), backtracking);
288     pattern->index_comm++;
289   }
290 }
291
292
293 /************************ Main algorithm ************************/
294
295 namespace simgrid {
296 namespace mc {
297
298 CommunicationDeterminismChecker::CommunicationDeterminismChecker(Session& session)
299   : Checker(session)
300 {
301
302 }
303
304 CommunicationDeterminismChecker::~CommunicationDeterminismChecker()
305 {
306
307 }
308
309 RecordTrace CommunicationDeterminismChecker::getRecordTrace() // override
310 {
311   RecordTrace res;
312   for (auto const& state : stack_)
313     res.push_back(state->getRecordElement());
314   return res;
315 }
316
317 std::vector<std::string> CommunicationDeterminismChecker::getTextualTrace() // override
318 {
319   std::vector<std::string> trace;
320   for (auto const& state : stack_) {
321     smx_simcall_t req = &state->executed_req;
322     if (req)
323       trace.push_back(simgrid::mc::request_to_string(
324         req, state->transition.argument, simgrid::mc::RequestType::executed));
325   }
326   return trace;
327 }
328
329 void CommunicationDeterminismChecker::prepare()
330 {
331
332   int i;
333   const int maxpid = MC_smx_get_maxpid();
334
335   // Create initial_communications_pattern elements:
336   initial_communications_pattern = simgrid::xbt::newDeleteDynar<simgrid::mc::PatternCommunicationList*>();
337   for (i=0; i < maxpid; i++){
338     simgrid::mc::PatternCommunicationList* process_list_pattern = new simgrid::mc::PatternCommunicationList();
339     xbt_dynar_insert_at(initial_communications_pattern, i, &process_list_pattern);
340   }
341
342   // Create incomplete_communications_pattern elements:
343   incomplete_communications_pattern = xbt_dynar_new(sizeof(xbt_dynar_t), xbt_dynar_free_voidp);
344   for (i=0; i < maxpid; i++){
345     xbt_dynar_t process_pattern = xbt_dynar_new(sizeof(simgrid::mc::PatternCommunication*), nullptr);
346     xbt_dynar_insert_at(incomplete_communications_pattern, i, &process_pattern);
347   }
348
349   std::unique_ptr<simgrid::mc::State> initial_state =
350     std::unique_ptr<simgrid::mc::State>(MC_state_new());
351
352   XBT_DEBUG("********* Start communication determinism verification *********");
353
354   /* Wait for requests (schedules processes) */
355   mc_model_checker->wait_for_requests();
356
357   /* Get an enabled process and insert it in the interleave set of the initial state */
358   for (auto& p : mc_model_checker->process().simix_processes())
359     if (simgrid::mc::process_is_enabled(&p.copy))
360       initial_state->interleave(&p.copy);
361
362   stack_.push_back(std::move(initial_state));
363 }
364
365 static inline
366 bool all_communications_are_finished()
367 {
368   for (size_t current_process = 1; current_process < MC_smx_get_maxpid(); current_process++) {
369     xbt_dynar_t pattern = xbt_dynar_get_as(
370       incomplete_communications_pattern, current_process, xbt_dynar_t);
371     if (!xbt_dynar_is_empty(pattern)) {
372       XBT_DEBUG("Some communications are not finished, cannot stop the exploration ! State not visited.");
373       return false;
374     }
375   }
376   return true;
377 }
378
379 int CommunicationDeterminismChecker::main(void)
380 {
381   std::unique_ptr<simgrid::mc::VisitedState> visited_state = nullptr;
382   smx_simcall_t req = nullptr;
383
384   while (!stack_.empty()) {
385
386     /* Get current state */
387     simgrid::mc::State* state = stack_.back().get();
388
389     XBT_DEBUG("**************************************************");
390     XBT_DEBUG("Exploration depth = %zi (state = %d, interleaved processes = %zd)",
391               stack_.size(), state->num,
392               state->interleaveSize());
393
394     /* Update statistics */
395     mc_stats->visited_states++;
396
397     if (stack_.size() <= (std::size_t) _sg_mc_max_depth
398         && (req = MC_state_get_request(state)) != nullptr
399         && (visited_state == nullptr)) {
400
401       int req_num = state->transition.argument;
402
403       XBT_DEBUG("Execute: %s",
404         simgrid::mc::request_to_string(
405           req, req_num, simgrid::mc::RequestType::simix).c_str());
406
407       std::string req_str;
408       if (dot_output != nullptr)
409         req_str = simgrid::mc::request_get_dot_output(req, req_num);
410
411       mc_stats->executed_transitions++;
412
413       /* TODO : handle test and testany simcalls */
414       e_mc_call_type_t call = MC_CALL_TYPE_NONE;
415       if (_sg_mc_comms_determinism || _sg_mc_send_determinism)
416         call = MC_get_call_type(req);
417
418       /* Answer the request */
419       mc_model_checker->handle_simcall(state->transition);
420       /* After this call req is no longer useful */
421
422       if(!initial_global_state->initial_communications_pattern_done)
423         MC_handle_comm_pattern(call, req, req_num, initial_communications_pattern, 0);
424       else
425         MC_handle_comm_pattern(call, req, req_num, nullptr, 0);
426
427       /* Wait for requests (schedules processes) */
428       mc_model_checker->wait_for_requests();
429
430       /* Create the new expanded state */
431       std::unique_ptr<simgrid::mc::State> next_state =
432         std::unique_ptr<simgrid::mc::State>(MC_state_new());
433
434       /* If comm determinism verification, we cannot stop the exploration if
435          some communications are not finished (at least, data are transfered).
436          These communications  are incomplete and they cannot be analyzed and
437          compared with the initial pattern. */
438       bool compare_snapshots = all_communications_are_finished()
439         && initial_global_state->initial_communications_pattern_done;
440
441       if (_sg_mc_visited == 0
442           || (visited_state = visitedStates_.addVisitedState(next_state.get(), compare_snapshots)) == nullptr) {
443
444         /* Get enabled processes and insert them in the interleave set of the next state */
445         for (auto& p : mc_model_checker->process().simix_processes())
446           if (simgrid::mc::process_is_enabled(&p.copy))
447             next_state->interleave(&p.copy);
448
449         if (dot_output != nullptr)
450           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n",
451             state->num,  next_state->num, req_str.c_str());
452
453       } else if (dot_output != nullptr)
454         fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n",
455           state->num, visited_state->other_num == -1 ? visited_state->num : visited_state->other_num, req_str.c_str());
456
457       stack_.push_back(std::move(next_state));
458
459     } else {
460
461       if (stack_.size() > (std::size_t) _sg_mc_max_depth)
462         XBT_WARN("/!\\ Max depth reached ! /!\\ ");
463       else if (visited_state != nullptr)
464         XBT_DEBUG("State already visited (equal to state %d), exploration stopped on this path.", visited_state->other_num == -1 ? visited_state->num : visited_state->other_num);
465       else
466         XBT_DEBUG("There are no more processes to interleave. (depth %zi)",
467           stack_.size());
468
469       if (!initial_global_state->initial_communications_pattern_done)
470         initial_global_state->initial_communications_pattern_done = 1;
471
472       /* Trash the current state, no longer needed */
473       XBT_DEBUG("Delete state %d at depth %zi",
474         state->num, stack_.size());
475       stack_.pop_back();
476
477       visited_state = nullptr;
478
479       /* Check for deadlocks */
480       if (mc_model_checker->checkDeadlock()) {
481         MC_show_deadlock();
482         return SIMGRID_MC_EXIT_DEADLOCK;
483       }
484
485       while (!stack_.empty()) {
486         std::unique_ptr<simgrid::mc::State> state = std::move(stack_.back());
487         stack_.pop_back();
488         if (state->interleaveSize()
489             && stack_.size() < (std::size_t) _sg_mc_max_depth) {
490           /* We found a back-tracking point, let's loop */
491           XBT_DEBUG("Back-tracking to state %d at depth %zi",
492             state->num, stack_.size() + 1);
493           stack_.push_back(std::move(state));
494
495           simgrid::mc::replay(stack_);
496
497           XBT_DEBUG("Back-tracking to state %d at depth %zi done",
498             stack_.back()->num, stack_.size());
499
500           break;
501         } else {
502           XBT_DEBUG("Delete state %d at depth %zi",
503             state->num, stack_.size() + 1);
504         }
505       }
506     }
507   }
508
509   MC_print_statistics(mc_stats);
510   return SIMGRID_MC_EXIT_SUCCESS;
511 }
512
513 int CommunicationDeterminismChecker::run()
514 {
515   XBT_INFO("Check communication determinism");
516   mc_model_checker->wait_for_requests();
517
518   this->prepare();
519
520   initial_global_state = std::unique_ptr<s_mc_global_t>(new s_mc_global_t());
521   initial_global_state->snapshot = simgrid::mc::take_snapshot(0);
522   initial_global_state->initial_communications_pattern_done = 0;
523   initial_global_state->recv_deterministic = 1;
524   initial_global_state->send_deterministic = 1;
525   initial_global_state->recv_diff = nullptr;
526   initial_global_state->send_diff = nullptr;
527
528   int res = this->main();
529   initial_global_state = nullptr;
530   return res;
531 }
532
533 Checker* createCommunicationDeterminismChecker(Session& session)
534 {
535   return new CommunicationDeterminismChecker(session);
536 }
537
538 }
539 }