Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
[mc] Document/cleanup State
[simgrid.git] / src / mc / CommunicationDeterminismChecker.cpp
1 /* Copyright (c) 2008-2015. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include <cstdint>
8
9 #include <xbt/dynar.h>
10 #include <xbt/dynar.hpp>
11 #include <xbt/log.h>
12 #include <xbt/sysdep.h>
13
14 #include "src/mc/mc_state.h"
15 #include "src/mc/mc_comm_pattern.h"
16 #include "src/mc/mc_request.h"
17 #include "src/mc/mc_safety.h"
18 #include "src/mc/mc_private.h"
19 #include "src/mc/mc_record.h"
20 #include "src/mc/mc_smx.h"
21 #include "src/mc/Client.hpp"
22 #include "src/mc/CommunicationDeterminismChecker.hpp"
23 #include "src/mc/mc_exit.h"
24 #include "src/mc/VisitedState.hpp"
25
26 using simgrid::mc::remote;
27
28 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_comm_determinism, mc,
29                                 "Logging specific to MC communication determinism detection");
30
31 /********** Global variables **********/
32
33 xbt_dynar_t initial_communications_pattern;
34 xbt_dynar_t incomplete_communications_pattern;
35
36 /********** Static functions ***********/
37
38 static e_mc_comm_pattern_difference_t compare_comm_pattern(simgrid::mc::PatternCommunication* comm1, simgrid::mc::PatternCommunication* comm2) {
39   if(comm1->type != comm2->type)
40     return TYPE_DIFF;
41   if (comm1->rdv != comm2->rdv)
42     return RDV_DIFF;
43   if (comm1->src_proc != comm2->src_proc)
44     return SRC_PROC_DIFF;
45   if (comm1->dst_proc != comm2->dst_proc)
46     return DST_PROC_DIFF;
47   if (comm1->tag != comm2->tag)
48     return TAG_DIFF;
49   if (comm1->data.size() != comm2->data.size())
50     return DATA_SIZE_DIFF;
51   if (comm1->data != comm2->data)
52     return DATA_DIFF;
53   return NONE_DIFF;
54 }
55
56 static char* print_determinism_result(e_mc_comm_pattern_difference_t diff, int process, simgrid::mc::PatternCommunication* comm, unsigned int cursor) {
57   char *type, *res;
58
59   if(comm->type == SIMIX_COMM_SEND)
60     type = bprintf("The send communications pattern of the process %d is different!", process - 1);
61   else
62     type = bprintf("The recv communications pattern of the process %d is different!", process - 1);
63
64   switch(diff) {
65   case TYPE_DIFF:
66     res = bprintf("%s Different type for communication #%d", type, cursor);
67     break;
68   case RDV_DIFF:
69     res = bprintf("%s Different rdv for communication #%d", type, cursor);
70     break;
71   case TAG_DIFF:
72     res = bprintf("%s Different tag for communication #%d", type, cursor);
73     break;
74   case SRC_PROC_DIFF:
75       res = bprintf("%s Different source for communication #%d", type, cursor);
76     break;
77   case DST_PROC_DIFF:
78       res = bprintf("%s Different destination for communication #%d", type, cursor);
79     break;
80   case DATA_SIZE_DIFF:
81     res = bprintf("%s\n Different data size for communication #%d", type, cursor);
82     break;
83   case DATA_DIFF:
84     res = bprintf("%s\n Different data for communication #%d", type, cursor);
85     break;
86   default:
87     res = nullptr;
88     break;
89   }
90
91   return res;
92 }
93
94 static void update_comm_pattern(simgrid::mc::PatternCommunication* comm_pattern, smx_synchro_t comm_addr)
95 {
96   s_smx_synchro_t comm;
97   mc_model_checker->process().read(&comm, remote(comm_addr));
98
99   smx_process_t src_proc = MC_smx_resolve_process(comm.comm.src_proc);
100   smx_process_t dst_proc = MC_smx_resolve_process(comm.comm.dst_proc);
101   comm_pattern->src_proc = src_proc->pid;
102   comm_pattern->dst_proc = dst_proc->pid;
103   comm_pattern->src_host = MC_smx_process_get_host_name(src_proc);
104   comm_pattern->dst_host = MC_smx_process_get_host_name(dst_proc);
105   if (comm_pattern->data.size() == 0 && comm.comm.src_buff != nullptr) {
106     size_t buff_size;
107     mc_model_checker->process().read(
108       &buff_size, remote(comm.comm.dst_buff_size));
109     comm_pattern->data.resize(buff_size);
110     mc_model_checker->process().read_bytes(
111       comm_pattern->data.data(), comm_pattern->data.size(),
112       remote(comm.comm.src_buff));
113   }
114 }
115
116 static void deterministic_comm_pattern(int process, simgrid::mc::PatternCommunication* comm, int backtracking) {
117
118   simgrid::mc::PatternCommunicationList* list =
119     xbt_dynar_get_as(initial_communications_pattern, process, simgrid::mc::PatternCommunicationList*);
120
121   if(!backtracking){
122     e_mc_comm_pattern_difference_t diff =
123       compare_comm_pattern(list->list[list->index_comm].get(), comm);
124
125     if (diff != NONE_DIFF) {
126       if (comm->type == SIMIX_COMM_SEND){
127         simgrid::mc::initial_global_state->send_deterministic = 0;
128         if(simgrid::mc::initial_global_state->send_diff != nullptr)
129           xbt_free(simgrid::mc::initial_global_state->send_diff);
130         simgrid::mc::initial_global_state->send_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
131       }else{
132         simgrid::mc::initial_global_state->recv_deterministic = 0;
133         if(simgrid::mc::initial_global_state->recv_diff != nullptr)
134           xbt_free(simgrid::mc::initial_global_state->recv_diff);
135         simgrid::mc::initial_global_state->recv_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
136       }
137       if(_sg_mc_send_determinism && !simgrid::mc::initial_global_state->send_deterministic){
138         XBT_INFO("*********************************************************");
139         XBT_INFO("***** Non-send-deterministic communications pattern *****");
140         XBT_INFO("*********************************************************");
141         XBT_INFO("%s", simgrid::mc::initial_global_state->send_diff);
142         xbt_free(simgrid::mc::initial_global_state->send_diff);
143         simgrid::mc::initial_global_state->send_diff = nullptr;
144         MC_print_statistics(mc_stats);
145         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
146       }else if(_sg_mc_comms_determinism
147           && (!simgrid::mc::initial_global_state->send_deterministic
148             && !simgrid::mc::initial_global_state->recv_deterministic)) {
149         XBT_INFO("****************************************************");
150         XBT_INFO("***** Non-deterministic communications pattern *****");
151         XBT_INFO("****************************************************");
152         XBT_INFO("%s", simgrid::mc::initial_global_state->send_diff);
153         XBT_INFO("%s", simgrid::mc::initial_global_state->recv_diff);
154         xbt_free(simgrid::mc::initial_global_state->send_diff);
155         simgrid::mc::initial_global_state->send_diff = nullptr;
156         xbt_free(simgrid::mc::initial_global_state->recv_diff);
157         simgrid::mc::initial_global_state->recv_diff = nullptr;
158         MC_print_statistics(mc_stats);
159         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
160       }
161     }
162   }
163 }
164
165 /********** Non Static functions ***********/
166
167 void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type_t call_type, int backtracking)
168 {
169   const smx_process_t issuer = MC_smx_simcall_get_issuer(request);
170   simgrid::mc::PatternCommunicationList* initial_pattern = xbt_dynar_get_as(
171     initial_communications_pattern, issuer->pid, simgrid::mc::PatternCommunicationList*);
172   xbt_dynar_t incomplete_pattern = xbt_dynar_get_as(
173     incomplete_communications_pattern, issuer->pid, xbt_dynar_t);
174
175   std::unique_ptr<simgrid::mc::PatternCommunication> pattern =
176     std::unique_ptr<simgrid::mc::PatternCommunication>(
177       new simgrid::mc::PatternCommunication());
178   pattern->index =
179     initial_pattern->index_comm + xbt_dynar_length(incomplete_pattern);
180
181   if (call_type == MC_CALL_TYPE_SEND) {
182     /* Create comm pattern */
183     pattern->type = SIMIX_COMM_SEND;
184     pattern->comm_addr = simcall_comm_isend__get__result(request);
185
186     s_smx_synchro_t synchro = mc_model_checker->process().read<s_smx_synchro_t>(
187       (std::uint64_t) pattern->comm_addr);
188
189     char* remote_name = mc_model_checker->process().read<char*>(
190       (std::uint64_t)(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
191     pattern->rdv = mc_model_checker->process().read_string(remote_name);
192     pattern->src_proc = MC_smx_resolve_process(synchro.comm.src_proc)->pid;
193     pattern->src_host = MC_smx_process_get_host_name(issuer);
194
195     struct s_smpi_mpi_request mpi_request =
196       mc_model_checker->process().read<s_smpi_mpi_request>(
197         (std::uint64_t) simcall_comm_isend__get__data(request));
198     pattern->tag = mpi_request.tag;
199
200     if(synchro.comm.src_buff != nullptr){
201       pattern->data.resize(synchro.comm.src_buff_size);
202       mc_model_checker->process().read_bytes(
203         pattern->data.data(), pattern->data.size(),
204         remote(synchro.comm.src_buff));
205     }
206     if(mpi_request.detached){
207       if (!simgrid::mc::initial_global_state->initial_communications_pattern_done) {
208         /* Store comm pattern */
209         simgrid::mc::PatternCommunicationList* list = xbt_dynar_get_as(
210           initial_communications_pattern, pattern->src_proc,
211           simgrid::mc::PatternCommunicationList*);
212         list->list.push_back(std::move(pattern));
213       } else {
214         /* Evaluate comm determinism */
215         deterministic_comm_pattern(pattern->src_proc, pattern.get(), backtracking);
216         xbt_dynar_get_as(
217           initial_communications_pattern, pattern->src_proc, simgrid::mc::PatternCommunicationList*
218         )->index_comm++;
219       }
220       return;
221     }
222   } else if (call_type == MC_CALL_TYPE_RECV) {
223     pattern->type = SIMIX_COMM_RECEIVE;
224     pattern->comm_addr = simcall_comm_irecv__get__result(request);
225
226     struct s_smpi_mpi_request mpi_request;
227     mc_model_checker->process().read(
228       &mpi_request, remote((struct s_smpi_mpi_request*)simcall_comm_irecv__get__data(request)));
229     pattern->tag = mpi_request.tag;
230
231     s_smx_synchro_t synchro;
232     mc_model_checker->process().read(&synchro, remote(pattern->comm_addr));
233
234     char* remote_name;
235     mc_model_checker->process().read(&remote_name,
236       remote(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
237     pattern->rdv = mc_model_checker->process().read_string(remote_name);
238     pattern->dst_proc = MC_smx_resolve_process(synchro.comm.dst_proc)->pid;
239     pattern->dst_host = MC_smx_process_get_host_name(issuer);
240   } else
241     xbt_die("Unexpected call_type %i", (int) call_type);
242
243   XBT_DEBUG("Insert incomplete comm pattern %p for process %lu",
244     pattern.get(), issuer->pid);
245   xbt_dynar_t dynar =
246     xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t);
247   simgrid::mc::PatternCommunication* pattern2 = pattern.release();
248   xbt_dynar_push(dynar, &pattern2);
249 }
250
251 void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm_addr, unsigned int issuer, int backtracking) {
252   simgrid::mc::PatternCommunication* current_comm_pattern;
253   unsigned int cursor = 0;
254   std::unique_ptr<simgrid::mc::PatternCommunication> comm_pattern;
255   int completed = 0;
256
257   /* Complete comm pattern */
258   xbt_dynar_foreach(xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, current_comm_pattern)
259     if (current_comm_pattern->comm_addr == comm_addr) {
260       update_comm_pattern(current_comm_pattern, comm_addr);
261       completed = 1;
262       simgrid::mc::PatternCommunication* temp;
263       xbt_dynar_remove_at(
264         xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t),
265         cursor, &temp);
266       comm_pattern = std::unique_ptr<simgrid::mc::PatternCommunication>(temp);
267       XBT_DEBUG("Remove incomplete comm pattern for process %u at cursor %u", issuer, cursor);
268       break;
269     }
270
271   if(!completed)
272     xbt_die("Corresponding communication not found!");
273
274   simgrid::mc::PatternCommunicationList* pattern = xbt_dynar_get_as(
275     initial_communications_pattern, issuer, simgrid::mc::PatternCommunicationList*);
276
277   if (!simgrid::mc::initial_global_state->initial_communications_pattern_done)
278     /* Store comm pattern */
279     pattern->list.push_back(std::move(comm_pattern));
280   else {
281     /* Evaluate comm determinism */
282     deterministic_comm_pattern(issuer, comm_pattern.get(), backtracking);
283     pattern->index_comm++;
284   }
285 }
286
287
288 /************************ Main algorithm ************************/
289
290 namespace simgrid {
291 namespace mc {
292
293 CommunicationDeterminismChecker::CommunicationDeterminismChecker(Session& session)
294   : Checker(session)
295 {
296
297 }
298
299 CommunicationDeterminismChecker::~CommunicationDeterminismChecker()
300 {
301
302 }
303
304 // TODO, deduplicate with SafetyChecker
305 RecordTrace CommunicationDeterminismChecker::getRecordTrace() // override
306 {
307   RecordTrace res;
308   for (auto const& state : stack_) {
309     int value = 0;
310     smx_simcall_t saved_req = MC_state_get_executed_request(state.get(), &value);
311     const smx_process_t issuer = MC_smx_simcall_get_issuer(saved_req);
312     const int pid = issuer->pid;
313     res.push_back(RecordTraceElement(pid, value));
314   }
315   return res;
316 }
317
318 // TODO, deduplicate with SafetyChecker
319 std::vector<std::string> CommunicationDeterminismChecker::getTextualTrace() // override
320 {
321   std::vector<std::string> trace;
322   for (auto const& state : stack_) {
323     int value;
324     smx_simcall_t req = MC_state_get_executed_request(state.get(), &value);
325     if (req)
326       trace.push_back(simgrid::mc::request_to_string(
327         req, value, simgrid::mc::RequestType::executed));
328   }
329   return trace;
330 }
331
332 void CommunicationDeterminismChecker::prepare()
333 {
334
335   int i;
336   const int maxpid = MC_smx_get_maxpid();
337
338   // Create initial_communications_pattern elements:
339   initial_communications_pattern = simgrid::xbt::newDeleteDynar<simgrid::mc::PatternCommunicationList*>();
340   for (i=0; i < maxpid; i++){
341     simgrid::mc::PatternCommunicationList* process_list_pattern = new simgrid::mc::PatternCommunicationList();
342     xbt_dynar_insert_at(initial_communications_pattern, i, &process_list_pattern);
343   }
344
345   // Create incomplete_communications_pattern elements:
346   incomplete_communications_pattern = xbt_dynar_new(sizeof(xbt_dynar_t), xbt_dynar_free_voidp);
347   for (i=0; i < maxpid; i++){
348     xbt_dynar_t process_pattern = xbt_dynar_new(sizeof(simgrid::mc::PatternCommunication*), nullptr);
349     xbt_dynar_insert_at(incomplete_communications_pattern, i, &process_pattern);
350   }
351
352   std::unique_ptr<simgrid::mc::State> initial_state =
353     std::unique_ptr<simgrid::mc::State>(MC_state_new());
354
355   XBT_DEBUG("********* Start communication determinism verification *********");
356
357   /* Wait for requests (schedules processes) */
358   mc_model_checker->wait_for_requests();
359
360   /* Get an enabled process and insert it in the interleave set of the initial state */
361   for (auto& p : mc_model_checker->process().simix_processes())
362     if (simgrid::mc::process_is_enabled(&p.copy))
363       MC_state_interleave_process(initial_state.get(), &p.copy);
364
365   stack_.push_back(std::move(initial_state));
366 }
367
368 static inline
369 bool all_communications_are_finished()
370 {
371   for (size_t current_process = 1; current_process < MC_smx_get_maxpid(); current_process++) {
372     xbt_dynar_t pattern = xbt_dynar_get_as(
373       incomplete_communications_pattern, current_process, xbt_dynar_t);
374     if (!xbt_dynar_is_empty(pattern)) {
375       XBT_DEBUG("Some communications are not finished, cannot stop the exploration ! State not visited.");
376       return false;
377     }
378   }
379   return true;
380 }
381
382 int CommunicationDeterminismChecker::main(void)
383 {
384   int value;
385   std::unique_ptr<simgrid::mc::VisitedState> visited_state = nullptr;
386   smx_simcall_t req = nullptr;
387
388   while (!stack_.empty()) {
389
390     /* Get current state */
391     simgrid::mc::State* state = stack_.back().get();
392
393     XBT_DEBUG("**************************************************");
394     XBT_DEBUG("Exploration depth = %zi (state = %d, interleaved processes = %zd)",
395               stack_.size(), state->num,
396               state->interleaveSize());
397
398     /* Update statistics */
399     mc_stats->visited_states++;
400
401     if (stack_.size() <= (std::size_t) _sg_mc_max_depth
402         && (req = MC_state_get_request(state, &value))
403         && (visited_state == nullptr)) {
404
405       XBT_DEBUG("Execute: %s",
406         simgrid::mc::request_to_string(
407           req, value, simgrid::mc::RequestType::simix).c_str());
408
409       std::string req_str;
410       if (dot_output != nullptr)
411         req_str = simgrid::mc::request_get_dot_output(req, value);
412
413       MC_state_set_executed_request(state, req, value);
414       mc_stats->executed_transitions++;
415
416       /* TODO : handle test and testany simcalls */
417       e_mc_call_type_t call = MC_CALL_TYPE_NONE;
418       if (_sg_mc_comms_determinism || _sg_mc_send_determinism)
419         call = MC_get_call_type(req);
420
421       /* Answer the request */
422       simgrid::mc::handle_simcall(req, value);    /* After this call req is no longer useful */
423
424       if(!initial_global_state->initial_communications_pattern_done)
425         MC_handle_comm_pattern(call, req, value, initial_communications_pattern, 0);
426       else
427         MC_handle_comm_pattern(call, req, value, nullptr, 0);
428
429       /* Wait for requests (schedules processes) */
430       mc_model_checker->wait_for_requests();
431
432       /* Create the new expanded state */
433       std::unique_ptr<simgrid::mc::State> next_state =
434         std::unique_ptr<simgrid::mc::State>(MC_state_new());
435
436       /* If comm determinism verification, we cannot stop the exploration if
437          some communications are not finished (at least, data are transfered).
438          These communications  are incomplete and they cannot be analyzed and
439          compared with the initial pattern. */
440       bool compare_snapshots = all_communications_are_finished()
441         && initial_global_state->initial_communications_pattern_done;
442
443       if (_sg_mc_visited == 0
444           || (visited_state = visitedStates_.addVisitedState(next_state.get(), compare_snapshots)) == nullptr) {
445
446         /* Get enabled processes and insert them in the interleave set of the next state */
447         for (auto& p : mc_model_checker->process().simix_processes())
448           if (simgrid::mc::process_is_enabled(&p.copy))
449             MC_state_interleave_process(next_state.get(), &p.copy);
450
451         if (dot_output != nullptr)
452           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n",
453             state->num,  next_state->num, req_str.c_str());
454
455       } else if (dot_output != nullptr)
456         fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n",
457           state->num, visited_state->other_num == -1 ? visited_state->num : visited_state->other_num, req_str.c_str());
458
459       stack_.push_back(std::move(next_state));
460
461     } else {
462
463       if (stack_.size() > (std::size_t) _sg_mc_max_depth)
464         XBT_WARN("/!\\ Max depth reached ! /!\\ ");
465       else if (visited_state != nullptr)
466         XBT_DEBUG("State already visited (equal to state %d), exploration stopped on this path.", visited_state->other_num == -1 ? visited_state->num : visited_state->other_num);
467       else
468         XBT_DEBUG("There are no more processes to interleave. (depth %zi)",
469           stack_.size());
470
471       if (!initial_global_state->initial_communications_pattern_done)
472         initial_global_state->initial_communications_pattern_done = 1;
473
474       /* Trash the current state, no longer needed */
475       XBT_DEBUG("Delete state %d at depth %zi",
476         state->num, stack_.size());
477       stack_.pop_back();
478
479       visited_state = nullptr;
480
481       /* Check for deadlocks */
482       if (mc_model_checker->checkDeadlock()) {
483         MC_show_deadlock();
484         return SIMGRID_MC_EXIT_DEADLOCK;
485       }
486
487       while (!stack_.empty()) {
488         std::unique_ptr<simgrid::mc::State> state = std::move(stack_.back());
489         stack_.pop_back();
490         if (state->interleaveSize()
491             && stack_.size() < (std::size_t) _sg_mc_max_depth) {
492           /* We found a back-tracking point, let's loop */
493           XBT_DEBUG("Back-tracking to state %d at depth %zi",
494             state->num, stack_.size() + 1);
495           stack_.push_back(std::move(state));
496
497           simgrid::mc::replay(stack_);
498
499           XBT_DEBUG("Back-tracking to state %d at depth %zi done",
500             stack_.back()->num, stack_.size());
501
502           break;
503         } else {
504           XBT_DEBUG("Delete state %d at depth %zi",
505             state->num, stack_.size() + 1);
506         }
507       }
508     }
509   }
510
511   MC_print_statistics(mc_stats);
512   return SIMGRID_MC_EXIT_SUCCESS;
513 }
514
515 int CommunicationDeterminismChecker::run()
516 {
517   XBT_INFO("Check communication determinism");
518   mc_model_checker->wait_for_requests();
519
520   this->prepare();
521
522   initial_global_state = std::unique_ptr<s_mc_global_t>(new s_mc_global_t());
523   initial_global_state->snapshot = simgrid::mc::take_snapshot(0);
524   initial_global_state->initial_communications_pattern_done = 0;
525   initial_global_state->recv_deterministic = 1;
526   initial_global_state->send_deterministic = 1;
527   initial_global_state->recv_diff = nullptr;
528   initial_global_state->send_diff = nullptr;
529
530   int res = this->main();
531   initial_global_state = nullptr;
532   return res;
533 }
534
535 Checker* createCommunicationDeterminismChecker(Session& session)
536 {
537   return new CommunicationDeterminismChecker(session);
538 }
539
540 }
541 }