Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
4c51b1b2a647ea18df97ddcb2281906a0e99d03c
[simgrid.git] / src / mc / CommunicationDeterminismChecker.cpp
1 /* Copyright (c) 2008-2015. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include <cstdint>
8
9 #include <xbt/dynar.h>
10 #include <xbt/dynar.hpp>
11 #include <xbt/log.h>
12 #include <xbt/sysdep.h>
13
14 #include "src/mc/mc_state.h"
15 #include "src/mc/mc_comm_pattern.h"
16 #include "src/mc/mc_request.h"
17 #include "src/mc/mc_safety.h"
18 #include "src/mc/mc_private.h"
19 #include "src/mc/mc_record.h"
20 #include "src/mc/mc_smx.h"
21 #include "src/mc/Client.hpp"
22 #include "src/mc/CommunicationDeterminismChecker.hpp"
23 #include "src/mc/mc_exit.h"
24 #include "src/mc/VisitedState.hpp"
25
26 using simgrid::mc::remote;
27
28 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_comm_determinism, mc,
29                                 "Logging specific to MC communication determinism detection");
30
31 /********** Global variables **********/
32
33 xbt_dynar_t initial_communications_pattern;
34 xbt_dynar_t incomplete_communications_pattern;
35
36 /********** Static functions ***********/
37
38 static e_mc_comm_pattern_difference_t compare_comm_pattern(mc_comm_pattern_t comm1, mc_comm_pattern_t comm2) {
39   if(comm1->type != comm2->type)
40     return TYPE_DIFF;
41   if (comm1->rdv != comm2->rdv)
42     return RDV_DIFF;
43   if (comm1->src_proc != comm2->src_proc)
44     return SRC_PROC_DIFF;
45   if (comm1->dst_proc != comm2->dst_proc)
46     return DST_PROC_DIFF;
47   if (comm1->tag != comm2->tag)
48     return TAG_DIFF;
49   if (comm1->data.size() != comm2->data.size())
50     return DATA_SIZE_DIFF;
51   if (comm1->data != comm2->data)
52     return DATA_DIFF;
53   return NONE_DIFF;
54 }
55
56 static char* print_determinism_result(e_mc_comm_pattern_difference_t diff, int process, mc_comm_pattern_t comm, unsigned int cursor) {
57   char *type, *res;
58
59   if(comm->type == SIMIX_COMM_SEND)
60     type = bprintf("The send communications pattern of the process %d is different!", process - 1);
61   else
62     type = bprintf("The recv communications pattern of the process %d is different!", process - 1);
63
64   switch(diff) {
65   case TYPE_DIFF:
66     res = bprintf("%s Different type for communication #%d", type, cursor);
67     break;
68   case RDV_DIFF:
69     res = bprintf("%s Different rdv for communication #%d", type, cursor);
70     break;
71   case TAG_DIFF:
72     res = bprintf("%s Different tag for communication #%d", type, cursor);
73     break;
74   case SRC_PROC_DIFF:
75       res = bprintf("%s Different source for communication #%d", type, cursor);
76     break;
77   case DST_PROC_DIFF:
78       res = bprintf("%s Different destination for communication #%d", type, cursor);
79     break;
80   case DATA_SIZE_DIFF:
81     res = bprintf("%s\n Different data size for communication #%d", type, cursor);
82     break;
83   case DATA_DIFF:
84     res = bprintf("%s\n Different data for communication #%d", type, cursor);
85     break;
86   default:
87     res = nullptr;
88     break;
89   }
90
91   return res;
92 }
93
94 static void update_comm_pattern(mc_comm_pattern_t comm_pattern, smx_synchro_t comm_addr)
95 {
96   s_smx_synchro_t comm;
97   mc_model_checker->process().read(&comm, remote(comm_addr));
98
99   smx_process_t src_proc = MC_smx_resolve_process(comm.comm.src_proc);
100   smx_process_t dst_proc = MC_smx_resolve_process(comm.comm.dst_proc);
101   comm_pattern->src_proc = src_proc->pid;
102   comm_pattern->dst_proc = dst_proc->pid;
103   comm_pattern->src_host = MC_smx_process_get_host_name(src_proc);
104   comm_pattern->dst_host = MC_smx_process_get_host_name(dst_proc);
105   if (comm_pattern->data.size() == 0 && comm.comm.src_buff != nullptr) {
106     size_t buff_size;
107     mc_model_checker->process().read(
108       &buff_size, remote(comm.comm.dst_buff_size));
109     comm_pattern->data.resize(buff_size);
110     mc_model_checker->process().read_bytes(
111       comm_pattern->data.data(), comm_pattern->data.size(),
112       remote(comm.comm.src_buff));
113   }
114 }
115
116 static void deterministic_comm_pattern(int process, mc_comm_pattern_t comm, int backtracking) {
117
118   mc_list_comm_pattern_t list =
119     xbt_dynar_get_as(initial_communications_pattern, process, mc_list_comm_pattern_t);
120
121   if(!backtracking){
122     mc_comm_pattern_t initial_comm =
123       xbt_dynar_get_as(list->list, list->index_comm, mc_comm_pattern_t);
124     e_mc_comm_pattern_difference_t diff =
125       compare_comm_pattern(initial_comm, comm);
126
127     if (diff != NONE_DIFF) {
128       if (comm->type == SIMIX_COMM_SEND){
129         simgrid::mc::initial_global_state->send_deterministic = 0;
130         if(simgrid::mc::initial_global_state->send_diff != nullptr)
131           xbt_free(simgrid::mc::initial_global_state->send_diff);
132         simgrid::mc::initial_global_state->send_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
133       }else{
134         simgrid::mc::initial_global_state->recv_deterministic = 0;
135         if(simgrid::mc::initial_global_state->recv_diff != nullptr)
136           xbt_free(simgrid::mc::initial_global_state->recv_diff);
137         simgrid::mc::initial_global_state->recv_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
138       }
139       if(_sg_mc_send_determinism && !simgrid::mc::initial_global_state->send_deterministic){
140         XBT_INFO("*********************************************************");
141         XBT_INFO("***** Non-send-deterministic communications pattern *****");
142         XBT_INFO("*********************************************************");
143         XBT_INFO("%s", simgrid::mc::initial_global_state->send_diff);
144         xbt_free(simgrid::mc::initial_global_state->send_diff);
145         simgrid::mc::initial_global_state->send_diff = nullptr;
146         MC_print_statistics(mc_stats);
147         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
148       }else if(_sg_mc_comms_determinism
149           && (!simgrid::mc::initial_global_state->send_deterministic
150             && !simgrid::mc::initial_global_state->recv_deterministic)) {
151         XBT_INFO("****************************************************");
152         XBT_INFO("***** Non-deterministic communications pattern *****");
153         XBT_INFO("****************************************************");
154         XBT_INFO("%s", simgrid::mc::initial_global_state->send_diff);
155         XBT_INFO("%s", simgrid::mc::initial_global_state->recv_diff);
156         xbt_free(simgrid::mc::initial_global_state->send_diff);
157         simgrid::mc::initial_global_state->send_diff = nullptr;
158         xbt_free(simgrid::mc::initial_global_state->recv_diff);
159         simgrid::mc::initial_global_state->recv_diff = nullptr;
160         MC_print_statistics(mc_stats);
161         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
162       }
163     }
164   }
165
166   MC_comm_pattern_free(comm);
167
168 }
169
170 /********** Non Static functions ***********/
171
172 void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type_t call_type, int backtracking)
173 {
174   const smx_process_t issuer = MC_smx_simcall_get_issuer(request);
175   mc_list_comm_pattern_t initial_pattern = xbt_dynar_get_as(
176     initial_communications_pattern, issuer->pid, mc_list_comm_pattern_t);
177   xbt_dynar_t incomplete_pattern = xbt_dynar_get_as(
178     incomplete_communications_pattern, issuer->pid, xbt_dynar_t);
179
180   mc_comm_pattern_t pattern = new s_mc_comm_pattern_t();
181   pattern->index =
182     initial_pattern->index_comm + xbt_dynar_length(incomplete_pattern);
183
184   if (call_type == MC_CALL_TYPE_SEND) {
185     /* Create comm pattern */
186     pattern->type = SIMIX_COMM_SEND;
187     pattern->comm_addr = simcall_comm_isend__get__result(request);
188
189     s_smx_synchro_t synchro = mc_model_checker->process().read<s_smx_synchro_t>(
190       (std::uint64_t) pattern->comm_addr);
191
192     char* remote_name = mc_model_checker->process().read<char*>(
193       (std::uint64_t)(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
194     pattern->rdv = mc_model_checker->process().read_string(remote_name);
195     pattern->src_proc = MC_smx_resolve_process(synchro.comm.src_proc)->pid;
196     pattern->src_host = MC_smx_process_get_host_name(issuer);
197
198     struct s_smpi_mpi_request mpi_request =
199       mc_model_checker->process().read<s_smpi_mpi_request>(
200         (std::uint64_t) simcall_comm_isend__get__data(request));
201     pattern->tag = mpi_request.tag;
202
203     if(synchro.comm.src_buff != nullptr){
204       pattern->data.resize(synchro.comm.src_buff_size);
205       mc_model_checker->process().read_bytes(
206         pattern->data.data(), pattern->data.size(),
207         remote(synchro.comm.src_buff));
208     }
209     if(mpi_request.detached){
210       if (!simgrid::mc::initial_global_state->initial_communications_pattern_done) {
211         /* Store comm pattern */
212         xbt_dynar_push(
213           xbt_dynar_get_as(
214             initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
215           )->list,
216           &pattern);
217       } else {
218         /* Evaluate comm determinism */
219         deterministic_comm_pattern(pattern->src_proc, pattern, backtracking);
220         xbt_dynar_get_as(
221           initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
222         )->index_comm++;
223       }
224       return;
225     }
226   } else if (call_type == MC_CALL_TYPE_RECV) {
227     pattern->type = SIMIX_COMM_RECEIVE;
228     pattern->comm_addr = simcall_comm_irecv__get__result(request);
229
230     struct s_smpi_mpi_request mpi_request;
231     mc_model_checker->process().read(
232       &mpi_request, remote((struct s_smpi_mpi_request*)simcall_comm_irecv__get__data(request)));
233     pattern->tag = mpi_request.tag;
234
235     s_smx_synchro_t synchro;
236     mc_model_checker->process().read(&synchro, remote(pattern->comm_addr));
237
238     char* remote_name;
239     mc_model_checker->process().read(&remote_name,
240       remote(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
241     pattern->rdv = mc_model_checker->process().read_string(remote_name);
242     pattern->dst_proc = MC_smx_resolve_process(synchro.comm.dst_proc)->pid;
243     pattern->dst_host = MC_smx_process_get_host_name(issuer);
244   } else
245     xbt_die("Unexpected call_type %i", (int) call_type);
246
247   xbt_dynar_push(
248     xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t),
249     &pattern);
250
251   XBT_DEBUG("Insert incomplete comm pattern %p for process %lu", pattern, issuer->pid);
252 }
253
254 void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm_addr, unsigned int issuer, int backtracking) {
255   mc_comm_pattern_t current_comm_pattern;
256   unsigned int cursor = 0;
257   mc_comm_pattern_t comm_pattern;
258   int completed = 0;
259
260   /* Complete comm pattern */
261   xbt_dynar_foreach(xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, current_comm_pattern)
262     if (current_comm_pattern->comm_addr == comm_addr) {
263       update_comm_pattern(current_comm_pattern, comm_addr);
264       completed = 1;
265       xbt_dynar_remove_at(
266         xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t),
267         cursor, &comm_pattern);
268       XBT_DEBUG("Remove incomplete comm pattern for process %u at cursor %u", issuer, cursor);
269       break;
270     }
271
272   if(!completed)
273     xbt_die("Corresponding communication not found!");
274
275   mc_list_comm_pattern_t pattern = xbt_dynar_get_as(
276     initial_communications_pattern, issuer, mc_list_comm_pattern_t);
277
278   if (!simgrid::mc::initial_global_state->initial_communications_pattern_done)
279     /* Store comm pattern */
280     xbt_dynar_push(pattern->list, &comm_pattern);
281   else {
282     /* Evaluate comm determinism */
283     deterministic_comm_pattern(issuer, comm_pattern, backtracking);
284     pattern->index_comm++;
285   }
286 }
287
288
289 /************************ Main algorithm ************************/
290
291 namespace simgrid {
292 namespace mc {
293
294 CommunicationDeterminismChecker::CommunicationDeterminismChecker(Session& session)
295   : Checker(session)
296 {
297
298 }
299
300 CommunicationDeterminismChecker::~CommunicationDeterminismChecker()
301 {
302
303 }
304
305 // TODO, deduplicate with SafetyChecker
306 RecordTrace CommunicationDeterminismChecker::getRecordTrace() // override
307 {
308   RecordTrace res;
309   for (auto const& state : stack_) {
310     int value = 0;
311     smx_simcall_t saved_req = MC_state_get_executed_request(state.get(), &value);
312     const smx_process_t issuer = MC_smx_simcall_get_issuer(saved_req);
313     const int pid = issuer->pid;
314     res.push_back(RecordTraceElement(pid, value));
315   }
316   return res;
317 }
318
319 // TODO, deduplicate with SafetyChecker
320 std::vector<std::string> CommunicationDeterminismChecker::getTextualTrace() // override
321 {
322   std::vector<std::string> trace;
323   for (auto const& state : stack_) {
324     int value;
325     smx_simcall_t req = MC_state_get_executed_request(state.get(), &value);
326     if (req) {
327       char* req_str = simgrid::mc::request_to_string(
328         req, value, simgrid::mc::RequestType::executed);
329       trace.push_back(req_str);
330       xbt_free(req_str);
331     }
332   }
333   return trace;
334 }
335
336 void CommunicationDeterminismChecker::prepare()
337 {
338
339   int i;
340   const int maxpid = MC_smx_get_maxpid();
341
342   // Create initial_communications_pattern elements:
343   initial_communications_pattern = xbt_dynar_new(sizeof(mc_list_comm_pattern_t), MC_list_comm_pattern_free_voidp);
344   for (i=0; i < maxpid; i++){
345     mc_list_comm_pattern_t process_list_pattern = xbt_new0(s_mc_list_comm_pattern_t, 1);
346     process_list_pattern->list = xbt_dynar_new(sizeof(mc_comm_pattern_t), MC_comm_pattern_free_voidp);
347     process_list_pattern->index_comm = 0;
348     xbt_dynar_insert_at(initial_communications_pattern, i, &process_list_pattern);
349   }
350
351   // Create incomplete_communications_pattern elements:
352   incomplete_communications_pattern = xbt_dynar_new(sizeof(xbt_dynar_t), xbt_dynar_free_voidp);
353   for (i=0; i < maxpid; i++){
354     xbt_dynar_t process_pattern = xbt_dynar_new(sizeof(mc_comm_pattern_t), nullptr);
355     xbt_dynar_insert_at(incomplete_communications_pattern, i, &process_pattern);
356   }
357
358   std::unique_ptr<simgrid::mc::State> initial_state =
359     std::unique_ptr<simgrid::mc::State>(MC_state_new());
360
361   XBT_DEBUG("********* Start communication determinism verification *********");
362
363   /* Wait for requests (schedules processes) */
364   mc_model_checker->wait_for_requests();
365
366   /* Get an enabled process and insert it in the interleave set of the initial state */
367   for (auto& p : mc_model_checker->process().simix_processes())
368     if (simgrid::mc::process_is_enabled(&p.copy))
369       MC_state_interleave_process(initial_state.get(), &p.copy);
370
371   stack_.push_back(std::move(initial_state));
372 }
373
374 static inline
375 bool all_communications_are_finished()
376 {
377   for (size_t current_process = 1; current_process < MC_smx_get_maxpid(); current_process++) {
378     xbt_dynar_t pattern = xbt_dynar_get_as(
379       incomplete_communications_pattern, current_process, xbt_dynar_t);
380     if (!xbt_dynar_is_empty(pattern)) {
381       XBT_DEBUG("Some communications are not finished, cannot stop the exploration ! State not visited.");
382       return false;
383     }
384   }
385   return true;
386 }
387
388 int CommunicationDeterminismChecker::main(void)
389 {
390
391   char *req_str = nullptr;
392   int value;
393   std::unique_ptr<simgrid::mc::VisitedState> visited_state = nullptr;
394   smx_simcall_t req = nullptr;
395
396   while (!stack_.empty()) {
397
398     /* Get current state */
399     simgrid::mc::State* state = stack_.back().get();
400
401     XBT_DEBUG("**************************************************");
402     XBT_DEBUG("Exploration depth = %zi (state = %d, interleaved processes = %zd)",
403               stack_.size(), state->num,
404               state->interleaveSize());
405
406     /* Update statistics */
407     mc_stats->visited_states++;
408
409     if (stack_.size() <= (std::size_t) _sg_mc_max_depth
410         && (req = MC_state_get_request(state, &value))
411         && (visited_state == nullptr)) {
412
413       req_str = simgrid::mc::request_to_string(req, value, simgrid::mc::RequestType::simix);
414       XBT_DEBUG("Execute: %s", req_str);
415       xbt_free(req_str);
416
417       if (dot_output != nullptr)
418         req_str = simgrid::mc::request_get_dot_output(req, value);
419
420       MC_state_set_executed_request(state, req, value);
421       mc_stats->executed_transitions++;
422
423       /* TODO : handle test and testany simcalls */
424       e_mc_call_type_t call = MC_CALL_TYPE_NONE;
425       if (_sg_mc_comms_determinism || _sg_mc_send_determinism)
426         call = MC_get_call_type(req);
427
428       /* Answer the request */
429       simgrid::mc::handle_simcall(req, value);    /* After this call req is no longer useful */
430
431       if(!initial_global_state->initial_communications_pattern_done)
432         MC_handle_comm_pattern(call, req, value, initial_communications_pattern, 0);
433       else
434         MC_handle_comm_pattern(call, req, value, nullptr, 0);
435
436       /* Wait for requests (schedules processes) */
437       mc_model_checker->wait_for_requests();
438
439       /* Create the new expanded state */
440       std::unique_ptr<simgrid::mc::State> next_state =
441         std::unique_ptr<simgrid::mc::State>(MC_state_new());
442
443       /* If comm determinism verification, we cannot stop the exploration if
444          some communications are not finished (at least, data are transfered).
445          These communications  are incomplete and they cannot be analyzed and
446          compared with the initial pattern. */
447       bool compare_snapshots = all_communications_are_finished()
448         && initial_global_state->initial_communications_pattern_done;
449
450       if (_sg_mc_visited == 0
451           || (visited_state = visitedStates_.addVisitedState(next_state.get(), compare_snapshots)) == nullptr) {
452
453         /* Get enabled processes and insert them in the interleave set of the next state */
454         for (auto& p : mc_model_checker->process().simix_processes())
455           if (simgrid::mc::process_is_enabled(&p.copy))
456             MC_state_interleave_process(next_state.get(), &p.copy);
457
458         if (dot_output != nullptr)
459           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n", state->num,  next_state->num, req_str);
460
461       } else if (dot_output != nullptr)
462         fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n",
463           state->num, visited_state->other_num == -1 ? visited_state->num : visited_state->other_num, req_str);
464
465       stack_.push_back(std::move(next_state));
466
467       if (dot_output != nullptr)
468         xbt_free(req_str);
469
470     } else {
471
472       if (stack_.size() > (std::size_t) _sg_mc_max_depth)
473         XBT_WARN("/!\\ Max depth reached ! /!\\ ");
474       else if (visited_state != nullptr)
475         XBT_DEBUG("State already visited (equal to state %d), exploration stopped on this path.", visited_state->other_num == -1 ? visited_state->num : visited_state->other_num);
476       else
477         XBT_DEBUG("There are no more processes to interleave. (depth %zi)",
478           stack_.size());
479
480       if (!initial_global_state->initial_communications_pattern_done)
481         initial_global_state->initial_communications_pattern_done = 1;
482
483       /* Trash the current state, no longer needed */
484       XBT_DEBUG("Delete state %d at depth %zi",
485         state->num, stack_.size());
486       stack_.pop_back();
487
488       visited_state = nullptr;
489
490       /* Check for deadlocks */
491       if (mc_model_checker->checkDeadlock()) {
492         MC_show_deadlock();
493         return SIMGRID_MC_EXIT_DEADLOCK;
494       }
495
496       while (!stack_.empty()) {
497         std::unique_ptr<simgrid::mc::State> state = std::move(stack_.back());
498         stack_.pop_back();
499         if (state->interleaveSize()
500             && stack_.size() < (std::size_t) _sg_mc_max_depth) {
501           /* We found a back-tracking point, let's loop */
502           XBT_DEBUG("Back-tracking to state %d at depth %zi",
503             state->num, stack_.size() + 1);
504           stack_.push_back(std::move(state));
505
506           simgrid::mc::replay(stack_);
507
508           XBT_DEBUG("Back-tracking to state %d at depth %zi done",
509             stack_.back()->num, stack_.size());
510
511           break;
512         } else {
513           XBT_DEBUG("Delete state %d at depth %zi",
514             state->num, stack_.size() + 1);
515         }
516       }
517     }
518   }
519
520   MC_print_statistics(mc_stats);
521   return SIMGRID_MC_EXIT_SUCCESS;
522 }
523
524 int CommunicationDeterminismChecker::run()
525 {
526   XBT_INFO("Check communication determinism");
527   mc_model_checker->wait_for_requests();
528
529   if (mc_mode == MC_MODE_CLIENT)
530     // This will move somehwere else:
531     simgrid::mc::Client::get()->handleMessages();
532
533   this->prepare();
534
535   initial_global_state = std::unique_ptr<s_mc_global_t>(new s_mc_global_t());
536   initial_global_state->snapshot = simgrid::mc::take_snapshot(0);
537   initial_global_state->initial_communications_pattern_done = 0;
538   initial_global_state->recv_deterministic = 1;
539   initial_global_state->send_deterministic = 1;
540   initial_global_state->recv_diff = nullptr;
541   initial_global_state->send_diff = nullptr;
542
543   int res = this->main();
544   initial_global_state = nullptr;
545   return res;
546 }
547
548 Checker* createCommunicationDeterminismChecker(Session& session)
549 {
550   return new CommunicationDeterminismChecker(session);
551 }
552
553 }
554 }