Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
fc90234a01b581e014ef9dca5d8da2318218a3bd
[simgrid.git] / src / mc / CommunicationDeterminismChecker.cpp
1 /* Copyright (c) 2008-2015. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include <cstdint>
8
9 #include <xbt/dynar.h>
10 #include <xbt/dynar.hpp>
11 #include <xbt/fifo.h>
12 #include <xbt/log.h>
13 #include <xbt/sysdep.h>
14
15 #include "src/mc/mc_state.h"
16 #include "src/mc/mc_comm_pattern.h"
17 #include "src/mc/mc_request.h"
18 #include "src/mc/mc_safety.h"
19 #include "src/mc/mc_private.h"
20 #include "src/mc/mc_record.h"
21 #include "src/mc/mc_smx.h"
22 #include "src/mc/Client.hpp"
23 #include "src/mc/CommunicationDeterminismChecker.hpp"
24 #include "src/mc/mc_exit.h"
25
26 using simgrid::mc::remote;
27
28 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_comm_determinism, mc,
29                                 "Logging specific to MC communication determinism detection");
30
31 XBT_PRIVATE static std::list<simgrid::mc::State*> mc_stack;
32
33 /********** Global variables **********/
34
35 xbt_dynar_t initial_communications_pattern;
36 xbt_dynar_t incomplete_communications_pattern;
37
38 /********** Static functions ***********/
39
40 static e_mc_comm_pattern_difference_t compare_comm_pattern(mc_comm_pattern_t comm1, mc_comm_pattern_t comm2) {
41   if(comm1->type != comm2->type)
42     return TYPE_DIFF;
43   if (strcmp(comm1->rdv, comm2->rdv) != 0)
44     return RDV_DIFF;
45   if (comm1->src_proc != comm2->src_proc)
46     return SRC_PROC_DIFF;
47   if (comm1->dst_proc != comm2->dst_proc)
48     return DST_PROC_DIFF;
49   if (comm1->tag != comm2->tag)
50     return TAG_DIFF;
51   if (comm1->data_size != comm2->data_size)
52     return DATA_SIZE_DIFF;
53   if(comm1->data == nullptr && comm2->data == NULL)
54     return NONE_DIFF;
55   if(comm1->data != nullptr && comm2->data !=NULL) {
56     if (!memcmp(comm1->data, comm2->data, comm1->data_size))
57       return NONE_DIFF;
58     return DATA_DIFF;
59   } else
60     return DATA_DIFF;
61   return NONE_DIFF;
62 }
63
64 static char* print_determinism_result(e_mc_comm_pattern_difference_t diff, int process, mc_comm_pattern_t comm, unsigned int cursor) {
65   char *type, *res;
66
67   if(comm->type == SIMIX_COMM_SEND)
68     type = bprintf("The send communications pattern of the process %d is different!", process - 1);
69   else
70     type = bprintf("The recv communications pattern of the process %d is different!", process - 1);
71
72   switch(diff) {
73   case TYPE_DIFF:
74     res = bprintf("%s Different type for communication #%d", type, cursor);
75     break;
76   case RDV_DIFF:
77     res = bprintf("%s Different rdv for communication #%d", type, cursor);
78     break;
79   case TAG_DIFF:
80     res = bprintf("%s Different tag for communication #%d", type, cursor);
81     break;
82   case SRC_PROC_DIFF:
83       res = bprintf("%s Different source for communication #%d", type, cursor);
84     break;
85   case DST_PROC_DIFF:
86       res = bprintf("%s Different destination for communication #%d", type, cursor);
87     break;
88   case DATA_SIZE_DIFF:
89     res = bprintf("%s\n Different data size for communication #%d", type, cursor);
90     break;
91   case DATA_DIFF:
92     res = bprintf("%s\n Different data for communication #%d", type, cursor);
93     break;
94   default:
95     res = nullptr;
96     break;
97   }
98
99   return res;
100 }
101
102 static void update_comm_pattern(mc_comm_pattern_t comm_pattern, smx_synchro_t comm_addr)
103 {
104   s_smx_synchro_t comm;
105   mc_model_checker->process().read(&comm, remote(comm_addr));
106
107   smx_process_t src_proc = MC_smx_resolve_process(comm.comm.src_proc);
108   smx_process_t dst_proc = MC_smx_resolve_process(comm.comm.dst_proc);
109   comm_pattern->src_proc = src_proc->pid;
110   comm_pattern->dst_proc = dst_proc->pid;
111   comm_pattern->src_host = MC_smx_process_get_host_name(src_proc);
112   comm_pattern->dst_host = MC_smx_process_get_host_name(dst_proc);
113   if (comm_pattern->data_size == -1 && comm.comm.src_buff != nullptr) {
114     size_t buff_size;
115     mc_model_checker->process().read(
116       &buff_size, remote(comm.comm.dst_buff_size));
117     comm_pattern->data_size = buff_size;
118     comm_pattern->data = xbt_malloc0(comm_pattern->data_size);
119     mc_model_checker->process().read_bytes(
120       comm_pattern->data, comm_pattern->data_size,
121       remote(comm.comm.src_buff));
122   }
123 }
124
125 static void deterministic_comm_pattern(int process, mc_comm_pattern_t comm, int backtracking) {
126
127   mc_list_comm_pattern_t list =
128     xbt_dynar_get_as(initial_communications_pattern, process, mc_list_comm_pattern_t);
129
130   if(!backtracking){
131     mc_comm_pattern_t initial_comm =
132       xbt_dynar_get_as(list->list, list->index_comm, mc_comm_pattern_t);
133     e_mc_comm_pattern_difference_t diff =
134       compare_comm_pattern(initial_comm, comm);
135
136     if (diff != NONE_DIFF) {
137       if (comm->type == SIMIX_COMM_SEND){
138         simgrid::mc::initial_global_state->send_deterministic = 0;
139         if(simgrid::mc::initial_global_state->send_diff != nullptr)
140           xbt_free(simgrid::mc::initial_global_state->send_diff);
141         simgrid::mc::initial_global_state->send_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
142       }else{
143         simgrid::mc::initial_global_state->recv_deterministic = 0;
144         if(simgrid::mc::initial_global_state->recv_diff != nullptr)
145           xbt_free(simgrid::mc::initial_global_state->recv_diff);
146         simgrid::mc::initial_global_state->recv_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
147       }
148       if(_sg_mc_send_determinism && !simgrid::mc::initial_global_state->send_deterministic){
149         XBT_INFO("*********************************************************");
150         XBT_INFO("***** Non-send-deterministic communications pattern *****");
151         XBT_INFO("*********************************************************");
152         XBT_INFO("%s", simgrid::mc::initial_global_state->send_diff);
153         xbt_free(simgrid::mc::initial_global_state->send_diff);
154         simgrid::mc::initial_global_state->send_diff = nullptr;
155         MC_print_statistics(mc_stats);
156         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
157       }else if(_sg_mc_comms_determinism
158           && (!simgrid::mc::initial_global_state->send_deterministic
159             && !simgrid::mc::initial_global_state->recv_deterministic)) {
160         XBT_INFO("****************************************************");
161         XBT_INFO("***** Non-deterministic communications pattern *****");
162         XBT_INFO("****************************************************");
163         XBT_INFO("%s", simgrid::mc::initial_global_state->send_diff);
164         XBT_INFO("%s", simgrid::mc::initial_global_state->recv_diff);
165         xbt_free(simgrid::mc::initial_global_state->send_diff);
166         simgrid::mc::initial_global_state->send_diff = nullptr;
167         xbt_free(simgrid::mc::initial_global_state->recv_diff);
168         simgrid::mc::initial_global_state->recv_diff = nullptr;
169         MC_print_statistics(mc_stats);
170         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
171       }
172     }
173   }
174
175   MC_comm_pattern_free(comm);
176
177 }
178
179 /********** Non Static functions ***********/
180
181 void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type_t call_type, int backtracking)
182 {
183   const smx_process_t issuer = MC_smx_simcall_get_issuer(request);
184   mc_list_comm_pattern_t initial_pattern = xbt_dynar_get_as(
185     initial_communications_pattern, issuer->pid, mc_list_comm_pattern_t);
186   xbt_dynar_t incomplete_pattern = xbt_dynar_get_as(
187     incomplete_communications_pattern, issuer->pid, xbt_dynar_t);
188
189   mc_comm_pattern_t pattern = xbt_new0(s_mc_comm_pattern_t, 1);
190   pattern->data_size = -1;
191   pattern->data = nullptr;
192   pattern->index =
193     initial_pattern->index_comm + xbt_dynar_length(incomplete_pattern);
194
195   if (call_type == MC_CALL_TYPE_SEND) {
196     /* Create comm pattern */
197     pattern->type = SIMIX_COMM_SEND;
198     pattern->comm_addr = simcall_comm_isend__get__result(request);
199
200     s_smx_synchro_t synchro = mc_model_checker->process().read<s_smx_synchro_t>(
201       (std::uint64_t) pattern->comm_addr);
202
203     char* remote_name = mc_model_checker->process().read<char*>(
204       (std::uint64_t)(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
205     pattern->rdv = mc_model_checker->process().read_string(remote_name);
206     pattern->src_proc = MC_smx_resolve_process(synchro.comm.src_proc)->pid;
207     pattern->src_host = MC_smx_process_get_host_name(issuer);
208
209     struct s_smpi_mpi_request mpi_request =
210       mc_model_checker->process().read<s_smpi_mpi_request>(
211         (std::uint64_t) simcall_comm_isend__get__data(request));
212     pattern->tag = mpi_request.tag;
213
214     if(synchro.comm.src_buff != nullptr){
215       pattern->data_size = synchro.comm.src_buff_size;
216       pattern->data = xbt_malloc0(pattern->data_size);
217       mc_model_checker->process().read_bytes(
218         pattern->data, pattern->data_size, remote(synchro.comm.src_buff));
219     }
220     if(mpi_request.detached){
221       if (!simgrid::mc::initial_global_state->initial_communications_pattern_done) {
222         /* Store comm pattern */
223         xbt_dynar_push(
224           xbt_dynar_get_as(
225             initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
226           )->list,
227           &pattern);
228       } else {
229         /* Evaluate comm determinism */
230         deterministic_comm_pattern(pattern->src_proc, pattern, backtracking);
231         xbt_dynar_get_as(
232           initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
233         )->index_comm++;
234       }
235       return;
236     }
237   } else if (call_type == MC_CALL_TYPE_RECV) {
238     pattern->type = SIMIX_COMM_RECEIVE;
239     pattern->comm_addr = simcall_comm_irecv__get__result(request);
240
241     struct s_smpi_mpi_request mpi_request;
242     mc_model_checker->process().read(
243       &mpi_request, remote((struct s_smpi_mpi_request*)simcall_comm_irecv__get__data(request)));
244     pattern->tag = mpi_request.tag;
245
246     s_smx_synchro_t synchro;
247     mc_model_checker->process().read(&synchro, remote(pattern->comm_addr));
248
249     char* remote_name;
250     mc_model_checker->process().read(&remote_name,
251       remote(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
252     pattern->rdv = mc_model_checker->process().read_string(remote_name);
253     pattern->dst_proc = MC_smx_resolve_process(synchro.comm.dst_proc)->pid;
254     pattern->dst_host = MC_smx_process_get_host_name(issuer);
255   } else
256     xbt_die("Unexpected call_type %i", (int) call_type);
257
258   xbt_dynar_push(
259     xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t),
260     &pattern);
261
262   XBT_DEBUG("Insert incomplete comm pattern %p for process %lu", pattern, issuer->pid);
263 }
264
265 void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm_addr, unsigned int issuer, int backtracking) {
266   mc_comm_pattern_t current_comm_pattern;
267   unsigned int cursor = 0;
268   mc_comm_pattern_t comm_pattern;
269   int completed = 0;
270
271   /* Complete comm pattern */
272   xbt_dynar_foreach(xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, current_comm_pattern)
273     if (current_comm_pattern->comm_addr == comm_addr) {
274       update_comm_pattern(current_comm_pattern, comm_addr);
275       completed = 1;
276       xbt_dynar_remove_at(
277         xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t),
278         cursor, &comm_pattern);
279       XBT_DEBUG("Remove incomplete comm pattern for process %u at cursor %u", issuer, cursor);
280       break;
281     }
282
283   if(!completed)
284     xbt_die("Corresponding communication not found!");
285
286   mc_list_comm_pattern_t pattern = xbt_dynar_get_as(
287     initial_communications_pattern, issuer, mc_list_comm_pattern_t);
288
289   if (!simgrid::mc::initial_global_state->initial_communications_pattern_done)
290     /* Store comm pattern */
291     xbt_dynar_push(pattern->list, &comm_pattern);
292   else {
293     /* Evaluate comm determinism */
294     deterministic_comm_pattern(issuer, comm_pattern, backtracking);
295     pattern->index_comm++;
296   }
297 }
298
299
300 /************************ Main algorithm ************************/
301
302 namespace simgrid {
303 namespace mc {
304
305 CommunicationDeterminismChecker::CommunicationDeterminismChecker(Session& session)
306   : Checker(session)
307 {
308
309 }
310
311 CommunicationDeterminismChecker::~CommunicationDeterminismChecker()
312 {
313
314 }
315
316 // TODO, deduplicate with SafetyChecker
317 RecordTrace CommunicationDeterminismChecker::getRecordTrace() // override
318 {
319   RecordTrace res;
320   for (simgrid::mc::State* state : mc_stack) {
321     int value = 0;
322     smx_simcall_t saved_req = MC_state_get_executed_request(state, &value);
323     const smx_process_t issuer = MC_smx_simcall_get_issuer(saved_req);
324     const int pid = issuer->pid;
325     res.push_back(RecordTraceElement(pid, value));
326   }
327   return res;
328 }
329
330 // TODO, deduplicate with SafetyChecker
331 std::vector<std::string> CommunicationDeterminismChecker::getTextualTrace() // override
332 {
333   std::vector<std::string> trace;
334   for (simgrid::mc::State* state : mc_stack) {
335     int value;
336     smx_simcall_t req = MC_state_get_executed_request(state, &value);
337     if (req) {
338       char* req_str = simgrid::mc::request_to_string(
339         req, value, simgrid::mc::RequestType::executed);
340       trace.push_back(req_str);
341       xbt_free(req_str);
342     }
343   }
344   return trace;
345 }
346
347 void CommunicationDeterminismChecker::prepare()
348 {
349   simgrid::mc::State* initial_state = nullptr;
350   int i;
351   const int maxpid = MC_smx_get_maxpid();
352
353   simgrid::mc::visited_states.clear();
354
355   // Create initial_communications_pattern elements:
356   initial_communications_pattern = xbt_dynar_new(sizeof(mc_list_comm_pattern_t), MC_list_comm_pattern_free_voidp);
357   for (i=0; i < maxpid; i++){
358     mc_list_comm_pattern_t process_list_pattern = xbt_new0(s_mc_list_comm_pattern_t, 1);
359     process_list_pattern->list = xbt_dynar_new(sizeof(mc_comm_pattern_t), MC_comm_pattern_free_voidp);
360     process_list_pattern->index_comm = 0;
361     xbt_dynar_insert_at(initial_communications_pattern, i, &process_list_pattern);
362   }
363
364   // Create incomplete_communications_pattern elements:
365   incomplete_communications_pattern = xbt_dynar_new(sizeof(xbt_dynar_t), xbt_dynar_free_voidp);
366   for (i=0; i < maxpid; i++){
367     xbt_dynar_t process_pattern = xbt_dynar_new(sizeof(mc_comm_pattern_t), nullptr);
368     xbt_dynar_insert_at(incomplete_communications_pattern, i, &process_pattern);
369   }
370
371   initial_state = MC_state_new();
372
373   XBT_DEBUG("********* Start communication determinism verification *********");
374
375   /* Wait for requests (schedules processes) */
376   mc_model_checker->wait_for_requests();
377
378   /* Get an enabled process and insert it in the interleave set of the initial state */
379   for (auto& p : mc_model_checker->process().simix_processes())
380     if (simgrid::mc::process_is_enabled(&p.copy))
381       MC_state_interleave_process(initial_state, &p.copy);
382
383   mc_stack.push_back(initial_state);
384 }
385
386 static inline
387 bool all_communications_are_finished()
388 {
389   for (size_t current_process = 1; current_process < MC_smx_get_maxpid(); current_process++) {
390     xbt_dynar_t pattern = xbt_dynar_get_as(
391       incomplete_communications_pattern, current_process, xbt_dynar_t);
392     if (!xbt_dynar_is_empty(pattern)) {
393       XBT_DEBUG("Some communications are not finished, cannot stop the exploration ! State not visited.");
394       return false;
395     }
396   }
397   return true;
398 }
399
400 int CommunicationDeterminismChecker::main(void)
401 {
402
403   char *req_str = nullptr;
404   int value;
405   std::unique_ptr<simgrid::mc::VisitedState> visited_state = nullptr;
406   smx_simcall_t req = nullptr;
407   simgrid::mc::State* next_state = nullptr;
408
409   while (!mc_stack.empty()) {
410
411     /* Get current state */
412     simgrid::mc::State* state = mc_stack.back();
413
414     XBT_DEBUG("**************************************************");
415     XBT_DEBUG("Exploration depth = %zi (state = %d, interleaved processes = %d)",
416               mc_stack.size(), state->num,
417               MC_state_interleave_size(state));
418
419     /* Update statistics */
420     mc_stats->visited_states++;
421
422     if (mc_stack.size() <= _sg_mc_max_depth
423         && (req = MC_state_get_request(state, &value))
424         && (visited_state == nullptr)) {
425
426       req_str = simgrid::mc::request_to_string(req, value, simgrid::mc::RequestType::simix);
427       XBT_DEBUG("Execute: %s", req_str);
428       xbt_free(req_str);
429
430       if (dot_output != nullptr)
431         req_str = simgrid::mc::request_get_dot_output(req, value);
432
433       MC_state_set_executed_request(state, req, value);
434       mc_stats->executed_transitions++;
435
436       /* TODO : handle test and testany simcalls */
437       e_mc_call_type_t call = MC_CALL_TYPE_NONE;
438       if (_sg_mc_comms_determinism || _sg_mc_send_determinism)
439         call = MC_get_call_type(req);
440
441       /* Answer the request */
442       simgrid::mc::handle_simcall(req, value);    /* After this call req is no longer useful */
443
444       if(!initial_global_state->initial_communications_pattern_done)
445         MC_handle_comm_pattern(call, req, value, initial_communications_pattern, 0);
446       else
447         MC_handle_comm_pattern(call, req, value, nullptr, 0);
448
449       /* Wait for requests (schedules processes) */
450       mc_model_checker->wait_for_requests();
451
452       /* Create the new expanded state */
453       next_state = MC_state_new();
454
455       /* If comm determinism verification, we cannot stop the exploration if
456          some communications are not finished (at least, data are transfered).
457          These communications  are incomplete and they cannot be analyzed and
458          compared with the initial pattern. */
459       bool compare_snapshots = all_communications_are_finished()
460         && initial_global_state->initial_communications_pattern_done;
461
462       if (_sg_mc_visited == 0 || (visited_state = simgrid::mc::is_visited_state(next_state, compare_snapshots)) == nullptr) {
463
464         /* Get enabled processes and insert them in the interleave set of the next state */
465         for (auto& p : mc_model_checker->process().simix_processes())
466           if (simgrid::mc::process_is_enabled(&p.copy))
467             MC_state_interleave_process(next_state, &p.copy);
468
469         if (dot_output != nullptr)
470           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n", state->num,  next_state->num, req_str);
471
472       } else if (dot_output != nullptr)
473         fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n",
474           state->num, visited_state->other_num == -1 ? visited_state->num : visited_state->other_num, req_str);
475
476       mc_stack.push_back(next_state);
477
478       if (dot_output != nullptr)
479         xbt_free(req_str);
480
481     } else {
482
483       if (mc_stack.size() > _sg_mc_max_depth)
484         XBT_WARN("/!\\ Max depth reached ! /!\\ ");
485       else if (visited_state != nullptr)
486         XBT_DEBUG("State already visited (equal to state %d), exploration stopped on this path.", visited_state->other_num == -1 ? visited_state->num : visited_state->other_num);
487       else
488         XBT_DEBUG("There are no more processes to interleave. (depth %zi)",
489           mc_stack.size());
490
491       if (!initial_global_state->initial_communications_pattern_done)
492         initial_global_state->initial_communications_pattern_done = 1;
493
494       /* Trash the current state, no longer needed */
495       mc_stack.pop_back();
496       MC_state_delete(state, !state->in_visited_states ? 1 : 0);
497       XBT_DEBUG("Delete state %d at depth %zi",
498         state->num, mc_stack.size() + 1);
499
500       visited_state = nullptr;
501
502       /* Check for deadlocks */
503       if (mc_model_checker->checkDeadlock()) {
504         MC_show_deadlock();
505         return SIMGRID_MC_EXIT_DEADLOCK;
506       }
507
508       while (!mc_stack.empty()) {
509         simgrid::mc::State* state = mc_stack.back();
510         mc_stack.pop_back();
511         if (MC_state_interleave_size(state)
512             && mc_stack.size() < _sg_mc_max_depth) {
513           /* We found a back-tracking point, let's loop */
514           XBT_DEBUG("Back-tracking to state %d at depth %zi",
515             state->num, mc_stack.size() + 1);
516           mc_stack.push_back(state);
517
518           simgrid::mc::replay(mc_stack);
519
520           XBT_DEBUG("Back-tracking to state %d at depth %zi done",
521             state->num, mc_stack.size());
522
523           break;
524         } else {
525           XBT_DEBUG("Delete state %d at depth %zi",
526             state->num, mc_stack.size() + 1);
527           MC_state_delete(state, !state->in_visited_states ? 1 : 0);
528         }
529       }
530     }
531   }
532
533   MC_print_statistics(mc_stats);
534   return SIMGRID_MC_EXIT_SUCCESS;
535 }
536
537 int CommunicationDeterminismChecker::run()
538 {
539   XBT_INFO("Check communication determinism");
540   mc_model_checker->wait_for_requests();
541
542   if (mc_mode == MC_MODE_CLIENT)
543     // This will move somehwere else:
544     simgrid::mc::Client::get()->handleMessages();
545
546   /* Create exploration stack */
547   mc_stack.clear();
548
549   this->prepare();
550
551   initial_global_state = std::unique_ptr<s_mc_global_t>(new s_mc_global_t());
552   initial_global_state->snapshot = simgrid::mc::take_snapshot(0);
553   initial_global_state->initial_communications_pattern_done = 0;
554   initial_global_state->recv_deterministic = 1;
555   initial_global_state->send_deterministic = 1;
556   initial_global_state->recv_diff = nullptr;
557   initial_global_state->send_diff = nullptr;
558
559   int res = this->main();
560   initial_global_state = nullptr;
561   return res;
562 }
563
564 }
565 }