Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
[mc] Move the stack as field of SafetyChecker and CommDetChecker
[simgrid.git] / src / mc / CommunicationDeterminismChecker.cpp
1 /* Copyright (c) 2008-2015. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include <cstdint>
8
9 #include <xbt/dynar.h>
10 #include <xbt/dynar.hpp>
11 #include <xbt/fifo.h>
12 #include <xbt/log.h>
13 #include <xbt/sysdep.h>
14
15 #include "src/mc/mc_state.h"
16 #include "src/mc/mc_comm_pattern.h"
17 #include "src/mc/mc_request.h"
18 #include "src/mc/mc_safety.h"
19 #include "src/mc/mc_private.h"
20 #include "src/mc/mc_record.h"
21 #include "src/mc/mc_smx.h"
22 #include "src/mc/Client.hpp"
23 #include "src/mc/CommunicationDeterminismChecker.hpp"
24 #include "src/mc/mc_exit.h"
25
26 using simgrid::mc::remote;
27
28 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_comm_determinism, mc,
29                                 "Logging specific to MC communication determinism detection");
30
31 /********** Global variables **********/
32
33 xbt_dynar_t initial_communications_pattern;
34 xbt_dynar_t incomplete_communications_pattern;
35
36 /********** Static functions ***********/
37
38 static e_mc_comm_pattern_difference_t compare_comm_pattern(mc_comm_pattern_t comm1, mc_comm_pattern_t comm2) {
39   if(comm1->type != comm2->type)
40     return TYPE_DIFF;
41   if (strcmp(comm1->rdv, comm2->rdv) != 0)
42     return RDV_DIFF;
43   if (comm1->src_proc != comm2->src_proc)
44     return SRC_PROC_DIFF;
45   if (comm1->dst_proc != comm2->dst_proc)
46     return DST_PROC_DIFF;
47   if (comm1->tag != comm2->tag)
48     return TAG_DIFF;
49   if (comm1->data_size != comm2->data_size)
50     return DATA_SIZE_DIFF;
51   if(comm1->data == nullptr && comm2->data == NULL)
52     return NONE_DIFF;
53   if(comm1->data != nullptr && comm2->data !=NULL) {
54     if (!memcmp(comm1->data, comm2->data, comm1->data_size))
55       return NONE_DIFF;
56     return DATA_DIFF;
57   } else
58     return DATA_DIFF;
59   return NONE_DIFF;
60 }
61
62 static char* print_determinism_result(e_mc_comm_pattern_difference_t diff, int process, mc_comm_pattern_t comm, unsigned int cursor) {
63   char *type, *res;
64
65   if(comm->type == SIMIX_COMM_SEND)
66     type = bprintf("The send communications pattern of the process %d is different!", process - 1);
67   else
68     type = bprintf("The recv communications pattern of the process %d is different!", process - 1);
69
70   switch(diff) {
71   case TYPE_DIFF:
72     res = bprintf("%s Different type for communication #%d", type, cursor);
73     break;
74   case RDV_DIFF:
75     res = bprintf("%s Different rdv for communication #%d", type, cursor);
76     break;
77   case TAG_DIFF:
78     res = bprintf("%s Different tag for communication #%d", type, cursor);
79     break;
80   case SRC_PROC_DIFF:
81       res = bprintf("%s Different source for communication #%d", type, cursor);
82     break;
83   case DST_PROC_DIFF:
84       res = bprintf("%s Different destination for communication #%d", type, cursor);
85     break;
86   case DATA_SIZE_DIFF:
87     res = bprintf("%s\n Different data size for communication #%d", type, cursor);
88     break;
89   case DATA_DIFF:
90     res = bprintf("%s\n Different data for communication #%d", type, cursor);
91     break;
92   default:
93     res = nullptr;
94     break;
95   }
96
97   return res;
98 }
99
100 static void update_comm_pattern(mc_comm_pattern_t comm_pattern, smx_synchro_t comm_addr)
101 {
102   s_smx_synchro_t comm;
103   mc_model_checker->process().read(&comm, remote(comm_addr));
104
105   smx_process_t src_proc = MC_smx_resolve_process(comm.comm.src_proc);
106   smx_process_t dst_proc = MC_smx_resolve_process(comm.comm.dst_proc);
107   comm_pattern->src_proc = src_proc->pid;
108   comm_pattern->dst_proc = dst_proc->pid;
109   comm_pattern->src_host = MC_smx_process_get_host_name(src_proc);
110   comm_pattern->dst_host = MC_smx_process_get_host_name(dst_proc);
111   if (comm_pattern->data_size == -1 && comm.comm.src_buff != nullptr) {
112     size_t buff_size;
113     mc_model_checker->process().read(
114       &buff_size, remote(comm.comm.dst_buff_size));
115     comm_pattern->data_size = buff_size;
116     comm_pattern->data = xbt_malloc0(comm_pattern->data_size);
117     mc_model_checker->process().read_bytes(
118       comm_pattern->data, comm_pattern->data_size,
119       remote(comm.comm.src_buff));
120   }
121 }
122
123 static void deterministic_comm_pattern(int process, mc_comm_pattern_t comm, int backtracking) {
124
125   mc_list_comm_pattern_t list =
126     xbt_dynar_get_as(initial_communications_pattern, process, mc_list_comm_pattern_t);
127
128   if(!backtracking){
129     mc_comm_pattern_t initial_comm =
130       xbt_dynar_get_as(list->list, list->index_comm, mc_comm_pattern_t);
131     e_mc_comm_pattern_difference_t diff =
132       compare_comm_pattern(initial_comm, comm);
133
134     if (diff != NONE_DIFF) {
135       if (comm->type == SIMIX_COMM_SEND){
136         simgrid::mc::initial_global_state->send_deterministic = 0;
137         if(simgrid::mc::initial_global_state->send_diff != nullptr)
138           xbt_free(simgrid::mc::initial_global_state->send_diff);
139         simgrid::mc::initial_global_state->send_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
140       }else{
141         simgrid::mc::initial_global_state->recv_deterministic = 0;
142         if(simgrid::mc::initial_global_state->recv_diff != nullptr)
143           xbt_free(simgrid::mc::initial_global_state->recv_diff);
144         simgrid::mc::initial_global_state->recv_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
145       }
146       if(_sg_mc_send_determinism && !simgrid::mc::initial_global_state->send_deterministic){
147         XBT_INFO("*********************************************************");
148         XBT_INFO("***** Non-send-deterministic communications pattern *****");
149         XBT_INFO("*********************************************************");
150         XBT_INFO("%s", simgrid::mc::initial_global_state->send_diff);
151         xbt_free(simgrid::mc::initial_global_state->send_diff);
152         simgrid::mc::initial_global_state->send_diff = nullptr;
153         MC_print_statistics(mc_stats);
154         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
155       }else if(_sg_mc_comms_determinism
156           && (!simgrid::mc::initial_global_state->send_deterministic
157             && !simgrid::mc::initial_global_state->recv_deterministic)) {
158         XBT_INFO("****************************************************");
159         XBT_INFO("***** Non-deterministic communications pattern *****");
160         XBT_INFO("****************************************************");
161         XBT_INFO("%s", simgrid::mc::initial_global_state->send_diff);
162         XBT_INFO("%s", simgrid::mc::initial_global_state->recv_diff);
163         xbt_free(simgrid::mc::initial_global_state->send_diff);
164         simgrid::mc::initial_global_state->send_diff = nullptr;
165         xbt_free(simgrid::mc::initial_global_state->recv_diff);
166         simgrid::mc::initial_global_state->recv_diff = nullptr;
167         MC_print_statistics(mc_stats);
168         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
169       }
170     }
171   }
172
173   MC_comm_pattern_free(comm);
174
175 }
176
177 /********** Non Static functions ***********/
178
179 void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type_t call_type, int backtracking)
180 {
181   const smx_process_t issuer = MC_smx_simcall_get_issuer(request);
182   mc_list_comm_pattern_t initial_pattern = xbt_dynar_get_as(
183     initial_communications_pattern, issuer->pid, mc_list_comm_pattern_t);
184   xbt_dynar_t incomplete_pattern = xbt_dynar_get_as(
185     incomplete_communications_pattern, issuer->pid, xbt_dynar_t);
186
187   mc_comm_pattern_t pattern = xbt_new0(s_mc_comm_pattern_t, 1);
188   pattern->data_size = -1;
189   pattern->data = nullptr;
190   pattern->index =
191     initial_pattern->index_comm + xbt_dynar_length(incomplete_pattern);
192
193   if (call_type == MC_CALL_TYPE_SEND) {
194     /* Create comm pattern */
195     pattern->type = SIMIX_COMM_SEND;
196     pattern->comm_addr = simcall_comm_isend__get__result(request);
197
198     s_smx_synchro_t synchro = mc_model_checker->process().read<s_smx_synchro_t>(
199       (std::uint64_t) pattern->comm_addr);
200
201     char* remote_name = mc_model_checker->process().read<char*>(
202       (std::uint64_t)(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
203     pattern->rdv = mc_model_checker->process().read_string(remote_name);
204     pattern->src_proc = MC_smx_resolve_process(synchro.comm.src_proc)->pid;
205     pattern->src_host = MC_smx_process_get_host_name(issuer);
206
207     struct s_smpi_mpi_request mpi_request =
208       mc_model_checker->process().read<s_smpi_mpi_request>(
209         (std::uint64_t) simcall_comm_isend__get__data(request));
210     pattern->tag = mpi_request.tag;
211
212     if(synchro.comm.src_buff != nullptr){
213       pattern->data_size = synchro.comm.src_buff_size;
214       pattern->data = xbt_malloc0(pattern->data_size);
215       mc_model_checker->process().read_bytes(
216         pattern->data, pattern->data_size, remote(synchro.comm.src_buff));
217     }
218     if(mpi_request.detached){
219       if (!simgrid::mc::initial_global_state->initial_communications_pattern_done) {
220         /* Store comm pattern */
221         xbt_dynar_push(
222           xbt_dynar_get_as(
223             initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
224           )->list,
225           &pattern);
226       } else {
227         /* Evaluate comm determinism */
228         deterministic_comm_pattern(pattern->src_proc, pattern, backtracking);
229         xbt_dynar_get_as(
230           initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
231         )->index_comm++;
232       }
233       return;
234     }
235   } else if (call_type == MC_CALL_TYPE_RECV) {
236     pattern->type = SIMIX_COMM_RECEIVE;
237     pattern->comm_addr = simcall_comm_irecv__get__result(request);
238
239     struct s_smpi_mpi_request mpi_request;
240     mc_model_checker->process().read(
241       &mpi_request, remote((struct s_smpi_mpi_request*)simcall_comm_irecv__get__data(request)));
242     pattern->tag = mpi_request.tag;
243
244     s_smx_synchro_t synchro;
245     mc_model_checker->process().read(&synchro, remote(pattern->comm_addr));
246
247     char* remote_name;
248     mc_model_checker->process().read(&remote_name,
249       remote(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
250     pattern->rdv = mc_model_checker->process().read_string(remote_name);
251     pattern->dst_proc = MC_smx_resolve_process(synchro.comm.dst_proc)->pid;
252     pattern->dst_host = MC_smx_process_get_host_name(issuer);
253   } else
254     xbt_die("Unexpected call_type %i", (int) call_type);
255
256   xbt_dynar_push(
257     xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t),
258     &pattern);
259
260   XBT_DEBUG("Insert incomplete comm pattern %p for process %lu", pattern, issuer->pid);
261 }
262
263 void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm_addr, unsigned int issuer, int backtracking) {
264   mc_comm_pattern_t current_comm_pattern;
265   unsigned int cursor = 0;
266   mc_comm_pattern_t comm_pattern;
267   int completed = 0;
268
269   /* Complete comm pattern */
270   xbt_dynar_foreach(xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, current_comm_pattern)
271     if (current_comm_pattern->comm_addr == comm_addr) {
272       update_comm_pattern(current_comm_pattern, comm_addr);
273       completed = 1;
274       xbt_dynar_remove_at(
275         xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t),
276         cursor, &comm_pattern);
277       XBT_DEBUG("Remove incomplete comm pattern for process %u at cursor %u", issuer, cursor);
278       break;
279     }
280
281   if(!completed)
282     xbt_die("Corresponding communication not found!");
283
284   mc_list_comm_pattern_t pattern = xbt_dynar_get_as(
285     initial_communications_pattern, issuer, mc_list_comm_pattern_t);
286
287   if (!simgrid::mc::initial_global_state->initial_communications_pattern_done)
288     /* Store comm pattern */
289     xbt_dynar_push(pattern->list, &comm_pattern);
290   else {
291     /* Evaluate comm determinism */
292     deterministic_comm_pattern(issuer, comm_pattern, backtracking);
293     pattern->index_comm++;
294   }
295 }
296
297
298 /************************ Main algorithm ************************/
299
300 namespace simgrid {
301 namespace mc {
302
303 CommunicationDeterminismChecker::CommunicationDeterminismChecker(Session& session)
304   : Checker(session)
305 {
306
307 }
308
309 CommunicationDeterminismChecker::~CommunicationDeterminismChecker()
310 {
311
312 }
313
314 // TODO, deduplicate with SafetyChecker
315 RecordTrace CommunicationDeterminismChecker::getRecordTrace() // override
316 {
317   RecordTrace res;
318   for (simgrid::mc::State* state : stack_) {
319     int value = 0;
320     smx_simcall_t saved_req = MC_state_get_executed_request(state, &value);
321     const smx_process_t issuer = MC_smx_simcall_get_issuer(saved_req);
322     const int pid = issuer->pid;
323     res.push_back(RecordTraceElement(pid, value));
324   }
325   return res;
326 }
327
328 // TODO, deduplicate with SafetyChecker
329 std::vector<std::string> CommunicationDeterminismChecker::getTextualTrace() // override
330 {
331   std::vector<std::string> trace;
332   for (simgrid::mc::State* state : stack_) {
333     int value;
334     smx_simcall_t req = MC_state_get_executed_request(state, &value);
335     if (req) {
336       char* req_str = simgrid::mc::request_to_string(
337         req, value, simgrid::mc::RequestType::executed);
338       trace.push_back(req_str);
339       xbt_free(req_str);
340     }
341   }
342   return trace;
343 }
344
345 void CommunicationDeterminismChecker::prepare()
346 {
347   simgrid::mc::State* initial_state = nullptr;
348   int i;
349   const int maxpid = MC_smx_get_maxpid();
350
351   simgrid::mc::visited_states.clear();
352
353   // Create initial_communications_pattern elements:
354   initial_communications_pattern = xbt_dynar_new(sizeof(mc_list_comm_pattern_t), MC_list_comm_pattern_free_voidp);
355   for (i=0; i < maxpid; i++){
356     mc_list_comm_pattern_t process_list_pattern = xbt_new0(s_mc_list_comm_pattern_t, 1);
357     process_list_pattern->list = xbt_dynar_new(sizeof(mc_comm_pattern_t), MC_comm_pattern_free_voidp);
358     process_list_pattern->index_comm = 0;
359     xbt_dynar_insert_at(initial_communications_pattern, i, &process_list_pattern);
360   }
361
362   // Create incomplete_communications_pattern elements:
363   incomplete_communications_pattern = xbt_dynar_new(sizeof(xbt_dynar_t), xbt_dynar_free_voidp);
364   for (i=0; i < maxpid; i++){
365     xbt_dynar_t process_pattern = xbt_dynar_new(sizeof(mc_comm_pattern_t), nullptr);
366     xbt_dynar_insert_at(incomplete_communications_pattern, i, &process_pattern);
367   }
368
369   initial_state = MC_state_new();
370
371   XBT_DEBUG("********* Start communication determinism verification *********");
372
373   /* Wait for requests (schedules processes) */
374   mc_model_checker->wait_for_requests();
375
376   /* Get an enabled process and insert it in the interleave set of the initial state */
377   for (auto& p : mc_model_checker->process().simix_processes())
378     if (simgrid::mc::process_is_enabled(&p.copy))
379       MC_state_interleave_process(initial_state, &p.copy);
380
381   stack_.push_back(initial_state);
382 }
383
384 static inline
385 bool all_communications_are_finished()
386 {
387   for (size_t current_process = 1; current_process < MC_smx_get_maxpid(); current_process++) {
388     xbt_dynar_t pattern = xbt_dynar_get_as(
389       incomplete_communications_pattern, current_process, xbt_dynar_t);
390     if (!xbt_dynar_is_empty(pattern)) {
391       XBT_DEBUG("Some communications are not finished, cannot stop the exploration ! State not visited.");
392       return false;
393     }
394   }
395   return true;
396 }
397
398 int CommunicationDeterminismChecker::main(void)
399 {
400
401   char *req_str = nullptr;
402   int value;
403   std::unique_ptr<simgrid::mc::VisitedState> visited_state = nullptr;
404   smx_simcall_t req = nullptr;
405   simgrid::mc::State* next_state = nullptr;
406
407   while (!stack_.empty()) {
408
409     /* Get current state */
410     simgrid::mc::State* state = stack_.back();
411
412     XBT_DEBUG("**************************************************");
413     XBT_DEBUG("Exploration depth = %zi (state = %d, interleaved processes = %d)",
414               stack_.size(), state->num,
415               MC_state_interleave_size(state));
416
417     /* Update statistics */
418     mc_stats->visited_states++;
419
420     if (stack_.size() <= _sg_mc_max_depth
421         && (req = MC_state_get_request(state, &value))
422         && (visited_state == nullptr)) {
423
424       req_str = simgrid::mc::request_to_string(req, value, simgrid::mc::RequestType::simix);
425       XBT_DEBUG("Execute: %s", req_str);
426       xbt_free(req_str);
427
428       if (dot_output != nullptr)
429         req_str = simgrid::mc::request_get_dot_output(req, value);
430
431       MC_state_set_executed_request(state, req, value);
432       mc_stats->executed_transitions++;
433
434       /* TODO : handle test and testany simcalls */
435       e_mc_call_type_t call = MC_CALL_TYPE_NONE;
436       if (_sg_mc_comms_determinism || _sg_mc_send_determinism)
437         call = MC_get_call_type(req);
438
439       /* Answer the request */
440       simgrid::mc::handle_simcall(req, value);    /* After this call req is no longer useful */
441
442       if(!initial_global_state->initial_communications_pattern_done)
443         MC_handle_comm_pattern(call, req, value, initial_communications_pattern, 0);
444       else
445         MC_handle_comm_pattern(call, req, value, nullptr, 0);
446
447       /* Wait for requests (schedules processes) */
448       mc_model_checker->wait_for_requests();
449
450       /* Create the new expanded state */
451       next_state = MC_state_new();
452
453       /* If comm determinism verification, we cannot stop the exploration if
454          some communications are not finished (at least, data are transfered).
455          These communications  are incomplete and they cannot be analyzed and
456          compared with the initial pattern. */
457       bool compare_snapshots = all_communications_are_finished()
458         && initial_global_state->initial_communications_pattern_done;
459
460       if (_sg_mc_visited == 0 || (visited_state = simgrid::mc::is_visited_state(next_state, compare_snapshots)) == nullptr) {
461
462         /* Get enabled processes and insert them in the interleave set of the next state */
463         for (auto& p : mc_model_checker->process().simix_processes())
464           if (simgrid::mc::process_is_enabled(&p.copy))
465             MC_state_interleave_process(next_state, &p.copy);
466
467         if (dot_output != nullptr)
468           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n", state->num,  next_state->num, req_str);
469
470       } else if (dot_output != nullptr)
471         fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n",
472           state->num, visited_state->other_num == -1 ? visited_state->num : visited_state->other_num, req_str);
473
474       stack_.push_back(next_state);
475
476       if (dot_output != nullptr)
477         xbt_free(req_str);
478
479     } else {
480
481       if (stack_.size() > _sg_mc_max_depth)
482         XBT_WARN("/!\\ Max depth reached ! /!\\ ");
483       else if (visited_state != nullptr)
484         XBT_DEBUG("State already visited (equal to state %d), exploration stopped on this path.", visited_state->other_num == -1 ? visited_state->num : visited_state->other_num);
485       else
486         XBT_DEBUG("There are no more processes to interleave. (depth %zi)",
487           stack_.size());
488
489       if (!initial_global_state->initial_communications_pattern_done)
490         initial_global_state->initial_communications_pattern_done = 1;
491
492       /* Trash the current state, no longer needed */
493       stack_.pop_back();
494       MC_state_delete(state, !state->in_visited_states ? 1 : 0);
495       XBT_DEBUG("Delete state %d at depth %zi",
496         state->num, stack_.size() + 1);
497
498       visited_state = nullptr;
499
500       /* Check for deadlocks */
501       if (mc_model_checker->checkDeadlock()) {
502         MC_show_deadlock();
503         return SIMGRID_MC_EXIT_DEADLOCK;
504       }
505
506       while (!stack_.empty()) {
507         simgrid::mc::State* state = stack_.back();
508         stack_.pop_back();
509         if (MC_state_interleave_size(state)
510             && stack_.size() < _sg_mc_max_depth) {
511           /* We found a back-tracking point, let's loop */
512           XBT_DEBUG("Back-tracking to state %d at depth %zi",
513             state->num, stack_.size() + 1);
514           stack_.push_back(state);
515
516           simgrid::mc::replay(stack_);
517
518           XBT_DEBUG("Back-tracking to state %d at depth %zi done",
519             state->num, stack_.size());
520
521           break;
522         } else {
523           XBT_DEBUG("Delete state %d at depth %zi",
524             state->num, stack_.size() + 1);
525           MC_state_delete(state, !state->in_visited_states ? 1 : 0);
526         }
527       }
528     }
529   }
530
531   MC_print_statistics(mc_stats);
532   return SIMGRID_MC_EXIT_SUCCESS;
533 }
534
535 int CommunicationDeterminismChecker::run()
536 {
537   XBT_INFO("Check communication determinism");
538   mc_model_checker->wait_for_requests();
539
540   if (mc_mode == MC_MODE_CLIENT)
541     // This will move somehwere else:
542     simgrid::mc::Client::get()->handleMessages();
543
544   this->prepare();
545
546   initial_global_state = std::unique_ptr<s_mc_global_t>(new s_mc_global_t());
547   initial_global_state->snapshot = simgrid::mc::take_snapshot(0);
548   initial_global_state->initial_communications_pattern_done = 0;
549   initial_global_state->recv_deterministic = 1;
550   initial_global_state->send_deterministic = 1;
551   initial_global_state->recv_diff = nullptr;
552   initial_global_state->send_diff = nullptr;
553
554   int res = this->main();
555   initial_global_state = nullptr;
556   return res;
557 }
558
559 }
560 }