Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
935568da5fc75be20c9fb76ecb502845904c79f2
[simgrid.git] / src / mc / CommunicationDeterminismChecker.cpp
1 /* Copyright (c) 2008-2015. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include <cstdint>
8
9 #include <xbt/dynar.h>
10 #include <xbt/dynar.hpp>
11 #include <xbt/fifo.h>
12 #include <xbt/log.h>
13 #include <xbt/sysdep.h>
14
15 #include "src/mc/mc_state.h"
16 #include "src/mc/mc_comm_pattern.h"
17 #include "src/mc/mc_request.h"
18 #include "src/mc/mc_safety.h"
19 #include "src/mc/mc_private.h"
20 #include "src/mc/mc_record.h"
21 #include "src/mc/mc_smx.h"
22 #include "src/mc/Client.hpp"
23 #include "src/mc/CommunicationDeterminismChecker.hpp"
24 #include "src/mc/mc_exit.h"
25
26 using simgrid::mc::remote;
27
28 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_comm_determinism, mc,
29                                 "Logging specific to MC communication determinism detection");
30
31 /********** Global variables **********/
32
33 xbt_dynar_t initial_communications_pattern;
34 xbt_dynar_t incomplete_communications_pattern;
35
36 /********** Static functions ***********/
37
38 static e_mc_comm_pattern_difference_t compare_comm_pattern(mc_comm_pattern_t comm1, mc_comm_pattern_t comm2) {
39   if(comm1->type != comm2->type)
40     return TYPE_DIFF;
41   if (strcmp(comm1->rdv, comm2->rdv) != 0)
42     return RDV_DIFF;
43   if (comm1->src_proc != comm2->src_proc)
44     return SRC_PROC_DIFF;
45   if (comm1->dst_proc != comm2->dst_proc)
46     return DST_PROC_DIFF;
47   if (comm1->tag != comm2->tag)
48     return TAG_DIFF;
49   if (comm1->data_size != comm2->data_size)
50     return DATA_SIZE_DIFF;
51   if(comm1->data == nullptr && comm2->data == NULL)
52     return NONE_DIFF;
53   if(comm1->data != nullptr && comm2->data !=NULL) {
54     if (!memcmp(comm1->data, comm2->data, comm1->data_size))
55       return NONE_DIFF;
56     return DATA_DIFF;
57   } else
58     return DATA_DIFF;
59   return NONE_DIFF;
60 }
61
62 static char* print_determinism_result(e_mc_comm_pattern_difference_t diff, int process, mc_comm_pattern_t comm, unsigned int cursor) {
63   char *type, *res;
64
65   if(comm->type == SIMIX_COMM_SEND)
66     type = bprintf("The send communications pattern of the process %d is different!", process - 1);
67   else
68     type = bprintf("The recv communications pattern of the process %d is different!", process - 1);
69
70   switch(diff) {
71   case TYPE_DIFF:
72     res = bprintf("%s Different type for communication #%d", type, cursor);
73     break;
74   case RDV_DIFF:
75     res = bprintf("%s Different rdv for communication #%d", type, cursor);
76     break;
77   case TAG_DIFF:
78     res = bprintf("%s Different tag for communication #%d", type, cursor);
79     break;
80   case SRC_PROC_DIFF:
81       res = bprintf("%s Different source for communication #%d", type, cursor);
82     break;
83   case DST_PROC_DIFF:
84       res = bprintf("%s Different destination for communication #%d", type, cursor);
85     break;
86   case DATA_SIZE_DIFF:
87     res = bprintf("%s\n Different data size for communication #%d", type, cursor);
88     break;
89   case DATA_DIFF:
90     res = bprintf("%s\n Different data for communication #%d", type, cursor);
91     break;
92   default:
93     res = nullptr;
94     break;
95   }
96
97   return res;
98 }
99
100 static void update_comm_pattern(mc_comm_pattern_t comm_pattern, smx_synchro_t comm_addr)
101 {
102   s_smx_synchro_t comm;
103   mc_model_checker->process().read(&comm, remote(comm_addr));
104
105   smx_process_t src_proc = MC_smx_resolve_process(comm.comm.src_proc);
106   smx_process_t dst_proc = MC_smx_resolve_process(comm.comm.dst_proc);
107   comm_pattern->src_proc = src_proc->pid;
108   comm_pattern->dst_proc = dst_proc->pid;
109   comm_pattern->src_host = MC_smx_process_get_host_name(src_proc);
110   comm_pattern->dst_host = MC_smx_process_get_host_name(dst_proc);
111   if (comm_pattern->data_size == -1 && comm.comm.src_buff != nullptr) {
112     size_t buff_size;
113     mc_model_checker->process().read(
114       &buff_size, remote(comm.comm.dst_buff_size));
115     comm_pattern->data_size = buff_size;
116     comm_pattern->data = xbt_malloc0(comm_pattern->data_size);
117     mc_model_checker->process().read_bytes(
118       comm_pattern->data, comm_pattern->data_size,
119       remote(comm.comm.src_buff));
120   }
121 }
122
123 static void deterministic_comm_pattern(int process, mc_comm_pattern_t comm, int backtracking) {
124
125   mc_list_comm_pattern_t list =
126     xbt_dynar_get_as(initial_communications_pattern, process, mc_list_comm_pattern_t);
127
128   if(!backtracking){
129     mc_comm_pattern_t initial_comm =
130       xbt_dynar_get_as(list->list, list->index_comm, mc_comm_pattern_t);
131     e_mc_comm_pattern_difference_t diff =
132       compare_comm_pattern(initial_comm, comm);
133
134     if (diff != NONE_DIFF) {
135       if (comm->type == SIMIX_COMM_SEND){
136         initial_global_state->send_deterministic = 0;
137         if(initial_global_state->send_diff != nullptr)
138           xbt_free(initial_global_state->send_diff);
139         initial_global_state->send_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
140       }else{
141         initial_global_state->recv_deterministic = 0;
142         if(initial_global_state->recv_diff != nullptr)
143           xbt_free(initial_global_state->recv_diff);
144         initial_global_state->recv_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
145       }
146       if(_sg_mc_send_determinism && !initial_global_state->send_deterministic){
147         XBT_INFO("*********************************************************");
148         XBT_INFO("***** Non-send-deterministic communications pattern *****");
149         XBT_INFO("*********************************************************");
150         XBT_INFO("%s", initial_global_state->send_diff);
151         xbt_free(initial_global_state->send_diff);
152         initial_global_state->send_diff = nullptr;
153         MC_print_statistics(mc_stats);
154         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
155       }else if(_sg_mc_comms_determinism && (!initial_global_state->send_deterministic && !initial_global_state->recv_deterministic)) {
156         XBT_INFO("****************************************************");
157         XBT_INFO("***** Non-deterministic communications pattern *****");
158         XBT_INFO("****************************************************");
159         XBT_INFO("%s", initial_global_state->send_diff);
160         XBT_INFO("%s", initial_global_state->recv_diff);
161         xbt_free(initial_global_state->send_diff);
162         initial_global_state->send_diff = nullptr;
163         xbt_free(initial_global_state->recv_diff);
164         initial_global_state->recv_diff = nullptr;
165         MC_print_statistics(mc_stats);
166         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
167       }
168     }
169   }
170
171   MC_comm_pattern_free(comm);
172
173 }
174
175 /********** Non Static functions ***********/
176
177 void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type_t call_type, int backtracking)
178 {
179   const smx_process_t issuer = MC_smx_simcall_get_issuer(request);
180   mc_list_comm_pattern_t initial_pattern = xbt_dynar_get_as(
181     initial_communications_pattern, issuer->pid, mc_list_comm_pattern_t);
182   xbt_dynar_t incomplete_pattern = xbt_dynar_get_as(
183     incomplete_communications_pattern, issuer->pid, xbt_dynar_t);
184
185   mc_comm_pattern_t pattern = xbt_new0(s_mc_comm_pattern_t, 1);
186   pattern->data_size = -1;
187   pattern->data = nullptr;
188   pattern->index =
189     initial_pattern->index_comm + xbt_dynar_length(incomplete_pattern);
190
191   if (call_type == MC_CALL_TYPE_SEND) {
192     /* Create comm pattern */
193     pattern->type = SIMIX_COMM_SEND;
194     pattern->comm_addr = simcall_comm_isend__get__result(request);
195
196     s_smx_synchro_t synchro = mc_model_checker->process().read<s_smx_synchro_t>(
197       (std::uint64_t) pattern->comm_addr);
198
199     char* remote_name = mc_model_checker->process().read<char*>(
200       (std::uint64_t)(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
201     pattern->rdv = mc_model_checker->process().read_string(remote_name);
202     pattern->src_proc = MC_smx_resolve_process(synchro.comm.src_proc)->pid;
203     pattern->src_host = MC_smx_process_get_host_name(issuer);
204
205     struct s_smpi_mpi_request mpi_request =
206       mc_model_checker->process().read<s_smpi_mpi_request>(
207         (std::uint64_t) simcall_comm_isend__get__data(request));
208     pattern->tag = mpi_request.tag;
209
210     if(synchro.comm.src_buff != nullptr){
211       pattern->data_size = synchro.comm.src_buff_size;
212       pattern->data = xbt_malloc0(pattern->data_size);
213       mc_model_checker->process().read_bytes(
214         pattern->data, pattern->data_size, remote(synchro.comm.src_buff));
215     }
216     if(mpi_request.detached){
217       if (!initial_global_state->initial_communications_pattern_done) {
218         /* Store comm pattern */
219         xbt_dynar_push(
220           xbt_dynar_get_as(
221             initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
222           )->list,
223           &pattern);
224       } else {
225         /* Evaluate comm determinism */
226         deterministic_comm_pattern(pattern->src_proc, pattern, backtracking);
227         xbt_dynar_get_as(
228           initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
229         )->index_comm++;
230       }
231       return;
232     }
233   } else if (call_type == MC_CALL_TYPE_RECV) {
234     pattern->type = SIMIX_COMM_RECEIVE;
235     pattern->comm_addr = simcall_comm_irecv__get__result(request);
236
237     struct s_smpi_mpi_request mpi_request;
238     mc_model_checker->process().read(
239       &mpi_request, remote((struct s_smpi_mpi_request*)simcall_comm_irecv__get__data(request)));
240     pattern->tag = mpi_request.tag;
241
242     s_smx_synchro_t synchro;
243     mc_model_checker->process().read(&synchro, remote(pattern->comm_addr));
244
245     char* remote_name;
246     mc_model_checker->process().read(&remote_name,
247       remote(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
248     pattern->rdv = mc_model_checker->process().read_string(remote_name);
249     pattern->dst_proc = MC_smx_resolve_process(synchro.comm.dst_proc)->pid;
250     pattern->dst_host = MC_smx_process_get_host_name(issuer);
251   } else
252     xbt_die("Unexpected call_type %i", (int) call_type);
253
254   xbt_dynar_push(
255     xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t),
256     &pattern);
257
258   XBT_DEBUG("Insert incomplete comm pattern %p for process %lu", pattern, issuer->pid);
259 }
260
261 void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm_addr, unsigned int issuer, int backtracking) {
262   mc_comm_pattern_t current_comm_pattern;
263   unsigned int cursor = 0;
264   mc_comm_pattern_t comm_pattern;
265   int completed = 0;
266
267   /* Complete comm pattern */
268   xbt_dynar_foreach(xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, current_comm_pattern)
269     if (current_comm_pattern->comm_addr == comm_addr) {
270       update_comm_pattern(current_comm_pattern, comm_addr);
271       completed = 1;
272       xbt_dynar_remove_at(
273         xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t),
274         cursor, &comm_pattern);
275       XBT_DEBUG("Remove incomplete comm pattern for process %u at cursor %u", issuer, cursor);
276       break;
277     }
278
279   if(!completed)
280     xbt_die("Corresponding communication not found!");
281
282   mc_list_comm_pattern_t pattern = xbt_dynar_get_as(
283     initial_communications_pattern, issuer, mc_list_comm_pattern_t);
284
285   if (!initial_global_state->initial_communications_pattern_done)
286     /* Store comm pattern */
287     xbt_dynar_push(pattern->list, &comm_pattern);
288   else {
289     /* Evaluate comm determinism */
290     deterministic_comm_pattern(issuer, comm_pattern, backtracking);
291     pattern->index_comm++;
292   }
293 }
294
295
296 /************************ Main algorithm ************************/
297
298 namespace simgrid {
299 namespace mc {
300
301 CommunicationDeterminismChecker::CommunicationDeterminismChecker(Session& session)
302   : Checker(session)
303 {
304
305 }
306
307 CommunicationDeterminismChecker::~CommunicationDeterminismChecker()
308 {
309
310 }
311
312 // TODO, deduplicate with SafetyChecker
313 RecordTrace CommunicationDeterminismChecker::getRecordTrace() // override
314 {
315   RecordTrace res;
316
317   xbt_fifo_item_t start = xbt_fifo_get_last_item(mc_stack);
318   for (xbt_fifo_item_t item = start; item; item = xbt_fifo_get_prev_item(item)) {
319
320     // Find (pid, value):
321     mc_state_t state = (mc_state_t) xbt_fifo_get_item_content(item);
322     int value = 0;
323     smx_simcall_t saved_req = MC_state_get_executed_request(state, &value);
324     const smx_process_t issuer = MC_smx_simcall_get_issuer(saved_req);
325     const int pid = issuer->pid;
326
327     res.push_back(RecordTraceElement(pid, value));
328   }
329
330   return std::move(res);
331 }
332
333 // TODO, deduplicate with SafetyChecker
334 std::vector<std::string> CommunicationDeterminismChecker::getTextualTrace() // override
335 {
336   std::vector<std::string> res;
337   for (xbt_fifo_item_t item = xbt_fifo_get_last_item(mc_stack);
338        item; item = xbt_fifo_get_prev_item(item)) {
339     mc_state_t state = (mc_state_t)xbt_fifo_get_item_content(item);
340     int value;
341     smx_simcall_t req = MC_state_get_executed_request(state, &value);
342     if (req) {
343       char* req_str = simgrid::mc::request_to_string(
344         req, value, simgrid::mc::RequestType::executed);
345       res.push_back(req_str);
346       xbt_free(req_str);
347     }
348   }
349   return std::move(res);
350 }
351
352 void CommunicationDeterminismChecker::prepare()
353 {
354   mc_state_t initial_state = nullptr;
355   int i;
356   const int maxpid = MC_smx_get_maxpid();
357
358   simgrid::mc::visited_states.clear();
359
360   // Create initial_communications_pattern elements:
361   initial_communications_pattern = xbt_dynar_new(sizeof(mc_list_comm_pattern_t), MC_list_comm_pattern_free_voidp);
362   for (i=0; i < maxpid; i++){
363     mc_list_comm_pattern_t process_list_pattern = xbt_new0(s_mc_list_comm_pattern_t, 1);
364     process_list_pattern->list = xbt_dynar_new(sizeof(mc_comm_pattern_t), MC_comm_pattern_free_voidp);
365     process_list_pattern->index_comm = 0;
366     xbt_dynar_insert_at(initial_communications_pattern, i, &process_list_pattern);
367   }
368
369   // Create incomplete_communications_pattern elements:
370   incomplete_communications_pattern = xbt_dynar_new(sizeof(xbt_dynar_t), xbt_dynar_free_voidp);
371   for (i=0; i < maxpid; i++){
372     xbt_dynar_t process_pattern = xbt_dynar_new(sizeof(mc_comm_pattern_t), nullptr);
373     xbt_dynar_insert_at(incomplete_communications_pattern, i, &process_pattern);
374   }
375
376   initial_state = MC_state_new();
377
378   XBT_DEBUG("********* Start communication determinism verification *********");
379
380   /* Wait for requests (schedules processes) */
381   mc_model_checker->wait_for_requests();
382
383   /* Get an enabled process and insert it in the interleave set of the initial state */
384   for (auto& p : mc_model_checker->process().simix_processes())
385     if (simgrid::mc::process_is_enabled(&p.copy))
386       MC_state_interleave_process(initial_state, &p.copy);
387
388   xbt_fifo_unshift(mc_stack, initial_state);
389 }
390
391 static inline
392 bool all_communications_are_finished()
393 {
394   for (size_t current_process = 1; current_process < MC_smx_get_maxpid(); current_process++) {
395     xbt_dynar_t pattern = xbt_dynar_get_as(
396       incomplete_communications_pattern, current_process, xbt_dynar_t);
397     if (!xbt_dynar_is_empty(pattern)) {
398       XBT_DEBUG("Some communications are not finished, cannot stop the exploration ! State not visited.");
399       return false;
400     }
401   }
402   return true;
403 }
404
405 int CommunicationDeterminismChecker::main(void)
406 {
407
408   char *req_str = nullptr;
409   int value;
410   std::unique_ptr<simgrid::mc::VisitedState> visited_state = nullptr;
411   smx_simcall_t req = nullptr;
412   mc_state_t state = nullptr, next_state = NULL;
413
414   while (xbt_fifo_size(mc_stack) > 0) {
415
416     /* Get current state */
417     state = (mc_state_t) xbt_fifo_get_item_content(xbt_fifo_get_first_item(mc_stack));
418
419     XBT_DEBUG("**************************************************");
420     XBT_DEBUG("Exploration depth = %d (state = %d, interleaved processes = %d)",
421               xbt_fifo_size(mc_stack), state->num,
422               MC_state_interleave_size(state));
423
424     /* Update statistics */
425     mc_stats->visited_states++;
426
427     if ((xbt_fifo_size(mc_stack) <= _sg_mc_max_depth)
428         && (req = MC_state_get_request(state, &value))
429         && (visited_state == nullptr)) {
430
431       req_str = simgrid::mc::request_to_string(req, value, simgrid::mc::RequestType::simix);
432       XBT_DEBUG("Execute: %s", req_str);
433       xbt_free(req_str);
434
435       if (dot_output != nullptr)
436         req_str = simgrid::mc::request_get_dot_output(req, value);
437
438       MC_state_set_executed_request(state, req, value);
439       mc_stats->executed_transitions++;
440
441       /* TODO : handle test and testany simcalls */
442       e_mc_call_type_t call = MC_CALL_TYPE_NONE;
443       if (_sg_mc_comms_determinism || _sg_mc_send_determinism)
444         call = MC_get_call_type(req);
445
446       /* Answer the request */
447       simgrid::mc::handle_simcall(req, value);    /* After this call req is no longer useful */
448
449       if(!initial_global_state->initial_communications_pattern_done)
450         MC_handle_comm_pattern(call, req, value, initial_communications_pattern, 0);
451       else
452         MC_handle_comm_pattern(call, req, value, nullptr, 0);
453
454       /* Wait for requests (schedules processes) */
455       mc_model_checker->wait_for_requests();
456
457       /* Create the new expanded state */
458       next_state = MC_state_new();
459
460       /* If comm determinism verification, we cannot stop the exploration if
461          some communications are not finished (at least, data are transfered).
462          These communications  are incomplete and they cannot be analyzed and
463          compared with the initial pattern. */
464       bool compare_snapshots = all_communications_are_finished()
465         && initial_global_state->initial_communications_pattern_done;
466
467       if (_sg_mc_visited == 0 || (visited_state = simgrid::mc::is_visited_state(next_state, compare_snapshots)) == nullptr) {
468
469         /* Get enabled processes and insert them in the interleave set of the next state */
470         for (auto& p : mc_model_checker->process().simix_processes())
471           if (simgrid::mc::process_is_enabled(&p.copy))
472             MC_state_interleave_process(next_state, &p.copy);
473
474         if (dot_output != nullptr)
475           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n", state->num,  next_state->num, req_str);
476
477       } else if (dot_output != nullptr)
478         fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n",
479           state->num, visited_state->other_num == -1 ? visited_state->num : visited_state->other_num, req_str);
480
481       xbt_fifo_unshift(mc_stack, next_state);
482
483       if (dot_output != nullptr)
484         xbt_free(req_str);
485
486     } else {
487
488       if (xbt_fifo_size(mc_stack) > _sg_mc_max_depth)
489         XBT_WARN("/!\\ Max depth reached ! /!\\ ");
490       else if (visited_state != nullptr)
491         XBT_DEBUG("State already visited (equal to state %d), exploration stopped on this path.", visited_state->other_num == -1 ? visited_state->num : visited_state->other_num);
492       else
493         XBT_DEBUG("There are no more processes to interleave. (depth %d)", xbt_fifo_size(mc_stack));
494
495       if (!initial_global_state->initial_communications_pattern_done)
496         initial_global_state->initial_communications_pattern_done = 1;
497
498       /* Trash the current state, no longer needed */
499       xbt_fifo_shift(mc_stack);
500       MC_state_delete(state, !state->in_visited_states ? 1 : 0);
501       XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
502
503       visited_state = nullptr;
504
505       /* Check for deadlocks */
506       if (mc_model_checker->checkDeadlock()) {
507         MC_show_deadlock();
508         return SIMGRID_MC_EXIT_DEADLOCK;
509       }
510
511       while ((state = (mc_state_t) xbt_fifo_shift(mc_stack)) != nullptr)
512         if (MC_state_interleave_size(state) && xbt_fifo_size(mc_stack) < _sg_mc_max_depth) {
513           /* We found a back-tracking point, let's loop */
514           XBT_DEBUG("Back-tracking to state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
515           xbt_fifo_unshift(mc_stack, state);
516
517           MC_replay(mc_stack);
518
519           XBT_DEBUG("Back-tracking to state %d at depth %d done", state->num, xbt_fifo_size(mc_stack));
520
521           break;
522         } else {
523           XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
524           MC_state_delete(state, !state->in_visited_states ? 1 : 0);
525         }
526
527     }
528   }
529
530   MC_print_statistics(mc_stats);
531   return SIMGRID_MC_EXIT_SUCCESS;
532 }
533
534 int CommunicationDeterminismChecker::run()
535 {
536   XBT_INFO("Check communication determinism");
537   mc_model_checker->wait_for_requests();
538
539   if (mc_mode == MC_MODE_CLIENT)
540     // This will move somehwere else:
541     simgrid::mc::Client::get()->handleMessages();
542
543   /* Create exploration stack */
544   mc_stack = xbt_fifo_new();
545
546   this->prepare();
547
548   initial_global_state = xbt_new0(s_mc_global_t, 1);
549   initial_global_state->snapshot = simgrid::mc::take_snapshot(0);
550   initial_global_state->initial_communications_pattern_done = 0;
551   initial_global_state->recv_deterministic = 1;
552   initial_global_state->send_deterministic = 1;
553   initial_global_state->recv_diff = nullptr;
554   initial_global_state->send_diff = nullptr;
555
556   return this->main();
557 }
558
559 }
560 }