Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
[mc] Let the Checker give us the current record trace
[simgrid.git] / src / mc / CommunicationDeterminismChecker.cpp
1 /* Copyright (c) 2008-2015. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include <cstdint>
8
9 #include <xbt/dynar.h>
10 #include <xbt/dynar.hpp>
11 #include <xbt/fifo.h>
12 #include <xbt/log.h>
13 #include <xbt/sysdep.h>
14
15 #include "src/mc/mc_state.h"
16 #include "src/mc/mc_comm_pattern.h"
17 #include "src/mc/mc_request.h"
18 #include "src/mc/mc_safety.h"
19 #include "src/mc/mc_private.h"
20 #include "src/mc/mc_record.h"
21 #include "src/mc/mc_smx.h"
22 #include "src/mc/Client.hpp"
23 #include "src/mc/CommunicationDeterminismChecker.hpp"
24 #include "src/mc/mc_exit.h"
25
26 using simgrid::mc::remote;
27
28 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_comm_determinism, mc,
29                                 "Logging specific to MC communication determinism detection");
30
31 /********** Global variables **********/
32
33 xbt_dynar_t initial_communications_pattern;
34 xbt_dynar_t incomplete_communications_pattern;
35
36 /********** Static functions ***********/
37
38 static e_mc_comm_pattern_difference_t compare_comm_pattern(mc_comm_pattern_t comm1, mc_comm_pattern_t comm2) {
39   if(comm1->type != comm2->type)
40     return TYPE_DIFF;
41   if (strcmp(comm1->rdv, comm2->rdv) != 0)
42     return RDV_DIFF;
43   if (comm1->src_proc != comm2->src_proc)
44     return SRC_PROC_DIFF;
45   if (comm1->dst_proc != comm2->dst_proc)
46     return DST_PROC_DIFF;
47   if (comm1->tag != comm2->tag)
48     return TAG_DIFF;
49   if (comm1->data_size != comm2->data_size)
50     return DATA_SIZE_DIFF;
51   if(comm1->data == nullptr && comm2->data == NULL)
52     return NONE_DIFF;
53   if(comm1->data != nullptr && comm2->data !=NULL) {
54     if (!memcmp(comm1->data, comm2->data, comm1->data_size))
55       return NONE_DIFF;
56     return DATA_DIFF;
57   } else
58     return DATA_DIFF;
59   return NONE_DIFF;
60 }
61
62 static char* print_determinism_result(e_mc_comm_pattern_difference_t diff, int process, mc_comm_pattern_t comm, unsigned int cursor) {
63   char *type, *res;
64
65   if(comm->type == SIMIX_COMM_SEND)
66     type = bprintf("The send communications pattern of the process %d is different!", process - 1);
67   else
68     type = bprintf("The recv communications pattern of the process %d is different!", process - 1);
69
70   switch(diff) {
71   case TYPE_DIFF:
72     res = bprintf("%s Different type for communication #%d", type, cursor);
73     break;
74   case RDV_DIFF:
75     res = bprintf("%s Different rdv for communication #%d", type, cursor);
76     break;
77   case TAG_DIFF:
78     res = bprintf("%s Different tag for communication #%d", type, cursor);
79     break;
80   case SRC_PROC_DIFF:
81       res = bprintf("%s Different source for communication #%d", type, cursor);
82     break;
83   case DST_PROC_DIFF:
84       res = bprintf("%s Different destination for communication #%d", type, cursor);
85     break;
86   case DATA_SIZE_DIFF:
87     res = bprintf("%s\n Different data size for communication #%d", type, cursor);
88     break;
89   case DATA_DIFF:
90     res = bprintf("%s\n Different data for communication #%d", type, cursor);
91     break;
92   default:
93     res = nullptr;
94     break;
95   }
96
97   return res;
98 }
99
100 static void update_comm_pattern(mc_comm_pattern_t comm_pattern, smx_synchro_t comm_addr)
101 {
102   s_smx_synchro_t comm;
103   mc_model_checker->process().read(&comm, remote(comm_addr));
104
105   smx_process_t src_proc = MC_smx_resolve_process(comm.comm.src_proc);
106   smx_process_t dst_proc = MC_smx_resolve_process(comm.comm.dst_proc);
107   comm_pattern->src_proc = src_proc->pid;
108   comm_pattern->dst_proc = dst_proc->pid;
109   comm_pattern->src_host = MC_smx_process_get_host_name(src_proc);
110   comm_pattern->dst_host = MC_smx_process_get_host_name(dst_proc);
111   if (comm_pattern->data_size == -1 && comm.comm.src_buff != nullptr) {
112     size_t buff_size;
113     mc_model_checker->process().read(
114       &buff_size, remote(comm.comm.dst_buff_size));
115     comm_pattern->data_size = buff_size;
116     comm_pattern->data = xbt_malloc0(comm_pattern->data_size);
117     mc_model_checker->process().read_bytes(
118       comm_pattern->data, comm_pattern->data_size,
119       remote(comm.comm.src_buff));
120   }
121 }
122
123 static void deterministic_comm_pattern(int process, mc_comm_pattern_t comm, int backtracking) {
124
125   mc_list_comm_pattern_t list =
126     xbt_dynar_get_as(initial_communications_pattern, process, mc_list_comm_pattern_t);
127
128   if(!backtracking){
129     mc_comm_pattern_t initial_comm =
130       xbt_dynar_get_as(list->list, list->index_comm, mc_comm_pattern_t);
131     e_mc_comm_pattern_difference_t diff =
132       compare_comm_pattern(initial_comm, comm);
133
134     if (diff != NONE_DIFF) {
135       if (comm->type == SIMIX_COMM_SEND){
136         initial_global_state->send_deterministic = 0;
137         if(initial_global_state->send_diff != nullptr)
138           xbt_free(initial_global_state->send_diff);
139         initial_global_state->send_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
140       }else{
141         initial_global_state->recv_deterministic = 0;
142         if(initial_global_state->recv_diff != nullptr)
143           xbt_free(initial_global_state->recv_diff);
144         initial_global_state->recv_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
145       }
146       if(_sg_mc_send_determinism && !initial_global_state->send_deterministic){
147         XBT_INFO("*********************************************************");
148         XBT_INFO("***** Non-send-deterministic communications pattern *****");
149         XBT_INFO("*********************************************************");
150         XBT_INFO("%s", initial_global_state->send_diff);
151         xbt_free(initial_global_state->send_diff);
152         initial_global_state->send_diff = nullptr;
153         MC_print_statistics(mc_stats);
154         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
155       }else if(_sg_mc_comms_determinism && (!initial_global_state->send_deterministic && !initial_global_state->recv_deterministic)) {
156         XBT_INFO("****************************************************");
157         XBT_INFO("***** Non-deterministic communications pattern *****");
158         XBT_INFO("****************************************************");
159         XBT_INFO("%s", initial_global_state->send_diff);
160         XBT_INFO("%s", initial_global_state->recv_diff);
161         xbt_free(initial_global_state->send_diff);
162         initial_global_state->send_diff = nullptr;
163         xbt_free(initial_global_state->recv_diff);
164         initial_global_state->recv_diff = nullptr;
165         MC_print_statistics(mc_stats);
166         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
167       }
168     }
169   }
170
171   MC_comm_pattern_free(comm);
172
173 }
174
175 /********** Non Static functions ***********/
176
177 void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type_t call_type, int backtracking)
178 {
179   const smx_process_t issuer = MC_smx_simcall_get_issuer(request);
180   mc_list_comm_pattern_t initial_pattern = xbt_dynar_get_as(
181     initial_communications_pattern, issuer->pid, mc_list_comm_pattern_t);
182   xbt_dynar_t incomplete_pattern = xbt_dynar_get_as(
183     incomplete_communications_pattern, issuer->pid, xbt_dynar_t);
184
185   mc_comm_pattern_t pattern = xbt_new0(s_mc_comm_pattern_t, 1);
186   pattern->data_size = -1;
187   pattern->data = nullptr;
188   pattern->index =
189     initial_pattern->index_comm + xbt_dynar_length(incomplete_pattern);
190
191   if (call_type == MC_CALL_TYPE_SEND) {
192     /* Create comm pattern */
193     pattern->type = SIMIX_COMM_SEND;
194     pattern->comm_addr = simcall_comm_isend__get__result(request);
195
196     s_smx_synchro_t synchro = mc_model_checker->process().read<s_smx_synchro_t>(
197       (std::uint64_t) pattern->comm_addr);
198
199     char* remote_name = mc_model_checker->process().read<char*>(
200       (std::uint64_t)(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
201     pattern->rdv = mc_model_checker->process().read_string(remote_name);
202     pattern->src_proc = MC_smx_resolve_process(synchro.comm.src_proc)->pid;
203     pattern->src_host = MC_smx_process_get_host_name(issuer);
204
205     struct s_smpi_mpi_request mpi_request =
206       mc_model_checker->process().read<s_smpi_mpi_request>(
207         (std::uint64_t) simcall_comm_isend__get__data(request));
208     pattern->tag = mpi_request.tag;
209
210     if(synchro.comm.src_buff != nullptr){
211       pattern->data_size = synchro.comm.src_buff_size;
212       pattern->data = xbt_malloc0(pattern->data_size);
213       mc_model_checker->process().read_bytes(
214         pattern->data, pattern->data_size, remote(synchro.comm.src_buff));
215     }
216     if(mpi_request.detached){
217       if (!initial_global_state->initial_communications_pattern_done) {
218         /* Store comm pattern */
219         xbt_dynar_push(
220           xbt_dynar_get_as(
221             initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
222           )->list,
223           &pattern);
224       } else {
225         /* Evaluate comm determinism */
226         deterministic_comm_pattern(pattern->src_proc, pattern, backtracking);
227         xbt_dynar_get_as(
228           initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
229         )->index_comm++;
230       }
231       return;
232     }
233   } else if (call_type == MC_CALL_TYPE_RECV) {
234     pattern->type = SIMIX_COMM_RECEIVE;
235     pattern->comm_addr = simcall_comm_irecv__get__result(request);
236
237     struct s_smpi_mpi_request mpi_request;
238     mc_model_checker->process().read(
239       &mpi_request, remote((struct s_smpi_mpi_request*)simcall_comm_irecv__get__data(request)));
240     pattern->tag = mpi_request.tag;
241
242     s_smx_synchro_t synchro;
243     mc_model_checker->process().read(&synchro, remote(pattern->comm_addr));
244
245     char* remote_name;
246     mc_model_checker->process().read(&remote_name,
247       remote(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
248     pattern->rdv = mc_model_checker->process().read_string(remote_name);
249     pattern->dst_proc = MC_smx_resolve_process(synchro.comm.dst_proc)->pid;
250     pattern->dst_host = MC_smx_process_get_host_name(issuer);
251   } else
252     xbt_die("Unexpected call_type %i", (int) call_type);
253
254   xbt_dynar_push(
255     xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t),
256     &pattern);
257
258   XBT_DEBUG("Insert incomplete comm pattern %p for process %lu", pattern, issuer->pid);
259 }
260
261 void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm_addr, unsigned int issuer, int backtracking) {
262   mc_comm_pattern_t current_comm_pattern;
263   unsigned int cursor = 0;
264   mc_comm_pattern_t comm_pattern;
265   int completed = 0;
266
267   /* Complete comm pattern */
268   xbt_dynar_foreach(xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, current_comm_pattern)
269     if (current_comm_pattern->comm_addr == comm_addr) {
270       update_comm_pattern(current_comm_pattern, comm_addr);
271       completed = 1;
272       xbt_dynar_remove_at(
273         xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t),
274         cursor, &comm_pattern);
275       XBT_DEBUG("Remove incomplete comm pattern for process %u at cursor %u", issuer, cursor);
276       break;
277     }
278
279   if(!completed)
280     xbt_die("Corresponding communication not found!");
281
282   mc_list_comm_pattern_t pattern = xbt_dynar_get_as(
283     initial_communications_pattern, issuer, mc_list_comm_pattern_t);
284
285   if (!initial_global_state->initial_communications_pattern_done)
286     /* Store comm pattern */
287     xbt_dynar_push(pattern->list, &comm_pattern);
288   else {
289     /* Evaluate comm determinism */
290     deterministic_comm_pattern(issuer, comm_pattern, backtracking);
291     pattern->index_comm++;
292   }
293 }
294
295
296 /************************ Main algorithm ************************/
297
298 namespace simgrid {
299 namespace mc {
300
301 CommunicationDeterminismChecker::CommunicationDeterminismChecker(Session& session)
302   : Checker(session)
303 {
304
305 }
306
307 CommunicationDeterminismChecker::~CommunicationDeterminismChecker()
308 {
309
310 }
311
312 // TODO, deduplicate with SafetyChecker
313 RecordTrace CommunicationDeterminismChecker::getRecordTrace() // override
314 {
315   RecordTrace res;
316
317   xbt_fifo_item_t start = xbt_fifo_get_last_item(mc_stack);
318   for (xbt_fifo_item_t item = start; item; item = xbt_fifo_get_prev_item(item)) {
319
320     // Find (pid, value):
321     mc_state_t state = (mc_state_t) xbt_fifo_get_item_content(item);
322     int value = 0;
323     smx_simcall_t saved_req = MC_state_get_executed_request(state, &value);
324     const smx_process_t issuer = MC_smx_simcall_get_issuer(saved_req);
325     const int pid = issuer->pid;
326
327     res.push_back(RecordTraceElement(pid, value));
328   }
329
330   return std::move(res);
331 }
332
333 void CommunicationDeterminismChecker::prepare()
334 {
335   mc_state_t initial_state = nullptr;
336   int i;
337   const int maxpid = MC_smx_get_maxpid();
338
339   simgrid::mc::visited_states.clear();
340
341   // Create initial_communications_pattern elements:
342   initial_communications_pattern = xbt_dynar_new(sizeof(mc_list_comm_pattern_t), MC_list_comm_pattern_free_voidp);
343   for (i=0; i < maxpid; i++){
344     mc_list_comm_pattern_t process_list_pattern = xbt_new0(s_mc_list_comm_pattern_t, 1);
345     process_list_pattern->list = xbt_dynar_new(sizeof(mc_comm_pattern_t), MC_comm_pattern_free_voidp);
346     process_list_pattern->index_comm = 0;
347     xbt_dynar_insert_at(initial_communications_pattern, i, &process_list_pattern);
348   }
349
350   // Create incomplete_communications_pattern elements:
351   incomplete_communications_pattern = xbt_dynar_new(sizeof(xbt_dynar_t), xbt_dynar_free_voidp);
352   for (i=0; i < maxpid; i++){
353     xbt_dynar_t process_pattern = xbt_dynar_new(sizeof(mc_comm_pattern_t), nullptr);
354     xbt_dynar_insert_at(incomplete_communications_pattern, i, &process_pattern);
355   }
356
357   initial_state = MC_state_new();
358
359   XBT_DEBUG("********* Start communication determinism verification *********");
360
361   /* Wait for requests (schedules processes) */
362   mc_model_checker->wait_for_requests();
363
364   /* Get an enabled process and insert it in the interleave set of the initial state */
365   for (auto& p : mc_model_checker->process().simix_processes())
366     if (simgrid::mc::process_is_enabled(&p.copy))
367       MC_state_interleave_process(initial_state, &p.copy);
368
369   xbt_fifo_unshift(mc_stack, initial_state);
370 }
371
372 static inline
373 bool all_communications_are_finished()
374 {
375   for (size_t current_process = 1; current_process < MC_smx_get_maxpid(); current_process++) {
376     xbt_dynar_t pattern = xbt_dynar_get_as(
377       incomplete_communications_pattern, current_process, xbt_dynar_t);
378     if (!xbt_dynar_is_empty(pattern)) {
379       XBT_DEBUG("Some communications are not finished, cannot stop the exploration ! State not visited.");
380       return false;
381     }
382   }
383   return true;
384 }
385
386 int CommunicationDeterminismChecker::main(void)
387 {
388
389   char *req_str = nullptr;
390   int value;
391   std::unique_ptr<simgrid::mc::VisitedState> visited_state = nullptr;
392   smx_simcall_t req = nullptr;
393   mc_state_t state = nullptr, next_state = NULL;
394
395   while (xbt_fifo_size(mc_stack) > 0) {
396
397     /* Get current state */
398     state = (mc_state_t) xbt_fifo_get_item_content(xbt_fifo_get_first_item(mc_stack));
399
400     XBT_DEBUG("**************************************************");
401     XBT_DEBUG("Exploration depth = %d (state = %d, interleaved processes = %d)",
402               xbt_fifo_size(mc_stack), state->num,
403               MC_state_interleave_size(state));
404
405     /* Update statistics */
406     mc_stats->visited_states++;
407
408     if ((xbt_fifo_size(mc_stack) <= _sg_mc_max_depth)
409         && (req = MC_state_get_request(state, &value))
410         && (visited_state == nullptr)) {
411
412       req_str = simgrid::mc::request_to_string(req, value, simgrid::mc::RequestType::simix);
413       XBT_DEBUG("Execute: %s", req_str);
414       xbt_free(req_str);
415
416       if (dot_output != nullptr)
417         req_str = simgrid::mc::request_get_dot_output(req, value);
418
419       MC_state_set_executed_request(state, req, value);
420       mc_stats->executed_transitions++;
421
422       /* TODO : handle test and testany simcalls */
423       e_mc_call_type_t call = MC_CALL_TYPE_NONE;
424       if (_sg_mc_comms_determinism || _sg_mc_send_determinism)
425         call = MC_get_call_type(req);
426
427       /* Answer the request */
428       simgrid::mc::handle_simcall(req, value);    /* After this call req is no longer useful */
429
430       if(!initial_global_state->initial_communications_pattern_done)
431         MC_handle_comm_pattern(call, req, value, initial_communications_pattern, 0);
432       else
433         MC_handle_comm_pattern(call, req, value, nullptr, 0);
434
435       /* Wait for requests (schedules processes) */
436       mc_model_checker->wait_for_requests();
437
438       /* Create the new expanded state */
439       next_state = MC_state_new();
440
441       /* If comm determinism verification, we cannot stop the exploration if
442          some communications are not finished (at least, data are transfered).
443          These communications  are incomplete and they cannot be analyzed and
444          compared with the initial pattern. */
445       bool compare_snapshots = all_communications_are_finished()
446         && initial_global_state->initial_communications_pattern_done;
447
448       if (_sg_mc_visited == 0 || (visited_state = simgrid::mc::is_visited_state(next_state, compare_snapshots)) == nullptr) {
449
450         /* Get enabled processes and insert them in the interleave set of the next state */
451         for (auto& p : mc_model_checker->process().simix_processes())
452           if (simgrid::mc::process_is_enabled(&p.copy))
453             MC_state_interleave_process(next_state, &p.copy);
454
455         if (dot_output != nullptr)
456           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n", state->num,  next_state->num, req_str);
457
458       } else if (dot_output != nullptr)
459         fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n",
460           state->num, visited_state->other_num == -1 ? visited_state->num : visited_state->other_num, req_str);
461
462       xbt_fifo_unshift(mc_stack, next_state);
463
464       if (dot_output != nullptr)
465         xbt_free(req_str);
466
467     } else {
468
469       if (xbt_fifo_size(mc_stack) > _sg_mc_max_depth)
470         XBT_WARN("/!\\ Max depth reached ! /!\\ ");
471       else if (visited_state != nullptr)
472         XBT_DEBUG("State already visited (equal to state %d), exploration stopped on this path.", visited_state->other_num == -1 ? visited_state->num : visited_state->other_num);
473       else
474         XBT_DEBUG("There are no more processes to interleave. (depth %d)", xbt_fifo_size(mc_stack));
475
476       if (!initial_global_state->initial_communications_pattern_done)
477         initial_global_state->initial_communications_pattern_done = 1;
478
479       /* Trash the current state, no longer needed */
480       xbt_fifo_shift(mc_stack);
481       MC_state_delete(state, !state->in_visited_states ? 1 : 0);
482       XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
483
484       visited_state = nullptr;
485
486       /* Check for deadlocks */
487       if (mc_model_checker->checkDeadlock()) {
488         MC_show_deadlock();
489         return SIMGRID_MC_EXIT_DEADLOCK;
490       }
491
492       while ((state = (mc_state_t) xbt_fifo_shift(mc_stack)) != nullptr)
493         if (MC_state_interleave_size(state) && xbt_fifo_size(mc_stack) < _sg_mc_max_depth) {
494           /* We found a back-tracking point, let's loop */
495           XBT_DEBUG("Back-tracking to state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
496           xbt_fifo_unshift(mc_stack, state);
497
498           MC_replay(mc_stack);
499
500           XBT_DEBUG("Back-tracking to state %d at depth %d done", state->num, xbt_fifo_size(mc_stack));
501
502           break;
503         } else {
504           XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
505           MC_state_delete(state, !state->in_visited_states ? 1 : 0);
506         }
507
508     }
509   }
510
511   MC_print_statistics(mc_stats);
512   return SIMGRID_MC_EXIT_SUCCESS;
513 }
514
515 int CommunicationDeterminismChecker::run()
516 {
517   XBT_INFO("Check communication determinism");
518   mc_model_checker->wait_for_requests();
519
520   if (mc_mode == MC_MODE_CLIENT)
521     // This will move somehwere else:
522     simgrid::mc::Client::get()->handleMessages();
523
524   /* Create exploration stack */
525   mc_stack = xbt_fifo_new();
526
527   this->prepare();
528
529   initial_global_state = xbt_new0(s_mc_global_t, 1);
530   initial_global_state->snapshot = simgrid::mc::take_snapshot(0);
531   initial_global_state->initial_communications_pattern_done = 0;
532   initial_global_state->recv_deterministic = 1;
533   initial_global_state->send_deterministic = 1;
534   initial_global_state->recv_diff = nullptr;
535   initial_global_state->send_diff = nullptr;
536
537   return this->main();
538 }
539
540 }
541 }