Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
[mc] Making a State a class
[simgrid.git] / src / mc / CommunicationDeterminismChecker.cpp
1 /* Copyright (c) 2008-2015. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include <cstdint>
8
9 #include <xbt/dynar.h>
10 #include <xbt/dynar.hpp>
11 #include <xbt/fifo.h>
12 #include <xbt/log.h>
13 #include <xbt/sysdep.h>
14
15 #include "src/mc/mc_state.h"
16 #include "src/mc/mc_comm_pattern.h"
17 #include "src/mc/mc_request.h"
18 #include "src/mc/mc_safety.h"
19 #include "src/mc/mc_private.h"
20 #include "src/mc/mc_record.h"
21 #include "src/mc/mc_smx.h"
22 #include "src/mc/Client.hpp"
23 #include "src/mc/CommunicationDeterminismChecker.hpp"
24 #include "src/mc/mc_exit.h"
25
26 using simgrid::mc::remote;
27
28 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_comm_determinism, mc,
29                                 "Logging specific to MC communication determinism detection");
30
31 /********** Global variables **********/
32
33 xbt_dynar_t initial_communications_pattern;
34 xbt_dynar_t incomplete_communications_pattern;
35
36 /********** Static functions ***********/
37
38 static e_mc_comm_pattern_difference_t compare_comm_pattern(mc_comm_pattern_t comm1, mc_comm_pattern_t comm2) {
39   if(comm1->type != comm2->type)
40     return TYPE_DIFF;
41   if (strcmp(comm1->rdv, comm2->rdv) != 0)
42     return RDV_DIFF;
43   if (comm1->src_proc != comm2->src_proc)
44     return SRC_PROC_DIFF;
45   if (comm1->dst_proc != comm2->dst_proc)
46     return DST_PROC_DIFF;
47   if (comm1->tag != comm2->tag)
48     return TAG_DIFF;
49   if (comm1->data_size != comm2->data_size)
50     return DATA_SIZE_DIFF;
51   if(comm1->data == nullptr && comm2->data == NULL)
52     return NONE_DIFF;
53   if(comm1->data != nullptr && comm2->data !=NULL) {
54     if (!memcmp(comm1->data, comm2->data, comm1->data_size))
55       return NONE_DIFF;
56     return DATA_DIFF;
57   } else
58     return DATA_DIFF;
59   return NONE_DIFF;
60 }
61
62 static char* print_determinism_result(e_mc_comm_pattern_difference_t diff, int process, mc_comm_pattern_t comm, unsigned int cursor) {
63   char *type, *res;
64
65   if(comm->type == SIMIX_COMM_SEND)
66     type = bprintf("The send communications pattern of the process %d is different!", process - 1);
67   else
68     type = bprintf("The recv communications pattern of the process %d is different!", process - 1);
69
70   switch(diff) {
71   case TYPE_DIFF:
72     res = bprintf("%s Different type for communication #%d", type, cursor);
73     break;
74   case RDV_DIFF:
75     res = bprintf("%s Different rdv for communication #%d", type, cursor);
76     break;
77   case TAG_DIFF:
78     res = bprintf("%s Different tag for communication #%d", type, cursor);
79     break;
80   case SRC_PROC_DIFF:
81       res = bprintf("%s Different source for communication #%d", type, cursor);
82     break;
83   case DST_PROC_DIFF:
84       res = bprintf("%s Different destination for communication #%d", type, cursor);
85     break;
86   case DATA_SIZE_DIFF:
87     res = bprintf("%s\n Different data size for communication #%d", type, cursor);
88     break;
89   case DATA_DIFF:
90     res = bprintf("%s\n Different data for communication #%d", type, cursor);
91     break;
92   default:
93     res = nullptr;
94     break;
95   }
96
97   return res;
98 }
99
100 static void update_comm_pattern(mc_comm_pattern_t comm_pattern, smx_synchro_t comm_addr)
101 {
102   s_smx_synchro_t comm;
103   mc_model_checker->process().read(&comm, remote(comm_addr));
104
105   smx_process_t src_proc = MC_smx_resolve_process(comm.comm.src_proc);
106   smx_process_t dst_proc = MC_smx_resolve_process(comm.comm.dst_proc);
107   comm_pattern->src_proc = src_proc->pid;
108   comm_pattern->dst_proc = dst_proc->pid;
109   comm_pattern->src_host = MC_smx_process_get_host_name(src_proc);
110   comm_pattern->dst_host = MC_smx_process_get_host_name(dst_proc);
111   if (comm_pattern->data_size == -1 && comm.comm.src_buff != nullptr) {
112     size_t buff_size;
113     mc_model_checker->process().read(
114       &buff_size, remote(comm.comm.dst_buff_size));
115     comm_pattern->data_size = buff_size;
116     comm_pattern->data = xbt_malloc0(comm_pattern->data_size);
117     mc_model_checker->process().read_bytes(
118       comm_pattern->data, comm_pattern->data_size,
119       remote(comm.comm.src_buff));
120   }
121 }
122
123 static void deterministic_comm_pattern(int process, mc_comm_pattern_t comm, int backtracking) {
124
125   mc_list_comm_pattern_t list =
126     xbt_dynar_get_as(initial_communications_pattern, process, mc_list_comm_pattern_t);
127
128   if(!backtracking){
129     mc_comm_pattern_t initial_comm =
130       xbt_dynar_get_as(list->list, list->index_comm, mc_comm_pattern_t);
131     e_mc_comm_pattern_difference_t diff =
132       compare_comm_pattern(initial_comm, comm);
133
134     if (diff != NONE_DIFF) {
135       if (comm->type == SIMIX_COMM_SEND){
136         initial_global_state->send_deterministic = 0;
137         if(initial_global_state->send_diff != nullptr)
138           xbt_free(initial_global_state->send_diff);
139         initial_global_state->send_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
140       }else{
141         initial_global_state->recv_deterministic = 0;
142         if(initial_global_state->recv_diff != nullptr)
143           xbt_free(initial_global_state->recv_diff);
144         initial_global_state->recv_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
145       }
146       if(_sg_mc_send_determinism && !initial_global_state->send_deterministic){
147         XBT_INFO("*********************************************************");
148         XBT_INFO("***** Non-send-deterministic communications pattern *****");
149         XBT_INFO("*********************************************************");
150         XBT_INFO("%s", initial_global_state->send_diff);
151         xbt_free(initial_global_state->send_diff);
152         initial_global_state->send_diff = nullptr;
153         MC_print_statistics(mc_stats);
154         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
155       }else if(_sg_mc_comms_determinism && (!initial_global_state->send_deterministic && !initial_global_state->recv_deterministic)) {
156         XBT_INFO("****************************************************");
157         XBT_INFO("***** Non-deterministic communications pattern *****");
158         XBT_INFO("****************************************************");
159         XBT_INFO("%s", initial_global_state->send_diff);
160         XBT_INFO("%s", initial_global_state->recv_diff);
161         xbt_free(initial_global_state->send_diff);
162         initial_global_state->send_diff = nullptr;
163         xbt_free(initial_global_state->recv_diff);
164         initial_global_state->recv_diff = nullptr;
165         MC_print_statistics(mc_stats);
166         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
167       }
168     }
169   }
170
171   MC_comm_pattern_free(comm);
172
173 }
174
175 /********** Non Static functions ***********/
176
177 void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type_t call_type, int backtracking)
178 {
179   const smx_process_t issuer = MC_smx_simcall_get_issuer(request);
180   mc_list_comm_pattern_t initial_pattern = xbt_dynar_get_as(
181     initial_communications_pattern, issuer->pid, mc_list_comm_pattern_t);
182   xbt_dynar_t incomplete_pattern = xbt_dynar_get_as(
183     incomplete_communications_pattern, issuer->pid, xbt_dynar_t);
184
185   mc_comm_pattern_t pattern = xbt_new0(s_mc_comm_pattern_t, 1);
186   pattern->data_size = -1;
187   pattern->data = nullptr;
188   pattern->index =
189     initial_pattern->index_comm + xbt_dynar_length(incomplete_pattern);
190
191   if (call_type == MC_CALL_TYPE_SEND) {
192     /* Create comm pattern */
193     pattern->type = SIMIX_COMM_SEND;
194     pattern->comm_addr = simcall_comm_isend__get__result(request);
195
196     s_smx_synchro_t synchro = mc_model_checker->process().read<s_smx_synchro_t>(
197       (std::uint64_t) pattern->comm_addr);
198
199     char* remote_name = mc_model_checker->process().read<char*>(
200       (std::uint64_t)(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
201     pattern->rdv = mc_model_checker->process().read_string(remote_name);
202     pattern->src_proc = MC_smx_resolve_process(synchro.comm.src_proc)->pid;
203     pattern->src_host = MC_smx_process_get_host_name(issuer);
204
205     struct s_smpi_mpi_request mpi_request =
206       mc_model_checker->process().read<s_smpi_mpi_request>(
207         (std::uint64_t) simcall_comm_isend__get__data(request));
208     pattern->tag = mpi_request.tag;
209
210     if(synchro.comm.src_buff != nullptr){
211       pattern->data_size = synchro.comm.src_buff_size;
212       pattern->data = xbt_malloc0(pattern->data_size);
213       mc_model_checker->process().read_bytes(
214         pattern->data, pattern->data_size, remote(synchro.comm.src_buff));
215     }
216     if(mpi_request.detached){
217       if (!initial_global_state->initial_communications_pattern_done) {
218         /* Store comm pattern */
219         xbt_dynar_push(
220           xbt_dynar_get_as(
221             initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
222           )->list,
223           &pattern);
224       } else {
225         /* Evaluate comm determinism */
226         deterministic_comm_pattern(pattern->src_proc, pattern, backtracking);
227         xbt_dynar_get_as(
228           initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
229         )->index_comm++;
230       }
231       return;
232     }
233   } else if (call_type == MC_CALL_TYPE_RECV) {
234     pattern->type = SIMIX_COMM_RECEIVE;
235     pattern->comm_addr = simcall_comm_irecv__get__result(request);
236
237     struct s_smpi_mpi_request mpi_request;
238     mc_model_checker->process().read(
239       &mpi_request, remote((struct s_smpi_mpi_request*)simcall_comm_irecv__get__data(request)));
240     pattern->tag = mpi_request.tag;
241
242     s_smx_synchro_t synchro;
243     mc_model_checker->process().read(&synchro, remote(pattern->comm_addr));
244
245     char* remote_name;
246     mc_model_checker->process().read(&remote_name,
247       remote(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
248     pattern->rdv = mc_model_checker->process().read_string(remote_name);
249     pattern->dst_proc = MC_smx_resolve_process(synchro.comm.dst_proc)->pid;
250     pattern->dst_host = MC_smx_process_get_host_name(issuer);
251   } else
252     xbt_die("Unexpected call_type %i", (int) call_type);
253
254   xbt_dynar_push(
255     xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t),
256     &pattern);
257
258   XBT_DEBUG("Insert incomplete comm pattern %p for process %lu", pattern, issuer->pid);
259 }
260
261 void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm_addr, unsigned int issuer, int backtracking) {
262   mc_comm_pattern_t current_comm_pattern;
263   unsigned int cursor = 0;
264   mc_comm_pattern_t comm_pattern;
265   int completed = 0;
266
267   /* Complete comm pattern */
268   xbt_dynar_foreach(xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, current_comm_pattern)
269     if (current_comm_pattern->comm_addr == comm_addr) {
270       update_comm_pattern(current_comm_pattern, comm_addr);
271       completed = 1;
272       xbt_dynar_remove_at(
273         xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t),
274         cursor, &comm_pattern);
275       XBT_DEBUG("Remove incomplete comm pattern for process %u at cursor %u", issuer, cursor);
276       break;
277     }
278
279   if(!completed)
280     xbt_die("Corresponding communication not found!");
281
282   mc_list_comm_pattern_t pattern = xbt_dynar_get_as(
283     initial_communications_pattern, issuer, mc_list_comm_pattern_t);
284
285   if (!initial_global_state->initial_communications_pattern_done)
286     /* Store comm pattern */
287     xbt_dynar_push(pattern->list, &comm_pattern);
288   else {
289     /* Evaluate comm determinism */
290     deterministic_comm_pattern(issuer, comm_pattern, backtracking);
291     pattern->index_comm++;
292   }
293 }
294
295
296 /************************ Main algorithm ************************/
297
298 namespace simgrid {
299 namespace mc {
300
301 CommunicationDeterminismChecker::CommunicationDeterminismChecker(Session& session)
302   : Checker(session)
303 {
304
305 }
306
307 CommunicationDeterminismChecker::~CommunicationDeterminismChecker()
308 {
309
310 }
311
312 // TODO, deduplicate with SafetyChecker
313 RecordTrace CommunicationDeterminismChecker::getRecordTrace() // override
314 {
315   RecordTrace res;
316
317   xbt_fifo_item_t start = xbt_fifo_get_last_item(mc_stack);
318   for (xbt_fifo_item_t item = start; item; item = xbt_fifo_get_prev_item(item)) {
319
320     // Find (pid, value):
321     simgrid::mc::State* state = (simgrid::mc::State*) xbt_fifo_get_item_content(item);
322     int value = 0;
323     smx_simcall_t saved_req = MC_state_get_executed_request(state, &value);
324     const smx_process_t issuer = MC_smx_simcall_get_issuer(saved_req);
325     const int pid = issuer->pid;
326
327     res.push_back(RecordTraceElement(pid, value));
328   }
329
330   return std::move(res);
331 }
332
333 // TODO, deduplicate with SafetyChecker
334 std::vector<std::string> CommunicationDeterminismChecker::getTextualTrace() // override
335 {
336   std::vector<std::string> res;
337   for (xbt_fifo_item_t item = xbt_fifo_get_last_item(mc_stack);
338        item; item = xbt_fifo_get_prev_item(item)) {
339     simgrid::mc::State* state = (simgrid::mc::State*)xbt_fifo_get_item_content(item);
340     int value;
341     smx_simcall_t req = MC_state_get_executed_request(state, &value);
342     if (req) {
343       char* req_str = simgrid::mc::request_to_string(
344         req, value, simgrid::mc::RequestType::executed);
345       res.push_back(req_str);
346       xbt_free(req_str);
347     }
348   }
349   return std::move(res);
350 }
351
352 void CommunicationDeterminismChecker::prepare()
353 {
354   simgrid::mc::State* initial_state = nullptr;
355   int i;
356   const int maxpid = MC_smx_get_maxpid();
357
358   simgrid::mc::visited_states.clear();
359
360   // Create initial_communications_pattern elements:
361   initial_communications_pattern = xbt_dynar_new(sizeof(mc_list_comm_pattern_t), MC_list_comm_pattern_free_voidp);
362   for (i=0; i < maxpid; i++){
363     mc_list_comm_pattern_t process_list_pattern = xbt_new0(s_mc_list_comm_pattern_t, 1);
364     process_list_pattern->list = xbt_dynar_new(sizeof(mc_comm_pattern_t), MC_comm_pattern_free_voidp);
365     process_list_pattern->index_comm = 0;
366     xbt_dynar_insert_at(initial_communications_pattern, i, &process_list_pattern);
367   }
368
369   // Create incomplete_communications_pattern elements:
370   incomplete_communications_pattern = xbt_dynar_new(sizeof(xbt_dynar_t), xbt_dynar_free_voidp);
371   for (i=0; i < maxpid; i++){
372     xbt_dynar_t process_pattern = xbt_dynar_new(sizeof(mc_comm_pattern_t), nullptr);
373     xbt_dynar_insert_at(incomplete_communications_pattern, i, &process_pattern);
374   }
375
376   initial_state = MC_state_new();
377
378   XBT_DEBUG("********* Start communication determinism verification *********");
379
380   /* Wait for requests (schedules processes) */
381   mc_model_checker->wait_for_requests();
382
383   /* Get an enabled process and insert it in the interleave set of the initial state */
384   for (auto& p : mc_model_checker->process().simix_processes())
385     if (simgrid::mc::process_is_enabled(&p.copy))
386       MC_state_interleave_process(initial_state, &p.copy);
387
388   xbt_fifo_unshift(mc_stack, initial_state);
389 }
390
391 static inline
392 bool all_communications_are_finished()
393 {
394   for (size_t current_process = 1; current_process < MC_smx_get_maxpid(); current_process++) {
395     xbt_dynar_t pattern = xbt_dynar_get_as(
396       incomplete_communications_pattern, current_process, xbt_dynar_t);
397     if (!xbt_dynar_is_empty(pattern)) {
398       XBT_DEBUG("Some communications are not finished, cannot stop the exploration ! State not visited.");
399       return false;
400     }
401   }
402   return true;
403 }
404
405 int CommunicationDeterminismChecker::main(void)
406 {
407
408   char *req_str = nullptr;
409   int value;
410   std::unique_ptr<simgrid::mc::VisitedState> visited_state = nullptr;
411   smx_simcall_t req = nullptr;
412   simgrid::mc::State* state = nullptr;
413   simgrid::mc::State* next_state = nullptr;
414
415   while (xbt_fifo_size(mc_stack) > 0) {
416
417     /* Get current state */
418     state = (simgrid::mc::State*) xbt_fifo_get_item_content(xbt_fifo_get_first_item(mc_stack));
419
420     XBT_DEBUG("**************************************************");
421     XBT_DEBUG("Exploration depth = %d (state = %d, interleaved processes = %d)",
422               xbt_fifo_size(mc_stack), state->num,
423               MC_state_interleave_size(state));
424
425     /* Update statistics */
426     mc_stats->visited_states++;
427
428     if ((xbt_fifo_size(mc_stack) <= _sg_mc_max_depth)
429         && (req = MC_state_get_request(state, &value))
430         && (visited_state == nullptr)) {
431
432       req_str = simgrid::mc::request_to_string(req, value, simgrid::mc::RequestType::simix);
433       XBT_DEBUG("Execute: %s", req_str);
434       xbt_free(req_str);
435
436       if (dot_output != nullptr)
437         req_str = simgrid::mc::request_get_dot_output(req, value);
438
439       MC_state_set_executed_request(state, req, value);
440       mc_stats->executed_transitions++;
441
442       /* TODO : handle test and testany simcalls */
443       e_mc_call_type_t call = MC_CALL_TYPE_NONE;
444       if (_sg_mc_comms_determinism || _sg_mc_send_determinism)
445         call = MC_get_call_type(req);
446
447       /* Answer the request */
448       simgrid::mc::handle_simcall(req, value);    /* After this call req is no longer useful */
449
450       if(!initial_global_state->initial_communications_pattern_done)
451         MC_handle_comm_pattern(call, req, value, initial_communications_pattern, 0);
452       else
453         MC_handle_comm_pattern(call, req, value, nullptr, 0);
454
455       /* Wait for requests (schedules processes) */
456       mc_model_checker->wait_for_requests();
457
458       /* Create the new expanded state */
459       next_state = MC_state_new();
460
461       /* If comm determinism verification, we cannot stop the exploration if
462          some communications are not finished (at least, data are transfered).
463          These communications  are incomplete and they cannot be analyzed and
464          compared with the initial pattern. */
465       bool compare_snapshots = all_communications_are_finished()
466         && initial_global_state->initial_communications_pattern_done;
467
468       if (_sg_mc_visited == 0 || (visited_state = simgrid::mc::is_visited_state(next_state, compare_snapshots)) == nullptr) {
469
470         /* Get enabled processes and insert them in the interleave set of the next state */
471         for (auto& p : mc_model_checker->process().simix_processes())
472           if (simgrid::mc::process_is_enabled(&p.copy))
473             MC_state_interleave_process(next_state, &p.copy);
474
475         if (dot_output != nullptr)
476           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n", state->num,  next_state->num, req_str);
477
478       } else if (dot_output != nullptr)
479         fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n",
480           state->num, visited_state->other_num == -1 ? visited_state->num : visited_state->other_num, req_str);
481
482       xbt_fifo_unshift(mc_stack, next_state);
483
484       if (dot_output != nullptr)
485         xbt_free(req_str);
486
487     } else {
488
489       if (xbt_fifo_size(mc_stack) > _sg_mc_max_depth)
490         XBT_WARN("/!\\ Max depth reached ! /!\\ ");
491       else if (visited_state != nullptr)
492         XBT_DEBUG("State already visited (equal to state %d), exploration stopped on this path.", visited_state->other_num == -1 ? visited_state->num : visited_state->other_num);
493       else
494         XBT_DEBUG("There are no more processes to interleave. (depth %d)", xbt_fifo_size(mc_stack));
495
496       if (!initial_global_state->initial_communications_pattern_done)
497         initial_global_state->initial_communications_pattern_done = 1;
498
499       /* Trash the current state, no longer needed */
500       xbt_fifo_shift(mc_stack);
501       MC_state_delete(state, !state->in_visited_states ? 1 : 0);
502       XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
503
504       visited_state = nullptr;
505
506       /* Check for deadlocks */
507       if (mc_model_checker->checkDeadlock()) {
508         MC_show_deadlock();
509         return SIMGRID_MC_EXIT_DEADLOCK;
510       }
511
512       while ((state = (simgrid::mc::State*) xbt_fifo_shift(mc_stack)) != nullptr)
513         if (MC_state_interleave_size(state) && xbt_fifo_size(mc_stack) < _sg_mc_max_depth) {
514           /* We found a back-tracking point, let's loop */
515           XBT_DEBUG("Back-tracking to state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
516           xbt_fifo_unshift(mc_stack, state);
517
518           MC_replay(mc_stack);
519
520           XBT_DEBUG("Back-tracking to state %d at depth %d done", state->num, xbt_fifo_size(mc_stack));
521
522           break;
523         } else {
524           XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
525           MC_state_delete(state, !state->in_visited_states ? 1 : 0);
526         }
527
528     }
529   }
530
531   MC_print_statistics(mc_stats);
532   return SIMGRID_MC_EXIT_SUCCESS;
533 }
534
535 int CommunicationDeterminismChecker::run()
536 {
537   XBT_INFO("Check communication determinism");
538   mc_model_checker->wait_for_requests();
539
540   if (mc_mode == MC_MODE_CLIENT)
541     // This will move somehwere else:
542     simgrid::mc::Client::get()->handleMessages();
543
544   /* Create exploration stack */
545   mc_stack = xbt_fifo_new();
546
547   this->prepare();
548
549   initial_global_state = xbt_new0(s_mc_global_t, 1);
550   initial_global_state->snapshot = simgrid::mc::take_snapshot(0);
551   initial_global_state->initial_communications_pattern_done = 0;
552   initial_global_state->recv_deterministic = 1;
553   initial_global_state->send_deterministic = 1;
554   initial_global_state->recv_diff = nullptr;
555   initial_global_state->send_diff = nullptr;
556
557   return this->main();
558 }
559
560 }
561 }