Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
[mc] Move main communication determinism code in a CommunicationDeterminismChecker...
[simgrid.git] / src / mc / CommunicationDeterminismChecker.cpp
1 /* Copyright (c) 2008-2015. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include <cstdint>
8
9 #include <xbt/dynar.h>
10 #include <xbt/dynar.hpp>
11 #include <xbt/fifo.h>
12 #include <xbt/log.h>
13 #include <xbt/sysdep.h>
14
15 #include "src/mc/mc_state.h"
16 #include "src/mc/mc_comm_pattern.h"
17 #include "src/mc/mc_request.h"
18 #include "src/mc/mc_safety.h"
19 #include "src/mc/mc_private.h"
20 #include "src/mc/mc_record.h"
21 #include "src/mc/mc_smx.h"
22 #include "src/mc/Client.hpp"
23 #include "src/mc/CommunicationDeterminismChecker.hpp"
24 #include "src/mc/mc_exit.h"
25
26 using simgrid::mc::remote;
27
28 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_comm_determinism, mc,
29                                 "Logging specific to MC communication determinism detection");
30
31 /********** Global variables **********/
32
33 xbt_dynar_t initial_communications_pattern;
34 xbt_dynar_t incomplete_communications_pattern;
35
36 /********** Static functions ***********/
37
38 static e_mc_comm_pattern_difference_t compare_comm_pattern(mc_comm_pattern_t comm1, mc_comm_pattern_t comm2) {
39   if(comm1->type != comm2->type)
40     return TYPE_DIFF;
41   if (strcmp(comm1->rdv, comm2->rdv) != 0)
42     return RDV_DIFF;
43   if (comm1->src_proc != comm2->src_proc)
44     return SRC_PROC_DIFF;
45   if (comm1->dst_proc != comm2->dst_proc)
46     return DST_PROC_DIFF;
47   if (comm1->tag != comm2->tag)
48     return TAG_DIFF;
49   if (comm1->data_size != comm2->data_size)
50     return DATA_SIZE_DIFF;
51   if(comm1->data == nullptr && comm2->data == NULL)
52     return NONE_DIFF;
53   if(comm1->data != nullptr && comm2->data !=NULL) {
54     if (!memcmp(comm1->data, comm2->data, comm1->data_size))
55       return NONE_DIFF;
56     return DATA_DIFF;
57   } else
58     return DATA_DIFF;
59   return NONE_DIFF;
60 }
61
62 static char* print_determinism_result(e_mc_comm_pattern_difference_t diff, int process, mc_comm_pattern_t comm, unsigned int cursor) {
63   char *type, *res;
64
65   if(comm->type == SIMIX_COMM_SEND)
66     type = bprintf("The send communications pattern of the process %d is different!", process - 1);
67   else
68     type = bprintf("The recv communications pattern of the process %d is different!", process - 1);
69
70   switch(diff) {
71   case TYPE_DIFF:
72     res = bprintf("%s Different type for communication #%d", type, cursor);
73     break;
74   case RDV_DIFF:
75     res = bprintf("%s Different rdv for communication #%d", type, cursor);
76     break;
77   case TAG_DIFF:
78     res = bprintf("%s Different tag for communication #%d", type, cursor);
79     break;
80   case SRC_PROC_DIFF:
81       res = bprintf("%s Different source for communication #%d", type, cursor);
82     break;
83   case DST_PROC_DIFF:
84       res = bprintf("%s Different destination for communication #%d", type, cursor);
85     break;
86   case DATA_SIZE_DIFF:
87     res = bprintf("%s\n Different data size for communication #%d", type, cursor);
88     break;
89   case DATA_DIFF:
90     res = bprintf("%s\n Different data for communication #%d", type, cursor);
91     break;
92   default:
93     res = nullptr;
94     break;
95   }
96
97   return res;
98 }
99
100 static void update_comm_pattern(mc_comm_pattern_t comm_pattern, smx_synchro_t comm_addr)
101 {
102   s_smx_synchro_t comm;
103   mc_model_checker->process().read(&comm, remote(comm_addr));
104
105   smx_process_t src_proc = MC_smx_resolve_process(comm.comm.src_proc);
106   smx_process_t dst_proc = MC_smx_resolve_process(comm.comm.dst_proc);
107   comm_pattern->src_proc = src_proc->pid;
108   comm_pattern->dst_proc = dst_proc->pid;
109   comm_pattern->src_host = MC_smx_process_get_host_name(src_proc);
110   comm_pattern->dst_host = MC_smx_process_get_host_name(dst_proc);
111   if (comm_pattern->data_size == -1 && comm.comm.src_buff != nullptr) {
112     size_t buff_size;
113     mc_model_checker->process().read(
114       &buff_size, remote(comm.comm.dst_buff_size));
115     comm_pattern->data_size = buff_size;
116     comm_pattern->data = xbt_malloc0(comm_pattern->data_size);
117     mc_model_checker->process().read_bytes(
118       comm_pattern->data, comm_pattern->data_size,
119       remote(comm.comm.src_buff));
120   }
121 }
122
123 static void deterministic_comm_pattern(int process, mc_comm_pattern_t comm, int backtracking) {
124
125   mc_list_comm_pattern_t list =
126     xbt_dynar_get_as(initial_communications_pattern, process, mc_list_comm_pattern_t);
127
128   if(!backtracking){
129     mc_comm_pattern_t initial_comm =
130       xbt_dynar_get_as(list->list, list->index_comm, mc_comm_pattern_t);
131     e_mc_comm_pattern_difference_t diff =
132       compare_comm_pattern(initial_comm, comm);
133
134     if (diff != NONE_DIFF) {
135       if (comm->type == SIMIX_COMM_SEND){
136         initial_global_state->send_deterministic = 0;
137         if(initial_global_state->send_diff != nullptr)
138           xbt_free(initial_global_state->send_diff);
139         initial_global_state->send_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
140       }else{
141         initial_global_state->recv_deterministic = 0;
142         if(initial_global_state->recv_diff != nullptr)
143           xbt_free(initial_global_state->recv_diff);
144         initial_global_state->recv_diff = print_determinism_result(diff, process, comm, list->index_comm + 1);
145       }
146       if(_sg_mc_send_determinism && !initial_global_state->send_deterministic){
147         XBT_INFO("*********************************************************");
148         XBT_INFO("***** Non-send-deterministic communications pattern *****");
149         XBT_INFO("*********************************************************");
150         XBT_INFO("%s", initial_global_state->send_diff);
151         xbt_free(initial_global_state->send_diff);
152         initial_global_state->send_diff = nullptr;
153         MC_print_statistics(mc_stats);
154         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
155       }else if(_sg_mc_comms_determinism && (!initial_global_state->send_deterministic && !initial_global_state->recv_deterministic)) {
156         XBT_INFO("****************************************************");
157         XBT_INFO("***** Non-deterministic communications pattern *****");
158         XBT_INFO("****************************************************");
159         XBT_INFO("%s", initial_global_state->send_diff);
160         XBT_INFO("%s", initial_global_state->recv_diff);
161         xbt_free(initial_global_state->send_diff);
162         initial_global_state->send_diff = nullptr;
163         xbt_free(initial_global_state->recv_diff);
164         initial_global_state->recv_diff = nullptr;
165         MC_print_statistics(mc_stats);
166         mc_model_checker->exit(SIMGRID_MC_EXIT_NON_DETERMINISM);
167       }
168     }
169   }
170
171   MC_comm_pattern_free(comm);
172
173 }
174
175 /********** Non Static functions ***********/
176
177 void MC_get_comm_pattern(xbt_dynar_t list, smx_simcall_t request, e_mc_call_type_t call_type, int backtracking)
178 {
179   const smx_process_t issuer = MC_smx_simcall_get_issuer(request);
180   mc_list_comm_pattern_t initial_pattern = xbt_dynar_get_as(
181     initial_communications_pattern, issuer->pid, mc_list_comm_pattern_t);
182   xbt_dynar_t incomplete_pattern = xbt_dynar_get_as(
183     incomplete_communications_pattern, issuer->pid, xbt_dynar_t);
184
185   mc_comm_pattern_t pattern = xbt_new0(s_mc_comm_pattern_t, 1);
186   pattern->data_size = -1;
187   pattern->data = nullptr;
188   pattern->index =
189     initial_pattern->index_comm + xbt_dynar_length(incomplete_pattern);
190
191   if (call_type == MC_CALL_TYPE_SEND) {
192     /* Create comm pattern */
193     pattern->type = SIMIX_COMM_SEND;
194     pattern->comm_addr = simcall_comm_isend__get__result(request);
195
196     s_smx_synchro_t synchro = mc_model_checker->process().read<s_smx_synchro_t>(
197       (std::uint64_t) pattern->comm_addr);
198
199     char* remote_name = mc_model_checker->process().read<char*>(
200       (std::uint64_t)(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
201     pattern->rdv = mc_model_checker->process().read_string(remote_name);
202     pattern->src_proc = MC_smx_resolve_process(synchro.comm.src_proc)->pid;
203     pattern->src_host = MC_smx_process_get_host_name(issuer);
204
205     struct s_smpi_mpi_request mpi_request =
206       mc_model_checker->process().read<s_smpi_mpi_request>(
207         (std::uint64_t) simcall_comm_isend__get__data(request));
208     pattern->tag = mpi_request.tag;
209
210     if(synchro.comm.src_buff != nullptr){
211       pattern->data_size = synchro.comm.src_buff_size;
212       pattern->data = xbt_malloc0(pattern->data_size);
213       mc_model_checker->process().read_bytes(
214         pattern->data, pattern->data_size, remote(synchro.comm.src_buff));
215     }
216     if(mpi_request.detached){
217       if (!initial_global_state->initial_communications_pattern_done) {
218         /* Store comm pattern */
219         xbt_dynar_push(
220           xbt_dynar_get_as(
221             initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
222           )->list,
223           &pattern);
224       } else {
225         /* Evaluate comm determinism */
226         deterministic_comm_pattern(pattern->src_proc, pattern, backtracking);
227         xbt_dynar_get_as(
228           initial_communications_pattern, pattern->src_proc, mc_list_comm_pattern_t
229         )->index_comm++;
230       }
231       return;
232     }
233   } else if (call_type == MC_CALL_TYPE_RECV) {
234     pattern->type = SIMIX_COMM_RECEIVE;
235     pattern->comm_addr = simcall_comm_irecv__get__result(request);
236
237     struct s_smpi_mpi_request mpi_request;
238     mc_model_checker->process().read(
239       &mpi_request, remote((struct s_smpi_mpi_request*)simcall_comm_irecv__get__data(request)));
240     pattern->tag = mpi_request.tag;
241
242     s_smx_synchro_t synchro;
243     mc_model_checker->process().read(&synchro, remote(pattern->comm_addr));
244
245     char* remote_name;
246     mc_model_checker->process().read(&remote_name,
247       remote(synchro.comm.rdv ? &synchro.comm.rdv->name : &synchro.comm.rdv_cpy->name));
248     pattern->rdv = mc_model_checker->process().read_string(remote_name);
249     pattern->dst_proc = MC_smx_resolve_process(synchro.comm.dst_proc)->pid;
250     pattern->dst_host = MC_smx_process_get_host_name(issuer);
251   } else
252     xbt_die("Unexpected call_type %i", (int) call_type);
253
254   xbt_dynar_push(
255     xbt_dynar_get_as(incomplete_communications_pattern, issuer->pid, xbt_dynar_t),
256     &pattern);
257
258   XBT_DEBUG("Insert incomplete comm pattern %p for process %lu", pattern, issuer->pid);
259 }
260
261 void MC_complete_comm_pattern(xbt_dynar_t list, smx_synchro_t comm_addr, unsigned int issuer, int backtracking) {
262   mc_comm_pattern_t current_comm_pattern;
263   unsigned int cursor = 0;
264   mc_comm_pattern_t comm_pattern;
265   int completed = 0;
266
267   /* Complete comm pattern */
268   xbt_dynar_foreach(xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t), cursor, current_comm_pattern)
269     if (current_comm_pattern->comm_addr == comm_addr) {
270       update_comm_pattern(current_comm_pattern, comm_addr);
271       completed = 1;
272       xbt_dynar_remove_at(
273         xbt_dynar_get_as(incomplete_communications_pattern, issuer, xbt_dynar_t),
274         cursor, &comm_pattern);
275       XBT_DEBUG("Remove incomplete comm pattern for process %u at cursor %u", issuer, cursor);
276       break;
277     }
278
279   if(!completed)
280     xbt_die("Corresponding communication not found!");
281
282   mc_list_comm_pattern_t pattern = xbt_dynar_get_as(
283     initial_communications_pattern, issuer, mc_list_comm_pattern_t);
284
285   if (!initial_global_state->initial_communications_pattern_done)
286     /* Store comm pattern */
287     xbt_dynar_push(pattern->list, &comm_pattern);
288   else {
289     /* Evaluate comm determinism */
290     deterministic_comm_pattern(issuer, comm_pattern, backtracking);
291     pattern->index_comm++;
292   }
293 }
294
295
296 /************************ Main algorithm ************************/
297
298 namespace simgrid {
299 namespace mc {
300
301 CommunicationDeterminismChecker::CommunicationDeterminismChecker(Session& session)
302   : Checker(session)
303 {
304
305 }
306
307 CommunicationDeterminismChecker::~CommunicationDeterminismChecker()
308 {
309
310 }
311
312 void CommunicationDeterminismChecker::prepare()
313 {
314   mc_state_t initial_state = nullptr;
315   int i;
316   const int maxpid = MC_smx_get_maxpid();
317
318   simgrid::mc::visited_states.clear();
319
320   // Create initial_communications_pattern elements:
321   initial_communications_pattern = xbt_dynar_new(sizeof(mc_list_comm_pattern_t), MC_list_comm_pattern_free_voidp);
322   for (i=0; i < maxpid; i++){
323     mc_list_comm_pattern_t process_list_pattern = xbt_new0(s_mc_list_comm_pattern_t, 1);
324     process_list_pattern->list = xbt_dynar_new(sizeof(mc_comm_pattern_t), MC_comm_pattern_free_voidp);
325     process_list_pattern->index_comm = 0;
326     xbt_dynar_insert_at(initial_communications_pattern, i, &process_list_pattern);
327   }
328
329   // Create incomplete_communications_pattern elements:
330   incomplete_communications_pattern = xbt_dynar_new(sizeof(xbt_dynar_t), xbt_dynar_free_voidp);
331   for (i=0; i < maxpid; i++){
332     xbt_dynar_t process_pattern = xbt_dynar_new(sizeof(mc_comm_pattern_t), nullptr);
333     xbt_dynar_insert_at(incomplete_communications_pattern, i, &process_pattern);
334   }
335
336   initial_state = MC_state_new();
337
338   XBT_DEBUG("********* Start communication determinism verification *********");
339
340   /* Wait for requests (schedules processes) */
341   mc_model_checker->wait_for_requests();
342
343   /* Get an enabled process and insert it in the interleave set of the initial state */
344   for (auto& p : mc_model_checker->process().simix_processes())
345     if (simgrid::mc::process_is_enabled(&p.copy))
346       MC_state_interleave_process(initial_state, &p.copy);
347
348   xbt_fifo_unshift(mc_stack, initial_state);
349 }
350
351 static inline
352 bool all_communications_are_finished()
353 {
354   for (size_t current_process = 1; current_process < MC_smx_get_maxpid(); current_process++) {
355     xbt_dynar_t pattern = xbt_dynar_get_as(
356       incomplete_communications_pattern, current_process, xbt_dynar_t);
357     if (!xbt_dynar_is_empty(pattern)) {
358       XBT_DEBUG("Some communications are not finished, cannot stop the exploration ! State not visited.");
359       return false;
360     }
361   }
362   return true;
363 }
364
365 int CommunicationDeterminismChecker::main(void)
366 {
367
368   char *req_str = nullptr;
369   int value;
370   std::unique_ptr<simgrid::mc::VisitedState> visited_state = nullptr;
371   smx_simcall_t req = nullptr;
372   mc_state_t state = nullptr, next_state = NULL;
373
374   while (xbt_fifo_size(mc_stack) > 0) {
375
376     /* Get current state */
377     state = (mc_state_t) xbt_fifo_get_item_content(xbt_fifo_get_first_item(mc_stack));
378
379     XBT_DEBUG("**************************************************");
380     XBT_DEBUG("Exploration depth = %d (state = %d, interleaved processes = %d)",
381               xbt_fifo_size(mc_stack), state->num,
382               MC_state_interleave_size(state));
383
384     /* Update statistics */
385     mc_stats->visited_states++;
386
387     if ((xbt_fifo_size(mc_stack) <= _sg_mc_max_depth)
388         && (req = MC_state_get_request(state, &value))
389         && (visited_state == nullptr)) {
390
391       req_str = simgrid::mc::request_to_string(req, value, simgrid::mc::RequestType::simix);
392       XBT_DEBUG("Execute: %s", req_str);
393       xbt_free(req_str);
394
395       if (dot_output != nullptr)
396         req_str = simgrid::mc::request_get_dot_output(req, value);
397
398       MC_state_set_executed_request(state, req, value);
399       mc_stats->executed_transitions++;
400
401       /* TODO : handle test and testany simcalls */
402       e_mc_call_type_t call = MC_CALL_TYPE_NONE;
403       if (_sg_mc_comms_determinism || _sg_mc_send_determinism)
404         call = MC_get_call_type(req);
405
406       /* Answer the request */
407       simgrid::mc::handle_simcall(req, value);    /* After this call req is no longer useful */
408
409       if(!initial_global_state->initial_communications_pattern_done)
410         MC_handle_comm_pattern(call, req, value, initial_communications_pattern, 0);
411       else
412         MC_handle_comm_pattern(call, req, value, nullptr, 0);
413
414       /* Wait for requests (schedules processes) */
415       mc_model_checker->wait_for_requests();
416
417       /* Create the new expanded state */
418       next_state = MC_state_new();
419
420       /* If comm determinism verification, we cannot stop the exploration if
421          some communications are not finished (at least, data are transfered).
422          These communications  are incomplete and they cannot be analyzed and
423          compared with the initial pattern. */
424       bool compare_snapshots = all_communications_are_finished()
425         && initial_global_state->initial_communications_pattern_done;
426
427       if (_sg_mc_visited == 0 || (visited_state = simgrid::mc::is_visited_state(next_state, compare_snapshots)) == nullptr) {
428
429         /* Get enabled processes and insert them in the interleave set of the next state */
430         for (auto& p : mc_model_checker->process().simix_processes())
431           if (simgrid::mc::process_is_enabled(&p.copy))
432             MC_state_interleave_process(next_state, &p.copy);
433
434         if (dot_output != nullptr)
435           fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n", state->num,  next_state->num, req_str);
436
437       } else if (dot_output != nullptr)
438         fprintf(dot_output, "\"%d\" -> \"%d\" [%s];\n",
439           state->num, visited_state->other_num == -1 ? visited_state->num : visited_state->other_num, req_str);
440
441       xbt_fifo_unshift(mc_stack, next_state);
442
443       if (dot_output != nullptr)
444         xbt_free(req_str);
445
446     } else {
447
448       if (xbt_fifo_size(mc_stack) > _sg_mc_max_depth)
449         XBT_WARN("/!\\ Max depth reached ! /!\\ ");
450       else if (visited_state != nullptr)
451         XBT_DEBUG("State already visited (equal to state %d), exploration stopped on this path.", visited_state->other_num == -1 ? visited_state->num : visited_state->other_num);
452       else
453         XBT_DEBUG("There are no more processes to interleave. (depth %d)", xbt_fifo_size(mc_stack));
454
455       if (!initial_global_state->initial_communications_pattern_done)
456         initial_global_state->initial_communications_pattern_done = 1;
457
458       /* Trash the current state, no longer needed */
459       xbt_fifo_shift(mc_stack);
460       MC_state_delete(state, !state->in_visited_states ? 1 : 0);
461       XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
462
463       visited_state = nullptr;
464
465       /* Check for deadlocks */
466       if (mc_model_checker->checkDeadlock()) {
467         MC_show_deadlock();
468         return SIMGRID_MC_EXIT_DEADLOCK;
469       }
470
471       while ((state = (mc_state_t) xbt_fifo_shift(mc_stack)) != nullptr)
472         if (MC_state_interleave_size(state) && xbt_fifo_size(mc_stack) < _sg_mc_max_depth) {
473           /* We found a back-tracking point, let's loop */
474           XBT_DEBUG("Back-tracking to state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
475           xbt_fifo_unshift(mc_stack, state);
476
477           MC_replay(mc_stack);
478
479           XBT_DEBUG("Back-tracking to state %d at depth %d done", state->num, xbt_fifo_size(mc_stack));
480
481           break;
482         } else {
483           XBT_DEBUG("Delete state %d at depth %d", state->num, xbt_fifo_size(mc_stack) + 1);
484           MC_state_delete(state, !state->in_visited_states ? 1 : 0);
485         }
486
487     }
488   }
489
490   MC_print_statistics(mc_stats);
491   return SIMGRID_MC_EXIT_SUCCESS;
492 }
493
494 int CommunicationDeterminismChecker::run()
495 {
496   XBT_INFO("Check communication determinism");
497   mc_model_checker->wait_for_requests();
498
499   if (mc_mode == MC_MODE_CLIENT)
500     // This will move somehwere else:
501     simgrid::mc::Client::get()->handleMessages();
502
503   /* Create exploration stack */
504   mc_stack = xbt_fifo_new();
505
506   this->prepare();
507
508   initial_global_state = xbt_new0(s_mc_global_t, 1);
509   initial_global_state->snapshot = simgrid::mc::take_snapshot(0);
510   initial_global_state->initial_communications_pattern_done = 0;
511   initial_global_state->recv_deterministic = 1;
512   initial_global_state->send_deterministic = 1;
513   initial_global_state->recv_diff = nullptr;
514   initial_global_state->send_diff = nullptr;
515
516   return this->main();
517 }
518
519 }
520 }