Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
f2cd0925227200774e4d289cb78e10864190d958
[simgrid.git] / src / smpi / internals / instr_smpi.cpp
1 /* Copyright (c) 2010-2018. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include "private.hpp"
8 #include <boost/algorithm/string.hpp>
9 #include <simgrid/s4u/Actor.hpp>
10 #include <cctype>
11 #include <cstdarg>
12 #include <cwchar>
13 #include <deque>
14 #include <simgrid/sg_config.hpp>
15 #include <simgrid/s4u/Host.hpp>
16 #include <string>
17 #include <vector>
18
// Logging category for this file (instr_smpi, declared under instr); presumably the one
// used by the XBT_DEBUG calls further down -- confirm against the XBT logging macros.
19 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(instr_smpi, instr, "Tracing SMPI");
20
// Pending link keys, indexed by the string "src#dst#tag#send". Each mapped deque is
// allocated with new in TRACE_smpi_put_key(), consumed front-first in TRACE_smpi_get_key()
// and deleted in TRACE_smpi_release().
21 static std::unordered_map<std::string, std::deque<std::string>*> keys;
22
/* Flat table of (state name, "R G B" color) pairs: even slots hold a lowercase state name,
 * odd slots hold the color associated with it. The trailing pair of empty strings is kept
 * as an end marker. */
static const char* smpi_colors[] = {"recv",                "1 0 0",
                                    "irecv",               "1 0.52 0.52",
                                    "send",                "0 0 1",
                                    "isend",               "0.52 0.52 1",
                                    "sendrecv",            "0 1 1",
                                    "wait",                "1 1 0",
                                    "waitall",             "0.78 0.78 0",
                                    "waitany",             "0.78 0.78 0.58",
                                    "test",                "0.52 0.52 0",

                                    "allgather",           "1 0 0",
                                    "allgatherv",          "1 0.52 0.52",
                                    "allreduce",           "1 0 1",
                                    "alltoall",            "0.52 0 1",
                                    "alltoallv",           "0.78 0.52 1",
                                    "barrier",             "0 0.78 0.78",
                                    "bcast",               "0 0.78 0.39",
                                    "gather",              "1 1 0",
                                    "gatherv",             "1 1 0.52",
                                    "reduce",              "0 1 0",
                                    "reducescatter",       "0.52 1 0.52",
                                    "scan",                "1 0.58 0.23",
                                    "exscan",              "1 0.54 0.25",
                                    "scatterv",            "0.52 0 0.52",
                                    "scatter",             "1 0.74 0.54",

                                    "computing",           "0 1 1",
                                    "sleeping",            "0 0.5 0.5",

                                    "init",                "0 1 0",
                                    "finalize",            "0 1 0",

                                    "put",                 "0.3 1 0",
                                    "get",                 "0 1 0.3",
                                    "accumulate",          "1 0.3 0",
                                    "migration",           "0.2 0.5 0.2",
                                    "rput",                "0.3 1 0",
                                    "rget",                "0 1 0.3",
                                    "raccumulate",         "1 0.3 0",
                                    "compare_and_swap",    "0.3 1 0",
                                    "get_accumulate",      "0 1 0.3",
                                    "rget_accumulate",     "1 0.3 0",
                                    "win_fence",           "1 0 0.3",
                                    "win_post",            "1 0 0.8",
                                    "win_wait",            "1 0.8 0",
                                    "win_start",           "0.8 0 1",
                                    "win_complete",        "0.8 1 0",
                                    "win_lock",            "1 0 0.3",
                                    "win_unlock",          "1 0 0.3",
                                    "win_lock_all",        "1 0 0.8",
                                    "win_unlock_all",      "1 0.8 0",
                                    "win_flush",           "1 0 0.3",
                                    "win_flush_local",     "1 0 0.8",
                                    "win_flush_all",       "1 0.8 0",
                                    "win_flush_local_all", "1 0 0.3",
                                    "",                    ""};

/**
 * Look up the color associated with a state name.
 *
 * The name is lowercased, then compared against every name of smpi_colors. A table name
 * matches when it occurs anywhere inside the lowercased state (so an exact match is just the
 * longest possible match). Among all matching names the LONGEST one wins, which keeps short
 * names from shadowing longer ones ("irecv" must not be painted with the color of "recv",
 * nor "win_wait" with the color of "wait"). On equal length, the earliest table entry wins.
 *
 * @param state state name, in any letter case; must not be null
 * @return the "R G B" color string of the best match, or an empty string when no table name
 *         occurs in @p state. The result is never null.
 */
static const char* instr_find_color(const char* state)
{
  std::string target(state);
  for (char& c : target)
    c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));

  // Walk the table by its real size instead of relying on the end marker to stop the scan.
  constexpr std::size_t slots = sizeof(smpi_colors) / sizeof(smpi_colors[0]);
  const char* best_color      = ""; // same value the empty end marker used to yield
  std::size_t best_len        = 0;
  for (std::size_t i = 0; i + 1 < slots; i += 2) {
    const char* name      = smpi_colors[i];
    const std::size_t len = std::char_traits<char>::length(name);
    // The strict comparison skips the empty end marker and keeps the first of equal-length matches.
    if (len > best_len && target.find(name) != std::string::npos) {
      best_color = smpi_colors[i + 1];
      best_len   = len;
    }
  }
  return best_color;
}
150
151 XBT_PRIVATE std::string smpi_container_key(int rank)
152 {
153   return std::string("rank-") + std::to_string(rank);
154 }
155
156 XBT_PRIVATE container_t smpi_container(int rank)
157 {
158   return simgrid::instr::Container::by_name(smpi_container_key(rank));
159 }
160
161 static std::string TRACE_smpi_put_key(int src, int dst, int tag, int send)
162 {
163   // get the deque for src#dst
164   std::string aux =
165       std::to_string(src) + "#" + std::to_string(dst) + "#" + std::to_string(tag) + "#" + std::to_string(send);
166   auto it = keys.find(aux);
167   std::deque<std::string>* d;
168
169   if (it == keys.end()) {
170     d         = new std::deque<std::string>;
171     keys[aux] = d;
172   } else
173     d = it->second;
174
175   //generate the key
176   static unsigned long long counter = 0;
177   counter++;
178   std::string key =
179       std::to_string(src) + "_" + std::to_string(dst) + "_" + std::to_string(tag) + "_" + std::to_string(counter);
180
181   //push it
182   d->push_back(key);
183
184   return key;
185 }
186
187 static std::string TRACE_smpi_get_key(int src, int dst, int tag, int send)
188 {
189   std::string key;
190   std::string aux = std::to_string(src) + "#" + std::to_string(dst) + "#" + std::to_string(tag) + "#" +
191                     std::to_string(send == 1 ? 0 : 1);
192   auto it = keys.find(aux);
193   if (it == keys.end()) {
194     // first posted
195     key = TRACE_smpi_put_key(src, dst, tag, send);
196   } else {
197     key = it->second->front();
198     it->second->pop_front();
199   }
200   return key;
201 }
202
// Tracing category of each actor: written by TRACE_internal_smpi_set_category() and read by
// TRACE_internal_smpi_get_category(), both keyed by SIMIX_process_self().
203 static std::unordered_map<smx_actor_t, std::string> process_category;
204
205 void TRACE_internal_smpi_set_category (const char *category)
206 {
207   if (not TRACE_smpi_is_enabled())
208     return;
209
210   //declare category
211   TRACE_category (category);
212
213   if (category != nullptr)
214     process_category[SIMIX_process_self()] = category;
215 }
216
217 const char *TRACE_internal_smpi_get_category ()
218 {
219   if (not TRACE_smpi_is_enabled())
220     return nullptr;
221
222   auto it = process_category.find(SIMIX_process_self());
223   return (it == process_category.end()) ? nullptr : it->second.c_str();
224 }
225
226 void TRACE_smpi_release()
227 {
228   for (auto const& elm : keys)
229     delete elm.second;
230 }
231
232 void TRACE_smpi_setup_container(int rank, sg_host_t host)
233 {
234   std::string str = smpi_container_key(rank);
235
236   container_t father;
237   if (TRACE_smpi_is_grouped()){
238     father = simgrid::instr::Container::by_name_or_null(host->get_name());
239   }else{
240     father = simgrid::instr::Container::get_root();
241   }
242   xbt_assert(father != nullptr, "Could not find a parent for mpi rank %s at function %s", str.c_str(), __func__);
243   father->create_child(str, "MPI"); // This container is of type MPI
244 }
245
246 void TRACE_smpi_init(int rank)
247 {
248   if (not TRACE_smpi_is_enabled())
249     return;
250
251   TRACE_smpi_setup_container(rank, sg_host_self());
252 #if HAVE_PAPI
253   container_t container   = simgrid::instr::Container::by_name(str);
254   papi_counter_t counters = smpi_process()->papi_counters();
255
256   for (auto const& it : counters) {
257     /**
258      * Check whether this variable already exists or not. Otherwise, it will be created
259      * multiple times but only the last one would be used...
260      */
261     if (s_type::getOrNull(it.first.c_str(), container->type_) == nullptr) {
262       Type::variableNew(it.first.c_str(), "", container->type_);
263     }
264   }
265 #endif
266 }
267
268 void TRACE_smpi_finalize(int rank)
269 {
270   if (not TRACE_smpi_is_enabled())
271     return;
272
273   smpi_container(rank)->remove_from_parent();
274 }
275
276 void TRACE_smpi_computing_init(int rank)
277 {
278  //first use, initialize the color in the trace
279  if (TRACE_smpi_is_enabled() && TRACE_smpi_is_computing())
280    smpi_container(rank)->get_state("MPI_STATE")->add_entity_value("computing", instr_find_color("computing"));
281 }
282
283 void TRACE_smpi_computing_in(int rank, double amount)
284 {
285   if (TRACE_smpi_is_enabled() && TRACE_smpi_is_computing())
286     smpi_container(rank)
287         ->get_state("MPI_STATE")
288         ->push_event("computing", new simgrid::instr::CpuTIData("compute", amount));
289 }
290
291 void TRACE_smpi_computing_out(int rank)
292 {
293   if (TRACE_smpi_is_enabled() && TRACE_smpi_is_computing())
294     smpi_container(rank)->get_state("MPI_STATE")->pop_event();
295 }
296
297 void TRACE_smpi_sleeping_in(int rank, double duration)
298 {
299   if (TRACE_smpi_is_enabled() && TRACE_smpi_is_sleeping())
300     smpi_container(rank)
301         ->get_state("MPI_STATE")
302         ->push_event("sleeping", new simgrid::instr::CpuTIData("sleep", duration));
303 }
304
305 void TRACE_smpi_sleeping_out(int rank)
306 {
307   if (TRACE_smpi_is_enabled() && not TRACE_smpi_is_sleeping())
308     smpi_container(rank)->get_state("MPI_STATE")->pop_event();
309 }
310
311 void TRACE_smpi_testing_in(int rank)
312 {
313   //do not forget to set the color first, otherwise this will explode
314   if (not TRACE_smpi_is_enabled())
315     return;
316
317   simgrid::instr::StateType* state = smpi_container(rank)->get_state("MPI_STATE");
318   state->add_entity_value("test");
319   state->push_event("test", new simgrid::instr::NoOpTIData("test"));
320 }
321
322 void TRACE_smpi_testing_out(int rank)
323 {
324   if (TRACE_smpi_is_enabled())
325     smpi_container(rank)->get_state("MPI_STATE")->pop_event();
326 }
327
328 void TRACE_smpi_comm_in(int rank, const char* operation, simgrid::instr::TIData* extra)
329 {
330   if (not TRACE_smpi_is_enabled()) {
331     delete extra;
332     return;
333   }
334
335   simgrid::instr::StateType* state = smpi_container(rank)->get_state("MPI_STATE");
336   state->add_entity_value(operation, instr_find_color(operation));
337   state->push_event(operation, extra);
338 }
339
340 void TRACE_smpi_comm_out(int rank)
341 {
342   if (TRACE_smpi_is_enabled())
343     smpi_container(rank)->get_state("MPI_STATE")->pop_event();
344 }
345
346 void TRACE_smpi_send(int rank, int src, int dst, int tag, int size)
347 {
348   if (not TRACE_smpi_is_enabled())
349     return;
350
351   std::string key = TRACE_smpi_get_key(src, dst, tag, 1);
352
353   XBT_DEBUG("Send tracing from %d to %d, tag %d, with key %s", src, dst, tag, key.c_str());
354   simgrid::instr::Container::get_root()->get_link("MPI_LINK")->start_event(smpi_container(rank), "PTP", key, size);
355 }
356
357 void TRACE_smpi_recv(int src, int dst, int tag)
358 {
359   if (not TRACE_smpi_is_enabled())
360     return;
361
362   std::string key = TRACE_smpi_get_key(src, dst, tag, 0);
363
364   XBT_DEBUG("Recv tracing from %d to %d, tag %d, with key %s", src, dst, tag, key.c_str());
365   simgrid::instr::Container::get_root()->get_link("MPI_LINK")->end_event(smpi_container(dst), "PTP", key);
366 }
367
368 /**************** Functions to trace the migration of tasks. *****************/
369 void TRACE_smpi_send_process_data_in(int rank)
370 {
371   if (!TRACE_smpi_is_enabled()) return;
372
373   smpi_container(rank)->get_state("MIGRATE_STATE")->add_entity_value("migration", instr_find_color("migration"));
374   smpi_container(rank)->get_state("MIGRATE_STATE")->push_event("migration");
375 }
376
377 void TRACE_smpi_send_process_data_out(int rank)
378 {
379   if (!TRACE_smpi_is_enabled()) return; 
380
381   /* Clean the process state. */
382   smpi_container(rank)->get_state("MIGRATE_STATE")->pop_event();
383 }
384
385 void TRACE_smpi_process_change_host(int rank, sg_host_t new_host)
386 {
387   if (!TRACE_smpi_is_enabled()) return;
388
389   /** The key is (most likely) used to match the events in the trace */
390   static long long int counter = 0;
391   std::string key              = std::to_string(counter);
392   counter++;
393
394   // start link (= tell the trace that this rank moves from A to B)
395   container_t cont = smpi_container(rank);
396   simgrid::instr::Container::get_root()->get_link("MIGRATE_LINK")->start_event(cont, "M", key);
397
398   // Destroy container of this rank on this host
399   cont->remove_from_parent();
400
401   // Setup container on new host
402   TRACE_smpi_setup_container(rank, new_host);
403
404   // end link
405   cont = smpi_container(rank); // This points to the newly created container
406   simgrid::instr::Container::get_root()->get_link("MIGRATE_LINK")->end_event(cont, "M", key);
407 }