Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
chain reaction when solving implicit cast smells
[simgrid.git] / src / instr / instr_platform.cpp
1 /* Copyright (c) 2010-2021. The SimGrid Team. All rights reserved.          */
2
3 /* This program is free software; you can redistribute it and/or modify it
4  * under the terms of the license (GNU LGPL) which comes with this package. */
5
6 #include "src/instr/instr_private.hpp"
7
8 #include "simgrid/kernel/routing/NetPoint.hpp"
9 #include "simgrid/kernel/routing/NetZoneImpl.hpp"
10 #include "simgrid/s4u/Actor.hpp"
11 #include "simgrid/s4u/Comm.hpp"
12 #include "simgrid/s4u/Engine.hpp"
13 #include "simgrid/s4u/Exec.hpp"
14 #include "simgrid/s4u/Host.hpp"
15 #include "simgrid/s4u/VirtualMachine.hpp"
16 #include "src/surf/cpu_interface.hpp"
17 #include "src/surf/network_interface.hpp"
18 #include "src/surf/surf_interface.hpp"
19 #include "src/surf/xml/platf_private.hpp"
20 #include "surf/surf.hpp"
21 #include "xbt/graph.h"
22
23 #include <fstream>
24
25 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(instr_routing, instr, "Tracing platform hierarchy");
26
27 std::string instr_pid(simgrid::s4u::Actor const& proc)
28 {
29   return std::string(proc.get_name()) + "-" + std::to_string(proc.get_pid());
30 }
31
32 static simgrid::instr::Container* lowestCommonAncestor(const simgrid::instr::Container* a1,
33                                                        const simgrid::instr::Container* a2)
34 {
35   // this is only an optimization (since most of a1 and a2 share the same parent)
36   if (a1->get_parent() == a2->get_parent())
37     return a1->get_parent();
38
39   // create an array with all ancestors of a1
40   std::vector<simgrid::instr::Container*> ancestors_a1;
41   for (auto* p = a1->get_parent(); p != nullptr; p = p->get_parent())
42     ancestors_a1.push_back(p);
43
44   // create an array with all ancestors of a2
45   std::vector<simgrid::instr::Container*> ancestors_a2;
46   for (auto* p = a2->get_parent(); p != nullptr; p = p->get_parent())
47     ancestors_a2.push_back(p);
48
49   // find the lowest ancestor
50   simgrid::instr::Container* p = nullptr;
51   int i = static_cast<int>(ancestors_a1.size()) - 1;
52   int j = static_cast<int>(ancestors_a2.size()) - 1;
53   while (i >= 0 && j >= 0) {
54     simgrid::instr::Container* a1p       = ancestors_a1.at(i);
55     const simgrid::instr::Container* a2p = ancestors_a2.at(j);
56     if (a1p == a2p) {
57       p = a1p;
58     } else {
59       break;
60     }
61     i--;
62     j--;
63   }
64   return p;
65 }
66
67 static void linkContainers(simgrid::instr::Container* src, simgrid::instr::Container* dst,
68                            std::set<std::string, std::less<>>* filter)
69 {
70   // ignore loopback
71   if (src->get_name() == "__loopback__" || dst->get_name() == "__loopback__") {
72     XBT_DEBUG("  linkContainers: ignoring loopback link");
73     return;
74   }
75
76   // find common parent
77   simgrid::instr::Container* parent = lowestCommonAncestor(src, dst);
78   xbt_assert(parent, "common parent unknown, this is a tracing problem");
79
80   // check if we already register this pair (we only need one direction)
81   std::string aux1 = src->get_name() + dst->get_name();
82   std::string aux2 = dst->get_name() + src->get_name();
83   if (filter->find(aux1) != filter->end()) {
84     XBT_DEBUG("  linkContainers: already registered %s <-> %s (1)", src->get_cname(), dst->get_cname());
85     return;
86   }
87   if (filter->find(aux2) != filter->end()) {
88     XBT_DEBUG("  linkContainers: already registered %s <-> %s (2)", dst->get_cname(), src->get_cname());
89     return;
90   }
91
92   // ok, not found, register it
93   filter->insert(aux1);
94   filter->insert(aux2);
95
96   // declare type
97   std::string link_typename = parent->get_type()->get_name() + "-" + src->get_type()->get_name() +
98                               std::to_string(src->get_type()->get_id()) + "-" + dst->get_type()->get_name() +
99                               std::to_string(dst->get_type()->get_id());
100   simgrid::instr::LinkType* link =
101       parent->get_type()->by_name_or_create(link_typename, src->get_type(), dst->get_type());
102   link->set_calling_container(parent);
103
104   // create the link
105   static long long counter = 0;
106
107   std::string key = std::to_string(counter);
108   counter++;
109
110   link->start_event(src, "topology", key);
111   link->end_event(dst, "topology", key);
112
113   XBT_DEBUG("  linkContainers %s <-> %s", src->get_cname(), dst->get_cname());
114 }
115
116 static void recursiveGraphExtraction(const simgrid::s4u::NetZone* netzone, const simgrid::instr::Container* container,
117                                      std::set<std::string, std::less<>>* filter)
118 {
119   if (not TRACE_platform_topology()) {
120     XBT_DEBUG("Graph extraction disabled by user.");
121     return;
122   }
123   XBT_DEBUG("Graph extraction for NetZone = %s", netzone->get_cname());
124
125   // bottom-up recursion
126   for (auto const& nz_son : netzone->get_children()) {
127     simgrid::instr::Container* child_container = container->get_child_by_name(nz_son->get_name());
128     recursiveGraphExtraction(nz_son, child_container, filter);
129   }
130
131   auto* graph = xbt_graph_new_graph(0, nullptr);
132   std::map<std::string, xbt_node_t, std::less<>> nodes;
133   std::map<std::string, xbt_edge_t, std::less<>> edges;
134
135   netzone->get_impl()->get_graph(graph, &nodes, &edges);
136   for (auto const& elm : edges) {
137     const xbt_edge* edge = elm.second;
138     linkContainers(simgrid::instr::Container::by_name(static_cast<const char*>(edge->src->data)),
139                    simgrid::instr::Container::by_name(static_cast<const char*>(edge->dst->data)), filter);
140   }
141   xbt_graph_free_graph(graph, xbt_free_f, xbt_free_f, nullptr);
142 }
143
144 /*
145  * user categories support
146  */
147 static void recursiveNewVariableType(const std::string& new_typename, const std::string& color,
148                                      simgrid::instr::Type* root)
149 {
150   if (root->get_name() == "HOST" || root->get_name() == "VM")
151     root->by_name_or_create(std::string("p") + new_typename, color);
152
153   if (root->get_name() == "LINK")
154     root->by_name_or_create(std::string("b") + new_typename, color);
155
156   for (auto const& elm : root->get_children()) {
157     recursiveNewVariableType(new_typename, color, elm.second.get());
158   }
159 }
160
161 void instr_new_variable_type(const std::string& new_typename, const std::string& color)
162 {
163   recursiveNewVariableType(new_typename, color, simgrid::instr::Container::get_root()->get_type());
164 }
165
166 static void recursiveNewUserVariableType(const std::string& parent_type, const std::string& new_typename,
167                                          const std::string& color, simgrid::instr::Type* root)
168 {
169   if (root->get_name() == parent_type) {
170     root->by_name_or_create(new_typename, color);
171   }
172   for (auto const& elm : root->get_children())
173     recursiveNewUserVariableType(parent_type, new_typename, color, elm.second.get());
174 }
175
176 void instr_new_user_variable_type(const std::string& parent_type, const std::string& new_typename,
177                                   const std::string& color)
178 {
179   recursiveNewUserVariableType(parent_type, new_typename, color, simgrid::instr::Container::get_root()->get_type());
180 }
181
182 static void recursiveNewUserStateType(const std::string& parent_type, const std::string& new_typename,
183                                       simgrid::instr::Type* root)
184 {
185   if (root->get_name() == parent_type)
186     root->by_name_or_create<simgrid::instr::StateType>(new_typename);
187
188   for (auto const& elm : root->get_children())
189     recursiveNewUserStateType(parent_type, new_typename, elm.second.get());
190 }
191
192 void instr_new_user_state_type(const std::string& parent_type, const std::string& new_typename)
193 {
194   recursiveNewUserStateType(parent_type, new_typename, simgrid::instr::Container::get_root()->get_type());
195 }
196
197 static void recursiveNewValueForUserStateType(const std::string& type_name, const char* val, const std::string& color,
198                                               simgrid::instr::Type* root)
199 {
200   if (root->get_name() == type_name)
201     static_cast<simgrid::instr::StateType*>(root)->add_entity_value(val, color);
202
203   for (auto const& elm : root->get_children())
204     recursiveNewValueForUserStateType(type_name, val, color, elm.second.get());
205 }
206
207 void instr_new_value_for_user_state_type(const std::string& type_name, const char* value, const std::string& color)
208 {
209   recursiveNewValueForUserStateType(type_name, value, color, simgrid::instr::Container::get_root()->get_type());
210 }
211
212 namespace simgrid {
213 namespace instr {
214
215 void platform_graph_export_graphviz(const std::string& output_filename)
216 {
217   auto* g     = xbt_graph_new_graph(0, nullptr);
218   std::map<std::string, xbt_node_t, std::less<>> nodes;
219   std::map<std::string, xbt_edge_t, std::less<>> edges;
220   s4u::Engine::get_instance()->get_netzone_root()->extract_xbt_graph(g, &nodes, &edges);
221
222   std::ofstream fs;
223   fs.open(output_filename, std::ofstream::out);
224   xbt_assert(not fs.fail(), "Failed to open %s", output_filename.c_str());
225
226   if (g->directed)
227     fs << "digraph test {" << std::endl;
228   else
229     fs << "graph test {" << std::endl;
230
231   fs << "  graph [overlap=scale]" << std::endl;
232
233   fs << "  node [shape=box, style=filled]" << std::endl;
234   fs << "  node [width=.3, height=.3, style=filled, color=skyblue]" << std::endl << std::endl;
235
236   for (auto const& elm : nodes)
237     fs << "  \"" << elm.first << "\";" << std::endl;
238
239   for (auto const& elm : edges) {
240     const char* src_s = static_cast<char*>(elm.second->src->data);
241     const char* dst_s = static_cast<char*>(elm.second->dst->data);
242     if (g->directed)
243       fs << "  \"" << src_s << "\" -> \"" << dst_s << "\";" << std::endl;
244     else
245       fs << "  \"" << src_s << "\" -- \"" << dst_s << "\";" << std::endl;
246   }
247   fs << "}" << std::endl;
248   fs.close();
249
250   xbt_graph_free_graph(g, xbt_free_f, xbt_free_f, nullptr);
251 }
252
253 /* Callbacks */
254 static std::vector<NetZoneContainer*> currentContainer; /* push and pop, used only in creation */
255 static void on_netzone_creation(s4u::NetZone const& netzone)
256 {
257   std::string id = netzone.get_name();
258   if (Container::get_root() == nullptr) {
259     auto* root = new NetZoneContainer(id, 0, nullptr);
260     xbt_assert(Container::get_root() == root);
261
262     if (TRACE_smpi_is_enabled()) {
263       auto* mpi = root->get_type()->by_name_or_create<ContainerType>("MPI");
264       if (not TRACE_smpi_is_grouped())
265         mpi->by_name_or_create<StateType>("MPI_STATE");
266       root->get_type()->by_name_or_create("MPI_LINK", mpi, mpi);
267       root->get_type()->by_name_or_create("MIGRATE_LINK", mpi, mpi);
268       mpi->by_name_or_create<StateType>("MIGRATE_STATE");
269     }
270
271     if (TRACE_needs_platform()) {
272       currentContainer.push_back(root);
273     }
274     return;
275   }
276
277   if (TRACE_needs_platform()) {
278     auto level      = static_cast<unsigned>(currentContainer.size());
279     auto* container = new NetZoneContainer(id, level, currentContainer.back());
280     currentContainer.push_back(container);
281   }
282 }
283
284 static void on_link_creation(s4u::Link const& link)
285 {
286   if (currentContainer.empty()) // No ongoing parsing. Are you creating the loopback?
287     return;
288
289   auto* container = new Container(link.get_name(), "LINK", currentContainer.back());
290
291   if ((TRACE_categorized() || TRACE_uncategorized() || TRACE_platform()) && (not TRACE_disable_link())) {
292     VariableType* bandwidth = container->get_type()->by_name_or_create("bandwidth", "");
293     bandwidth->set_calling_container(container);
294     bandwidth->set_event(0, link.get_bandwidth());
295     VariableType* latency = container->get_type()->by_name_or_create("latency", "");
296     latency->set_calling_container(container);
297     latency->set_event(0, link.get_latency());
298   }
299
300   if (TRACE_uncategorized()) {
301     container->get_type()->by_name_or_create("bandwidth_used", "0.5 0.5 0.5");
302   }
303 }
304
305 static void on_host_creation(s4u::Host const& host)
306 {
307   if (Container::by_name_or_null(host.get_name())) // This host already exists, do nothing
308     return;
309
310   Container* container  = new HostContainer(host, currentContainer.back());
311   const Container* root = Container::get_root();
312
313   if ((TRACE_categorized() || TRACE_uncategorized() || TRACE_platform()) && (not TRACE_disable_speed())) {
314     VariableType* speed = container->get_type()->by_name_or_create("speed", "");
315     speed->set_calling_container(container);
316     speed->set_event(0, host.get_speed());
317
318     VariableType* cores = container->get_type()->by_name_or_create("core_count", "");
319     cores->set_calling_container(container);
320     cores->set_event(0, host.get_core_count());
321   }
322
323   if (TRACE_uncategorized())
324     container->get_type()->by_name_or_create("speed_used", "0.5 0.5 0.5");
325
326   if (TRACE_smpi_is_enabled() && TRACE_smpi_is_grouped()) {
327     auto* mpi = container->get_type()->by_name_or_create<ContainerType>("MPI");
328     mpi->by_name_or_create<StateType>("MPI_STATE");
329     root->get_type()->by_name_or_create("MIGRATE_LINK", mpi, mpi);
330     mpi->by_name_or_create<StateType>("MIGRATE_STATE");
331   }
332 }
333
334 static void on_action_state_change(kernel::resource::Action const& action,
335                                    kernel::resource::Action::State /* previous */)
336 {
337   auto n = static_cast<unsigned>(action.get_variable()->get_number_of_constraint());
338
339   for (unsigned i = 0; i < n; i++) {
340     double value = action.get_rate() * action.get_variable()->get_constraint_weight(i);
341     /* Beware of composite actions: ptasks put links and cpus together. Extra pb: we cannot dynamic_cast from void* */
342     kernel::resource::Resource* resource = action.get_variable()->get_constraint(i)->get_id();
343     const kernel::resource::CpuImpl* cpu = dynamic_cast<kernel::resource::CpuImpl*>(resource);
344
345     if (cpu != nullptr)
346       resource_set_utilization("HOST", "speed_used", cpu->get_cname(), action.get_category(), value,
347                                action.get_last_update(), simgrid_get_clock() - action.get_last_update());
348
349     const kernel::resource::LinkImpl* link = dynamic_cast<kernel::resource::LinkImpl*>(resource);
350
351     if (link != nullptr)
352       resource_set_utilization("LINK", "bandwidth_used", link->get_cname(), action.get_category(), value,
353                                action.get_last_update(), simgrid_get_clock() - action.get_last_update());
354   }
355 }
356
357 static void on_platform_created()
358 {
359   currentContainer.clear();
360   std::set<std::string, std::less<>> filter;
361   XBT_DEBUG("Starting graph extraction.");
362   recursiveGraphExtraction(s4u::Engine::get_instance()->get_netzone_root(), Container::get_root(), &filter);
363   XBT_DEBUG("Graph extraction finished.");
364   dump_buffer(true);
365 }
366
367 static void on_actor_creation(s4u::Actor const& actor)
368 {
369   const Container* root      = Container::get_root();
370   Container* container       = Container::by_name(actor.get_host()->get_name());
371   std::string container_name = instr_pid(actor);
372
373   container->create_child(container_name, "ACTOR");
374   auto* actor_type = container->get_type()->by_name_or_create<ContainerType>("ACTOR");
375   auto* state      = actor_type->by_name_or_create<StateType>("ACTOR_STATE");
376   state->add_entity_value("suspend", "1 0 1");
377   state->add_entity_value("sleep", "1 1 0");
378   state->add_entity_value("receive", "1 0 0");
379   state->add_entity_value("send", "0 0 1");
380   state->add_entity_value("execute", "0 1 1");
381   root->get_type()->by_name_or_create("ACTOR_LINK", actor_type, actor_type);
382
383   actor.on_exit([container_name](bool failed) {
384     if (failed)
385       // kill means that this actor no longer exists, let's destroy it
386       Container::by_name(container_name)->remove_from_parent();
387   });
388 }
389
390 static void on_actor_host_change(s4u::Actor const& actor, s4u::Host const& /*previous_location*/)
391 {
392   static long long int counter = 0;
393   Container* container         = Container::by_name(instr_pid(actor));
394   LinkType* link               = Container::get_root()->get_link("ACTOR_LINK");
395
396   // start link
397   link->start_event(container, "M", std::to_string(counter));
398   // destroy existing container of this process
399   container->remove_from_parent();
400   // create new container on the new_host location
401   Container::by_name(actor.get_host()->get_name())->create_child(instr_pid(actor), "ACTOR");
402   // end link
403   link->end_event(Container::by_name(instr_pid(actor)), "M", std::to_string(counter));
404   counter++;
405 }
406
407 static void on_vm_creation(s4u::Host const& host)
408 {
409   const Container* container = new HostContainer(host, currentContainer.back());
410   const Container* root      = Container::get_root();
411   auto* vm                   = container->get_type()->by_name_or_create<ContainerType>("VM");
412   auto* state                = vm->by_name_or_create<StateType>("VM_STATE");
413   state->add_entity_value("suspend", "1 0 1");
414   state->add_entity_value("sleep", "1 1 0");
415   state->add_entity_value("receive", "1 0 0");
416   state->add_entity_value("send", "0 0 1");
417   state->add_entity_value("execute", "0 1 1");
418   root->get_type()->by_name_or_create("VM_LINK", vm, vm);
419   root->get_type()->by_name_or_create("VM_ACTOR_LINK", vm, vm);
420 }
421
422 void define_callbacks()
423 {
424   // always need the callbacks to zones (we need only the root zone), to create the rootContainer and the rootType
425   // properly
426   if (TRACE_needs_platform()) {
427     s4u::Engine::on_platform_created.connect(on_platform_created);
428     s4u::Host::on_creation.connect(on_host_creation);
429     s4u::Host::on_speed_change.connect([](s4u::Host const& host) {
430       Container::by_name(host.get_name())
431           ->get_variable("speed")
432           ->set_event(surf_get_clock(), host.get_core_count() * host.get_available_speed());
433     });
434     s4u::Link::on_creation.connect(on_link_creation);
435     s4u::Link::on_bandwidth_change.connect([](s4u::Link const& link) {
436       Container::by_name(link.get_name())
437           ->get_variable("bandwidth")
438           ->set_event(surf_get_clock(), sg_bandwidth_factor * link.get_bandwidth());
439     });
440     s4u::NetZone::on_seal.connect([](s4u::NetZone const& /*netzone*/) { currentContainer.pop_back(); });
441     kernel::routing::NetPoint::on_creation.connect([](kernel::routing::NetPoint const& netpoint) {
442       if (netpoint.is_router())
443         new RouterContainer(netpoint.get_name(), currentContainer.back());
444     });
445   }
446
447   s4u::NetZone::on_creation.connect(on_netzone_creation);
448
449   kernel::resource::CpuAction::on_state_change.connect(on_action_state_change);
450   s4u::Link::on_communication_state_change.connect(on_action_state_change);
451
452   if (TRACE_actor_is_enabled()) {
453     s4u::Actor::on_creation.connect(on_actor_creation);
454     s4u::Actor::on_destruction.connect([](s4u::Actor const& actor) {
455       auto container = Container::by_name_or_null(instr_pid(actor));
456       if (container != nullptr)
457         container->remove_from_parent();
458     });
459     s4u::Actor::on_suspend.connect([](s4u::Actor const& actor) {
460       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("suspend");
461     });
462     s4u::Actor::on_resume.connect(
463         [](s4u::Actor const& actor) { Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event(); });
464     s4u::Actor::on_sleep.connect([](s4u::Actor const& actor) {
465       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("sleep");
466     });
467     s4u::Actor::on_wake_up.connect(
468         [](s4u::Actor const& actor) { Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event(); });
469     s4u::Exec::on_start.connect([](s4u::Exec const&) {
470       Container::by_name(instr_pid(*s4u::Actor::self()))->get_state("ACTOR_STATE")->push_event("execute");
471     });
472     s4u::Exec::on_completion.connect([](s4u::Exec const&) {
473       Container::by_name(instr_pid(*s4u::Actor::self()))->get_state("ACTOR_STATE")->pop_event();
474     });
475     s4u::Comm::on_start.connect([](s4u::Comm const&, bool is_sender) {
476       Container::by_name(instr_pid(*s4u::Actor::self()))
477           ->get_state("ACTOR_STATE")
478           ->push_event(is_sender ? "send" : "receive");
479     });
480     s4u::Comm::on_completion.connect([](s4u::Comm const&) {
481       Container::by_name(instr_pid(*s4u::Actor::self()))->get_state("ACTOR_STATE")->pop_event();
482     });
483     s4u::Actor::on_host_change.connect(on_actor_host_change);
484   }
485
486   if (TRACE_smpi_is_enabled() && TRACE_smpi_is_computing()) {
487     s4u::Exec::on_start.connect([](s4u::Exec const& exec) {
488       Container::by_name(std::string("rank-") + std::to_string(s4u::Actor::self()->get_pid()))
489           ->get_state("MPI_STATE")
490           ->push_event("computing", new CpuTIData("compute", exec.get_cost()));
491     });
492     s4u::Exec::on_completion.connect([](s4u::Exec const&) {
493       Container::by_name(std::string("rank-") + std::to_string(s4u::Actor::self()->get_pid()))
494           ->get_state("MPI_STATE")
495           ->pop_event();
496     });
497   }
498
499   if (TRACE_vm_is_enabled()) {
500     s4u::Host::on_creation.connect(on_vm_creation);
501     s4u::VirtualMachine::on_start.connect([](s4u::VirtualMachine const& vm) {
502       Container::by_name(vm.get_name())->get_state("VM_STATE")->push_event("start");
503     });
504     s4u::VirtualMachine::on_started.connect(
505         [](s4u::VirtualMachine const& vm) { Container::by_name(vm.get_name())->get_state("VM_STATE")->pop_event(); });
506     s4u::VirtualMachine::on_suspend.connect([](s4u::VirtualMachine const& vm) {
507       Container::by_name(vm.get_name())->get_state("VM_STATE")->push_event("suspend");
508     });
509     s4u::VirtualMachine::on_resume.connect(
510         [](s4u::VirtualMachine const& vm) { Container::by_name(vm.get_name())->get_state("VM_STATE")->pop_event(); });
511     s4u::Host::on_destruction.connect(
512         [](s4u::Host const& host) { Container::by_name(host.get_name())->remove_from_parent(); });
513   }
514 }
515 } // namespace instr
516 } // namespace simgrid