Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
use signal to decouple instr from surf code
[simgrid.git] / src / instr / instr_platform.cpp
1 /* Copyright (c) 2010-2020. The SimGrid Team. All rights reserved.          */
2
3 /* This program is free software; you can redistribute it and/or modify it
4  * under the terms of the license (GNU LGPL) which comes with this package. */
5
6 #include "src/instr/instr_private.hpp"
7
8 #include "simgrid/kernel/routing/NetPoint.hpp"
9 #include "simgrid/kernel/routing/NetZoneImpl.hpp"
10 #include "simgrid/s4u/Actor.hpp"
11 #include "simgrid/s4u/Comm.hpp"
12 #include "simgrid/s4u/Engine.hpp"
13 #include "simgrid/s4u/Exec.hpp"
14 #include "simgrid/s4u/Host.hpp"
15 #include "simgrid/s4u/VirtualMachine.hpp"
16 #include "src/surf/cpu_interface.hpp"
17 #include "src/surf/network_interface.hpp"
18 #include "src/surf/surf_interface.hpp"
19 #include "src/surf/xml/platf_private.hpp"
20 #include "surf/surf.hpp"
21 #include "xbt/graph.h"
22
23 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(instr_routing, instr, "Tracing platform hierarchy");
24
25 std::string instr_pid(simgrid::s4u::Actor const& proc)
26 {
27   return std::string(proc.get_name()) + "-" + std::to_string(proc.get_pid());
28 }
29
30 static const char* instr_node_name(const s_xbt_node_t* node)
31 {
32   return static_cast<char*>(xbt_graph_node_get_data(node));
33 }
34
35 static container_t lowestCommonAncestor(const simgrid::instr::Container* a1, const simgrid::instr::Container* a2)
36 {
37   // this is only an optimization (since most of a1 and a2 share the same parent)
38   if (a1->father_ == a2->father_)
39     return a1->father_;
40
41   // create an array with all ancestors of a1
42   std::vector<container_t> ancestors_a1;
43   container_t p = a1->father_;
44   while (p) {
45     ancestors_a1.push_back(p);
46     p = p->father_;
47   }
48
49   // create an array with all ancestors of a2
50   std::vector<container_t> ancestors_a2;
51   p = a2->father_;
52   while (p) {
53     ancestors_a2.push_back(p);
54     p = p->father_;
55   }
56
57   // find the lowest ancestor
58   p     = nullptr;
59   int i = ancestors_a1.size() - 1;
60   int j = ancestors_a2.size() - 1;
61   while (i >= 0 && j >= 0) {
62     container_t a1p = ancestors_a1.at(i);
63     const simgrid::instr::Container* a2p = ancestors_a2.at(j);
64     if (a1p == a2p) {
65       p = a1p;
66     } else {
67       break;
68     }
69     i--;
70     j--;
71   }
72   return p;
73 }
74
75 static void linkContainers(container_t src, container_t dst, std::set<std::string>* filter)
76 {
77   // ignore loopback
78   if (src->get_name() == "__loopback__" || dst->get_name() == "__loopback__") {
79     XBT_DEBUG("  linkContainers: ignoring loopback link");
80     return;
81   }
82
83   // find common father
84   container_t father = lowestCommonAncestor(src, dst);
85   if (not father) {
86     xbt_die("common father unknown, this is a tracing problem");
87   }
88
89   // check if we already register this pair (we only need one direction)
90   std::string aux1 = src->get_name() + dst->get_name();
91   std::string aux2 = dst->get_name() + src->get_name();
92   if (filter->find(aux1) != filter->end()) {
93     XBT_DEBUG("  linkContainers: already registered %s <-> %s (1)", src->get_cname(), dst->get_cname());
94     return;
95   }
96   if (filter->find(aux2) != filter->end()) {
97     XBT_DEBUG("  linkContainers: already registered %s <-> %s (2)", dst->get_cname(), src->get_cname());
98     return;
99   }
100
101   // ok, not found, register it
102   filter->insert(aux1);
103   filter->insert(aux2);
104
105   // declare type
106   std::string link_typename = father->type_->get_name() + "-" + src->type_->get_name() +
107                               std::to_string(src->type_->get_id()) + "-" + dst->type_->get_name() +
108                               std::to_string(dst->type_->get_id());
109   simgrid::instr::LinkType* link = father->type_->by_name_or_create(link_typename, src->type_, dst->type_);
110   link->set_calling_container(father);
111
112   // create the link
113   static long long counter = 0;
114
115   std::string key = std::to_string(counter);
116   counter++;
117
118   link->start_event(src, "topology", key);
119   link->end_event(dst, "topology", key);
120
121   XBT_DEBUG("  linkContainers %s <-> %s", src->get_cname(), dst->get_cname());
122 }
123
124 static void recursiveGraphExtraction(const simgrid::s4u::NetZone* netzone, container_t container,
125                                      std::set<std::string>* filter)
126 {
127   if (not TRACE_platform_topology()) {
128     XBT_DEBUG("Graph extraction disabled by user.");
129     return;
130   }
131   XBT_DEBUG("Graph extraction for NetZone = %s", netzone->get_cname());
132   if (not netzone->get_children().empty()) {
133     // bottom-up recursion
134     for (auto const& nz_son : netzone->get_children()) {
135       container_t child_container = container->children_.at(nz_son->get_name());
136       recursiveGraphExtraction(nz_son, child_container, filter);
137     }
138   }
139
140   xbt_graph_t graph                        = xbt_graph_new_graph(0, nullptr);
141   std::map<std::string, xbt_node_t>* nodes = new std::map<std::string, xbt_node_t>();
142   std::map<std::string, xbt_edge_t>* edges = new std::map<std::string, xbt_edge_t>();
143
144   netzone->get_impl()->get_graph(graph, nodes, edges);
145   for (auto elm : *edges) {
146     const xbt_edge* edge = elm.second;
147     linkContainers(simgrid::instr::Container::by_name(static_cast<const char*>(edge->src->data)),
148                    simgrid::instr::Container::by_name(static_cast<const char*>(edge->dst->data)), filter);
149   }
150   delete nodes;
151   delete edges;
152   xbt_graph_free_graph(graph, xbt_free_f, xbt_free_f, nullptr);
153 }
154
155 /*
156  * Callbacks
157  */
158
159 namespace simgrid {
160 namespace instr {
161
162 static std::vector<NetZoneContainer*> currentContainer; /* push and pop, used only in creation */
163 static void on_netzone_creation(s4u::NetZone const& netzone)
164 {
165   std::string id = netzone.get_name();
166   if (Container::get_root() == nullptr) {
167     NetZoneContainer* root = new NetZoneContainer(id, 0, nullptr);
168     xbt_assert(Container::get_root() == root);
169
170     if (TRACE_smpi_is_enabled()) {
171       ContainerType* mpi = root->type_->by_name_or_create<ContainerType>("MPI");
172       if (not TRACE_smpi_is_grouped())
173         mpi->by_name_or_create<StateType>("MPI_STATE");
174       root->type_->by_name_or_create("MPI_LINK", mpi, mpi);
175       // TODO See if we can move this to the LoadBalancer plugin
176       root->type_->by_name_or_create("MIGRATE_LINK", mpi, mpi);
177       mpi->by_name_or_create<StateType>("MIGRATE_STATE");
178     }
179
180     if (TRACE_needs_platform()) {
181       currentContainer.push_back(root);
182     }
183     return;
184   }
185
186   if (TRACE_needs_platform()) {
187     NetZoneContainer* container = new NetZoneContainer(id, currentContainer.size(), currentContainer.back());
188     currentContainer.push_back(container);
189   }
190 }
191
192 static void on_link_creation(s4u::Link const& link)
193 {
194   if (currentContainer.empty()) // No ongoing parsing. Are you creating the loopback?
195     return;
196
197   Container* container = new Container(link.get_name(), "LINK", currentContainer.back());
198
199   if ((TRACE_categorized() || TRACE_uncategorized() || TRACE_platform()) && (not TRACE_disable_link())) {
200     VariableType* bandwidth = container->type_->by_name_or_create("bandwidth", "");
201     bandwidth->set_calling_container(container);
202     bandwidth->set_event(0, link.get_bandwidth());
203     VariableType* latency = container->type_->by_name_or_create("latency", "");
204     latency->set_calling_container(container);
205     latency->set_event(0, link.get_latency());
206   }
207   if (TRACE_uncategorized()) {
208     container->type_->by_name_or_create("bandwidth_used", "0.5 0.5 0.5");
209   }
210 }
211
212 static void on_host_creation(s4u::Host const& host)
213 {
214   Container* container  = new HostContainer(host, currentContainer.back());
215   const Container* root = Container::get_root();
216
217   if ((TRACE_categorized() || TRACE_uncategorized() || TRACE_platform()) && (not TRACE_disable_speed())) {
218     VariableType* speed = container->type_->by_name_or_create("speed", "");
219     speed->set_calling_container(container);
220     speed->set_event(0, host.get_speed());
221
222     VariableType* cores = container->type_->by_name_or_create("core_count", "");
223     cores->set_calling_container(container);
224     cores->set_event(0, host.get_core_count());
225   }
226
227   if (TRACE_uncategorized())
228     container->type_->by_name_or_create("speed_used", "0.5 0.5 0.5");
229
230   if (TRACE_smpi_is_enabled() && TRACE_smpi_is_grouped()) {
231     ContainerType* mpi = container->type_->by_name_or_create<ContainerType>("MPI");
232     mpi->by_name_or_create<StateType>("MPI_STATE");
233     // TODO See if we can move this to the LoadBalancer plugin
234     root->type_->by_name_or_create("MIGRATE_LINK", mpi, mpi);
235     mpi->by_name_or_create<StateType>("MIGRATE_STATE");
236   }
237 }
238
239 static void on_host_speed_change(s4u::Host const& host)
240 {
241   Container::by_name(host.get_name())
242       ->get_variable("speed")
243       ->set_event(surf_get_clock(), host.get_core_count() * host.get_available_speed());
244 }
245
246 static void on_action_state_change(kernel::resource::Action const& action,
247                                    kernel::resource::Action::State /* previous */)
248 {
249   int n = action.get_variable()->get_number_of_constraint();
250
251   for (int i = 0; i < n; i++) {
252     double value = action.get_variable()->get_value() * action.get_variable()->get_constraint_weight(i);
253     /* Beware of composite actions: ptasks put links and cpus together. Extra pb: we cannot dynamic_cast from void* */
254     kernel::resource::Resource* resource = action.get_variable()->get_constraint(i)->get_id();
255     const kernel::resource::Cpu* cpu     = dynamic_cast<kernel::resource::Cpu*>(resource);
256
257     if (cpu != nullptr)
258       resource_set_utilization("HOST", "speed_used", cpu->get_cname(), action.get_category(), value,
259                                action.get_last_update(), SIMIX_get_clock() - action.get_last_update());
260
261     const kernel::resource::LinkImpl* link = dynamic_cast<kernel::resource::LinkImpl*>(resource);
262
263     if (link != nullptr)
264       resource_set_utilization("LINK", "bandwidth_used", link->get_cname(), action.get_category(), value,
265                                action.get_last_update(), SIMIX_get_clock() - action.get_last_update());
266   }
267 }
268
269 static void on_link_bandwidth_change(s4u::Link const& link)
270 {
271   Container::by_name(link.get_name())
272       ->get_variable("bandwidth")
273       ->set_event(surf_get_clock(), sg_bandwidth_factor * link.get_bandwidth());
274 }
275
276 static void on_netpoint_creation(kernel::routing::NetPoint const& netpoint)
277 {
278   if (netpoint.is_router())
279     new RouterContainer(netpoint.get_name(), currentContainer.back());
280 }
281
282 static void on_platform_created()
283 {
284   currentContainer.clear();
285   std::set<std::string>* filter = new std::set<std::string>();
286   XBT_DEBUG("Starting graph extraction.");
287   recursiveGraphExtraction(s4u::Engine::get_instance()->get_netzone_root(), Container::get_root(), filter);
288   XBT_DEBUG("Graph extraction finished.");
289   delete filter;
290   TRACE_paje_dump_buffer(true);
291 }
292
293 static void on_actor_creation(s4u::Actor const& actor)
294 {
295   const Container* root = Container::get_root();
296   Container* container  = Container::by_name(actor.get_host()->get_name());
297
298   container->create_child(instr_pid(actor), "ACTOR");
299   ContainerType* actor_type = container->type_->by_name_or_create<ContainerType>("ACTOR");
300   StateType* state          = actor_type->by_name_or_create<StateType>("ACTOR_STATE");
301   state->add_entity_value("suspend", "1 0 1");
302   state->add_entity_value("sleep", "1 1 0");
303   state->add_entity_value("receive", "1 0 0");
304   state->add_entity_value("send", "0 0 1");
305   state->add_entity_value("execute", "0 1 1");
306   root->type_->by_name_or_create("ACTOR_LINK", actor_type, actor_type);
307   root->type_->by_name_or_create("ACTOR_TASK_LINK", actor_type, actor_type);
308
309   std::string container_name = instr_pid(actor);
310   actor.on_exit([container_name](bool failed) {
311     if (failed)
312       // kill means that this actor no longer exists, let's destroy it
313       Container::by_name(container_name)->remove_from_parent();
314   });
315 }
316
317 static void on_actor_host_change(s4u::Actor const& actor, s4u::Host const& /*previous_location*/)
318 {
319   static long long int counter = 0;
320   container_t container        = Container::by_name(instr_pid(actor));
321   LinkType* link               = Container::get_root()->get_link("ACTOR_LINK");
322
323   // start link
324   link->start_event(container, "M", std::to_string(counter));
325   // destroy existing container of this process
326   container->remove_from_parent();
327   // create new container on the new_host location
328   Container::by_name(actor.get_host()->get_name())->create_child(instr_pid(actor), "ACTOR");
329   // end link
330   link->end_event(Container::by_name(instr_pid(actor)), "M", std::to_string(counter));
331   counter++;
332 }
333
334 static void on_vm_creation(s4u::Host const& host)
335 {
336   const Container* container = new HostContainer(host, currentContainer.back());
337   const Container* root      = Container::get_root();
338   ContainerType* vm          = container->type_->by_name_or_create<ContainerType>("VM");
339   StateType* state           = vm->by_name_or_create<StateType>("VM_STATE");
340   state->add_entity_value("suspend", "1 0 1");
341   state->add_entity_value("sleep", "1 1 0");
342   state->add_entity_value("receive", "1 0 0");
343   state->add_entity_value("send", "0 0 1");
344   state->add_entity_value("execute", "0 1 1");
345   root->type_->by_name_or_create("VM_LINK", vm, vm);
346   root->type_->by_name_or_create("VM_ACTOR_LINK", vm, vm);
347 }
348
349 void define_callbacks()
350 {
351   // always need the callbacks to zones (we need only the root zone), to create the rootContainer and the rootType
352   // properly
353   if (TRACE_needs_platform()) {
354     s4u::Engine::on_platform_created.connect(on_platform_created);
355     s4u::Host::on_creation.connect(on_host_creation);
356     s4u::Host::on_speed_change.connect(on_host_speed_change);
357     s4u::Link::on_creation.connect(on_link_creation);
358     s4u::Link::on_bandwidth_change.connect(on_link_bandwidth_change);
359     s4u::NetZone::on_seal.connect([](s4u::NetZone const& /*netzone*/) { currentContainer.pop_back(); });
360     kernel::routing::NetPoint::on_creation.connect(on_netpoint_creation);
361   }
362   s4u::NetZone::on_creation.connect(on_netzone_creation);
363   s4u::Engine::on_time_advance.connect([](double time_delta) { TRACE_paje_dump_buffer(false); });
364
365   kernel::resource::CpuAction::on_state_change.connect(on_action_state_change);
366   s4u::Link::on_communication_state_change.connect(on_action_state_change);
367
368   if (TRACE_actor_is_enabled()) {
369     s4u::Actor::on_creation.connect(on_actor_creation);
370     s4u::Actor::on_destruction.connect([](s4u::Actor const& actor) {
371       auto container = Container::by_name_or_null(instr_pid(actor));
372       if (container != nullptr)
373         container->remove_from_parent();
374     });
375     s4u::Actor::on_suspend.connect([](s4u::Actor const& actor) {
376       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("suspend");
377     });
378     s4u::Actor::on_resume.connect(
379         [](s4u::Actor const& actor) { Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event(); });
380     s4u::Actor::on_sleep.connect([](s4u::Actor const& actor) {
381       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("sleep");
382     });
383     s4u::Actor::on_wake_up.connect(
384         [](s4u::Actor const& actor) { Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event(); });
385     s4u::Exec::on_start.connect([](simgrid::s4u::Actor const& actor, s4u::Exec const&) {
386       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("execute");
387     });
388     s4u::Exec::on_completion.connect([](s4u::Actor const& actor, s4u::Exec const&) {
389       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event();
390     });
391     s4u::Comm::on_sender_start.connect([](s4u::Actor const& actor) {
392       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("send");
393     });
394     s4u::Comm::on_receiver_start.connect([](s4u::Actor const& actor) {
395       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("receive");
396     });
397     s4u::Comm::on_completion.connect(
398         [](s4u::Actor const& actor) { Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event(); });
399     s4u::Actor::on_host_change.connect(on_actor_host_change);
400   }
401
402   if (TRACE_vm_is_enabled()) {
403     s4u::Host::on_creation.connect(on_vm_creation);
404     s4u::VirtualMachine::on_start.connect([](s4u::VirtualMachine const& vm) {
405       Container::by_name(vm.get_name())->get_state("VM_STATE")->push_event("start");
406     });
407     s4u::VirtualMachine::on_started.connect(
408         [](s4u::VirtualMachine const& vm) { Container::by_name(vm.get_name())->get_state("VM_STATE")->pop_event(); });
409     s4u::VirtualMachine::on_suspend.connect([](s4u::VirtualMachine const& vm) {
410       Container::by_name(vm.get_name())->get_state("VM_STATE")->push_event("suspend");
411     });
412     s4u::VirtualMachine::on_resume.connect(
413         [](s4u::VirtualMachine const& vm) { Container::by_name(vm.get_name())->get_state("VM_STATE")->pop_event(); });
414     s4u::Host::on_destruction.connect(
415         [](s4u::Host const& host) { Container::by_name(host.get_name())->remove_from_parent(); });
416   }
417 }
418 } // namespace instr
419 } // namespace simgrid
420
421 /*
422  * user categories support
423  */
424 static void recursiveNewVariableType(const std::string& new_typename, const std::string& color,
425                                      simgrid::instr::Type* root)
426 {
427   if (root->get_name() == "HOST" || root->get_name() == "VM")
428     root->by_name_or_create(std::string("p") + new_typename, color);
429
430   if (root->get_name() == "LINK")
431     root->by_name_or_create(std::string("b") + new_typename, color);
432
433   for (auto const& elm : root->children_) {
434     recursiveNewVariableType(new_typename, color, elm.second.get());
435   }
436 }
437
438 void instr_new_variable_type(const std::string& new_typename, const std::string& color)
439 {
440   recursiveNewVariableType(new_typename, color, simgrid::instr::Container::get_root()->type_);
441 }
442
443 static void recursiveNewUserVariableType(const std::string& father_type, const std::string& new_typename,
444                                          const std::string& color, simgrid::instr::Type* root)
445 {
446   if (root->get_name() == father_type) {
447     root->by_name_or_create(new_typename, color);
448   }
449   for (auto const& elm : root->children_)
450     recursiveNewUserVariableType(father_type, new_typename, color, elm.second.get());
451 }
452
453 void instr_new_user_variable_type(const std::string& father_type, const std::string& new_typename,
454                                   const std::string& color)
455 {
456   recursiveNewUserVariableType(father_type, new_typename, color, simgrid::instr::Container::get_root()->type_);
457 }
458
459 static void recursiveNewUserStateType(const std::string& father_type, const std::string& new_typename,
460                                       simgrid::instr::Type* root)
461 {
462   if (root->get_name() == father_type)
463     root->by_name_or_create<simgrid::instr::StateType>(new_typename);
464
465   for (auto const& elm : root->children_)
466     recursiveNewUserStateType(father_type, new_typename, elm.second.get());
467 }
468
469 void instr_new_user_state_type(const std::string& father_type, const std::string& new_typename)
470 {
471   recursiveNewUserStateType(father_type, new_typename, simgrid::instr::Container::get_root()->type_);
472 }
473
474 static void recursiveNewValueForUserStateType(const std::string& type_name, const char* val, const std::string& color,
475                                               simgrid::instr::Type* root)
476 {
477   if (root->get_name() == type_name)
478     static_cast<simgrid::instr::StateType*>(root)->add_entity_value(val, color);
479
480   for (auto const& elm : root->children_)
481     recursiveNewValueForUserStateType(type_name, val, color, elm.second.get());
482 }
483
484 void instr_new_value_for_user_state_type(const std::string& type_name, const char* value, const std::string& color)
485 {
486   recursiveNewValueForUserStateType(type_name, value, color, simgrid::instr::Container::get_root()->type_);
487 }
488
489 #define GRAPHICATOR_SUPPORT_FUNCTIONS
490
491 static void recursiveXBTGraphExtraction(const s_xbt_graph_t* graph, std::map<std::string, xbt_node_t>* nodes,
492                                         std::map<std::string, xbt_edge_t>* edges, const_sg_netzone_t netzone,
493                                         container_t container)
494 {
495   if (not netzone->get_children().empty()) {
496     // bottom-up recursion
497     for (auto const& netzone_child : netzone->get_children()) {
498       container_t child_container = container->children_.at(netzone_child->get_name());
499       recursiveXBTGraphExtraction(graph, nodes, edges, netzone_child, child_container);
500     }
501   }
502
503   netzone->get_impl()->get_graph(graph, nodes, edges);
504 }
505
506 xbt_graph_t instr_routing_platform_graph()
507 {
508   xbt_graph_t ret                          = xbt_graph_new_graph(0, nullptr);
509   std::map<std::string, xbt_node_t>* nodes = new std::map<std::string, xbt_node_t>();
510   std::map<std::string, xbt_edge_t>* edges = new std::map<std::string, xbt_edge_t>();
511   recursiveXBTGraphExtraction(ret, nodes, edges, simgrid::s4u::Engine::get_instance()->get_netzone_root(),
512                               simgrid::instr::Container::get_root());
513   delete nodes;
514   delete edges;
515   return ret;
516 }
517
518 void instr_routing_platform_graph_export_graphviz(const s_xbt_graph_t* g, const char* filename)
519 {
520   unsigned int cursor = 0;
521   xbt_node_t node     = nullptr;
522   xbt_edge_t edge     = nullptr;
523
524   FILE* file = fopen(filename, "w");
525   xbt_assert(file, "Failed to open %s \n", filename);
526
527   if (g->directed)
528     fprintf(file, "digraph test {\n");
529   else
530     fprintf(file, "graph test {\n");
531
532   fprintf(file, "  graph [overlap=scale]\n");
533
534   fprintf(file, "  node [shape=box, style=filled]\n");
535   fprintf(file, "  node [width=.3, height=.3, style=filled, color=skyblue]\n\n");
536
537   xbt_dynar_foreach (g->nodes, cursor, node) {
538     fprintf(file, "  \"%s\";\n", instr_node_name(node));
539   }
540   xbt_dynar_foreach (g->edges, cursor, edge) {
541     const char* src_s = instr_node_name(edge->src);
542     const char* dst_s = instr_node_name(edge->dst);
543     if (g->directed)
544       fprintf(file, "  \"%s\" -> \"%s\";\n", src_s, dst_s);
545     else
546       fprintf(file, "  \"%s\" -- \"%s\";\n", src_s, dst_s);
547   }
548   fprintf(file, "}\n");
549   fclose(file);
550 }