Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
0c785975c85e9357f6bedb709db6ca3be34af4ca
[simgrid.git] / src / instr / instr_platform.cpp
1 /* Copyright (c) 2010-2022. The SimGrid Team. All rights reserved.          */
2
3 /* This program is free software; you can redistribute it and/or modify it
4  * under the terms of the license (GNU LGPL) which comes with this package. */
5
6 #include <simgrid/kernel/routing/NetPoint.hpp>
7 #include <simgrid/kernel/routing/NetZoneImpl.hpp>
8 #include <simgrid/s4u/Actor.hpp>
9 #include <simgrid/s4u/Comm.hpp>
10 #include <simgrid/s4u/Engine.hpp>
11 #include <simgrid/s4u/Exec.hpp>
12 #include <simgrid/s4u/Host.hpp>
13 #include <simgrid/s4u/VirtualMachine.hpp>
14 #include <xbt/graph.h>
15
16 #include "src/instr/instr_private.hpp"
17 #include "src/kernel/resource/CpuImpl.hpp"
18 #include "src/kernel/resource/NetworkModel.hpp"
19 #include "src/surf/surf_interface.hpp"
20
21 #include <fstream>
22
23 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(instr_routing, instr, "Tracing platform hierarchy");
24
25 std::string instr_pid(simgrid::s4u::Actor const& proc)
26 {
27   return std::string(proc.get_name()) + "-" + std::to_string(proc.get_pid());
28 }
29
30 static simgrid::instr::Container* lowestCommonAncestor(const simgrid::instr::Container* a1,
31                                                        const simgrid::instr::Container* a2)
32 {
33   // this is only an optimization (since most of a1 and a2 share the same parent)
34   if (a1->get_parent() == a2->get_parent())
35     return a1->get_parent();
36
37   // create an array with all ancestors of a1
38   std::vector<simgrid::instr::Container*> ancestors_a1;
39   for (auto* p = a1->get_parent(); p != nullptr; p = p->get_parent())
40     ancestors_a1.push_back(p);
41
42   // create an array with all ancestors of a2
43   std::vector<simgrid::instr::Container*> ancestors_a2;
44   for (auto* p = a2->get_parent(); p != nullptr; p = p->get_parent())
45     ancestors_a2.push_back(p);
46
47   // find the lowest ancestor
48   simgrid::instr::Container* p = nullptr;
49   int i = static_cast<int>(ancestors_a1.size()) - 1;
50   int j = static_cast<int>(ancestors_a2.size()) - 1;
51   while (i >= 0 && j >= 0) {
52     simgrid::instr::Container* a1p       = ancestors_a1.at(i);
53     if (a1p != ancestors_a2.at(j))
54       break;
55     p = a1p;
56     i--;
57     j--;
58   }
59   return p;
60 }
61
62 static void linkContainers(simgrid::instr::Container* src, simgrid::instr::Container* dst,
63                            std::set<std::string, std::less<>>* filter)
64 {
65   // ignore loopback
66   if (src->get_name() == "__loopback__" || dst->get_name() == "__loopback__") {
67     XBT_DEBUG("  linkContainers: ignoring loopback link");
68     return;
69   }
70
71   // find common parent
72   simgrid::instr::Container* parent = lowestCommonAncestor(src, dst);
73   xbt_assert(parent, "common parent unknown, this is a tracing problem");
74
75   // check if we already register this pair (we only need one direction)
76   std::string aux1 = src->get_name() + dst->get_name();
77   std::string aux2 = dst->get_name() + src->get_name();
78   if (filter->find(aux1) != filter->end()) {
79     XBT_DEBUG("  linkContainers: already registered %s <-> %s (1)", src->get_cname(), dst->get_cname());
80     return;
81   }
82   if (filter->find(aux2) != filter->end()) {
83     XBT_DEBUG("  linkContainers: already registered %s <-> %s (2)", dst->get_cname(), src->get_cname());
84     return;
85   }
86
87   // ok, not found, register it
88   filter->insert(aux1);
89   filter->insert(aux2);
90
91   // declare type
92   std::string link_typename = parent->get_type()->get_name() + "-" + src->get_type()->get_name() +
93                               std::to_string(src->get_type()->get_id()) + "-" + dst->get_type()->get_name() +
94                               std::to_string(dst->get_type()->get_id());
95   simgrid::instr::LinkType* link =
96       parent->get_type()->by_name_or_create(link_typename, src->get_type(), dst->get_type());
97   link->set_calling_container(parent);
98
99   // create the link
100   static long long counter = 0;
101
102   std::string key = std::to_string(counter);
103   counter++;
104
105   link->start_event(src, "topology", key);
106   link->end_event(dst, "topology", key);
107
108   XBT_DEBUG("  linkContainers %s <-> %s", src->get_cname(), dst->get_cname());
109 }
110
111 static void recursiveGraphExtraction(const simgrid::s4u::NetZone* netzone, const simgrid::instr::Container* container,
112                                      std::set<std::string, std::less<>>* filter)
113 {
114   if (not TRACE_platform_topology()) {
115     XBT_DEBUG("Graph extraction disabled by user.");
116     return;
117   }
118   XBT_DEBUG("Graph extraction for NetZone = %s", netzone->get_cname());
119
120   // bottom-up recursion
121   for (auto const& nz_son : netzone->get_children()) {
122     const simgrid::instr::Container* child_container = container->get_child_by_name(nz_son->get_name());
123     recursiveGraphExtraction(nz_son, child_container, filter);
124   }
125
126   auto* graph = xbt_graph_new_graph(0, nullptr);
127   std::map<std::string, xbt_node_t, std::less<>> nodes;
128   std::map<std::string, xbt_edge_t, std::less<>> edges;
129
130   netzone->get_impl()->get_graph(graph, &nodes, &edges);
131   for (auto const& [_, edge] : edges) {
132     linkContainers(simgrid::instr::Container::by_name(static_cast<const char*>(edge->src->data)),
133                    simgrid::instr::Container::by_name(static_cast<const char*>(edge->dst->data)), filter);
134   }
135   xbt_graph_free_graph(graph, xbt_free_f, xbt_free_f, nullptr);
136 }
137
138 /*
139  * user categories support
140  */
141 static void recursiveNewVariableType(const std::string& new_typename, const std::string& color,
142                                      simgrid::instr::Type* root)
143 {
144   if (root->get_name() == "HOST" || root->get_name() == "VM")
145     root->by_name_or_create(std::string("p") + new_typename, color);
146
147   if (root->get_name() == "LINK")
148     root->by_name_or_create(std::string("b") + new_typename, color);
149
150   for (auto const& [_, child] : root->get_children()) {
151     recursiveNewVariableType(new_typename, color, child.get());
152   }
153 }
154
155 void instr_new_variable_type(const std::string& new_typename, const std::string& color)
156 {
157   recursiveNewVariableType(new_typename, color, simgrid::instr::Container::get_root()->get_type());
158 }
159
160 static void recursiveNewUserVariableType(const std::string& parent_type, const std::string& new_typename,
161                                          const std::string& color, simgrid::instr::Type* root)
162 {
163   if (root->get_name() == parent_type) {
164     root->by_name_or_create(new_typename, color);
165   }
166   for (auto const& [_, child] : root->get_children())
167     recursiveNewUserVariableType(parent_type, new_typename, color, child.get());
168 }
169
170 void instr_new_user_variable_type(const std::string& parent_type, const std::string& new_typename,
171                                   const std::string& color)
172 {
173   recursiveNewUserVariableType(parent_type, new_typename, color, simgrid::instr::Container::get_root()->get_type());
174 }
175
176 static void recursiveNewUserStateType(const std::string& parent_type, const std::string& new_typename,
177                                       simgrid::instr::Type* root)
178 {
179   if (root->get_name() == parent_type)
180     root->by_name_or_create<simgrid::instr::StateType>(new_typename);
181
182   for (auto const& [_, child] : root->get_children())
183     recursiveNewUserStateType(parent_type, new_typename, child.get());
184 }
185
186 void instr_new_user_state_type(const std::string& parent_type, const std::string& new_typename)
187 {
188   recursiveNewUserStateType(parent_type, new_typename, simgrid::instr::Container::get_root()->get_type());
189 }
190
191 static void recursiveNewValueForUserStateType(const std::string& type_name, const char* val, const std::string& color,
192                                               simgrid::instr::Type* root)
193 {
194   if (root->get_name() == type_name)
195     static_cast<simgrid::instr::StateType*>(root)->add_entity_value(val, color);
196
197   for (auto const& [_, child] : root->get_children())
198     recursiveNewValueForUserStateType(type_name, val, color, child.get());
199 }
200
201 void instr_new_value_for_user_state_type(const std::string& type_name, const char* value, const std::string& color)
202 {
203   recursiveNewValueForUserStateType(type_name, value, color, simgrid::instr::Container::get_root()->get_type());
204 }
205
206 namespace simgrid::instr {
207
208 /** @brief Creates a file with the topology of the platform file used for the simulator.
209  *
210  *  The graph topology will have the following properties: all hosts, links and routers of the platform file are mapped
211  *  to graph nodes; routes are mapped to edges. The platform's zones are not represented in the output.
212  */
213 void platform_graph_export_graphviz(const std::string& output_filename)
214 {
215   auto* g     = xbt_graph_new_graph(0, nullptr);
216   std::map<std::string, xbt_node_t, std::less<>> nodes;
217   std::map<std::string, xbt_edge_t, std::less<>> edges;
218   s4u::Engine::get_instance()->get_netzone_root()->extract_xbt_graph(g, &nodes, &edges);
219
220   std::ofstream fs;
221   fs.open(output_filename, std::ofstream::out);
222   xbt_assert(not fs.fail(), "Failed to open %s", output_filename.c_str());
223
224   if (g->directed)
225     fs << "digraph test {\n";
226   else
227     fs << "graph test {\n";
228
229   fs << "  graph [overlap=scale]\n";
230
231   fs << "  node [shape=box, style=filled]\n";
232   fs << "  node [width=.3, height=.3, style=filled, color=skyblue]\n\n";
233
234   for (auto const& [node, _] : nodes)
235     fs << "  \"" << node << "\";\n";
236
237   for (auto const& [_, edge] : edges) {
238     const char* src_s = static_cast<char*>(edge->src->data);
239     const char* dst_s = static_cast<char*>(edge->dst->data);
240     if (g->directed)
241       fs << "  \"" << src_s << "\" -> \"" << dst_s << "\";\n";
242     else
243       fs << "  \"" << src_s << "\" -- \"" << dst_s << "\";\n";
244   }
245   fs << "}\n";
246   fs.close();
247
248   xbt_graph_free_graph(g, xbt_free_f, xbt_free_f, nullptr);
249 }
250
251 void platform_graph_export_csv(const std::string& output_filename) {
252   auto* g         = xbt_graph_new_graph(0, nullptr);
253   std::map<std::string, xbt_node_t, std::less<>> nodes;
254   std::map<std::string, xbt_edge_t, std::less<>> edges;
255   s4u::Engine::get_instance()->get_netzone_root()->extract_xbt_graph(g, &nodes, &edges);
256
257   std::ofstream fs;
258   fs.open(output_filename, std::ofstream::out);
259   xbt_assert(not fs.fail(), "Failed to open %s", output_filename.c_str());
260
261   fs << "src,dst" << std::endl;
262   for (auto const& elm : edges) {
263     const char* src_s = static_cast<char*>(elm.second->src->data);
264     const char* dst_s = static_cast<char*>(elm.second->dst->data);
265     fs << src_s << "," << dst_s << std::endl;
266   }
267   fs.close();
268   xbt_graph_free_graph(g, xbt_free_f, xbt_free_f, nullptr);
269 }
270
271 /* Callbacks */
272 static std::vector<NetZoneContainer*> currentContainer; /* push and pop, used only in creation */
273 static void on_netzone_creation(s4u::NetZone const& netzone)
274 {
275   std::string id = netzone.get_name();
276   if (Container::get_root() == nullptr) {
277     auto* root = new NetZoneContainer(id, 0, nullptr);
278     xbt_assert(Container::get_root() == root);
279
280     if (TRACE_smpi_is_enabled()) {
281       auto* mpi = root->get_type()->by_name_or_create<ContainerType>("MPI");
282       if (not TRACE_smpi_is_grouped())
283         mpi->by_name_or_create<StateType>("MPI_STATE");
284       root->get_type()->by_name_or_create("MPI_LINK", mpi, mpi);
285       root->get_type()->by_name_or_create("MIGRATE_LINK", mpi, mpi);
286       mpi->by_name_or_create<StateType>("MIGRATE_STATE");
287     }
288
289     if (TRACE_needs_platform()) {
290       currentContainer.push_back(root);
291     }
292     return;
293   }
294
295   if (TRACE_needs_platform()) {
296     auto level      = static_cast<unsigned>(currentContainer.size());
297     auto* container = new NetZoneContainer(id, level, currentContainer.back());
298     currentContainer.push_back(container);
299   }
300 }
301
302 static void on_link_creation(s4u::Link const& link)
303 {
304   if (currentContainer.empty()) // No ongoing parsing. Are you creating the loopback?
305     return;
306
307   auto* container = new Container(link.get_name(), "LINK", currentContainer.back());
308
309   if ((TRACE_categorized() || TRACE_uncategorized() || TRACE_platform()) && (not TRACE_disable_link())) {
310     VariableType* bandwidth = container->get_type()->by_name_or_create("bandwidth", "");
311     bandwidth->set_calling_container(container);
312     bandwidth->set_event(0, link.get_bandwidth());
313     VariableType* latency = container->get_type()->by_name_or_create("latency", "");
314     latency->set_calling_container(container);
315     latency->set_event(0, link.get_latency());
316   }
317
318   if (TRACE_uncategorized()) {
319     container->get_type()->by_name_or_create("bandwidth_used", "0.5 0.5 0.5");
320   }
321 }
322
323 static void on_host_creation(s4u::Host const& host)
324 {
325   if (Container::by_name_or_null(host.get_name())) // This host already exists, do nothing
326     return;
327
328   Container* container  = new HostContainer(host, currentContainer.back());
329   const Container* root = Container::get_root();
330
331   if ((TRACE_categorized() || TRACE_uncategorized() || TRACE_platform()) && (not TRACE_disable_speed())) {
332     VariableType* speed = container->get_type()->by_name_or_create("speed", "");
333     speed->set_calling_container(container);
334     speed->set_event(0, host.get_speed());
335
336     VariableType* cores = container->get_type()->by_name_or_create("core_count", "");
337     cores->set_calling_container(container);
338     cores->set_event(0, host.get_core_count());
339   }
340
341   if (TRACE_uncategorized())
342     container->get_type()->by_name_or_create("speed_used", "0.5 0.5 0.5");
343
344   if (TRACE_smpi_is_enabled() && TRACE_smpi_is_grouped()) {
345     auto* mpi = container->get_type()->by_name_or_create<ContainerType>("MPI");
346     mpi->by_name_or_create<StateType>("MPI_STATE");
347     root->get_type()->by_name_or_create("MIGRATE_LINK", mpi, mpi);
348     mpi->by_name_or_create<StateType>("MIGRATE_STATE");
349   }
350 }
351
352 static void on_action_state_change(kernel::resource::Action const& action,
353                                    kernel::resource::Action::State /* previous */)
354 {
355   auto n = static_cast<unsigned>(action.get_variable()->get_number_of_constraint());
356
357   for (unsigned i = 0; i < n; i++) {
358     double value = action.get_rate() * action.get_variable()->get_constraint_weight(i);
359     /* Beware of composite actions: ptasks put links and cpus together. Extra pb: we cannot dynamic_cast from void* */
360     kernel::resource::Resource* resource = action.get_variable()->get_constraint(i)->get_id();
361     if (const auto* cpu = dynamic_cast<kernel::resource::CpuImpl*>(resource))
362       resource_set_utilization("HOST", "speed_used", cpu->get_cname(), action.get_category(), value,
363                                action.get_last_update(), simgrid_get_clock() - action.get_last_update());
364
365     if (const auto* link = dynamic_cast<kernel::resource::StandardLinkImpl*>(resource))
366       resource_set_utilization("LINK", "bandwidth_used", link->get_cname(), action.get_category(), value,
367                                action.get_last_update(), simgrid_get_clock() - action.get_last_update());
368   }
369 }
370
371 static void on_platform_created()
372 {
373   currentContainer.clear();
374   std::set<std::string, std::less<>> filter;
375   XBT_DEBUG("Starting graph extraction.");
376   recursiveGraphExtraction(s4u::Engine::get_instance()->get_netzone_root(), Container::get_root(), &filter);
377   XBT_DEBUG("Graph extraction finished.");
378   dump_buffer(true);
379 }
380
381 static void on_actor_creation(s4u::Actor const& actor)
382 {
383   const Container* root      = Container::get_root();
384   Container* container       = Container::by_name(actor.get_host()->get_name());
385   std::string container_name = instr_pid(actor);
386
387   container->create_child(container_name, "ACTOR");
388   auto* actor_type = container->get_type()->by_name_or_create<ContainerType>("ACTOR");
389   auto* state      = actor_type->by_name_or_create<StateType>("ACTOR_STATE");
390   state->add_entity_value("suspend", "1 0 1");
391   state->add_entity_value("sleep", "1 1 0");
392   state->add_entity_value("receive", "1 0 0");
393   state->add_entity_value("send", "0 0 1");
394   state->add_entity_value("execute", "0 1 1");
395   root->get_type()->by_name_or_create("ACTOR_LINK", actor_type, actor_type);
396
397   actor.on_exit([container_name](bool failed) {
398     if (failed)
399       // kill means that this actor no longer exists, let's destroy it
400       Container::by_name(container_name)->remove_from_parent();
401   });
402 }
403
404 static void on_actor_host_change(s4u::Actor const& actor, s4u::Host const& /*previous_location*/)
405 {
406   static long long int counter = 0;
407   Container* container         = Container::by_name(instr_pid(actor));
408   LinkType* link               = Container::get_root()->get_link("ACTOR_LINK");
409
410   // start link
411   link->start_event(container, "M", std::to_string(counter));
412   // destroy existing container of this process
413   container->remove_from_parent();
414   // create new container on the new_host location
415   Container::by_name(actor.get_host()->get_name())->create_child(instr_pid(actor), "ACTOR");
416   // end link
417   link->end_event(Container::by_name(instr_pid(actor)), "M", std::to_string(counter));
418   counter++;
419 }
420
421 static void on_vm_creation(s4u::Host const& host)
422 {
423   const Container* container = new HostContainer(host, currentContainer.back());
424   const Container* root      = Container::get_root();
425   auto* vm                   = container->get_type()->by_name_or_create<ContainerType>("VM");
426   auto* state                = vm->by_name_or_create<StateType>("VM_STATE");
427   state->add_entity_value("suspend", "1 0 1");
428   state->add_entity_value("sleep", "1 1 0");
429   state->add_entity_value("receive", "1 0 0");
430   state->add_entity_value("send", "0 0 1");
431   state->add_entity_value("execute", "0 1 1");
432   root->get_type()->by_name_or_create("VM_LINK", vm, vm);
433   root->get_type()->by_name_or_create("VM_ACTOR_LINK", vm, vm);
434 }
435
436 void define_callbacks()
437 {
438   // always need the callbacks to zones (we need only the root zone), to create the rootContainer and the rootType
439   // properly
440   if (TRACE_needs_platform()) {
441     s4u::Engine::on_platform_created_cb(on_platform_created);
442     s4u::Host::on_creation_cb(on_host_creation);
443     s4u::Host::on_speed_change_cb([](s4u::Host const& host) {
444       Container::by_name(host.get_name())
445           ->get_variable("speed")
446           ->set_event(simgrid_get_clock(), host.get_core_count() * host.get_available_speed());
447     });
448     s4u::Link::on_creation_cb(on_link_creation);
449     s4u::Link::on_bandwidth_change_cb([](s4u::Link const& link) {
450       Container::by_name(link.get_name())
451           ->get_variable("bandwidth")
452           ->set_event(simgrid_get_clock(), sg_bandwidth_factor * link.get_bandwidth());
453     });
454     s4u::NetZone::on_seal_cb([](s4u::NetZone const& /*netzone*/) { currentContainer.pop_back(); });
455     kernel::routing::NetPoint::on_creation.connect([](kernel::routing::NetPoint const& netpoint) {
456       if (netpoint.is_router())
457         new RouterContainer(netpoint.get_name(), currentContainer.back());
458     });
459   }
460
461   s4u::NetZone::on_creation_cb(on_netzone_creation);
462
463   kernel::resource::CpuAction::on_state_change.connect(on_action_state_change);
464   s4u::Link::on_communication_state_change_cb(on_action_state_change);
465
466   if (TRACE_actor_is_enabled()) {
467     s4u::Actor::on_creation_cb(on_actor_creation);
468     s4u::Actor::on_destruction_cb([](s4u::Actor const& actor) {
469       auto container = Container::by_name_or_null(instr_pid(actor));
470       if (container != nullptr)
471         container->remove_from_parent();
472     });
473     s4u::Actor::on_suspend_cb([](s4u::Actor const& actor) {
474       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("suspend");
475     });
476     s4u::Actor::on_resume_cb(
477         [](s4u::Actor const& actor) { Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event(); });
478     s4u::Actor::on_sleep_cb([](s4u::Actor const& actor) {
479       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("sleep");
480     });
481     s4u::Actor::on_wake_up_cb(
482         [](s4u::Actor const& actor) { Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event(); });
483     s4u::Exec::on_start_cb([](s4u::Exec const&) {
484       Container::by_name(instr_pid(*s4u::Actor::self()))->get_state("ACTOR_STATE")->push_event("execute");
485     });
486     s4u::Activity::on_completion_cb([](const s4u::Activity&) {
487       Container::by_name(instr_pid(*s4u::Actor::self()))->get_state("ACTOR_STATE")->pop_event();
488     });
489     s4u::Comm::on_send_cb([](s4u::Comm const&) {
490       Container::by_name(instr_pid(*s4u::Actor::self()))->get_state("ACTOR_STATE")->push_event("send");
491     });
492     s4u::Comm::on_recv_cb([](s4u::Comm const&) {
493       Container::by_name(instr_pid(*s4u::Actor::self()))->get_state("ACTOR_STATE")->push_event("receive");
494     });
495     s4u::Actor::on_host_change_cb(on_actor_host_change);
496   }
497
498   if (TRACE_smpi_is_enabled() && TRACE_smpi_is_computing()) {
499     s4u::Exec::on_start_cb([](s4u::Exec const& exec) {
500       Container::by_name(std::string("rank-") + std::to_string(s4u::Actor::self()->get_pid()))
501           ->get_state("MPI_STATE")
502           ->push_event("computing", new CpuTIData("compute", exec.get_cost()));
503     });
504     s4u::Activity::on_completion_cb([](const s4u::Activity&) {
505       Container::by_name(std::string("rank-") + std::to_string(s4u::Actor::self()->get_pid()))
506           ->get_state("MPI_STATE")
507           ->pop_event();
508     });
509   }
510
511   if (TRACE_vm_is_enabled()) {
512     s4u::Host::on_creation_cb(on_vm_creation);
513     s4u::VirtualMachine::on_start_cb([](s4u::VirtualMachine const& vm) {
514       Container::by_name(vm.get_name())->get_state("VM_STATE")->push_event("start");
515     });
516     s4u::VirtualMachine::on_started_cb(
517         [](s4u::VirtualMachine const& vm) { Container::by_name(vm.get_name())->get_state("VM_STATE")->pop_event(); });
518     s4u::VirtualMachine::on_suspend_cb([](s4u::VirtualMachine const& vm) {
519       Container::by_name(vm.get_name())->get_state("VM_STATE")->push_event("suspend");
520     });
521     s4u::VirtualMachine::on_resume_cb(
522         [](s4u::VirtualMachine const& vm) { Container::by_name(vm.get_name())->get_state("VM_STATE")->pop_event(); });
523     s4u::Host::on_destruction_cb(
524         [](s4u::Host const& host) { Container::by_name(host.get_name())->remove_from_parent(); });
525   }
526 }
527 } // namespace simgrid::instr