Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Merge branch 'doc' into 'master'
[simgrid.git] / src / instr / instr_platform.cpp
1 /* Copyright (c) 2010-2020. The SimGrid Team. All rights reserved.          */
2
3 /* This program is free software; you can redistribute it and/or modify it
4  * under the terms of the license (GNU LGPL) which comes with this package. */
5
6 #include "src/instr/instr_private.hpp"
7
8 #include "simgrid/kernel/routing/NetPoint.hpp"
9 #include "simgrid/kernel/routing/NetZoneImpl.hpp"
10 #include "simgrid/s4u/Actor.hpp"
11 #include "simgrid/s4u/Comm.hpp"
12 #include "simgrid/s4u/Engine.hpp"
13 #include "simgrid/s4u/Exec.hpp"
14 #include "simgrid/s4u/Host.hpp"
15 #include "simgrid/s4u/VirtualMachine.hpp"
16 #include "src/surf/cpu_interface.hpp"
17 #include "src/surf/network_interface.hpp"
18 #include "src/surf/surf_interface.hpp"
19 #include "src/surf/xml/platf_private.hpp"
20 #include "surf/surf.hpp"
21 #include "xbt/graph.h"
22
23 #include <fstream>
24
25 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(instr_routing, instr, "Tracing platform hierarchy");
26
27 std::string instr_pid(simgrid::s4u::Actor const& proc)
28 {
29   return std::string(proc.get_name()) + "-" + std::to_string(proc.get_pid());
30 }
31
32 static container_t lowestCommonAncestor(const simgrid::instr::Container* a1, const simgrid::instr::Container* a2)
33 {
34   // this is only an optimization (since most of a1 and a2 share the same parent)
35   if (a1->father_ == a2->father_)
36     return a1->father_;
37
38   // create an array with all ancestors of a1
39   std::vector<container_t> ancestors_a1;
40   container_t p = a1->father_;
41   while (p) {
42     ancestors_a1.push_back(p);
43     p = p->father_;
44   }
45
46   // create an array with all ancestors of a2
47   std::vector<container_t> ancestors_a2;
48   p = a2->father_;
49   while (p) {
50     ancestors_a2.push_back(p);
51     p = p->father_;
52   }
53
54   // find the lowest ancestor
55   p     = nullptr;
56   int i = ancestors_a1.size() - 1;
57   int j = ancestors_a2.size() - 1;
58   while (i >= 0 && j >= 0) {
59     container_t a1p = ancestors_a1.at(i);
60     const simgrid::instr::Container* a2p = ancestors_a2.at(j);
61     if (a1p == a2p) {
62       p = a1p;
63     } else {
64       break;
65     }
66     i--;
67     j--;
68   }
69   return p;
70 }
71
72 static void linkContainers(container_t src, container_t dst, std::set<std::string>* filter)
73 {
74   // ignore loopback
75   if (src->get_name() == "__loopback__" || dst->get_name() == "__loopback__") {
76     XBT_DEBUG("  linkContainers: ignoring loopback link");
77     return;
78   }
79
80   // find common father
81   container_t father = lowestCommonAncestor(src, dst);
82   if (not father) {
83     xbt_die("common father unknown, this is a tracing problem");
84   }
85
86   // check if we already register this pair (we only need one direction)
87   std::string aux1 = src->get_name() + dst->get_name();
88   std::string aux2 = dst->get_name() + src->get_name();
89   if (filter->find(aux1) != filter->end()) {
90     XBT_DEBUG("  linkContainers: already registered %s <-> %s (1)", src->get_cname(), dst->get_cname());
91     return;
92   }
93   if (filter->find(aux2) != filter->end()) {
94     XBT_DEBUG("  linkContainers: already registered %s <-> %s (2)", dst->get_cname(), src->get_cname());
95     return;
96   }
97
98   // ok, not found, register it
99   filter->insert(aux1);
100   filter->insert(aux2);
101
102   // declare type
103   std::string link_typename = father->type_->get_name() + "-" + src->type_->get_name() +
104                               std::to_string(src->type_->get_id()) + "-" + dst->type_->get_name() +
105                               std::to_string(dst->type_->get_id());
106   simgrid::instr::LinkType* link = father->type_->by_name_or_create(link_typename, src->type_, dst->type_);
107   link->set_calling_container(father);
108
109   // create the link
110   static long long counter = 0;
111
112   std::string key = std::to_string(counter);
113   counter++;
114
115   link->start_event(src, "topology", key);
116   link->end_event(dst, "topology", key);
117
118   XBT_DEBUG("  linkContainers %s <-> %s", src->get_cname(), dst->get_cname());
119 }
120
121 static void recursiveGraphExtraction(const simgrid::s4u::NetZone* netzone, container_t container,
122                                      std::set<std::string>* filter)
123 {
124   if (not TRACE_platform_topology()) {
125     XBT_DEBUG("Graph extraction disabled by user.");
126     return;
127   }
128   XBT_DEBUG("Graph extraction for NetZone = %s", netzone->get_cname());
129   if (not netzone->get_children().empty()) {
130     // bottom-up recursion
131     for (auto const& nz_son : netzone->get_children()) {
132       container_t child_container = container->children_.at(nz_son->get_name());
133       recursiveGraphExtraction(nz_son, child_container, filter);
134     }
135   }
136
137   xbt_graph_t graph                        = xbt_graph_new_graph(0, nullptr);
138   std::map<std::string, xbt_node_t>* nodes = new std::map<std::string, xbt_node_t>();
139   std::map<std::string, xbt_edge_t>* edges = new std::map<std::string, xbt_edge_t>();
140
141   netzone->get_impl()->get_graph(graph, nodes, edges);
142   for (auto elm : *edges) {
143     const xbt_edge* edge = elm.second;
144     linkContainers(simgrid::instr::Container::by_name(static_cast<const char*>(edge->src->data)),
145                    simgrid::instr::Container::by_name(static_cast<const char*>(edge->dst->data)), filter);
146   }
147   delete nodes;
148   delete edges;
149   xbt_graph_free_graph(graph, xbt_free_f, xbt_free_f, nullptr);
150 }
151
152 /*
153  * user categories support
154  */
155 static void recursiveNewVariableType(const std::string& new_typename, const std::string& color,
156                                      simgrid::instr::Type* root)
157 {
158   if (root->get_name() == "HOST" || root->get_name() == "VM")
159     root->by_name_or_create(std::string("p") + new_typename, color);
160
161   if (root->get_name() == "LINK")
162     root->by_name_or_create(std::string("b") + new_typename, color);
163
164   for (auto const& elm : root->get_children()) {
165     recursiveNewVariableType(new_typename, color, elm.second.get());
166   }
167 }
168
169 void instr_new_variable_type(const std::string& new_typename, const std::string& color)
170 {
171   recursiveNewVariableType(new_typename, color, simgrid::instr::Container::get_root()->type_);
172 }
173
174 static void recursiveNewUserVariableType(const std::string& father_type, const std::string& new_typename,
175                                          const std::string& color, simgrid::instr::Type* root)
176 {
177   if (root->get_name() == father_type) {
178     root->by_name_or_create(new_typename, color);
179   }
180   for (auto const& elm : root->get_children())
181     recursiveNewUserVariableType(father_type, new_typename, color, elm.second.get());
182 }
183
184 void instr_new_user_variable_type(const std::string& father_type, const std::string& new_typename,
185                                   const std::string& color)
186 {
187   recursiveNewUserVariableType(father_type, new_typename, color, simgrid::instr::Container::get_root()->type_);
188 }
189
190 static void recursiveNewUserStateType(const std::string& father_type, const std::string& new_typename,
191                                       simgrid::instr::Type* root)
192 {
193   if (root->get_name() == father_type)
194     root->by_name_or_create<simgrid::instr::StateType>(new_typename);
195
196   for (auto const& elm : root->get_children())
197     recursiveNewUserStateType(father_type, new_typename, elm.second.get());
198 }
199
200 void instr_new_user_state_type(const std::string& father_type, const std::string& new_typename)
201 {
202   recursiveNewUserStateType(father_type, new_typename, simgrid::instr::Container::get_root()->type_);
203 }
204
205 static void recursiveNewValueForUserStateType(const std::string& type_name, const char* val, const std::string& color,
206                                               simgrid::instr::Type* root)
207 {
208   if (root->get_name() == type_name)
209     static_cast<simgrid::instr::StateType*>(root)->add_entity_value(val, color);
210
211   for (auto const& elm : root->get_children())
212     recursiveNewValueForUserStateType(type_name, val, color, elm.second.get());
213 }
214
215 void instr_new_value_for_user_state_type(const std::string& type_name, const char* value, const std::string& color)
216 {
217   recursiveNewValueForUserStateType(type_name, value, color, simgrid::instr::Container::get_root()->type_);
218 }
219
220 namespace simgrid {
221 namespace instr {
222
223 void platform_graph_export_graphviz(const std::string& output_filename)
224 {
225   xbt_graph_t g                            = xbt_graph_new_graph(0, nullptr);
226   std::map<std::string, xbt_node_t>* nodes = new std::map<std::string, xbt_node_t>();
227   std::map<std::string, xbt_edge_t>* edges = new std::map<std::string, xbt_edge_t>();
228   s4u::Engine::get_instance()->get_netzone_root()->extract_xbt_graph(g, nodes, edges);
229
230   std::ofstream fs;
231   fs.open(output_filename, std::ofstream::out);
232   xbt_assert(not fs.fail(), "Failed to open %s", output_filename.c_str());
233
234   if (g->directed)
235     fs << "digraph test {" << std::endl;
236   else
237     fs << "graph test {" << std::endl;
238
239   fs << "  graph [overlap=scale]" << std::endl;
240
241   fs << "  node [shape=box, style=filled]" << std::endl;
242   fs << "  node [width=.3, height=.3, style=filled, color=skyblue]" << std::endl << std::endl;
243
244   for (auto const& elm : *nodes)
245     fs << "  \"" << elm.first << "\";" << std::endl;
246
247   for (auto const& elm : *edges) {
248     const char* src_s = static_cast<char*>(elm.second->src->data);
249     const char* dst_s = static_cast<char*>(elm.second->dst->data);
250     if (g->directed)
251       fs << "  \"" << src_s << "\" -> \"" << dst_s << "\";" << std::endl;
252     else
253       fs << "  \"" << src_s << "\" -- \"" << dst_s << "\";" << std::endl;
254   }
255   fs << "}" << std::endl;
256   fs.close();
257
258   xbt_graph_free_graph(g, xbt_free_f, xbt_free_f, nullptr);
259   delete nodes;
260   delete edges;
261 }
262
263 /* Callbacks */
264 static std::vector<NetZoneContainer*> currentContainer; /* push and pop, used only in creation */
265 static void on_netzone_creation(s4u::NetZone const& netzone)
266 {
267   std::string id = netzone.get_name();
268   if (Container::get_root() == nullptr) {
269     NetZoneContainer* root = new NetZoneContainer(id, 0, nullptr);
270     xbt_assert(Container::get_root() == root);
271
272     if (TRACE_smpi_is_enabled()) {
273       ContainerType* mpi = root->type_->by_name_or_create<ContainerType>("MPI");
274       if (not TRACE_smpi_is_grouped())
275         mpi->by_name_or_create<StateType>("MPI_STATE");
276       root->type_->by_name_or_create("MPI_LINK", mpi, mpi);
277       // TODO See if we can move this to the LoadBalancer plugin
278       root->type_->by_name_or_create("MIGRATE_LINK", mpi, mpi);
279       mpi->by_name_or_create<StateType>("MIGRATE_STATE");
280     }
281
282     if (TRACE_needs_platform()) {
283       currentContainer.push_back(root);
284     }
285     return;
286   }
287
288   if (TRACE_needs_platform()) {
289     NetZoneContainer* container = new NetZoneContainer(id, currentContainer.size(), currentContainer.back());
290     currentContainer.push_back(container);
291   }
292 }
293
294 static void on_link_creation(s4u::Link const& link)
295 {
296   if (currentContainer.empty()) // No ongoing parsing. Are you creating the loopback?
297     return;
298
299   Container* container = new Container(link.get_name(), "LINK", currentContainer.back());
300
301   if ((TRACE_categorized() || TRACE_uncategorized() || TRACE_platform()) && (not TRACE_disable_link())) {
302     VariableType* bandwidth = container->type_->by_name_or_create("bandwidth", "");
303     bandwidth->set_calling_container(container);
304     bandwidth->set_event(0, link.get_bandwidth());
305     VariableType* latency = container->type_->by_name_or_create("latency", "");
306     latency->set_calling_container(container);
307     latency->set_event(0, link.get_latency());
308   }
309
310   if (TRACE_uncategorized()) {
311     container->type_->by_name_or_create("bandwidth_used", "0.5 0.5 0.5");
312   }
313 }
314
315 static void on_host_creation(s4u::Host const& host)
316 {
317   Container* container  = new HostContainer(host, currentContainer.back());
318   const Container* root = Container::get_root();
319
320   if ((TRACE_categorized() || TRACE_uncategorized() || TRACE_platform()) && (not TRACE_disable_speed())) {
321     VariableType* speed = container->type_->by_name_or_create("speed", "");
322     speed->set_calling_container(container);
323     speed->set_event(0, host.get_speed());
324
325     VariableType* cores = container->type_->by_name_or_create("core_count", "");
326     cores->set_calling_container(container);
327     cores->set_event(0, host.get_core_count());
328   }
329
330   if (TRACE_uncategorized())
331     container->type_->by_name_or_create("speed_used", "0.5 0.5 0.5");
332
333   if (TRACE_smpi_is_enabled() && TRACE_smpi_is_grouped()) {
334     ContainerType* mpi = container->type_->by_name_or_create<ContainerType>("MPI");
335     mpi->by_name_or_create<StateType>("MPI_STATE");
336     // TODO See if we can move this to the LoadBalancer plugin
337     root->type_->by_name_or_create("MIGRATE_LINK", mpi, mpi);
338     mpi->by_name_or_create<StateType>("MIGRATE_STATE");
339   }
340 }
341
342 static void on_host_speed_change(s4u::Host const& host)
343 {
344   Container::by_name(host.get_name())
345       ->get_variable("speed")
346       ->set_event(surf_get_clock(), host.get_core_count() * host.get_available_speed());
347 }
348
349 static void on_action_state_change(kernel::resource::Action const& action,
350                                    kernel::resource::Action::State /* previous */)
351 {
352   int n = action.get_variable()->get_number_of_constraint();
353
354   for (int i = 0; i < n; i++) {
355     double value = action.get_variable()->get_value() * action.get_variable()->get_constraint_weight(i);
356     /* Beware of composite actions: ptasks put links and cpus together. Extra pb: we cannot dynamic_cast from void* */
357     kernel::resource::Resource* resource = action.get_variable()->get_constraint(i)->get_id();
358     const kernel::resource::Cpu* cpu     = dynamic_cast<kernel::resource::Cpu*>(resource);
359
360     if (cpu != nullptr)
361       resource_set_utilization("HOST", "speed_used", cpu->get_cname(), action.get_category(), value,
362                                action.get_last_update(), SIMIX_get_clock() - action.get_last_update());
363
364     const kernel::resource::LinkImpl* link = dynamic_cast<kernel::resource::LinkImpl*>(resource);
365
366     if (link != nullptr)
367       resource_set_utilization("LINK", "bandwidth_used", link->get_cname(), action.get_category(), value,
368                                action.get_last_update(), SIMIX_get_clock() - action.get_last_update());
369   }
370 }
371
372 static void on_link_bandwidth_change(s4u::Link const& link)
373 {
374   Container::by_name(link.get_name())
375       ->get_variable("bandwidth")
376       ->set_event(surf_get_clock(), sg_bandwidth_factor * link.get_bandwidth());
377 }
378
379 static void on_netpoint_creation(kernel::routing::NetPoint const& netpoint)
380 {
381   if (netpoint.is_router())
382     new RouterContainer(netpoint.get_name(), currentContainer.back());
383 }
384
385 static void on_platform_created()
386 {
387   currentContainer.clear();
388   std::set<std::string>* filter = new std::set<std::string>();
389   XBT_DEBUG("Starting graph extraction.");
390   recursiveGraphExtraction(s4u::Engine::get_instance()->get_netzone_root(), Container::get_root(), filter);
391   XBT_DEBUG("Graph extraction finished.");
392   delete filter;
393   dump_buffer(true);
394 }
395
396 static void on_actor_creation(s4u::Actor const& actor)
397 {
398   const Container* root = Container::get_root();
399   Container* container  = Container::by_name(actor.get_host()->get_name());
400
401   container->create_child(instr_pid(actor), "ACTOR");
402   ContainerType* actor_type = container->type_->by_name_or_create<ContainerType>("ACTOR");
403   StateType* state          = actor_type->by_name_or_create<StateType>("ACTOR_STATE");
404   state->add_entity_value("suspend", "1 0 1");
405   state->add_entity_value("sleep", "1 1 0");
406   state->add_entity_value("receive", "1 0 0");
407   state->add_entity_value("send", "0 0 1");
408   state->add_entity_value("execute", "0 1 1");
409   root->type_->by_name_or_create("ACTOR_LINK", actor_type, actor_type);
410   root->type_->by_name_or_create("ACTOR_TASK_LINK", actor_type, actor_type);
411
412   std::string container_name = instr_pid(actor);
413   actor.on_exit([container_name](bool failed) {
414     if (failed)
415       // kill means that this actor no longer exists, let's destroy it
416       Container::by_name(container_name)->remove_from_parent();
417   });
418 }
419
420 static void on_actor_host_change(s4u::Actor const& actor, s4u::Host const& /*previous_location*/)
421 {
422   static long long int counter = 0;
423   container_t container        = Container::by_name(instr_pid(actor));
424   LinkType* link               = Container::get_root()->get_link("ACTOR_LINK");
425
426   // start link
427   link->start_event(container, "M", std::to_string(counter));
428   // destroy existing container of this process
429   container->remove_from_parent();
430   // create new container on the new_host location
431   Container::by_name(actor.get_host()->get_name())->create_child(instr_pid(actor), "ACTOR");
432   // end link
433   link->end_event(Container::by_name(instr_pid(actor)), "M", std::to_string(counter));
434   counter++;
435 }
436
437 static void on_vm_creation(s4u::Host const& host)
438 {
439   const Container* container = new HostContainer(host, currentContainer.back());
440   const Container* root      = Container::get_root();
441   ContainerType* vm          = container->type_->by_name_or_create<ContainerType>("VM");
442   StateType* state           = vm->by_name_or_create<StateType>("VM_STATE");
443   state->add_entity_value("suspend", "1 0 1");
444   state->add_entity_value("sleep", "1 1 0");
445   state->add_entity_value("receive", "1 0 0");
446   state->add_entity_value("send", "0 0 1");
447   state->add_entity_value("execute", "0 1 1");
448   root->type_->by_name_or_create("VM_LINK", vm, vm);
449   root->type_->by_name_or_create("VM_ACTOR_LINK", vm, vm);
450 }
451
452 void define_callbacks()
453 {
454   // always need the callbacks to zones (we need only the root zone), to create the rootContainer and the rootType
455   // properly
456   if (TRACE_needs_platform()) {
457     s4u::Engine::on_platform_created.connect(on_platform_created);
458     s4u::Host::on_creation.connect(on_host_creation);
459     s4u::Host::on_speed_change.connect(on_host_speed_change);
460     s4u::Link::on_creation.connect(on_link_creation);
461     s4u::Link::on_bandwidth_change.connect(on_link_bandwidth_change);
462     s4u::NetZone::on_seal.connect([](s4u::NetZone const& /*netzone*/) { currentContainer.pop_back(); });
463     kernel::routing::NetPoint::on_creation.connect(on_netpoint_creation);
464   }
465   s4u::NetZone::on_creation.connect(on_netzone_creation);
466
467   kernel::resource::CpuAction::on_state_change.connect(on_action_state_change);
468   s4u::Link::on_communication_state_change.connect(on_action_state_change);
469
470   if (TRACE_actor_is_enabled()) {
471     s4u::Actor::on_creation.connect(on_actor_creation);
472     s4u::Actor::on_destruction.connect([](s4u::Actor const& actor) {
473       auto container = Container::by_name_or_null(instr_pid(actor));
474       if (container != nullptr)
475         container->remove_from_parent();
476     });
477     s4u::Actor::on_suspend.connect([](s4u::Actor const& actor) {
478       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("suspend");
479     });
480     s4u::Actor::on_resume.connect(
481         [](s4u::Actor const& actor) { Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event(); });
482     s4u::Actor::on_sleep.connect([](s4u::Actor const& actor) {
483       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("sleep");
484     });
485     s4u::Actor::on_wake_up.connect(
486         [](s4u::Actor const& actor) { Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event(); });
487     s4u::Exec::on_start.connect([](simgrid::s4u::Actor const& actor, s4u::Exec const&) {
488       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("execute");
489     });
490     s4u::Exec::on_completion.connect([](s4u::Actor const& actor, s4u::Exec const&) {
491       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event();
492     });
493     s4u::Comm::on_sender_start.connect([](s4u::Actor const& actor) {
494       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("send");
495     });
496     s4u::Comm::on_receiver_start.connect([](s4u::Actor const& actor) {
497       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("receive");
498     });
499     s4u::Comm::on_completion.connect(
500         [](s4u::Actor const& actor) { Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event(); });
501     s4u::Actor::on_host_change.connect(on_actor_host_change);
502   }
503
504   if (TRACE_vm_is_enabled()) {
505     s4u::Host::on_creation.connect(on_vm_creation);
506     s4u::VirtualMachine::on_start.connect([](s4u::VirtualMachine const& vm) {
507       Container::by_name(vm.get_name())->get_state("VM_STATE")->push_event("start");
508     });
509     s4u::VirtualMachine::on_started.connect(
510         [](s4u::VirtualMachine const& vm) { Container::by_name(vm.get_name())->get_state("VM_STATE")->pop_event(); });
511     s4u::VirtualMachine::on_suspend.connect([](s4u::VirtualMachine const& vm) {
512       Container::by_name(vm.get_name())->get_state("VM_STATE")->push_event("suspend");
513     });
514     s4u::VirtualMachine::on_resume.connect(
515         [](s4u::VirtualMachine const& vm) { Container::by_name(vm.get_name())->get_state("VM_STATE")->pop_event(); });
516     s4u::Host::on_destruction.connect(
517         [](s4u::Host const& host) { Container::by_name(host.get_name())->remove_from_parent(); });
518   }
519 }
520 } // namespace instr
521 } // namespace simgrid