Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
f27343862d99752333c8bb9d4a0cc1f5209c8011
[simgrid.git] / src / instr / instr_platform.cpp
1 /* Copyright (c) 2010-2020. The SimGrid Team. All rights reserved.          */
2
3 /* This program is free software; you can redistribute it and/or modify it
4  * under the terms of the license (GNU LGPL) which comes with this package. */
5
6 #include "src/instr/instr_private.hpp"
7
8 #include "simgrid/kernel/routing/NetPoint.hpp"
9 #include "simgrid/kernel/routing/NetZoneImpl.hpp"
10 #include "simgrid/s4u/Actor.hpp"
11 #include "simgrid/s4u/Comm.hpp"
12 #include "simgrid/s4u/Engine.hpp"
13 #include "simgrid/s4u/Exec.hpp"
14 #include "simgrid/s4u/Host.hpp"
15 #include "simgrid/s4u/VirtualMachine.hpp"
16 #include "src/surf/cpu_interface.hpp"
17 #include "src/surf/network_interface.hpp"
18 #include "src/surf/surf_interface.hpp"
19 #include "src/surf/xml/platf_private.hpp"
20 #include "surf/surf.hpp"
21 #include "xbt/graph.h"
22
23 #include <fstream>
24
25 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(instr_routing, instr, "Tracing platform hierarchy");
26
27 std::string instr_pid(simgrid::s4u::Actor const& proc)
28 {
29   return std::string(proc.get_name()) + "-" + std::to_string(proc.get_pid());
30 }
31
32 static const char* instr_node_name(const s_xbt_node_t* node)
33 {
34   return static_cast<char*>(xbt_graph_node_get_data(node));
35 }
36
37 static container_t lowestCommonAncestor(const simgrid::instr::Container* a1, const simgrid::instr::Container* a2)
38 {
39   // this is only an optimization (since most of a1 and a2 share the same parent)
40   if (a1->father_ == a2->father_)
41     return a1->father_;
42
43   // create an array with all ancestors of a1
44   std::vector<container_t> ancestors_a1;
45   container_t p = a1->father_;
46   while (p) {
47     ancestors_a1.push_back(p);
48     p = p->father_;
49   }
50
51   // create an array with all ancestors of a2
52   std::vector<container_t> ancestors_a2;
53   p = a2->father_;
54   while (p) {
55     ancestors_a2.push_back(p);
56     p = p->father_;
57   }
58
59   // find the lowest ancestor
60   p     = nullptr;
61   int i = ancestors_a1.size() - 1;
62   int j = ancestors_a2.size() - 1;
63   while (i >= 0 && j >= 0) {
64     container_t a1p = ancestors_a1.at(i);
65     const simgrid::instr::Container* a2p = ancestors_a2.at(j);
66     if (a1p == a2p) {
67       p = a1p;
68     } else {
69       break;
70     }
71     i--;
72     j--;
73   }
74   return p;
75 }
76
77 static void linkContainers(container_t src, container_t dst, std::set<std::string>* filter)
78 {
79   // ignore loopback
80   if (src->get_name() == "__loopback__" || dst->get_name() == "__loopback__") {
81     XBT_DEBUG("  linkContainers: ignoring loopback link");
82     return;
83   }
84
85   // find common father
86   container_t father = lowestCommonAncestor(src, dst);
87   if (not father) {
88     xbt_die("common father unknown, this is a tracing problem");
89   }
90
91   // check if we already register this pair (we only need one direction)
92   std::string aux1 = src->get_name() + dst->get_name();
93   std::string aux2 = dst->get_name() + src->get_name();
94   if (filter->find(aux1) != filter->end()) {
95     XBT_DEBUG("  linkContainers: already registered %s <-> %s (1)", src->get_cname(), dst->get_cname());
96     return;
97   }
98   if (filter->find(aux2) != filter->end()) {
99     XBT_DEBUG("  linkContainers: already registered %s <-> %s (2)", dst->get_cname(), src->get_cname());
100     return;
101   }
102
103   // ok, not found, register it
104   filter->insert(aux1);
105   filter->insert(aux2);
106
107   // declare type
108   std::string link_typename = father->type_->get_name() + "-" + src->type_->get_name() +
109                               std::to_string(src->type_->get_id()) + "-" + dst->type_->get_name() +
110                               std::to_string(dst->type_->get_id());
111   simgrid::instr::LinkType* link = father->type_->by_name_or_create(link_typename, src->type_, dst->type_);
112   link->set_calling_container(father);
113
114   // create the link
115   static long long counter = 0;
116
117   std::string key = std::to_string(counter);
118   counter++;
119
120   link->start_event(src, "topology", key);
121   link->end_event(dst, "topology", key);
122
123   XBT_DEBUG("  linkContainers %s <-> %s", src->get_cname(), dst->get_cname());
124 }
125
126 static void recursiveGraphExtraction(const simgrid::s4u::NetZone* netzone, container_t container,
127                                      std::set<std::string>* filter)
128 {
129   if (not TRACE_platform_topology()) {
130     XBT_DEBUG("Graph extraction disabled by user.");
131     return;
132   }
133   XBT_DEBUG("Graph extraction for NetZone = %s", netzone->get_cname());
134   if (not netzone->get_children().empty()) {
135     // bottom-up recursion
136     for (auto const& nz_son : netzone->get_children()) {
137       container_t child_container = container->children_.at(nz_son->get_name());
138       recursiveGraphExtraction(nz_son, child_container, filter);
139     }
140   }
141
142   xbt_graph_t graph                        = xbt_graph_new_graph(0, nullptr);
143   std::map<std::string, xbt_node_t>* nodes = new std::map<std::string, xbt_node_t>();
144   std::map<std::string, xbt_edge_t>* edges = new std::map<std::string, xbt_edge_t>();
145
146   netzone->get_impl()->get_graph(graph, nodes, edges);
147   for (auto elm : *edges) {
148     const xbt_edge* edge = elm.second;
149     linkContainers(simgrid::instr::Container::by_name(static_cast<const char*>(edge->src->data)),
150                    simgrid::instr::Container::by_name(static_cast<const char*>(edge->dst->data)), filter);
151   }
152   delete nodes;
153   delete edges;
154   xbt_graph_free_graph(graph, xbt_free_f, xbt_free_f, nullptr);
155 }
156
157 /*
158  * user categories support
159  */
160 static void recursiveNewVariableType(const std::string& new_typename, const std::string& color,
161                                      simgrid::instr::Type* root)
162 {
163   if (root->get_name() == "HOST" || root->get_name() == "VM")
164     root->by_name_or_create(std::string("p") + new_typename, color);
165
166   if (root->get_name() == "LINK")
167     root->by_name_or_create(std::string("b") + new_typename, color);
168
169   for (auto const& elm : root->children_) {
170     recursiveNewVariableType(new_typename, color, elm.second.get());
171   }
172 }
173
174 void instr_new_variable_type(const std::string& new_typename, const std::string& color)
175 {
176   recursiveNewVariableType(new_typename, color, simgrid::instr::Container::get_root()->type_);
177 }
178
179 static void recursiveNewUserVariableType(const std::string& father_type, const std::string& new_typename,
180                                          const std::string& color, simgrid::instr::Type* root)
181 {
182   if (root->get_name() == father_type) {
183     root->by_name_or_create(new_typename, color);
184   }
185   for (auto const& elm : root->children_)
186     recursiveNewUserVariableType(father_type, new_typename, color, elm.second.get());
187 }
188
189 void instr_new_user_variable_type(const std::string& father_type, const std::string& new_typename,
190                                   const std::string& color)
191 {
192   recursiveNewUserVariableType(father_type, new_typename, color, simgrid::instr::Container::get_root()->type_);
193 }
194
195 static void recursiveNewUserStateType(const std::string& father_type, const std::string& new_typename,
196                                       simgrid::instr::Type* root)
197 {
198   if (root->get_name() == father_type)
199     root->by_name_or_create<simgrid::instr::StateType>(new_typename);
200
201   for (auto const& elm : root->children_)
202     recursiveNewUserStateType(father_type, new_typename, elm.second.get());
203 }
204
205 void instr_new_user_state_type(const std::string& father_type, const std::string& new_typename)
206 {
207   recursiveNewUserStateType(father_type, new_typename, simgrid::instr::Container::get_root()->type_);
208 }
209
210 static void recursiveNewValueForUserStateType(const std::string& type_name, const char* val, const std::string& color,
211                                               simgrid::instr::Type* root)
212 {
213   if (root->get_name() == type_name)
214     static_cast<simgrid::instr::StateType*>(root)->add_entity_value(val, color);
215
216   for (auto const& elm : root->children_)
217     recursiveNewValueForUserStateType(type_name, val, color, elm.second.get());
218 }
219
220 void instr_new_value_for_user_state_type(const std::string& type_name, const char* value, const std::string& color)
221 {
222   recursiveNewValueForUserStateType(type_name, value, color, simgrid::instr::Container::get_root()->type_);
223 }
224
225 static void recursiveXBTGraphExtraction(const s_xbt_graph_t* graph, std::map<std::string, xbt_node_t>* nodes,
226                                         std::map<std::string, xbt_edge_t>* edges, const_sg_netzone_t netzone)
227 {
228   // bottom-up recursion
229   for (auto const& netzone_child : netzone->get_children())
230     recursiveXBTGraphExtraction(graph, nodes, edges, netzone_child);
231
232   netzone->get_impl()->get_graph(graph, nodes, edges);
233 }
234
235 namespace simgrid {
236 namespace instr {
237
238 void platform_graph_export_graphviz(const std::string& output_filename)
239 {
240   xbt_graph_t g                            = xbt_graph_new_graph(0, nullptr);
241   std::map<std::string, xbt_node_t>* nodes = new std::map<std::string, xbt_node_t>();
242   std::map<std::string, xbt_edge_t>* edges = new std::map<std::string, xbt_edge_t>();
243   recursiveXBTGraphExtraction(g, nodes, edges, s4u::Engine::get_instance()->get_netzone_root());
244
245   std::ofstream fs;
246   fs.open(output_filename, std::ofstream::out);
247   xbt_assert(not fs.fail(), "Failed to open %s", output_filename.c_str());
248
249   if (g->directed)
250     fs << "digraph test {" << std::endl;
251   else
252     fs << "graph test {" << std::endl;
253
254   fs << "  graph [overlap=scale]" << std::endl;
255
256   fs << "  node [shape=box, style=filled]" << std::endl;
257   fs << "  node [width=.3, height=.3, style=filled, color=skyblue]" << std::endl << std::endl;
258
259   for (auto const& elm : *nodes)
260     fs << "  \"" << instr_node_name(elm.second) << "\";" << std::endl;
261
262   for (auto const& elm : *edges) {
263     const char* src_s = instr_node_name(elm.second->src);
264     const char* dst_s = instr_node_name(elm.second->dst);
265     if (g->directed)
266       fs << "  \"" << src_s << "\" -> \"" << dst_s << "\";" << std::endl;
267     else
268       fs << "  \"" << src_s << "\" -- \"" << dst_s << "\";" << std::endl;
269   }
270   fs << "}" << std::endl;
271   fs.close();
272
273   xbt_graph_free_graph(g, xbt_free_f, xbt_free_f, nullptr);
274   delete nodes;
275   delete edges;
276 }
277
278 /* Callbacks */
279 static std::vector<NetZoneContainer*> currentContainer; /* push and pop, used only in creation */
280 static void on_netzone_creation(s4u::NetZone const& netzone)
281 {
282   std::string id = netzone.get_name();
283   if (Container::get_root() == nullptr) {
284     NetZoneContainer* root = new NetZoneContainer(id, 0, nullptr);
285     xbt_assert(Container::get_root() == root);
286
287     if (TRACE_smpi_is_enabled()) {
288       ContainerType* mpi = root->type_->by_name_or_create<ContainerType>("MPI");
289       if (not TRACE_smpi_is_grouped())
290         mpi->by_name_or_create<StateType>("MPI_STATE");
291       root->type_->by_name_or_create("MPI_LINK", mpi, mpi);
292       // TODO See if we can move this to the LoadBalancer plugin
293       root->type_->by_name_or_create("MIGRATE_LINK", mpi, mpi);
294       mpi->by_name_or_create<StateType>("MIGRATE_STATE");
295     }
296
297     if (TRACE_needs_platform()) {
298       currentContainer.push_back(root);
299     }
300     return;
301   }
302
303   if (TRACE_needs_platform()) {
304     NetZoneContainer* container = new NetZoneContainer(id, currentContainer.size(), currentContainer.back());
305     currentContainer.push_back(container);
306   }
307 }
308
309 static void on_link_creation(s4u::Link const& link)
310 {
311   if (currentContainer.empty()) // No ongoing parsing. Are you creating the loopback?
312     return;
313
314   Container* container = new Container(link.get_name(), "LINK", currentContainer.back());
315
316   if ((TRACE_categorized() || TRACE_uncategorized() || TRACE_platform()) && (not TRACE_disable_link())) {
317     VariableType* bandwidth = container->type_->by_name_or_create("bandwidth", "");
318     bandwidth->set_calling_container(container);
319     bandwidth->set_event(0, link.get_bandwidth());
320     VariableType* latency = container->type_->by_name_or_create("latency", "");
321     latency->set_calling_container(container);
322     latency->set_event(0, link.get_latency());
323   }
324   if (TRACE_uncategorized()) {
325     container->type_->by_name_or_create("bandwidth_used", "0.5 0.5 0.5");
326   }
327 }
328
329 static void on_host_creation(s4u::Host const& host)
330 {
331   Container* container  = new HostContainer(host, currentContainer.back());
332   const Container* root = Container::get_root();
333
334   if ((TRACE_categorized() || TRACE_uncategorized() || TRACE_platform()) && (not TRACE_disable_speed())) {
335     VariableType* speed = container->type_->by_name_or_create("speed", "");
336     speed->set_calling_container(container);
337     speed->set_event(0, host.get_speed());
338
339     VariableType* cores = container->type_->by_name_or_create("core_count", "");
340     cores->set_calling_container(container);
341     cores->set_event(0, host.get_core_count());
342   }
343
344   if (TRACE_uncategorized())
345     container->type_->by_name_or_create("speed_used", "0.5 0.5 0.5");
346
347   if (TRACE_smpi_is_enabled() && TRACE_smpi_is_grouped()) {
348     ContainerType* mpi = container->type_->by_name_or_create<ContainerType>("MPI");
349     mpi->by_name_or_create<StateType>("MPI_STATE");
350     // TODO See if we can move this to the LoadBalancer plugin
351     root->type_->by_name_or_create("MIGRATE_LINK", mpi, mpi);
352     mpi->by_name_or_create<StateType>("MIGRATE_STATE");
353   }
354 }
355
356 static void on_host_speed_change(s4u::Host const& host)
357 {
358   Container::by_name(host.get_name())
359       ->get_variable("speed")
360       ->set_event(surf_get_clock(), host.get_core_count() * host.get_available_speed());
361 }
362
363 static void on_action_state_change(kernel::resource::Action const& action,
364                                    kernel::resource::Action::State /* previous */)
365 {
366   int n = action.get_variable()->get_number_of_constraint();
367
368   for (int i = 0; i < n; i++) {
369     double value = action.get_variable()->get_value() * action.get_variable()->get_constraint_weight(i);
370     /* Beware of composite actions: ptasks put links and cpus together. Extra pb: we cannot dynamic_cast from void* */
371     kernel::resource::Resource* resource = action.get_variable()->get_constraint(i)->get_id();
372     const kernel::resource::Cpu* cpu     = dynamic_cast<kernel::resource::Cpu*>(resource);
373
374     if (cpu != nullptr)
375       resource_set_utilization("HOST", "speed_used", cpu->get_cname(), action.get_category(), value,
376                                action.get_last_update(), SIMIX_get_clock() - action.get_last_update());
377
378     const kernel::resource::LinkImpl* link = dynamic_cast<kernel::resource::LinkImpl*>(resource);
379
380     if (link != nullptr)
381       resource_set_utilization("LINK", "bandwidth_used", link->get_cname(), action.get_category(), value,
382                                action.get_last_update(), SIMIX_get_clock() - action.get_last_update());
383   }
384 }
385
386 static void on_link_bandwidth_change(s4u::Link const& link)
387 {
388   Container::by_name(link.get_name())
389       ->get_variable("bandwidth")
390       ->set_event(surf_get_clock(), sg_bandwidth_factor * link.get_bandwidth());
391 }
392
393 static void on_netpoint_creation(kernel::routing::NetPoint const& netpoint)
394 {
395   if (netpoint.is_router())
396     new RouterContainer(netpoint.get_name(), currentContainer.back());
397 }
398
399 static void on_platform_created()
400 {
401   currentContainer.clear();
402   std::set<std::string>* filter = new std::set<std::string>();
403   XBT_DEBUG("Starting graph extraction.");
404   recursiveGraphExtraction(s4u::Engine::get_instance()->get_netzone_root(), Container::get_root(), filter);
405   XBT_DEBUG("Graph extraction finished.");
406   delete filter;
407   TRACE_paje_dump_buffer(true);
408 }
409
410 static void on_actor_creation(s4u::Actor const& actor)
411 {
412   const Container* root = Container::get_root();
413   Container* container  = Container::by_name(actor.get_host()->get_name());
414
415   container->create_child(instr_pid(actor), "ACTOR");
416   ContainerType* actor_type = container->type_->by_name_or_create<ContainerType>("ACTOR");
417   StateType* state          = actor_type->by_name_or_create<StateType>("ACTOR_STATE");
418   state->add_entity_value("suspend", "1 0 1");
419   state->add_entity_value("sleep", "1 1 0");
420   state->add_entity_value("receive", "1 0 0");
421   state->add_entity_value("send", "0 0 1");
422   state->add_entity_value("execute", "0 1 1");
423   root->type_->by_name_or_create("ACTOR_LINK", actor_type, actor_type);
424   root->type_->by_name_or_create("ACTOR_TASK_LINK", actor_type, actor_type);
425
426   std::string container_name = instr_pid(actor);
427   actor.on_exit([container_name](bool failed) {
428     if (failed)
429       // kill means that this actor no longer exists, let's destroy it
430       Container::by_name(container_name)->remove_from_parent();
431   });
432 }
433
434 static void on_actor_host_change(s4u::Actor const& actor, s4u::Host const& /*previous_location*/)
435 {
436   static long long int counter = 0;
437   container_t container        = Container::by_name(instr_pid(actor));
438   LinkType* link               = Container::get_root()->get_link("ACTOR_LINK");
439
440   // start link
441   link->start_event(container, "M", std::to_string(counter));
442   // destroy existing container of this process
443   container->remove_from_parent();
444   // create new container on the new_host location
445   Container::by_name(actor.get_host()->get_name())->create_child(instr_pid(actor), "ACTOR");
446   // end link
447   link->end_event(Container::by_name(instr_pid(actor)), "M", std::to_string(counter));
448   counter++;
449 }
450
451 static void on_vm_creation(s4u::Host const& host)
452 {
453   const Container* container = new HostContainer(host, currentContainer.back());
454   const Container* root      = Container::get_root();
455   ContainerType* vm          = container->type_->by_name_or_create<ContainerType>("VM");
456   StateType* state           = vm->by_name_or_create<StateType>("VM_STATE");
457   state->add_entity_value("suspend", "1 0 1");
458   state->add_entity_value("sleep", "1 1 0");
459   state->add_entity_value("receive", "1 0 0");
460   state->add_entity_value("send", "0 0 1");
461   state->add_entity_value("execute", "0 1 1");
462   root->type_->by_name_or_create("VM_LINK", vm, vm);
463   root->type_->by_name_or_create("VM_ACTOR_LINK", vm, vm);
464 }
465
466 void define_callbacks()
467 {
468   // always need the callbacks to zones (we need only the root zone), to create the rootContainer and the rootType
469   // properly
470   if (TRACE_needs_platform()) {
471     s4u::Engine::on_platform_created.connect(on_platform_created);
472     s4u::Host::on_creation.connect(on_host_creation);
473     s4u::Host::on_speed_change.connect(on_host_speed_change);
474     s4u::Link::on_creation.connect(on_link_creation);
475     s4u::Link::on_bandwidth_change.connect(on_link_bandwidth_change);
476     s4u::NetZone::on_seal.connect([](s4u::NetZone const& /*netzone*/) { currentContainer.pop_back(); });
477     kernel::routing::NetPoint::on_creation.connect(on_netpoint_creation);
478   }
479   s4u::NetZone::on_creation.connect(on_netzone_creation);
480   s4u::Engine::on_time_advance.connect([](double /*time_delta*/) { TRACE_paje_dump_buffer(false); });
481
482   kernel::resource::CpuAction::on_state_change.connect(on_action_state_change);
483   s4u::Link::on_communication_state_change.connect(on_action_state_change);
484
485   if (TRACE_actor_is_enabled()) {
486     s4u::Actor::on_creation.connect(on_actor_creation);
487     s4u::Actor::on_destruction.connect([](s4u::Actor const& actor) {
488       auto container = Container::by_name_or_null(instr_pid(actor));
489       if (container != nullptr)
490         container->remove_from_parent();
491     });
492     s4u::Actor::on_suspend.connect([](s4u::Actor const& actor) {
493       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("suspend");
494     });
495     s4u::Actor::on_resume.connect(
496         [](s4u::Actor const& actor) { Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event(); });
497     s4u::Actor::on_sleep.connect([](s4u::Actor const& actor) {
498       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("sleep");
499     });
500     s4u::Actor::on_wake_up.connect(
501         [](s4u::Actor const& actor) { Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event(); });
502     s4u::Exec::on_start.connect([](simgrid::s4u::Actor const& actor, s4u::Exec const&) {
503       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("execute");
504     });
505     s4u::Exec::on_completion.connect([](s4u::Actor const& actor, s4u::Exec const&) {
506       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event();
507     });
508     s4u::Comm::on_sender_start.connect([](s4u::Actor const& actor) {
509       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("send");
510     });
511     s4u::Comm::on_receiver_start.connect([](s4u::Actor const& actor) {
512       Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("receive");
513     });
514     s4u::Comm::on_completion.connect(
515         [](s4u::Actor const& actor) { Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event(); });
516     s4u::Actor::on_host_change.connect(on_actor_host_change);
517   }
518
519   if (TRACE_vm_is_enabled()) {
520     s4u::Host::on_creation.connect(on_vm_creation);
521     s4u::VirtualMachine::on_start.connect([](s4u::VirtualMachine const& vm) {
522       Container::by_name(vm.get_name())->get_state("VM_STATE")->push_event("start");
523     });
524     s4u::VirtualMachine::on_started.connect(
525         [](s4u::VirtualMachine const& vm) { Container::by_name(vm.get_name())->get_state("VM_STATE")->pop_event(); });
526     s4u::VirtualMachine::on_suspend.connect([](s4u::VirtualMachine const& vm) {
527       Container::by_name(vm.get_name())->get_state("VM_STATE")->push_event("suspend");
528     });
529     s4u::VirtualMachine::on_resume.connect(
530         [](s4u::VirtualMachine const& vm) { Container::by_name(vm.get_name())->get_state("VM_STATE")->pop_event(); });
531     s4u::Host::on_destruction.connect(
532         [](s4u::Host const& host) { Container::by_name(host.get_name())->remove_from_parent(); });
533   }
534 }
535 } // namespace instr
536 } // namespace simgrid