1 /* Copyright (c) 2010-2020. The SimGrid Team. All rights reserved. */
3 /* This program is free software; you can redistribute it and/or modify it
4 * under the terms of the license (GNU LGPL) which comes with this package. */
6 #include "src/instr/instr_private.hpp"
8 #include "simgrid/kernel/routing/NetPoint.hpp"
9 #include "simgrid/kernel/routing/NetZoneImpl.hpp"
10 #include "simgrid/s4u/Actor.hpp"
11 #include "simgrid/s4u/Comm.hpp"
12 #include "simgrid/s4u/Engine.hpp"
13 #include "simgrid/s4u/Exec.hpp"
14 #include "simgrid/s4u/Host.hpp"
15 #include "simgrid/s4u/VirtualMachine.hpp"
16 #include "src/surf/cpu_interface.hpp"
17 #include "src/surf/network_interface.hpp"
18 #include "src/surf/surf_interface.hpp"
19 #include "src/surf/xml/platf_private.hpp"
20 #include "surf/surf.hpp"
21 #include "xbt/graph.h"
25 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(instr_routing, instr, "Tracing platform hierarchy");
// Builds the identifier used in the trace for an actor: "<actor name>-<pid>".
27 std::string instr_pid(simgrid::s4u::Actor const& proc)
29 return std::string(proc.get_name()) + "-" + std::to_string(proc.get_pid());
// Returns the data attached to a graph node, reinterpreted as a C string.
// The Graphviz export in this file prints that string as the node's label.
32 static const char* instr_node_name(const s_xbt_node_t* node)
34 return static_cast<char*>(xbt_graph_node_get_data(node));
// Looks for the lowest common ancestor of containers a1 and a2 in the container hierarchy.
// The ancestor chain of each container (starting at its father_) is stored in a vector, and the two
// vectors are then compared element by element starting from their last entries.
// NOTE(review): the loops that fill the vectors, the body of the comparison and the return statements
// are not visible in this excerpt; confirm them before relying on the description above.
37 static container_t lowestCommonAncestor(const simgrid::instr::Container* a1, const simgrid::instr::Container* a2)
39 // this is only an optimization (since most of a1 and a2 share the same parent)
40 if (a1->father_ == a2->father_)
43 // create an array with all ancestors of a1
44 std::vector<container_t> ancestors_a1;
45 container_t p = a1->father_;
47 ancestors_a1.push_back(p);
51 // create an array with all ancestors of a2
52 std::vector<container_t> ancestors_a2;
55 ancestors_a2.push_back(p);
59 // find the lowest ancestor
// indices are signed so that the loop condition can detect running past the front of either vector
61 int i = ancestors_a1.size() - 1;
62 int j = ancestors_a2.size() - 1;
63 while (i >= 0 && j >= 0) {
64 container_t a1p = ancestors_a1.at(i);
65 const simgrid::instr::Container* a2p = ancestors_a2.at(j);
// Emits a "topology" link (start event on src, end event on dst) between two containers.
//  - src, dst: the two containers to connect; a container named "__loopback__" is logged as ignored.
//  - filter: set of concatenated "<name><name>" keys for pairs already handled. Both orderings are
//    inserted, so a pair is recognized whichever direction it is presented in.
// The link type is looked up or created on the type of the lowest common ancestor of src and dst.
// NOTE(review): the early returns after the "ignoring"/"already registered" logs are not visible in this
// excerpt; presumably each of those branches returns.
77 static void linkContainers(container_t src, container_t dst, std::set<std::string>* filter)
80 if (src->get_name() == "__loopback__" || dst->get_name() == "__loopback__") {
81 XBT_DEBUG(" linkContainers: ignoring loopback link");
// the link is declared on the closest container that encloses both endpoints
86 container_t father = lowestCommonAncestor(src, dst);
88 xbt_die("common father unknown, this is a tracing problem");
91 // check if we already registered this pair (we only need one direction)
92 std::string aux1 = src->get_name() + dst->get_name();
93 std::string aux2 = dst->get_name() + src->get_name();
94 if (filter->find(aux1) != filter->end()) {
95 XBT_DEBUG(" linkContainers: already registered %s <-> %s (1)", src->get_cname(), dst->get_cname());
98 if (filter->find(aux2) != filter->end()) {
99 XBT_DEBUG(" linkContainers: already registered %s <-> %s (2)", dst->get_cname(), src->get_cname());
103 // ok, not found, register it
104 filter->insert(aux1);
105 filter->insert(aux2);
// link type name: "<father type>-<src type><src type id>-<dst type><dst type id>"
108 std::string link_typename = father->type_->get_name() + "-" + src->type_->get_name() +
109 std::to_string(src->type_->get_id()) + "-" + dst->type_->get_name() +
110 std::to_string(dst->type_->get_id());
111 simgrid::instr::LinkType* link = father->type_->by_name_or_create(link_typename, src->type_, dst->type_);
112 link->set_calling_container(father);
// the same key is given to the start and end events below
// NOTE(review): the statement that advances `counter` is not visible here; presumably it is incremented per call
115 static long long counter = 0;
117 std::string key = std::to_string(counter);
120 link->start_event(src, "topology", key);
121 link->end_event(dst, "topology", key);
123 XBT_DEBUG(" linkContainers %s <-> %s", src->get_cname(), dst->get_cname());
// Declares the topology links of a netzone and of all its descendants in the trace.
//  - netzone: the zone to process
//  - container: the tracing container matching `netzone`; child containers are found by zone name
//  - filter: set forwarded to linkContainers() to remember pairs that were already linked
// Child zones are handled first. The zone's routing graph is then requested through get_graph(), and each
// edge is turned into a link between the containers named after the edge's endpoints.
// When TRACE_platform_topology() is false only a debug message is visible here (presumably followed by
// an early return; confirm).
126 static void recursiveGraphExtraction(const simgrid::s4u::NetZone* netzone, container_t container,
127 std::set<std::string>* filter)
129 if (not TRACE_platform_topology()) {
130 XBT_DEBUG("Graph extraction disabled by user.");
133 XBT_DEBUG("Graph extraction for NetZone = %s", netzone->get_cname());
134 if (not netzone->get_children().empty()) {
135 // bottom-up recursion
136 for (auto const& nz_son : netzone->get_children()) {
// at() throws if no child container carries the name of the child zone
137 container_t child_container = container->children_.at(nz_son->get_name());
138 recursiveGraphExtraction(nz_son, child_container, filter);
// temporary graph and lookup maps filled by get_graph()
// NOTE(review): `nodes` and `edges` are allocated with new; confirm they are deleted in the lines not shown here
142 xbt_graph_t graph = xbt_graph_new_graph(0, nullptr);
143 std::map<std::string, xbt_node_t>* nodes = new std::map<std::string, xbt_node_t>();
144 std::map<std::string, xbt_edge_t>* edges = new std::map<std::string, xbt_edge_t>();
146 netzone->get_impl()->get_graph(graph, nodes, edges);
147 for (auto elm : *edges) {
148 const xbt_edge* edge = elm.second;
// the data attached to each graph node is read as the name of the corresponding container
149 linkContainers(simgrid::instr::Container::by_name(static_cast<const char*>(edge->src->data)),
150 simgrid::instr::Container::by_name(static_cast<const char*>(edge->dst->data)), filter);
154 xbt_graph_free_graph(graph, xbt_free_f, xbt_free_f, nullptr);
158 * user categories support
// Walks the type tree rooted at `root` and declares the variable types of a new category:
//  - "p" + new_typename under every type named "HOST" or "VM"
//  - "b" + new_typename under every type named "LINK"
// `color` is forwarded as-is to by_name_or_create(). All children of `root` are visited recursively.
160 static void recursiveNewVariableType(const std::string& new_typename, const std::string& color,
161 simgrid::instr::Type* root)
163 if (root->get_name() == "HOST" || root->get_name() == "VM")
164 root->by_name_or_create(std::string("p") + new_typename, color);
166 if (root->get_name() == "LINK")
167 root->by_name_or_create(std::string("b") + new_typename, color);
169 for (auto const& elm : root->children_) {
170 recursiveNewVariableType(new_typename, color, elm.second.get());
// Declares the category variable types for `new_typename` over the whole type tree,
// starting from the type of the root container.
174 void instr_new_variable_type(const std::string& new_typename, const std::string& color)
176 recursiveNewVariableType(new_typename, color, simgrid::instr::Container::get_root()->type_);
// Walks the type tree rooted at `root` and, under every type whose name equals `father_type`,
// looks up or creates a variable type called `new_typename` with the given `color`.
179 static void recursiveNewUserVariableType(const std::string& father_type, const std::string& new_typename,
180 const std::string& color, simgrid::instr::Type* root)
182 if (root->get_name() == father_type) {
183 root->by_name_or_create(new_typename, color);
185 for (auto const& elm : root->children_)
186 recursiveNewUserVariableType(father_type, new_typename, color, elm.second.get());
// Declares the user variable type `new_typename` under every type named `father_type`,
// searching the whole type tree from the type of the root container.
189 void instr_new_user_variable_type(const std::string& father_type, const std::string& new_typename,
190 const std::string& color)
192 recursiveNewUserVariableType(father_type, new_typename, color, simgrid::instr::Container::get_root()->type_);
// Walks the type tree rooted at `root` and, under every type whose name equals `father_type`,
// looks up or creates a StateType called `new_typename`.
195 static void recursiveNewUserStateType(const std::string& father_type, const std::string& new_typename,
196 simgrid::instr::Type* root)
198 if (root->get_name() == father_type)
199 root->by_name_or_create<simgrid::instr::StateType>(new_typename);
201 for (auto const& elm : root->children_)
202 recursiveNewUserStateType(father_type, new_typename, elm.second.get());
// Declares the user state type `new_typename` under every type named `father_type`,
// searching the whole type tree from the type of the root container.
205 void instr_new_user_state_type(const std::string& father_type, const std::string& new_typename)
207 recursiveNewUserStateType(father_type, new_typename, simgrid::instr::Container::get_root()->type_);
// Walks the type tree rooted at `root` and adds the entity value `val` (with `color`) to every type
// whose name equals `type_name`.
// NOTE(review): the matching type is converted with an unchecked static_cast, so this assumes that any
// type carrying that name really is a StateType.
210 static void recursiveNewValueForUserStateType(const std::string& type_name, const char* val, const std::string& color,
211 simgrid::instr::Type* root)
213 if (root->get_name() == type_name)
214 static_cast<simgrid::instr::StateType*>(root)->add_entity_value(val, color);
216 for (auto const& elm : root->children_)
217 recursiveNewValueForUserStateType(type_name, val, color, elm.second.get());
// Adds the value `value` (with `color`) to every type named `type_name`,
// searching the whole type tree from the type of the root container.
220 void instr_new_value_for_user_state_type(const std::string& type_name, const char* value, const std::string& color)
222 recursiveNewValueForUserStateType(type_name, value, color, simgrid::instr::Container::get_root()->type_);
// Accumulates the routing graphs of `netzone` and of all its descendants into one shared graph.
// The same `graph`, `nodes` and `edges` objects are handed to every zone's get_graph() call;
// child zones are processed before the zone itself.
225 static void recursiveXBTGraphExtraction(const s_xbt_graph_t* graph, std::map<std::string, xbt_node_t>* nodes,
226 std::map<std::string, xbt_edge_t>* edges, const_sg_netzone_t netzone)
228 // bottom-up recursion
229 for (auto const& netzone_child : netzone->get_children())
230 recursiveXBTGraphExtraction(graph, nodes, edges, netzone_child);
232 netzone->get_impl()->get_graph(graph, nodes, edges);
// Writes the platform graph (all zones, starting from the root netzone) to `output_filename` as Graphviz text.
// Aborts through xbt_assert() if the output file cannot be opened.
// NOTE(review): the declaration of `fs` and the conditions choosing between the "digraph"/"->" and
// "graph"/"--" variants are not visible in this excerpt; presumably they depend on whether the graph is directed.
238 void platform_graph_export_graphviz(const std::string& output_filename)
// NOTE(review): `nodes` and `edges` are allocated with new; confirm they are deleted in the lines not shown here
240 xbt_graph_t g = xbt_graph_new_graph(0, nullptr);
241 std::map<std::string, xbt_node_t>* nodes = new std::map<std::string, xbt_node_t>();
242 std::map<std::string, xbt_edge_t>* edges = new std::map<std::string, xbt_edge_t>();
243 recursiveXBTGraphExtraction(g, nodes, edges, s4u::Engine::get_instance()->get_netzone_root());
246 fs.open(output_filename, std::ofstream::out);
247 xbt_assert(not fs.fail(), "Failed to open %s", output_filename.c_str());
// graph header
250 fs << "digraph test {" << std::endl;
252 fs << "graph test {" << std::endl;
254 fs << " graph [overlap=scale]" << std::endl;
256 fs << " node [shape=box, style=filled]" << std::endl;
257 fs << " node [width=.3, height=.3, style=filled, color=skyblue]" << std::endl << std::endl;
// one quoted statement per node, then one per edge
259 for (auto const& elm : *nodes)
260 fs << " \"" << instr_node_name(elm.second) << "\";" << std::endl;
262 for (auto const& elm : *edges) {
263 const char* src_s = instr_node_name(elm.second->src);
264 const char* dst_s = instr_node_name(elm.second->dst);
266 fs << " \"" << src_s << "\" -> \"" << dst_s << "\";" << std::endl;
268 fs << " \"" << src_s << "\" -- \"" << dst_s << "\";" << std::endl;
270 fs << "}" << std::endl;
273 xbt_graph_free_graph(g, xbt_free_f, xbt_free_f, nullptr);
// Stack of the netzone containers currently being built; back() is used as the father of new containers.
279 static std::vector<NetZoneContainer*> currentContainer; /* push and pop, used only in creation */
// Callback run when a netzone is created: creates the matching NetZoneContainer.
// If no root container exists yet, the zone becomes the root (constructed with level 0 and no father) and,
// when SMPI tracing is enabled, the MPI-related types are declared on the root type.
// Otherwise, when the platform must be traced, the new container is created under currentContainer.back()
// with the current stack depth as its level, and pushed onto the stack.
// NOTE(review): the end of the root branch is not visible in this excerpt; presumably it returns before
// the second TRACE_needs_platform() test.
280 static void on_netzone_creation(s4u::NetZone const& netzone)
282 std::string id = netzone.get_name();
283 if (Container::get_root() == nullptr) {
284 NetZoneContainer* root = new NetZoneContainer(id, 0, nullptr);
// checks that building the first container registered it as the root
285 xbt_assert(Container::get_root() == root);
287 if (TRACE_smpi_is_enabled()) {
288 ContainerType* mpi = root->type_->by_name_or_create<ContainerType>("MPI");
289 if (not TRACE_smpi_is_grouped())
290 mpi->by_name_or_create<StateType>("MPI_STATE");
291 root->type_->by_name_or_create("MPI_LINK", mpi, mpi);
292 // TODO See if we can move this to the LoadBalancer plugin
293 root->type_->by_name_or_create("MIGRATE_LINK", mpi, mpi);
294 mpi->by_name_or_create<StateType>("MIGRATE_STATE");
297 if (TRACE_needs_platform()) {
298 currentContainer.push_back(root);
303 if (TRACE_needs_platform()) {
304 NetZoneContainer* container = new NetZoneContainer(id, currentContainer.size(), currentContainer.back());
305 currentContainer.push_back(container);
// Callback run when a link is created: adds a "LINK" container under the zone container being built.
// Depending on the tracing options, the initial bandwidth and latency are recorded at time 0 and the
// "bandwidth_used" variable type is declared.
// NOTE(review): the statement following the emptiness test is not visible here; presumably it returns.
309 static void on_link_creation(s4u::Link const& link)
311 if (currentContainer.empty()) // No ongoing parsing. Are you creating the loopback?
314 Container* container = new Container(link.get_name(), "LINK", currentContainer.back());
316 if ((TRACE_categorized() || TRACE_uncategorized() || TRACE_platform()) && (not TRACE_disable_link())) {
317 VariableType* bandwidth = container->type_->by_name_or_create("bandwidth", "");
318 bandwidth->set_calling_container(container);
319 bandwidth->set_event(0, link.get_bandwidth());
320 VariableType* latency = container->type_->by_name_or_create("latency", "");
321 latency->set_calling_container(container);
322 latency->set_event(0, link.get_latency());
324 if (TRACE_uncategorized()) {
325 container->type_->by_name_or_create("bandwidth_used", "0.5 0.5 0.5");
// Callback run when a host is created: adds a HostContainer under the zone container being built.
// Depending on the tracing options, the initial speed and core count are recorded at time 0, the
// "speed_used" variable type is declared, and (SMPI enabled and grouped) the MPI types are declared
// under the host type.
// NOTE(review): unlike on_link_creation(), no emptiness test on currentContainer is visible before
// back() is called; confirm that a zone is always open when this callback runs.
329 static void on_host_creation(s4u::Host const& host)
331 Container* container = new HostContainer(host, currentContainer.back());
332 const Container* root = Container::get_root();
334 if ((TRACE_categorized() || TRACE_uncategorized() || TRACE_platform()) && (not TRACE_disable_speed())) {
335 VariableType* speed = container->type_->by_name_or_create("speed", "");
336 speed->set_calling_container(container);
337 speed->set_event(0, host.get_speed());
339 VariableType* cores = container->type_->by_name_or_create("core_count", "");
340 cores->set_calling_container(container);
341 cores->set_event(0, host.get_core_count());
344 if (TRACE_uncategorized())
345 container->type_->by_name_or_create("speed_used", "0.5 0.5 0.5");
347 if (TRACE_smpi_is_enabled() && TRACE_smpi_is_grouped()) {
348 ContainerType* mpi = container->type_->by_name_or_create<ContainerType>("MPI");
349 mpi->by_name_or_create<StateType>("MPI_STATE");
350 // TODO See if we can move this to the LoadBalancer plugin
351 root->type_->by_name_or_create("MIGRATE_LINK", mpi, mpi);
352 mpi->by_name_or_create<StateType>("MIGRATE_STATE");
// Callback run when a host's speed changes: records, at the current surf clock, a new value for the
// host container's "speed" variable, computed as core count times available speed.
356 static void on_host_speed_change(s4u::Host const& host)
358 Container::by_name(host.get_name())
359 ->get_variable("speed")
360 ->set_event(surf_get_clock(), host.get_core_count() * host.get_available_speed());
// Callback run when an action changes state: reports resource utilization for every constraint of the
// action's variable. The reported value is the variable's value multiplied by the constraint weight.
// A constraint whose id is a Cpu is reported as "HOST"/"speed_used"; one whose id is a LinkImpl is
// reported as "LINK"/"bandwidth_used". The reporting interval starts at the action's last update and
// lasts until the current SIMIX clock. The previous state is ignored.
// NOTE(review): the null tests guarding the results of the two dynamic_casts are not visible in this
// excerpt; presumably each resource_set_utilization() call is guarded.
363 static void on_action_state_change(kernel::resource::Action const& action,
364 kernel::resource::Action::State /* previous */)
366 int n = action.get_variable()->get_number_of_constraint();
368 for (int i = 0; i < n; i++) {
369 double value = action.get_variable()->get_value() * action.get_variable()->get_constraint_weight(i);
370 /* Beware of composite actions: ptasks put links and cpus together. Extra pb: we cannot dynamic_cast from void* */
371 kernel::resource::Resource* resource = action.get_variable()->get_constraint(i)->get_id();
372 const kernel::resource::Cpu* cpu = dynamic_cast<kernel::resource::Cpu*>(resource);
375 resource_set_utilization("HOST", "speed_used", cpu->get_cname(), action.get_category(), value,
376 action.get_last_update(), SIMIX_get_clock() - action.get_last_update());
378 const kernel::resource::LinkImpl* link = dynamic_cast<kernel::resource::LinkImpl*>(resource);
381 resource_set_utilization("LINK", "bandwidth_used", link->get_cname(), action.get_category(), value,
382 action.get_last_update(), SIMIX_get_clock() - action.get_last_update());
// Callback run when a link's bandwidth changes: records, at the current surf clock, a new value for the
// link container's "bandwidth" variable, computed as sg_bandwidth_factor times the link bandwidth.
386 static void on_link_bandwidth_change(s4u::Link const& link)
388 Container::by_name(link.get_name())
389 ->get_variable("bandwidth")
390 ->set_event(surf_get_clock(), sg_bandwidth_factor * link.get_bandwidth());
// Callback run when a netpoint is created: only routers get a container here, a RouterContainer
// created under the zone container being built.
393 static void on_netpoint_creation(kernel::routing::NetPoint const& netpoint)
395 if (netpoint.is_router())
396 new RouterContainer(netpoint.get_name(), currentContainer.back());
// Callback run once the platform is created: empties the container stack, declares the topology links
// of the whole platform starting at the root netzone, then calls TRACE_paje_dump_buffer(true).
// NOTE(review): `filter` is allocated with new; confirm it is deleted in the line not shown here.
399 static void on_platform_created()
401 currentContainer.clear();
402 std::set<std::string>* filter = new std::set<std::string>();
403 XBT_DEBUG("Starting graph extraction.");
404 recursiveGraphExtraction(s4u::Engine::get_instance()->get_netzone_root(), Container::get_root(), filter);
405 XBT_DEBUG("Graph extraction finished.");
407 TRACE_paje_dump_buffer(true);
// Callback run when an actor is created: adds an "ACTOR" container (named by instr_pid()) under the
// container of the actor's host, declares the ACTOR type with its ACTOR_STATE values, and declares
// the ACTOR_LINK and ACTOR_TASK_LINK link types on the root type.
// An on_exit handler is also registered to remove the actor's container.
// NOTE(review): the condition on `failed` inside the handler is not visible in this excerpt; per the
// comment there, removal presumably happens only when the actor was killed.
410 static void on_actor_creation(s4u::Actor const& actor)
412 const Container* root = Container::get_root();
413 Container* container = Container::by_name(actor.get_host()->get_name());
415 container->create_child(instr_pid(actor), "ACTOR");
416 ContainerType* actor_type = container->type_->by_name_or_create<ContainerType>("ACTOR");
417 StateType* state = actor_type->by_name_or_create<StateType>("ACTOR_STATE");
// each state value is declared with a color string of three space-separated components
418 state->add_entity_value("suspend", "1 0 1");
419 state->add_entity_value("sleep", "1 1 0");
420 state->add_entity_value("receive", "1 0 0");
421 state->add_entity_value("send", "0 0 1");
422 state->add_entity_value("execute", "0 1 1");
423 root->type_->by_name_or_create("ACTOR_LINK", actor_type, actor_type);
424 root->type_->by_name_or_create("ACTOR_TASK_LINK", actor_type, actor_type);
// the name is captured by value so the handler does not refer to the actor object
426 std::string container_name = instr_pid(actor);
427 actor.on_exit([container_name](bool failed) {
429 // kill means that this actor no longer exists, let's destroy it
430 Container::by_name(container_name)->remove_from_parent();
// Callback run when an actor moves to another host: traces the migration as an "ACTOR_LINK" event with
// value "M", starting on the actor's old container and ending on a new container created under the new host.
// The previous location argument is unused; the new host is read from the actor itself.
// NOTE(review): the statement that advances `counter` is not visible here; presumably it is incremented
// per migration so that each start/end pair gets its own key.
434 static void on_actor_host_change(s4u::Actor const& actor, s4u::Host const& /*previous_location*/)
436 static long long int counter = 0;
437 container_t container = Container::by_name(instr_pid(actor));
438 LinkType* link = Container::get_root()->get_link("ACTOR_LINK");
441 link->start_event(container, "M", std::to_string(counter));
442 // destroy existing container of this process
443 container->remove_from_parent();
444 // create new container on the new_host location
445 Container::by_name(actor.get_host()->get_name())->create_child(instr_pid(actor), "ACTOR");
// the container is looked up again by name: the old pointer was removed above
447 link->end_event(Container::by_name(instr_pid(actor)), "M", std::to_string(counter));
// Callback for VM tracing, connected to the host creation signal by define_callbacks(): creates a
// HostContainer under the zone container being built, declares the VM type with its VM_STATE values,
// and declares the VM_LINK and VM_ACTOR_LINK link types on the root type.
// NOTE(review): no emptiness test on currentContainer is visible before back() is called; confirm
// that a zone is always open when this callback runs.
451 static void on_vm_creation(s4u::Host const& host)
453 const Container* container = new HostContainer(host, currentContainer.back());
454 const Container* root = Container::get_root();
455 ContainerType* vm = container->type_->by_name_or_create<ContainerType>("VM");
456 StateType* state = vm->by_name_or_create<StateType>("VM_STATE");
457 state->add_entity_value("suspend", "1 0 1");
458 state->add_entity_value("sleep", "1 1 0");
459 state->add_entity_value("receive", "1 0 0");
460 state->add_entity_value("send", "0 0 1");
461 state->add_entity_value("execute", "0 1 1");
462 root->type_->by_name_or_create("VM_LINK", vm, vm);
463 root->type_->by_name_or_create("VM_ACTOR_LINK", vm, vm);
// Connects the tracing callbacks of this file to the simulation signals, according to the enabled
// tracing options (platform, actors, VMs).
// NOTE(review): some closing lines and possibly some conditions are not visible in this excerpt, so the
// exact nesting of the connections below (for instance which ones sit inside the TRACE_needs_platform()
// test) must be confirmed against the full file.
466 void define_callbacks()
468 // always need the callbacks to zones (we need only the root zone), to create the rootContainer and the rootType
470 if (TRACE_needs_platform()) {
471 s4u::Engine::on_platform_created.connect(on_platform_created);
472 s4u::Host::on_creation.connect(on_host_creation);
473 s4u::Host::on_speed_change.connect(on_host_speed_change);
474 s4u::Link::on_creation.connect(on_link_creation);
475 s4u::Link::on_bandwidth_change.connect(on_link_bandwidth_change);
// sealing a zone ends its construction: drop it from the stack of containers being built
476 s4u::NetZone::on_seal.connect([](s4u::NetZone const& /*netzone*/) { currentContainer.pop_back(); });
477 kernel::routing::NetPoint::on_creation.connect(on_netpoint_creation);
479 s4u::NetZone::on_creation.connect(on_netzone_creation);
// on every time advance, call TRACE_paje_dump_buffer(false)
480 s4u::Engine::on_time_advance.connect([](double /*time_delta*/) { TRACE_paje_dump_buffer(false); });
// resource utilization is reported for both CPU actions and link communications
482 kernel::resource::CpuAction::on_state_change.connect(on_action_state_change);
483 s4u::Link::on_communication_state_change.connect(on_action_state_change);
// actor tracing: container lifecycle, plus push/pop of ACTOR_STATE values around each activity
485 if (TRACE_actor_is_enabled()) {
486 s4u::Actor::on_creation.connect(on_actor_creation);
487 s4u::Actor::on_destruction.connect([](s4u::Actor const& actor) {
// the container may already be gone, hence the nullable lookup
488 auto container = Container::by_name_or_null(instr_pid(actor));
489 if (container != nullptr)
490 container->remove_from_parent();
492 s4u::Actor::on_suspend.connect([](s4u::Actor const& actor) {
493 Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("suspend");
495 s4u::Actor::on_resume.connect(
496 [](s4u::Actor const& actor) { Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event(); });
497 s4u::Actor::on_sleep.connect([](s4u::Actor const& actor) {
498 Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("sleep");
500 s4u::Actor::on_wake_up.connect(
501 [](s4u::Actor const& actor) { Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event(); });
502 s4u::Exec::on_start.connect([](simgrid::s4u::Actor const& actor, s4u::Exec const&) {
503 Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("execute");
505 s4u::Exec::on_completion.connect([](s4u::Actor const& actor, s4u::Exec const&) {
506 Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event();
508 s4u::Comm::on_sender_start.connect([](s4u::Actor const& actor) {
509 Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("send");
511 s4u::Comm::on_receiver_start.connect([](s4u::Actor const& actor) {
512 Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->push_event("receive");
514 s4u::Comm::on_completion.connect(
515 [](s4u::Actor const& actor) { Container::by_name(instr_pid(actor))->get_state("ACTOR_STATE")->pop_event(); });
516 s4u::Actor::on_host_change.connect(on_actor_host_change);
// VM tracing: container creation/removal, plus push/pop of VM_STATE values
519 if (TRACE_vm_is_enabled()) {
520 s4u::Host::on_creation.connect(on_vm_creation);
521 s4u::VirtualMachine::on_start.connect([](s4u::VirtualMachine const& vm) {
522 Container::by_name(vm.get_name())->get_state("VM_STATE")->push_event("start");
524 s4u::VirtualMachine::on_started.connect(
525 [](s4u::VirtualMachine const& vm) { Container::by_name(vm.get_name())->get_state("VM_STATE")->pop_event(); });
526 s4u::VirtualMachine::on_suspend.connect([](s4u::VirtualMachine const& vm) {
527 Container::by_name(vm.get_name())->get_state("VM_STATE")->push_event("suspend");
529 s4u::VirtualMachine::on_resume.connect(
530 [](s4u::VirtualMachine const& vm) { Container::by_name(vm.get_name())->get_state("VM_STATE")->pop_event(); });
531 s4u::Host::on_destruction.connect(
532 [](s4u::Host const& host) { Container::by_name(host.get_name())->remove_from_parent(); });
536 } // namespace simgrid