1 /* Copyright (c) 2010-2022. The SimGrid Team. All rights reserved. */
3 /* This program is free software; you can redistribute it and/or modify it
4 * under the terms of the license (GNU LGPL) which comes with this package. */
6 #include <simgrid/Exception.hpp>
7 #include <simgrid/s4u/Engine.hpp>
9 #include "src/instr/instr_private.hpp"
10 #include "xbt/config.hpp"
11 #include "xbt/xbt_os_time.h"
15 #include <direct.h> // _mkdir
// Logging categories of the tracing module: "instr" is the parent category and "instr_config" is
// declared as a subcategory of it (through the *_DEFAULT_SUBCATEGORY macro) for this file.
22 XBT_LOG_NEW_CATEGORY(instr, "Logging the behavior of the tracing system (used for Visualization/Analysis of simulations)");
23 XBT_LOG_NEW_DEFAULT_SUBCATEGORY (instr_config, instr, "Configuration");
// Main trace output stream. The Paje handlers of this file write their event lines to it; in TI mode
// it receives the name of each per-container file that gets created.
25 std::ofstream tracing_file;
// Output stream associated with each container in TI mode. The pointed-to streams are allocated in
// on_container_creation_ti() and released in on_container_destruction_ti().
26 std::map<const simgrid::instr::Container*, std::ofstream*> tracing_files; // TI specific
// Names of the configuration options that are referenced from more than one place in this file
// (flag declarations, help output, configuration lookups).
28 constexpr char OPT_TRACING_BASIC[] = "tracing/basic";
29 constexpr char OPT_TRACING_COMMENT_FILE[] = "tracing/comment-file";
30 constexpr char OPT_TRACING_DISABLE_DESTROY[] = "tracing/disable-destroy";
31 constexpr char OPT_TRACING_FORMAT_TI_ONEFILE[] = "tracing/smpi/format/ti-one-file";
32 constexpr char OPT_TRACING_SMPI[] = "tracing/smpi";
33 constexpr char OPT_TRACING_TOPOLOGY[] = "tracing/platform/topology";
// Boolean configuration flags of the tracing module. Where the initializer is fully visible, each flag
// is built from an option name, a description and a default value; the TRACE_*() accessors below
// expose their values to the rest of the code.
// NOTE(review): in this view some initializers lack their option-name or default-value line
// (e.g. trace_actor_enabled, trace_smpi_sleeping, trace_uncategorized, trace_basic,
// trace_display_sizes) -- check the complete file before editing them.
35 static simgrid::config::Flag<bool> trace_enabled{
36 "tracing", "Enable the tracing system. You have to enable this option to use other tracing options.", false};
38 static simgrid::config::Flag<bool> trace_actor_enabled{
40 "Trace the behavior of all categorized actors, grouping them by host. "
41 "Can be used to track actor location if the simulator does actor migration.",
44 static simgrid::config::Flag<bool> trace_vm_enabled{"tracing/vm", "Trace the behavior of all virtual machines.", false};
46 static simgrid::config::Flag<bool> trace_platform{"tracing/platform",
47 "Register the platform in the trace as a hierarchy.", false};
49 static simgrid::config::Flag<bool> trace_platform_topology{
50 OPT_TRACING_TOPOLOGY, "Register the platform topology in the trace as a graph.", true};
51 static simgrid::config::Flag<bool> trace_smpi_enabled{OPT_TRACING_SMPI, "Tracing of the SMPI interface.", false};
52 static simgrid::config::Flag<bool> trace_smpi_grouped{"tracing/smpi/group", "Group MPI processes by host.", false};
54 static simgrid::config::Flag<bool> trace_smpi_computing{
55 "tracing/smpi/computing", "Generate 'Computing' states to trace the out-of-SMPI parts of the application", false};
57 static simgrid::config::Flag<bool> trace_smpi_sleeping{
58 "tracing/smpi/sleeping", "Generate 'Sleeping' states for the sleeps in the application that do not pertain to SMPI",
61 static simgrid::config::Flag<bool> trace_view_internals{
62 "tracing/smpi/internals",
63 "Generate tracing events corresponding to point-to-point messages sent by SMPI collective communications", false};
65 static simgrid::config::Flag<bool> trace_categorized{
66 "tracing/categorized", "Trace categorized resource utilization of hosts and links.", false};
68 static simgrid::config::Flag<bool> trace_uncategorized{
69 "tracing/uncategorized",
70 "Trace uncategorized resource utilization of hosts and links. "
71 "To use if the simulator does not use tracing categories but resource utilization have to be traced.",
74 static simgrid::config::Flag<bool> trace_disable_destroy{OPT_TRACING_DISABLE_DESTROY,
75 "Disable platform containers destruction.", false};
76 static simgrid::config::Flag<bool> trace_basic{OPT_TRACING_BASIC, "Avoid extended events (impoverished trace file).",
79 static simgrid::config::Flag<bool> trace_display_sizes{
80 "tracing/smpi/display-sizes",
81 "Add message size information (in bytes) to the to links and states (SMPI only). "
82 "For collectives, it usually corresponds to the total number of bytes sent by a process.",
85 static simgrid::config::Flag<bool> trace_disable_link{"tracing/disable_link",
86 "Do not trace link bandwidth and latency.", false};
87 static simgrid::config::Flag<bool> trace_disable_power{"tracing/disable_power", "Do not trace host power.", false};
89 bool TRACE_needs_platform ()
91 return TRACE_actor_is_enabled() || TRACE_vm_is_enabled() || TRACE_categorized() || TRACE_uncategorized() ||
92 TRACE_platform() || (TRACE_smpi_is_enabled() && TRACE_smpi_is_grouped());
// Accessor for the global tracing switch.
// NOTE(review): the body is not visible in this view; presumably it returns trace_enabled -- confirm.
95 bool TRACE_is_enabled()
100 bool TRACE_platform()
102 return trace_platform;
105 bool TRACE_platform_topology()
107 return trace_platform_topology;
110 bool TRACE_smpi_is_enabled()
112 return (trace_smpi_enabled || TRACE_smpi_is_grouped()) && TRACE_is_enabled();
115 bool TRACE_smpi_is_grouped()
117 return trace_smpi_grouped;
120 bool TRACE_smpi_is_computing()
122 return trace_smpi_computing;
125 bool TRACE_smpi_is_sleeping()
127 return trace_smpi_sleeping;
130 bool TRACE_smpi_view_internals()
132 return trace_view_internals;
135 bool TRACE_categorized ()
137 return trace_categorized;
140 bool TRACE_uncategorized ()
142 return trace_uncategorized;
145 bool TRACE_actor_is_enabled()
147 return trace_actor_enabled && trace_enabled;
150 bool TRACE_vm_is_enabled()
152 return trace_vm_enabled && trace_enabled;
155 bool TRACE_disable_link()
157 return trace_disable_link && trace_enabled;
160 bool TRACE_disable_speed()
162 return trace_disable_power && trace_enabled;
165 bool TRACE_display_sizes ()
167 return trace_display_sizes && trace_smpi_enabled && trace_enabled;
170 static void print_line(const char* option, const char* desc, const char* longdesc)
172 std::string str = std::string("--cfg=") + option + " ";
174 int len = static_cast<int>(str.size());
175 XBT_HELP("%s%*.*s %s", str.c_str(), 30 - len, 30 - len, "", desc);
176 if (longdesc != nullptr) {
177 XBT_HELP("%s\n", longdesc);
// Body of the tracing help output (the enclosing function's signature line is not visible in this
// view): prints a banner, then one print_line() entry per documented option, each made of the option
// name, a short description and a multi-line long description.
183 XBT_HELP("Description of the tracing options accepted by this simulator:\n");
184 print_line(OPT_TRACING_SMPI, "Trace the MPI Interface (SMPI)",
185 " This option only has effect if this simulator is SMPI-based. Traces the MPI\n"
186 " interface and generates a trace that can be analyzed using Gantt-like\n"
187 " visualizations. Every MPI function (implemented by SMPI) is transformed in a\n"
188 " state, and point-to-point communications can be analyzed with arrows.");
189 print_line(OPT_TRACING_DISABLE_DESTROY, "Disable platform containers destruction",
190 " Disable the destruction of containers at the end of simulation. This can be\n"
191 " used with simulators that have a different notion of time (different from\n"
192 " the simulated time).");
193 print_line(OPT_TRACING_BASIC, "Avoid extended events (impoverished trace file).",
194 " Some visualization tools are not able to parse correctly the Paje file format.\n"
195 " Use this option if you are using one of these tools to visualize the simulation\n"
196 " trace. Keep in mind that the trace might be incomplete, without all the\n"
197 " information that would be registered otherwise.");
198 print_line(OPT_TRACING_FORMAT_TI_ONEFILE, "Only works for SMPI now, and TI output format",
199 " By default, each process outputs to a separate file, inside a filename_files folder\n"
200 " By setting this option to yes, all processes will output to only one file\n"
201 " This is meant to avoid opening thousands of files with large simulations");
202 print_line(OPT_TRACING_TOPOLOGY, "Register the platform topology as a graph",
203 " This option (enabled by default) can be used to disable the tracing of\n"
204 " the platform topology in the trace file. Sometimes, such task is really\n"
205 " time consuming, since it must get the route from each host to other hosts\n"
206 " within the same Autonomous System (AS).");
209 namespace simgrid::instr {
// State of the tracing session: tested by the guards of on_simulation_start() and
// on_simulation_end(), and reset to false when the session is shut down.
210 static bool trace_active = false;
// Output format in use: Paje by default, switched to TI by on_simulation_start().
211 TraceFormat trace_format = TraceFormat::Paje;
// Definitions of the static signal members of the instrumentation classes. The handlers defined
// below are registered through the matching on_*_cb() functions in on_simulation_start()
// (presumably these functions connect to the signals defined here -- confirm in the class headers).
217 xbt::signal<void(Container const&)> Container::on_creation;
218 xbt::signal<void(Container const&)> Container::on_destruction;
219 xbt::signal<void(Type const&, PajeEventType)> Type::on_creation;
220 xbt::signal<void(LinkType const&, Type const&, Type const&)> LinkType::on_creation;
221 xbt::signal<void(PajeEvent&)> PajeEvent::on_creation;
222 xbt::signal<void(PajeEvent const&)> PajeEvent::on_destruction;
223 xbt::signal<void(StateEvent const&)> StateEvent::on_destruction;
224 xbt::signal<void(EntityValue const&)> EntityValue::on_creation;
// Paje handler for container creation: writes one CreateContainer line to tracing_file, made of the
// event type, the current clock, the container id, its type id, its parent id and the quoted
// container name.
// NOTE(review): in this view the last argument line of the first XBT_DEBUG call and the `else`
// that presumably precedes the "rank-" rewriting statement are not visible -- confirm in the file.
226 static void on_container_creation_paje(const Container& c)
228 double timestamp = simgrid_get_clock();
229 std::stringstream stream;
231 XBT_DEBUG("%s: event_type=%u, timestamp=%f", __func__, static_cast<unsigned>(PajeEventType::CreateContainer),
234 stream << std::fixed << std::setprecision(trace_precision) << PajeEventType::CreateContainer << " ";
235 stream << timestamp << " " << c.get_id() << " " << c.get_type()->get_id() << " " << c.get_parent()->get_id() << " \"";
// Names that do not start with "rank-" are written verbatim.
236 if (c.get_name().find("rank-") != 0)
237 stream << c.get_name() << "\"";
239 /* Subtract 1: the numeric suffix is the process id, which is transformed into the rank id */
240 stream << "rank-" << stoi(c.get_name().substr(5)) - 1 << "\"";
242 XBT_DEBUG("Dump %s", stream.str().c_str());
243 tracing_file << stream.str() << '\n';
// Paje handler for container destruction: writes one DestroyContainer line (event type, current
// clock, type id, container id) to tracing_file, unless destruction tracing is disabled by
// trace_disable_destroy or the container is the root one.
// NOTE(review): the last argument line of the first XBT_DEBUG call is not visible in this view.
246 static void on_container_destruction_paje(const Container& c)
248 // trace my destruction, but not if user requests so or if the container is root
249 if (not trace_disable_destroy && &c != Container::get_root()) {
250 std::stringstream stream;
251 double timestamp = simgrid_get_clock();
253 XBT_DEBUG("%s: event_type=%u, timestamp=%f", __func__, static_cast<unsigned>(PajeEventType::DestroyContainer),
256 stream << std::fixed << std::setprecision(trace_precision) << PajeEventType::DestroyContainer << " ";
257 stream << timestamp << " " << c.get_type()->get_id() << " " << c.get_id();
258 XBT_DEBUG("Dump %s", stream.str().c_str());
259 tracing_file << stream.str() << '\n';
// TI handler for container creation: associates an output stream with the container in tracing_files.
// A new file "<tracing/filename>_files/<prefix>_<container name>.txt" is opened when the
// "tracing/smpi/format/ti-one-file" option is off, or when no file has been opened yet; its name is
// also written to tracing_file. The prefix is a timestamp taken while tracing_files is still empty.
// NOTE(review): both _mkdir() and mkdir() appear below; presumably they are selected by preprocessor
// conditionals that are not visible in this view. Their return values are not checked.
// NOTE(review): the insertion into tracing_files presumably happens after the closing brace of the
// `if`, so that in one-file mode later containers reuse ti_unique_file -- confirm in the file.
263 static void on_container_creation_ti(const Container& c)
265 XBT_DEBUG("%s: event_type=%u, timestamp=%f", __func__, static_cast<unsigned>(PajeEventType::CreateContainer),
266 simgrid_get_clock());
267 // if we are in the mode with only one file
268 static std::ofstream* ti_unique_file = nullptr;
269 static double prefix = 0.0;
271 if (tracing_files.empty()) {
272 // generate unique run id with time
273 prefix = xbt_os_time();
276 if (not simgrid::config::get_value<bool>("tracing/smpi/format/ti-one-file") || ti_unique_file == nullptr) {
277 std::string folder_name = simgrid::config::get_value<std::string>("tracing/filename") + "_files";
278 std::string filename = folder_name + "/" + std::to_string(prefix) + "_" + c.get_name() + ".txt";
280 _mkdir(folder_name.c_str());
282 mkdir(folder_name.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
284 ti_unique_file = new std::ofstream(filename.c_str(), std::ofstream::out);
285 xbt_assert(not ti_unique_file->fail(), "Tracefile %s could not be opened for writing", filename.c_str());
286 tracing_file << filename << '\n';
288 tracing_files.insert({&c, ti_unique_file});
291 static void on_container_destruction_ti(const Container& c)
293 if (not trace_disable_destroy && &c != Container::get_root()) {
294 if (not simgrid::config::get_value<bool>("tracing/smpi/format/ti-one-file") || tracing_files.size() == 1) {
295 tracing_files.at(&c)->close();
296 delete tracing_files.at(&c);
298 tracing_files.erase(&c);
302 static void on_entity_value_creation(const EntityValue& value)
304 std::stringstream stream;
305 XBT_DEBUG("%s: event_type=%u", __func__, static_cast<unsigned>(PajeEventType::DefineEntityValue));
306 stream << std::fixed << std::setprecision(trace_precision) << PajeEventType::DefineEntityValue;
307 stream << " " << value.get_id() << " " << value.get_parent()->get_id() << " " << value.get_name();
308 if (not value.get_color().empty())
309 stream << " \"" << value.get_color() << "\"";
310 XBT_DEBUG("Dump %s", stream.str().c_str());
311 tracing_file << stream.str() << '\n';
// Paje handler for event creation: starts the event's own output stream with the event type, its
// timestamp (fixed notation, trace_precision digits), the id of its type and the id of its container.
// NOTE(review): the last argument line of the XBT_DEBUG call is not visible in this view.
314 static void on_event_creation(PajeEvent& event)
316 XBT_DEBUG("%s: event_type=%u, timestamp=%.*f", __func__, static_cast<unsigned>(event.eventType_), trace_precision,
318 event.stream_ << std::fixed << std::setprecision(trace_precision);
319 event.stream_ << event.eventType_ << " " << event.timestamp_ << " ";
320 event.stream_ << event.get_type()->get_id() << " " << event.get_container()->get_id();
323 static void on_event_destruction(const PajeEvent& event)
325 XBT_DEBUG("Dump %s", event.stream_.str().c_str());
326 tracing_file << event.stream_.str() << '\n';
329 static void on_state_event_destruction(const StateEvent& event)
331 if (event.has_extra())
332 *tracing_files.at(event.get_container()) << event.stream_.str() << '\n';
335 static void on_type_creation(const Type& type, PajeEventType event_type)
337 if (event_type == PajeEventType::DefineLinkType)
338 return; // this kind of type has to be handled differently
340 std::stringstream stream;
341 stream << std::fixed << std::setprecision(trace_precision);
342 XBT_DEBUG("%s: event_type=%u, timestamp=%.*f", __func__, static_cast<unsigned>(event_type), trace_precision, 0.);
343 stream << event_type << " " << type.get_id() << " " << type.get_parent()->get_id() << " " << type.get_name();
344 if (type.is_colored())
345 stream << " \"" << type.get_color() << "\"";
346 XBT_DEBUG("Dump %s", stream.str().c_str());
347 tracing_file << stream.str() << '\n';
350 static void on_link_type_creation(const Type& type, const Type& source, const Type& dest)
352 std::stringstream stream;
353 XBT_DEBUG("%s: event_type=%u, timestamp=%.*f", __func__, static_cast<unsigned>(PajeEventType::DefineLinkType),
354 trace_precision, 0.);
355 stream << PajeEventType::DefineLinkType << " " << type.get_id() << " " << type.get_parent()->get_id();
356 stream << " " << source.get_id() << " " << dest.get_id() << " " << type.get_name();
357 XBT_DEBUG("Dump %s", stream.str().c_str());
358 tracing_file << stream.str() << '\n';
// Starts the tracing session: reads "tracing/precision", "tracing/smpi/format" and
// "tracing/filename" from the configuration, opens the trace file (throwing TracingError when this
// fails) and registers the handlers matching the selected format.
// With the "Paje" format, the Paje handlers are registered, then the generator version, the optional
// "tracing/comment" line, the comment file and the header are dumped. The TI handlers are registered
// further down, with trace_format switched to TraceFormat::Ti.
// NOTE(review): several lines are not visible in this view: the statement guarded by the first
// `if` (presumably an early return), the `else` that presumably separates the Paje branch from the
// TI one, and the place where trace_active is set -- confirm in the file.
361 static void on_simulation_start()
363 if (trace_active || not TRACE_is_enabled())
368 XBT_DEBUG("Tracing starts");
369 trace_precision = config::get_value<int>("tracing/precision");
371 /* init the tracing module to generate the right output */
372 std::string format = config::get_value<std::string>("tracing/smpi/format");
373 XBT_DEBUG("Tracing format %s", format.c_str());
375 /* open the trace file(s) */
376 std::string filename = simgrid::config::get_value<std::string>("tracing/filename");
377 tracing_file.open(filename.c_str(), std::ofstream::out);
378 if (tracing_file.fail()) {
379 throw TracingError(XBT_THROW_POINT,
380 xbt::string_printf("Tracefile %s could not be opened for writing.", filename.c_str()));
383 XBT_DEBUG("Filename %s is open for writing", filename.c_str());
385 if (format == "Paje") {
386 Container::on_creation_cb(on_container_creation_paje);
387 Container::on_destruction_cb(on_container_destruction_paje);
388 EntityValue::on_creation_cb(on_entity_value_creation);
389 Type::on_creation_cb(on_type_creation);
390 LinkType::on_creation_cb(on_link_type_creation);
391 PajeEvent::on_creation_cb(on_event_creation);
392 PajeEvent::on_destruction_cb(on_event_destruction);
394 paje::dump_generator_version();
396 /* output one line comment */
397 if (auto comment = simgrid::config::get_value<std::string>("tracing/comment"); not comment.empty())
398 tracing_file << "# " << comment << '\n';
400 /* output comment file */
401 paje::dump_comment_file(config::get_value<std::string>(OPT_TRACING_COMMENT_FILE));
402 paje::dump_header(trace_basic, TRACE_display_sizes());
404 trace_format = TraceFormat::Ti;
405 Container::on_creation_cb(on_container_creation_ti);
406 Container::on_destruction_cb(on_container_destruction_ti);
407 StateEvent::on_destruction_cb(on_state_event_destruction);
411 XBT_DEBUG("Tracing is on");
// Shuts the tracing session down: records the current clock in last_timestamp_to_dump, deletes the
// root container, closes tracing_file and clears trace_active.
// Registered in this file for both the deadlock and the simulation-end Engine callbacks; the
// trace_active guard is what keeps a second invocation from repeating the shutdown.
// NOTE(review): several lines are not visible in this view: the statement guarded by the first
// `if` (presumably an early return), the actual buffer dump announced by the first comment, and
// whatever uses root_type after the root container is deleted -- confirm in the file.
414 static void on_simulation_end()
416 if (not trace_active)
419 /* dump trace buffer */
420 last_timestamp_to_dump = simgrid_get_clock();
423 const Type* root_type = Container::get_root()->get_type();
424 /* destroy all data structures of tracing (and free) */
425 delete Container::get_root();
428 /* close the trace files */
429 tracing_file.close();
430 XBT_DEBUG("Filename %s is closed", config::get_value<std::string>("tracing/filename").c_str());
432 /* de-activate trace */
433 trace_active = false;
434 XBT_DEBUG("Tracing is off");
435 XBT_DEBUG("Tracing system is shutdown");
// Body of the tracing module initialization (the enclosing function's signature line is not visible
// in this view). Declares the tracing options that are not handled by the static flags above
// (file name, SMPI output format, TI one-file mode, comment, comment file, timestamp precision) and
// connects the start, time-advance, deadlock and end handlers to the Engine callbacks.
// NOTE(review): is_initialized presumably guards against repeated initialization, but the test
// itself is not visible here; the default-value lines of "tracing/filename",
// "tracing/smpi/format" and "tracing/precision" are not visible either -- confirm in the file.
440 static bool is_initialized = false;
444 is_initialized = true;
446 /* name of the tracefile */
447 config::declare_flag<std::string>("tracing/filename", "Trace file created by the instrumented SimGrid.",
449 config::declare_flag<std::string>("tracing/smpi/format",
450 "Select trace output format used by SMPI. The default is the 'Paje' format. "
451 "The 'TI' (Time-Independent) format allows for trace replay.",
454 config::declare_flag<bool>(OPT_TRACING_FORMAT_TI_ONEFILE,
455 "(smpi only) For replay format only : output to one file only", false);
456 config::declare_flag<std::string>("tracing/comment", "Add a comment line to the top of the trace file.", "");
457 config::declare_flag<std::string>(OPT_TRACING_COMMENT_FILE,
458 "Add the contents of a file as comments to the top of the trace.", "");
459 config::declare_flag<int>("tracing/precision",
460 "Numerical precision used when timestamping events "
461 "(expressed in number of digits after decimal point)",
464 /* Connect Engine callbacks */
465 s4u::Engine::on_platform_creation_cb(on_simulation_start);
466 s4u::Engine::on_time_advance_cb([](double /*time_delta*/) { dump_buffer(false); });
467 s4u::Engine::on_deadlock_cb(on_simulation_end);
468 s4u::Engine::on_simulation_end_cb(on_simulation_end);
470 } // namespace simgrid::instr