Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Rename mc::RemoteSimulation into mc::RemoteProcess
[simgrid.git] / src / mc / remote / RemoteProcess.cpp
1 /* Copyright (c) 2014-2021. The SimGrid Team. All rights reserved.          */
2
3 /* This program is free software; you can redistribute it and/or modify it
4  * under the terms of the license (GNU LGPL) which comes with this package. */
5
6 #define _FILE_OFFSET_BITS 64 /* needed for pread_whole to work as expected on 32bits */
7
8 #include "src/mc/remote/RemoteProcess.hpp"
9
10 #include "src/mc/sosp/Snapshot.hpp"
11 #include "xbt/file.hpp"
12 #include "xbt/log.h"
13
14 #include <fcntl.h>
15 #include <libunwind-ptrace.h>
16 #include <sys/mman.h> // PROT_*
17
18 #include <algorithm>
19 #include <cerrno>
20 #include <cstring>
21 #include <memory>
22 #include <string>
23
24 using simgrid::mc::remote;
25
26 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_process, mc, "MC process information");
27
28 namespace simgrid {
29 namespace mc {
30
31 // ***** Helper stuff
32
// Names of the libraries whose memory segments are not considered.
// Entries are library names without the ".so" extension and without version suffix,
// i.e. in the form produced by get_lib_name().
static const std::vector<std::string> filtered_libraries{
#ifdef __linux__
    "ld",
#elif defined __FreeBSD__
    "ld-elf",
    "ld-elf32",
    "libkvm",      /* kernel data access library */
    "libprocstat", /* process and file information retrieval */
    "libthr",      /* thread library */
    "libutil",
#endif
    "libargp", /* workarounds for glibc-less systems */
    "libasan", /* gcc sanitizers */
    "libasn1",
    "libboost_chrono",
    "libboost_context",
    "libboost_context-mt",
    "libboost_stacktrace_addr2line",
    "libboost_stacktrace_backtrace",
    "libboost_system",
    "libboost_thread",
    "libboost_timer",
    "libbrotlicommon",
    "libbrotlidec",
    "libbz2",
    "libc",
    "libc++",
    "libcdt",
    "libcgraph",
    "libcom_err",
    "libcrypt",
    "libcrypto",
    "libcurl",
    "libcurl-gnutls",
    "libcxxrt",
    "libdebuginfod",
    "libdl",
    "libdw",
    "libelf",
    "libevent",
    "libexecinfo",
    "libffi",
    "libflang",
    "libflangrti",
    "libgcc_s",
    "libgmp",
    "libgnutls",
    "libgcrypt",
    "libgfortran",
    "libgpg-error",
    "libgssapi",
    "libgssapi_krb5",
    "libhcrypto",
    "libheimbase",
    "libheimntlm",
    "libhx509",
    "libhogweed",
    "libidn2",
    "libimf",
    "libintlc",
    "libirng",
    "libk5crypto",
    "libkeyutils",
    "libkrb5",
    "libkrb5support", /* odd behaviour on fedora rawhide ... remove these when fixed */
    "liblber",
    "libldap",
    "libldap_r",
    "liblua5.1",
    "liblua5.3",
    "liblzma",
    "libm",
    "libmd",
    "libnettle",
    "libnghttp2",
    "libomp",
    "libp11-kit",
    "libpapi",
    "libpcre2",
    "libpfm",
    "libpgmath",
    "libpsl",
    "libpthread",
    "libquadmath",
    "libresolv",
    "libroken",
    "librt",
    "librtmp",
    "libsasl2",
    "libselinux",
    "libsqlite3",
    "libssh",
    "libssh2",
    "libssl",
    "libstdc++",
    "libsvml",
    "libtasn1",
    "libtsan",  /* gcc sanitizers */
    "libubsan", /* gcc sanitizers */
    "libunistring",
    "libunwind",
    "libunwind-ptrace",
    "libunwind-x86",
    "libunwind-x86_64",
    "libwind",
    "libz",
    "libzstd",
};
141
142 static bool is_filtered_lib(const std::string& libname)
143 {
144   return std::find(begin(filtered_libraries), end(filtered_libraries), libname) != end(filtered_libraries);
145 }
146
147 static std::string get_lib_name(const std::string& pathname)
148 {
149   std::string map_basename = simgrid::xbt::Path(pathname).get_base_name();
150   std::string libname;
151
152   size_t pos = map_basename.rfind(".so");
153   if (pos != std::string::npos) {
154     // strip the extension (matching regex "\.so.*$")
155     libname.assign(map_basename, 0, pos);
156
157     // strip the version suffix (matching regex "-[.0-9-]*$")
158     while (true) {
159       pos = libname.rfind('-');
160       if (pos == std::string::npos || libname.find_first_not_of(".0123456789", pos + 1) != std::string::npos)
161         break;
162       libname.erase(pos);
163     }
164   }
165
166   return libname;
167 }
168
169 static ssize_t pread_whole(int fd, void* buf, size_t count, off_t offset)
170 {
171   auto* buffer       = static_cast<char*>(buf);
172   ssize_t real_count = count;
173   while (count) {
174     ssize_t res = pread(fd, buffer, count, offset);
175     if (res > 0) {
176       count -= res;
177       buffer += res;
178       offset += res;
179     } else if (res == 0)
180       return -1;
181     else if (errno != EINTR) {
182       XBT_ERROR("pread_whole: %s", strerror(errno));
183       return -1;
184     }
185   }
186   return real_count;
187 }
188
189 static ssize_t pwrite_whole(int fd, const void* buf, size_t count, off_t offset)
190 {
191   const auto* buffer = static_cast<const char*>(buf);
192   ssize_t real_count = count;
193   while (count) {
194     ssize_t res = pwrite(fd, buffer, count, offset);
195     if (res > 0) {
196       count -= res;
197       buffer += res;
198       offset += res;
199     } else if (res == 0)
200       return -1;
201     else if (errno != EINTR) {
202       XBT_ERROR("pwrite_whole: %s", strerror(errno));
203       return -1;
204     }
205   }
206   return real_count;
207 }
208
209 static pthread_once_t zero_buffer_flag = PTHREAD_ONCE_INIT;
210 static const void* zero_buffer;
211 static const size_t zero_buffer_size = 10 * 4096;
212
213 static void zero_buffer_init()
214 {
215   int fd = open("/dev/zero", O_RDONLY);
216   if (fd < 0)
217     xbt_die("Could not open /dev/zero");
218   zero_buffer = mmap(nullptr, zero_buffer_size, PROT_READ, MAP_SHARED, fd, 0);
219   if (zero_buffer == MAP_FAILED)
220     xbt_die("Could not map the zero buffer");
221   close(fd);
222 }
223
/** Open the /proc/<pid>/mem file of process @p pid with the given open(2) @p flags.
 *
 *  @return the value returned by open(): a file descriptor, or a negative value on error
 */
int open_vm(pid_t pid, int flags)
{
  std::string mem_path("/proc/");
  mem_path += std::to_string(pid);
  mem_path += "/mem";
  return open(mem_path.c_str(), flags);
}
229
230 // ***** RemoteProcess
231
232 RemoteProcess::RemoteProcess(pid_t pid) : AddressSpace(this), pid_(pid), running_(true) {}
233
234 void RemoteProcess::init()
235 {
236   this->memory_map_ = simgrid::xbt::get_memory_map(this->pid_);
237   this->init_memory_map_info();
238
239   int fd = open_vm(this->pid_, O_RDWR);
240   xbt_assert(fd >= 0, "Could not open file for process virtual address space");
241   this->memory_file = fd;
242
243   // Read std_heap (is a struct mdesc*):
244   const simgrid::mc::Variable* std_heap_var = this->find_variable("__mmalloc_default_mdp");
245   xbt_assert(std_heap_var, "No heap information in the target process");
246   xbt_assert(std_heap_var->address, "No constant address for this variable");
247   this->read_bytes(&this->heap_address, sizeof(mdesc*), remote(std_heap_var->address));
248
249   this->smx_actors_infos.clear();
250   this->smx_dead_actors_infos.clear();
251   this->unw_addr_space            = simgrid::mc::UnwindContext::createUnwindAddressSpace();
252   this->unw_underlying_addr_space = simgrid::unw::create_addr_space();
253   this->unw_underlying_context    = simgrid::unw::create_context(this->unw_underlying_addr_space, this->pid_);
254 }
255
256 RemoteProcess::~RemoteProcess()
257 {
258   if (this->memory_file >= 0)
259     close(this->memory_file);
260
261   if (this->unw_underlying_addr_space != unw_local_addr_space) {
262     if (this->unw_underlying_addr_space)
263       unw_destroy_addr_space(this->unw_underlying_addr_space);
264     if (this->unw_underlying_context)
265       _UPT_destroy(this->unw_underlying_context);
266   }
267
268   unw_destroy_addr_space(this->unw_addr_space);
269 }
270
271 /** Refresh the information about the process
272  *
273  *  Do not use directly, this is used by the getters when appropriate
274  *  in order to have fresh data.
275  */
276 void RemoteProcess::refresh_heap()
277 {
278   // Read/dereference/refresh the std_heap pointer:
279   if (not this->heap)
280     this->heap = std::make_unique<s_xbt_mheap_t>();
281   this->read_bytes(this->heap.get(), sizeof(mdesc), remote(this->heap_address));
282   this->cache_flags_ |= RemoteProcess::cache_heap;
283 }
284
285 /** Refresh the information about the process
286  *
287  *  Do not use directly, this is used by the getters when appropriate
288  *  in order to have fresh data.
289  * */
290 void RemoteProcess::refresh_malloc_info()
291 {
292   // Refresh process->heapinfo:
293   if (this->cache_flags_ & RemoteProcess::cache_malloc)
294     return;
295   size_t count = this->heap->heaplimit + 1;
296   if (this->heap_info.size() < count)
297     this->heap_info.resize(count);
298   this->read_bytes(this->heap_info.data(), count * sizeof(malloc_info), remote(this->heap->heapinfo));
299   this->cache_flags_ |= RemoteProcess::cache_malloc;
300 }
301
302 /** @brief Finds the range of the different memory segments and binary paths */
303 void RemoteProcess::init_memory_map_info()
304 {
305   XBT_DEBUG("Get debug information ...");
306   this->maestro_stack_start_ = nullptr;
307   this->maestro_stack_end_   = nullptr;
308   this->object_infos.resize(0);
309   this->binary_info = nullptr;
310
311   std::vector<simgrid::xbt::VmMap> const& maps = this->memory_map_;
312
313   const char* current_name = nullptr;
314
315   this->object_infos.clear();
316
317   for (size_t i = 0; i < maps.size(); i++) {
318     simgrid::xbt::VmMap const& reg = maps[i];
319     const char* pathname           = maps[i].pathname.c_str();
320
321     // Nothing to do
322     if (maps[i].pathname.empty()) {
323       current_name = nullptr;
324       continue;
325     }
326
327     // [stack], [vvar], [vsyscall], [vdso] ...
328     if (pathname[0] == '[') {
329       if ((reg.prot & PROT_WRITE) && not memcmp(pathname, "[stack]", 7)) {
330         this->maestro_stack_start_ = remote(reg.start_addr);
331         this->maestro_stack_end_   = remote(reg.end_addr);
332       }
333       current_name = nullptr;
334       continue;
335     }
336
337     if (current_name && strcmp(current_name, pathname) == 0)
338       continue;
339
340     current_name = pathname;
341     if (not(reg.prot & PROT_READ) && (reg.prot & PROT_EXEC))
342       continue;
343
344     const bool is_executable = not i;
345     std::string libname;
346     if (not is_executable) {
347       libname = get_lib_name(pathname);
348       if (is_filtered_lib(libname)) {
349         continue;
350       }
351     }
352
353     std::shared_ptr<simgrid::mc::ObjectInformation> info =
354         simgrid::mc::createObjectInformation(this->memory_map_, pathname);
355     this->object_infos.push_back(info);
356     if (is_executable)
357       this->binary_info = info;
358   }
359
360   // Resolve time (including across different objects):
361   for (auto const& object_info : this->object_infos)
362     postProcessObjectInformation(this, object_info.get());
363
364   xbt_assert(this->maestro_stack_start_, "Did not find maestro_stack_start");
365   xbt_assert(this->maestro_stack_end_, "Did not find maestro_stack_end");
366
367   XBT_DEBUG("Get debug information done !");
368 }
369
370 std::shared_ptr<simgrid::mc::ObjectInformation> RemoteProcess::find_object_info(RemotePtr<void> addr) const
371 {
372   for (auto const& object_info : this->object_infos)
373     if (addr.address() >= (std::uint64_t)object_info->start && addr.address() <= (std::uint64_t)object_info->end)
374       return object_info;
375   return nullptr;
376 }
377
378 std::shared_ptr<ObjectInformation> RemoteProcess::find_object_info_exec(RemotePtr<void> addr) const
379 {
380   for (std::shared_ptr<ObjectInformation> const& info : this->object_infos)
381     if (addr.address() >= (std::uint64_t)info->start_exec && addr.address() <= (std::uint64_t)info->end_exec)
382       return info;
383   return nullptr;
384 }
385
386 std::shared_ptr<ObjectInformation> RemoteProcess::find_object_info_rw(RemotePtr<void> addr) const
387 {
388   for (std::shared_ptr<ObjectInformation> const& info : this->object_infos)
389     if (addr.address() >= (std::uint64_t)info->start_rw && addr.address() <= (std::uint64_t)info->end_rw)
390       return info;
391   return nullptr;
392 }
393
394 simgrid::mc::Frame* RemoteProcess::find_function(RemotePtr<void> ip) const
395 {
396   std::shared_ptr<simgrid::mc::ObjectInformation> info = this->find_object_info_exec(ip);
397   return info ? info->find_function((void*)ip.address()) : nullptr;
398 }
399
400 /** Find (one occurrence of) the named variable definition
401  */
402 const simgrid::mc::Variable* RemoteProcess::find_variable(const char* name) const
403 {
404   // First lookup the variable in the executable shared object.
405   // A global variable used directly by the executable code from a library
406   // is reinstantiated in the executable memory .data/.bss.
407   // We need to look up the variable in the executable first.
408   if (this->binary_info) {
409     std::shared_ptr<simgrid::mc::ObjectInformation> const& info = this->binary_info;
410     const simgrid::mc::Variable* var                            = info->find_variable(name);
411     if (var)
412       return var;
413   }
414
415   for (std::shared_ptr<simgrid::mc::ObjectInformation> const& info : this->object_infos) {
416     const simgrid::mc::Variable* var = info->find_variable(name);
417     if (var)
418       return var;
419   }
420
421   return nullptr;
422 }
423
424 void RemoteProcess::read_variable(const char* name, void* target, size_t size) const
425 {
426   const simgrid::mc::Variable* var = this->find_variable(name);
427   xbt_assert(var, "Variable %s not found", name);
428   xbt_assert(var->address, "No simple location for this variable");
429   xbt_assert(var->type->full_type, "Partial type for %s, cannot check size", name);
430   xbt_assert((size_t)var->type->full_type->byte_size == size, "Unexpected size for %s (expected %zu, was %zu)", name,
431              size, (size_t)var->type->full_type->byte_size);
432   this->read_bytes(target, size, remote(var->address));
433 }
434
435 std::string RemoteProcess::read_string(RemotePtr<char> address) const
436 {
437   if (not address)
438     return {};
439
440   std::vector<char> res(128);
441   off_t off = 0;
442
443   while (true) {
444     ssize_t c = pread(this->memory_file, res.data() + off, res.size() - off, (off_t)address.address() + off);
445     if (c == -1 && errno == EINTR)
446       continue;
447     xbt_assert(c > 0, "Could not read string from remote process");
448
449     const void* p = memchr(res.data() + off, '\0', c);
450     if (p)
451       return std::string(res.data());
452
453     off += c;
454     if (off == (off_t)res.size())
455       res.resize(res.size() * 2);
456   }
457 }
458
459 void* RemoteProcess::read_bytes(void* buffer, std::size_t size, RemotePtr<void> address, ReadOptions /*options*/) const
460 {
461   if (pread_whole(this->memory_file, buffer, size, (size_t)address.address()) < 0)
462     xbt_die("Read at %p from process %lli failed", (void*)address.address(), (long long)this->pid_);
463   return buffer;
464 }
465
466 /** Write data to a process memory
467  *
468  *  @param buffer   local memory address (source)
469  *  @param len      data size
470  *  @param address  target process memory address (target)
471  */
472 void RemoteProcess::write_bytes(const void* buffer, size_t len, RemotePtr<void> address) const
473 {
474   if (pwrite_whole(this->memory_file, buffer, len, (size_t)address.address()) < 0)
475     xbt_die("Write to process %lli failed", (long long)this->pid_);
476 }
477
478 void RemoteProcess::clear_bytes(RemotePtr<void> address, size_t len) const
479 {
480   pthread_once(&zero_buffer_flag, zero_buffer_init);
481   while (len) {
482     size_t s = len > zero_buffer_size ? zero_buffer_size : len;
483     this->write_bytes(zero_buffer, s, address);
484     address = remote((char*)address.address() + s);
485     len -= s;
486   }
487 }
488
489 void RemoteProcess::ignore_region(std::uint64_t addr, std::size_t size)
490 {
491   IgnoredRegion region;
492   region.addr = addr;
493   region.size = size;
494
495   auto pos = std::lower_bound(ignored_regions_.begin(), ignored_regions_.end(), region,
496                               [](auto const& reg1, auto const& reg2) {
497                                 return reg1.addr < reg2.addr || (reg1.addr == reg2.addr && reg1.size < reg2.size);
498                               });
499   if (pos == ignored_regions_.end() || pos->addr != addr || pos->size != size)
500     ignored_regions_.insert(pos, region);
501 }
502
503 void RemoteProcess::ignore_heap(IgnoredHeapRegion const& region)
504 {
505   // Binary search the position of insertion:
506   auto pos = std::lower_bound(ignored_heap_.begin(), ignored_heap_.end(), region.address,
507                               [](auto const& reg, auto const* addr) { return reg.address < addr; });
508   if (pos == ignored_heap_.end() || pos->address != region.address) {
509     // Insert it:
510     ignored_heap_.insert(pos, region);
511   }
512 }
513
514 void RemoteProcess::unignore_heap(void* address, size_t size)
515 {
516   // Binary search:
517   auto pos = std::lower_bound(ignored_heap_.begin(), ignored_heap_.end(), address,
518                               [](auto const& reg, auto const* addr) { return reg.address < addr; });
519   if (pos != ignored_heap_.end() && static_cast<char*>(pos->address) <= static_cast<char*>(address) + size)
520     ignored_heap_.erase(pos);
521 }
522
523 void RemoteProcess::ignore_local_variable(const char* var_name, const char* frame_name) const
524 {
525   if (frame_name != nullptr && strcmp(frame_name, "*") == 0)
526     frame_name = nullptr;
527   for (std::shared_ptr<simgrid::mc::ObjectInformation> const& info : this->object_infos)
528     info->remove_local_variable(var_name, frame_name);
529 }
530
531 std::vector<simgrid::mc::ActorInformation>& RemoteProcess::actors()
532 {
533   this->refresh_simix();
534   return smx_actors_infos;
535 }
536
537 std::vector<simgrid::mc::ActorInformation>& RemoteProcess::dead_actors()
538 {
539   this->refresh_simix();
540   return smx_dead_actors_infos;
541 }
542
543 void RemoteProcess::dump_stack() const
544 {
545   unw_addr_space_t as = unw_create_addr_space(&_UPT_accessors, BYTE_ORDER);
546   if (as == nullptr) {
547     XBT_ERROR("Could not initialize ptrace address space");
548     return;
549   }
550
551   void* context = _UPT_create(this->pid_);
552   if (context == nullptr) {
553     unw_destroy_addr_space(as);
554     XBT_ERROR("Could not initialize ptrace context");
555     return;
556   }
557
558   unw_cursor_t cursor;
559   if (unw_init_remote(&cursor, as, context) != 0) {
560     _UPT_destroy(context);
561     unw_destroy_addr_space(as);
562     XBT_ERROR("Could not initialiez ptrace cursor");
563     return;
564   }
565
566   simgrid::mc::dumpStack(stderr, &cursor);
567
568   _UPT_destroy(context);
569   unw_destroy_addr_space(as);
570 }
571 } // namespace mc
572 } // namespace simgrid