Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
help latest fedora with mc
[simgrid.git] / src / mc / remote / RemoteProcess.cpp
1 /* Copyright (c) 2014-2021. The SimGrid Team. All rights reserved.          */
2
3 /* This program is free software; you can redistribute it and/or modify it
4  * under the terms of the license (GNU LGPL) which comes with this package. */
5
6 #define _FILE_OFFSET_BITS 64 /* needed for pread_whole to work as expected on 32bits */
7
8 #include "src/mc/remote/RemoteProcess.hpp"
9
10 #include "src/mc/sosp/Snapshot.hpp"
11 #include "xbt/file.hpp"
12 #include "xbt/log.h"
13
14 #include <fcntl.h>
15 #include <libunwind-ptrace.h>
16 #include <sys/mman.h> // PROT_*
17
18 #include <algorithm>
19 #include <cerrno>
20 #include <cstring>
21 #include <memory>
22 #include <mutex>
23 #include <string>
24
// Make remote() usable unqualified in this file; it is called below to wrap
// addresses of the inspected process (e.g. remote(maxpid)).
using simgrid::mc::remote;

// Logging category used by the XBT_DEBUG / XBT_ERROR calls of this file
// (presumably registered as a child of the "mc" category -- see the XBT logging documentation).
XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_process, mc, "MC process information");
28
29 namespace simgrid {
30 namespace mc {
31
32 // ***** Helper stuff
33
/* Libraries whose memory segments are not considered.
 *
 * Entries are bare library names, i.e. what get_lib_name() yields once the ".so*" extension
 * and the trailing version suffix have been removed from the mapped file name. */
static const std::vector<std::string> filtered_libraries{
#if defined(__linux__)
    "ld",
    "ld-linux-x86",
#elif defined(__FreeBSD__)
    "ld-elf",
    "ld-elf32",
    "libkvm",      // kernel data access library
    "libprocstat", // process and file information retrieval
    "libthr",      // thread library
    "libutil",
#endif
    "libargp", // workarounds for glibc-less systems
    "libasan", // gcc sanitizers
    "libasn1",
    "libboost_chrono",
    "libboost_context",
    "libboost_context-mt",
    "libboost_stacktrace_addr2line",
    "libboost_stacktrace_backtrace",
    "libboost_system",
    "libboost_thread",
    "libboost_timer",
    "libbrotlicommon",
    "libbrotlidec",
    "libbz2",
    "libc",
    "libc++",
    "libcdt",
    "libcgraph",
    "libcom_err",
    "libcrypt",
    "libcrypto",
    "libcurl",
    "libcurl-gnutls",
    "libcxxrt",
    "libdebuginfod",
    "libdl",
    "libdw",
    "libelf",
    "libevent",
    "libexecinfo",
    "libffi",
    "libflang",
    "libflangrti",
    "libgcc_s",
    "libgmp",
    "libgnutls",
    "libgcrypt",
    "libgfortran",
    "libgpg-error",
    "libgssapi",
    "libgssapi_krb5",
    "libhcrypto",
    "libheimbase",
    "libheimntlm",
    "libhx509",
    "libhogweed",
    "libidn2",
    "libimf",
    "libintlc",
    "libirng",
    "libk5crypto",
    "libkeyutils",
    "libkrb5",
    "libkrb5support", // odd behaviour on fedora rawhide ... remove these when fixed
    "liblber",
    "libldap",
    "libldap_r",
    "liblua5.1",
    "liblua5.3",
    "liblzma",
    "libm",
    "libmd",
    "libnettle",
    "libnghttp2",
    "libomp",
    "libp11-kit",
    "libpapi",
    "libpcre2",
    "libpfm",
    "libpgmath",
    "libpsl",
    "libpthread",
    "libquadmath",
    "libresolv",
    "libroken",
    "librt",
    "librtmp",
    "libsasl2",
    "libselinux",
    "libsqlite3",
    "libssh",
    "libssh2",
    "libssl",
    "libstdc++",
    "libsvml",
    "libtasn1",
    "libtsan",  // gcc sanitizers
    "libubsan", // gcc sanitizers
    "libunistring",
    "libunwind",
    "libunwind-ptrace",
    "libunwind-x86",
    "libunwind-x86_64",
    "libwind",
    "libz",
    "libzstd",
};
143
144 static bool is_filtered_lib(const std::string& libname)
145 {
146   return std::find(begin(filtered_libraries), end(filtered_libraries), libname) != end(filtered_libraries);
147 }
148
149 static std::string get_lib_name(const std::string& pathname)
150 {
151   std::string map_basename = simgrid::xbt::Path(pathname).get_base_name();
152   std::string libname;
153
154   size_t pos = map_basename.rfind(".so");
155   if (pos != std::string::npos) {
156     // strip the extension (matching regex "\.so.*$")
157     libname.assign(map_basename, 0, pos);
158
159     // strip the version suffix (matching regex "-[.0-9-]*$")
160     while (true) {
161       pos = libname.rfind('-');
162       if (pos == std::string::npos || libname.find_first_not_of(".0123456789", pos + 1) != std::string::npos)
163         break;
164       libname.erase(pos);
165     }
166   }
167
168   return libname;
169 }
170
171 static ssize_t pread_whole(int fd, void* buf, size_t count, off_t offset)
172 {
173   auto* buffer       = static_cast<char*>(buf);
174   ssize_t real_count = count;
175   while (count) {
176     ssize_t res = pread(fd, buffer, count, offset);
177     if (res > 0) {
178       count -= res;
179       buffer += res;
180       offset += res;
181     } else if (res == 0)
182       return -1;
183     else if (errno != EINTR) {
184       XBT_ERROR("pread_whole: %s", strerror(errno));
185       return -1;
186     }
187   }
188   return real_count;
189 }
190
191 static ssize_t pwrite_whole(int fd, const void* buf, size_t count, off_t offset)
192 {
193   const auto* buffer = static_cast<const char*>(buf);
194   ssize_t real_count = count;
195   while (count) {
196     ssize_t res = pwrite(fd, buffer, count, offset);
197     if (res > 0) {
198       count -= res;
199       buffer += res;
200       offset += res;
201     } else if (res == 0)
202       return -1;
203     else if (errno != EINTR) {
204       XBT_ERROR("pwrite_whole: %s", strerror(errno));
205       return -1;
206     }
207   }
208   return real_count;
209 }
210
/** Open "/proc/<pid>/mem" with the given open(2) flags.
 *
 *  @return the file descriptor returned by open(), i.e. -1 on failure
 */
int open_vm(pid_t pid, int flags)
{
  std::string mem_path("/proc/");
  mem_path += std::to_string(pid);
  mem_path += "/mem";
  return open(mem_path.c_str(), flags);
}
216
217 // ***** RemoteProcess
218
219 RemoteProcess::RemoteProcess(pid_t pid) : AddressSpace(this), pid_(pid), running_(true) {}
220
221 void RemoteProcess::init(xbt_mheap_t mmalloc_default_mdp, unsigned long* maxpid, xbt_dynar_t actors,
222                          xbt_dynar_t dead_actors)
223 {
224   this->heap_address      = remote(mmalloc_default_mdp);
225   this->maxpid_addr_      = remote(maxpid);
226   this->actors_addr_      = remote(actors);
227   this->dead_actors_addr_ = remote(dead_actors);
228
229   this->memory_map_ = simgrid::xbt::get_memory_map(this->pid_);
230   this->init_memory_map_info();
231
232   int fd = open_vm(this->pid_, O_RDWR);
233   xbt_assert(fd >= 0, "Could not open file for process virtual address space");
234   this->memory_file = fd;
235
236   this->smx_actors_infos.clear();
237   this->smx_dead_actors_infos.clear();
238   this->unw_addr_space            = simgrid::mc::UnwindContext::createUnwindAddressSpace();
239   this->unw_underlying_addr_space = simgrid::unw::create_addr_space();
240   this->unw_underlying_context    = simgrid::unw::create_context(this->unw_underlying_addr_space, this->pid_);
241 }
242
243 RemoteProcess::~RemoteProcess()
244 {
245   if (this->memory_file >= 0)
246     close(this->memory_file);
247
248   if (this->unw_underlying_addr_space != unw_local_addr_space) {
249     if (this->unw_underlying_addr_space)
250       unw_destroy_addr_space(this->unw_underlying_addr_space);
251     if (this->unw_underlying_context)
252       _UPT_destroy(this->unw_underlying_context);
253   }
254
255   unw_destroy_addr_space(this->unw_addr_space);
256 }
257
258 /** Refresh the information about the process
259  *
260  *  Do not use directly, this is used by the getters when appropriate
261  *  in order to have fresh data.
262  */
263 void RemoteProcess::refresh_heap()
264 {
265   // Read/dereference/refresh the std_heap pointer:
266   if (not this->heap)
267     this->heap = std::make_unique<s_xbt_mheap_t>();
268   this->read(this->heap.get(), this->heap_address);
269   this->cache_flags_ |= RemoteProcess::cache_heap;
270 }
271
272 /** Refresh the information about the process
273  *
274  *  Do not use directly, this is used by the getters when appropriate
275  *  in order to have fresh data.
276  * */
277 void RemoteProcess::refresh_malloc_info()
278 {
279   // Refresh process->heapinfo:
280   if (this->cache_flags_ & RemoteProcess::cache_malloc)
281     return;
282   size_t count = this->heap->heaplimit + 1;
283   if (this->heap_info.size() < count)
284     this->heap_info.resize(count);
285   this->read_bytes(this->heap_info.data(), count * sizeof(malloc_info), remote(this->heap->heapinfo));
286   this->cache_flags_ |= RemoteProcess::cache_malloc;
287 }
288
289 /** @brief Finds the range of the different memory segments and binary paths */
290 void RemoteProcess::init_memory_map_info()
291 {
292   XBT_DEBUG("Get debug information ...");
293   this->maestro_stack_start_ = nullptr;
294   this->maestro_stack_end_   = nullptr;
295   this->object_infos.clear();
296   this->binary_info = nullptr;
297
298   std::vector<simgrid::xbt::VmMap> const& maps = this->memory_map_;
299
300   const char* current_name = nullptr;
301
302   for (size_t i = 0; i < maps.size(); i++) {
303     simgrid::xbt::VmMap const& reg = maps[i];
304     const char* pathname           = maps[i].pathname.c_str();
305
306     // Nothing to do
307     if (maps[i].pathname.empty()) {
308       current_name = nullptr;
309       continue;
310     }
311
312     // [stack], [vvar], [vsyscall], [vdso] ...
313     if (pathname[0] == '[') {
314       if ((reg.prot & PROT_WRITE) && not memcmp(pathname, "[stack]", 7)) {
315         this->maestro_stack_start_ = remote(reg.start_addr);
316         this->maestro_stack_end_   = remote(reg.end_addr);
317       }
318       current_name = nullptr;
319       continue;
320     }
321
322     if (current_name && strcmp(current_name, pathname) == 0)
323       continue;
324
325     current_name = pathname;
326     if (not(reg.prot & PROT_READ) && (reg.prot & PROT_EXEC))
327       continue;
328
329     const bool is_executable = not i;
330     std::string libname;
331     if (not is_executable) {
332       libname = get_lib_name(pathname);
333       if (is_filtered_lib(libname)) {
334         continue;
335       }
336     }
337
338     std::shared_ptr<simgrid::mc::ObjectInformation> info =
339         simgrid::mc::createObjectInformation(this->memory_map_, pathname);
340     this->object_infos.push_back(info);
341     if (is_executable)
342       this->binary_info = info;
343   }
344
345   xbt_assert(this->maestro_stack_start_, "Did not find maestro_stack_start");
346   xbt_assert(this->maestro_stack_end_, "Did not find maestro_stack_end");
347
348   XBT_DEBUG("Get debug information done !");
349 }
350
351 std::shared_ptr<simgrid::mc::ObjectInformation> RemoteProcess::find_object_info(RemotePtr<void> addr) const
352 {
353   for (auto const& object_info : this->object_infos)
354     if (addr.address() >= (std::uint64_t)object_info->start && addr.address() <= (std::uint64_t)object_info->end)
355       return object_info;
356   return nullptr;
357 }
358
359 std::shared_ptr<ObjectInformation> RemoteProcess::find_object_info_exec(RemotePtr<void> addr) const
360 {
361   for (std::shared_ptr<ObjectInformation> const& info : this->object_infos)
362     if (addr.address() >= (std::uint64_t)info->start_exec && addr.address() <= (std::uint64_t)info->end_exec)
363       return info;
364   return nullptr;
365 }
366
367 std::shared_ptr<ObjectInformation> RemoteProcess::find_object_info_rw(RemotePtr<void> addr) const
368 {
369   for (std::shared_ptr<ObjectInformation> const& info : this->object_infos)
370     if (addr.address() >= (std::uint64_t)info->start_rw && addr.address() <= (std::uint64_t)info->end_rw)
371       return info;
372   return nullptr;
373 }
374
375 simgrid::mc::Frame* RemoteProcess::find_function(RemotePtr<void> ip) const
376 {
377   std::shared_ptr<simgrid::mc::ObjectInformation> info = this->find_object_info_exec(ip);
378   return info ? info->find_function((void*)ip.address()) : nullptr;
379 }
380
381 /** Find (one occurrence of) the named variable definition
382  */
383 const simgrid::mc::Variable* RemoteProcess::find_variable(const char* name) const
384 {
385   // First lookup the variable in the executable shared object.
386   // A global variable used directly by the executable code from a library
387   // is reinstantiated in the executable memory .data/.bss.
388   // We need to look up the variable in the executable first.
389   if (this->binary_info) {
390     std::shared_ptr<simgrid::mc::ObjectInformation> const& info = this->binary_info;
391     const simgrid::mc::Variable* var                            = info->find_variable(name);
392     if (var)
393       return var;
394   }
395
396   for (std::shared_ptr<simgrid::mc::ObjectInformation> const& info : this->object_infos) {
397     const simgrid::mc::Variable* var = info->find_variable(name);
398     if (var)
399       return var;
400   }
401
402   return nullptr;
403 }
404
405 void RemoteProcess::read_variable(const char* name, void* target, size_t size) const
406 {
407   const simgrid::mc::Variable* var = this->find_variable(name);
408   xbt_assert(var, "Variable %s not found", name);
409   xbt_assert(var->address, "No simple location for this variable");
410
411   if (not var->type->full_type) // Try to resolve this type. The needed ObjectInfo was maybe (lazily) loaded recently
412     for (auto const& object_info : this->object_infos)
413       postProcessObjectInformation(this, object_info.get());
414   xbt_assert(var->type->full_type, "Partial type for %s (even after re-resolving types), cannot retrieve its size.",
415              name);
416   xbt_assert((size_t)var->type->full_type->byte_size == size, "Unexpected size for %s (expected %zu, received %zu).",
417              name, size, (size_t)var->type->full_type->byte_size);
418   this->read_bytes(target, size, remote(var->address));
419 }
420
421 std::string RemoteProcess::read_string(RemotePtr<char> address) const
422 {
423   if (not address)
424     return {};
425
426   std::vector<char> res(128);
427   off_t off = 0;
428
429   while (true) {
430     ssize_t c = pread(this->memory_file, res.data() + off, res.size() - off, (off_t)address.address() + off);
431     if (c == -1 && errno == EINTR)
432       continue;
433     xbt_assert(c > 0, "Could not read string from remote process");
434
435     const void* p = memchr(res.data() + off, '\0', c);
436     if (p)
437       return std::string(res.data());
438
439     off += c;
440     if (off == (off_t)res.size())
441       res.resize(res.size() * 2);
442   }
443 }
444
445 void* RemoteProcess::read_bytes(void* buffer, std::size_t size, RemotePtr<void> address, ReadOptions /*options*/) const
446 {
447   xbt_assert(pread_whole(this->memory_file, buffer, size, (size_t)address.address()) != -1,
448              "Read at %p from process %lli failed", (void*)address.address(), (long long)this->pid_);
449   return buffer;
450 }
451
452 /** Write data to a process memory
453  *
454  *  @param buffer   local memory address (source)
455  *  @param len      data size
456  *  @param address  target process memory address (target)
457  */
458 void RemoteProcess::write_bytes(const void* buffer, size_t len, RemotePtr<void> address) const
459 {
460   xbt_assert(pwrite_whole(this->memory_file, buffer, len, (size_t)address.address()) != -1,
461              "Write to process %lli failed", (long long)this->pid_);
462 }
463
464 static void zero_buffer_init(const void** zero_buffer, size_t zero_buffer_size)
465 {
466   int fd = open("/dev/zero", O_RDONLY);
467   xbt_assert(fd >= 0, "Could not open /dev/zero");
468   *zero_buffer = mmap(nullptr, zero_buffer_size, PROT_READ, MAP_SHARED, fd, 0);
469   xbt_assert(*zero_buffer != MAP_FAILED, "Could not map the zero buffer");
470   close(fd);
471 }
472
473 void RemoteProcess::clear_bytes(RemotePtr<void> address, size_t len) const
474 {
475   static constexpr size_t zero_buffer_size = 10 * 4096;
476   static const void* zero_buffer;
477   static std::once_flag zero_buffer_flag;
478
479   std::call_once(zero_buffer_flag, zero_buffer_init, &zero_buffer, zero_buffer_size);
480   while (len) {
481     size_t s = len > zero_buffer_size ? zero_buffer_size : len;
482     this->write_bytes(zero_buffer, s, address);
483     address = remote((char*)address.address() + s);
484     len -= s;
485   }
486 }
487
488 void RemoteProcess::ignore_region(std::uint64_t addr, std::size_t size)
489 {
490   IgnoredRegion region;
491   region.addr = addr;
492   region.size = size;
493
494   auto pos = std::lower_bound(ignored_regions_.begin(), ignored_regions_.end(), region,
495                               [](auto const& reg1, auto const& reg2) {
496                                 return reg1.addr < reg2.addr || (reg1.addr == reg2.addr && reg1.size < reg2.size);
497                               });
498   if (pos == ignored_regions_.end() || pos->addr != addr || pos->size != size)
499     ignored_regions_.insert(pos, region);
500 }
501
502 void RemoteProcess::ignore_heap(IgnoredHeapRegion const& region)
503 {
504   // Binary search the position of insertion:
505   auto pos = std::lower_bound(ignored_heap_.begin(), ignored_heap_.end(), region.address,
506                               [](auto const& reg, auto const* addr) { return reg.address < addr; });
507   if (pos == ignored_heap_.end() || pos->address != region.address) {
508     // Insert it:
509     ignored_heap_.insert(pos, region);
510   }
511 }
512
513 void RemoteProcess::unignore_heap(void* address, size_t size)
514 {
515   // Binary search:
516   auto pos = std::lower_bound(ignored_heap_.begin(), ignored_heap_.end(), address,
517                               [](auto const& reg, auto const* addr) { return reg.address < addr; });
518   if (pos != ignored_heap_.end() && static_cast<char*>(pos->address) <= static_cast<char*>(address) + size)
519     ignored_heap_.erase(pos);
520 }
521
522 void RemoteProcess::ignore_local_variable(const char* var_name, const char* frame_name) const
523 {
524   if (frame_name != nullptr && strcmp(frame_name, "*") == 0)
525     frame_name = nullptr;
526   for (std::shared_ptr<simgrid::mc::ObjectInformation> const& info : this->object_infos)
527     info->remove_local_variable(var_name, frame_name);
528 }
529
530 std::vector<simgrid::mc::ActorInformation>& RemoteProcess::actors()
531 {
532   this->refresh_simix();
533   return smx_actors_infos;
534 }
535
536 std::vector<simgrid::mc::ActorInformation>& RemoteProcess::dead_actors()
537 {
538   this->refresh_simix();
539   return smx_dead_actors_infos;
540 }
541
542 void RemoteProcess::dump_stack() const
543 {
544   unw_addr_space_t as = unw_create_addr_space(&_UPT_accessors, BYTE_ORDER);
545   if (as == nullptr) {
546     XBT_ERROR("Could not initialize ptrace address space");
547     return;
548   }
549
550   void* context = _UPT_create(this->pid_);
551   if (context == nullptr) {
552     unw_destroy_addr_space(as);
553     XBT_ERROR("Could not initialize ptrace context");
554     return;
555   }
556
557   unw_cursor_t cursor;
558   if (unw_init_remote(&cursor, as, context) != 0) {
559     _UPT_destroy(context);
560     unw_destroy_addr_space(as);
561     XBT_ERROR("Could not initialiez ptrace cursor");
562     return;
563   }
564
565   simgrid::mc::dumpStack(stderr, &cursor);
566
567   _UPT_destroy(context);
568   unw_destroy_addr_space(as);
569 }
570 } // namespace mc
571 } // namespace simgrid