Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Handle simcall result through mc::SimcallObserver.
[simgrid.git] / src / mc / remote / RemoteProcess.cpp
1 /* Copyright (c) 2014-2021. The SimGrid Team. All rights reserved.          */
2
3 /* This program is free software; you can redistribute it and/or modify it
4  * under the terms of the license (GNU LGPL) which comes with this package. */
5
6 #define _FILE_OFFSET_BITS 64 /* needed for pread_whole to work as expected on 32bits */
7
8 #include "src/mc/remote/RemoteProcess.hpp"
9
10 #include "src/mc/sosp/Snapshot.hpp"
11 #include "xbt/file.hpp"
12 #include "xbt/log.h"
13
14 #include <fcntl.h>
15 #include <libunwind-ptrace.h>
16 #include <sys/mman.h> // PROT_*
17
18 #include <algorithm>
19 #include <cerrno>
20 #include <cstring>
21 #include <memory>
22 #include <string>
23
24 using simgrid::mc::remote;
25
26 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_process, mc, "MC process information");
27
28 namespace simgrid {
29 namespace mc {
30
31 // ***** Helper stuff
32
/* Names of the libraries whose memory segments are not considered.
 * The entries are bare library names, without any ".so" extension or version suffix. */
static const std::vector<std::string> filtered_libraries = {
#if defined(__linux__)
    "ld",
#elif defined(__FreeBSD__)
    "ld-elf",
    "ld-elf32",
    "libkvm",      /* kernel data access library */
    "libprocstat", /* process and file information retrieval */
    "libthr",      /* thread library */
    "libutil",
#endif
    "libargp", /* workarounds for glibc-less systems */
    "libasan", /* gcc sanitizers */
    "libasn1",
    "libboost_chrono",
    "libboost_context",
    "libboost_context-mt",
    "libboost_stacktrace_addr2line",
    "libboost_stacktrace_backtrace",
    "libboost_system",
    "libboost_thread",
    "libboost_timer",
    "libbrotlicommon",
    "libbrotlidec",
    "libbz2",
    "libc",
    "libc++",
    "libcdt",
    "libcgraph",
    "libcom_err",
    "libcrypt",
    "libcrypto",
    "libcurl",
    "libcurl-gnutls",
    "libcxxrt",
    "libdebuginfod",
    "libdl",
    "libdw",
    "libelf",
    "libevent",
    "libexecinfo",
    "libffi",
    "libflang",
    "libflangrti",
    "libgcc_s",
    "libgmp",
    "libgnutls",
    "libgcrypt",
    "libgfortran",
    "libgpg-error",
    "libgssapi",
    "libgssapi_krb5",
    "libhcrypto",
    "libheimbase",
    "libheimntlm",
    "libhx509",
    "libhogweed",
    "libidn2",
    "libimf",
    "libintlc",
    "libirng",
    "libk5crypto",
    "libkeyutils",
    "libkrb5",
    "libkrb5support", /* odd behaviour on fedora rawhide ... remove these when fixed */
    "liblber",
    "libldap",
    "libldap_r",
    "liblua5.1",
    "liblua5.3",
    "liblzma",
    "libm",
    "libmd",
    "libnettle",
    "libnghttp2",
    "libomp",
    "libp11-kit",
    "libpapi",
    "libpcre2",
    "libpfm",
    "libpgmath",
    "libpsl",
    "libpthread",
    "libquadmath",
    "libresolv",
    "libroken",
    "librt",
    "librtmp",
    "libsasl2",
    "libselinux",
    "libsqlite3",
    "libssh",
    "libssh2",
    "libssl",
    "libstdc++",
    "libsvml",
    "libtasn1",
    "libtsan",  /* gcc sanitizers */
    "libubsan", /* gcc sanitizers */
    "libunistring",
    "libunwind",
    "libunwind-ptrace",
    "libunwind-x86",
    "libunwind-x86_64",
    "libwind",
    "libz",
    "libzstd",
};
141
142 static bool is_filtered_lib(const std::string& libname)
143 {
144   return std::find(begin(filtered_libraries), end(filtered_libraries), libname) != end(filtered_libraries);
145 }
146
147 static std::string get_lib_name(const std::string& pathname)
148 {
149   std::string map_basename = simgrid::xbt::Path(pathname).get_base_name();
150   std::string libname;
151
152   size_t pos = map_basename.rfind(".so");
153   if (pos != std::string::npos) {
154     // strip the extension (matching regex "\.so.*$")
155     libname.assign(map_basename, 0, pos);
156
157     // strip the version suffix (matching regex "-[.0-9-]*$")
158     while (true) {
159       pos = libname.rfind('-');
160       if (pos == std::string::npos || libname.find_first_not_of(".0123456789", pos + 1) != std::string::npos)
161         break;
162       libname.erase(pos);
163     }
164   }
165
166   return libname;
167 }
168
169 static ssize_t pread_whole(int fd, void* buf, size_t count, off_t offset)
170 {
171   auto* buffer       = static_cast<char*>(buf);
172   ssize_t real_count = count;
173   while (count) {
174     ssize_t res = pread(fd, buffer, count, offset);
175     if (res > 0) {
176       count -= res;
177       buffer += res;
178       offset += res;
179     } else if (res == 0)
180       return -1;
181     else if (errno != EINTR) {
182       XBT_ERROR("pread_whole: %s", strerror(errno));
183       return -1;
184     }
185   }
186   return real_count;
187 }
188
189 static ssize_t pwrite_whole(int fd, const void* buf, size_t count, off_t offset)
190 {
191   const auto* buffer = static_cast<const char*>(buf);
192   ssize_t real_count = count;
193   while (count) {
194     ssize_t res = pwrite(fd, buffer, count, offset);
195     if (res > 0) {
196       count -= res;
197       buffer += res;
198       offset += res;
199     } else if (res == 0)
200       return -1;
201     else if (errno != EINTR) {
202       XBT_ERROR("pwrite_whole: %s", strerror(errno));
203       return -1;
204     }
205   }
206   return real_count;
207 }
208
209 static pthread_once_t zero_buffer_flag = PTHREAD_ONCE_INIT;
210 static const void* zero_buffer;
211 static const size_t zero_buffer_size = 10 * 4096;
212
213 static void zero_buffer_init()
214 {
215   int fd = open("/dev/zero", O_RDONLY);
216   if (fd < 0)
217     xbt_die("Could not open /dev/zero");
218   zero_buffer = mmap(nullptr, zero_buffer_size, PROT_READ, MAP_SHARED, fd, 0);
219   if (zero_buffer == MAP_FAILED)
220     xbt_die("Could not map the zero buffer");
221   close(fd);
222 }
223
/** Open the "/proc/<pid>/mem" file of process @p pid.
 *
 *  @param pid   process whose memory file is opened
 *  @param flags flags forwarded verbatim to open()
 *  @return the value returned by open(): a file descriptor, or a negative value on failure
 */
int open_vm(pid_t pid, int flags)
{
  std::string mem_path("/proc/");
  mem_path += std::to_string(pid);
  mem_path += "/mem";
  return open(mem_path.c_str(), flags);
}
229
230 // ***** RemoteProcess
231
232 RemoteProcess::RemoteProcess(pid_t pid) : AddressSpace(this), pid_(pid), running_(true) {}
233
234 void RemoteProcess::init(void* mmalloc_default_mdp, void* maxpid, void* actors, void* dead_actors)
235 {
236   this->heap_address      = mmalloc_default_mdp;
237   this->maxpid_addr_      = maxpid;
238   this->actors_addr_      = actors;
239   this->dead_actors_addr_ = dead_actors;
240
241   this->memory_map_ = simgrid::xbt::get_memory_map(this->pid_);
242   this->init_memory_map_info();
243
244   int fd = open_vm(this->pid_, O_RDWR);
245   xbt_assert(fd >= 0, "Could not open file for process virtual address space");
246   this->memory_file = fd;
247
248   this->smx_actors_infos.clear();
249   this->smx_dead_actors_infos.clear();
250   this->unw_addr_space            = simgrid::mc::UnwindContext::createUnwindAddressSpace();
251   this->unw_underlying_addr_space = simgrid::unw::create_addr_space();
252   this->unw_underlying_context    = simgrid::unw::create_context(this->unw_underlying_addr_space, this->pid_);
253 }
254
255 RemoteProcess::~RemoteProcess()
256 {
257   if (this->memory_file >= 0)
258     close(this->memory_file);
259
260   if (this->unw_underlying_addr_space != unw_local_addr_space) {
261     if (this->unw_underlying_addr_space)
262       unw_destroy_addr_space(this->unw_underlying_addr_space);
263     if (this->unw_underlying_context)
264       _UPT_destroy(this->unw_underlying_context);
265   }
266
267   unw_destroy_addr_space(this->unw_addr_space);
268 }
269
270 /** Refresh the information about the process
271  *
272  *  Do not use directly, this is used by the getters when appropriate
273  *  in order to have fresh data.
274  */
275 void RemoteProcess::refresh_heap()
276 {
277   // Read/dereference/refresh the std_heap pointer:
278   if (not this->heap)
279     this->heap = std::make_unique<s_xbt_mheap_t>();
280   this->read_bytes(this->heap.get(), sizeof(mdesc), remote(this->heap_address));
281   this->cache_flags_ |= RemoteProcess::cache_heap;
282 }
283
284 /** Refresh the information about the process
285  *
286  *  Do not use directly, this is used by the getters when appropriate
287  *  in order to have fresh data.
288  * */
289 void RemoteProcess::refresh_malloc_info()
290 {
291   // Refresh process->heapinfo:
292   if (this->cache_flags_ & RemoteProcess::cache_malloc)
293     return;
294   size_t count = this->heap->heaplimit + 1;
295   if (this->heap_info.size() < count)
296     this->heap_info.resize(count);
297   this->read_bytes(this->heap_info.data(), count * sizeof(malloc_info), remote(this->heap->heapinfo));
298   this->cache_flags_ |= RemoteProcess::cache_malloc;
299 }
300
301 /** @brief Finds the range of the different memory segments and binary paths */
302 void RemoteProcess::init_memory_map_info()
303 {
304   XBT_DEBUG("Get debug information ...");
305   this->maestro_stack_start_ = nullptr;
306   this->maestro_stack_end_   = nullptr;
307   this->object_infos.resize(0);
308   this->binary_info = nullptr;
309
310   std::vector<simgrid::xbt::VmMap> const& maps = this->memory_map_;
311
312   const char* current_name = nullptr;
313
314   this->object_infos.clear();
315
316   for (size_t i = 0; i < maps.size(); i++) {
317     simgrid::xbt::VmMap const& reg = maps[i];
318     const char* pathname           = maps[i].pathname.c_str();
319
320     // Nothing to do
321     if (maps[i].pathname.empty()) {
322       current_name = nullptr;
323       continue;
324     }
325
326     // [stack], [vvar], [vsyscall], [vdso] ...
327     if (pathname[0] == '[') {
328       if ((reg.prot & PROT_WRITE) && not memcmp(pathname, "[stack]", 7)) {
329         this->maestro_stack_start_ = remote(reg.start_addr);
330         this->maestro_stack_end_   = remote(reg.end_addr);
331       }
332       current_name = nullptr;
333       continue;
334     }
335
336     if (current_name && strcmp(current_name, pathname) == 0)
337       continue;
338
339     current_name = pathname;
340     if (not(reg.prot & PROT_READ) && (reg.prot & PROT_EXEC))
341       continue;
342
343     const bool is_executable = not i;
344     std::string libname;
345     if (not is_executable) {
346       libname = get_lib_name(pathname);
347       if (is_filtered_lib(libname)) {
348         continue;
349       }
350     }
351
352     std::shared_ptr<simgrid::mc::ObjectInformation> info =
353         simgrid::mc::createObjectInformation(this->memory_map_, pathname);
354     this->object_infos.push_back(info);
355     if (is_executable)
356       this->binary_info = info;
357   }
358
359   xbt_assert(this->maestro_stack_start_, "Did not find maestro_stack_start");
360   xbt_assert(this->maestro_stack_end_, "Did not find maestro_stack_end");
361
362   XBT_DEBUG("Get debug information done !");
363 }
364
365 std::shared_ptr<simgrid::mc::ObjectInformation> RemoteProcess::find_object_info(RemotePtr<void> addr) const
366 {
367   for (auto const& object_info : this->object_infos)
368     if (addr.address() >= (std::uint64_t)object_info->start && addr.address() <= (std::uint64_t)object_info->end)
369       return object_info;
370   return nullptr;
371 }
372
373 std::shared_ptr<ObjectInformation> RemoteProcess::find_object_info_exec(RemotePtr<void> addr) const
374 {
375   for (std::shared_ptr<ObjectInformation> const& info : this->object_infos)
376     if (addr.address() >= (std::uint64_t)info->start_exec && addr.address() <= (std::uint64_t)info->end_exec)
377       return info;
378   return nullptr;
379 }
380
381 std::shared_ptr<ObjectInformation> RemoteProcess::find_object_info_rw(RemotePtr<void> addr) const
382 {
383   for (std::shared_ptr<ObjectInformation> const& info : this->object_infos)
384     if (addr.address() >= (std::uint64_t)info->start_rw && addr.address() <= (std::uint64_t)info->end_rw)
385       return info;
386   return nullptr;
387 }
388
389 simgrid::mc::Frame* RemoteProcess::find_function(RemotePtr<void> ip) const
390 {
391   std::shared_ptr<simgrid::mc::ObjectInformation> info = this->find_object_info_exec(ip);
392   return info ? info->find_function((void*)ip.address()) : nullptr;
393 }
394
395 /** Find (one occurrence of) the named variable definition
396  */
397 const simgrid::mc::Variable* RemoteProcess::find_variable(const char* name) const
398 {
399   // First lookup the variable in the executable shared object.
400   // A global variable used directly by the executable code from a library
401   // is reinstantiated in the executable memory .data/.bss.
402   // We need to look up the variable in the executable first.
403   if (this->binary_info) {
404     std::shared_ptr<simgrid::mc::ObjectInformation> const& info = this->binary_info;
405     const simgrid::mc::Variable* var                            = info->find_variable(name);
406     if (var)
407       return var;
408   }
409
410   for (std::shared_ptr<simgrid::mc::ObjectInformation> const& info : this->object_infos) {
411     const simgrid::mc::Variable* var = info->find_variable(name);
412     if (var)
413       return var;
414   }
415
416   return nullptr;
417 }
418
419 void RemoteProcess::read_variable(const char* name, void* target, size_t size) const
420 {
421   const simgrid::mc::Variable* var = this->find_variable(name);
422   xbt_assert(var, "Variable %s not found", name);
423   xbt_assert(var->address, "No simple location for this variable");
424
425   if (not var->type->full_type) // Try to resolve this type. The needed ObjectInfo was maybe (lazily) loaded recently
426     for (auto const& object_info : this->object_infos)
427       postProcessObjectInformation(this, object_info.get());
428   xbt_assert(var->type->full_type, "Partial type for %s (even after re-resolving types), cannot retrieve its size.",
429              name);
430   xbt_assert((size_t)var->type->full_type->byte_size == size, "Unexpected size for %s (expected %zu, received %zu).",
431              name, size, (size_t)var->type->full_type->byte_size);
432   this->read_bytes(target, size, remote(var->address));
433 }
434
435 std::string RemoteProcess::read_string(RemotePtr<char> address) const
436 {
437   if (not address)
438     return {};
439
440   std::vector<char> res(128);
441   off_t off = 0;
442
443   while (true) {
444     ssize_t c = pread(this->memory_file, res.data() + off, res.size() - off, (off_t)address.address() + off);
445     if (c == -1 && errno == EINTR)
446       continue;
447     xbt_assert(c > 0, "Could not read string from remote process");
448
449     const void* p = memchr(res.data() + off, '\0', c);
450     if (p)
451       return std::string(res.data());
452
453     off += c;
454     if (off == (off_t)res.size())
455       res.resize(res.size() * 2);
456   }
457 }
458
459 void* RemoteProcess::read_bytes(void* buffer, std::size_t size, RemotePtr<void> address, ReadOptions /*options*/) const
460 {
461   if (pread_whole(this->memory_file, buffer, size, (size_t)address.address()) < 0)
462     xbt_die("Read at %p from process %lli failed", (void*)address.address(), (long long)this->pid_);
463   return buffer;
464 }
465
466 /** Write data to a process memory
467  *
468  *  @param buffer   local memory address (source)
469  *  @param len      data size
470  *  @param address  target process memory address (target)
471  */
472 void RemoteProcess::write_bytes(const void* buffer, size_t len, RemotePtr<void> address) const
473 {
474   if (pwrite_whole(this->memory_file, buffer, len, (size_t)address.address()) < 0)
475     xbt_die("Write to process %lli failed", (long long)this->pid_);
476 }
477
478 void RemoteProcess::clear_bytes(RemotePtr<void> address, size_t len) const
479 {
480   pthread_once(&zero_buffer_flag, zero_buffer_init);
481   while (len) {
482     size_t s = len > zero_buffer_size ? zero_buffer_size : len;
483     this->write_bytes(zero_buffer, s, address);
484     address = remote((char*)address.address() + s);
485     len -= s;
486   }
487 }
488
489 void RemoteProcess::ignore_region(std::uint64_t addr, std::size_t size)
490 {
491   IgnoredRegion region;
492   region.addr = addr;
493   region.size = size;
494
495   auto pos = std::lower_bound(ignored_regions_.begin(), ignored_regions_.end(), region,
496                               [](auto const& reg1, auto const& reg2) {
497                                 return reg1.addr < reg2.addr || (reg1.addr == reg2.addr && reg1.size < reg2.size);
498                               });
499   if (pos == ignored_regions_.end() || pos->addr != addr || pos->size != size)
500     ignored_regions_.insert(pos, region);
501 }
502
503 void RemoteProcess::ignore_heap(IgnoredHeapRegion const& region)
504 {
505   // Binary search the position of insertion:
506   auto pos = std::lower_bound(ignored_heap_.begin(), ignored_heap_.end(), region.address,
507                               [](auto const& reg, auto const* addr) { return reg.address < addr; });
508   if (pos == ignored_heap_.end() || pos->address != region.address) {
509     // Insert it:
510     ignored_heap_.insert(pos, region);
511   }
512 }
513
514 void RemoteProcess::unignore_heap(void* address, size_t size)
515 {
516   // Binary search:
517   auto pos = std::lower_bound(ignored_heap_.begin(), ignored_heap_.end(), address,
518                               [](auto const& reg, auto const* addr) { return reg.address < addr; });
519   if (pos != ignored_heap_.end() && static_cast<char*>(pos->address) <= static_cast<char*>(address) + size)
520     ignored_heap_.erase(pos);
521 }
522
523 void RemoteProcess::ignore_local_variable(const char* var_name, const char* frame_name) const
524 {
525   if (frame_name != nullptr && strcmp(frame_name, "*") == 0)
526     frame_name = nullptr;
527   for (std::shared_ptr<simgrid::mc::ObjectInformation> const& info : this->object_infos)
528     info->remove_local_variable(var_name, frame_name);
529 }
530
531 std::vector<simgrid::mc::ActorInformation>& RemoteProcess::actors()
532 {
533   this->refresh_simix();
534   return smx_actors_infos;
535 }
536
537 std::vector<simgrid::mc::ActorInformation>& RemoteProcess::dead_actors()
538 {
539   this->refresh_simix();
540   return smx_dead_actors_infos;
541 }
542
543 void RemoteProcess::dump_stack() const
544 {
545   unw_addr_space_t as = unw_create_addr_space(&_UPT_accessors, BYTE_ORDER);
546   if (as == nullptr) {
547     XBT_ERROR("Could not initialize ptrace address space");
548     return;
549   }
550
551   void* context = _UPT_create(this->pid_);
552   if (context == nullptr) {
553     unw_destroy_addr_space(as);
554     XBT_ERROR("Could not initialize ptrace context");
555     return;
556   }
557
558   unw_cursor_t cursor;
559   if (unw_init_remote(&cursor, as, context) != 0) {
560     _UPT_destroy(context);
561     unw_destroy_addr_space(as);
562     XBT_ERROR("Could not initialiez ptrace cursor");
563     return;
564   }
565
566   simgrid::mc::dumpStack(stderr, &cursor);
567
568   _UPT_destroy(context);
569   unw_destroy_addr_space(as);
570 }
571
572 unsigned long RemoteProcess::get_maxpid() const
573 {
574   unsigned long maxpid;
575   this->read_bytes(&maxpid, sizeof(unsigned long), remote(maxpid_addr_));
576   return maxpid;
577 }
578
579 void RemoteProcess::get_actor_vectors(RemotePtr<s_xbt_dynar_t>& actors, RemotePtr<s_xbt_dynar_t>& dead_actors)
580 {
581   actors      = remote(static_cast<s_xbt_dynar_t*>(actors_addr_));
582   dead_actors = remote(static_cast<s_xbt_dynar_t*>(dead_actors_addr_));
583 }
584 } // namespace mc
585 } // namespace simgrid