From bbe4982c906909c5740a69715b80174c2b800078 Mon Sep 17 00:00:00 2001 From: Gabriel Corona Date: Wed, 20 Jul 2016 11:02:42 +0200 Subject: [PATCH] [mc] Add documentation and fix some things --- src/mc/DwarfExpression.hpp | 2 +- src/mc/ObjectInformation.cpp | 45 +++++-------- src/mc/ObjectInformation.hpp | 118 +++++++++++++++++++++++++++-------- src/mc/mc_dwarf.cpp | 15 +++-- 4 files changed, 118 insertions(+), 62 deletions(-) diff --git a/src/mc/DwarfExpression.hpp b/src/mc/DwarfExpression.hpp index 1e2324299e..4b1f202f26 100644 --- a/src/mc/DwarfExpression.hpp +++ b/src/mc/DwarfExpression.hpp @@ -30,7 +30,7 @@ namespace dwarf { /** A DWARF expression * * DWARF defines a simple stack-based VM for evaluating expressions - * (such as locations of variables, etc.): a DWARF expressions is + * (such as locations of variables, etc.): a DWARF expression is * just a sequence of dwarf instructions. We currently directly use * `Dwarf_Op` from `dwarf.h` for dwarf instructions. */ diff --git a/src/mc/ObjectInformation.cpp b/src/mc/ObjectInformation.cpp index 59280f996f..96835b814a 100644 --- a/src/mc/ObjectInformation.cpp +++ b/src/mc/ObjectInformation.cpp @@ -15,38 +15,26 @@ namespace simgrid { namespace mc { -ObjectInformation::ObjectInformation() -{ - this->flags = 0; - this->start = nullptr; - this->end = nullptr; - this->start_exec = nullptr; - this->end_exec = nullptr; - this->start_rw = nullptr; - this->end_rw = nullptr; - this->start_ro = nullptr; - this->end_ro = nullptr; -} +ObjectInformation::ObjectInformation() {} -/** Find the DWARF offset for this ELF object - * - * An offset is applied to address found in DWARF: - * - * * for an executable object, addresses are virtual address - * (there is no offset) i.e. - * \f$\text{virtual address} = \{dwarf address}\f$; +/* For an executable object, addresses are virtual address + * (there is no offset) i.e. 
+ * \f$\text{virtual address} = \text{dwarf address}\f$; * - * * for a shared object, the addreses are offset from the begining - * of the shared object (the base address of the mapped shared - * object must be used as offset - * i.e. \f$\text{virtual address} = \text{shared object base address} + * For a shared object, the addresses are offsets from the beginning + * of the shared object (the base address of the mapped shared + * object must be used as offset + * i.e. \f$\text{virtual address} = \text{shared object base address} * + \text{dwarf address}\f$. */ void *ObjectInformation::base_address() const { + // For an executable (more precisely for an ET_EXEC) the base is 0: if (this->executable()) return nullptr; + // For a shared object (ET_DYN, including position-independent executables) + // the base address is its lowest address: void *result = this->start_exec; if (this->start_rw != nullptr && result > (void *) this->start_rw) result = this->start_rw; @@ -55,7 +43,6 @@ void *ObjectInformation::base_address() const return result; } -/* Find a function by instruction pointer */ simgrid::mc::Frame* ObjectInformation::find_function(const void *ip) const { /* This is implemented by binary search on a sorted array. @@ -146,16 +133,16 @@ void ObjectInformation::remove_global_variable(const char* name) } } -/** \brief Ignore a local variable in a scope +/** Ignore a local variable in a scope * * Ignore all instances of variables with a given name in * any (possibly inlined) subprogram with a given namespaced * name. 
* - * \param var_name Name of the local variable (or parameter to ignore) - * \param subprogram_name Name of the subprogram to ignore (nullptr for any) - * \param subprogram (possibly inlined) Subprogram of the scope - * \param scope Current scope + * @param var_name Name of the local variable to ignore + * @param subprogram_name Name of the subprogram to ignore (nullptr for any) + * @param subprogram (possibly inlined) Subprogram of the current scope + * @param scope Current scope */ static void remove_local_variable(simgrid::mc::Frame& scope, const char *var_name, diff --git a/src/mc/ObjectInformation.hpp b/src/mc/ObjectInformation.hpp index 9a55d87bfc..e65d9d35fb 100644 --- a/src/mc/ObjectInformation.hpp +++ b/src/mc/ObjectInformation.hpp @@ -33,23 +33,23 @@ struct FunctionIndexEntry { simgrid::mc::Frame* function; }; -/** Information about an (ELF) executable/sharedobject +/** Information about an ELF module (executable or shared object) + * + * This contains all the information we need about an executable or + * shared object in the model-checked process: * - * This contain sall the information we have at runtime about an - * executable/shared object in the target (modelchecked) process: * - where it is located in the virtual address space; - * - where are located it's different memory mapping in the the - * virtual address space ; - * - all the debugging (DWARF) information, - * - location of the functions, - * - types - * - etc. * - * It is not copyable because we are taking pointers to Types/Frames. - * We'd have to update/rebuild some data structures in order to copy - * successfully. + * - where its different memory mappings are located in the + * virtual address space; + * + * - all the debugging (DWARF) information + * - types, + * - location of the functions and their local variables, + * - global variables, + * + * - etc. 
+ */ - class ObjectInformation { public: ObjectInformation(); @@ -62,45 +62,109 @@ public: static const int Executable = 1; /** Bitfield of flags */ - int flags; + int flags = 0; std::string file_name; - const void* start; - const void *end; - char *start_exec; - char *end_exec; // Executable segment - char *start_rw; - char *end_rw; // Read-write segment - char *start_ro; - char *end_ro; // read-only segment + const void* start = nullptr; + const void *end = nullptr; + // Location of its text segment: + char *start_exec = nullptr; + char *end_exec = nullptr; + // Location of the read/write part of its data segment: + char *start_rw = nullptr; + char *end_rw = nullptr; + // Location of the read-only part of its data segment: + char *start_ro = nullptr; + char *end_ro = nullptr; + + /** All of its subprograms indexed by their address */ std::unordered_map subprograms; + + /** Index of functions by instruction address + * + * We need to efficiently find the function from any given instruction + * address inside its range. This index is sorted by low_pc. + * + * The entries are sorted by low_pc and a binary search can be used to look + * them up. In order to have a better cache locality, we only keep the + * information we need for the lookup in this vector. We could probably + * replace subprograms by an ordered vector of Frame and replace this one by + * a parallel `std::vector`. + */ + std::vector functions_index; + // TODO, remove the mutable (to remove it we'll have to add a lot of const everywhere) mutable std::vector global_variables; + + /** Types indexed by DWARF ID */ std::unordered_map types; - std::unordered_map full_types_by_name; - /** Index of functions by IP + /** Types indexed by name * - * The entries are sorted by low_pc and a binary search can be used to look - * them up. Should we used a binary tree instead? + * Different compilation units have their separate type definitions + * (for the same type). 
When we find an opaque type in one compilation unit, + * we use this in order to try to find its definition in another compilation + * unit. */ - std::vector functions_index; + std::unordered_map full_types_by_name; + /** Whether this module is an executable + * + * More precisely we check if this is an ET_EXEC ELF. These ELF files + * use fixed addresses instead of base-address-relative addresses. + * Position-independent executables are in fact ET_DYN. + */ bool executable() const { return this->flags & simgrid::mc::ObjectInformation::Executable; } + /** Base address of the module + * + * All the location information in ELF and DWARF is expressed as offsets + * from this base address: + * + * - location of the functions and global variables; + * + * - the DWARF instruction `DW_OP_addr` pushes this on the DWARF stack. + **/ void* base_address() const; + /** Find a function by instruction address + * + * @param ip instruction address + * @return corresponding function (if any) or nullptr + */ simgrid::mc::Frame* find_function(const void *ip) const; + + /** Find a global variable by name + * + * This is used to ignore global variables and to find well-known variables + * (`__mmalloc_default_mdp`). + * + * @param name scoped name of the global variable (`myproject::Foo::count`) + * @return corresponding variable (if any) or nullptr + */ simgrid::mc::Variable* find_variable(const char* name) const; + + /** Remove a global variable (in order to ignore it) + * + * This is used to ignore a global variable for the snapshot comparison. 
+ */ void remove_global_variable(const char* name); + + /** Remove a local variable (in order to ignore it) + * + * @param name Name of the local variable + * @param scope Namespaced name of the function (or null for all functions) + */ void remove_local_variable( const char* name, const char* scope); }; XBT_PRIVATE std::shared_ptr createObjectInformation( std::vector const& maps, const char* name); + +/** Augment the current module with information about the other ones */ XBT_PRIVATE void postProcessObjectInformation( simgrid::mc::Process* process, simgrid::mc::ObjectInformation* info); diff --git a/src/mc/mc_dwarf.cpp b/src/mc/mc_dwarf.cpp index d619a96667..857098fbf2 100644 --- a/src/mc/mc_dwarf.cpp +++ b/src/mc/mc_dwarf.cpp @@ -1044,6 +1044,9 @@ void read_dwarf_info(simgrid::mc::ObjectInformation* info, Dwarf* dwarf) static std::vector get_build_id(Elf* elf) { + // Summary: the GNU build ID is stored in a ("GNU", NT_GNU_BUILD_ID) note + // found in a PT_NOTE entry in the program header table. + size_t phnum; if (elf_getphdrnum (elf, &phnum) != 0) xbt_die("Could not read program headers"); @@ -1150,7 +1153,7 @@ std::string find_by_build_id(std::vector id) * lists of types, variables, functions. 
*/ static -void MC_dwarf_get_variables(simgrid::mc::ObjectInformation* info) +void MC_load_dwarf(simgrid::mc::ObjectInformation* info) { if (elf_version(EV_CURRENT) == EV_NONE) xbt_die("libelf initialization error"); @@ -1161,12 +1164,12 @@ void MC_dwarf_get_variables(simgrid::mc::ObjectInformation* info) xbt_die("Could not open file %s", info->file_name.c_str()); Elf* elf = elf_begin(fd, ELF_C_READ, nullptr); if (elf == nullptr) - xbt_die("Not an ELF file 1"); + xbt_die("Not an ELF file"); Elf_Kind kind = elf_kind(elf); if (kind != ELF_K_ELF) - xbt_die("Not an ELF file 2"); + xbt_die("Not an ELF file"); - // Remember if this is a `ET_EXEC` (fixed location) or `ET_DYN` (relocatable): + // Remember if this is a `ET_EXEC` (fixed location) or `ET_DYN`: Elf64_Half type = get_type(elf); if (type == ET_EXEC) info->flags |= simgrid::mc::ObjectInformation::Executable; @@ -1218,6 +1221,8 @@ void MC_dwarf_get_variables(simgrid::mc::ObjectInformation* info) // TODO, try to find DWARF info using debug-link. // Is this method really used anywhere? + elf_end(elf); + close(fd); xbt_die("Debugging information not found for %s\n" "Try recompiling with -g\n", info->file_name.c_str()); @@ -1343,7 +1348,7 @@ std::shared_ptr createObjectInformation( std::make_shared(); result->file_name = name; simgrid::mc::find_object_address(maps, result.get()); - MC_dwarf_get_variables(result.get()); + MC_load_dwarf(result.get()); MC_post_process_variables(result.get()); MC_post_process_types(result.get()); for (auto& entry : result.get()->subprograms) -- 2.20.1