From 4c3d4cccb2f5fb04a2bc157ca321e107711cca53 Mon Sep 17 00:00:00 2001 From: Gabriel Corona Date: Mon, 28 Sep 2015 16:19:26 +0200 Subject: [PATCH] =?utf8?q?[mc]=20Bring=20back=20soft-dirty=20tracking=20fr?= =?utf8?q?om=20the=20dead=20=F0=9F=92=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit We want to compare the different snapshoting options on the new cross-process architecture. --- src/include/mc/mc.h | 20 ++++++++++++++++ src/mc/ModelChecker.cpp | 3 ++- src/mc/ModelChecker.hpp | 3 +++ src/mc/RegionSnapshot.cpp | 27 +++++++++++++++++---- src/mc/RegionSnapshot.hpp | 15 ++++++++---- src/mc/mc_checkpoint.cpp | 48 ++++++++++++++++++++++++++++++------- src/mc/mc_config.cpp | 10 ++++++++ src/mc/mc_page_snapshot.cpp | 13 +++++++++- src/mc/mc_private.h | 2 ++ src/mc/mc_process.cpp | 40 +++++++++++++++++++++++++++++++ src/mc/mc_process.h | 6 ++++- src/mc/mc_snapshot.cpp | 6 ++--- src/mc/mc_visited.cpp | 5 ++-- src/simgrid/sg_config.c | 6 +++++ 14 files changed, 178 insertions(+), 26 deletions(-) diff --git a/src/include/mc/mc.h b/src/include/mc/mc.h index dc05335355..5fd0836728 100644 --- a/src/include/mc/mc.h +++ b/src/include/mc/mc.h @@ -74,6 +74,26 @@ XBT_PRIVATE void _mc_cfg_cb_comms_determinism(const char *name, int pos); XBT_PRIVATE void _mc_cfg_cb_send_determinism(const char *name, int pos); XBT_PRIVATE void _mc_cfg_cb_termination(const char *name, int pos); +extern xbt_dynar_t mc_heap_comparison_ignore; +extern xbt_dynar_t stacks_areas; + +/********************************* Global *************************************/ +void _mc_cfg_cb_reduce(const char *name, int pos); +void _mc_cfg_cb_checkpoint(const char *name, int pos); +void _mc_cfg_cb_sparse_checkpoint(const char *name, int pos); +void _mc_cfg_cb_soft_dirty(const char *name, int pos); +void _mc_cfg_cb_ksm(const char *name, int pos); +void _mc_cfg_cb_property(const char *name, int pos); +void _mc_cfg_cb_timeout(const char *name, int pos); +void _mc_cfg_cb_hash(const char *name, int pos); +void _mc_cfg_cb_snapshot_fds(const char *name, int pos); +void _mc_cfg_cb_max_depth(const char *name, int pos); +void _mc_cfg_cb_visited(const char *name, int pos); +void _mc_cfg_cb_dot_output(const char *name, int pos); +void _mc_cfg_cb_comms_determinism(const char *name, int pos); +void _mc_cfg_cb_send_determinism(const char *name, int pos); +void _mc_cfg_cb_termination(const char *name, int pos); + XBT_PUBLIC(void) MC_run(void); XBT_PUBLIC(void) MC_init(void); XBT_PUBLIC(void) MC_exit(void); diff --git a/src/mc/ModelChecker.cpp b/src/mc/ModelChecker.cpp index 2e41784400..f0f65afe4e 100644 --- a/src/mc/ModelChecker.cpp +++ b/src/mc/ModelChecker.cpp @@ -17,7 +17,8 @@ namespace mc { ModelChecker::ModelChecker(pid_t pid, int socket) : hostnames_(xbt_dict_new()), page_store_(500), - process_(pid, socket) + process_(pid, socket), + parent_snapshot_(nullptr) { } diff --git a/src/mc/ModelChecker.hpp b/src/mc/ModelChecker.hpp index 5c3f50c0eb..846ea2ba96 100644 --- a/src/mc/ModelChecker.hpp +++ b/src/mc/ModelChecker.hpp @@ -35,6 +35,9 @@ class ModelChecker { // This is the parent snapshot of the current state: PageStore page_store_; Process process_; +public: + mc_snapshot_t parent_snapshot_; + public: ModelChecker(ModelChecker const&) = delete; ModelChecker& operator=(ModelChecker const&) = delete; diff --git a/src/mc/RegionSnapshot.cpp b/src/mc/RegionSnapshot.cpp index c59f1909fc..6d50ef1873 100644 --- a/src/mc/RegionSnapshot.cpp +++ b/src/mc/RegionSnapshot.cpp @@ -87,28 +87,45 @@ RegionSnapshot dense_region( * @param size Size of the data* */ RegionSnapshot region( - RegionType type, void *start_addr, void* permanent_addr, size_t size) + RegionType type, void *start_addr, void* permanent_addr, size_t size, + RegionSnapshot const* ref_region) { if (_sg_mc_sparse_checkpoint) { - return sparse_region(type, start_addr, permanent_addr, size); + return sparse_region(type, start_addr, permanent_addr, size, ref_region); } else { return dense_region(type, start_addr, permanent_addr, size); } } RegionSnapshot sparse_region(RegionType region_type, - void *start_addr, void* permanent_addr, size_t size) + void *start_addr, void* permanent_addr, size_t size, + RegionSnapshot const* ref_region) { simgrid::mc::Process* process = &mc_model_checker->process(); + bool use_soft_dirty = _sg_mc_sparse_checkpoint && _sg_mc_soft_dirty + && ref_region != nullptr + && ref_region->storage_type() == simgrid::mc::StorageType::Chunked; + xbt_assert((((uintptr_t)start_addr) & (xbt_pagesize-1)) == 0, "Not at the beginning of a page"); xbt_assert((((uintptr_t)permanent_addr) & (xbt_pagesize-1)) == 0, "Not at the beginning of a page"); size_t page_count = mc_page_count(size); - simgrid::mc::PerPageCopy page_data(mc_model_checker->page_store(), *process, - permanent_addr, page_count); + std::vector pagemap; + const size_t* ref_page_numbers = nullptr; + if (use_soft_dirty) { + pagemap.resize(page_count); + process->read_pagemap(pagemap.data(), + mc_page_number(nullptr, permanent_addr), page_count); + ref_page_numbers = ref_region->page_data().pagenos(); + } + + simgrid::mc::PerPageCopy page_data( + mc_model_checker->page_store(), *process, permanent_addr, page_count, + ref_page_numbers, + use_soft_dirty ? pagemap.data() : nullptr); simgrid::mc::RegionSnapshot region( region_type, start_addr, permanent_addr, size); diff --git a/src/mc/RegionSnapshot.hpp b/src/mc/RegionSnapshot.hpp index 975bd266f4..e54516a157 100644 --- a/src/mc/RegionSnapshot.hpp +++ b/src/mc/RegionSnapshot.hpp @@ -76,13 +76,17 @@ public: return pagenos_[i]; } + const std::size_t* pagenos() const { return pagenos_.data(); } + std::size_t* pagenos() { return pagenos_.data(); } + const void* page(std::size_t i) const { return store_->get_page(pagenos_[i]); } PerPageCopy(PageStore& store, AddressSpace& as, - remote_ptr addr, std::size_t page_count); + remote_ptr addr, std::size_t page_count, + const size_t* ref_page_numbers, const std::uint64_t* pagemap); }; enum class RegionType { @@ -131,6 +135,7 @@ public: * each type. */ class RegionSnapshot { +public: static const RegionType UnknownRegion = RegionType::Unknown; static const RegionType HeapRegion = RegionType::Heap; static const RegionType DataRegion = RegionType::Data; @@ -293,10 +298,12 @@ RegionSnapshot privatized_region( RegionSnapshot dense_region( RegionType type, void *start_addr, void* data_addr, size_t size); simgrid::mc::RegionSnapshot sparse_region( - RegionType type, void *start_addr, void* data_addr, size_t size); + RegionType type, void *start_addr, void* data_addr, size_t size, + RegionSnapshot const* ref_region); simgrid::mc::RegionSnapshot region( - RegionType type, void *start_addr, void* data_addr, size_t size); - + RegionType type, void *start_addr, void* data_addr, size_t size, + RegionSnapshot const* ref_region); + } } diff --git a/src/mc/mc_checkpoint.cpp b/src/mc/mc_checkpoint.cpp index a06d5f5ece..88d5ff155e 100644 --- a/src/mc/mc_checkpoint.cpp +++ b/src/mc/mc_checkpoint.cpp @@ -50,6 +50,18 @@ XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_checkpoint, mc, /************************************ Free functions **************************************/ /*****************************************************************************************/ +int MC_important_snapshot(mc_snapshot_t snapshot) +{ + // We need this snapshot in order to know which + // pages needs to be stored in the next snapshot. + // This field is only non-NULL when using soft-dirty + // page tracking. + if (snapshot == mc_model_checker->parent_snapshot_) + return true; + + return false; +} + /** @brief Restore a region from a snapshot * * @param reg Target region @@ -85,7 +97,8 @@ namespace mc { #ifdef HAVE_SMPI simgrid::mc::RegionSnapshot privatized_region( - RegionType region_type, void *start_addr, void* permanent_addr, size_t size + RegionType region_type, void *start_addr, void* permanent_addr, size_t size, + const simgrid::mc::RegionSnapshot* ref_region ) { size_t process_count = MC_smpi_process_count(); @@ -102,11 +115,13 @@ simgrid::mc::RegionSnapshot privatized_region( std::vector data; data.reserve(process_count); - for (size_t i = 0; i < process_count; i++) - data.push_back( - simgrid::mc::region(region_type, start_addr, - privatisation_regions[i].address, size) - ); + for (size_t i = 0; i < process_count; i++) { + const simgrid::mc::RegionSnapshot* ref_privatized_region = nullptr; + if (ref_region && ref_region->storage_type() == StorageType::Privatized) + ref_privatized_region = &ref_region->privatized_data()[i]; + data.push_back(simgrid::mc::region(region_type, start_addr, + privatisation_regions[i].address, size, ref_privatized_region)); + } simgrid::mc::RegionSnapshot region = simgrid::mc::RegionSnapshot( region_type, start_addr, permanent_addr, size); @@ -130,15 +145,19 @@ static void MC_snapshot_add_region(int index, mc_snapshot_t snapshot, else if (type == simgrid::mc::RegionType::Heap) xbt_assert(!object_info, "Unexpected object info for heap region."); - simgrid::mc::RegionSnapshot region; + simgrid::mc::RegionSnapshot const* ref_region = nullptr; + if (mc_model_checker->parent_snapshot_) + ref_region = mc_model_checker->parent_snapshot_->snapshot_regions[index].get(); + simgrid::mc::RegionSnapshot region; #ifdef HAVE_SMPI const bool privatization_aware = object_info && object_info->privatized(); if (privatization_aware && MC_smpi_process_count()) - region = simgrid::mc::privatized_region(type, start_addr, permanent_addr, size); + region = simgrid::mc::privatized_region( + type, start_addr, permanent_addr, size, ref_region); else #endif - region = simgrid::mc::region(type, start_addr, permanent_addr, size); + region = simgrid::mc::region(type, start_addr, permanent_addr, size, ref_region); region.object_info(object_info); snapshot->snapshot_regions[index] @@ -595,8 +614,12 @@ mc_snapshot_t MC_take_snapshot(int num_state) if (_sg_mc_snapshot_fds) snapshot->current_fds = MC_get_current_fds(process->pid); + const bool use_soft_dirty = _sg_mc_sparse_checkpoint && _sg_mc_soft_dirty; + /* Save the std heap and the writable mapped pages of libsimgrid and binary */ MC_get_memory_regions(mc_process, snapshot); + if (use_soft_dirty) + mc_process->reset_soft_dirty(); snapshot->to_ignore = MC_take_snapshot_ignore(); @@ -613,6 +636,8 @@ mc_snapshot_t MC_take_snapshot(int num_state) } MC_snapshot_ignore_restore(snapshot); + if (use_soft_dirty) + mc_model_checker->parent_snapshot_ = snapshot; return snapshot; } @@ -663,11 +688,16 @@ void MC_restore_snapshot_fds(mc_snapshot_t snapshot) void MC_restore_snapshot(mc_snapshot_t snapshot) { XBT_DEBUG("Restore snapshot %i", snapshot->num_state); + const bool use_soft_dirty = _sg_mc_sparse_checkpoint && _sg_mc_soft_dirty; MC_restore_snapshot_regions(snapshot); if (_sg_mc_snapshot_fds) MC_restore_snapshot_fds(snapshot); + if (use_soft_dirty) + mc_model_checker->process().reset_soft_dirty(); MC_snapshot_ignore_restore(snapshot); mc_model_checker->process().cache_flags = 0; + if (use_soft_dirty) + mc_model_checker->parent_snapshot_ = snapshot; } mc_snapshot_t simcall_HANDLER_mc_snapshot(smx_simcall_t simcall) diff --git a/src/mc/mc_config.cpp b/src/mc/mc_config.cpp index 02ce10dac3..147b38a1e5 100644 --- a/src/mc/mc_config.cpp +++ b/src/mc/mc_config.cpp @@ -50,6 +50,7 @@ int _sg_do_model_check = 0; int _sg_do_model_check_record = 0; int _sg_mc_checkpoint = 0; int _sg_mc_sparse_checkpoint = 0; +int _sg_mc_soft_dirty = 0; int _sg_mc_ksm = 0; char *_sg_mc_property_file = NULL; int _sg_mc_hash = 0; @@ -96,6 +97,15 @@ void _mc_cfg_cb_sparse_checkpoint(const char *name, int pos) { _sg_mc_sparse_checkpoint = xbt_cfg_get_boolean(_sg_cfg_set, name); } +void _mc_cfg_cb_soft_dirty(const char *name, int pos) { + if (_sg_cfg_init_status && !_sg_do_model_check) + xbt_die("You are specifying a soft dirty value after the initialization " + "(through MSG_config?), but model-checking was not activated " + "at config time (through --cfg=model-check:1). " + "This won't work, sorry."); + _sg_mc_soft_dirty = xbt_cfg_get_boolean(_sg_cfg_set, name); +} + void _mc_cfg_cb_ksm(const char *name, int pos) { if (_sg_cfg_init_status && !_sg_do_model_check) diff --git a/src/mc/mc_page_snapshot.cpp b/src/mc/mc_page_snapshot.cpp index 2efe58b9a0..b180c4021c 100644 --- a/src/mc/mc_page_snapshot.cpp +++ b/src/mc/mc_page_snapshot.cpp @@ -15,6 +15,9 @@ #include +#define SOFT_DIRTY_BIT_NUMBER 55 +#define SOFT_DIRTY (((uint64_t)1) << SOFT_DIRTY_BIT_NUMBER) + using simgrid::mc::remote; namespace simgrid { @@ -27,7 +30,8 @@ namespace mc { * @return Snapshot page numbers of this new snapshot */ PerPageCopy::PerPageCopy(PageStore& store, AddressSpace& as, - remote_ptr addr, std::size_t page_count) + remote_ptr addr, std::size_t page_count, + const size_t* ref_page_numbers, const std::uint64_t* pagemap) { store_ = &store; this->pagenos_.resize(page_count); @@ -35,6 +39,13 @@ PerPageCopy::PerPageCopy(PageStore& store, AddressSpace& as, for (size_t i = 0; i != page_count; ++i) { + // We don't have to compare soft-clean pages: + if (ref_page_numbers && pagemap && !(pagemap[i] & SOFT_DIRTY)) { + pagenos_[i] = ref_page_numbers[i]; + store_->ref_page(ref_page_numbers[i]); + continue; + } + remote_ptr page = remote(addr.address() + (i << xbt_pagebits)); xbt_assert(mc_page_offset((void*)page.address())==0, "Not at the beginning of a page"); diff --git a/src/mc/mc_private.h b/src/mc/mc_private.h index 909267dd8b..b6c36cf3b5 100644 --- a/src/mc/mc_private.h +++ b/src/mc/mc_private.h @@ -117,6 +117,8 @@ XBT_PRIVATE void MC_report_assertion_error(void); XBT_PRIVATE void MC_invalidate_cache(void); +XBT_PRIVATE int MC_important_snapshot(mc_snapshot_t snapshot); + SG_END_DECL() #endif diff --git a/src/mc/mc_process.cpp b/src/mc/mc_process.cpp index dec40acb71..01751d17e6 100644 --- a/src/mc/mc_process.cpp +++ b/src/mc/mc_process.cpp @@ -131,6 +131,7 @@ static ssize_t pread_whole(int fd, void *buf, size_t count, std::uint64_t offset } else if (res==0) { return -1; } else if (errno != EINTR) { + perror("pread_whole"); return -1; } } @@ -171,6 +172,14 @@ static void MC_zero_buffer_init(void) close(fd); } +static +int open_process_file(pid_t pid, const char* file, int flags) +{ + char buff[50]; + snprintf(buff, sizeof(buff), "/proc/%li/%s", (long) pid, file); + return open(buff, flags); +} + } namespace simgrid { @@ -213,6 +222,8 @@ Process::Process(pid_t pid, int sockfd) process->heap = NULL; process->heap_info = NULL; process->init_memory_map_info(); + process->clear_refs_fd_ = -1; + process->pagemap_fd_ = -1; // Open the memory file if (process->is_self()) @@ -284,6 +295,11 @@ Process::~Process() free(process->heap_info); process->heap_info = NULL; + + if (process->clear_refs_fd_ >= 0) + close(process->clear_refs_fd_); + if (process->pagemap_fd_ >= 0) + close(process->pagemap_fd_); } /** Refresh the information about the process @@ -642,5 +658,29 @@ void Process::ignore_region(std::uint64_t addr, std::size_t size) ignored_regions_.begin() + position, region); } +void Process::reset_soft_dirty() +{ + if (this->clear_refs_fd_ < 0) { + this->clear_refs_fd_ = open_process_file(pid_, "clear_refs", O_WRONLY|O_CLOEXEC); + if (this->clear_refs_fd_ < 0) + xbt_die("Could not open clear_refs file for soft-dirty tracking. Run as root?"); + } + if(::write(this->clear_refs_fd_, "4\n", 2) != 2) + xbt_die("Could not reset softdirty bits"); +} + +void Process::read_pagemap(uint64_t* pagemap, size_t page_start, size_t page_count) +{ + if (pagemap_fd_ < 0) { + pagemap_fd_ = open_process_file(pid_, "pagemap", O_RDONLY|O_CLOEXEC); + if (pagemap_fd_ < 0) + xbt_die("Could not open pagemap file for soft-dirty tracking. Run as root?"); + } + ssize_t bytesize = sizeof(uint64_t) * page_count; + off_t offset = sizeof(uint64_t) * page_start; + if (pread_whole(pagemap_fd_, pagemap, bytesize, offset) != bytesize) + xbt_die("Could not read pagemap"); +} + } } diff --git a/src/mc/mc_process.h b/src/mc/mc_process.h index 570d84b68b..4c21485e82 100644 --- a/src/mc/mc_process.h +++ b/src/mc/mc_process.h @@ -158,6 +158,9 @@ public: return MC_receive_message(this->socket_, &m, sizeof(M), 0); } + void reset_soft_dirty(); + void read_pagemap(uint64_t* pagemap, size_t start_page, size_t page_count); + private: void init_memory_map_info(); void refresh_heap(); @@ -172,7 +175,8 @@ private: remote_ptr maestro_stack_start_, maestro_stack_end_; int memory_file; std::vector ignored_regions_; - + int clear_refs_fd_; + int pagemap_fd_; public: // object info // TODO, make private (first, objectify simgrid::mc::ObjectInformation*) std::vector> object_infos; diff --git a/src/mc/mc_snapshot.cpp b/src/mc/mc_snapshot.cpp index 3aabd139ff..c4662eccfd 100644 --- a/src/mc/mc_snapshot.cpp +++ b/src/mc/mc_snapshot.cpp @@ -243,12 +243,12 @@ static void test_snapshot(bool sparse_checkpoint) { // Init memory and take snapshots: init_memory(source, byte_size); simgrid::mc::RegionSnapshot region0 = simgrid::mc::sparse_region( - simgrid::mc::RegionType::Unknown, source, source, byte_size); + simgrid::mc::RegionType::Unknown, source, source, byte_size, nullptr); for(int i=0; inum < min2) { + if (!MC_important_snapshot(state_test->system_state) && state_test->num < min2) { index2 = cursor2; min2 = state_test->num; } @@ -466,7 +466,8 @@ int is_visited_pair(mc_visited_pair_t visited_pair, mc_pair_t pair) { unsigned int cursor2 = 0; unsigned int index2 = 0; xbt_dynar_foreach(visited_pairs, cursor2, pair_test) { - if (pair_test->num < min2) { + if (!MC_important_snapshot(pair_test->graph_state->system_state) + && pair_test->num < min2) { index2 = cursor2; min2 = pair_test->num; } diff --git a/src/simgrid/sg_config.c b/src/simgrid/sg_config.c index f1fae08e3a..2ce94c99aa 100644 --- a/src/simgrid/sg_config.c +++ b/src/simgrid/sg_config.c @@ -625,6 +625,12 @@ void sg_config_init(int *argc, char **argv) xbt_cfgelm_boolean, 1, 1, _mc_cfg_cb_sparse_checkpoint, NULL); xbt_cfg_setdefault_boolean(_sg_cfg_set, "model-check/sparse-checkpoint", "no"); + /* do stateful model-checking */ + xbt_cfg_register(&_sg_cfg_set, "model-check/soft-dirty", + "Use sparse per-page snapshots.", + xbt_cfgelm_boolean, 1, 1, _mc_cfg_cb_soft_dirty, NULL); + xbt_cfg_setdefault_boolean(_sg_cfg_set, "model-check/soft-dirty", "no"); + xbt_cfg_register(&_sg_cfg_set, "model-check/ksm", "Kernel same-page merging", xbt_cfgelm_boolean, 1, 1, _mc_cfg_cb_ksm, NULL); -- 2.20.1