From 38bbe7c67eeccb4b8fa60a3f6982a33e6497c4ea Mon Sep 17 00:00:00 2001 From: Gabriel Corona Date: Fri, 17 Apr 2015 12:36:30 +0200 Subject: [PATCH] [mc] Remove soft dirty page tracking It generates a lot of page faults and is slow. --- src/include/mc/mc.h | 2 - src/mc/mc_checkpoint.cpp | 68 ++++------------ src/mc/mc_config.cpp | 8 -- src/mc/mc_model_checker.cpp | 1 - src/mc/mc_model_checker.h | 1 - src/mc/mc_page_snapshot.cpp | 150 ++---------------------------------- src/mc/mc_snapshot.cpp | 10 ++- src/mc/mc_snapshot.h | 12 ++- src/simgrid/sg_config.c | 6 -- 9 files changed, 33 insertions(+), 225 deletions(-) diff --git a/src/include/mc/mc.h b/src/include/mc/mc.h index 317ade94a4..7cce56c32e 100644 --- a/src/include/mc/mc.h +++ b/src/include/mc/mc.h @@ -40,7 +40,6 @@ extern int _sg_do_model_check; extern int _sg_do_model_check_record; extern int _sg_mc_checkpoint; extern int _sg_mc_sparse_checkpoint; -extern int _sg_mc_soft_dirty; extern char* _sg_mc_property_file; extern int _sg_mc_timeout; extern int _sg_mc_hash; @@ -61,7 +60,6 @@ extern xbt_dynar_t stacks_areas; void _mc_cfg_cb_reduce(const char *name, int pos); void _mc_cfg_cb_checkpoint(const char *name, int pos); void _mc_cfg_cb_sparse_checkpoint(const char *name, int pos); -void _mc_cfg_cb_soft_dirty(const char *name, int pos); void _mc_cfg_cb_property(const char *name, int pos); void _mc_cfg_cb_timeout(const char *name, int pos); void _mc_cfg_cb_hash(const char *name, int pos); diff --git a/src/mc/mc_checkpoint.cpp b/src/mc/mc_checkpoint.cpp index 1db06bc863..473a883de3 100644 --- a/src/mc/mc_checkpoint.cpp +++ b/src/mc/mc_checkpoint.cpp @@ -116,7 +116,7 @@ void MC_free_snapshot(mc_snapshot_t snapshot) static mc_mem_region_t mc_region_new_dense( mc_region_type_t region_type, - void *start_addr, void* permanent_addr, size_t size, mc_mem_region_t ref_reg) + void *start_addr, void* permanent_addr, size_t size) { mc_mem_region_t region = xbt_new(s_mc_mem_region_t, 1); region->region_type = region_type; @@ -139,28 +139,22 @@ static mc_mem_region_t mc_region_new_dense( * @param start_addr Address of the region in the simulated process * @param permanent_addr Permanent address of this data (for privatized variables, this is the virtual address of the privatized mapping) * @param size Size of the data* - * @param ref_reg Reference corresponding region */ -static mc_mem_region_t MC_region_new(mc_region_type_t type, void *start_addr, void* permanent_addr, size_t size, mc_mem_region_t ref_reg) +static mc_mem_region_t MC_region_new( + mc_region_type_t type, void *start_addr, void* permanent_addr, size_t size) { if (_sg_mc_sparse_checkpoint) { - return mc_region_new_sparse(type, start_addr, permanent_addr, size, ref_reg); + return mc_region_new_sparse(type, start_addr, permanent_addr, size); } else { - return mc_region_new_dense(type, start_addr, permanent_addr, size, ref_reg); + return mc_region_new_dense(type, start_addr, permanent_addr, size); } } /** @brief Restore a region from a snapshot - * - * If we are using per page snapshots, it is possible to use the reference - * region in order to do an incremental restoration of the region: the - * softclean pages which are shared between the two snapshots do not need - * to be restored. * * @param reg Target region - * @param reg_reg Current region (if not NULL), used for lazy per page restoration */ -static void MC_region_restore(mc_mem_region_t region, mc_mem_region_t ref_region) +static void MC_region_restore(mc_mem_region_t region) { switch(region->storage_type) { case MC_REGION_STORAGE_TYPE_NONE: @@ -174,17 +168,14 @@ static void MC_region_restore(mc_mem_region_t region, mc_mem_region_t ref_region break; case MC_REGION_STORAGE_TYPE_CHUNKED: - mc_region_restore_sparse(&mc_model_checker->process, region, ref_region); + mc_region_restore_sparse(&mc_model_checker->process, region); break; case MC_REGION_STORAGE_TYPE_PRIVATIZED: { - bool has_ref_regions = ref_region && - ref_region->storage_type == MC_REGION_STORAGE_TYPE_PRIVATIZED; size_t process_count = region->privatized.regions_count; for (size_t i = 0; i < process_count; i++) { - MC_region_restore(region->privatized.regions[i], - has_ref_regions ? ref_region->privatized.regions[i] : NULL); + MC_region_restore(region->privatized.regions[i]); } break; } @@ -192,8 +183,8 @@ static void MC_region_restore(mc_mem_region_t region, mc_mem_region_t ref_region } static mc_mem_region_t MC_region_new_privatized( - mc_region_type_t region_type, void *start_addr, void* permanent_addr, size_t size, - mc_mem_region_t ref_reg) + mc_region_type_t region_type, void *start_addr, void* permanent_addr, size_t size + ) { size_t process_count = MC_smpi_process_count(); mc_mem_region_t region = xbt_new(s_mc_mem_region_t, 1); @@ -215,13 +206,9 @@ static mc_mem_region_t MC_region_new_privatized( remote_smpi_privatisation_regions, sizeof(privatisation_regions)); for (size_t i = 0; i < process_count; i++) { - mc_mem_region_t ref_subreg = NULL; - if (ref_reg && ref_reg->storage_type == MC_REGION_STORAGE_TYPE_PRIVATIZED) - ref_subreg = ref_reg->privatized.regions[i]; region->privatized.regions[i] = MC_region_new(region_type, start_addr, - privatisation_regions[i].address, size, - ref_subreg); + privatisation_regions[i].address, size); } return region; @@ -236,16 +223,12 @@ static void MC_snapshot_add_region(int index, mc_snapshot_t snapshot, mc_region_ else if (type == MC_REGION_TYPE_HEAP) xbt_assert(!object_info, "Unexpected object info for heap region."); - mc_mem_region_t ref_reg = NULL; - if (mc_model_checker->parent_snapshot) - ref_reg = mc_model_checker->parent_snapshot->snapshot_regions[index]; - mc_mem_region_t region; const bool privatization_aware = MC_object_info_is_privatized(object_info); if (privatization_aware && MC_smpi_process_count()) - region = MC_region_new_privatized(type, start_addr, permanent_addr, size, ref_reg); + region = MC_region_new_privatized(type, start_addr, permanent_addr, size); else - region = MC_region_new(type, start_addr, permanent_addr, size, ref_reg); + region = MC_region_new(type, start_addr, permanent_addr, size); region->object_info = object_info; snapshot->snapshot_regions[index] = region; @@ -759,14 +742,8 @@ mc_snapshot_t MC_take_snapshot(int num_state) if (_sg_mc_snapshot_fds) MC_get_current_fd(snapshot); - const bool use_soft_dirty = _sg_mc_sparse_checkpoint - && _sg_mc_soft_dirty - && MC_process_is_self(mc_process); - /* Save the std heap and the writable mapped pages of libsimgrid and binary */ MC_get_memory_regions(mc_process, snapshot); - if (use_soft_dirty) - mc_softdirty_reset(); snapshot->to_ignore = MC_take_snapshot_ignore(); @@ -783,22 +760,17 @@ mc_snapshot_t MC_take_snapshot(int num_state) } MC_snapshot_ignore_restore(snapshot); - if (use_soft_dirty) - mc_model_checker->parent_snapshot = snapshot; return snapshot; } static inline void MC_restore_snapshot_regions(mc_snapshot_t snapshot) { - mc_snapshot_t parent_snapshot = mc_model_checker->parent_snapshot; - const size_t n = snapshot->snapshot_regions_count; for (size_t i = 0; i < n; i++) { // For privatized, variables we decided it was not necessary to take the snapshot: if (snapshot->snapshot_regions[i]) - MC_region_restore(snapshot->snapshot_regions[i], - parent_snapshot ? parent_snapshot->snapshot_regions[i] : NULL); + MC_region_restore(snapshot->snapshot_regions[i]); } #ifdef HAVE_SMPI @@ -841,22 +813,10 @@ void MC_restore_snapshot_fds(mc_snapshot_t snapshot) void MC_restore_snapshot(mc_snapshot_t snapshot) { XBT_DEBUG("Restore snapshot %i", snapshot->num_state); - - const bool use_soft_dirty = _sg_mc_sparse_checkpoint - && _sg_mc_soft_dirty - && MC_process_is_self(&mc_model_checker->process); - MC_restore_snapshot_regions(snapshot); if (_sg_mc_snapshot_fds) MC_restore_snapshot_fds(snapshot); - if (use_soft_dirty) { - mc_softdirty_reset(); - } MC_snapshot_ignore_restore(snapshot); - if (use_soft_dirty) { - mc_model_checker->parent_snapshot = snapshot; - } - mc_model_checker->process.cache_flags = 0; } diff --git a/src/mc/mc_config.cpp b/src/mc/mc_config.cpp index 1169f2d4d0..bf9eae8a34 100644 --- a/src/mc/mc_config.cpp +++ b/src/mc/mc_config.cpp @@ -51,7 +51,6 @@ int _sg_do_model_check = 0; int _sg_do_model_check_record = 0; int _sg_mc_checkpoint = 0; int _sg_mc_sparse_checkpoint = 0; -int _sg_mc_soft_dirty = 0; char *_sg_mc_property_file = NULL; int _sg_mc_hash = 0; int _sg_mc_max_depth = 1000; @@ -97,13 +96,6 @@ void _mc_cfg_cb_sparse_checkpoint(const char *name, int pos) { _sg_mc_sparse_checkpoint = xbt_cfg_get_boolean(_sg_cfg_set, name); } -void _mc_cfg_cb_soft_dirty(const char *name, int pos) { - if (_sg_cfg_init_status && !_sg_do_model_check) { - xbt_die("You are specifying a soft dirty value after the initialization (through MSG_config?), but model-checking was not activated at config time (through --cfg=model-check:1). This won't work, sorry."); - } - _sg_mc_soft_dirty = xbt_cfg_get_boolean(_sg_cfg_set, name); -} - void _mc_cfg_cb_property(const char *name, int pos) { if (_sg_cfg_init_status && !_sg_do_model_check) { diff --git a/src/mc/mc_model_checker.cpp b/src/mc/mc_model_checker.cpp index 0484dd20f1..96a988858b 100644 --- a/src/mc/mc_model_checker.cpp +++ b/src/mc/mc_model_checker.cpp @@ -16,7 +16,6 @@ mc_model_checker_t MC_model_checker_new(pid_t pid, int socket) mc_model_checker_t mc = xbt_new0(s_mc_model_checker_t, 1); mc->pages = mc_pages_store_new(); mc->fd_clear_refs = -1; - mc->fd_pagemap = -1; MC_process_init(&mc->process, pid, socket); mc->hosts = xbt_dict_new(); return mc; diff --git a/src/mc/mc_model_checker.h b/src/mc/mc_model_checker.h index 1dee19e398..4c42b816d9 100644 --- a/src/mc/mc_model_checker.h +++ b/src/mc/mc_model_checker.h @@ -31,7 +31,6 @@ struct s_mc_model_checker { mc_snapshot_t parent_snapshot; mc_pages_store_t pages; int fd_clear_refs; - int fd_pagemap; xbt_dynar_t record; s_mc_process_t process; /** String pool for host names */ diff --git a/src/mc/mc_page_snapshot.cpp b/src/mc/mc_page_snapshot.cpp index 72997ba0b9..a1948b2688 100644 --- a/src/mc/mc_page_snapshot.cpp +++ b/src/mc/mc_page_snapshot.cpp @@ -14,9 +14,6 @@ #include -#define SOFT_DIRTY_BIT_NUMBER 55 -#define SOFT_DIRTY (((uint64_t)1) << SOFT_DIRTY_BIT_NUMBER) - extern "C" { // ***** Region management: @@ -25,12 +22,10 @@ extern "C" { * * @param data The start of the region (must be at the beginning of a page) * @param pag_count Number of pages of the region - * @param pagemap Linux kernel pagemap values fot this region (or NULL) - * @param reference_pages Snapshot page numbers of the previous soft_dirty_reset (or NULL) * @return Snapshot page numbers of this new snapshot */ size_t* mc_take_page_snapshot_region(mc_process_t process, - void* data, size_t page_count, uint64_t* pagemap, size_t* reference_pages) + void* data, size_t page_count) { size_t* pagenos = (size_t*) malloc(page_count * sizeof(size_t)); @@ -41,12 +36,7 @@ size_t* mc_take_page_snapshot_region(mc_process_t process, temp = malloc(xbt_pagesize); for (size_t i=0; i!=page_count; ++i) { - bool softclean = pagemap && !(pagemap[i] & SOFT_DIRTY); - if (softclean && reference_pages) { - // The page is softclean, it is the same page as the reference page: - pagenos[i] = reference_pages[i]; - mc_model_checker->pages->ref_page(reference_pages[i]); - } else { + // Otherwise, we need to store the page the hard way // (by reading its content): void* page = (char*) data + (i << xbt_pagebits); @@ -67,7 +57,7 @@ size_t* mc_take_page_snapshot_region(mc_process_t process, temp, page, xbt_pagesize, MC_PROCESS_INDEX_DISABLED); } pagenos[i] = mc_model_checker->pages->store_page(page_data); - } + } free(temp); @@ -89,21 +79,11 @@ void mc_free_page_snapshot_region(size_t* pagenos, size_t page_count) * @param start_addr * @param page_count Number of pages of the region * @param pagenos - * @param pagemap Linux kernel pagemap values fot this region (or NULL) - * @param reference_pages Snapshot page numbers of the previous soft_dirty_reset (or NULL) */ void mc_restore_page_snapshot_region(mc_process_t process, - void* start_addr, size_t page_count, size_t* pagenos, uint64_t* pagemap, size_t* reference_pagenos) + void* start_addr, size_t page_count, size_t* pagenos) { for (size_t i=0; i!=page_count; ++i) { - - bool softclean = pagemap && !(pagemap[i] & SOFT_DIRTY); - if (softclean && reference_pagenos && pagenos[i] == reference_pagenos[i]) { - // The page is softclean and is the same as the reference one: - // the page is already in the target state. - continue; - } - // Otherwise, copy the page: void* target_page = mc_page_from_number(start_addr, i); const void* source_page = mc_model_checker->pages->get_page(pagenos[i]); @@ -111,91 +91,10 @@ void mc_restore_page_snapshot_region(mc_process_t process, } } -// ***** Soft dirty tracking - -/** @brief Like pread() but without partial reads */ -static size_t pread_whole(int fd, void* buf, size_t count, off_t offset) { - size_t res = 0; - - char* data = (char*) buf; - while(count) { - ssize_t n = pread(fd, buf, count, offset); - // EOF - if (n==0) - return res; - - // Error (or EINTR): - if (n==-1) { - if (errno == EINTR) - continue; - else - return -1; - } - - // It might be a partial read: - count -= n; - data += n; - offset += n; - res += n; - } - - return res; -} - -static inline __attribute__ ((always_inline)) -void mc_ensure_fd(int* fd, const char* path, int flags) { - if (*fd != -1) - return; - *fd = open(path, flags); - if (*fd == -1) { - xbt_die("Could not open file %s", path); - } -} - -/** @brief Reset the soft-dirty bits - * - * This is done after checkpointing and after checkpoint restoration - * (if per page checkpoiting is used) in order to know which pages were - * modified. - * - * See https://www.kernel.org/doc/Documentation/vm/soft-dirty.txt - * */ -void mc_softdirty_reset() { - mc_ensure_fd(&mc_model_checker->fd_clear_refs, "/proc/self/clear_refs", O_WRONLY|O_CLOEXEC); - if( ::write(mc_model_checker->fd_clear_refs, "4\n", 2) != 2) { - xbt_die("Could not reset softdirty bits"); - } -} - -/** @brief Read memory page informations - * - * For each virtual memory page of the process, - * /proc/self/pagemap provides a 64 bit field of information. - * We are interested in the soft-dirty bit: with this we can track which - * pages were modified between snapshots/restorations and avoid - * copying data which was not modified. - * - * See https://www.kernel.org/doc/Documentation/vm/pagemap.txt - * - * @param pagemap Output buffer for pagemap informations - * @param start_addr Address of the first page - * @param page_count Number of pages - */ -static void mc_read_pagemap(uint64_t* pagemap, size_t page_start, size_t page_count) -{ - mc_ensure_fd(&mc_model_checker->fd_pagemap, "/proc/self/pagemap", O_RDONLY|O_CLOEXEC); - size_t bytesize = sizeof(uint64_t) * page_count; - off_t offset = sizeof(uint64_t) * page_start; - if (pread_whole(mc_model_checker->fd_pagemap, pagemap, bytesize, offset) != bytesize) { - xbt_die("Could not read pagemap"); - } -} - // ***** High level API mc_mem_region_t mc_region_new_sparse(mc_region_type_t region_type, - void *start_addr, void* permanent_addr, size_t size, - mc_mem_region_t ref_reg) + void *start_addr, void* permanent_addr, size_t size) { mc_process_t process = &mc_model_checker->process; @@ -212,54 +111,21 @@ mc_mem_region_t mc_region_new_sparse(mc_region_type_t region_type, "Not at the beginning of a page"); size_t page_count = mc_page_count(size); - uint64_t* pagemap = NULL; - if (_sg_mc_soft_dirty && mc_model_checker->parent_snapshot && - MC_process_is_self(process)) { - pagemap = (uint64_t*) malloc_no_memset(sizeof(uint64_t) * page_count); - mc_read_pagemap(pagemap, mc_page_number(NULL, permanent_addr), page_count); - } - - size_t* reg_page_numbers = NULL; - if (ref_reg!=NULL && ref_reg->storage_type == MC_REGION_STORAGE_TYPE_CHUNKED) - reg_page_numbers = ref_reg->chunked.page_numbers; - // Take incremental snapshot: region->chunked.page_numbers = mc_take_page_snapshot_region(process, - permanent_addr, page_count, pagemap, reg_page_numbers); + permanent_addr, page_count); - if(pagemap) { - mfree(mc_heap, pagemap); - } return region; } -void mc_region_restore_sparse(mc_process_t process, mc_mem_region_t reg, mc_mem_region_t ref_reg) +void mc_region_restore_sparse(mc_process_t process, mc_mem_region_t reg) { xbt_assert((((uintptr_t)reg->permanent_addr) & (xbt_pagesize-1)) == 0, "Not at the beginning of a page"); size_t page_count = mc_page_count(reg->size); - uint64_t* pagemap = NULL; - - // Read soft-dirty bits if necessary in order to know which pages have changed: - if (_sg_mc_soft_dirty && mc_model_checker->parent_snapshot - && MC_process_is_self(process)) { - pagemap = (uint64_t*) malloc_no_memset(sizeof(uint64_t) * page_count); - mc_read_pagemap(pagemap, mc_page_number(NULL, reg->permanent_addr), page_count); - } - - // Incremental per-page snapshot restoration:s - size_t* reg_page_numbers = NULL; - if (ref_reg && ref_reg->storage_type == MC_REGION_STORAGE_TYPE_CHUNKED) - reg_page_numbers = ref_reg->chunked.page_numbers; - mc_restore_page_snapshot_region(process, - reg->permanent_addr, page_count, reg->chunked.page_numbers, - pagemap, reg_page_numbers); - - if(pagemap) { - free(pagemap); - } + reg->permanent_addr, page_count, reg->chunked.page_numbers); } } diff --git a/src/mc/mc_snapshot.cpp b/src/mc/mc_snapshot.cpp index 327376640d..82b6ab868a 100644 --- a/src/mc/mc_snapshot.cpp +++ b/src/mc/mc_snapshot.cpp @@ -208,7 +208,6 @@ XBT_TEST_UNIT("flat_snapshot", test_flat_snapshots, "Test flat snapshots") static void test_snapshot(bool sparse_checkpoint) { xbt_test_add("Initialisation"); - _sg_mc_soft_dirty = 0; _sg_mc_sparse_checkpoint = sparse_checkpoint; xbt_assert(xbt_pagesize == getpagesize()); xbt_assert(1 << xbt_pagebits == xbt_pagesize); @@ -224,11 +223,13 @@ static void test_snapshot(bool sparse_checkpoint) { // Init memory and take snapshots: init_memory(source, byte_size); - mc_mem_region_t region0 = mc_region_new_sparse(MC_REGION_TYPE_UNKNOWN, source, source, byte_size, NULL); + mc_mem_region_t region0 = mc_region_new_sparse( + MC_REGION_TYPE_UNKNOWN, source, source, byte_size); for(int i=0; i