X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/b40c2e9bf3cf6e8b2fad15594852ce186bf99574..39d3a0153b242aa016bdc528c9268a958bcd98d2:/src/mc/mc_checkpoint.c diff --git a/src/mc/mc_checkpoint.c b/src/mc/mc_checkpoint.c index 211d99dd31..b21f190862 100644 --- a/src/mc/mc_checkpoint.c +++ b/src/mc/mc_checkpoint.c @@ -7,21 +7,31 @@ #define _GNU_SOURCE #define UNW_LOCAL_ONLY +#include + #include #include +#include + +#include "internal_config.h" #include "mc_private.h" #include "xbt/module.h" #include #include "../smpi/private.h" +#include #include "xbt/mmalloc/mmprivate.h" #include "../simix/smx_private.h" +#define UNW_LOCAL_ONLY #include #include #include "mc_private.h" +#include + +#include "mc_mmu.h" XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_checkpoint, mc, "Logging specific to mc_checkpoint"); @@ -56,25 +66,33 @@ static void local_variable_free_voidp(void *v) local_variable_free((local_variable_t) * (void **) v); } -static void MC_region_destroy(mc_mem_region_t reg) +void MC_region_destroy(mc_mem_region_t reg) { + if (!reg) + return; + //munmap(reg->data, reg->size); xbt_free(reg->data); + if (reg->page_numbers) { + mc_free_page_snapshot_region(reg->page_numbers, mc_page_count(reg->size)); + } xbt_free(reg); } void MC_free_snapshot(mc_snapshot_t snapshot) { unsigned int i; - for (i = 0; i < NB_REGIONS; i++) + for (i = 0; i < NB_REGIONS; i++) { MC_region_destroy(snapshot->regions[i]); + } xbt_free(snapshot->stack_sizes); xbt_dynar_free(&(snapshot->stacks)); xbt_dynar_free(&(snapshot->to_ignore)); + xbt_dynar_free(&snapshot->ignored_data); if (snapshot->privatization_regions) { - size_t n = snapshot->nb_processes; + size_t n = xbt_dynar_length(snapshot->enabled_processes); for (i = 0; i != n; ++i) { MC_region_destroy(snapshot->privatization_regions[i]); } @@ -84,71 +102,112 @@ void MC_free_snapshot(mc_snapshot_t snapshot) xbt_free(snapshot); } - /******************************* Snapshot regions ********************************/ /*********************************************************************************/ -static mc_mem_region_t MC_region_new(int type, void *start_addr, size_t size) +static mc_mem_region_t mc_region_new_dense(int type, void *start_addr, void* permanent_addr, size_t size, mc_mem_region_t ref_reg) { mc_mem_region_t new_reg = xbt_new(s_mc_mem_region_t, 1); new_reg->start_addr = start_addr; + new_reg->permanent_addr = permanent_addr; + new_reg->data = NULL; new_reg->size = size; - //new_reg->data = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - //if(new_reg->data==MAP_FAILED) - //xbt_die("Could not mmap new memory for snapshot."); + new_reg->page_numbers = NULL; new_reg->data = xbt_malloc(size); - memcpy(new_reg->data, start_addr, size); - //madvise(new_reg->data, size, MADV_MERGEABLE); - + memcpy(new_reg->data, permanent_addr, size); XBT_DEBUG("New region : type : %d, data : %p (real addr %p), size : %zu", - type, new_reg->data, start_addr, size); - + type, new_reg->data, permanent_addr, size); return new_reg; + } -static void MC_region_restore(mc_mem_region_t reg) +/** @brief Take a snapshot of a given region + * + * @param type + * @param start_addr Address of the region in the simulated process + * @param permanent_addr Permanent address of this data (for privatized variables, this is the virtual address of the privatized mapping) + * @param size Size of the data* + * @param ref_reg Reference corresponding region + */ +static mc_mem_region_t MC_region_new(int type, void *start_addr, void* permanent_addr, size_t size, mc_mem_region_t ref_reg) { - /*FIXME: check if start_addr is still mapped, if it is not, then map it - before copying the data */ + if (_sg_mc_sparse_checkpoint) { + return mc_region_new_sparse(type, start_addr, permanent_addr, size, ref_reg); + } else { + return mc_region_new_dense(type, start_addr, permanent_addr, size, ref_reg); + } +} - memcpy(reg->start_addr, reg->data, reg->size); +/** @brief Restore a region from a snapshot + * + * If we are using per page snapshots, it is possible to use the reference + * region in order to do an incremental restoration of the region: the + * softclean pages which are shared between the two snapshots do not need + * to be restored. + * + * @param reg Target region + * @param reg_reg Current region (if not NULL), used for lazy per page restoration + */ +static void MC_region_restore(mc_mem_region_t reg, mc_mem_region_t ref_reg) +{ + /*FIXME: check if start_addr is still mapped, if it is not, then map it + before copying the data */ + if (!reg->page_numbers) { + memcpy(reg->permanent_addr, reg->data, reg->size); + } else { + mc_region_restore_sparse(reg, ref_reg); + } return; } static void MC_snapshot_add_region(mc_snapshot_t snapshot, int type, - void *start_addr, size_t size) + void *start_addr, void* permanent_addr, size_t size) + { - mc_mem_region_t new_reg = MC_region_new(type, start_addr, size); + mc_mem_region_t ref_reg = + mc_model_checker->parent_snapshot ? mc_model_checker->parent_snapshot->regions[type] : NULL; + mc_mem_region_t new_reg = MC_region_new(type, start_addr, permanent_addr, size, ref_reg); snapshot->regions[type] = new_reg; return; } static void MC_get_memory_regions(mc_snapshot_t snapshot) { - size_t i; - void *start_heap = ((xbt_mheap_t) std_heap)->base; - void *end_heap = ((xbt_mheap_t) std_heap)->breakval; - MC_snapshot_add_region(snapshot, 0, start_heap, + void *start_heap = std_heap->base; + void *end_heap = std_heap->breakval; + MC_snapshot_add_region(snapshot, 0, start_heap, start_heap, (char *) end_heap - (char *) start_heap); snapshot->heap_bytes_used = mmalloc_get_bytes_used(std_heap); + snapshot->privatization_regions = NULL; - MC_snapshot_add_region(snapshot, 1, mc_libsimgrid_info->start_rw, - mc_libsimgrid_info->end_rw - - mc_libsimgrid_info->start_rw); - if (!smpi_privatize_global_variables) { - MC_snapshot_add_region(snapshot, 2, mc_binary_info->start_rw, - mc_binary_info->end_rw - mc_binary_info->start_rw); - snapshot->privatization_regions = NULL; - snapshot->privatization_index = -1; - } else { + MC_snapshot_add_region(snapshot, 1, + mc_libsimgrid_info->start_rw, mc_libsimgrid_info->start_rw, + mc_libsimgrid_info->end_rw - mc_libsimgrid_info->start_rw); + +#ifdef HAVE_SMPI + size_t i; + if (smpi_privatize_global_variables && smpi_process_count()) { + // Snapshot the global variable of the application separately for each + // simulated process: snapshot->privatization_regions = - xbt_new(mc_mem_region_t, SIMIX_process_count()); - for (i = 0; i < SIMIX_process_count(); i++) { + xbt_new(mc_mem_region_t, smpi_process_count()); + for (i = 0; i < smpi_process_count(); i++) { + mc_mem_region_t ref_reg = + mc_model_checker->parent_snapshot ? mc_model_checker->parent_snapshot->privatization_regions[i] : NULL; snapshot->privatization_regions[i] = - MC_region_new(-1, mappings[i], size_data_exe); + MC_region_new(-1, mc_binary_info->start_rw, smpi_privatisation_regions[i].address, size_data_exe, ref_reg); } - snapshot->privatization_index = loaded_page; + snapshot->privatization_index = smpi_loaded_page; + snapshot->regions[2] = NULL; + } else +#endif + { + MC_snapshot_add_region(snapshot, 2, + mc_binary_info->start_rw, mc_binary_info->start_rw, + mc_binary_info->end_rw - mc_binary_info->start_rw); + snapshot->privatization_regions = NULL; + snapshot->privatization_index = -1; } } @@ -189,50 +248,6 @@ void MC_init_memory_map_info() } -/** \brief Fill/lookup the "subtype" field. - */ -static void MC_resolve_subtype(mc_object_info_t info, dw_type_t type) -{ - - if (type->dw_type_id == NULL) - return; - type->subtype = xbt_dict_get_or_null(info->types, type->dw_type_id); - if (type->subtype == NULL) - return; - if (type->subtype->byte_size != 0) - return; - if (type->subtype->name == NULL) - return; - // Try to find a more complete description of the type: - // We need to fix in order to support C++. - - dw_type_t subtype = - xbt_dict_get_or_null(info->full_types_by_name, type->subtype->name); - if (subtype != NULL) { - type->subtype = subtype; - } - -} - -void MC_post_process_types(mc_object_info_t info) -{ - xbt_dict_cursor_t cursor = NULL; - char *origin; - dw_type_t type; - - // Lookup "subtype" field: - xbt_dict_foreach(info->types, cursor, origin, type) { - MC_resolve_subtype(info, type); - - dw_type_t member; - unsigned int i = 0; - if (type->members != NULL) - xbt_dynar_foreach(type->members, i, member) { - MC_resolve_subtype(info, member); - } - } -} - /** \brief Fills the position of the segments (executable, read-only, read/write). * * TODO, use dl_iterate_phdr to be more robust @@ -304,7 +319,7 @@ static bool mc_valid_variable(dw_variable_t var, dw_frame_t scope, } static void mc_fill_local_variables_values(mc_stack_frame_t stack_frame, - dw_frame_t scope, xbt_dynar_t result) + dw_frame_t scope, int process_index, xbt_dynar_t result) { void *ip = (void *) stack_frame->ip; if (ip < scope->low_pc || ip >= scope->high_pc) @@ -333,12 +348,22 @@ static void mc_fill_local_variables_values(mc_stack_frame_t stack_frame, if (current_variable->address != NULL) { new_var->address = current_variable->address; } else if (current_variable->locations.size != 0) { - new_var->address = - (void *) mc_dwarf_resolve_locations(¤t_variable->locations, + s_mc_location_t location; + mc_dwarf_resolve_locations(&location, ¤t_variable->locations, current_variable->object_info, &(stack_frame->unw_cursor), (void *) stack_frame->frame_base, - NULL); + NULL, process_index); + + switch(mc_get_location_type(&location)) { + case MC_LOCATION_TYPE_ADDRESS: + new_var->address = location.memory_location; + break; + case MC_LOCATION_TYPE_REGISTER: + default: + xbt_die("Cannot handle non-address variable"); + } + } else { xbt_die("No address"); } @@ -349,11 +374,11 @@ static void mc_fill_local_variables_values(mc_stack_frame_t stack_frame, // Recursive processing of nested scopes: dw_frame_t nested_scope = NULL; xbt_dynar_foreach(scope->scopes, cursor, nested_scope) { - mc_fill_local_variables_values(stack_frame, nested_scope, result); + mc_fill_local_variables_values(stack_frame, nested_scope, process_index, result); } } -static xbt_dynar_t MC_get_local_variables_values(xbt_dynar_t stack_frames) +static xbt_dynar_t MC_get_local_variables_values(xbt_dynar_t stack_frames, int process_index) { unsigned cursor1 = 0; @@ -362,7 +387,7 @@ static xbt_dynar_t MC_get_local_variables_values(xbt_dynar_t stack_frames) xbt_dynar_new(sizeof(local_variable_t), local_variable_free_voidp); xbt_dynar_foreach(stack_frames, cursor1, stack_frame) { - mc_fill_local_variables_values(stack_frame, stack_frame->frame, variables); + mc_fill_local_variables_values(stack_frame, stack_frame->frame, process_index, variables); } return variables; @@ -440,7 +465,7 @@ static xbt_dynar_t MC_unwind_stack_frames(void *stack_context) return result; }; -static xbt_dynar_t MC_take_snapshot_stacks(mc_snapshot_t * snapshot, void *heap) +static xbt_dynar_t MC_take_snapshot_stacks(mc_snapshot_t * snapshot) { xbt_dynar_t res = @@ -453,21 +478,16 @@ static xbt_dynar_t MC_take_snapshot_stacks(mc_snapshot_t * snapshot, void *heap) xbt_dynar_foreach(stacks_areas, cursor, current_stack) { mc_snapshot_stack_t st = xbt_new(s_mc_snapshot_stack_t, 1); st->stack_frames = MC_unwind_stack_frames(current_stack->context); - st->local_variables = MC_get_local_variables_values(st->stack_frames); + st->local_variables = MC_get_local_variables_values(st->stack_frames, current_stack->process_index); + st->process_index = current_stack->process_index; unw_word_t sp = xbt_dynar_get_as(st->stack_frames, 0, mc_stack_frame_t)->sp; - st->stack_pointer = - ((char *) heap + (size_t) (((char *) ((long) sp) - (char *) std_heap))); - st->real_address = current_stack->address; xbt_dynar_push(res, &st); (*snapshot)->stack_sizes = xbt_realloc((*snapshot)->stack_sizes, (cursor + 1) * sizeof(size_t)); (*snapshot)->stack_sizes[cursor] = - current_stack->size - ((char *) st->stack_pointer - - (char *) ((char *) heap + - ((char *) current_stack->address - - (char *) std_heap))); + (char*) current_stack->address + current_stack->size - (char*) sp; } return res; @@ -501,53 +521,158 @@ static xbt_dynar_t MC_take_snapshot_ignore() } -static void MC_dump_checkpoint_ignore(mc_snapshot_t snapshot) +static void mc_free_snapshot_ignored_data_pvoid(void* data) { + mc_snapshot_ignored_data_t ignored_data = (mc_snapshot_ignored_data_t) data; + free(ignored_data->data); +} + +static void MC_snapshot_handle_ignore(mc_snapshot_t snapshot) { + snapshot->ignored_data = xbt_dynar_new(sizeof(s_mc_snapshot_ignored_data_t), mc_free_snapshot_ignored_data_pvoid); + // Copy the memory: unsigned int cursor = 0; mc_checkpoint_ignore_region_t region; - size_t offset; - - xbt_dynar_foreach(mc_checkpoint_ignore, cursor, region) { - if (region->addr > snapshot->regions[0]->start_addr - && (char *) (region->addr) < - (char *) snapshot->regions[0]->start_addr + STD_HEAP_SIZE) { - offset = - (char *) region->addr - (char *) snapshot->regions[0]->start_addr; - memset((char *) snapshot->regions[0]->data + offset, 0, region->size); - } else if (region->addr > snapshot->regions[2]->start_addr - && (char *) (region->addr) < - (char *) snapshot->regions[2]->start_addr + - snapshot->regions[2]->size) { - offset = - (char *) region->addr - (char *) snapshot->regions[2]->start_addr; - memset((char *) snapshot->regions[2]->data + offset, 0, region->size); - } else if (region->addr > snapshot->regions[1]->start_addr - && (char *) (region->addr) < - (char *) snapshot->regions[1]->start_addr + - snapshot->regions[1]->size) { - offset = - (char *) region->addr - (char *) snapshot->regions[1]->start_addr; - memset((char *) snapshot->regions[1]->data + offset, 0, region->size); + xbt_dynar_foreach (mc_checkpoint_ignore, cursor, region) { + s_mc_snapshot_ignored_data_t ignored_data; + ignored_data.start = region->addr; + ignored_data.size = region->size; + ignored_data.data = malloc(region->size); + memcpy(ignored_data.data, region->addr, region->size); + xbt_dynar_push(snapshot->ignored_data, &ignored_data); + } + + // Zero the memory: + xbt_dynar_foreach (mc_checkpoint_ignore, cursor, region) { + memset(region->addr, 0, region->size); + } + +} + +static void MC_snapshot_ignore_restore(mc_snapshot_t snapshot) +{ + unsigned int cursor = 0; + s_mc_snapshot_ignored_data_t ignored_data; + xbt_dynar_foreach (snapshot->ignored_data, cursor, ignored_data) { + memcpy(ignored_data.start, ignored_data.data, ignored_data.size); + } +} + +/** @brief Can we remove this snapshot? + * + * Some snapshots cannot be removed (yet) because we need them + * at this point. + * + * @param snapshot + */ +int mc_important_snapshot(mc_snapshot_t snapshot) +{ + // We need this snapshot in order to know which + // pages needs to be stored in the next snapshot. + // This field is only non-NULL when using soft-dirty + // page tracking. + if (snapshot == mc_model_checker->parent_snapshot) + return true; + + return false; +} + +static void MC_get_current_fd(mc_snapshot_t snapshot){ + + snapshot->total_fd = 0; + + const size_t fd_dir_path_size = 20; + char fd_dir_path[fd_dir_path_size]; + if (snprintf(fd_dir_path, fd_dir_path_size, + "/proc/%lli/fd", (long long int) getpid()) > fd_dir_path_size) + xbt_die("Unexpected buffer is too small for fd_dir_path"); + + DIR* fd_dir = opendir (fd_dir_path); + if (fd_dir == NULL) + xbt_die("Cannot open directory '/proc/self/fd'\n"); + + size_t total_fd = 0; + struct dirent* fd_number; + while ((fd_number = readdir(fd_dir))) { + + int fd_value = atoi(fd_number->d_name); + + if(fd_value < 3) + continue; + + const size_t source_size = 25; + char source[25]; + if (snprintf(source, source_size, "/proc/self/fd/%s", fd_number->d_name) > source_size) + xbt_die("Unexpected buffer is too small for fd %s", fd_number->d_name); + + const size_t link_size = 200; + char link[200]; + size_t res = readlink(source, link, link_size); + if (res<0) { + xbt_die("Could not read link for %s", source); + } + if (res==200) { + xbt_die("Buffer to small for link of %s", source); } + link[res] = '\0'; + + if(smpi_is_privatisation_file(link)) + continue; + + // This is (probably) the DIR* we are reading: + // TODO, read all the file entries at once and close the DIR.* + if(strcmp(fd_dir_path, link) == 0) + continue; + + // We don't handle them. + // It does not mean we should silently ignore them however. + if (strncmp(link, "pipe:", 5) == 0 || strncmp(link, "socket:", 7) == 0) + continue; + + // This is probably a shared memory used by lttng-ust: + if(strncmp("/dev/shm/ust-shm-tmp-", link, 21)==0) + continue; + + // Add an entry for this FD in the snapshot: + fd_infos_t fd = xbt_new0(s_fd_infos_t, 1); + fd->filename = strdup(link); + fd->number = fd_value; + fd->flags = fcntl(fd_value, F_GETFL) | fcntl(fd_value, F_GETFD) ; + fd->current_position = lseek(fd_value, 0, SEEK_CUR); + snapshot->current_fd = xbt_realloc(snapshot->current_fd, (total_fd + 1) * sizeof(fd_infos_t)); + snapshot->current_fd[total_fd] = fd; + total_fd++; } + snapshot->total_fd = total_fd; + closedir (fd_dir); } mc_snapshot_t MC_take_snapshot(int num_state) { mc_snapshot_t snapshot = xbt_new0(s_mc_snapshot_t, 1); - snapshot->nb_processes = xbt_swag_size(simix_global->process_list); + snapshot->enabled_processes = xbt_dynar_new(sizeof(int), NULL); + smx_process_t process; + xbt_swag_foreach(process, simix_global->process_list) { + xbt_dynar_push_as(snapshot->enabled_processes, int, (int)process->pid); + } + + MC_snapshot_handle_ignore(snapshot); + + MC_get_current_fd(snapshot); /* Save the std heap and the writable mapped pages of libsimgrid and binary */ MC_get_memory_regions(snapshot); + if (_sg_mc_sparse_checkpoint && _sg_mc_soft_dirty) { + mc_softdirty_reset(); + } snapshot->to_ignore = MC_take_snapshot_ignore(); if (_sg_mc_visited > 0 || strcmp(_sg_mc_property_file, "")) { snapshot->stacks = - MC_take_snapshot_stacks(&snapshot, snapshot->regions[0]->data); + MC_take_snapshot_stacks(&snapshot); if (_sg_mc_hash && snapshot->stacks != NULL) { snapshot->hash = mc_hash_processes_state(num_state, snapshot->stacks); } else { @@ -557,83 +682,75 @@ mc_snapshot_t MC_take_snapshot(int num_state) snapshot->hash = 0; } - if (num_state > 0) - MC_dump_checkpoint_ignore(snapshot); - - // mprotect the region after zero-ing ignored parts: - /*size_t i; - for(i=0; i!=NB_REGIONS; ++i) { - mc_mem_region_t region = snapshot->regions[i]; - mprotect(region->data, region->size, PROT_READ); - } */ - + MC_snapshot_ignore_restore(snapshot); + if (_sg_mc_sparse_checkpoint && _sg_mc_soft_dirty) { + mc_model_checker->parent_snapshot = snapshot; + } return snapshot; - } void MC_restore_snapshot(mc_snapshot_t snapshot) { + mc_snapshot_t parent_snapshot = mc_model_checker->parent_snapshot; + + int new_fd; unsigned int i; for (i = 0; i < NB_REGIONS; i++) { // For privatized, variables we decided it was not necessary to take the snapshot: if (snapshot->regions[i]) - MC_region_restore(snapshot->regions[i]); + MC_region_restore(snapshot->regions[i], + parent_snapshot ? parent_snapshot->regions[i] : NULL); } +#ifdef HAVE_SMPI if (snapshot->privatization_regions) { - for (i = 0; i < SIMIX_process_count(); i++) { + // Restore the global variables of the application separately for each + // simulated process: + for (i = 0; i < smpi_process_count(); i++) { if (snapshot->privatization_regions[i]) { - MC_region_restore(snapshot->privatization_regions[i]); + MC_region_restore(snapshot->privatization_regions[i], + parent_snapshot ? parent_snapshot->privatization_regions[i] : NULL); } } - switch_data_segment(snapshot->privatization_index); } -} - -void *mc_translate_address(uintptr_t addr, mc_snapshot_t snapshot) -{ - - // If not in a process state/clone: - if (!snapshot) { - return (uintptr_t *) addr; + if(snapshot->privatization_index >= 0) { + // We just rewrote the global variables. + // The privatisation segment SMPI thinks + // is mapped might be inconsistent with the segment which + // is really mapped in memory (kernel state). + // We ask politely SMPI to map the segment anyway, + // even if it thinks it is the current one: + smpi_really_switch_data_segment(snapshot->privatization_index); } - // If it is in a snapshot: - for (size_t i = 0; i != NB_REGIONS; ++i) { - mc_mem_region_t region = snapshot->regions[i]; - uintptr_t start = (uintptr_t) region->start_addr; - uintptr_t end = start + region->size; - - // The address is in this region: - if (addr >= start && addr < end) { - uintptr_t offset = addr - start; - return (void *) ((uintptr_t) region->data + offset); +#endif + + for(i=0; i < snapshot->total_fd; i++){ + + new_fd = open(snapshot->current_fd[i]->filename, snapshot->current_fd[i]->flags); + if (new_fd <0) { + xbt_die("Could not reopen the file %s fo restoring the file descriptor", + snapshot->current_fd[i]->filename); } - + if(new_fd != -1 && new_fd != snapshot->current_fd[i]->number){ + dup2(new_fd, snapshot->current_fd[i]->number); + //fprintf(stderr, "%p\n", fdopen(snapshot->current_fd[i]->number, "rw")); + close(new_fd); + }; + lseek(snapshot->current_fd[i]->number, snapshot->current_fd[i]->current_position, SEEK_SET); } - // It is not in a snapshot: - return (void *) addr; -} - -uintptr_t mc_untranslate_address(void *addr, mc_snapshot_t snapshot) -{ - if (!snapshot) { - return (uintptr_t) addr; + if (_sg_mc_sparse_checkpoint && _sg_mc_soft_dirty) { + mc_softdirty_reset(); } - for (size_t i = 0; i != NB_REGIONS; ++i) { - mc_mem_region_t region = snapshot->regions[i]; - if (addr >= region->data - && addr <= (void *) (((char *) region->data) + region->size)) { - size_t offset = (size_t) ((char *) addr - (char *) region->data); - return ((uintptr_t) region->start_addr) + offset; - } + MC_snapshot_ignore_restore(snapshot); + if (_sg_mc_sparse_checkpoint && _sg_mc_soft_dirty) { + mc_model_checker->parent_snapshot = snapshot; } - return (uintptr_t) addr; } -mc_snapshot_t SIMIX_pre_mc_snapshot(smx_simcall_t simcall) +mc_snapshot_t simcall_HANDLER_mc_snapshot(smx_simcall_t simcall) { return MC_take_snapshot(1); }