Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
[mc] Compute a single hash (64 bits) of the current state
authorGabriel Corona <gabriel.corona@loria.fr>
Tue, 21 Jan 2014 11:25:30 +0000 (12:25 +0100)
committerGabriel Corona <gabriel.corona@loria.fr>
Fri, 7 Feb 2014 08:04:49 +0000 (09:04 +0100)
This is an attempt to speedup state comparison by using
a very fast first pass.

Compared to previous attempt:

* use a simple 64 bits djb2 hash instead of SHA-1;
* add has much info as possible into the hash;
* do not add anything suspicious;
* it could be used for efficient state comparison
  * index for hashtable,
  * cache-friendly data-structure.

CMakeLists.txt
buildtools/Cmake/DefinePackages.cmake
src/mc/mc_checkpoint.c
src/mc/mc_compare.c
src/mc/mc_dwarf.c
src/mc/mc_hash.c [new file with mode: 0644]
src/mc/mc_private.h
src/mc/mc_set.cpp [new file with mode: 0644]

index a21be2a..747d9fc 100644 (file)
@@ -4,7 +4,7 @@ if(WIN32)
   SET(CMAKE_RC_COMPILER "windres")
 endif()
 project(SimGrid C)
-if (enable_gtnets OR enable_ns3)
+if (enable_gtnets OR enable_ns3 OR enable_model-checking)
   enable_language(CXX)
 endif()
 if (NOT DEFINED enable_smpi OR enable_smpi) # smpi is enabled by default
index b9854a6..41b6fab 100644 (file)
@@ -525,6 +525,8 @@ set(MC_SRC
   src/mc/mc_state.c
   src/mc/memory_map.c
   src/mc/mc_pair.c
+  src/mc/mc_hash.c
+  src/mc/mc_set.cpp
   )
 
 set(headers_to_install
index d2efa6e..1d6c71d 100644 (file)
@@ -409,8 +409,7 @@ static xbt_dynar_t MC_get_local_variables_values(void *stack_context){
 
   unsigned int cursor = 0;
   dw_variable_t current_variable;
-  dw_location_entry_t entry = NULL;
-  int frame_found = 0, region_type;
+  int region_type;
   void *frame_pointer_address = NULL;
   long true_ip;
   int stop = 0;
@@ -438,27 +437,8 @@ static xbt_dynar_t MC_get_local_variables_values(void *stack_context){
     }
     
     true_ip = (long)frame->low_pc + (long)off;
-    frame_pointer_address = NULL;
-
-    /* Get frame pointer */
-    switch(frame->frame_base->type){
-    case e_dw_loclist:
-      cursor = 0;
-      while(cursor < xbt_dynar_length(frame->frame_base->location.loclist) && !frame_found){
-        entry = xbt_dynar_get_as(frame->frame_base->location.loclist, cursor, dw_location_entry_t);
-        if((true_ip >= entry->lowpc) && (true_ip < entry->highpc)){
-          frame_found = 1;
-          frame_pointer_address =  (void*) MC_dwarf_resolve_location(&c, entry->location, NULL);
-        }
-        cursor++;
-      }
-      break;
-    default :
-      frame_pointer_address = NULL; /* FIXME : implement other cases (with optimizations enabled)*/
-      break;
-    }
+    frame_pointer_address = mc_find_frame_base(true_ip, frame, &c);
 
-    frame_found = 0;
     cursor = 0;
 
     xbt_dynar_foreach(frame->variables, cursor, current_variable){
@@ -581,6 +561,11 @@ mc_snapshot_t MC_take_snapshot(int num_state){
 
   mc_snapshot_t snapshot = xbt_new0(s_mc_snapshot_t, 1);
   snapshot->nb_processes = xbt_swag_size(simix_global->process_list);
+  if(MC_USE_SNAPSHOT_HASH) {
+    snapshot->hash = mc_hash_processes_state(num_state);
+  } else {
+    snapshot->hash = 0;
+  }
 
   /* Save the std heap and the writable mapped pages of libsimgrid and binary */
   MC_get_memory_regions(snapshot);
index 6dda53a..5e8bb31 100644 (file)
@@ -4,6 +4,8 @@
 /* This program is free software; you can redistribute it and/or modify it
  * under the terms of the license (GNU LGPL) which comes with this package. */
 
+#include <inttypes.h>
+
 #include "mc_private.h"
 
 #include "xbt/mmalloc.h"
@@ -230,6 +232,7 @@ static int compare_areas_with_type(void *area1, void *area2, xbt_dict_t types, x
     break;
   case DW_TAG_structure_type:
     xbt_dynar_foreach(type->members, cursor, member){
+      XBT_DEBUG("Compare member %s", member->name);
       res = compare_areas_with_type((char *)area1 + member->offset, (char *)area2 + member->offset, types, other_types, member->dw_type_id, region_size, region_type, start_data, pointer_level);
       if(res == 1)
         return res;
@@ -338,6 +341,7 @@ static int compare_local_variables(mc_snapshot_stack_t stack1, mc_snapshot_stack
       }
       offset1 = (char *)current_var1->address - (char *)std_heap;
       offset2 = (char *)current_var2->address - (char *)std_heap;
+      XBT_DEBUG("Compare local variable %s of frame %s", current_var1->name, current_var1->frame);
       if(current_var1->region == 1)
         res = compare_areas_with_type( (char *)heap1 + offset1, (char *)heap2 + offset2, mc_libsimgrid_info->types, mc_binary_info->types, current_var1->type, 0, 1, start_data_libsimgrid, 0);
       else
@@ -390,10 +394,21 @@ int snapshot_compare(void *state1, void *state2){
     xbt_os_walltimer_start(timer);
   #endif
 
-  /* Compare size of stacks */
+  if(MC_USE_SNAPSHOT_HASH) {
+    if(s1->hash != s2->hash) {
+      XBT_VERB("(%d - %d) Different hash : 0x%" PRIx64 "--0x%" PRIx64, num1, num2, s1->hash, s2->hash);
+      return 1;
+    } else {
+      XBT_VERB("(%d - %d) Same hash : 0x%" PRIx64, num1, num2, s1->hash);
+    }
+  }
+
   int i = 0;
   size_t size_used1, size_used2;
   int is_diff = 0;
+
+
+  /* Compare size of stacks */
   while(i < xbt_dynar_length(s1->stacks)){
     size_used1 = s1->stack_sizes[i];
     size_used2 = s2->stack_sizes[i];
@@ -642,6 +657,10 @@ int snapshot_compare(void *state1, void *state2){
     print_comparison_times();
   #endif
 
+#ifdef MC_VERBOSE
+   if(errors==0)
+     XBT_VERB("(%d - %d) No difference found", num1, num2);
+#endif
   return errors > 0;
   
 }
index 84747d0..025f310 100644 (file)
@@ -948,7 +948,16 @@ void MC_dwarf_get_variables(mc_object_info_t info) {
   size_t length;
   while (dwarf_nextcu (dwarf, offset, &next_offset, &length, NULL, NULL, NULL) == 0) {
     Dwarf_Die die;
+
     if(dwarf_offdie(dwarf, offset+length, &die)!=NULL) {
+
+      // Skip C++ for now (we will add support for it soon):
+      int lang = dwarf_srclang(&die);
+      if((lang==DW_LANG_C_plus_plus) || (lang==DW_LANG_ObjC_plus_plus)) {
+        offset = next_offset;
+        continue;
+      }
+
       MC_dwarf_handle_die(info, &die, &die, NULL);
     }
     offset = next_offset;
diff --git a/src/mc/mc_hash.c b/src/mc/mc_hash.c
new file mode 100644 (file)
index 0000000..86847a9
--- /dev/null
@@ -0,0 +1,376 @@
+/* Copyright (c) 2014. The SimGrid Team.
+ * All rights reserved.                                                     */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "mc_private.h"
+#include "mc/datatypes.h"
+#include <mc/mc.h>
+
+XBT_LOG_NEW_DEFAULT_SUBCATEGORY(mc_hash, mc,
+                                "Logging specific to mc_hash");
+
+// This is djb2:
+typedef uint64_t mc_hash_t;
+#define MC_HASH_INIT ((uint64_t)5381)
+
+// #define MC_HASH(hash, value) hash = (((hash << 5) + hash) + (uint64_t) value)
+#define MC_HASH(hash, value) \
+  { hash = (((hash << 5) + hash) + (uint64_t) value);\
+  XBT_DEBUG("%s:%i: %"PRIx64" -> %"PRIx64, __FILE__, __LINE__, (uint64_t) value, hash); }
+
+// ***** Hash state
+
+typedef struct s_mc_hashing_state {
+  // Set of pointers/addresses already processed (avoid loops):
+  mc_address_set_t handled_addresses;
+} mc_hashing_state;
+
+void mc_hash_state_init(mc_hashing_state* state);
+void mc_hash_state_destroy(mc_hashing_state* state);
+
+void mc_hash_state_init(mc_hashing_state* state) {
+  state->handled_addresses = mc_address_set_new();
+}
+
+void mc_hash_state_destroy(mc_hashing_state* state) {
+   mc_address_set_free(&state->handled_addresses);
+}
+
+// TODO, detect and avoid loops
+
+static bool mc_ignored(const void* address, size_t size) {
+  mc_heap_ignore_region_t region;
+  unsigned int cursor = 0;
+  const void* end = (char*) address + size;
+  xbt_dynar_foreach(mc_heap_comparison_ignore, cursor, region) {
+   void* istart = region->address;
+   void* iend   = (char*) region->address + region->size;
+
+   if(address>=istart && address<iend && end>=istart && end<iend)
+     return true;
+  }
+
+  return false;
+}
+
+static void mc_hash_binary(mc_hash_t *hash, const void* s, size_t len) {
+  const char* p = (const void*) s;
+  int i;
+  for(i=0; i!=len; ++i) {
+    MC_HASH(*hash, p[i]);
+  }
+}
+
+/** \brief Compute a hash for a given value of a given type
+ *
+ *  We try to be very conservative (do not hash too ambiguous things).
+ *
+ *  \param address address of the variable
+ *  \param type type of the variable
+ * */
+static void mc_hash_value(mc_hash_t* hash, mc_hashing_state* state, mc_object_info_t info, const void* address, dw_type_t type) {
+  top:
+
+  switch(type->type){
+  // Simple case, hash this has binary:
+  case DW_TAG_base_type:
+  case DW_TAG_enumeration_type:
+  {
+    if(mc_ignored(address, 1))
+      return;
+    mc_hash_binary(hash, address, type->byte_size);
+    return;
+  }
+
+  case DW_TAG_array_type:
+  {
+    if(mc_ignored(address, type->byte_size))
+      return;
+
+    long element_count = type->element_count;
+    dw_type_t subtype = xbt_dict_get_or_null(info->types, type->dw_type_id);
+    if(subtype==NULL) {
+      XBT_DEBUG("Hash array without subtype");
+      return;
+    }
+    int i;
+    for(i=0; i!=element_count; ++i) {
+      XBT_DEBUG("Hash array element %i", i);
+      void* subaddress = ((char*)address)+i*subtype->byte_size;
+      mc_hash_value(hash, state, info, subaddress, subtype);
+    }
+    return;
+  }
+
+  // Get the raw type:
+  case DW_TAG_typedef:
+  case DW_TAG_volatile_type:
+  case DW_TAG_const_type:
+  case DW_TAG_restrict_type:
+  {
+    if(type->dw_type_id==NULL) {
+      return;
+    }
+    type = xbt_dict_get_or_null(info->types, type->dw_type_id);
+    if(type==NULL)
+      return;
+    else
+      goto top;
+  }
+
+  case DW_TAG_structure_type:
+  case DW_TAG_class_type:
+  {
+    if(mc_ignored(address, type->byte_size))
+      return;
+
+    unsigned int cursor = 0;
+    dw_type_t member;
+    xbt_dynar_foreach(type->members, cursor, member){
+      XBT_DEBUG("Hash struct member %s", member->name);
+      dw_type_t subtype = xbt_dict_get_or_null(info->types, member->dw_type_id);
+      if(subtype==NULL)
+        return;
+       mc_hash_value(hash, state, info, ((char*)address) + member->offset, subtype);
+    }
+    return;
+  }
+
+  // Pointer, we hash a single value but it might be an array.
+  case DW_TAG_pointer_type:
+  case DW_TAG_reference_type:
+  case DW_TAG_rvalue_reference_type:
+  {
+    if(mc_ignored(address, 1))
+      return;
+
+    void* pointed = *(void**)address;
+    if(pointed==NULL) {
+      XBT_DEBUG("Hashed pinter is NULL");
+      return;
+    }
+
+    // Avoid loops:
+    if(mc_address_test(state->handled_addresses, pointed)) {
+      XBT_DEBUG("Hashed pointed data %p already hashed", pointed);
+      return;
+    }
+    mc_address_add(state->handled_addresses, pointed);
+
+    // Anything outside the R/W segments and the heap is not hashed:
+    bool valid_pointer = (pointed >= (void*) mc_binary_info->start_rw && pointed <= (void*) mc_binary_info->end_rw)
+        || (pointed >= (void*) mc_libsimgrid_info->start_rw && pointed <= (void*) mc_libsimgrid_info->end_rw)
+        || (pointed >= std_heap && pointed < (void*) ((const char*)std_heap + STD_HEAP_SIZE));
+    if(!valid_pointer) {
+      XBT_DEBUG("Hashed pointed data %p is in an ignored range", pointed);
+      return;
+    }
+
+    dw_type_t subtype = type->dw_type_id == NULL ? NULL : xbt_dict_get_or_null(info->types, type->dw_type_id);
+    if(subtype==NULL) {
+      XBT_DEBUG("Missing type for %p (type=%s)", pointed, type->dw_type_id);
+      return;
+    }
+
+    address = pointed;
+    type = subtype;
+    goto top;
+  }
+
+  // Skip this:
+  case DW_TAG_union_type:
+  case DW_TAG_subroutine_type:
+  default:
+    return;
+  }
+}
+
+
+static void mc_hash_object_globals(mc_hash_t *hash, mc_hashing_state* state, mc_object_info_t info) {
+  unsigned int cursor = 0;
+  dw_variable_t variable;
+  xbt_dynar_foreach(info->global_variables, cursor, variable) {
+    XBT_DEBUG("Hash global variable %s", variable->name);
+
+    if(variable->type_origin == NULL) {
+      // Nothing
+      continue;
+    }
+
+    dw_type_t type = xbt_dict_get_or_null(info->types, variable->type_origin);
+    if(type==NULL) {
+      // Nothing
+      continue;
+    }
+
+    const char* address = variable->address.address;
+    bool valid_pointer = (address >= mc_binary_info->start_rw && address <= mc_binary_info->end_rw)
+        || (address >= mc_libsimgrid_info->start_rw && address <= mc_libsimgrid_info->end_rw)
+        || (address >= (const char*) std_heap && address < (const char *)std_heap + STD_HEAP_SIZE);
+    if(!valid_pointer) continue;
+
+    mc_hash_value(hash, state, info, variable->address.address, type);
+  }
+}
+
+static void mc_hash_stack_frame(
+  mc_hash_t *hash,
+    mc_object_info_t info, unw_cursor_t* unw_cursor, dw_frame_t frame, char* frame_pointer, mc_hashing_state* state
+    ) {
+
+  // return; // TEMP
+
+  unsigned int cursor = 0;
+  dw_variable_t variable;
+  xbt_dynar_foreach(frame->variables, cursor, variable){
+
+    if(variable->type_origin==NULL) {
+      XBT_DEBUG("Hash local variable %s without type", variable->name);
+      continue;
+    }
+    if(variable->address.location == NULL) {
+      XBT_DEBUG("Hash local variable %s without location", variable->name);
+      continue;
+    }
+
+    XBT_DEBUG("Hash local variable %s", variable->name);
+
+    void* variable_address = (void*) MC_dwarf_resolve_location(unw_cursor, variable->address.location, frame_pointer);
+
+    dw_type_t type = xbt_dict_get_or_null(info->types, variable->type_origin);
+    if(type==NULL) {
+      XBT_DEBUG("Hash local variable %s without loctypeation", variable->name);
+      continue;
+    }
+
+    mc_hash_value(hash, state, info, variable_address, type);
+  }
+}
+
+/** \brief Find the frame base of a given frame
+ *
+ *  \param ip         Instruction pointer
+ *  \param frame
+ *  \param unw_cursor
+ */
+void* mc_find_frame_base(unw_word_t ip, dw_frame_t frame, unw_cursor_t* unw_cursor) {
+  switch(frame->frame_base->type) {
+  case e_dw_loclist:
+  {
+    int loclist_cursor;
+    for(loclist_cursor=0; loclist_cursor < xbt_dynar_length(frame->frame_base->location.loclist); loclist_cursor++){
+      dw_location_entry_t entry = xbt_dynar_get_as(frame->frame_base->location.loclist, loclist_cursor, dw_location_entry_t);
+      if((ip >= entry->lowpc) && (ip < entry->highpc)){
+        return (void*) MC_dwarf_resolve_location(unw_cursor, entry->location, NULL);
+      }
+    }
+    return NULL;
+  }
+  // Not handled:
+  default:
+    return NULL;
+  }
+}
+
+static void mc_hash_stack(mc_hash_t *hash, stack_region_t stack, mc_hashing_state* state) {
+
+  unw_cursor_t cursor;
+  if(unw_init_local(&cursor, (unw_context_t *)stack->context)){
+    xbt_die("unw_init_local failed");
+  }
+
+  MC_HASH(*hash, (long)stack->address);
+
+  long count = 0;
+
+  int ret;
+  for(ret=1; ret >= 0; ret = unw_step(&cursor)) {
+
+    // Find the frame name:
+    unw_word_t off;
+    char frame_name[256];
+    if(unw_get_proc_name(&cursor, frame_name, sizeof (frame_name), &off)!=0) {
+      continue;
+    }
+
+    XBT_DEBUG("Frame #%i %s", (int) count, frame_name);
+
+    // Stop before context switch with maestro
+    if(!strcmp(frame_name, "smx_ctx_sysv_wrapper")) {
+      break;
+    }
+
+    ++count;
+
+    unw_word_t ip, sp;
+    if(unw_get_reg(&cursor, UNW_REG_IP, &ip))
+      continue;
+    if(unw_get_reg(&cursor, UNW_REG_SP, &sp))
+      continue;
+
+    MC_HASH(*hash, ip);
+
+    // Find the object info:
+    mc_object_info_t info;
+    if((long)ip >= (long) mc_libsimgrid_info->start_exec && (long)ip < (long) mc_libsimgrid_info->end_exec)
+      info = mc_libsimgrid_info;
+    else if((long)ip >= (long) mc_binary_info->start_exec && (long)ip < (long) mc_binary_info->end_exec)
+      info = mc_binary_info;
+    else
+      continue;
+
+    // Find the frame:
+    dw_frame_t frame = xbt_dict_get_or_null(info->local_variables, frame_name);
+    if(frame==NULL)
+      continue;
+
+    long true_ip = (long)frame->low_pc + (long)off;
+
+    // Find the fame base:
+    void* frame_base = mc_find_frame_base(true_ip, frame, &cursor);
+    if(frame_base==NULL)
+      continue;
+
+    mc_hash_stack_frame(hash, info, &cursor, frame, frame_base, state);
+  }
+
+  MC_HASH(*hash, count);
+}
+
+static void mc_hash_stacks(mc_hash_t *hash, mc_hashing_state* state) {
+  unsigned int cursor = 0;
+  stack_region_t current_stack;
+
+  MC_HASH(*hash, xbt_dynar_length(stacks_areas));
+
+  int i=0;
+  xbt_dynar_foreach(stacks_areas, cursor, current_stack){
+    XBT_DEBUG("Stack %i", i);
+    mc_hash_stack(hash, current_stack, state);
+    ++i;
+  }
+}
+
+uint64_t mc_hash_processes_state(int num_state) {
+  XBT_DEBUG("START hash %i", num_state);
+
+  mc_hashing_state state;
+  mc_hash_state_init(&state);
+
+  mc_hash_t hash = MC_HASH_INIT;
+
+  MC_HASH(hash, xbt_swag_size(simix_global->process_list)); // process count
+  mc_hash_object_globals(&hash, &state, mc_binary_info);
+  // mc_hash_object_globals(&hash, &state, mc_libsimgrid_info);
+  mc_hash_stacks(&hash, &state);
+
+  mc_hash_state_destroy(&state);
+
+  XBT_DEBUG("END hash %i", num_state);
+  return hash;
+}
index 85ef10f..bf1cf11 100644 (file)
@@ -45,6 +45,7 @@ typedef struct s_mc_snapshot{
   size_t *stack_sizes;
   xbt_dynar_t stacks;
   xbt_dynar_t to_ignore;
+  uint64_t hash;
   char hash_global[41];
   char hash_local[41];
 } s_mc_snapshot_t, *mc_snapshot_t;
@@ -438,6 +439,7 @@ void MC_dwarf_register_variable(mc_object_info_t info, dw_frame_t frame, dw_vari
 /********************************** DWARF **********************************/
 
 Dwarf_Off MC_dwarf_resolve_location(unw_cursor_t* c, dw_location_t location, void* frame_pointer_address);
+void* mc_find_frame_base(unw_word_t ip, dw_frame_t frame, unw_cursor_t* unw_cursor);
 
 /********************************** Miscellaneous **********************************/
 
@@ -469,5 +471,20 @@ extern xbt_dynar_t communications_pattern;
 
 void get_comm_pattern(xbt_dynar_t communications_pattern, smx_simcall_t request, int call);
 
+/* *********** Sets *********** */
+
+typedef struct s_mc_address_set *mc_address_set_t;
+
+mc_address_set_t mc_address_set_new();
+mc_address_set_t mc_address_set_free(mc_address_set_t* p);
+void mc_address_add(mc_address_set_t p, const void* value);
+bool mc_address_test(mc_address_set_t p, const void* value);
+
+/* *********** Hash *********** */
+
+uint64_t mc_hash_processes_state(int num_state);
+
+#define MC_USE_SNAPSHOT_HASH 1
+
 #endif
 
diff --git a/src/mc/mc_set.cpp b/src/mc/mc_set.cpp
new file mode 100644 (file)
index 0000000..b04c1ff
--- /dev/null
@@ -0,0 +1,36 @@
+/* Copyright (c) 2007-2013. The SimGrid Team.
+ * All rights reserved.                                                     */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+#include <stddef.h>
+#include <set>
+
+typedef std::set<const void*>*  mc_address_set_t;
+
+extern "C" {
+
+mc_address_set_t mc_address_set_new();
+mc_address_set_t mc_address_set_free(mc_address_set_t* p);
+void mc_address_add(mc_address_set_t p, const void* value);
+bool mc_address_test(mc_address_set_t p, const void* value);
+
+mc_address_set_t mc_address_set_new() {
+  return new std::set<const void*>();
+}
+
+mc_address_set_t mc_address_set_free(mc_address_set_t* p) {
+  delete *p;
+  *p = NULL;
+}
+
+void mc_address_add(mc_address_set_t p, const void* value) {
+  p->insert(value);
+}
+
+bool mc_address_test(mc_address_set_t p, const void* value) {
+  return p->find(value) != p->end();
+}
+
+};