Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
int -> bool for replay control
[simgrid.git] / src / smpi / smpi_global.cpp
index 1da6308..e2bc344 100644 (file)
@@ -5,6 +5,7 @@
  * under the terms of the license (GNU LGPL) which comes with this package. */
 
 #include "private.h"
+#include "private.hpp"
 #include "smpi_mpi_dt_private.h"
 #include "mc/mc.h"
 #include "src/mc/mc_record.h"
 #include "simgrid/sg_config.h"
 #include "src/mc/mc_replay.h"
 #include "src/msg/msg_private.h"
+#include "src/simix/SynchroComm.hpp"
+
 
 #include <float.h>              /* DBL_MAX */
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <fstream>
 
 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_kernel, smpi, "Logging specific to SMPI (kernel)");
+#include <boost/tokenizer.hpp>
+#include <boost/algorithm/string.hpp> /* trim_right / trim_left */
+
+std::unordered_map<std::string, double> location2speedup;
 
 typedef struct s_smpi_process_data {
   double simulated;
@@ -38,9 +46,10 @@ typedef struct s_smpi_process_data {
   char state;
   int sampling;                 /* inside an SMPI_SAMPLE_ block? */
   char* instance_id;
-  int replaying;                /* is the process replaying a trace */
+  bool replaying;                /* is the process replaying a trace */
   xbt_bar_t finalization_barrier;
   int return_value;
+  smpi_trace_call_location_t* trace_call_loc;
 } s_smpi_process_data_t;
 
 static smpi_process_data_t *process_data = NULL;
@@ -50,7 +59,6 @@ int* index_to_process_data = NULL;
 extern double smpi_total_benched_time;
 xbt_os_timer_t global_timer;
 MPI_Comm MPI_COMM_WORLD = MPI_COMM_UNINITIALIZED;
-
 MPI_Errhandler *MPI_ERRORS_RETURN = NULL;
 MPI_Errhandler *MPI_ERRORS_ARE_FATAL = NULL;
 MPI_Errhandler *MPI_ERRHANDLER_NULL = NULL;
@@ -78,7 +86,7 @@ void smpi_process_init(int *argc, char ***argv)
   if (argc && argv) {
     proc = SIMIX_process_self();
     //FIXME: dirty cleanup method to avoid using msg cleanup functions on these processes when using MSG+SMPI
-    SIMIX_process_set_cleanup_function(proc, SIMIX_process_cleanup);
+    SIMIX_process_set_cleanup_function(proc, MSG_process_cleanup_from_SIMIX);
     char* instance_id = (*argv)[1];
     int rank = xbt_str_parse_int((*argv)[2], "Invalid rank: %s");
     index = smpi_process_index_of_smx_process(proc);
@@ -102,7 +110,7 @@ void smpi_process_init(int *argc, char ***argv)
     if(temp_bar != NULL) data->finalization_barrier = temp_bar;
     data->index       = index;
     data->instance_id = instance_id;
-    data->replaying   = 0;
+    data->replaying   = false;
     //xbt_free(simcall_process_get_data(proc));
 
     simdata_process_t simdata = static_cast<simdata_process_t>(simcall_process_get_data(proc));
@@ -177,17 +185,17 @@ void smpi_process_mark_as_initialized(void)
     process_data[index_to_process_data[index]]->state = SMPI_INITIALIZED;
 }
 
-void smpi_process_set_replaying(int value){
+void smpi_process_set_replaying(bool value){ // no-op when the index is MPI_UNDEFINED or the process state is SMPI_FINALIZED
   int index = smpi_process_index();
   if ((index != MPI_UNDEFINED) && (process_data[index_to_process_data[index]]->state != SMPI_FINALIZED))
     process_data[index_to_process_data[index]]->replaying = value;
 }
 
-int smpi_process_get_replaying(){
+bool smpi_process_get_replaying(){ // per-process flag if the index is known, else the result of _xbt_replay_is_active()
   int index = smpi_process_index();
   if (index != MPI_UNDEFINED)
     return process_data[index_to_process_data[index]]->replaying;
-  else return _xbt_replay_is_active();
+  else return (_xbt_replay_is_active() != 0); // compare against 0 so the result is converted to bool explicitly
 }
 
 int smpi_global_size(void)
@@ -226,6 +234,18 @@ int smpi_process_count(void)
   return process_count;
 }
 
+/**
+ * \brief Returns the structure that stores the location (filename + line number)
+ *        of the last calls to MPI_* functions, as held in smpi_process_data().
+ *        NOTE(review): only allocated if "smpi/trace-call-location" is set.
+ * \see smpi_trace_set_call_location
+ */
+smpi_trace_call_location_t* smpi_process_get_call_location(void)
+{
+  smpi_process_data_t data = smpi_process_data();
+  return data->trace_call_loc;
+}
+
 int smpi_process_index(void)
 {
   smpi_process_data_t data = smpi_process_data();
@@ -336,34 +356,37 @@ void print_request(const char *message, MPI_Request request)
        message, request, request->buf, request->size, request->src, request->dst, request->tag, request->flags);
 }
 
-void smpi_comm_copy_buffer_callback(smx_synchro_t comm, void *buff, size_t buff_size)
+void smpi_comm_copy_buffer_callback(smx_synchro_t synchro, void *buff, size_t buff_size)
 {
   XBT_DEBUG("Copy the data over");
   void* tmpbuff=buff;
+  simgrid::simix::Comm *comm = dynamic_cast<simgrid::simix::Comm*>(synchro);
 
   if((smpi_privatize_global_variables) && ((char*)buff >= smpi_start_data_exe)
       && ((char*)buff < smpi_start_data_exe + smpi_size_data_exe )
     ){
        XBT_DEBUG("Privatization : We are copying from a zone inside global memory... Saving data to temp buffer !");
-       smpi_switch_data_segment(((smpi_process_data_t)(((simdata_process_t)SIMIX_process_get_data(comm->comm.src_proc))->data))->index);
+
+
+       smpi_switch_data_segment(((smpi_process_data_t)(((simdata_process_t)SIMIX_process_get_data(comm->src_proc))->data))->index);
        tmpbuff = (void*)xbt_malloc(buff_size);
        memcpy(tmpbuff, buff, buff_size);
   }
 
-  if((smpi_privatize_global_variables) && ((char*)comm->comm.dst_buff >= smpi_start_data_exe)
-      && ((char*)comm->comm.dst_buff < smpi_start_data_exe + smpi_size_data_exe )){
+  if((smpi_privatize_global_variables) && ((char*)comm->dst_buff >= smpi_start_data_exe)
+      && ((char*)comm->dst_buff < smpi_start_data_exe + smpi_size_data_exe )){
        XBT_DEBUG("Privatization : We are copying to a zone inside global memory - Switch data segment");
-       smpi_switch_data_segment(((smpi_process_data_t)(((simdata_process_t)SIMIX_process_get_data(comm->comm.dst_proc))->data))->index);
+       smpi_switch_data_segment(((smpi_process_data_t)(((simdata_process_t)SIMIX_process_get_data(comm->dst_proc))->data))->index);
   }
 
-  memcpy(comm->comm.dst_buff, tmpbuff, buff_size);
-  if (comm->comm.detached) {
+  memcpy(comm->dst_buff, tmpbuff, buff_size);
+  if (comm->detached) {
     // if this is a detached send, the source buffer was duplicated by SMPI
     // sender to make the original buffer available to the application ASAP
     xbt_free(buff);
     //It seems that the request is used after the call there this should be free somewhere else but where???
     //xbt_free(comm->comm.src_data);// inside SMPI the request is kept inside the user data and should be free
-    comm->comm.src_buff = NULL;
+    comm->src_buff = NULL;
   }
 
   if(tmpbuff!=buff)xbt_free(tmpbuff);
@@ -402,6 +425,28 @@ void smpi_global_init(void)
     global_timer = xbt_os_timer_new();
     xbt_os_walltimer_start(global_timer);
   }
+
+  if (xbt_cfg_get_string("smpi/comp-adjustment-file")[0] != '\0') { 
+    std::string filename {xbt_cfg_get_string("smpi/comp-adjustment-file")};
+    std::ifstream fstream(filename);
+    if (!fstream.is_open()) {
+      xbt_die("Could not open file %s. Does it exist?", filename.c_str());
+    }
+
+    std::string line;
+    typedef boost::tokenizer< boost::escaped_list_separator<char>> Tokenizer;
+    std::getline(fstream, line); // Skip the header line
+    while (std::getline(fstream, line)) {
+      Tokenizer tok(line);
+      Tokenizer::iterator it  = tok.begin();
+      Tokenizer::iterator end = std::next(tok.begin());
+
+      std::string location = *it;
+      boost::trim(location);
+      location2speedup.insert(std::pair<std::string, double>(location, std::stod(*end)));
+    }
+  }
+
   if (process_count == 0){
     process_count = SIMIX_process_count();
     smpirun=1;
@@ -426,6 +471,10 @@ void smpi_global_init(void)
     process_data[i]->sampling             = 0;
     process_data[i]->finalization_barrier = NULL;
     process_data[i]->return_value         = 0;
+
+    if (xbt_cfg_get_boolean("smpi/trace-call-location")) {
+      process_data[i]->trace_call_loc     = xbt_new(smpi_trace_call_location_t, 1);
+    }
   }
   //if the process was launched through smpirun script we generate a global mpi_comm_world
   //if not, we let MPI_COMM_NULL, and the comm world will be private to each mpi instance
@@ -463,6 +512,9 @@ void smpi_global_destroy(void)
     }
     xbt_os_timer_free(process_data[i]->timer);
     xbt_mutex_destroy(process_data[i]->mailboxes_mutex);
+    if (xbt_cfg_get_boolean("smpi/trace-call-location")) {
+      xbt_free(process_data[i]->trace_call_loc);
+    }
     xbt_free(process_data[i]);
   }
   xbt_free(process_data);
@@ -471,6 +523,8 @@ void smpi_global_destroy(void)
   if (MPI_COMM_WORLD != MPI_COMM_UNINITIALIZED){
     smpi_comm_cleanup_smp(MPI_COMM_WORLD);
     smpi_comm_cleanup_attributes(MPI_COMM_WORLD);
+    if(smpi_coll_cleanup_callback!=NULL)
+      smpi_coll_cleanup_callback();
     xbt_free(MPI_COMM_WORLD);
   }
 
@@ -593,6 +647,7 @@ static void smpi_init_options(){
     int barrier_id = find_coll_description(mpi_coll_barrier_description, xbt_cfg_get_string("smpi/barrier"),"barrier");
     mpi_coll_barrier_fun = (int (*)(MPI_Comm comm)) mpi_coll_barrier_description[barrier_id].coll;
 
+    smpi_coll_cleanup_callback=NULL;
     smpi_cpu_threshold = xbt_cfg_get_double("smpi/cpu-threshold");
     smpi_running_power = xbt_cfg_get_double("smpi/running-power");
     smpi_privatize_global_variables = xbt_cfg_get_boolean("smpi/privatize-global-variables");