#include "surf/surf.h"
#include "simgrid/sg_config.h"
#include "simgrid/modelchecker.h"
+#include "mc/mc_replay.h"
#ifndef WIN32
#include <sys/mman.h>
/* Factor used to turn a measured duration into an amount of flops to simulate
 * (the benchmarking path computes flops = duration * smpi_running_power). */
double smpi_running_power;
/* Index of the process whose data segment is currently mapped in place of the
 * executable's own; stays at -1 until the first switch has been performed. */
int smpi_loaded_page = -1;
-char* start_data_exe = NULL;
-int size_data_exe = 0;
/* Start address of the executable's ".data" section, filled in by
 * smpi_get_executable_global_size(). This patch adds the smpi_ prefix to the
 * former start_data_exe / size_data_exe globals. */
+char* smpi_start_data_exe = NULL;
/* Number of bytes to privatise: offset of the .data start within its page,
 * plus the .data size, plus the (gap-adjusted) .bss size. A value of 0 means
 * there is nothing to switch. */
+int smpi_size_data_exe = 0;
/* Reset to 0 by the segment initialisation code when no data segment was
 * found. NOTE(review): presumably set from configuration elsewhere - confirm. */
int smpi_privatize_global_variables;
/* NOTE(review): not referenced in this excerpt; presumably accumulates the
 * benchmarked time across calls - confirm against the rest of the file. */
double smpi_total_benched_time = 0;
-
-
/* Per-process privatisation records, indexed by process number; each entry
 * carries the `address` of the private copy and the `file_descriptor` of the
 * file backing it. */
smpi_privatisation_region_t smpi_privatisation_regions;
typedef struct {
}
/* Inject `flops` of computation into the simulation for the calling process.
 *
 * Starts an execution named "computation" on the host returned by
 * SIMIX_host_self(), tags it with the internal SMPI tracing category, blocks
 * until that execution is over and finally re-selects the data segment of the
 * current SMPI process.
 *
 * This patch changes the handle type from smx_action_t to smx_synchro_t and
 * drops the HAVE_TRACING guard, so the category is now always set.
 */
void smpi_execute_flops(double flops) {
- smx_action_t action;
+ smx_synchro_t action;
smx_host_t host;
host = SIMIX_host_self();
XBT_DEBUG("Handle real computation time: %f flops", flops);
/* NOTE(review): the meaning of the trailing 1, 0, 0 arguments is not visible
 * in this excerpt - check the simcall_host_execute() prototype. */
action = simcall_host_execute("computation", host, flops, 1, 0, 0);
-#ifdef HAVE_TRACING
simcall_set_category (action, TRACE_internal_smpi_get_category());
-#endif
simcall_host_execution_wait(action);
/* Presumably needed because other processes may have run (and swapped the
 * mapped data segment) while this one was waiting - confirm. */
smpi_switch_data_segment(smpi_process_index());
}
if (duration >= smpi_cpu_threshold) {
XBT_DEBUG("Sleep for %g to handle real computation time", duration);
double flops = duration * smpi_running_power;
-#ifdef HAVE_TRACING
int rank = smpi_process_index();
instr_extra_data extra = xbt_new0(s_instr_extra_data_t,1);
extra->type=TRACING_COMPUTING;
extra->comp_size=flops;
TRACE_smpi_computing_in(rank, extra);
-#endif
smpi_execute_flops(flops);
-#ifdef HAVE_TRACING
TRACE_smpi_computing_out(rank);
-#endif
} else {
XBT_DEBUG("Real computation took %g while option smpi/cpu_threshold is set to %g => ignore it",
{
smpi_switch_data_segment(smpi_process_index());
- if(MC_is_active())
+ if (MC_is_active() || MC_record_replay_is_active())
return;
xbt_os_threadtimer_start(smpi_process_timer());
void smpi_bench_end(void)
{
- if(MC_is_active())
+ if (MC_is_active() || MC_record_replay_is_active())
return;
xbt_os_timer_t timer = smpi_process_timer();
smpi_bench_end();
XBT_DEBUG("Sleep for: %lf secs", secs);
- #ifdef HAVE_TRACING
int rank = smpi_comm_rank(MPI_COMM_WORLD);
instr_extra_data extra = xbt_new0(s_instr_extra_data_t,1);
extra->type=TRACING_SLEEPING;
extra->sleep_duration=secs;
TRACE_smpi_sleeping_in(rank, extra);
-#endif
+
simcall_process_sleep(secs);
-#ifdef HAVE_TRACING
+
TRACE_smpi_sleeping_out(rank);
-#endif
smpi_bench_begin();
return 0;
{
void* mem;
if (sg_cfg_get_boolean("smpi/use_shared_malloc")){
- char *loc = bprintf("%zu_%s_%d", (size_t)getpid(), file, line);
+ char *loc = bprintf("/%zu_%s_%d", (size_t)getpid(), file, line);
int fd;
shared_data_t *data;
loc = smpi_shared_alloc_hash(loc); /* hash loc, in order to have something
*/
/* Map the private data segment of process `dest` over the executable's own
 * data segment.
 *
 * Does nothing when smpi_size_data_exe is 0, and the whole body is compiled
 * out when HAVE_MMAP is not defined. On the very first call
 * (smpi_loaded_page == -1) the current content of the real segment is first
 * copied into every process's private region. The backing file of `dest` is
 * then mmap'ed with MAP_FIXED at the page-aligned start of the data segment
 * and smpi_loaded_page is updated. Dies through xbt_die() if the mapping does
 * not land at the requested address.
 *
 * dest: index into smpi_privatisation_regions of the process to switch to.
 */
void smpi_really_switch_data_segment(int dest) {
- if(size_data_exe == 0)//no need to switch
+ if(smpi_size_data_exe == 0)//no need to switch
return;
#ifdef HAVE_MMAP
int i;
if(smpi_loaded_page==-1){//initial switch, do the copy from the real page here
/* NOTE(review): this loop is bounded by SIMIX_process_count() whereas the
 * cleanup loop in smpi_destroy_global_memory_segments() uses
 * smpi_process_count() - confirm both always agree. */
for (i=0; i< SIMIX_process_count(); i++){
- memcpy(smpi_privatisation_regions[i].address,TOPAGE(start_data_exe),size_data_exe);
+ memcpy(smpi_privatisation_regions[i].address,
+ TOPAGE(smpi_start_data_exe), smpi_size_data_exe);
}
}
+ // FIXME, cross-process support (mmap across process when necessary)
int current = smpi_privatisation_regions[dest].file_descriptor;
XBT_DEBUG("Switching data frame to the one of process %d", dest);
/* MAP_FIXED | MAP_SHARED: replace whatever is mapped at the data segment's
 * page with the file backing process `dest`, starting at file offset 0. */
- void* tmp = mmap (TOPAGE(start_data_exe), size_data_exe, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, current, 0);
- if (tmp != TOPAGE(start_data_exe))
+ void* tmp = mmap (TOPAGE(smpi_start_data_exe), smpi_size_data_exe,
+ PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, current, 0);
/* Any result other than the requested address (MAP_FAILED included) is fatal. */
+ if (tmp != TOPAGE(smpi_start_data_exe))
xbt_die("Couldn't map the new region");
- smpi_loaded_page=dest;
+ smpi_loaded_page = dest;
#endif
}
/* Tell whether the path `file` starts with the "/dev/shm/my-buffer-" prefix.
 *
 * NOTE(review): presumably this prefix is the naming pattern of the temporary
 * files backing the privatised segments; the mkstemp() template is not
 * visible in this excerpt - confirm they match.
 *
 * file: NUL-terminated path; it is handed straight to strncmp(), so it must
 *       not be NULL.
 * Returns 1 when the prefix matches, 0 otherwise.
 */
+int smpi_is_privatisation_file(char* file)
+{
/* 19 is the length of the prefix literal; the two must be kept in sync. */
+ return strncmp("/dev/shm/my-buffer-", file, 19) == 0;
+}
+
void smpi_get_executable_global_size(){
int size_bss_binary=0;
int size_data_binary=0;
if(i>=6){
if(strcmp(lfields[1], ".data") == 0){
size_data_binary = strtoul(lfields[2], NULL, 16);
- start_data_exe = (char*) strtoul(lfields[4], NULL, 16);
+ smpi_start_data_exe = (char*) strtoul(lfields[4], NULL, 16);
found++;
}else if(strcmp(lfields[1], ".bss") == 0){
//the beginning of bss is not exactly the end of data if not aligned, grow bss reported size accordingly
//TODO : check if this is OK, as some segments may be inserted between them..
- size_bss_binary = ((char*) strtoul(lfields[4], NULL, 16) - (start_data_exe + size_data_binary))
+ size_bss_binary = ((char*) strtoul(lfields[4], NULL, 16) - (smpi_start_data_exe + size_data_binary))
+ strtoul(lfields[2], NULL, 16);
found++;
}
}
- size_data_exe =(unsigned long)start_data_exe - (unsigned long)TOPAGE(start_data_exe)+ size_data_binary+size_bss_binary;
+ smpi_size_data_exe = (unsigned long) smpi_start_data_exe
+ - (unsigned long) TOPAGE(smpi_start_data_exe)
+ + size_data_binary+size_bss_binary;
xbt_free(command);
xbt_free(line);
pclose(fp);
unsigned int i = 0;
smpi_get_executable_global_size();
- XBT_DEBUG ("bss+data segment found : size %d starting at %p",size_data_exe, start_data_exe );
+ XBT_DEBUG ("bss+data segment found : size %d starting at %p",
+ smpi_size_data_exe, smpi_start_data_exe );
- if(size_data_exe == 0){//no need to switch
+ if (smpi_size_data_exe == 0){//no need to switch
smpi_privatize_global_variables=0;
return;
}
int status;
int file_descriptor= mkstemp (path);
- if (file_descriptor < 0)
- xbt_die("Impossible to create temporary file for memory mapping");
+ if (file_descriptor < 0) {
+ if (errno==EMFILE) {
+ xbt_die("Impossible to create temporary file for memory mapping: %s\n\
+The open() system call failed with the EMFILE error code (too many files). \n\n\
+This means that you reached the system limits concerning the amount of files per process. \
+This is not a surprise if you are trying to virtualize many processes on top of SMPI. \
+Don't panic -- you should simply increase your system limits and try again. \n\n\
+First, check what your limits are:\n\
+ cat /proc/sys/fs/file-max # Gives you the system-wide limit\n\
+ ulimit -Hn # Gives you the per process hard limit\n\
+ ulimit -Sn # Gives you the per process soft limit\n\
+ cat /proc/self/limits # Displays any per-process limitation (including the one given above)\n\n\
+If one of these values is less than the amount of MPI processes that you try to run, then you got the explanation of this error. \
+Ask the Internet about tutorials on how to increase the files limit such as: https://rtcamp.com/tutorials/linux/increase-open-files-limit/",
+ strerror(errno));
+ }
+ xbt_die("Impossible to create temporary file for memory mapping: %s",
+ strerror(errno));
+ }
status = unlink (path);
if (status)
xbt_die("Impossible to unlink temporary file for memory mapping");
- status = ftruncate(file_descriptor, size_data_exe);
+ status = ftruncate(file_descriptor, smpi_size_data_exe);
if(status)
xbt_die("Impossible to set the size of the temporary file for memory mapping");
/* Ask for a free region */
- address = mmap (NULL, size_data_exe, PROT_READ | PROT_WRITE, MAP_SHARED, file_descriptor, 0);
+ address = mmap (NULL, smpi_size_data_exe, PROT_READ | PROT_WRITE, MAP_SHARED, file_descriptor, 0);
if (address == MAP_FAILED)
xbt_die("Couldn't find a free region for memory mapping");
//initialize the values
- memcpy(address,TOPAGE(start_data_exe),size_data_exe);
+ memcpy(address, TOPAGE(smpi_start_data_exe), smpi_size_data_exe);
//store the address of the mapping for further switches
smpi_privatisation_regions[i].file_descriptor = file_descriptor;
}
void smpi_destroy_global_memory_segments(){
- if(size_data_exe == 0)//no need to switch
+ if (smpi_size_data_exe == 0)//no need to switch
return;
#ifdef HAVE_MMAP
int i;
for (i=0; i< smpi_process_count(); i++){
- if(munmap(smpi_privatisation_regions[i].address,size_data_exe) < 0) {
+ if(munmap(smpi_privatisation_regions[i].address, smpi_size_data_exe) < 0) {
XBT_WARN("Unmapping of fd %d failed: %s",
smpi_privatisation_regions[i].file_descriptor, strerror(errno));
}