From 6963302bb58ba0b4530c9503f8b26bb71c691fb5 Mon Sep 17 00:00:00 2001 From: Augustin Degomme Date: Mon, 27 Sep 2021 00:24:00 +0200 Subject: [PATCH] Intercept exit() to avoid killing simulations too early, as the first process exits. This should render the previous warning pointless, let's see if it's still needed. --- ChangeLog | 3 +++ include/smpi/smpi_helpers.h | 2 ++ include/smpi/smpi_helpers_internal.h | 1 + src/smpi/internals/smpi_global.cpp | 6 ++++++ teshsuite/smpi/pt2pt-dsend/pt2pt-dsend.tesh | 2 -- 5 files changed, 12 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 13e0736676..5b778314bb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -28,6 +28,9 @@ SMPI: - scan/excan can now be replayed - wait action now uses ranks and not pid, as the other ones. - smpi/init and smpi/finalization-barrier are now valid for replays. + - exit() is now intercepted by SMPI to avoid premature shutdown of + simulation. The first non-zero return code is returned as the + simulation return code. Documentation: * New section "Release Notes" documenting recent and current developments. 
diff --git a/include/smpi/smpi_helpers.h b/include/smpi/smpi_helpers.h index ef7492d998..8ecd7f582a 100644 --- a/include/smpi/smpi_helpers.h +++ b/include/smpi/smpi_helpers.h @@ -38,4 +38,6 @@ #define realloc(x, y) smpi_shared_realloc_intercept((x), (y), __FILE__, __LINE__) #define free(x) smpi_shared_free(x) #endif + +#define exit(x) smpi_exit(x) #endif diff --git a/include/smpi/smpi_helpers_internal.h b/include/smpi/smpi_helpers_internal.h index 25a4f7093b..929d22e75e 100644 --- a/include/smpi/smpi_helpers_internal.h +++ b/include/smpi/smpi_helpers_internal.h @@ -38,6 +38,7 @@ void* smpi_shared_malloc_intercept(size_t size, const char* file, int line); void* smpi_shared_calloc_intercept(size_t num_elm, size_t elem_size, const char* file, int line); void* smpi_shared_realloc_intercept(void* data, size_t size, const char* file, int line); void smpi_shared_free(void* data); +void smpi_exit(int status); #ifdef __cplusplus } // extern "C" #endif diff --git a/src/smpi/internals/smpi_global.cpp b/src/smpi/internals/smpi_global.cpp index 674cfed28c..13c6a3be99 100644 --- a/src/smpi/internals/smpi_global.cpp +++ b/src/smpi/internals/smpi_global.cpp @@ -635,3 +635,9 @@ void SMPI_thread_create() { TRACE_smpi_init(simgrid::s4u::this_actor::get_pid(), __func__); smpi_process()->mark_as_initialized(); } + +void smpi_exit(int x){ + if(x!=0) + smpi_exit_status = x; + return; +} diff --git a/teshsuite/smpi/pt2pt-dsend/pt2pt-dsend.tesh b/teshsuite/smpi/pt2pt-dsend/pt2pt-dsend.tesh index 713e5410bc..11476da66f 100644 --- a/teshsuite/smpi/pt2pt-dsend/pt2pt-dsend.tesh +++ b/teshsuite/smpi/pt2pt-dsend/pt2pt-dsend.tesh @@ -3,7 +3,6 @@ p Test dsend $ ${bindir:=.}/../../../smpi_script/bin/smpirun -map -hostfile ${bindir:=.}/../hostfile -platform ${platfdir}/small_platform.xml -np 2 --log=no_loc ${bindir:=.}/pt2pt-dsend -s --long --log=smpi_config.thres:warning --log=xbt_cfg.thres:warning --cfg=smpi/simulate-computation:no --cfg=smpi/finalization-barrier:on > [Jupiter:1:(2) 0.000000] 
[dsend/INFO] rank 1: data exchanged > [Tremblay:0:(1) 0.005896] [dsend/INFO] rank 0: data exchanged -> [Tremblay:0:(1) 0.008843] [ker_engine/WARNING] Process called exit when leaving - Skipping cleanups > [0.000000] [smpi/INFO] [rank 0] -> Tremblay > [0.000000] [smpi/INFO] [rank 1] -> Jupiter @@ -14,7 +13,6 @@ p process 2 will finish at 0.5+2*4 (time before first send) + 2*(1+0.5*4) (recv+ ! output sort $ ${bindir:=.}/../../../smpi_script/bin/smpirun -map -hostfile ${bindir:=.}/../hostfile -platform ${platfdir}/small_platform.xml -np 2 --log=no_loc ${bindir:=.}/pt2pt-dsend -s --long --log=smpi_config.thres:warning --cfg=smpi/or:0:1:0.5 --cfg=smpi/os:0:0.5:2 --cfg=smpi/ois:0:1:0.1 --cfg=smpi/simulate-computation:no --cfg=smpi/finalization-barrier:on --log=xbt_cfg.thres:warning > [Jupiter:1:(2) 9.900000] [dsend/INFO] rank 1: data exchanged -> [Tremblay:0:(1) 16.508843] [ker_engine/WARNING] Process called exit when leaving - Skipping cleanups > [Tremblay:0:(1) 14.505896] [dsend/INFO] rank 0: data exchanged > [0.000000] [smpi/INFO] [rank 0] -> Tremblay > [0.000000] [smpi/INFO] [rank 1] -> Jupiter -- 2.20.1