From 77b1286ea2fbc5c4dd502da81961599504f01000 Mon Sep 17 00:00:00 2001 From: degomme Date: Wed, 22 Mar 2017 15:40:30 +0100 Subject: [PATCH] Add mpi_get_accumulate call. And MPI_NO_OP operation --- include/smpi/smpi.h | 4 ++ src/smpi/smpi_mpi.cpp | 1 + src/smpi/smpi_op.cpp | 5 +++ src/smpi/smpi_pmpi.cpp | 40 +++++++++++++++++++ src/smpi/smpi_win.cpp | 31 ++++++++++++++ src/smpi/smpi_win.hpp | 3 ++ teshsuite/smpi/mpich3-test/rma/CMakeLists.txt | 8 ++-- teshsuite/smpi/mpich3-test/rma/testlist | 4 +- 8 files changed, 90 insertions(+), 6 deletions(-) diff --git a/include/smpi/smpi.h b/include/smpi/smpi.h index 507122a2d9..beb7c7b695 100644 --- a/include/smpi/smpi.h +++ b/include/smpi/smpi.h @@ -356,6 +356,7 @@ XBT_PUBLIC_DATA( MPI_Op ) MPI_BOR; XBT_PUBLIC_DATA( MPI_Op ) MPI_BXOR; //For accumulate XBT_PUBLIC_DATA( MPI_Op ) MPI_REPLACE; +XBT_PUBLIC_DATA( MPI_Op ) MPI_NO_OP; typedef SMPI_Group* MPI_Group; @@ -553,6 +554,9 @@ MPI_CALL(XBT_PUBLIC(int), MPI_Put,( void *origin_addr, int origin_count, MPI_Dat MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win)); MPI_CALL(XBT_PUBLIC(int), MPI_Accumulate,( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win)); +MPI_CALL(XBT_PUBLIC(int), MPI_Get_accumulate,( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, + void* result_addr, int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, + int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win)); MPI_CALL(XBT_PUBLIC(int), MPI_Alloc_mem, (MPI_Aint size, MPI_Info info, void *baseptr)); MPI_CALL(XBT_PUBLIC(int), MPI_Free_mem, (void *base)); diff --git a/src/smpi/smpi_mpi.cpp b/src/smpi/smpi_mpi.cpp index e610f99ba7..e4a4f7e3a0 100644 --- a/src/smpi/smpi_mpi.cpp +++ b/src/smpi/smpi_mpi.cpp @@ -85,6 +85,7 @@ WRAPPED_PMPI_CALL(int,MPI_Get_library_version ,(char *version,int 
*len),(version
 WRAPPED_PMPI_CALL(int,MPI_Get_processor_name,(char *name, int *resultlen),(name, resultlen))
 WRAPPED_PMPI_CALL(int,MPI_Get_version ,(int *version,int *subversion),(version,subversion))
 WRAPPED_PMPI_CALL(int,MPI_Get,( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank,MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win),(origin_addr,origin_count, origin_datatype,target_rank, target_disp, target_count,target_datatype,win))
+WRAPPED_PMPI_CALL(int,MPI_Get_accumulate, (void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr, int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win),(origin_addr, origin_count, origin_datatype, result_addr, result_count, result_datatype, target_rank, target_disp, target_count, target_datatype, op, win))
 WRAPPED_PMPI_CALL(int,MPI_Group_compare,(MPI_Group group1, MPI_Group group2, int *result),(group1, group2, result))
 WRAPPED_PMPI_CALL(int,MPI_Group_difference,(MPI_Group group1, MPI_Group group2, MPI_Group * newgroup),(group1, group2, newgroup))
 WRAPPED_PMPI_CALL(int,MPI_Group_excl,(MPI_Group group, int n, int *ranks, MPI_Group * newgroup),(group, n, ranks, newgroup))
diff --git a/src/smpi/smpi_op.cpp b/src/smpi/smpi_op.cpp
index 96dd94142a..5ef10681d2 100644
--- a/src/smpi/smpi_op.cpp
+++ b/src/smpi/smpi_op.cpp
@@ -188,6 +188,12 @@ static void replace_func(void *a, void *b, int *length, MPI_Datatype * datatype)
   memcpy(b, a, *length * (*datatype)->size());
 }
 
+/* Function registered for MPI_NO_OP: it neither reads nor writes any of its arguments. */
+static void no_func(void* /*a*/, void* /*b*/, int* /*length*/, MPI_Datatype* /*datatype*/)
+{
+  /* intentionally empty */
+}
+
 #define CREATE_MPI_OP(name, func)                             \
   static SMPI_Op mpi_##name (&(func) /* func */, true ); \
   MPI_Op name = &mpi_##name;
@@ -205,6 +211,7 @@ CREATE_MPI_OP(MPI_BXOR, bxor_func);
 CREATE_MPI_OP(MPI_MAXLOC, maxloc_func);
 CREATE_MPI_OP(MPI_MINLOC, minloc_func);
 CREATE_MPI_OP(MPI_REPLACE, replace_func);
+CREATE_MPI_OP(MPI_NO_OP, no_func);
 
 namespace simgrid{
 namespace smpi{
diff --git a/src/smpi/smpi_pmpi.cpp b/src/smpi/smpi_pmpi.cpp
index 3db40738cd..5b41eb6c8d 100644
--- a/src/smpi/smpi_pmpi.cpp
+++ b/src/smpi/smpi_pmpi.cpp
@@ -2616,6 +2616,57 @@ int PMPI_Accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_da
   return retval;
 }
 
+/**
+ * MPI_Get_accumulate entry point: checks the arguments, then delegates to Win::get_accumulate().
+ *
+ * Returns MPI_ERR_WIN for MPI_WIN_NULL, MPI_SUCCESS without doing anything for MPI_PROC_NULL, MPI_ERR_RANK
+ * for a negative rank, MPI_ERR_ARG for a negative displacement, MPI_ERR_COUNT for a negative count or a
+ * null buffer with a positive count, MPI_ERR_TYPE for a null or invalid datatype, MPI_ERR_OP for
+ * MPI_OP_NULL, and otherwise whatever Win::get_accumulate() returns.
+ */
+int PMPI_Get_accumulate(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr,
+int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count,
+MPI_Datatype target_datatype, MPI_Op op, MPI_Win win){
+  int retval = 0;
+  smpi_bench_end();
+  if (win == MPI_WIN_NULL) {
+    retval = MPI_ERR_WIN;
+  } else if (target_rank == MPI_PROC_NULL) {
+    retval = MPI_SUCCESS;
+  } else if (target_rank <0){
+    retval = MPI_ERR_RANK;
+  } else if (target_disp <0){
+    retval = MPI_ERR_ARG;
+  } else if ((origin_count < 0 || target_count < 0 || result_count <0) ||
+             (origin_addr==nullptr && origin_count > 0) ||
+             (result_addr==nullptr && result_count > 0)){
+    retval = MPI_ERR_COUNT;
+  } else if (origin_datatype == nullptr || target_datatype == nullptr || result_datatype == nullptr) {
+    // a null datatype handle has to be rejected before is_valid() dereferences it
+    retval = MPI_ERR_TYPE;
+  } else if ((!origin_datatype->is_valid()) ||
+            (!target_datatype->is_valid())||
+            (!result_datatype->is_valid())) {
+    retval = MPI_ERR_TYPE;
+  } else if (op == MPI_OP_NULL) {
+    retval = MPI_ERR_OP;
+  } else {
+    int rank = smpi_process()->index();
+    MPI_Group group;
+    win->get_group(&group);
+    int src_traced = group->index(target_rank);
+    TRACE_smpi_ptp_in(rank, src_traced, rank, __FUNCTION__, nullptr);
+
+    retval = win->get_accumulate( origin_addr, origin_count, origin_datatype, result_addr,
+                                  result_count, result_datatype, target_rank, target_disp,
+                                  target_count, target_datatype, op);
+
+    TRACE_smpi_ptp_out(rank, src_traced, rank, __FUNCTION__);
+  }
+  smpi_bench_begin();
+  return retval;
+}
+
 int PMPI_Win_post(MPI_Group group, int assert, MPI_Win win){
   int retval = 0;
   smpi_bench_end();
diff --git a/src/smpi/smpi_win.cpp b/src/smpi/smpi_win.cpp
index 080f51916a..9398095d5b 100644
---
a/src/smpi/smpi_win.cpp
+++ b/src/smpi/smpi_win.cpp
@@ -300,6 +300,52 @@ int Win::accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_da
   return MPI_SUCCESS;
 }
 
+/**
+ * Fetches the target data into the result buffer, then combines the origin data into the target with op.
+ *
+ * Returns MPI_ERR_WIN when opened_ is 0 and comm_->rank() is not among the target window's lockers_,
+ * MPI_ERR_ARG when target_count * extent of target_datatype exceeds the target window's size_, and
+ * otherwise the first non-success code reported by get() or accumulate(). With MPI_NO_OP only get() runs.
+ */
+int Win::get_accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr,
+              int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count,
+              MPI_Datatype target_datatype, MPI_Op op){
+
+  //get sender pointer
+  MPI_Win send_win = connected_wins_[target_rank];
+
+  if(opened_==0){//check that post/start has been done
+    // no fence or start .. lock ok ?
+    bool locked = false;
+    for (auto locker : send_win->lockers_) {
+      if (locker == comm_->rank()) {
+        locked = true;
+        break; // one match is enough
+      }
+    }
+    if (!locked)
+      return MPI_ERR_WIN;
+  }
+
+  if(target_count*target_datatype->get_extent()>send_win->size_)
+    return MPI_ERR_ARG;
+
+  XBT_DEBUG("Entering MPI_Get_accumulate from %d", target_rank);
+
+  // read first, update second (the order the two calls always had); a failure is reported, not swallowed
+  // NOTE(review): nothing here makes the pair atomic - confirm get() has completed before accumulate() runs
+  int ret = get(result_addr, result_count, result_datatype, target_rank,
+              target_disp, target_count, target_datatype);
+  if (ret != MPI_SUCCESS)
+    return ret;
+
+  // MPI_NO_OP leaves the target untouched and ignores the origin arguments: skip the update entirely
+  if (op == MPI_NO_OP)
+    return MPI_SUCCESS;
+
+  return accumulate(origin_addr, origin_count, origin_datatype, target_rank,
+              target_disp, target_count, target_datatype, op);
+}
+
 int Win::start(MPI_Group group, int assert){
     /* From MPI forum advices
     The call to MPI_WIN_COMPLETE does not return until the put call has completed at the origin; and the target window
diff --git a/src/smpi/smpi_win.hpp b/src/smpi/smpi_win.hpp
index f88eb4fc9b..1680528630 100644
--- a/src/smpi/smpi_win.hpp
+++ b/src/smpi/smpi_win.hpp
@@ -59,6 +59,9 @@ public:
               MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype);
   int accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank,
               MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op);
+  int get_accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr,
+              int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count,
+
MPI_Datatype target_datatype, MPI_Op op); static Win* f2c(int id); int lock(int lock_type, int rank, int assert); int unlock(int rank); diff --git a/teshsuite/smpi/mpich3-test/rma/CMakeLists.txt b/teshsuite/smpi/mpich3-test/rma/CMakeLists.txt index efe580ee27..4caa6f2a72 100644 --- a/teshsuite/smpi/mpich3-test/rma/CMakeLists.txt +++ b/teshsuite/smpi/mpich3-test/rma/CMakeLists.txt @@ -13,8 +13,8 @@ if(enable_smpi AND enable_smpi_MPICH3_testsuite) foreach(file accfence1 accfence2_am accfence2 accpscw1 allocmem epochtest getfence1 getgroup manyrma3 nullpscw lockcontention lockopts contig_displ putfence1 putfidx putpscw1 test1_am test1 test1_dt test2_am test2 test3 test4 test5_am test5 transpose1 transpose2 lockcontention2 transpose3 transpose4 transpose5 transpose6 transpose7 window_creation selfrma locknull - at_complete acc-pairtype manyget large-small-acc - lock_nested winname attrorderwin baseattrwin fkeyvalwin strided_acc_indexed + at_complete acc-pairtype manyget large-small-acc lock_dt + lock_nested winname attrorderwin baseattrwin fkeyvalwin strided_acc_indexed strided_getacc_indexed strided_acc_onelock strided_get_indexed strided_putget_indexed contention_put contention_putget) # not compiled files: acc-loc adlb_mimic1 badrma compare_and_swap contention_put # contention_putget contig_displ fence_shm fetchandadd_am fetchandadd fetchandadd_tree_am fetchandadd_tree @@ -22,11 +22,11 @@ if(enable_smpi AND enable_smpi_MPICH3_testsuite) # linked_list_bench_lock_excl linked_list_bench_lock_shr linked_list linked_list_fop linked_list_lockall # manyrma2 mcs-mutex mixedsync mutex_bench lockcontention3 # pscw_ordering put_base put_bottom req_example reqops rmanull rmazero rma-contig selfrma - # strided_acc_subarray strided_getacc_indexed strided_getacc_indexed_shared + # strided_getacc_indexed_shared # win_dynamic_acc win_flavors win_info win_shared win_shared_noncontig win_shared_noncontig_put # win_large_shm win_zero wintest get-struct atomic_rmw_fop atomic_rmw_gacc 
rget-unlock atomic_get atomic_rmw_cas # win_shared_zerobyte aint derived-acc-flush_local large-acc-flush_local lockall_dt lockall_dt_flushall - # lockall_dt_flushlocalall lockall_dt_flushlocal lock_contention_dt lock_dt lock_dt_flush lock_dt_flushlocal + # lockall_dt_flushlocalall lockall_dt_flushlocal lock_contention_dt lock_dt_flush lock_dt_flushlocal #racc_local_comp rput_local_comp win_shared_create win_shared_put_flush_get win_shared_rma_flush_load # wrma_flush_get add_executable(${file} ${file}.c) diff --git a/teshsuite/smpi/mpich3-test/rma/testlist b/teshsuite/smpi/mpich3-test/rma/testlist index 8525de3915..17cc1c63f6 100644 --- a/teshsuite/smpi/mpich3-test/rma/testlist +++ b/teshsuite/smpi/mpich3-test/rma/testlist @@ -33,7 +33,7 @@ lockcontention2 8 #lockcontention3 8 lockopts 2 #needs get_accumulate -#lock_dt 2 +lock_dt 2 #lock_dt_flush 2 #lock_dt_flushlocal 2 #lockall_dt 4 timeLimit=240 @@ -78,7 +78,7 @@ strided_acc_onelock 2 strided_get_indexed 2 strided_putget_indexed 4 #strided_putget_indexed_shared 4 mpiversion=3.0 -#strided_getacc_indexed 4 mpiversion=3.0 +strided_getacc_indexed 4 #strided_getacc_indexed_shared 4 mpiversion=3.0 window_creation 2 contention_put 4 -- 2.20.1