XBT_PUBLIC_DATA( MPI_Op ) MPI_BXOR;
// Operators reserved for one-sided accumulate calls
XBT_PUBLIC_DATA( MPI_Op ) MPI_REPLACE;
+XBT_PUBLIC_DATA( MPI_Op ) MPI_NO_OP;
typedef SMPI_Group* MPI_Group;
MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win));
MPI_CALL(XBT_PUBLIC(int), MPI_Accumulate,( void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win));
+MPI_CALL(XBT_PUBLIC(int), MPI_Get_accumulate,( void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
+ void* result_addr, int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
+ int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win));
MPI_CALL(XBT_PUBLIC(int), MPI_Alloc_mem, (MPI_Aint size, MPI_Info info, void *baseptr));
MPI_CALL(XBT_PUBLIC(int), MPI_Free_mem, (void *base));
WRAPPED_PMPI_CALL(int,MPI_Get_processor_name,(char *name, int *resultlen),(name, resultlen))
WRAPPED_PMPI_CALL(int,MPI_Get_version ,(int *version,int *subversion),(version,subversion))
WRAPPED_PMPI_CALL(int,MPI_Get,( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank,MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win),(origin_addr,origin_count, origin_datatype,target_rank, target_disp, target_count,target_datatype,win))
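+// WRAPPED_PMPI_CALL (defined earlier in this file) generates the public MPI_Get_accumulate
+// entry point as a thin forward to PMPI_Get_accumulate, preserving the PMPI profiling interface.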
+WRAPPED_PMPI_CALL(int,MPI_Get_accumulate, (void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr, int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win),(origin_addr, origin_count, origin_datatype, result_addr, result_count, result_datatype, target_rank, target_disp, target_count, target_datatype, op, win))
WRAPPED_PMPI_CALL(int,MPI_Group_compare,(MPI_Group group1, MPI_Group group2, int *result),(group1, group2, result))
WRAPPED_PMPI_CALL(int,MPI_Group_difference,(MPI_Group group1, MPI_Group group2, MPI_Group * newgroup),(group1, group2, newgroup))
WRAPPED_PMPI_CALL(int,MPI_Group_excl,(MPI_Group group, int n, int *ranks, MPI_Group * newgroup),(group, n, ranks, newgroup))
memcpy(b, a, *length * (*datatype)->size());
}
+// MPI_NO_OP leaves the target buffer untouched; combined with MPI_Get_accumulate
+// it amounts to an atomic read of the target window.
+static void no_func(void *a, void *b, int *length, MPI_Datatype * datatype)
+{
+}
+
#define CREATE_MPI_OP(name, func) \
        static SMPI_Op mpi_##name(&(func), true); \
MPI_Op name = &mpi_##name;
CREATE_MPI_OP(MPI_MAXLOC, maxloc_func);
CREATE_MPI_OP(MPI_MINLOC, minloc_func);
CREATE_MPI_OP(MPI_REPLACE, replace_func);
+CREATE_MPI_OP(MPI_NO_OP, no_func);
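+// For reference, CREATE_MPI_OP(MPI_NO_OP, no_func) expands to:
+//   static SMPI_Op mpi_MPI_NO_OP(&(no_func), true);
+//   MPI_Op MPI_NO_OP = &mpi_MPI_NO_OP;
+// where the boolean flags the operation as commutative.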
namespace simgrid{
namespace smpi{
return retval;
}
+int PMPI_Get_accumulate(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr,
+                        int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
+                        int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win){
+ int retval = 0;
+ smpi_bench_end();
+ if (win == MPI_WIN_NULL) {
+ retval = MPI_ERR_WIN;
+ } else if (target_rank == MPI_PROC_NULL) {
+ retval = MPI_SUCCESS;
+  } else if (target_rank < 0){
+    retval = MPI_ERR_RANK;
+  } else if (target_disp < 0){
+    retval = MPI_ERR_ARG;
+  } else if ((origin_count < 0 || target_count < 0 || result_count < 0) ||
+             (origin_addr == nullptr && origin_count > 0) ||
+             (result_addr == nullptr && result_count > 0)){
+    retval = MPI_ERR_COUNT;
+ } else if ((!origin_datatype->is_valid()) ||
+ (!target_datatype->is_valid())||
+ (!result_datatype->is_valid())) {
+ retval = MPI_ERR_TYPE;
+ } else if (op == MPI_OP_NULL) {
+ retval = MPI_ERR_OP;
+ } else {
+ int rank = smpi_process()->index();
+ MPI_Group group;
+ win->get_group(&group);
+ int src_traced = group->index(target_rank);
+ TRACE_smpi_ptp_in(rank, src_traced, rank, __FUNCTION__, nullptr);
+
+ retval = win->get_accumulate( origin_addr, origin_count, origin_datatype, result_addr,
+ result_count, result_datatype, target_rank, target_disp,
+ target_count, target_datatype, op);
+
+ TRACE_smpi_ptp_out(rank, src_traced, rank, __FUNCTION__);
+ }
+ smpi_bench_begin();
+ return retval;
+}
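+/* Usage sketch (illustrative, not part of the patch): a fetch-and-add of an
+ * integer exposed in window `win` by process `target`; `old` receives the
+ * value held by the target before the addition, as MPI guarantees:
+ *
+ *   int one = 1, old;
+ *   MPI_Win_lock(MPI_LOCK_EXCLUSIVE, target, 0, win);
+ *   MPI_Get_accumulate(&one, 1, MPI_INT, &old, 1, MPI_INT,
+ *                      target, 0, 1, MPI_INT, MPI_SUM, win);
+ *   MPI_Win_unlock(target, win);
+ *
+ * Passing MPI_NO_OP as the operation turns the call into an atomic read,
+ * which is why that operator is introduced together with this function. */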
+
int PMPI_Win_post(MPI_Group group, int assert, MPI_Win win){
int retval = 0;
smpi_bench_end();
return MPI_SUCCESS;
}
+int Win::get_accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr,
+ int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count,
+ MPI_Datatype target_datatype, MPI_Op op){
+
+  // Pointer to the window exposed by the target process
+  MPI_Win send_win = connected_wins_[target_rank];
+
+  if(opened_==0){ // No epoch opened through fence or post/start: the access must then be protected by a lock
+    int locked = 0;
+    for (auto it : send_win->lockers_)
+      if (it == comm_->rank())
+        locked = 1;
+    if (locked != 1)
+      return MPI_ERR_WIN;
+  }
+
+  if (target_count * target_datatype->get_extent() > send_win->size_)
+    return MPI_ERR_ARG;
+
+ XBT_DEBUG("Entering MPI_Get_accumulate from %d", target_rank);
+
+  // Fetch the target buffer first, so that result_addr receives the value it
+  // held before the reduction is applied.
+  get(result_addr, result_count, result_datatype, target_rank,
+      target_disp, target_count, target_datatype);
+  accumulate(origin_addr, origin_count, origin_datatype, target_rank,
+      target_disp, target_count, target_datatype, op);
+
+  return MPI_SUCCESS;
+}
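+// Note: the MPI standard requires accumulate operations, including get_accumulate,
+// to be element-wise atomic with respect to concurrent accesses; this first
+// version simply chains the two existing RMA primitives.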
+
int Win::start(MPI_Group group, int assert){
/* From the MPI Forum's advice:
The call to MPI_WIN_COMPLETE does not return until the put call has completed at the origin; and the target window
MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype);
int accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank,
MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op);
+  int get_accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr,
+                      int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
+                      int target_count, MPI_Datatype target_datatype, MPI_Op op);
static Win* f2c(int id);
int lock(int lock_type, int rank, int assert);
int unlock(int rank);
foreach(file accfence1 accfence2_am accfence2 accpscw1 allocmem epochtest getfence1 getgroup manyrma3 nullpscw lockcontention lockopts contig_displ
putfence1 putfidx putpscw1 test1_am test1 test1_dt test2_am test2 test3 test4 test5_am test5 transpose1 transpose2 lockcontention2
transpose3 transpose4 transpose5 transpose6 transpose7 window_creation selfrma locknull
- at_complete acc-pairtype manyget large-small-acc
- lock_nested winname attrorderwin baseattrwin fkeyvalwin strided_acc_indexed
+ at_complete acc-pairtype manyget large-small-acc lock_dt
+ lock_nested winname attrorderwin baseattrwin fkeyvalwin strided_acc_indexed strided_getacc_indexed
strided_acc_onelock strided_get_indexed strided_putget_indexed contention_put contention_putget)
# not compiled files: acc-loc adlb_mimic1 badrma compare_and_swap
# fence_shm fetchandadd_am fetchandadd fetchandadd_tree_am fetchandadd_tree
# linked_list_bench_lock_excl linked_list_bench_lock_shr linked_list linked_list_fop linked_list_lockall
# manyrma2 mcs-mutex mixedsync mutex_bench lockcontention3
# pscw_ordering put_base put_bottom req_example reqops rmanull rmazero rma-contig
- # strided_acc_subarray strided_getacc_indexed strided_getacc_indexed_shared
+ # strided_getacc_indexed_shared
# win_dynamic_acc win_flavors win_info win_shared win_shared_noncontig win_shared_noncontig_put
# win_large_shm win_zero wintest get-struct atomic_rmw_fop atomic_rmw_gacc rget-unlock atomic_get atomic_rmw_cas
# win_shared_zerobyte aint derived-acc-flush_local large-acc-flush_local lockall_dt lockall_dt_flushall
- # lockall_dt_flushlocalall lockall_dt_flushlocal lock_contention_dt lock_dt lock_dt_flush lock_dt_flushlocal
+ # lockall_dt_flushlocalall lockall_dt_flushlocal lock_contention_dt lock_dt_flush lock_dt_flushlocal
#racc_local_comp rput_local_comp win_shared_create win_shared_put_flush_get win_shared_rma_flush_load
# wrma_flush_get
add_executable(${file} ${file}.c)
#lockcontention3 8
lockopts 2
#needs get_accumulate
-#lock_dt 2
+lock_dt 2
#lock_dt_flush 2
#lock_dt_flushlocal 2
#lockall_dt 4 timeLimit=240
strided_get_indexed 2
strided_putget_indexed 4
#strided_putget_indexed_shared 4 mpiversion=3.0
-#strided_getacc_indexed 4 mpiversion=3.0
+strided_getacc_indexed 4
#strided_getacc_indexed_shared 4 mpiversion=3.0
window_creation 2
contention_put 4