From 60923be9eac0b6919aaac49bc83083e77f6e63c6 Mon Sep 17 00:00:00 2001 From: Augustin Degomme Date: Tue, 24 Jun 2014 16:41:49 +0200 Subject: [PATCH] Change the way MPI_Accumulate works with serialization, to handle complex datatypes. --- src/smpi/private.h | 9 +-- src/smpi/smpi_base.c | 52 ++++++++++----- src/smpi/smpi_mpi_dt.c | 118 +++++++++++++++++---------------- src/smpi/smpi_mpi_dt_private.h | 60 +++++++++-------- src/smpi/smpi_rma.c | 36 +++++----- 5 files changed, 156 insertions(+), 119 deletions(-) diff --git a/src/smpi/private.h b/src/smpi/private.h index f8d7e7c77c..fe8c16ad2d 100644 --- a/src/smpi/private.h +++ b/src/smpi/private.h @@ -43,8 +43,8 @@ enum smpi_process_state{ // for each such structure these function should be implemented (vector // index hvector hindex struct) typedef struct s_smpi_subtype{ - void (*serialize)(const void * input, void *output, size_t count, void* subtype); - void (*unserialize)(const void * input, void *output, size_t count, void* subtype); + void (*serialize)(const void * input, void *output, int count, void* subtype); + void (*unserialize)(const void * input, void *output, int count, void* subtype, MPI_Op op); void (*subtype_free)(MPI_Datatype* type); } s_smpi_subtype_t; @@ -103,6 +103,7 @@ typedef struct s_smpi_mpi_request { int detached; MPI_Request detached_sender; int refcount; + MPI_Op op; #ifdef HAVE_TRACING int send; int recv; @@ -259,9 +260,9 @@ MPI_Request smpi_irecv_init(void *buf, int count, MPI_Datatype datatype, MPI_Request smpi_mpi_irecv(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm); MPI_Request smpi_rma_send_init(void *buf, int count, MPI_Datatype datatype, - int src, int dst, int tag, MPI_Comm comm); + int src, int dst, int tag, MPI_Comm comm, MPI_Op op); MPI_Request smpi_rma_recv_init(void *buf, int count, MPI_Datatype datatype, - int src, int dst, int tag, MPI_Comm comm); + int src, int dst, int tag, MPI_Comm comm, MPI_Op op); void smpi_mpi_recv(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm, MPI_Status * status); void smpi_mpi_send(void *buf, int count, MPI_Datatype datatype, int dst, diff --git a/src/smpi/smpi_base.c b/src/smpi/smpi_base.c index 830cd2d623..562ddde3f1 100644 --- a/src/smpi/smpi_base.c +++ b/src/smpi/smpi_base.c @@ -197,11 +197,11 @@ static MPI_Request build_request(void *buf, int count, s_smpi_subtype_t *subtype = datatype->substruct; - if(datatype->has_subtype == 1){ + if(((flags & RECV) && (flags & ACCUMULATE)) || (datatype->has_subtype == 1)){ // This part handles the problem of non-contiguous memory old_buf = buf; buf = count==0 ? NULL : xbt_malloc(count*smpi_datatype_size(datatype)); - if (flags & SEND) { + if ((datatype->has_subtype == 1) && (flags & SEND)) { subtype->serialize(old_buf, buf, count, datatype->substruct); } } @@ -230,7 +230,7 @@ static MPI_Request build_request(void *buf, int count, request->refcount = 1; else request->refcount = 0; - + request->op = MPI_REPLACE; #ifdef HAVE_TRACING request->send = 0; request->recv = 0; @@ -336,7 +336,7 @@ void smpi_mpi_start(MPI_Request request) smpi_comm_use(request->comm); request->action = simcall_comm_irecv(mailbox, request->buf, &request->real_size, &match_recv, - (request->flags & ACCUMULATE)? NULL : &smpi_comm_copy_buffer_callback, + &smpi_comm_copy_buffer_callback, request, -1.0); //integrate pseudo-timing for buffering of small messages, do not bother to execute the simcall if 0 @@ -412,7 +412,7 @@ void smpi_mpi_start(MPI_Request request) buf, request->real_size, &match_send, &xbt_free_f, // how to free the userdata if a detached send fails - (request->flags & ACCUMULATE)? NULL : &smpi_comm_copy_buffer_callback, + &smpi_comm_copy_buffer_callback, request, // detach if msg size < eager/rdv switch limit request->detached); @@ -458,20 +458,32 @@ void smpi_mpi_request_free(MPI_Request * request) MPI_Request smpi_rma_send_init(void *buf, int count, MPI_Datatype datatype, - int src, int dst, int tag, MPI_Comm comm) + int src, int dst, int tag, MPI_Comm comm, MPI_Op op) { MPI_Request request = NULL; /* MC needs the comm to be set to NULL during the call */ - request = build_request(buf==MPI_BOTTOM ? (void*)0 : buf , count, datatype, src, dst, tag, - comm, RMA | NON_PERSISTENT | ISEND | SEND | PREPARED); + if(op==MPI_OP_NULL){ + request = build_request(buf==MPI_BOTTOM ? (void*)0 : buf , count, datatype, src, dst, tag, + comm, RMA | NON_PERSISTENT | ISEND | SEND | PREPARED); + }else{ + request = build_request(buf==MPI_BOTTOM ? (void*)0 : buf, count, datatype, src, dst, tag, + comm, RMA | NON_PERSISTENT | ISEND | SEND | PREPARED | ACCUMULATE); + request->op = op; + } return request; } MPI_Request smpi_rma_recv_init(void *buf, int count, MPI_Datatype datatype, - int src, int dst, int tag, MPI_Comm comm) + int src, int dst, int tag, MPI_Comm comm, MPI_Op op) { MPI_Request request = NULL; /* MC needs the comm to be set to NULL during the call */ - request = build_request(buf==MPI_BOTTOM ? (void*)0 : buf, count, datatype, src, dst, tag, - comm, RMA | NON_PERSISTENT | RECV | PREPARED); + if(op==MPI_OP_NULL){ + request = build_request(buf==MPI_BOTTOM ? (void*)0 : buf, count, datatype, src, dst, tag, + comm, RMA | NON_PERSISTENT | RECV | PREPARED); + }else{ + request = build_request(buf==MPI_BOTTOM ? (void*)0 : buf, count, datatype, src, dst, tag, + comm, RMA | NON_PERSISTENT | RECV | PREPARED | ACCUMULATE); + request->op = op; + } return request; } @@ -612,7 +624,7 @@ static void finish_wait(MPI_Request * request, MPI_Status * status) print_request("Finishing", req); MPI_Datatype datatype = req->old_type; - if(datatype->has_subtype == 1){ + if((req->flags & ACCUMULATE) || (datatype->has_subtype == 1)){ if (!_xbt_replay_is_active()){ if( smpi_privatize_global_variables && ((char*)req->old_buf >= start_data_exe) @@ -622,13 +634,19 @@ static void finish_wait(MPI_Request * request, MPI_Status * status) switch_data_segment(smpi_process_index()); } } - // This part handles the problem of non-contignous memory - // the unserialization at the reception - s_smpi_subtype_t *subtype = datatype->substruct; if(req->flags & RECV) { - subtype->unserialize(req->buf, req->old_buf, req->real_size/smpi_datatype_size(datatype) , datatype->substruct); + + if(datatype->has_subtype == 1){ + // This part handles the problem of non-contignous memory + // the unserialization at the reception + s_smpi_subtype_t *subtype = datatype->substruct; + subtype->unserialize(req->buf, req->old_buf, req->real_size/smpi_datatype_size(datatype) , datatype->substruct, req->op); + if(req->detached == 0) free(req->buf); + }else{//apply op on contiguous buffer for accumulate + int n =req->real_size/smpi_datatype_size(datatype); + smpi_op_apply(req->op, req->buf, req->old_buf, &n, &datatype); + } } - if(req->detached == 0) free(req->buf); } smpi_comm_unuse(req->comm); smpi_datatype_unuse(datatype); diff --git a/src/smpi/smpi_mpi_dt.c b/src/smpi/smpi_mpi_dt.c index 2ad759ca9a..bfd4b03315 100644 --- a/src/smpi/smpi_mpi_dt.c +++ b/src/smpi/smpi_mpi_dt.c @@ -186,7 +186,7 @@ int smpi_datatype_copy(void *sendbuf, int sendcount, MPI_Datatype sendtype, else if (sendtype->has_subtype == 0) { s_smpi_subtype_t *subtype = recvtype->substruct; - subtype->unserialize( sendbuf, recvbuf,1, subtype); + subtype->unserialize( sendbuf, recvbuf,1, subtype, MPI_REPLACE); } else if (recvtype->has_subtype == 0) { @@ -200,7 +200,7 @@ int smpi_datatype_copy(void *sendbuf, int sendcount, MPI_Datatype sendtype, subtype->serialize( sendbuf, buf_tmp,count/smpi_datatype_size(sendtype), subtype); subtype = recvtype->substruct; - subtype->unserialize( buf_tmp, recvbuf,count/smpi_datatype_size(recvtype), subtype); + subtype->unserialize( buf_tmp, recvbuf,count/smpi_datatype_size(recvtype), subtype, MPI_REPLACE); free(buf_tmp); } @@ -220,7 +220,7 @@ int smpi_datatype_copy(void *sendbuf, int sendcount, MPI_Datatype sendtype, */ void serialize_vector( const void *noncontiguous_vector, void *contiguous_vector, - size_t count, + int count, void *type) { s_smpi_mpi_vector_t* type_c = (s_smpi_mpi_vector_t*)type; @@ -257,8 +257,9 @@ void serialize_vector( const void *noncontiguous_vector, */ void unserialize_vector( const void *contiguous_vector, void *noncontiguous_vector, - size_t count, - void *type) + int count, + void *type, + MPI_Op op) { s_smpi_mpi_vector_t* type_c = (s_smpi_mpi_vector_t*)type; int i; @@ -268,13 +269,16 @@ void unserialize_vector( const void *contiguous_vector, for (i = 0; i < type_c->block_count * count; i++) { if (type_c->old_type->has_subtype == 0) - memcpy(noncontiguous_vector_char, - contiguous_vector_char, type_c->block_length * type_c->size_oldtype); + smpi_op_apply(op, contiguous_vector_char, noncontiguous_vector_char, &type_c->block_length, + &type_c->old_type); + /* memcpy(noncontiguous_vector_char, + contiguous_vector_char, type_c->block_length * type_c->size_oldtype);*/ else ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_vector_char, noncontiguous_vector_char, type_c->block_length, - type_c->old_type->substruct); + type_c->old_type->substruct, + op); contiguous_vector_char += type_c->block_length*type_c->size_oldtype; if((i+1)%type_c->block_count ==0) noncontiguous_vector_char += type_c->block_length*smpi_datatype_get_extent(type_c->old_type); @@ -382,7 +386,7 @@ Contiguous Implementation */ void serialize_contiguous( const void *noncontiguous_hvector, void *contiguous_hvector, - size_t count, + int count, void *type) { s_smpi_mpi_contiguous_t* type_c = (s_smpi_mpi_contiguous_t*)type; @@ -402,15 +406,18 @@ void serialize_contiguous( const void *noncontiguous_hvector, */ void unserialize_contiguous( const void *contiguous_vector, void *noncontiguous_vector, - size_t count, - void *type) + int count, + void *type, + MPI_Op op) { s_smpi_mpi_contiguous_t* type_c = (s_smpi_mpi_contiguous_t*)type; char* contiguous_vector_char = (char*)contiguous_vector; char* noncontiguous_vector_char = (char*)noncontiguous_vector+type_c->lb; - - memcpy(noncontiguous_vector_char, - contiguous_vector_char, count* type_c->block_count * type_c->size_oldtype); + int n= count* type_c->block_count; + smpi_op_apply(op, contiguous_vector_char, noncontiguous_vector_char, &n, + &type_c->old_type); + /*memcpy(noncontiguous_vector_char, + contiguous_vector_char, count* type_c->block_count * type_c->size_oldtype);*/ } void free_contiguous(MPI_Datatype* d){ @@ -522,7 +529,7 @@ Hvector Implementation - Vector with stride in bytes */ void serialize_hvector( const void *noncontiguous_hvector, void *contiguous_hvector, - size_t count, + int count, void *type) { s_smpi_mpi_hvector_t* type_c = (s_smpi_mpi_hvector_t*)type; @@ -558,8 +565,9 @@ void serialize_hvector( const void *noncontiguous_hvector, */ void unserialize_hvector( const void *contiguous_vector, void *noncontiguous_vector, - size_t count, - void *type) + int count, + void *type, + MPI_Op op) { s_smpi_mpi_hvector_t* type_c = (s_smpi_mpi_hvector_t*)type; int i; @@ -569,13 +577,16 @@ void unserialize_hvector( const void *contiguous_vector, for (i = 0; i < type_c->block_count * count; i++) { if (type_c->old_type->has_subtype == 0) - memcpy(noncontiguous_vector_char, - contiguous_vector_char, type_c->block_length * type_c->size_oldtype); + smpi_op_apply(op, contiguous_vector_char, noncontiguous_vector_char, &type_c->block_length, + &type_c->old_type); + /*memcpy(noncontiguous_vector_char, + contiguous_vector_char, type_c->block_length * type_c->size_oldtype);*/ else ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_vector_char, noncontiguous_vector_char, type_c->block_length, - type_c->old_type->substruct); + type_c->old_type->substruct, + op); contiguous_vector_char += type_c->block_length*type_c->size_oldtype; if((i+1)%type_c->block_count ==0) noncontiguous_vector_char += type_c->block_length*type_c->size_oldtype; @@ -664,7 +675,7 @@ Indexed Implementation */ void serialize_indexed( const void *noncontiguous_indexed, void *contiguous_indexed, - size_t count, + int count, void *type) { s_smpi_mpi_indexed_t* type_c = (s_smpi_mpi_indexed_t*)type; @@ -701,8 +712,9 @@ void serialize_indexed( const void *noncontiguous_indexed, */ void unserialize_indexed( const void *contiguous_indexed, void *noncontiguous_indexed, - size_t count, - void *type) + int count, + void *type, + MPI_Op op) { s_smpi_mpi_indexed_t* type_c = (s_smpi_mpi_indexed_t*)type; @@ -712,13 +724,16 @@ void unserialize_indexed( const void *contiguous_indexed, for(j=0; jblock_count; i++) { if (type_c->old_type->has_subtype == 0) - memcpy(noncontiguous_indexed_char , - contiguous_indexed_char, type_c->block_lengths[i] * type_c->size_oldtype); + smpi_op_apply(op, contiguous_indexed_char, noncontiguous_indexed_char, &type_c->block_lengths[i], + &type_c->old_type); + /*memcpy(noncontiguous_indexed_char , + contiguous_indexed_char, type_c->block_lengths[i] * type_c->size_oldtype);*/ else ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_indexed_char, noncontiguous_indexed_char, type_c->block_lengths[i], - type_c->old_type->substruct); + type_c->old_type->substruct, + op); contiguous_indexed_char += type_c->block_lengths[i]*type_c->size_oldtype; if (iblock_count-1) @@ -829,7 +844,7 @@ Hindexed Implementation - Indexed with indices in bytes */ void serialize_hindexed( const void *noncontiguous_hindexed, void *contiguous_hindexed, - size_t count, + int count, void *type) { s_smpi_mpi_hindexed_t* type_c = (s_smpi_mpi_hindexed_t*)type; @@ -865,8 +880,9 @@ void serialize_hindexed( const void *noncontiguous_hindexed, */ void unserialize_hindexed( const void *contiguous_hindexed, void *noncontiguous_hindexed, - size_t count, - void *type) + int count, + void *type, + MPI_Op op) { s_smpi_mpi_hindexed_t* type_c = (s_smpi_mpi_hindexed_t*)type; int i,j; @@ -876,13 +892,16 @@ void unserialize_hindexed( const void *contiguous_hindexed, for(j=0; jblock_count; i++) { if (type_c->old_type->has_subtype == 0) - memcpy(noncontiguous_hindexed_char, - contiguous_hindexed_char, type_c->block_lengths[i] * type_c->size_oldtype); + smpi_op_apply(op, contiguous_hindexed_char, noncontiguous_hindexed_char, &type_c->block_lengths[i], + &type_c->old_type); + /*memcpy(noncontiguous_hindexed_char, + contiguous_hindexed_char, type_c->block_lengths[i] * type_c->size_oldtype);*/ else ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_hindexed_char, noncontiguous_hindexed_char, type_c->block_lengths[i], - type_c->old_type->substruct); + type_c->old_type->substruct, + op); contiguous_hindexed_char += type_c->block_lengths[i]*type_c->size_oldtype; if (iblock_count-1)noncontiguous_hindexed_char = (char*)noncontiguous_hindexed + type_c->block_indices[i+1]; @@ -991,7 +1010,7 @@ struct Implementation - Indexed with indices in bytes */ void serialize_struct( const void *noncontiguous_struct, void *contiguous_struct, - size_t count, + int count, void *type) { s_smpi_mpi_struct_t* type_c = (s_smpi_mpi_struct_t*)type; @@ -1028,8 +1047,9 @@ void serialize_struct( const void *noncontiguous_struct, */ void unserialize_struct( const void *contiguous_struct, void *noncontiguous_struct, - size_t count, - void *type) + int count, + void *type, + MPI_Op op) { s_smpi_mpi_struct_t* type_c = (s_smpi_mpi_struct_t*)type; int i,j; @@ -1039,13 +1059,16 @@ void unserialize_struct( const void *contiguous_struct, for(j=0; jblock_count; i++) { if (type_c->old_types[i]->has_subtype == 0) - memcpy(noncontiguous_struct_char, - contiguous_struct_char, type_c->block_lengths[i] * smpi_datatype_size(type_c->old_types[i])); + smpi_op_apply(op, contiguous_struct_char, noncontiguous_struct_char, &type_c->block_lengths[i], + & type_c->old_types[i]); + /*memcpy(noncontiguous_struct_char, + contiguous_struct_char, type_c->block_lengths[i] * smpi_datatype_size(type_c->old_types[i]));*/ else ((s_smpi_subtype_t*)type_c->old_types[i]->substruct)->unserialize( contiguous_struct_char, noncontiguous_struct_char, type_c->block_lengths[i], - type_c->old_types[i]->substruct); + type_c->old_types[i]->substruct, + op); contiguous_struct_char += type_c->block_lengths[i]*smpi_datatype_size(type_c->old_types[i]); if (iblock_count-1)noncontiguous_struct_char = (char*)noncontiguous_struct + type_c->block_indices[i+1]; @@ -1172,7 +1195,6 @@ typedef struct s_smpi_mpi_op { #define BXOR_OP(a, b) (b) ^= (a) #define MAXLOC_OP(a, b) (b) = (a.value) < (b.value) ? (b) : (a) #define MINLOC_OP(a, b) (b) = (a.value) < (b.value) ? (a) : (b) -#define REPLACE_OP(a,b) (b) = (a) #define APPLY_FUNC(a, b, length, type, func) \ { \ @@ -1480,23 +1502,7 @@ static void maxloc_func(void *a, void *b, int *length, static void replace_func(void *a, void *b, int *length, MPI_Datatype * datatype) { - if (*datatype == MPI_CHAR) { - APPLY_FUNC(a, b, length, char, REPLACE_OP); - } else if (*datatype == MPI_SHORT) { - APPLY_FUNC(a, b, length, short, REPLACE_OP); - } else if (*datatype == MPI_INT) { - APPLY_FUNC(a, b, length, int, REPLACE_OP); - } else if (*datatype == MPI_LONG) { - APPLY_FUNC(a, b, length, long, REPLACE_OP); - } else if (*datatype == MPI_UNSIGNED_SHORT) { - APPLY_FUNC(a, b, length, unsigned short, REPLACE_OP); - } else if (*datatype == MPI_UNSIGNED) { - APPLY_FUNC(a, b, length, unsigned int, REPLACE_OP); - } else if (*datatype == MPI_UNSIGNED_LONG) { - APPLY_FUNC(a, b, length, unsigned long, REPLACE_OP); - } else if (*datatype == MPI_BYTE) { - APPLY_FUNC(a, b, length, uint8_t, REPLACE_OP); - } + memcpy(b, a, *length * smpi_datatype_size(*datatype)); } #define CREATE_MPI_OP(name, func) \ diff --git a/src/smpi/smpi_mpi_dt_private.h b/src/smpi/smpi_mpi_dt_private.h index 6e81df3521..c88b436d9c 100644 --- a/src/smpi/smpi_mpi_dt_private.h +++ b/src/smpi/smpi_mpi_dt_private.h @@ -45,16 +45,16 @@ typedef struct s_smpi_mpi_contiguous{ MPI_Datatype old_type; MPI_Aint lb; size_t size_oldtype; - size_t block_count; + int block_count; } s_smpi_mpi_contiguous_t; typedef struct s_smpi_mpi_vector{ s_smpi_subtype_t base; MPI_Datatype old_type; size_t size_oldtype; - size_t block_stride; - size_t block_length; - size_t block_count; + int block_stride; + int block_length; + int block_count; } s_smpi_mpi_vector_t; typedef struct s_smpi_mpi_hvector{ @@ -62,8 +62,8 @@ typedef struct s_smpi_mpi_hvector{ MPI_Datatype old_type; size_t size_oldtype; MPI_Aint block_stride; - size_t block_length; - size_t block_count; + int block_length; + int block_count; } s_smpi_mpi_hvector_t; typedef struct s_smpi_mpi_indexed{ @@ -72,7 +72,7 @@ typedef struct s_smpi_mpi_indexed{ size_t size_oldtype; int* block_lengths; int* block_indices; - size_t block_count; + int block_count; } s_smpi_mpi_indexed_t; typedef struct s_smpi_mpi_hindexed{ @@ -81,7 +81,7 @@ typedef struct s_smpi_mpi_hindexed{ size_t size_oldtype; int* block_lengths; MPI_Aint* block_indices; - size_t block_count; + int block_count; } s_smpi_mpi_hindexed_t; typedef struct s_smpi_mpi_struct{ @@ -90,7 +90,7 @@ typedef struct s_smpi_mpi_struct{ size_t size_oldtype; int* block_lengths; MPI_Aint* block_indices; - size_t block_count; + int block_count; MPI_Datatype* old_types; } s_smpi_mpi_struct_t; @@ -100,12 +100,13 @@ typedef struct s_smpi_mpi_struct{ */ void unserialize_contiguous( const void *contiguous_vector, void *noncontiguous_vector, - size_t count, - void *type); + int count, + void *type, + MPI_Op op); void serialize_contiguous( const void *noncontiguous_vector, void *contiguous_vector, - size_t count, + int count, void *type); void free_contiguous(MPI_Datatype* type); @@ -117,12 +118,13 @@ s_smpi_mpi_contiguous_t* smpi_datatype_contiguous_create( MPI_Aint lb, void unserialize_vector( const void *contiguous_vector, void *noncontiguous_vector, - size_t count, - void *type); + int count, + void *type, + MPI_Op op); void serialize_vector( const void *noncontiguous_vector, void *contiguous_vector, - size_t count, + int count, void *type); void free_vector(MPI_Datatype* type); @@ -135,12 +137,13 @@ s_smpi_mpi_vector_t* smpi_datatype_vector_create( int block_stride, void unserialize_hvector( const void *contiguous_vector, void *noncontiguous_vector, - size_t count, - void *type); + int count, + void *type, + MPI_Op op); void serialize_hvector( const void *noncontiguous_vector, void *contiguous_vector, - size_t count, + int count, void *type); void free_hvector(MPI_Datatype* type); @@ -154,12 +157,13 @@ s_smpi_mpi_hvector_t* smpi_datatype_hvector_create( MPI_Aint block_stride, void unserialize_indexed( const void *contiguous_indexed, void *noncontiguous_indexed, - size_t count, - void *type); + int count, + void *type, + MPI_Op op); void serialize_indexed( const void *noncontiguous_vector, void *contiguous_vector, - size_t count, + int count, void *type); void free_indexed(MPI_Datatype* type); @@ -172,12 +176,13 @@ s_smpi_mpi_indexed_t* smpi_datatype_indexed_create(int* block_lengths, void unserialize_hindexed( const void *contiguous_indexed, void *noncontiguous_indexed, - size_t count, - void *type); + int count, + void *type, + MPI_Op op); void serialize_hindexed( const void *noncontiguous_vector, void *contiguous_vector, - size_t count, + int count, void *type); void free_hindexed(MPI_Datatype* type); @@ -190,12 +195,13 @@ s_smpi_mpi_hindexed_t* smpi_datatype_hindexed_create(int* block_lengths, void unserialize_struct( const void *contiguous_indexed, void *noncontiguous_indexed, - size_t count, - void *type); + int count, + void *type, + MPI_Op op); void serialize_struct( const void *noncontiguous_vector, void *contiguous_vector, - size_t count, + int count, void *type); void free_struct(MPI_Datatype* type); diff --git a/src/smpi/smpi_rma.c b/src/smpi/smpi_rma.c index 89224f8027..7c7d867538 100644 --- a/src/smpi/smpi_rma.c +++ b/src/smpi/smpi_rma.c @@ -116,7 +116,7 @@ int smpi_mpi_put( void *origin_addr, int origin_count, MPI_Datatype origin_datat //get receiver pointer MPI_Win recv_win = win->connected_wins[target_rank]; - void* recv_addr = (void*) ( ((char*)recv_win->base) + target_disp * smpi_datatype_size(target_datatype)); + void* recv_addr = (void*) ( ((char*)recv_win->base) + target_disp * recv_win->disp_unit); smpi_datatype_use(origin_datatype); smpi_datatype_use(target_datatype); XBT_DEBUG("Entering MPI_Put to %d", target_rank); @@ -124,11 +124,11 @@ int smpi_mpi_put( void *origin_addr, int origin_count, MPI_Datatype origin_datat if(target_rank != smpi_comm_rank(win->comm)){ //prepare send_request MPI_Request sreq = smpi_rma_send_init(origin_addr, origin_count, origin_datatype, - smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+1, win->comm); + smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+1, win->comm, MPI_OP_NULL); //prepare receiver request MPI_Request rreq = smpi_rma_recv_init(recv_addr, target_count, target_datatype, - smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+1, recv_win->comm); + smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+1, recv_win->comm, MPI_OP_NULL); //push request to receiver's win xbt_dynar_push_as(recv_win->requests, MPI_Request, rreq); @@ -138,6 +138,9 @@ int smpi_mpi_put( void *origin_addr, int origin_count, MPI_Datatype origin_datat //push request to sender's win xbt_dynar_push_as(win->requests, MPI_Request, sreq); + }else{ + smpi_datatype_copy(origin_addr, origin_count, origin_datatype, + recv_addr, target_count, target_datatype); } return MPI_SUCCESS; @@ -149,7 +152,7 @@ int smpi_mpi_get( void *origin_addr, int origin_count, MPI_Datatype origin_datat //get sender pointer MPI_Win send_win = win->connected_wins[target_rank]; - void* send_addr = (void*)( ((char*)send_win->base) + target_disp * smpi_datatype_size(target_datatype)); + void* send_addr = (void*)( ((char*)send_win->base) + target_disp * send_win->disp_unit); smpi_datatype_use(origin_datatype); smpi_datatype_use(target_datatype); XBT_DEBUG("Entering MPI_Get from %d", target_rank); @@ -157,11 +160,11 @@ int smpi_mpi_get( void *origin_addr, int origin_count, MPI_Datatype origin_datat if(target_rank != smpi_comm_rank(win->comm)){ //prepare send_request MPI_Request sreq = smpi_rma_send_init(send_addr, target_count, target_datatype, - smpi_group_index(smpi_comm_group(win->comm),target_rank), smpi_process_index(), RMA_TAG+2, send_win->comm); + smpi_group_index(smpi_comm_group(win->comm),target_rank), smpi_process_index(), RMA_TAG+2, send_win->comm, MPI_OP_NULL); //prepare receiver request MPI_Request rreq = smpi_rma_recv_init(origin_addr, origin_count, origin_datatype, - smpi_group_index(smpi_comm_group(win->comm),target_rank), smpi_process_index(), RMA_TAG+2, win->comm); + smpi_group_index(smpi_comm_group(win->comm),target_rank), smpi_process_index(), RMA_TAG+2, win->comm, MPI_OP_NULL); //push request to receiver's win xbt_dynar_push_as(send_win->requests, MPI_Request, sreq); @@ -171,6 +174,9 @@ int smpi_mpi_get( void *origin_addr, int origin_count, MPI_Datatype origin_datat //push request to sender's win xbt_dynar_push_as(win->requests, MPI_Request, rreq); + }else{ + smpi_datatype_copy(send_addr, target_count, target_datatype, + origin_addr, origin_count, origin_datatype); } return MPI_SUCCESS; @@ -180,33 +186,33 @@ int smpi_mpi_get( void *origin_addr, int origin_count, MPI_Datatype origin_datat int smpi_mpi_accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win) { + //FIXME: local version //get receiver pointer MPI_Win recv_win = win->connected_wins[target_rank]; - void* recv_addr = (void*)( ((char*)recv_win->base) + target_disp * smpi_datatype_size(target_datatype) ); + void* recv_addr = (void*)( ((char*)recv_win->base) + target_disp * recv_win->disp_unit); XBT_DEBUG("Entering MPI_Accumulate to %d", target_rank); smpi_datatype_use(origin_datatype); smpi_datatype_use(target_datatype); - if(target_rank != smpi_comm_rank(win->comm)){ + //prepare send_request MPI_Request sreq = smpi_rma_send_init(origin_addr, origin_count, origin_datatype, - smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+3, win->comm); + smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+3, win->comm, op); //prepare receiver request - MPI_Request rreq = smpi_rma_recv_init(NULL, 0, target_datatype, - smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+3, recv_win->comm); - rreq->flags |= ACCUMULATE; + MPI_Request rreq = smpi_rma_recv_init(recv_addr, target_count, target_datatype, + smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+3, recv_win->comm, op); //push request to receiver's win xbt_dynar_push_as(recv_win->requests, MPI_Request, rreq); //start send smpi_mpi_start(sreq); + //push request to sender's win xbt_dynar_push_as(win->requests, MPI_Request, sreq); - } - //perform actual accumulation - smpi_op_apply(op, origin_addr, recv_addr, &origin_count, &origin_datatype); + + return MPI_SUCCESS; } -- 2.20.1