Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Change the way MPI_Accumulate works with serialization, to handle complex datatypes.
authorAugustin Degomme <augustin.degomme@imag.fr>
Tue, 24 Jun 2014 14:41:49 +0000 (16:41 +0200)
committerAugustin Degomme <augustin.degomme@imag.fr>
Tue, 24 Jun 2014 14:41:49 +0000 (16:41 +0200)
src/smpi/private.h
src/smpi/smpi_base.c
src/smpi/smpi_mpi_dt.c
src/smpi/smpi_mpi_dt_private.h
src/smpi/smpi_rma.c

index f8d7e7c..fe8c16a 100644 (file)
@@ -43,8 +43,8 @@ enum smpi_process_state{
 // for each such structure these function should be implemented (vector
 // index hvector hindex struct)
 typedef struct s_smpi_subtype{
 // for each such structure these function should be implemented (vector
 // index hvector hindex struct)
 typedef struct s_smpi_subtype{
-  void (*serialize)(const void * input, void *output, size_t count, void* subtype);
-  void (*unserialize)(const void * input, void *output, size_t count, void* subtype);
+  void (*serialize)(const void * input, void *output, int count, void* subtype);
+  void (*unserialize)(const void * input, void *output, int count, void* subtype, MPI_Op op);
   void (*subtype_free)(MPI_Datatype* type);
 } s_smpi_subtype_t;
 
   void (*subtype_free)(MPI_Datatype* type);
 } s_smpi_subtype_t;
 
@@ -103,6 +103,7 @@ typedef struct s_smpi_mpi_request {
   int detached;
   MPI_Request detached_sender;
   int refcount;
   int detached;
   MPI_Request detached_sender;
   int refcount;
+  MPI_Op op;
 #ifdef HAVE_TRACING
   int send;
   int recv;
 #ifdef HAVE_TRACING
   int send;
   int recv;
@@ -259,9 +260,9 @@ MPI_Request smpi_irecv_init(void *buf, int count, MPI_Datatype datatype,
 MPI_Request smpi_mpi_irecv(void *buf, int count, MPI_Datatype datatype,
                            int src, int tag, MPI_Comm comm);
 MPI_Request smpi_rma_send_init(void *buf, int count, MPI_Datatype datatype,
 MPI_Request smpi_mpi_irecv(void *buf, int count, MPI_Datatype datatype,
                            int src, int tag, MPI_Comm comm);
 MPI_Request smpi_rma_send_init(void *buf, int count, MPI_Datatype datatype,
-                            int src, int dst, int tag, MPI_Comm comm);
+                            int src, int dst, int tag, MPI_Comm comm, MPI_Op op);
 MPI_Request smpi_rma_recv_init(void *buf, int count, MPI_Datatype datatype,
 MPI_Request smpi_rma_recv_init(void *buf, int count, MPI_Datatype datatype,
-                            int src, int dst, int tag, MPI_Comm comm);
+                            int src, int dst, int tag, MPI_Comm comm, MPI_Op op);
 void smpi_mpi_recv(void *buf, int count, MPI_Datatype datatype, int src,
                    int tag, MPI_Comm comm, MPI_Status * status);
 void smpi_mpi_send(void *buf, int count, MPI_Datatype datatype, int dst,
 void smpi_mpi_recv(void *buf, int count, MPI_Datatype datatype, int src,
                    int tag, MPI_Comm comm, MPI_Status * status);
 void smpi_mpi_send(void *buf, int count, MPI_Datatype datatype, int dst,
index 830cd2d..562ddde 100644 (file)
@@ -197,11 +197,11 @@ static MPI_Request build_request(void *buf, int count,
 
   s_smpi_subtype_t *subtype = datatype->substruct;
 
 
   s_smpi_subtype_t *subtype = datatype->substruct;
 
-  if(datatype->has_subtype == 1){
+  if(((flags & RECV) && (flags & ACCUMULATE)) || (datatype->has_subtype == 1)){
     // This part handles the problem of non-contiguous memory
     old_buf = buf;
     buf = count==0 ? NULL : xbt_malloc(count*smpi_datatype_size(datatype));
     // This part handles the problem of non-contiguous memory
     old_buf = buf;
     buf = count==0 ? NULL : xbt_malloc(count*smpi_datatype_size(datatype));
-    if (flags & SEND) {
+    if ((datatype->has_subtype == 1) && (flags & SEND)) {
       subtype->serialize(old_buf, buf, count, datatype->substruct);
     }
   }
       subtype->serialize(old_buf, buf, count, datatype->substruct);
     }
   }
@@ -230,7 +230,7 @@ static MPI_Request build_request(void *buf, int count,
     request->refcount = 1;
   else
     request->refcount = 0;
     request->refcount = 1;
   else
     request->refcount = 0;
-
+  request->op = MPI_REPLACE;
 #ifdef HAVE_TRACING
   request->send = 0;
   request->recv = 0;
 #ifdef HAVE_TRACING
   request->send = 0;
   request->recv = 0;
@@ -336,7 +336,7 @@ void smpi_mpi_start(MPI_Request request)
     smpi_comm_use(request->comm);
     request->action = simcall_comm_irecv(mailbox, request->buf,
                                          &request->real_size, &match_recv,
     smpi_comm_use(request->comm);
     request->action = simcall_comm_irecv(mailbox, request->buf,
                                          &request->real_size, &match_recv,
-                                         (request->flags & ACCUMULATE)? NULL : &smpi_comm_copy_buffer_callback,
+                                         &smpi_comm_copy_buffer_callback,
                                          request, -1.0);
 
     //integrate pseudo-timing for buffering of small messages, do not bother to execute the simcall if 0
                                          request, -1.0);
 
     //integrate pseudo-timing for buffering of small messages, do not bother to execute the simcall if 0
@@ -412,7 +412,7 @@ void smpi_mpi_start(MPI_Request request)
                          buf, request->real_size,
                          &match_send,
                          &xbt_free_f, // how to free the userdata if a detached send fails
                          buf, request->real_size,
                          &match_send,
                          &xbt_free_f, // how to free the userdata if a detached send fails
-                         (request->flags & ACCUMULATE)? NULL : &smpi_comm_copy_buffer_callback,
+                         &smpi_comm_copy_buffer_callback,
                          request,
                          // detach if msg size < eager/rdv switch limit
                          request->detached);
                          request,
                          // detach if msg size < eager/rdv switch limit
                          request->detached);
@@ -458,20 +458,32 @@ void smpi_mpi_request_free(MPI_Request * request)
 
 
 MPI_Request smpi_rma_send_init(void *buf, int count, MPI_Datatype datatype,
 
 
 MPI_Request smpi_rma_send_init(void *buf, int count, MPI_Datatype datatype,
-                            int src, int dst, int tag, MPI_Comm comm)
+                            int src, int dst, int tag, MPI_Comm comm, MPI_Op op)
 {
   MPI_Request request = NULL; /* MC needs the comm to be set to NULL during the call */
 {
   MPI_Request request = NULL; /* MC needs the comm to be set to NULL during the call */
-  request = build_request(buf==MPI_BOTTOM ? (void*)0 : buf , count, datatype, src, dst, tag,
-                          comm, RMA | NON_PERSISTENT | ISEND | SEND | PREPARED);
+  if(op==MPI_OP_NULL){
+    request = build_request(buf==MPI_BOTTOM ? (void*)0 : buf , count, datatype, src, dst, tag,
+                            comm, RMA | NON_PERSISTENT | ISEND | SEND | PREPARED);
+  }else{
+    request = build_request(buf==MPI_BOTTOM ? (void*)0 : buf, count, datatype,  src, dst, tag,
+                            comm, RMA | NON_PERSISTENT | ISEND | SEND | PREPARED | ACCUMULATE);
+    request->op = op;
+  }
   return request;
 }
 
 MPI_Request smpi_rma_recv_init(void *buf, int count, MPI_Datatype datatype,
   return request;
 }
 
 MPI_Request smpi_rma_recv_init(void *buf, int count, MPI_Datatype datatype,
-                            int src, int dst, int tag, MPI_Comm comm)
+                            int src, int dst, int tag, MPI_Comm comm, MPI_Op op)
 {
   MPI_Request request = NULL; /* MC needs the comm to be set to NULL during the call */
 {
   MPI_Request request = NULL; /* MC needs the comm to be set to NULL during the call */
-  request = build_request(buf==MPI_BOTTOM ? (void*)0 : buf, count, datatype,  src, dst, tag,
-                          comm, RMA | NON_PERSISTENT | RECV | PREPARED);
+  if(op==MPI_OP_NULL){
+    request = build_request(buf==MPI_BOTTOM ? (void*)0 : buf, count, datatype,  src, dst, tag,
+                            comm, RMA | NON_PERSISTENT | RECV | PREPARED);
+  }else{
+    request = build_request(buf==MPI_BOTTOM ? (void*)0 : buf, count, datatype,  src, dst, tag,
+                            comm, RMA | NON_PERSISTENT | RECV | PREPARED | ACCUMULATE);
+    request->op = op;
+  }
   return request;
 }
 
   return request;
 }
 
@@ -612,7 +624,7 @@ static void finish_wait(MPI_Request * request, MPI_Status * status)
     print_request("Finishing", req);
     MPI_Datatype datatype = req->old_type;
 
     print_request("Finishing", req);
     MPI_Datatype datatype = req->old_type;
 
-    if(datatype->has_subtype == 1){
+    if((req->flags & ACCUMULATE) || (datatype->has_subtype == 1)){
       if (!_xbt_replay_is_active()){
         if( smpi_privatize_global_variables
             && ((char*)req->old_buf >= start_data_exe)
       if (!_xbt_replay_is_active()){
         if( smpi_privatize_global_variables
             && ((char*)req->old_buf >= start_data_exe)
@@ -622,13 +634,19 @@ static void finish_wait(MPI_Request * request, MPI_Status * status)
             switch_data_segment(smpi_process_index());
         }
       }
             switch_data_segment(smpi_process_index());
         }
       }
-      // This part handles the problem of non-contignous memory
-      // the unserialization at the reception
-      s_smpi_subtype_t *subtype = datatype->substruct;
       if(req->flags & RECV) {
       if(req->flags & RECV) {
-        subtype->unserialize(req->buf, req->old_buf, req->real_size/smpi_datatype_size(datatype) , datatype->substruct);
+
+        if(datatype->has_subtype == 1){
+          // This part handles the problem of non-contignous memory
+          // the unserialization at the reception
+          s_smpi_subtype_t *subtype = datatype->substruct;
+            subtype->unserialize(req->buf, req->old_buf, req->real_size/smpi_datatype_size(datatype) , datatype->substruct, req->op);
+          if(req->detached == 0) free(req->buf);
+        }else{//apply op on contiguous buffer for accumulate
+            int n =req->real_size/smpi_datatype_size(datatype);
+            smpi_op_apply(req->op, req->buf, req->old_buf, &n, &datatype);
+        }
       }
       }
-      if(req->detached == 0) free(req->buf);
     }
     smpi_comm_unuse(req->comm);
     smpi_datatype_unuse(datatype);
     }
     smpi_comm_unuse(req->comm);
     smpi_datatype_unuse(datatype);
index 2ad759c..bfd4b03 100644 (file)
@@ -186,7 +186,7 @@ int smpi_datatype_copy(void *sendbuf, int sendcount, MPI_Datatype sendtype,
     else if (sendtype->has_subtype == 0)
     {
       s_smpi_subtype_t *subtype =  recvtype->substruct;
     else if (sendtype->has_subtype == 0)
     {
       s_smpi_subtype_t *subtype =  recvtype->substruct;
-      subtype->unserialize( sendbuf, recvbuf,1, subtype);
+      subtype->unserialize( sendbuf, recvbuf,1, subtype, MPI_REPLACE);
     }
     else if (recvtype->has_subtype == 0)
     {
     }
     else if (recvtype->has_subtype == 0)
     {
@@ -200,7 +200,7 @@ int smpi_datatype_copy(void *sendbuf, int sendcount, MPI_Datatype sendtype,
 
       subtype->serialize( sendbuf, buf_tmp,count/smpi_datatype_size(sendtype), subtype);
       subtype =  recvtype->substruct;
 
       subtype->serialize( sendbuf, buf_tmp,count/smpi_datatype_size(sendtype), subtype);
       subtype =  recvtype->substruct;
-      subtype->unserialize( buf_tmp, recvbuf,count/smpi_datatype_size(recvtype), subtype);
+      subtype->unserialize( buf_tmp, recvbuf,count/smpi_datatype_size(recvtype), subtype, MPI_REPLACE);
 
       free(buf_tmp);
     }
 
       free(buf_tmp);
     }
@@ -220,7 +220,7 @@ int smpi_datatype_copy(void *sendbuf, int sendcount, MPI_Datatype sendtype,
  */
 void serialize_vector( const void *noncontiguous_vector,
                        void *contiguous_vector,
  */
 void serialize_vector( const void *noncontiguous_vector,
                        void *contiguous_vector,
-                       size_t count,
+                       int count,
                        void *type)
 {
   s_smpi_mpi_vector_t* type_c = (s_smpi_mpi_vector_t*)type;
                        void *type)
 {
   s_smpi_mpi_vector_t* type_c = (s_smpi_mpi_vector_t*)type;
@@ -257,8 +257,9 @@ void serialize_vector( const void *noncontiguous_vector,
  */
 void unserialize_vector( const void *contiguous_vector,
                          void *noncontiguous_vector,
  */
 void unserialize_vector( const void *contiguous_vector,
                          void *noncontiguous_vector,
-                         size_t count,
-                         void *type)
+                         int count,
+                         void *type,
+                         MPI_Op op)
 {
   s_smpi_mpi_vector_t* type_c = (s_smpi_mpi_vector_t*)type;
   int i;
 {
   s_smpi_mpi_vector_t* type_c = (s_smpi_mpi_vector_t*)type;
   int i;
@@ -268,13 +269,16 @@ void unserialize_vector( const void *contiguous_vector,
 
   for (i = 0; i < type_c->block_count * count; i++) {
     if (type_c->old_type->has_subtype == 0)
 
   for (i = 0; i < type_c->block_count * count; i++) {
     if (type_c->old_type->has_subtype == 0)
-      memcpy(noncontiguous_vector_char,
-             contiguous_vector_char, type_c->block_length * type_c->size_oldtype);
+      smpi_op_apply(op, contiguous_vector_char, noncontiguous_vector_char, &type_c->block_length,
+          &type_c->old_type);
+     /* memcpy(noncontiguous_vector_char,
+             contiguous_vector_char, type_c->block_length * type_c->size_oldtype);*/
     else
       ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_vector_char,
                                                                      noncontiguous_vector_char,
                                                                      type_c->block_length,
     else
       ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_vector_char,
                                                                      noncontiguous_vector_char,
                                                                      type_c->block_length,
-                                                                     type_c->old_type->substruct);
+                                                                     type_c->old_type->substruct,
+                                                                     op);
     contiguous_vector_char += type_c->block_length*type_c->size_oldtype;
     if((i+1)%type_c->block_count ==0)
     noncontiguous_vector_char += type_c->block_length*smpi_datatype_get_extent(type_c->old_type);
     contiguous_vector_char += type_c->block_length*type_c->size_oldtype;
     if((i+1)%type_c->block_count ==0)
     noncontiguous_vector_char += type_c->block_length*smpi_datatype_get_extent(type_c->old_type);
@@ -382,7 +386,7 @@ Contiguous Implementation
  */
 void serialize_contiguous( const void *noncontiguous_hvector,
                        void *contiguous_hvector,
  */
 void serialize_contiguous( const void *noncontiguous_hvector,
                        void *contiguous_hvector,
-                       size_t count,
+                       int count,
                        void *type)
 {
   s_smpi_mpi_contiguous_t* type_c = (s_smpi_mpi_contiguous_t*)type;
                        void *type)
 {
   s_smpi_mpi_contiguous_t* type_c = (s_smpi_mpi_contiguous_t*)type;
@@ -402,15 +406,18 @@ void serialize_contiguous( const void *noncontiguous_hvector,
  */
 void unserialize_contiguous( const void *contiguous_vector,
                          void *noncontiguous_vector,
  */
 void unserialize_contiguous( const void *contiguous_vector,
                          void *noncontiguous_vector,
-                         size_t count,
-                         void *type)
+                         int count,
+                         void *type,
+                         MPI_Op op)
 {
   s_smpi_mpi_contiguous_t* type_c = (s_smpi_mpi_contiguous_t*)type;
   char* contiguous_vector_char = (char*)contiguous_vector;
   char* noncontiguous_vector_char = (char*)noncontiguous_vector+type_c->lb;
 {
   s_smpi_mpi_contiguous_t* type_c = (s_smpi_mpi_contiguous_t*)type;
   char* contiguous_vector_char = (char*)contiguous_vector;
   char* noncontiguous_vector_char = (char*)noncontiguous_vector+type_c->lb;
-
-  memcpy(noncontiguous_vector_char,
-           contiguous_vector_char, count*  type_c->block_count * type_c->size_oldtype);
+  int n= count* type_c->block_count;
+  smpi_op_apply(op, contiguous_vector_char, noncontiguous_vector_char, &n,
+            &type_c->old_type);
+       /*memcpy(noncontiguous_vector_char,
+           contiguous_vector_char, count*  type_c->block_count * type_c->size_oldtype);*/
 }
 
 void free_contiguous(MPI_Datatype* d){
 }
 
 void free_contiguous(MPI_Datatype* d){
@@ -522,7 +529,7 @@ Hvector Implementation - Vector with stride in bytes
  */
 void serialize_hvector( const void *noncontiguous_hvector,
                        void *contiguous_hvector,
  */
 void serialize_hvector( const void *noncontiguous_hvector,
                        void *contiguous_hvector,
-                       size_t count,
+                       int count,
                        void *type)
 {
   s_smpi_mpi_hvector_t* type_c = (s_smpi_mpi_hvector_t*)type;
                        void *type)
 {
   s_smpi_mpi_hvector_t* type_c = (s_smpi_mpi_hvector_t*)type;
@@ -558,8 +565,9 @@ void serialize_hvector( const void *noncontiguous_hvector,
  */
 void unserialize_hvector( const void *contiguous_vector,
                          void *noncontiguous_vector,
  */
 void unserialize_hvector( const void *contiguous_vector,
                          void *noncontiguous_vector,
-                         size_t count,
-                         void *type)
+                         int count,
+                         void *type,
+                         MPI_Op op)
 {
   s_smpi_mpi_hvector_t* type_c = (s_smpi_mpi_hvector_t*)type;
   int i;
 {
   s_smpi_mpi_hvector_t* type_c = (s_smpi_mpi_hvector_t*)type;
   int i;
@@ -569,13 +577,16 @@ void unserialize_hvector( const void *contiguous_vector,
 
   for (i = 0; i < type_c->block_count * count; i++) {
     if (type_c->old_type->has_subtype == 0)
 
   for (i = 0; i < type_c->block_count * count; i++) {
     if (type_c->old_type->has_subtype == 0)
-      memcpy(noncontiguous_vector_char,
-           contiguous_vector_char, type_c->block_length * type_c->size_oldtype);
+      smpi_op_apply(op, contiguous_vector_char, noncontiguous_vector_char, &type_c->block_length,
+                  &type_c->old_type);
+             /*memcpy(noncontiguous_vector_char,
+           contiguous_vector_char, type_c->block_length * type_c->size_oldtype);*/
     else
       ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_vector_char,
                                                                      noncontiguous_vector_char,
                                                                      type_c->block_length,
     else
       ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_vector_char,
                                                                      noncontiguous_vector_char,
                                                                      type_c->block_length,
-                                                                     type_c->old_type->substruct);
+                                                                     type_c->old_type->substruct,
+                                                                     op);
     contiguous_vector_char += type_c->block_length*type_c->size_oldtype;
     if((i+1)%type_c->block_count ==0)
     noncontiguous_vector_char += type_c->block_length*type_c->size_oldtype;
     contiguous_vector_char += type_c->block_length*type_c->size_oldtype;
     if((i+1)%type_c->block_count ==0)
     noncontiguous_vector_char += type_c->block_length*type_c->size_oldtype;
@@ -664,7 +675,7 @@ Indexed Implementation
  */
 void serialize_indexed( const void *noncontiguous_indexed,
                        void *contiguous_indexed,
  */
 void serialize_indexed( const void *noncontiguous_indexed,
                        void *contiguous_indexed,
-                       size_t count,
+                       int count,
                        void *type)
 {
   s_smpi_mpi_indexed_t* type_c = (s_smpi_mpi_indexed_t*)type;
                        void *type)
 {
   s_smpi_mpi_indexed_t* type_c = (s_smpi_mpi_indexed_t*)type;
@@ -701,8 +712,9 @@ void serialize_indexed( const void *noncontiguous_indexed,
  */
 void unserialize_indexed( const void *contiguous_indexed,
                          void *noncontiguous_indexed,
  */
 void unserialize_indexed( const void *contiguous_indexed,
                          void *noncontiguous_indexed,
-                         size_t count,
-                         void *type)
+                         int count,
+                         void *type,
+                         MPI_Op op)
 {
 
   s_smpi_mpi_indexed_t* type_c = (s_smpi_mpi_indexed_t*)type;
 {
 
   s_smpi_mpi_indexed_t* type_c = (s_smpi_mpi_indexed_t*)type;
@@ -712,13 +724,16 @@ void unserialize_indexed( const void *contiguous_indexed,
   for(j=0; j<count;j++){
     for (i = 0; i < type_c->block_count; i++) {
       if (type_c->old_type->has_subtype == 0)
   for(j=0; j<count;j++){
     for (i = 0; i < type_c->block_count; i++) {
       if (type_c->old_type->has_subtype == 0)
-        memcpy(noncontiguous_indexed_char ,
-             contiguous_indexed_char, type_c->block_lengths[i] * type_c->size_oldtype);
+        smpi_op_apply(op, contiguous_indexed_char, noncontiguous_indexed_char, &type_c->block_lengths[i],
+                    &type_c->old_type);
+               /*memcpy(noncontiguous_indexed_char ,
+             contiguous_indexed_char, type_c->block_lengths[i] * type_c->size_oldtype);*/
       else
         ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_indexed_char,
                                                                        noncontiguous_indexed_char,
                                                                        type_c->block_lengths[i],
       else
         ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_indexed_char,
                                                                        noncontiguous_indexed_char,
                                                                        type_c->block_lengths[i],
-                                                                       type_c->old_type->substruct);
+                                                                       type_c->old_type->substruct,
+                                                                       op);
 
       contiguous_indexed_char += type_c->block_lengths[i]*type_c->size_oldtype;
       if (i<type_c->block_count-1)
 
       contiguous_indexed_char += type_c->block_lengths[i]*type_c->size_oldtype;
       if (i<type_c->block_count-1)
@@ -829,7 +844,7 @@ Hindexed Implementation - Indexed with indices in bytes
  */
 void serialize_hindexed( const void *noncontiguous_hindexed,
                        void *contiguous_hindexed,
  */
 void serialize_hindexed( const void *noncontiguous_hindexed,
                        void *contiguous_hindexed,
-                       size_t count,
+                       int count,
                        void *type)
 {
   s_smpi_mpi_hindexed_t* type_c = (s_smpi_mpi_hindexed_t*)type;
                        void *type)
 {
   s_smpi_mpi_hindexed_t* type_c = (s_smpi_mpi_hindexed_t*)type;
@@ -865,8 +880,9 @@ void serialize_hindexed( const void *noncontiguous_hindexed,
  */
 void unserialize_hindexed( const void *contiguous_hindexed,
                          void *noncontiguous_hindexed,
  */
 void unserialize_hindexed( const void *contiguous_hindexed,
                          void *noncontiguous_hindexed,
-                         size_t count,
-                         void *type)
+                         int count,
+                         void *type,
+                         MPI_Op op)
 {
   s_smpi_mpi_hindexed_t* type_c = (s_smpi_mpi_hindexed_t*)type;
   int i,j;
 {
   s_smpi_mpi_hindexed_t* type_c = (s_smpi_mpi_hindexed_t*)type;
   int i,j;
@@ -876,13 +892,16 @@ void unserialize_hindexed( const void *contiguous_hindexed,
   for(j=0; j<count;j++){
     for (i = 0; i < type_c->block_count; i++) {
       if (type_c->old_type->has_subtype == 0)
   for(j=0; j<count;j++){
     for (i = 0; i < type_c->block_count; i++) {
       if (type_c->old_type->has_subtype == 0)
-        memcpy(noncontiguous_hindexed_char,
-               contiguous_hindexed_char, type_c->block_lengths[i] * type_c->size_oldtype);
+        smpi_op_apply(op, contiguous_hindexed_char, noncontiguous_hindexed_char, &type_c->block_lengths[i],
+                            &type_c->old_type);
+                       /*memcpy(noncontiguous_hindexed_char,
+               contiguous_hindexed_char, type_c->block_lengths[i] * type_c->size_oldtype);*/
       else
         ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_hindexed_char,
                                                                        noncontiguous_hindexed_char,
                                                                        type_c->block_lengths[i],
       else
         ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_hindexed_char,
                                                                        noncontiguous_hindexed_char,
                                                                        type_c->block_lengths[i],
-                                                                       type_c->old_type->substruct);
+                                                                       type_c->old_type->substruct,
+                                                                       op);
 
       contiguous_hindexed_char += type_c->block_lengths[i]*type_c->size_oldtype;
       if (i<type_c->block_count-1)noncontiguous_hindexed_char = (char*)noncontiguous_hindexed + type_c->block_indices[i+1];
 
       contiguous_hindexed_char += type_c->block_lengths[i]*type_c->size_oldtype;
       if (i<type_c->block_count-1)noncontiguous_hindexed_char = (char*)noncontiguous_hindexed + type_c->block_indices[i+1];
@@ -991,7 +1010,7 @@ struct Implementation - Indexed with indices in bytes
  */
 void serialize_struct( const void *noncontiguous_struct,
                        void *contiguous_struct,
  */
 void serialize_struct( const void *noncontiguous_struct,
                        void *contiguous_struct,
-                       size_t count,
+                       int count,
                        void *type)
 {
   s_smpi_mpi_struct_t* type_c = (s_smpi_mpi_struct_t*)type;
                        void *type)
 {
   s_smpi_mpi_struct_t* type_c = (s_smpi_mpi_struct_t*)type;
@@ -1028,8 +1047,9 @@ void serialize_struct( const void *noncontiguous_struct,
  */
 void unserialize_struct( const void *contiguous_struct,
                          void *noncontiguous_struct,
  */
 void unserialize_struct( const void *contiguous_struct,
                          void *noncontiguous_struct,
-                         size_t count,
-                         void *type)
+                         int count,
+                         void *type,
+                         MPI_Op op)
 {
   s_smpi_mpi_struct_t* type_c = (s_smpi_mpi_struct_t*)type;
   int i,j;
 {
   s_smpi_mpi_struct_t* type_c = (s_smpi_mpi_struct_t*)type;
   int i,j;
@@ -1039,13 +1059,16 @@ void unserialize_struct( const void *contiguous_struct,
   for(j=0; j<count;j++){
     for (i = 0; i < type_c->block_count; i++) {
       if (type_c->old_types[i]->has_subtype == 0)
   for(j=0; j<count;j++){
     for (i = 0; i < type_c->block_count; i++) {
       if (type_c->old_types[i]->has_subtype == 0)
-        memcpy(noncontiguous_struct_char,
-             contiguous_struct_char, type_c->block_lengths[i] * smpi_datatype_size(type_c->old_types[i]));
+        smpi_op_apply(op, contiguous_struct_char, noncontiguous_struct_char, &type_c->block_lengths[i],
+           & type_c->old_types[i]);
+                       /*memcpy(noncontiguous_struct_char,
+             contiguous_struct_char, type_c->block_lengths[i] * smpi_datatype_size(type_c->old_types[i]));*/
       else
         ((s_smpi_subtype_t*)type_c->old_types[i]->substruct)->unserialize( contiguous_struct_char,
                                                                            noncontiguous_struct_char,
                                                                            type_c->block_lengths[i],
       else
         ((s_smpi_subtype_t*)type_c->old_types[i]->substruct)->unserialize( contiguous_struct_char,
                                                                            noncontiguous_struct_char,
                                                                            type_c->block_lengths[i],
-                                                                           type_c->old_types[i]->substruct);
+                                                                           type_c->old_types[i]->substruct,
+                                                                           op);
 
       contiguous_struct_char += type_c->block_lengths[i]*smpi_datatype_size(type_c->old_types[i]);
       if (i<type_c->block_count-1)noncontiguous_struct_char =  (char*)noncontiguous_struct + type_c->block_indices[i+1];
 
       contiguous_struct_char += type_c->block_lengths[i]*smpi_datatype_size(type_c->old_types[i]);
       if (i<type_c->block_count-1)noncontiguous_struct_char =  (char*)noncontiguous_struct + type_c->block_indices[i+1];
@@ -1172,7 +1195,6 @@ typedef struct s_smpi_mpi_op {
 #define BXOR_OP(a, b) (b) ^= (a)
 #define MAXLOC_OP(a, b)  (b) = (a.value) < (b.value) ? (b) : (a)
 #define MINLOC_OP(a, b)  (b) = (a.value) < (b.value) ? (a) : (b)
 #define BXOR_OP(a, b) (b) ^= (a)
 #define MAXLOC_OP(a, b)  (b) = (a.value) < (b.value) ? (b) : (a)
 #define MINLOC_OP(a, b)  (b) = (a.value) < (b.value) ? (a) : (b)
-#define REPLACE_OP(a,b) (b) = (a)
 
 #define APPLY_FUNC(a, b, length, type, func) \
 {                                          \
 
 #define APPLY_FUNC(a, b, length, type, func) \
 {                                          \
@@ -1480,23 +1502,7 @@ static void maxloc_func(void *a, void *b, int *length,
 static void replace_func(void *a, void *b, int *length,
                         MPI_Datatype * datatype)
 {
 static void replace_func(void *a, void *b, int *length,
                         MPI_Datatype * datatype)
 {
-  if (*datatype == MPI_CHAR) {
-    APPLY_FUNC(a, b, length, char, REPLACE_OP);
-  } else if (*datatype == MPI_SHORT) {
-    APPLY_FUNC(a, b, length, short, REPLACE_OP);
-  } else if (*datatype == MPI_INT) {
-    APPLY_FUNC(a, b, length, int, REPLACE_OP);
-  } else if (*datatype == MPI_LONG) {
-    APPLY_FUNC(a, b, length, long, REPLACE_OP);
-  } else if (*datatype == MPI_UNSIGNED_SHORT) {
-    APPLY_FUNC(a, b, length, unsigned short, REPLACE_OP);
-  } else if (*datatype == MPI_UNSIGNED) {
-    APPLY_FUNC(a, b, length, unsigned int, REPLACE_OP);
-  } else if (*datatype == MPI_UNSIGNED_LONG) {
-    APPLY_FUNC(a, b, length, unsigned long, REPLACE_OP);
-  } else if (*datatype == MPI_BYTE) {
-    APPLY_FUNC(a, b, length, uint8_t, REPLACE_OP);
-  }
+  memcpy(b, a, *length * smpi_datatype_size(*datatype));
 }
 
 #define CREATE_MPI_OP(name, func)                             \
 }
 
 #define CREATE_MPI_OP(name, func)                             \
index 6e81df3..c88b436 100644 (file)
@@ -45,16 +45,16 @@ typedef struct s_smpi_mpi_contiguous{
   MPI_Datatype old_type;
   MPI_Aint lb;
   size_t size_oldtype;
   MPI_Datatype old_type;
   MPI_Aint lb;
   size_t size_oldtype;
-  size_t block_count;
+  int block_count;
 } s_smpi_mpi_contiguous_t;
 
 typedef struct s_smpi_mpi_vector{
   s_smpi_subtype_t base;
   MPI_Datatype old_type;
   size_t size_oldtype;
 } s_smpi_mpi_contiguous_t;
 
 typedef struct s_smpi_mpi_vector{
   s_smpi_subtype_t base;
   MPI_Datatype old_type;
   size_t size_oldtype;
-  size_t block_stride;
-  size_t block_length;
-  size_t block_count;
+  int block_stride;
+  int block_length;
+  int block_count;
 } s_smpi_mpi_vector_t;
 
 typedef struct s_smpi_mpi_hvector{
 } s_smpi_mpi_vector_t;
 
 typedef struct s_smpi_mpi_hvector{
@@ -62,8 +62,8 @@ typedef struct s_smpi_mpi_hvector{
   MPI_Datatype old_type;
   size_t size_oldtype;
   MPI_Aint block_stride;
   MPI_Datatype old_type;
   size_t size_oldtype;
   MPI_Aint block_stride;
-  size_t block_length;
-  size_t block_count;
+  int block_length;
+  int block_count;
 } s_smpi_mpi_hvector_t;
 
 typedef struct s_smpi_mpi_indexed{
 } s_smpi_mpi_hvector_t;
 
 typedef struct s_smpi_mpi_indexed{
@@ -72,7 +72,7 @@ typedef struct s_smpi_mpi_indexed{
   size_t size_oldtype;
   int* block_lengths;
   int* block_indices;
   size_t size_oldtype;
   int* block_lengths;
   int* block_indices;
-  size_t block_count;
+  int block_count;
 } s_smpi_mpi_indexed_t;
 
 typedef struct s_smpi_mpi_hindexed{
 } s_smpi_mpi_indexed_t;
 
 typedef struct s_smpi_mpi_hindexed{
@@ -81,7 +81,7 @@ typedef struct s_smpi_mpi_hindexed{
   size_t size_oldtype;
   int* block_lengths;
   MPI_Aint* block_indices;
   size_t size_oldtype;
   int* block_lengths;
   MPI_Aint* block_indices;
-  size_t block_count;
+  int block_count;
 } s_smpi_mpi_hindexed_t;
 
 typedef struct s_smpi_mpi_struct{
 } s_smpi_mpi_hindexed_t;
 
 typedef struct s_smpi_mpi_struct{
@@ -90,7 +90,7 @@ typedef struct s_smpi_mpi_struct{
   size_t size_oldtype;
   int* block_lengths;
   MPI_Aint* block_indices;
   size_t size_oldtype;
   int* block_lengths;
   MPI_Aint* block_indices;
-  size_t block_count;
+  int block_count;
   MPI_Datatype* old_types;
 } s_smpi_mpi_struct_t;
 
   MPI_Datatype* old_types;
 } s_smpi_mpi_struct_t;
 
@@ -100,12 +100,13 @@ typedef struct s_smpi_mpi_struct{
 */
 void unserialize_contiguous( const void *contiguous_vector,
                          void *noncontiguous_vector,
 */
 void unserialize_contiguous( const void *contiguous_vector,
                          void *noncontiguous_vector,
-                         size_t count,
-                         void *type);
+                         int count,
+                         void *type,
+                         MPI_Op op);
 
 void serialize_contiguous( const void *noncontiguous_vector,
                        void *contiguous_vector,
 
 void serialize_contiguous( const void *noncontiguous_vector,
                        void *contiguous_vector,
-                       size_t count,
+                       int count,
                        void *type);
 
 void free_contiguous(MPI_Datatype* type);
                        void *type);
 
 void free_contiguous(MPI_Datatype* type);
@@ -117,12 +118,13 @@ s_smpi_mpi_contiguous_t* smpi_datatype_contiguous_create( MPI_Aint lb,
                                                   
 void unserialize_vector( const void *contiguous_vector,
                          void *noncontiguous_vector,
                                                   
 void unserialize_vector( const void *contiguous_vector,
                          void *noncontiguous_vector,
-                         size_t count,
-                         void *type);
+                         int count,
+                         void *type,
+                         MPI_Op op);
 
 void serialize_vector( const void *noncontiguous_vector,
                        void *contiguous_vector,
 
 void serialize_vector( const void *noncontiguous_vector,
                        void *contiguous_vector,
-                       size_t count,
+                       int count,
                        void *type);
 
 void free_vector(MPI_Datatype* type);
                        void *type);
 
 void free_vector(MPI_Datatype* type);
@@ -135,12 +137,13 @@ s_smpi_mpi_vector_t* smpi_datatype_vector_create( int block_stride,
 
 void unserialize_hvector( const void *contiguous_vector,
                          void *noncontiguous_vector,
 
 void unserialize_hvector( const void *contiguous_vector,
                          void *noncontiguous_vector,
-                         size_t count,
-                         void *type);
+                         int count,
+                         void *type,
+                         MPI_Op op);
 
 void serialize_hvector( const void *noncontiguous_vector,
                        void *contiguous_vector,
 
 void serialize_hvector( const void *noncontiguous_vector,
                        void *contiguous_vector,
-                       size_t count,
+                       int count,
                        void *type);
 
 void free_hvector(MPI_Datatype* type);
                        void *type);
 
 void free_hvector(MPI_Datatype* type);
@@ -154,12 +157,13 @@ s_smpi_mpi_hvector_t* smpi_datatype_hvector_create( MPI_Aint block_stride,
 
 void unserialize_indexed( const void *contiguous_indexed,
                          void *noncontiguous_indexed,
 
 void unserialize_indexed( const void *contiguous_indexed,
                          void *noncontiguous_indexed,
-                         size_t count,
-                         void *type);
+                         int count,
+                         void *type,
+                         MPI_Op op);
 
 void serialize_indexed( const void *noncontiguous_vector,
                        void *contiguous_vector,
 
 void serialize_indexed( const void *noncontiguous_vector,
                        void *contiguous_vector,
-                       size_t count,
+                       int count,
                        void *type);
 
 void free_indexed(MPI_Datatype* type);
                        void *type);
 
 void free_indexed(MPI_Datatype* type);
@@ -172,12 +176,13 @@ s_smpi_mpi_indexed_t* smpi_datatype_indexed_create(int* block_lengths,
 
 void unserialize_hindexed( const void *contiguous_indexed,
                          void *noncontiguous_indexed,
 
 void unserialize_hindexed( const void *contiguous_indexed,
                          void *noncontiguous_indexed,
-                         size_t count,
-                         void *type);
+                         int count,
+                         void *type,
+                         MPI_Op op);
 
 void serialize_hindexed( const void *noncontiguous_vector,
                        void *contiguous_vector,
 
 void serialize_hindexed( const void *noncontiguous_vector,
                        void *contiguous_vector,
-                       size_t count,
+                       int count,
                        void *type);
 
 void free_hindexed(MPI_Datatype* type);
                        void *type);
 
 void free_hindexed(MPI_Datatype* type);
@@ -190,12 +195,13 @@ s_smpi_mpi_hindexed_t* smpi_datatype_hindexed_create(int* block_lengths,
 
 void unserialize_struct( const void *contiguous_indexed,
                          void *noncontiguous_indexed,
 
 void unserialize_struct( const void *contiguous_indexed,
                          void *noncontiguous_indexed,
-                         size_t count,
-                         void *type);
+                         int count,
+                         void *type,
+                         MPI_Op op);
 
 void serialize_struct( const void *noncontiguous_vector,
                        void *contiguous_vector,
 
 void serialize_struct( const void *noncontiguous_vector,
                        void *contiguous_vector,
-                       size_t count,
+                       int count,
                        void *type);
 
 void free_struct(MPI_Datatype* type);
                        void *type);
 
 void free_struct(MPI_Datatype* type);
index 89224f8..7c7d867 100644 (file)
@@ -116,7 +116,7 @@ int smpi_mpi_put( void *origin_addr, int origin_count, MPI_Datatype origin_datat
   //get receiver pointer
   MPI_Win recv_win = win->connected_wins[target_rank];
 
   //get receiver pointer
   MPI_Win recv_win = win->connected_wins[target_rank];
 
-  void* recv_addr = (void*) ( ((char*)recv_win->base) + target_disp * smpi_datatype_size(target_datatype));
+  void* recv_addr = (void*) ( ((char*)recv_win->base) + target_disp * recv_win->disp_unit);
   smpi_datatype_use(origin_datatype);
   smpi_datatype_use(target_datatype);
   XBT_DEBUG("Entering MPI_Put to %d", target_rank);
   smpi_datatype_use(origin_datatype);
   smpi_datatype_use(target_datatype);
   XBT_DEBUG("Entering MPI_Put to %d", target_rank);
@@ -124,11 +124,11 @@ int smpi_mpi_put( void *origin_addr, int origin_count, MPI_Datatype origin_datat
   if(target_rank != smpi_comm_rank(win->comm)){
     //prepare send_request
     MPI_Request sreq = smpi_rma_send_init(origin_addr, origin_count, origin_datatype,
   if(target_rank != smpi_comm_rank(win->comm)){
     //prepare send_request
     MPI_Request sreq = smpi_rma_send_init(origin_addr, origin_count, origin_datatype,
-        smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+1, win->comm);
+        smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+1, win->comm, MPI_OP_NULL);
 
     //prepare receiver request
     MPI_Request rreq = smpi_rma_recv_init(recv_addr, target_count, target_datatype,
 
     //prepare receiver request
     MPI_Request rreq = smpi_rma_recv_init(recv_addr, target_count, target_datatype,
-        smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+1, recv_win->comm);
+        smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+1, recv_win->comm, MPI_OP_NULL);
 
     //push request to receiver's win
     xbt_dynar_push_as(recv_win->requests, MPI_Request, rreq);
 
     //push request to receiver's win
     xbt_dynar_push_as(recv_win->requests, MPI_Request, rreq);
@@ -138,6 +138,9 @@ int smpi_mpi_put( void *origin_addr, int origin_count, MPI_Datatype origin_datat
 
     //push request to sender's win
     xbt_dynar_push_as(win->requests, MPI_Request, sreq);
 
     //push request to sender's win
     xbt_dynar_push_as(win->requests, MPI_Request, sreq);
+  }else{
+    smpi_datatype_copy(origin_addr, origin_count, origin_datatype,
+                       recv_addr, target_count, target_datatype);
   }
 
   return MPI_SUCCESS;
   }
 
   return MPI_SUCCESS;
@@ -149,7 +152,7 @@ int smpi_mpi_get( void *origin_addr, int origin_count, MPI_Datatype origin_datat
   //get sender pointer
   MPI_Win send_win = win->connected_wins[target_rank];
 
   //get sender pointer
   MPI_Win send_win = win->connected_wins[target_rank];
 
-  void* send_addr = (void*)( ((char*)send_win->base) + target_disp * smpi_datatype_size(target_datatype));
+  void* send_addr = (void*)( ((char*)send_win->base) + target_disp * send_win->disp_unit);
   smpi_datatype_use(origin_datatype);
   smpi_datatype_use(target_datatype);
   XBT_DEBUG("Entering MPI_Get from %d", target_rank);
   smpi_datatype_use(origin_datatype);
   smpi_datatype_use(target_datatype);
   XBT_DEBUG("Entering MPI_Get from %d", target_rank);
@@ -157,11 +160,11 @@ int smpi_mpi_get( void *origin_addr, int origin_count, MPI_Datatype origin_datat
   if(target_rank != smpi_comm_rank(win->comm)){
     //prepare send_request
     MPI_Request sreq = smpi_rma_send_init(send_addr, target_count, target_datatype,
   if(target_rank != smpi_comm_rank(win->comm)){
     //prepare send_request
     MPI_Request sreq = smpi_rma_send_init(send_addr, target_count, target_datatype,
-        smpi_group_index(smpi_comm_group(win->comm),target_rank), smpi_process_index(), RMA_TAG+2, send_win->comm);
+        smpi_group_index(smpi_comm_group(win->comm),target_rank), smpi_process_index(), RMA_TAG+2, send_win->comm, MPI_OP_NULL);
 
     //prepare receiver request
     MPI_Request rreq = smpi_rma_recv_init(origin_addr, origin_count, origin_datatype,
 
     //prepare receiver request
     MPI_Request rreq = smpi_rma_recv_init(origin_addr, origin_count, origin_datatype,
-        smpi_group_index(smpi_comm_group(win->comm),target_rank), smpi_process_index(), RMA_TAG+2, win->comm);
+        smpi_group_index(smpi_comm_group(win->comm),target_rank), smpi_process_index(), RMA_TAG+2, win->comm, MPI_OP_NULL);
 
     //push request to receiver's win
     xbt_dynar_push_as(send_win->requests, MPI_Request, sreq);
 
     //push request to receiver's win
     xbt_dynar_push_as(send_win->requests, MPI_Request, sreq);
@@ -171,6 +174,9 @@ int smpi_mpi_get( void *origin_addr, int origin_count, MPI_Datatype origin_datat
 
     //push request to sender's win
     xbt_dynar_push_as(win->requests, MPI_Request, rreq);
 
     //push request to sender's win
     xbt_dynar_push_as(win->requests, MPI_Request, rreq);
+  }else{
+    smpi_datatype_copy(send_addr, target_count, target_datatype,
+                       origin_addr, origin_count, origin_datatype);
   }
 
   return MPI_SUCCESS;
   }
 
   return MPI_SUCCESS;
@@ -180,33 +186,33 @@ int smpi_mpi_get( void *origin_addr, int origin_count, MPI_Datatype origin_datat
 int smpi_mpi_accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank,
               MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win)
 {
 int smpi_mpi_accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank,
               MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win)
 {
+  //FIXME: local version 
   //get receiver pointer
   MPI_Win recv_win = win->connected_wins[target_rank];
 
   //get receiver pointer
   MPI_Win recv_win = win->connected_wins[target_rank];
 
-  void* recv_addr = (void*)( ((char*)recv_win->base) + target_disp * smpi_datatype_size(target_datatype) );
+  void* recv_addr = (void*)( ((char*)recv_win->base) + target_disp * recv_win->disp_unit);
   XBT_DEBUG("Entering MPI_Accumulate to %d", target_rank);
 
   smpi_datatype_use(origin_datatype);
   smpi_datatype_use(target_datatype);
 
   XBT_DEBUG("Entering MPI_Accumulate to %d", target_rank);
 
   smpi_datatype_use(origin_datatype);
   smpi_datatype_use(target_datatype);
 
-  if(target_rank != smpi_comm_rank(win->comm)){
+
     //prepare send_request
     MPI_Request sreq = smpi_rma_send_init(origin_addr, origin_count, origin_datatype,
     //prepare send_request
     MPI_Request sreq = smpi_rma_send_init(origin_addr, origin_count, origin_datatype,
-        smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+3, win->comm);
+        smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+3, win->comm, op);
 
     //prepare receiver request
 
     //prepare receiver request
-    MPI_Request rreq = smpi_rma_recv_init(NULL, 0, target_datatype,
-        smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+3, recv_win->comm);
-    rreq->flags |= ACCUMULATE;
+    MPI_Request rreq = smpi_rma_recv_init(recv_addr, target_count, target_datatype,
+        smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+3, recv_win->comm, op);
     //push request to receiver's win
     xbt_dynar_push_as(recv_win->requests, MPI_Request, rreq);
     //start send
     smpi_mpi_start(sreq);
     //push request to receiver's win
     xbt_dynar_push_as(recv_win->requests, MPI_Request, rreq);
     //start send
     smpi_mpi_start(sreq);
+    
     //push request to sender's win
     xbt_dynar_push_as(win->requests, MPI_Request, sreq);
     //push request to sender's win
     xbt_dynar_push_as(win->requests, MPI_Request, sreq);
-   }
-  //perform actual accumulation
-  smpi_op_apply(op, origin_addr, recv_addr, &origin_count, &origin_datatype);
+  
+
 
   return MPI_SUCCESS;
 }
 
   return MPI_SUCCESS;
 }