Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Merge branch 'master' of git+ssh://scm.gforge.inria.fr//gitroot/simgrid/simgrid
[simgrid.git] / src / smpi / colls / reduce-ompi.c
index 6dc846c..3c51282 100644 (file)
@@ -1,3 +1,9 @@
+/* Copyright (c) 2013-2014. The SimGrid Team.
+ * All rights reserved.                                                     */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
 /*
  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
  *                         University Research and Technology
  *                         University of Stuttgart.  All rights reserved.
  * Copyright (c) 2004-2005 The Regents of the University of California.
  *                         All rights reserved.
- * $COPYRIGHT$
  *
  * Additional copyrights may follow
- *
- * $HEADER$
  */
 
 #include "colls_private.h"
 #include "coll_tuned_topo.h"
-#define MCA_COLL_BASE_TAG_REDUCE 555
 
 
 
@@ -48,7 +50,6 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
     char *accumbuf = NULL, *accumbuf_free = NULL;
     char *local_op_buffer = NULL, *sendtmpbuf = NULL;
     ptrdiff_t extent, lower_bound, segment_increment;
-    size_t typelng;
     MPI_Request  reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL};
     int num_segments, line, ret, segindex, i, rank;
     int recvcount, prevcount, inbi;
@@ -58,7 +59,6 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
      * sent per operation
      */
     smpi_datatype_extent( datatype, &lower_bound, &extent);
-    typelng = smpi_datatype_size( datatype );
     num_segments = (original_count + count_by_segment - 1) / count_by_segment;
     segment_increment = count_by_segment * extent;
 
@@ -82,7 +82,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
         accumbuf = (char*)recvbuf;
         if( (NULL == accumbuf) || (root != rank) ) {
             /* Allocate temporary accumulator buffer. */
-            accumbuf_free = (char*)malloc(true_extent + 
+            accumbuf_free = (char*)smpi_get_tmp_sendbuffer(true_extent +
                                           (original_count - 1) * extent);
             if (accumbuf_free == NULL) { 
                 line = __LINE__; ret = -1; goto error_hndl; 
@@ -94,12 +94,12 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
           sendbuf to the accumbuf, in order to simplify the loops */
         if (!smpi_op_is_commute(op)) {
             smpi_datatype_copy(
-                                                (char*)accumbuf, original_count, datatype,
-                                                (char*)sendtmpbuf, original_count, datatype);
+                                                (char*)sendtmpbuf, original_count, datatype,
+                                                (char*)accumbuf, original_count, datatype);
         }
         /* Allocate two buffers for incoming segments */
         real_segment_size = true_extent + (count_by_segment - 1) * extent;
-        inbuf_free[0] = (char*) malloc(real_segment_size);
+        inbuf_free[0] = (char*) smpi_get_tmp_recvbuffer(real_segment_size);
         if( inbuf_free[0] == NULL ) { 
             line = __LINE__; ret = -1; goto error_hndl; 
         }
@@ -107,7 +107,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
         /* if there is chance to overlap communication -
            allocate second buffer */
         if( (num_segments > 1) || (tree->tree_nextsize > 1) ) {
-            inbuf_free[1] = (char*) malloc(real_segment_size);
+            inbuf_free[1] = (char*) smpi_get_tmp_recvbuffer(real_segment_size);
             if( inbuf_free[1] == NULL ) { 
                 line = __LINE__; ret = -1; goto error_hndl;
             }
@@ -153,7 +153,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
 
                     reqs[inbi]=smpi_mpi_irecv(local_recvbuf, recvcount, datatype,
                                              tree->tree_next[i], 
-                                             MCA_COLL_BASE_TAG_REDUCE, comm
+                                             COLL_TAG_REDUCE, comm
                                              );
                 }
                 /* wait for previous req to complete, if any.
@@ -197,7 +197,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
                         /* send combined/accumulated data to parent */
                         smpi_mpi_send( accumulator, prevcount, 
                                                   datatype, tree->tree_prev, 
-                                                  MCA_COLL_BASE_TAG_REDUCE,
+                                                  COLL_TAG_REDUCE,
                                                   comm);
                     }
 
@@ -212,9 +212,9 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
         } /* end of for each segment */
 
         /* clean up */
-        if( inbuf_free[0] != NULL) free(inbuf_free[0]);
-        if( inbuf_free[1] != NULL) free(inbuf_free[1]);
-        if( accumbuf_free != NULL ) free(accumbuf_free);
+        smpi_free_tmp_buffer(inbuf_free[0]);
+        smpi_free_tmp_buffer(inbuf_free[1]);
+        smpi_free_tmp_buffer(accumbuf_free);
     }
 
     /* leaf nodes 
@@ -244,7 +244,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
                                          segindex * segment_increment,
                                          count_by_segment, datatype,
                                          tree->tree_prev, 
-                                         MCA_COLL_BASE_TAG_REDUCE,
+                                         COLL_TAG_REDUCE,
                                          comm) ;
                 segindex++;
                 original_count -= count_by_segment;
@@ -272,7 +272,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
                                           segindex * segment_increment,
                                           count_by_segment, datatype,
                                           tree->tree_prev, 
-                                          MCA_COLL_BASE_TAG_REDUCE,
+                                          COLL_TAG_REDUCE,
                                           comm);
                 original_count -= count_by_segment;
             }
@@ -290,7 +290,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
                                           segindex * segment_increment, 
                                           count_by_segment, datatype, 
                                           tree->tree_prev, 
-                                          MCA_COLL_BASE_TAG_REDUCE, 
+                                          COLL_TAG_REDUCE,
                                           comm );
                 creq = (creq + 1) % max_outstanding_reqs;
                 segindex++;
@@ -305,6 +305,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
             free(sreq);
         }
     }
+    free(tree);
     return MPI_SUCCESS;
 
  error_hndl:  /* error handler */
@@ -518,16 +519,16 @@ int smpi_coll_tuned_reduce_ompi_in_order_binary( void *sendbuf, void *recvbuf,
         text=smpi_datatype_get_extent(datatype);
 
         if ((root == rank) && (MPI_IN_PLACE == sendbuf)) {
-            tmpbuf = (char *) malloc(text + (count - 1) * ext);
+            tmpbuf = (char *) smpi_get_tmp_sendbuffer(text + (count - 1) * ext);
             if (NULL == tmpbuf) {
                 return MPI_ERR_INTERN;
             }
             smpi_datatype_copy (
-                                                (char*)tmpbuf, count, datatype,
-                                                (char*)recvbuf, count, datatype);
+                                                (char*)recvbuf, count, datatype,
+                                                (char*)tmpbuf, count, datatype);
             use_this_sendbuf = tmpbuf;
         } else if (io_root == rank) {
-            tmpbuf = (char *) malloc(text + (count - 1) * ext);
+            tmpbuf = (char *) smpi_get_tmp_recvbuffer(text + (count - 1) * ext);
             if (NULL == tmpbuf) {
                 return MPI_ERR_INTERN;
             }
@@ -547,18 +548,18 @@ int smpi_coll_tuned_reduce_ompi_in_order_binary( void *sendbuf, void *recvbuf,
         if (root == rank) {
             /* Receive result from rank io_root to recvbuf */
             smpi_mpi_recv(recvbuf, count, datatype, io_root,
-                                    MCA_COLL_BASE_TAG_REDUCE, comm,
+                                    COLL_TAG_REDUCE, comm,
                                     MPI_STATUS_IGNORE);
             if (MPI_IN_PLACE == sendbuf) {
-                free(use_this_sendbuf);
+              smpi_free_tmp_buffer(use_this_sendbuf);
             }
           
         } else if (io_root == rank) {
             /* Send result from use_this_recvbuf to root */
             smpi_mpi_send(use_this_recvbuf, count, datatype, root,
-                                    MCA_COLL_BASE_TAG_REDUCE, 
+                                    COLL_TAG_REDUCE,
                                     comm);
-            free(use_this_recvbuf);
+            smpi_free_tmp_buffer(use_this_recvbuf);
         }
     }
 
@@ -612,9 +613,9 @@ smpi_coll_tuned_reduce_ompi_basic_linear(void *sbuf, void *rbuf, int count,
 
     if (rank != root) {
         smpi_mpi_send(sbuf, count, dtype, root,
-                                MCA_COLL_BASE_TAG_REDUCE,
+                                COLL_TAG_REDUCE,
                                 comm);
-        return -1;
+        return MPI_SUCCESS;
     }
 
     /* see discussion in ompi_coll_basic_reduce_lin_intra about 
@@ -626,7 +627,7 @@ smpi_coll_tuned_reduce_ompi_basic_linear(void *sbuf, void *rbuf, int count,
 
     if (MPI_IN_PLACE == sbuf) {
         sbuf = rbuf;
-        inplace_temp = (char*)malloc(true_extent + (count - 1) * extent);
+        inplace_temp = (char*)smpi_get_tmp_recvbuffer(true_extent + (count - 1) * extent);
         if (NULL == inplace_temp) {
             return -1;
         }
@@ -634,18 +635,17 @@ smpi_coll_tuned_reduce_ompi_basic_linear(void *sbuf, void *rbuf, int count,
     }
 
     if (size > 1) {
-        free_buffer = (char*)malloc(true_extent + (count - 1) * extent);
+        free_buffer = (char*)smpi_get_tmp_recvbuffer(true_extent + (count - 1) * extent);
         pml_buffer = free_buffer - lb;
     }
 
     /* Initialize the receive buffer. */
 
     if (rank == (size - 1)) {
-        smpi_datatype_copy((char*)rbuf, count, dtype,
-                                                  (char*)sbuf, count, dtype);
+        smpi_datatype_copy((char*)sbuf, count, dtype,(char*)rbuf, count, dtype);
     } else {
         smpi_mpi_recv(rbuf, count, dtype, size - 1,
-                                MCA_COLL_BASE_TAG_REDUCE, comm,
+                                COLL_TAG_REDUCE, comm,
                                 MPI_STATUS_IGNORE);
     }
 
@@ -656,7 +656,7 @@ smpi_coll_tuned_reduce_ompi_basic_linear(void *sbuf, void *rbuf, int count,
             inbuf = (char*)sbuf;
         } else {
             smpi_mpi_recv(pml_buffer, count, dtype, i,
-                                    MCA_COLL_BASE_TAG_REDUCE, comm,
+                                    COLL_TAG_REDUCE, comm,
                                     MPI_STATUS_IGNORE);
             inbuf = pml_buffer;
         }
@@ -666,12 +666,12 @@ smpi_coll_tuned_reduce_ompi_basic_linear(void *sbuf, void *rbuf, int count,
     }
 
     if (NULL != inplace_temp) {
-        smpi_datatype_copy((char*)sbuf, count, dtype,
-                                                  inplace_temp,count , dtype);
-        free(inplace_temp);
+        smpi_datatype_copy(inplace_temp, count, dtype,(char*)sbuf
+                                                  ,count , dtype);
+        smpi_free_tmp_buffer(inplace_temp);
     }
     if (NULL != free_buffer) {
-        free(free_buffer);
+        smpi_free_tmp_buffer(free_buffer);
     }
 
     /* All done */