kill all trailing whitespaces
[simgrid.git] / src / smpi / colls / reduce / reduce-ompi.cpp
index b476889..25d1cb8 100644 (file)
@@ -38,7 +38,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
  * the number of datatype to the original count (original_count)
  *
  * Note that for non-commutative operations we cannot save memory copy
- * for the first block: thus we must copy sendbuf to accumbuf on intermediate 
+ * for the first block: thus we must copy sendbuf to accumbuf on intermediate nodes
  * to keep the optimized loop happy.
  */
 int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int original_count,
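Aside, not part of the patch: the comment above turns on operator commutativity. With a non-commutative op the fold order is fixed by rank, so the accumulator must be seeded from sendbuf before any child data is folded in. A minimal standalone C++ sketch of why order matters:

    // Illustration only: reduction order matters for non-commutative ops.
    #include <cstdio>

    static int op_sub(int a, int b) { return a - b; }  // non-commutative

    int main() {
        int mine = 10, child = 3;
        std::printf("op(child, mine) = %d\n", op_sub(child, mine));  // -7
        std::printf("op(mine, child) = %d\n", op_sub(mine, child));  //  7
        return 0;
    }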
@@ -63,33 +63,33 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
     num_segments = (original_count + count_by_segment - 1) / count_by_segment;
     segment_increment = count_by_segment * extent;
 
-    sendtmpbuf = (char*) sendbuf; 
-    if( sendbuf == MPI_IN_PLACE ) { 
-        sendtmpbuf = (char *)recvbuf; 
+    sendtmpbuf = (char*) sendbuf;
+    if( sendbuf == MPI_IN_PLACE ) {
+        sendtmpbuf = (char *)recvbuf;
     }
 
     XBT_DEBUG( "coll:tuned:reduce_generic count %d, msg size %ld, segsize %ld, max_requests %d", original_count, (unsigned long)(num_segments * segment_increment), (unsigned long)segment_increment, max_outstanding_reqs);
 
     rank = comm->rank();
 
-    /* non-leaf nodes - wait for children to send me data & forward up 
+    /* non-leaf nodes - wait for children to send me data & forward up
        (if needed) */
     if( tree->tree_nextsize > 0 ) {
         ptrdiff_t true_extent, real_segment_size;
         true_extent=datatype->get_extent();
 
-        /* handle non existant recv buffer (i.e. its NULL) and 
+        /* handle non-existent recv buffer (i.e. it's NULL) and
            protect the recv buffer on non-root nodes */
         accumbuf = (char*)recvbuf;
         if( (NULL == accumbuf) || (root != rank) ) {
             /* Allocate temporary accumulator buffer. */
             accumbuf_free = (char*)smpi_get_tmp_sendbuffer(true_extent +
                                           (original_count - 1) * extent);
-            if (accumbuf_free == NULL) { 
-                line = __LINE__; ret = -1; goto error_hndl; 
+            if (accumbuf_free == NULL) {
+                line = __LINE__; ret = -1; goto error_hndl;
             }
             accumbuf = accumbuf_free - lower_bound;
-        } 
+        }
 
         /* If this is a non-commutative operation we must copy
           sendbuf to the accumbuf, in order to simplify the loops */
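The segment arithmetic in this hunk is ceiling division plus extent-based buffer sizing; a standalone sketch with hypothetical values (not taken from the patch):

    #include <cstdio>

    int main() {
        // Hypothetical values, for illustration only.
        int original_count   = 1000;       // elements to reduce
        int count_by_segment = 256;        // elements per pipeline segment
        long extent = 8, true_extent = 8;  // e.g. a contiguous 8-byte type

        // Ceiling division: segments needed to cover the whole count.
        int num_segments = (original_count + count_by_segment - 1) / count_by_segment;  // 4
        long segment_increment = count_by_segment * extent;  // byte stride per segment

        // Accumulator sizing, as in the smpi_get_tmp_sendbuffer() call above:
        // one true extent plus (count - 1) extents.
        long accumbuf_bytes = true_extent + (original_count - 1) * extent;

        std::printf("%d segments, stride %ld B, accumbuf %ld B\n",
                    num_segments, segment_increment, accumbuf_bytes);
        return 0;
    }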
@@ -99,19 +99,19 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
         /* Allocate two buffers for incoming segments */
         real_segment_size = true_extent + (count_by_segment - 1) * extent;
         inbuf_free[0] = (char*) smpi_get_tmp_recvbuffer(real_segment_size);
-        if( inbuf_free[0] == NULL ) { 
-            line = __LINE__; ret = -1; goto error_hndl; 
+        if( inbuf_free[0] == NULL ) {
+            line = __LINE__; ret = -1; goto error_hndl;
         }
         inbuf[0] = inbuf_free[0] - lower_bound;
         /* if there is chance to overlap communication -
            allocate second buffer */
         if( (num_segments > 1) || (tree->tree_nextsize > 1) ) {
             inbuf_free[1] = (char*) smpi_get_tmp_recvbuffer(real_segment_size);
-            if( inbuf_free[1] == NULL ) { 
+            if( inbuf_free[1] == NULL ) {
                 line = __LINE__; ret = -1; goto error_hndl;
             }
             inbuf[1] = inbuf_free[1] - lower_bound;
-        } 
+        }
 
         /* reset input buffer index and receive count */
         inbi = 0;
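The two inbuf slots allocated above let the irecv of the next segment overlap with the reduction of the previous one; the loop below flips between them with inbi ^ 1. A tiny standalone sketch of that ping-pong indexing (illustration only):

    #include <cstdio>

    int main() {
        int inbi = 0;
        for (int seg = 0; seg < 4; ++seg) {
            // post a receive into inbuf[inbi] while reducing inbuf[inbi ^ 1]
            std::printf("segment %d: recv into inbuf[%d], reduce inbuf[%d]\n",
                        seg, inbi, inbi ^ 1);
            inbi = inbi ^ 1;  // alternates 0, 1, 0, 1, ...
        }
        return 0;
    }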
@@ -134,14 +134,14 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
                 if( segindex < num_segments ) {
                     void* local_recvbuf = inbuf[inbi];
                     if( 0 == i ) {
-                        /* for the first step (1st child per segment) and 
-                         * commutative operations we might be able to irecv 
-                         * directly into the accumulate buffer so that we can 
-                         * reduce(op) this with our sendbuf in one step as 
-                         * ompi_op_reduce only has two buffer pointers, 
+                        /* for the first step (1st child per segment) and
+                         * commutative operations we might be able to irecv
+                         * directly into the accumulate buffer so that we can
+                         * reduce(op) this with our sendbuf in one step as
+                         * ompi_op_reduce only has two buffer pointers,
                          * this avoids an extra memory copy.
                          *
-                         * BUT if the operation is non-commutative or 
+                         * BUT if the operation is non-commutative or
                          * we are root and are USING MPI_IN_PLACE this is wrong!
                          */
                         if(  (op==MPI_OP_NULL || op->is_commutative()) &&
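The condition cut off at the end of this hunk decides where the first child's segment is received; a standalone sketch of that decision (illustration only, helper name hypothetical):

    #include <cstdio>

    // A commutative op may receive the first child's data straight into the
    // accumulator, saving a copy -- unless we are the root using MPI_IN_PLACE,
    // where the accumulator already aliases the root's own input.
    static bool recv_into_accumbuf(bool commutative, bool in_place, bool is_root) {
        return commutative && !(in_place && is_root);
    }

    int main() {
        std::printf("%d\n", recv_into_accumbuf(true, false, false));  // 1: direct recv
        std::printf("%d\n", recv_into_accumbuf(true, true,  true));   // 0: use inbuf
        return 0;
    }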
@@ -151,32 +151,32 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
                     }
 
                     reqs[inbi]=Request::irecv(local_recvbuf, recvcount, datatype,
-                                             tree->tree_next[i], 
+                                             tree->tree_next[i],
                                              COLL_TAG_REDUCE, comm
                                              );
                 }
                 /* wait for previous req to complete, if any.
-                   if there are no requests reqs[inbi ^1] will be 
+                   if there are no requests reqs[inbi ^1] will be
                    MPI_REQUEST_NULL. */
                 /* wait on data from last child for previous segment */
-                Request::waitall( 1, &reqs[inbi ^ 1], 
+                Request::waitall( 1, &reqs[inbi ^ 1],
                                              MPI_STATUSES_IGNORE );
                 local_op_buffer = inbuf[inbi ^ 1];
                 if( i > 0 ) {
-                    /* our first operation is to combine our own [sendbuf] data 
-                     * with the data we recvd from down stream (but only 
-                     * the operation is commutative and if we are not root and 
+                    /* our first operation is to combine our own [sendbuf] data
+                     * with the data we received from downstream (but only if
+                     * the operation is commutative and we are not root and
                      * not using MPI_IN_PLACE)
                      */
                     if( 1 == i ) {
-                        if( (op==MPI_OP_NULL || op->is_commutative())&& 
+                        if( (op==MPI_OP_NULL || op->is_commutative())&&
                             !((MPI_IN_PLACE == sendbuf) && (rank == tree->tree_root)) ) {
                             local_op_buffer = sendtmpbuf + segindex * segment_increment;
                         }
                     }
                     /* apply operation */
-                    if(op!=MPI_OP_NULL) op->apply( local_op_buffer, 
-                                   accumbuf + segindex * segment_increment, 
+                    if(op!=MPI_OP_NULL) op->apply( local_op_buffer,
+                                   accumbuf + segindex * segment_increment,
                                    &recvcount, datatype );
                 } else if ( segindex > 0 ) {
                     void* accumulator = accumbuf + (segindex-1) * segment_increment;
@@ -186,21 +186,21 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
                             local_op_buffer = sendtmpbuf + (segindex-1) * segment_increment;
                         }
                     }
-                    if(op!=MPI_OP_NULL) op->apply( local_op_buffer, accumulator, &prevcount, 
+                    if(op!=MPI_OP_NULL) op->apply( local_op_buffer, accumulator, &prevcount,
                                    datatype );
 
-                    /* all reduced on available data this step (i) complete, 
+                    /* all reductions on the data available at this step (i) are complete,
                      * pass to the next process unless you are the root.
                      */
                     if (rank != tree->tree_root) {
                         /* send combined/accumulated data to parent */
-                        Request::send( accumulator, prevcount, 
-                                                  datatype, tree->tree_prev, 
+                        Request::send( accumulator, prevcount,
+                                                  datatype, tree->tree_prev,
                                                   COLL_TAG_REDUCE,
                                                   comm);
                     }
 
-                    /* we stop when segindex = number of segments 
+                    /* we stop when segindex = number of segments
                       (i.e. we do num_segments+1 steps for pipelining) */
                     if (segindex == num_segments) break;
                 }
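Stripped of the double buffering and the MPI calls, the loop above reduces each segment by seeding the accumulator with this rank's data and folding in one child at a time. A self-contained paraphrase with communication replaced by array reads (illustration only; the real code overlaps these steps via irecv):

    #include <cstdio>

    int main() {
        const int num_children = 2, num_segments = 3, seg_len = 4;
        int child_data[2][3][4], my_data[3][4], accum[3][4] = {};
        for (int c = 0; c < num_children; ++c)
            for (int s = 0; s < num_segments; ++s)
                for (int e = 0; e < seg_len; ++e)
                    child_data[c][s][e] = c + s + e;  // stand-in for irecv'd data
        for (int s = 0; s < num_segments; ++s)
            for (int e = 0; e < seg_len; ++e)
                my_data[s][e] = 100 + s + e;          // stand-in for sendbuf

        for (int s = 0; s < num_segments; ++s) {
            for (int e = 0; e < seg_len; ++e)         // seed with our own data
                accum[s][e] = my_data[s][e];
            for (int c = 0; c < num_children; ++c)    // "waitall + op->apply"
                for (int e = 0; e < seg_len; ++e)
                    accum[s][e] += child_data[c][s][e];
            // a non-root rank would now send accum[s] to tree_prev
        }
        std::printf("accum[0][0] = %d\n", accum[0][0]);  // 100 + 0 + 1 = 101
        return 0;
    }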
@@ -216,33 +216,33 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
         smpi_free_tmp_buffer(accumbuf_free);
     }
 
-    /* leaf nodes 
-       Depending on the value of max_outstanding_reqs and 
+    /* leaf nodes
+       Depending on the value of max_outstanding_reqs and
        the number of segments we have two options:
        - send all segments using blocking send to the parent, or
-       - avoid overflooding the parent nodes by limiting the number of 
+       - avoid flooding the parent nodes by limiting the number of
       outstanding requests to max_outstanding_reqs.
-       TODO/POSSIBLE IMPROVEMENT: If there is a way to determine the eager size 
-       for the current communication, synchronization should be used only 
+       TODO/POSSIBLE IMPROVEMENT: If there is a way to determine the eager size
+       for the current communication, synchronization should be used only
        when the message/segment size is smaller than the eager size.
     */
     else {
 
        /* If the number of segments is less than the maximum number of outstanding
-           requests or there is no limit on the maximum number of outstanding 
+           requests or there is no limit on the maximum number of outstanding
            requests, we send data to the parent using blocking send */
-        if ((0 == max_outstanding_reqs) || 
+        if ((0 == max_outstanding_reqs) ||
             (num_segments <= max_outstanding_reqs)) {
-            
+
             segindex = 0;
             while ( original_count > 0) {
                 if (original_count < count_by_segment) {
                     count_by_segment = original_count;
                 }
-                Request::send((char*)sendbuf + 
+                Request::send((char*)sendbuf +
                                          segindex * segment_increment,
                                          count_by_segment, datatype,
-                                         tree->tree_prev, 
+                                         tree->tree_prev,
                                          COLL_TAG_REDUCE,
                                          comm) ;
                 segindex++;
@@ -270,7 +270,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
                 sreq[segindex]=Request::isend((char*)sendbuf +
                                           segindex * segment_increment,
                                           count_by_segment, datatype,
-                                          tree->tree_prev, 
+                                          tree->tree_prev,
                                           COLL_TAG_REDUCE,
                                           comm);
                 original_count -= count_by_segment;
@@ -285,10 +285,10 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
                 if( original_count < count_by_segment ) {
                     count_by_segment = original_count;
                 }
-                sreq[creq]=Request::isend((char*)sendbuf + 
-                                          segindex * segment_increment, 
-                                          count_by_segment, datatype, 
-                                          tree->tree_prev, 
+                sreq[creq]=Request::isend((char*)sendbuf +
+                                          segindex * segment_increment,
+                                          count_by_segment, datatype,
+                                          tree->tree_prev,
                                           COLL_TAG_REDUCE,
                                           comm );
                 creq = (creq + 1) % max_outstanding_reqs;
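The creq rotation above is a fixed-size window of in-flight sends: once all max_outstanding_reqs slots are filled, each new isend first waits for the request that previously occupied its slot. A standalone sketch of the window bookkeeping with the MPI calls stubbed out:

    #include <cstdio>

    int main() {
        const int max_outstanding_reqs = 3, num_segments = 8;
        int slot_owner[3] = {-1, -1, -1};  // which segment's isend holds each slot

        int creq = 0;
        for (int seg = 0; seg < num_segments; ++seg) {
            if (slot_owner[creq] >= 0)     // Request::wait on the old request
                std::printf("wait for segment %d before reusing slot %d\n",
                            slot_owner[creq], creq);
            slot_owner[creq] = seg;        // Request::isend for the new segment
            creq = (creq + 1) % max_outstanding_reqs;
        }
        // finish with Request::waitall on the remaining slots, as below
        return 0;
    }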
@@ -297,7 +297,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
             }
 
             /* Wait on the remaining request to complete */
-            Request::waitall( max_outstanding_reqs, sreq, 
+            Request::waitall( max_outstanding_reqs, sreq,
                                          MPI_STATUSES_IGNORE );
 
             /* free requests */
@@ -308,7 +308,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
     return MPI_SUCCESS;
 
  error_hndl:  /* error handler */
-    XBT_DEBUG("ERROR_HNDL: node %d file %s line %d error %d\n", 
+    XBT_DEBUG("ERROR_HNDL: node %d file %s line %d error %d\n",
                    rank, __FILE__, line, ret );
     if( inbuf_free[0] != NULL ) free(inbuf_free[0]);
     if( inbuf_free[1] != NULL ) free(inbuf_free[1]);
@@ -325,8 +325,8 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi
 
 
 int Coll_reduce_ompi_chain::reduce( void *sendbuf, void *recvbuf, int count,
-                                        MPI_Datatype datatype, 
-                                        MPI_Op  op, int root, 
+                                        MPI_Datatype datatype,
+                                        MPI_Op  op, int root,
                                         MPI_Comm  comm
                                         )
 {
@@ -342,12 +342,12 @@ int Coll_reduce_ompi_chain::reduce( void *sendbuf, void *recvbuf, int count,
      * sent per operation
      */
     typelng = datatype->size();
-    
+
     COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
 
-    return smpi_coll_tuned_ompi_reduce_generic( sendbuf, recvbuf, count, datatype, 
+    return smpi_coll_tuned_ompi_reduce_generic( sendbuf, recvbuf, count, datatype,
                                            op, root, comm,
-                                           ompi_coll_tuned_topo_build_chain(fanout, comm, root), 
+                                           ompi_coll_tuned_topo_build_chain(fanout, comm, root),
                                            segcount, 0 );
 }
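COLL_TUNED_COMPUTED_SEGCOUNT is defined elsewhere in the colls code; the essential computation is how many whole elements fit into segsize bytes, with zero meaning "do not segment". A hedged sketch of that logic (not the macro's verbatim text):

    #include <cstdio>

    static int computed_segcount(unsigned long segsize, unsigned long typelng, int count) {
        if (segsize == 0 || typelng == 0 || segsize < typelng)
            return count;                        // one unsegmented block
        int per_seg = (int)(segsize / typelng);  // whole elements per segment
        return per_seg < count ? per_seg : count;
    }

    int main() {
        // e.g. 32 KiB segments of an 8-byte type -> 4096 elements per segment
        std::printf("%d\n", computed_segcount(32 * 1024, 8, 100000));  // 4096
        return 0;
    }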
 
@@ -373,16 +373,16 @@ int Coll_reduce_ompi_pipeline::reduce( void *sendbuf, void *recvbuf,
     const double b4 =  1.6761;
     typelng= datatype->size();
     int communicator_size = comm->size();
-    size_t message_size = typelng * count; 
+    size_t message_size = typelng * count;
 
     if (communicator_size > (a2 * message_size + b2)) {
-        // Pipeline_1K 
+        // Pipeline_1K
         segsize = 1024;
     }else if (communicator_size > (a4 * message_size + b4)) {
-        // Pipeline_32K 
+        // Pipeline_32K
         segsize = 32*1024;
     } else {
-        // Pipeline_64K 
+        // Pipeline_64K
         segsize = 64*1024;
     }
 
@@ -391,9 +391,9 @@ int Coll_reduce_ompi_pipeline::reduce( void *sendbuf, void *recvbuf,
 
     COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
 
-    return smpi_coll_tuned_ompi_reduce_generic( sendbuf, recvbuf, count, datatype, 
+    return smpi_coll_tuned_ompi_reduce_generic( sendbuf, recvbuf, count, datatype,
                                            op, root, comm,
-                                           ompi_coll_tuned_topo_build_chain( 1, comm, root), 
+                                           ompi_coll_tuned_topo_build_chain( 1, comm, root),
                                            segcount, 0);
 }
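The a/b constants above are fitted linear boundaries in the (message size, communicator size) plane: the larger the communicator relative to the message, the smaller the segment chosen. A sketch of the rule's shape with placeholder coefficients (the real a2/b2/a4/b4 are defined in the file, partly outside this hunk):

    #include <cstdio>

    static unsigned long pick_segsize(int comm_size, unsigned long message_size,
                                      double a2, double b2, double a4, double b4) {
        if (comm_size > a2 * message_size + b2) return 1024;       // Pipeline_1K
        if (comm_size > a4 * message_size + b4) return 32 * 1024;  // Pipeline_32K
        return 64 * 1024;                                          // Pipeline_64K
    }

    int main() {
        // Placeholder coefficients, not the fitted values from the file.
        std::printf("%lu\n", pick_segsize(64, 4096, 1e-3, 10.0, 1e-4, 1.6761));  // 1024
        return 0;
    }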
 
@@ -414,7 +414,7 @@ int Coll_reduce_ompi_binary::reduce( void *sendbuf, void *recvbuf,
      */
     typelng=datatype->size();
 
-        // Binary_32K 
+        // Binary_32K
     segsize = 32*1024;
 
     XBT_DEBUG("coll:tuned:reduce_intra_binary rank %d ss %5d",
@@ -422,9 +422,9 @@ int Coll_reduce_ompi_binary::reduce( void *sendbuf, void *recvbuf,
 
     COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
 
-    return smpi_coll_tuned_ompi_reduce_generic( sendbuf, recvbuf, count, datatype, 
-                                           op, root, comm, 
-                                           ompi_coll_tuned_topo_build_tree(2, comm, root), 
+    return smpi_coll_tuned_ompi_reduce_generic( sendbuf, recvbuf, count, datatype,
+                                           op, root, comm,
+                                           ompi_coll_tuned_topo_build_tree(2, comm, root),
                                            segcount, 0);
 }
 
@@ -449,13 +449,13 @@ int Coll_reduce_ompi_binomial::reduce( void *sendbuf, void *recvbuf,
      */
     typelng= datatype->size();
     int communicator_size = comm->size();
-    size_t message_size = typelng * count; 
+    size_t message_size = typelng * count;
     if (((communicator_size < 8) && (message_size < 20480)) ||
                (message_size < 2048) || (count <= 1)) {
         /* Binomial_0K */
         segsize = 0;
     } else if (communicator_size > (a1 * message_size + b1)) {
-        // Binomial_1K 
+        // Binomial_1K
         segsize = 1024;
     }
 
@@ -463,21 +463,21 @@ int Coll_reduce_ompi_binomial::reduce( void *sendbuf, void *recvbuf,
                  comm->rank(), segsize);
     COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
 
-    return smpi_coll_tuned_ompi_reduce_generic( sendbuf, recvbuf, count, datatype, 
-                                           op, root, comm, 
-                                           ompi_coll_tuned_topo_build_in_order_bmtree(comm, root), 
+    return smpi_coll_tuned_ompi_reduce_generic( sendbuf, recvbuf, count, datatype,
+                                           op, root, comm,
+                                           ompi_coll_tuned_topo_build_in_order_bmtree(comm, root),
                                            segcount, 0);
 }
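For instance, 4 ranks reducing 1000 elements of an 8-byte type give message_size = 8000: communicator_size < 8 and message_size < 20480, so the first branch fires and the reduction runs unsegmented (Binomial_0K).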
 
 /*
- * reduce_intra_in_order_binary 
- * 
+ * reduce_intra_in_order_binary
+ *
  * Function:      Logarithmic reduce operation for non-commutative operations.
 * Accepts:       same as MPI_Reduce()
  * Returns:       MPI_SUCCESS or error code
  */
 int Coll_reduce_ompi_in_order_binary::reduce( void *sendbuf, void *recvbuf,
-                                                  int count, 
+                                                  int count,
                                                   MPI_Datatype datatype,
                                                   MPI_Op  op, int root,
                                                   MPI_Comm  comm)
@@ -503,9 +503,9 @@ int Coll_reduce_ompi_in_order_binary::reduce( void *sendbuf, void *recvbuf,
 
     /* An in-order binary tree must use root (size-1) to preserve the order of
        operations.  Thus, if root is not rank (size - 1), then we must handle
-       1. MPI_IN_PLACE option on real root, and 
+       1. MPI_IN_PLACE option on real root, and
        2. we must allocate temporary recvbuf on rank (size - 1).
-       Note that generic function must be careful not to switch order of 
+       Note that the generic function must be careful not to switch the order of
        operations for non-commutative ops.
     */
     io_root = size - 1;
@@ -514,7 +514,7 @@ int Coll_reduce_ompi_in_order_binary::reduce( void *sendbuf, void *recvbuf,
     if (io_root != root) {
         ptrdiff_t text, ext;
         char *tmpbuf = NULL;
-    
+
         ext=datatype->get_extent();
         text=datatype->get_extent();
 
@@ -538,8 +538,8 @@ int Coll_reduce_ompi_in_order_binary::reduce( void *sendbuf, void *recvbuf,
 
     /* Use generic reduce with in-order binary tree topology and io_root */
     ret = smpi_coll_tuned_ompi_reduce_generic( use_this_sendbuf, use_this_recvbuf, count, datatype,
-                                          op, io_root, comm, 
-                                          ompi_coll_tuned_topo_build_in_order_bintree(comm), 
+                                          op, io_root, comm,
+                                          ompi_coll_tuned_topo_build_in_order_bintree(comm),
                                           segcount, 0 );
     if (MPI_SUCCESS != ret) { return ret; }
 
@@ -553,7 +553,7 @@ int Coll_reduce_ompi_in_order_binary::reduce( void *sendbuf, void *recvbuf,
             if (MPI_IN_PLACE == sendbuf) {
               smpi_free_tmp_buffer(use_this_sendbuf);
             }
-          
+
         } else if (io_root == rank) {
             /* Send result from use_this_recvbuf to root */
             Request::send(use_this_recvbuf, count, datatype, root,
@@ -569,8 +569,8 @@ int Coll_reduce_ompi_in_order_binary::reduce( void *sendbuf, void *recvbuf,
 /*
  * Linear functions are copied from the BASIC coll module
  * they do not segment the message and are simple implementations
- * but for some small number of nodes and/or small data sizes they 
- * are just as fast as tuned/tree based segmenting operations 
+ * but for some small number of nodes and/or small data sizes they
+ * are just as fast as tuned/tree based segmenting operations
  * and as such may be selected by the decision functions
  * These are copied into this module due to the way we select modules
  * in V1. i.e. in V2 we will handle this differently and so will not
@@ -618,7 +618,7 @@ Coll_reduce_ompi_basic_linear::reduce(void *sbuf, void *rbuf, int count,
         return MPI_SUCCESS;
     }
 
-    /* see discussion in ompi_coll_basic_reduce_lin_intra about 
+    /* see discussion in ompi_coll_basic_reduce_lin_intra about
        extent and true extent */
     /* for reducing buffer allocation lengths.... */
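The extent/true-extent pattern that comment refers to is the same one used for accumbuf earlier: allocate true_extent + (count - 1) * extent bytes, then shift by -lower_bound so element 0's data starts at the raw allocation. A hedged standalone sketch (helper name hypothetical):

    // Illustration only: extent-based buffer sizing with lower-bound shift.
    static char* alloc_reduce_buffer(long true_extent, long extent, long lower_bound,
                                     int count, char** raw_out /* keep to free */) {
        *raw_out = new char[true_extent + (long)(count - 1) * extent];
        // Element i's bytes live at buf + lower_bound + i * extent, so shifting
        // by -lower_bound puts element 0 at the start of the raw allocation.
        return *raw_out - lower_bound;
    }

    int main() {
        char* raw = nullptr;
        char* buf = alloc_reduce_buffer(8, 8, 0, 1000, &raw);
        (void)buf;     // use as the accumulation buffer
        delete[] raw;  // free the raw pointer, not the shifted one
        return 0;
    }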