Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
The ompi selector was using the wrong algorithm for alltoallv; add the right one and use it
[simgrid.git] / src / smpi / colls / smpi_openmpi_selector.c
index c0e948b..8109fba 100644 (file)
@@ -32,16 +32,15 @@ int smpi_coll_tuned_allreduce_ompi(void *sbuf, void *rbuf, int count,
                                                                    op, comm));
     } 
 
-    if( /*smpi_op_is_commute(op) && */(count > comm_size) ) {
+    if( smpi_op_is_commute(op) && (count > comm_size) ) {
         const size_t segment_size = 1 << 20; /* 1 MB */
         if ((comm_size * segment_size >= block_dsize)) {
-            //return (smpi_coll_tuned_allreduce_intra_ring (sbuf, rbuf, count, dtype, 
             //FIXME: ok, these are not the right algorithms, try to find closer ones
+            // lr is a good match for allreduce_ring (difference is mainly the use of sendrecv)
             return smpi_coll_tuned_allreduce_lr(sbuf, rbuf, count, dtype,
                                               op, comm);
         } else {
-           // return (smpi_coll_tuned_allreduce_intra_ring_segmented (sbuf, rbuf, 
-           return (smpi_coll_tuned_allreduce_rab2 (sbuf, rbuf,
+           return (smpi_coll_tuned_allreduce_ompi_ring_segmented (sbuf, rbuf,
                                                                     count, dtype, 
                                                                     op, comm 
                                                                     /*segment_size*/));
@@ -95,32 +94,32 @@ int smpi_coll_tuned_alltoallv_ompi(void *sbuf, int *scounts, int *sdisps,
                                               )
 {
     /* For starters, just keep the original algorithm. */
-    return smpi_coll_tuned_alltoallv_bruck(sbuf, scounts, sdisps, sdtype, 
+    return smpi_coll_tuned_alltoallv_ompi_basic_linear(sbuf, scounts, sdisps, sdtype, 
                                                         rbuf, rcounts, rdisps,rdtype,
                                                         comm);
 }
 
-/*
-void smpi_coll_tuned_barrier_ompi(MPI_Comm  comm)
+
+int smpi_coll_tuned_barrier_ompi(MPI_Comm  comm)
 {    int communicator_size = smpi_comm_size(comm);
 
     if( 2 == communicator_size )
-        return smpi_coll_tuned_barrier_intra_two_procs(comm, module);
-     * Basic optimisation. If we have a power of 2 number of nodes
-     * the use the recursive doubling algorithm, otherwise
-     * bruck is the one we want.
+        return smpi_coll_tuned_barrier_ompi_two_procs(comm);
+/*     * Basic optimisation. If we have a power of 2 number of nodes*/
+/*     * then use the recursive doubling algorithm, otherwise*/
+/*     * bruck is the one we want.*/
     {
-        bool has_one = false;
+        int has_one = 0;
         for( ; communicator_size > 0; communicator_size >>= 1 ) {
             if( communicator_size & 0x1 ) {
                 if( has_one )
-                    return smpi_coll_tuned_barrier_intra_bruck(comm, module);
-                has_one = true;
+                    return smpi_coll_tuned_barrier_ompi_bruck(comm);
+                has_one = 1;
             }
         }
     }
-    return smpi_coll_tuned_barrier_intra_recursivedoubling(comm, module);
-}*/
+    return smpi_coll_tuned_barrier_ompi_recursivedoubling(comm);
+}
 
 int smpi_coll_tuned_bcast_ompi(void *buff, int count,
                                           MPI_Datatype datatype, int root,
@@ -129,17 +128,17 @@ int smpi_coll_tuned_bcast_ompi(void *buff, int count,
 {
     /* Decision function based on MX results for 
        messages up to 36MB and communicator sizes up to 64 nodes */
-    //const size_t small_message_size = 2048;
+    const size_t small_message_size = 2048;
     const size_t intermediate_message_size = 370728;
-    //const double a_p16  = 3.2118e-6; /* [1 / byte] */
-    //const double b_p16  = 8.7936;   
-    //const double a_p64  = 2.3679e-6; /* [1 / byte] */
-    //const double b_p64  = 1.1787;     
-    //const double a_p128 = 1.6134e-6; /* [1 / byte] */
-    //const double b_p128 = 2.1102;
+    const double a_p16  = 3.2118e-6; /* [1 / byte] */
+    const double b_p16  = 8.7936;   
+    const double a_p64  = 2.3679e-6; /* [1 / byte] */
+    const double b_p64  = 1.1787;     
+    const double a_p128 = 1.6134e-6; /* [1 / byte] */
+    const double b_p128 = 2.1102;
 
     int communicator_size;
-    int segsize = 0;
+    //int segsize = 0;
     size_t message_size, dsize;
 
     communicator_size = smpi_comm_size(comm);
@@ -150,52 +149,45 @@ int smpi_coll_tuned_bcast_ompi(void *buff, int count,
 
     /* Handle messages of small and intermediate size, and 
        single-element broadcasts */
-    if ((message_size < /*small_message_size*/intermediate_message_size) || (count <= 1)) {
+    if ((message_size < small_message_size) || (count <= 1)) {
         /* Binomial without segmentation */
-        segsize = 0;
         return  smpi_coll_tuned_bcast_binomial_tree (buff, count, datatype, 
-                                                      root, comm/*
-                                                      segsize*/);
+                                                      root, comm);
 
-    } /*else if (message_size < intermediate_message_size) {
+    } else if (message_size < intermediate_message_size) {
         // SplittedBinary with 1KB segments
-        segsize = 1024;
-        return smpi_coll_tuned_bcast_split_bintree(buff, count, datatype, 
-                                                         root, comm
-                                                         segsize);
+        return smpi_coll_tuned_bcast_ompi_split_bintree(buff, count, datatype, 
+                                                         root, comm);
 
-    } 
-     Handle large message sizes 
+    }
+     //Handle large message sizes 
     else if (communicator_size < (a_p128 * message_size + b_p128)) {
-         Pipeline with 128KB segments 
-        segsize = 1024  << 7;
-        return smpi_coll_tuned_bcast_flattree_pipeline (buff, count, datatype, 
-                                                     root, comm, module,
-                                                     segsize);
+        //Pipeline with 128KB segments 
+        //segsize = 1024  << 7;
+        return smpi_coll_tuned_bcast_ompi_pipeline (buff, count, datatype, 
+                                                     root, comm);
+                                                     
 
     } else if (communicator_size < 13) {
         // Split Binary with 8KB segments 
-        segsize = 1024 << 3;
-        return smpi_coll_tuned_bcast_intra_split_bintree(buff, count, datatype, 
-                                                         root, comm, module,
-                                                         segsize);
+        return smpi_coll_tuned_bcast_ompi_split_bintree(buff, count, datatype, 
+                                                         root, comm);
        
     } else if (communicator_size < (a_p64 * message_size + b_p64)) {
         // Pipeline with 64KB segments 
-        segsize = 1024 << 6;
-        return smpi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, 
-                                                     root, comm, module,
-                                                     segsize);
+        //segsize = 1024 << 6;
+        return smpi_coll_tuned_bcast_ompi_pipeline (buff, count, datatype, 
+                                                     root, comm);
+                                                     
 
     } else if (communicator_size < (a_p16 * message_size + b_p16)) {
-         Pipeline with 16KB segments 
+        //Pipeline with 16KB segments 
         //segsize = 1024 << 4;
-        return smpi_coll_tuned_bcast_flattree_pipeline (buff, count, datatype, 
-                                                     root, comm, module,
-                                                     segsize);
-
-    }*/
+        return smpi_coll_tuned_bcast_ompi_pipeline (buff, count, datatype, 
+                                                     root, comm);
+                                                     
 
+    }
     /* Pipeline with 8KB segments */
     //segsize = 1024 << 3;
     return smpi_coll_tuned_bcast_flattree_pipeline (buff, count, datatype, 
@@ -239,12 +231,12 @@ int smpi_coll_tuned_reduce_ompi( void *sendbuf, void *recvbuf,
     int communicator_size=0;
     //int segsize = 0;
     size_t message_size, dsize;
-    //const double a1 =  0.6016 / 1024.0; /* [1/B] */
-    //const double b1 =  1.3496;
-    //const double a2 =  0.0410 / 1024.0; /* [1/B] */
-    //const double b2 =  9.7128;
-    //const double a3 =  0.0422 / 1024.0; /* [1/B] */
-    //const double b3 =  1.1614;
+    const double a1 =  0.6016 / 1024.0; /* [1/B] */
+    const double b1 =  1.3496;
+    const double a2 =  0.0410 / 1024.0; /* [1/B] */
+    const double b2 =  9.7128;
+    const double a3 =  0.0422 / 1024.0; /* [1/B] */
+    const double b3 =  1.1614;
     //const double a4 =  0.0033 / 1024.0; /* [1/B] */
     //const double b4 =  1.6761;
 
@@ -260,47 +252,47 @@ int smpi_coll_tuned_reduce_ompi( void *sendbuf, void *recvbuf,
      * If the operation is non commutative we currently have choice of linear 
      * or in-order binary tree algorithm.
      */
-/*    if( !ompi_op_is_commute(op) ) {
+    if( !smpi_op_is_commute(op) ) {
         if ((communicator_size < 12) && (message_size < 2048)) {
-            return smpi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm, module); 
+            return smpi_coll_tuned_reduce_ompi_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm/*, module*/); 
         } 
-        return smpi_coll_tuned_reduce_intra_in_order_binary (sendbuf, recvbuf, count, datatype, op, root, comm, module,
-                                                             0, max_requests); 
-    }*/
+        return smpi_coll_tuned_reduce_ompi_in_order_binary (sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
+                                                             0, max_requests*/); 
+    }
 
     if ((communicator_size < 8) && (message_size < 512)){
         /* Linear_0K */
-        return smpi_coll_tuned_reduce_flat_tree (sendbuf, recvbuf, count, datatype, op, root, comm); 
+        return smpi_coll_tuned_reduce_ompi_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm); 
     } else if (((communicator_size < 8) && (message_size < 20480)) ||
                (message_size < 2048) || (count <= 1)) {
         /* Binomial_0K */
         //segsize = 0;
-        return smpi_coll_tuned_reduce_binomial(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
+        return smpi_coll_tuned_reduce_ompi_binomial(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
                                                      segsize, max_requests*/);
-    } /*else if (communicator_size > (a1 * message_size + b1)) {
+    } else if (communicator_size > (a1 * message_size + b1)) {
         // Binomial_1K 
-        segsize = 1024;
-        return smpi_coll_tuned_reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm, module,
-                                                     segsize, max_requests);
+        //segsize = 1024;
+        return smpi_coll_tuned_reduce_ompi_binomial(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
+                                                     segsize, max_requests*/);
     } else if (communicator_size > (a2 * message_size + b2)) {
         // Pipeline_1K 
-        segsize = 1024;
-        return smpi_coll_tuned_reduce_NTSL (sendbuf, recvbuf, count, datatype, op, root, comm, module, 
-                                                      segsize, max_requests);
+        //segsize = 1024;
+        return smpi_coll_tuned_reduce_ompi_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, 
+                                                      segsize, max_requests*/);
     } else if (communicator_size > (a3 * message_size + b3)) {
         // Binary_32K 
-        segsize = 32*1024;
-        return smpi_coll_tuned_reduce_intra_binary( sendbuf, recvbuf, count, datatype, op, root,
-                                                    comm, module, segsize, max_requests);
+        //segsize = 32*1024;
+        return smpi_coll_tuned_reduce_ompi_binary( sendbuf, recvbuf, count, datatype, op, root,
+                                                    comm/*, module, segsize, max_requests*/);
     }
-    if (communicator_size > (a4 * message_size + b4)) {
+    /*if (communicator_size > (a4 * message_size + b4)) {
         // Pipeline_32K 
         segsize = 32*1024;
     } else {
         // Pipeline_64K 
         segsize = 64*1024;
     }*/
-    return smpi_coll_tuned_reduce_NTSL (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, 
+    return smpi_coll_tuned_reduce_ompi_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, 
                                                   segsize, max_requests*/);
 
 #if 0
@@ -332,11 +324,11 @@ int smpi_coll_tuned_reduce_ompi( void *sendbuf, void *recvbuf,
 #endif  /* 0 */
 }
 
-/*int smpi_coll_tuned_reduce_scatter_ompi( void *sbuf, void *rbuf,
+int smpi_coll_tuned_reduce_scatter_ompi( void *sbuf, void *rbuf,
                                                     int *rcounts,
                                                     MPI_Datatype dtype,
                                                     MPI_Op  op,
-                                                    MPI_Comm  comm,
+                                                    MPI_Comm  comm
                                                     )
 {
     int comm_size, i, pow2;
@@ -345,25 +337,26 @@ int smpi_coll_tuned_reduce_ompi( void *sendbuf, void *recvbuf,
     const double b = 8.0;
     const size_t small_message_size = 12 * 1024;
     const size_t large_message_size = 256 * 1024;
-    bool zerocounts = false;
-
-    OPAL_OUTPUT((smpi_coll_tuned_stream, "smpi_coll_tuned_reduce_scatter_ompi"));
+    int zerocounts = 0;
 
+    XBT_DEBUG("smpi_coll_tuned_reduce_scatter_ompi");
+    
     comm_size = smpi_comm_size(comm);
     // We need data size for decision function 
-    ompi_datatype_type_size(dtype, &dsize);
+    dsize=smpi_datatype_size(dtype);
     total_message_size = 0;
     for (i = 0; i < comm_size; i++) { 
         total_message_size += rcounts[i];
         if (0 == rcounts[i]) {
-            zerocounts = true;
+            zerocounts = 1;
         }
     }
 
-    if( !ompi_op_is_commute(op) || (zerocounts)) {
-        return smpi_coll_tuned_reduce_scatter_intra_nonoverlapping (sbuf, rbuf, rcounts, 
+    if( !smpi_op_is_commute(op) || (zerocounts)) {
+        smpi_mpi_reduce_scatter (sbuf, rbuf, rcounts, 
                                                                     dtype, op, 
-                                                                    comm, module); 
+                                                                    comm); 
+        return MPI_SUCCESS;
     }
    
     total_message_size *= dsize;
@@ -375,20 +368,17 @@ int smpi_coll_tuned_reduce_ompi( void *sendbuf, void *recvbuf,
         ((total_message_size <= large_message_size) && (pow2 == comm_size)) ||
         (comm_size >= a * total_message_size + b)) {
         return 
-            smpi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts,
+            smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(sbuf, rbuf, rcounts,
                                                                         dtype, op,
-                                                                        comm, module);
+                                                                        comm);
     } 
-    return smpi_coll_tuned_reduce_scatter_intra_ring(sbuf, rbuf, rcounts,
+    return smpi_coll_tuned_reduce_scatter_ompi_ring(sbuf, rbuf, rcounts,
                                                      dtype, op,
-                                                     comm, module);
+                                                     comm);
 
-  
-    return smpi_coll_tuned_reduce_scatter(sbuf, rbuf, rcounts,
-                                                     dtype, op,
-                                                     comm;
 
-}*/
+
+}
 
 int smpi_coll_tuned_allgather_ompi(void *sbuf, int scount, 
                                               MPI_Datatype sdtype,
@@ -434,15 +424,15 @@ int smpi_coll_tuned_allgather_ompi(void *sbuf, int scount,
                                                          comm);
         }
     } else {
-        //if (communicator_size % 2) {
+        if (communicator_size % 2) {
             return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype, 
                                                         rbuf, rcount, rdtype, 
                                                         comm);
-        /*} else {
-            return  smpi_coll_tuned_allgather_intra_neighborexchange(sbuf, scount, sdtype,
+        } else {
+            return  smpi_coll_tuned_allgather_ompi_neighborexchange(sbuf, scount, sdtype,
                                                                      rbuf, rcount, rdtype,
-                                                                     comm, module);
-        }*/
+                                                                     comm);
+        }
     }
    
 #if defined(USE_MPICH2_DECISION)
@@ -456,17 +446,17 @@ int smpi_coll_tuned_allgather_ompi(void *sbuf, int scount,
        - for everything else use ring.
     */
     if ((pow2_size == communicator_size) && (total_dsize < 524288)) {
-        return smpi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype, 
+        return smpi_coll_tuned_allgather_rdb(sbuf, scount, sdtype, 
                                                                  rbuf, rcount, rdtype, 
-                                                                 comm, module);
+                                                                 comm);
     } else if (total_dsize <= 81920) { 
-        return smpi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype, 
+        return smpi_coll_tuned_allgather_bruck(sbuf, scount, sdtype, 
                                                      rbuf, rcount, rdtype,
-                                                     comm, module);
+                                                     comm);
     } 
-    return smpi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype, 
+    return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype, 
                                                 rbuf, rcount, rdtype,
-                                                comm, module);
+                                                comm);
 #endif  /* defined(USE_MPICH2_DECISION) */
 }
 
@@ -508,30 +498,30 @@ int smpi_coll_tuned_allgatherv_ompi(void *sbuf, int scount,
                                                       comm);
 
     } else {
-//        if (communicator_size % 2) {
+        if (communicator_size % 2) {
             return smpi_coll_tuned_allgatherv_ring(sbuf, scount, sdtype, 
                                                          rbuf, rcounts, rdispls, rdtype, 
                                                          comm);
-/*        } else {
-            return  smpi_coll_tuned_allgatherv_intra_neighborexchange(sbuf, scount, sdtype,
+        } else {
+            return  smpi_coll_tuned_allgatherv_ompi_neighborexchange(sbuf, scount, sdtype,
                                                                       rbuf, rcounts, rdispls, rdtype, 
-                                                                      comm, module);
-        }*/
+                                                                      comm);
+        }
     }
 }
-/*
+
 int smpi_coll_tuned_gather_ompi(void *sbuf, int scount, 
                                            MPI_Datatype sdtype,
                                            void* rbuf, int rcount, 
                                            MPI_Datatype rdtype, 
                                            int root,
-                                           MPI_Comm  comm,
+                                           MPI_Comm  comm
                                            )
 {
-    const int large_segment_size = 32768;
-    const int small_segment_size = 1024;
+    //const int large_segment_size = 32768;
+    //const int small_segment_size = 1024;
 
-    const size_t large_block_size = 92160;
+    //const size_t large_block_size = 92160;
     const size_t intermediate_block_size = 6000;
     const size_t small_block_size = 1024;
 
@@ -541,52 +531,49 @@ int smpi_coll_tuned_gather_ompi(void *sbuf, int scount,
     int communicator_size, rank;
     size_t dsize, block_size;
 
-    OPAL_OUTPUT((smpi_coll_tuned_stream, 
-                 "smpi_coll_tuned_gather_ompi"));
+    XBT_DEBUG("smpi_coll_tuned_gather_ompi");
 
     communicator_size = smpi_comm_size(comm);
-    rank = ompi_comm_rank(comm);
+    rank = smpi_comm_rank(comm);
 
     // Determine block size 
     if (rank == root) {
-        ompi_datatype_type_size(rdtype, &dsize);
+        dsize = smpi_datatype_size(rdtype);
         block_size = dsize * rcount;
     } else {
-        ompi_datatype_type_size(sdtype, &dsize);
+        dsize = smpi_datatype_size(sdtype);
         block_size = dsize * scount;
     }
 
-    if (block_size > large_block_size) {
-        return smpi_coll_tuned_gather_intra_linear_sync (sbuf, scount, sdtype, 
-                                                         rbuf, rcount, rdtype, 
-                                                         root, comm, module,
-                                                         large_segment_size);
+/*    if (block_size > large_block_size) {*/
+/*        return smpi_coll_tuned_gather_ompi_linear_sync (sbuf, scount, sdtype, */
+/*                                                         rbuf, rcount, rdtype, */
+/*                                                         root, comm);*/
 
-    } else if (block_size > intermediate_block_size) {
-        return smpi_coll_tuned_gather_intra_linear_sync (sbuf, scount, sdtype, 
+/*    } else*/ if (block_size > intermediate_block_size) {
+        return smpi_coll_tuned_gather_ompi_linear_sync (sbuf, scount, sdtype, 
                                                          rbuf, rcount, rdtype, 
-                                                         root, comm, module,
-                                                         small_segment_size);
+                                                         root, comm);
 
     } else if ((communicator_size > large_communicator_size) ||
                ((communicator_size > small_communicator_size) &&
                 (block_size < small_block_size))) {
-        return smpi_coll_tuned_gather_intra_binomial (sbuf, scount, sdtype, 
+        return smpi_coll_tuned_gather_ompi_binomial (sbuf, scount, sdtype, 
                                                       rbuf, rcount, rdtype, 
-                                                      root, comm, module);
+                                                      root, comm);
 
     }
     // Otherwise, use basic linear 
-    return smpi_coll_tuned_gather_intra_basic_linear (sbuf, scount, sdtype, 
+    return smpi_coll_tuned_gather_ompi_basic_linear (sbuf, scount, sdtype, 
                                                       rbuf, rcount, rdtype, 
-                                                      root, comm, module);
-}*/
-/*
+                                                      root, comm);
+}
+
 int smpi_coll_tuned_scatter_ompi(void *sbuf, int scount, 
                                             MPI_Datatype sdtype,
                                             void* rbuf, int rcount, 
                                             MPI_Datatype rdtype, 
-                                            int root, MPI_Comm  comm,
+                                            int root, MPI_Comm  comm
                                             )
 {
     const size_t small_block_size = 300;
@@ -594,28 +581,27 @@ int smpi_coll_tuned_scatter_ompi(void *sbuf, int scount,
     int communicator_size, rank;
     size_t dsize, block_size;
 
-    OPAL_OUTPUT((smpi_coll_tuned_stream, 
-                 "smpi_coll_tuned_scatter_ompi"));
+    XBT_DEBUG("smpi_coll_tuned_scatter_ompi");
 
     communicator_size = smpi_comm_size(comm);
-    rank = ompi_comm_rank(comm);
+    rank = smpi_comm_rank(comm);
     // Determine block size 
     if (root == rank) {
-        ompi_datatype_type_size(sdtype, &dsize);
+        dsize=smpi_datatype_size(sdtype);
         block_size = dsize * scount;
     } else {
-        ompi_datatype_type_size(rdtype, &dsize);
+        dsize=smpi_datatype_size(rdtype);
         block_size = dsize * rcount;
     } 
 
     if ((communicator_size > small_comm_size) &&
         (block_size < small_block_size)) {
-        return smpi_coll_tuned_scatter_intra_binomial (sbuf, scount, sdtype, 
+        return smpi_coll_tuned_scatter_ompi_binomial (sbuf, scount, sdtype, 
                                                        rbuf, rcount, rdtype, 
-                                                       root, comm, module);
+                                                       root, comm);
     }
-    return smpi_coll_tuned_scatter_intra_basic_linear (sbuf, scount, sdtype, 
+    return smpi_coll_tuned_scatter_ompi_basic_linear (sbuf, scount, sdtype, 
                                                        rbuf, rcount, rdtype, 
-                                                       root, comm, module);
-}*/
+                                                       root, comm);
+}