X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/30ce1928845304200262ecc119ea735408d1098e..c25acb156967c222e2512e0168767fe488e7d25c:/src/smpi/colls/smpi_openmpi_selector.c diff --git a/src/smpi/colls/smpi_openmpi_selector.c b/src/smpi/colls/smpi_openmpi_selector.c index 6b463731c8..36f901feae 100644 --- a/src/smpi/colls/smpi_openmpi_selector.c +++ b/src/smpi/colls/smpi_openmpi_selector.c @@ -40,7 +40,6 @@ int smpi_coll_tuned_allreduce_ompi(void *sbuf, void *rbuf, int count, return smpi_coll_tuned_allreduce_lr(sbuf, rbuf, count, dtype, op, comm); } else { - // return (smpi_coll_tuned_allreduce_intra_ring_segmented (sbuf, rbuf, return (smpi_coll_tuned_allreduce_ompi_ring_segmented (sbuf, rbuf, count, dtype, op, comm @@ -129,20 +128,20 @@ int smpi_coll_tuned_bcast_ompi(void *buff, int count, { /* Decision function based on MX results for messages up to 36MB and communicator sizes up to 64 nodes */ - //const size_t small_message_size = 2048; + const size_t small_message_size = 2048; const size_t intermediate_message_size = 370728; - //const double a_p16 = 3.2118e-6; /* [1 / byte] */ - //const double b_p16 = 8.7936; - //const double a_p64 = 2.3679e-6; /* [1 / byte] */ - //const double b_p64 = 1.1787; - //const double a_p128 = 1.6134e-6; /* [1 / byte] */ - //const double b_p128 = 2.1102; - - //int communicator_size; + const double a_p16 = 3.2118e-6; /* [1 / byte] */ + const double b_p16 = 8.7936; + const double a_p64 = 2.3679e-6; /* [1 / byte] */ + const double b_p64 = 1.1787; + const double a_p128 = 1.6134e-6; /* [1 / byte] */ + const double b_p128 = 2.1102; + + int communicator_size; //int segsize = 0; size_t message_size, dsize; - //communicator_size = smpi_comm_size(comm); + communicator_size = smpi_comm_size(comm); /* else we need data size for decision function */ dsize = smpi_datatype_size(datatype); @@ -150,52 +149,45 @@ int smpi_coll_tuned_bcast_ompi(void *buff, int count, /* Handle messages of small and intermediate size, and single-element broadcasts */ - if ((message_size < /*small_message_size*/intermediate_message_size) || (count <= 1)) { + if ((message_size < small_message_size) || (count <= 1)) { /* Binomial without segmentation */ - //segsize = 0; return smpi_coll_tuned_bcast_binomial_tree (buff, count, datatype, - root, comm/* - segsize*/); + root, comm); - } /*else if (message_size < intermediate_message_size) { + } else if (message_size < intermediate_message_size) { // SplittedBinary with 1KB segments - segsize = 1024; - return smpi_coll_tuned_bcast_split_bintree(buff, count, datatype, - root, comm - segsize); + return smpi_coll_tuned_bcast_ompi_split_bintree(buff, count, datatype, + root, comm); - } - Handle large message sizes + } + //Handle large message sizes else if (communicator_size < (a_p128 * message_size + b_p128)) { - Pipeline with 128KB segments - segsize = 1024 << 7; - return smpi_coll_tuned_bcast_flattree_pipeline (buff, count, datatype, - root, comm, module, - segsize); + //Pipeline with 128KB segments + //segsize = 1024 << 7; + return smpi_coll_tuned_bcast_ompi_pipeline (buff, count, datatype, + root, comm); + } else if (communicator_size < 13) { // Split Binary with 8KB segments - segsize = 1024 << 3; - return smpi_coll_tuned_bcast_intra_split_bintree(buff, count, datatype, - root, comm, module, - segsize); + return smpi_coll_tuned_bcast_ompi_split_bintree(buff, count, datatype, + root, comm); } else if (communicator_size < (a_p64 * message_size + b_p64)) { // Pipeline with 64KB segments - segsize = 1024 << 6; - return smpi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, - root, comm, module, - segsize); + //segsize = 1024 << 6; + return smpi_coll_tuned_bcast_ompi_pipeline (buff, count, datatype, + root, comm); + } else if (communicator_size < (a_p16 * message_size + b_p16)) { - Pipeline with 16KB segments + //Pipeline with 16KB segments //segsize = 1024 << 4; - return smpi_coll_tuned_bcast_flattree_pipeline (buff, count, datatype, - root, comm, module, - segsize); - - }*/ + return smpi_coll_tuned_bcast_ompi_pipeline (buff, count, datatype, + root, comm); + + } /* Pipeline with 8KB segments */ //segsize = 1024 << 3; return smpi_coll_tuned_bcast_flattree_pipeline (buff, count, datatype, @@ -239,12 +231,12 @@ int smpi_coll_tuned_reduce_ompi( void *sendbuf, void *recvbuf, int communicator_size=0; //int segsize = 0; size_t message_size, dsize; - //const double a1 = 0.6016 / 1024.0; /* [1/B] */ - //const double b1 = 1.3496; - //const double a2 = 0.0410 / 1024.0; /* [1/B] */ - //const double b2 = 9.7128; - //const double a3 = 0.0422 / 1024.0; /* [1/B] */ - //const double b3 = 1.1614; + const double a1 = 0.6016 / 1024.0; /* [1/B] */ + const double b1 = 1.3496; + const double a2 = 0.0410 / 1024.0; /* [1/B] */ + const double b2 = 9.7128; + const double a3 = 0.0422 / 1024.0; /* [1/B] */ + const double b3 = 1.1614; //const double a4 = 0.0033 / 1024.0; /* [1/B] */ //const double b4 = 1.6761; @@ -260,47 +252,47 @@ int smpi_coll_tuned_reduce_ompi( void *sendbuf, void *recvbuf, * If the operation is non commutative we currently have choice of linear * or in-order binary tree algorithm. */ -/* if( !ompi_op_is_commute(op) ) { + if( !smpi_op_is_commute(op) ) { if ((communicator_size < 12) && (message_size < 2048)) { - return smpi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm, module); + return smpi_coll_tuned_reduce_ompi_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm/*, module*/); } - return smpi_coll_tuned_reduce_intra_in_order_binary (sendbuf, recvbuf, count, datatype, op, root, comm, module, - 0, max_requests); - }*/ + return smpi_coll_tuned_reduce_ompi_in_order_binary (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, + 0, max_requests*/); + } if ((communicator_size < 8) && (message_size < 512)){ /* Linear_0K */ - return smpi_coll_tuned_reduce_flat_tree (sendbuf, recvbuf, count, datatype, op, root, comm); + return smpi_coll_tuned_reduce_ompi_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm); } else if (((communicator_size < 8) && (message_size < 20480)) || (message_size < 2048) || (count <= 1)) { /* Binomial_0K */ //segsize = 0; - return smpi_coll_tuned_reduce_binomial(sendbuf, recvbuf, count, datatype, op, root, comm/*, module, + return smpi_coll_tuned_reduce_ompi_binomial(sendbuf, recvbuf, count, datatype, op, root, comm/*, module, segsize, max_requests*/); - } /*else if (communicator_size > (a1 * message_size + b1)) { + } else if (communicator_size > (a1 * message_size + b1)) { // Binomial_1K - segsize = 1024; - return smpi_coll_tuned_reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm, module, - segsize, max_requests); + //segsize = 1024; + return smpi_coll_tuned_reduce_ompi_binomial(sendbuf, recvbuf, count, datatype, op, root, comm/*, module, + segsize, max_requests*/); } else if (communicator_size > (a2 * message_size + b2)) { // Pipeline_1K - segsize = 1024; - return smpi_coll_tuned_reduce_NTSL (sendbuf, recvbuf, count, datatype, op, root, comm, module, - segsize, max_requests); + //segsize = 1024; + return smpi_coll_tuned_reduce_ompi_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, + segsize, max_requests*/); } else if (communicator_size > (a3 * message_size + b3)) { // Binary_32K - segsize = 32*1024; - return smpi_coll_tuned_reduce_intra_binary( sendbuf, recvbuf, count, datatype, op, root, - comm, module, segsize, max_requests); + //segsize = 32*1024; + return smpi_coll_tuned_reduce_ompi_binary( sendbuf, recvbuf, count, datatype, op, root, + comm/*, module, segsize, max_requests*/); } - if (communicator_size > (a4 * message_size + b4)) { + /*if (communicator_size > (a4 * message_size + b4)) { // Pipeline_32K segsize = 32*1024; } else { // Pipeline_64K segsize = 64*1024; }*/ - return smpi_coll_tuned_reduce_NTSL (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, + return smpi_coll_tuned_reduce_ompi_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, segsize, max_requests*/); #if 0 @@ -434,15 +426,15 @@ int smpi_coll_tuned_allgather_ompi(void *sbuf, int scount, comm); } } else { - //if (communicator_size % 2) { + if (communicator_size % 2) { return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); - /*} else { - return smpi_coll_tuned_allgather_intra_neighborexchange(sbuf, scount, sdtype, + } else { + return smpi_coll_tuned_allgather_ompi_neighborexchange(sbuf, scount, sdtype, rbuf, rcount, rdtype, - comm, module); - }*/ + comm); + } } #if defined(USE_MPICH2_DECISION) @@ -456,17 +448,17 @@ int smpi_coll_tuned_allgather_ompi(void *sbuf, int scount, - for everything else use ring. */ if ((pow2_size == communicator_size) && (total_dsize < 524288)) { - return smpi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype, + return smpi_coll_tuned_allgather_rdb(sbuf, scount, sdtype, rbuf, rcount, rdtype, - comm, module); + comm); } else if (total_dsize <= 81920) { - return smpi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype, + return smpi_coll_tuned_allgather_bruck(sbuf, scount, sdtype, rbuf, rcount, rdtype, - comm, module); + comm); } - return smpi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype, + return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype, rbuf, rcount, rdtype, - comm, module); + comm); #endif /* defined(USE_MPICH2_DECISION) */ } @@ -508,15 +500,15 @@ int smpi_coll_tuned_allgatherv_ompi(void *sbuf, int scount, comm); } else { -// if (communicator_size % 2) { + if (communicator_size % 2) { return smpi_coll_tuned_allgatherv_ring(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm); -/* } else { - return smpi_coll_tuned_allgatherv_intra_neighborexchange(sbuf, scount, sdtype, + } else { + return smpi_coll_tuned_allgatherv_ompi_neighborexchange(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, - comm, module); - }*/ + comm); + } } } /*