/* selector for collective algorithms based on openmpi's default coll_tuned_decision_fixed selector */
-/* Copyright (c) 2009, 2010. The SimGrid Team.
+/* Copyright (c) 2009-2010, 2013-2014. The SimGrid Team.
* All rights reserved. */
/* This program is free software; you can redistribute it and/or modify it
return smpi_coll_tuned_allreduce_lr(sbuf, rbuf, count, dtype,
op, comm);
} else {
- // return (smpi_coll_tuned_allreduce_intra_ring_segmented (sbuf, rbuf,
return (smpi_coll_tuned_allreduce_ompi_ring_segmented (sbuf, rbuf,
count, dtype,
op, comm
comm);
} else if (block_dsize < 3000) {
- return smpi_coll_tuned_alltoall_simple(sbuf, scount, sdtype,
+ return smpi_coll_tuned_alltoall_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
}
- return smpi_coll_tuned_alltoall_pair (sbuf, scount, sdtype,
+ return smpi_coll_tuned_alltoall_ring (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
}
)
{
/* For starters, just keep the original algorithm. */
- return smpi_coll_tuned_alltoallv_bruck(sbuf, scounts, sdisps, sdtype,
+ return smpi_coll_tuned_alltoallv_ompi_basic_linear(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps,rdtype,
comm);
}
-/*
-void smpi_coll_tuned_barrier_ompi(MPI_Comm comm)
+
+int smpi_coll_tuned_barrier_ompi(MPI_Comm comm)
{ int communicator_size = smpi_comm_size(comm);
if( 2 == communicator_size )
- return smpi_coll_tuned_barrier_intra_two_procs(comm, module);
- * Basic optimisation. If we have a power of 2 number of nodes
- * the use the recursive doubling algorithm, otherwise
- * bruck is the one we want.
+ return smpi_coll_tuned_barrier_ompi_two_procs(comm);
+/* * Basic optimisation. If we have a power of 2 number of nodes*/
+/* * then use the recursive doubling algorithm, otherwise*/
+/* * bruck is the one we want.*/
{
- bool has_one = false;
+ int has_one = 0;
for( ; communicator_size > 0; communicator_size >>= 1 ) {
if( communicator_size & 0x1 ) {
if( has_one )
- return smpi_coll_tuned_barrier_intra_bruck(comm, module);
- has_one = true;
+ return smpi_coll_tuned_barrier_ompi_bruck(comm);
+ has_one = 1;
}
}
}
- return smpi_coll_tuned_barrier_intra_recursivedoubling(comm, module);
-}*/
+ return smpi_coll_tuned_barrier_ompi_recursivedoubling(comm);
+}
int smpi_coll_tuned_bcast_ompi(void *buff, int count,
MPI_Datatype datatype, int root,
{
/* Decision function based on MX results for
messages up to 36MB and communicator sizes up to 64 nodes */
- //const size_t small_message_size = 2048;
+ const size_t small_message_size = 2048;
const size_t intermediate_message_size = 370728;
- //const double a_p16 = 3.2118e-6; /* [1 / byte] */
- //const double b_p16 = 8.7936;
- //const double a_p64 = 2.3679e-6; /* [1 / byte] */
- //const double b_p64 = 1.1787;
- //const double a_p128 = 1.6134e-6; /* [1 / byte] */
- //const double b_p128 = 2.1102;
-
- //int communicator_size;
+ const double a_p16 = 3.2118e-6; /* [1 / byte] */
+ const double b_p16 = 8.7936;
+ const double a_p64 = 2.3679e-6; /* [1 / byte] */
+ const double b_p64 = 1.1787;
+ const double a_p128 = 1.6134e-6; /* [1 / byte] */
+ const double b_p128 = 2.1102;
+
+ int communicator_size;
//int segsize = 0;
size_t message_size, dsize;
- //communicator_size = smpi_comm_size(comm);
+ communicator_size = smpi_comm_size(comm);
/* else we need data size for decision function */
dsize = smpi_datatype_size(datatype);
/* Handle messages of small and intermediate size, and
single-element broadcasts */
- if ((message_size < /*small_message_size*/intermediate_message_size) || (count <= 1)) {
+ if ((message_size < small_message_size) || (count <= 1)) {
/* Binomial without segmentation */
- //segsize = 0;
return smpi_coll_tuned_bcast_binomial_tree (buff, count, datatype,
- root, comm/*
- segsize*/);
+ root, comm);
- } /*else if (message_size < intermediate_message_size) {
+ } else if (message_size < intermediate_message_size) {
// SplittedBinary with 1KB segments
- segsize = 1024;
- return smpi_coll_tuned_bcast_split_bintree(buff, count, datatype,
- root, comm
- segsize);
+ return smpi_coll_tuned_bcast_ompi_split_bintree(buff, count, datatype,
+ root, comm);
- }
- Handle large message sizes
+ }
+ //Handle large message sizes
else if (communicator_size < (a_p128 * message_size + b_p128)) {
- Pipeline with 128KB segments
- segsize = 1024 << 7;
- return smpi_coll_tuned_bcast_flattree_pipeline (buff, count, datatype,
- root, comm, module,
- segsize);
+ //Pipeline with 128KB segments
+ //segsize = 1024 << 7;
+ return smpi_coll_tuned_bcast_ompi_pipeline (buff, count, datatype,
+ root, comm);
+
} else if (communicator_size < 13) {
// Split Binary with 8KB segments
- segsize = 1024 << 3;
- return smpi_coll_tuned_bcast_intra_split_bintree(buff, count, datatype,
- root, comm, module,
- segsize);
+ return smpi_coll_tuned_bcast_ompi_split_bintree(buff, count, datatype,
+ root, comm);
} else if (communicator_size < (a_p64 * message_size + b_p64)) {
// Pipeline with 64KB segments
- segsize = 1024 << 6;
- return smpi_coll_tuned_bcast_intra_pipeline (buff, count, datatype,
- root, comm, module,
- segsize);
+ //segsize = 1024 << 6;
+ return smpi_coll_tuned_bcast_ompi_pipeline (buff, count, datatype,
+ root, comm);
+
} else if (communicator_size < (a_p16 * message_size + b_p16)) {
- Pipeline with 16KB segments
+ //Pipeline with 16KB segments
//segsize = 1024 << 4;
- return smpi_coll_tuned_bcast_flattree_pipeline (buff, count, datatype,
- root, comm, module,
- segsize);
-
- }*/
+ return smpi_coll_tuned_bcast_ompi_pipeline (buff, count, datatype,
+ root, comm);
+
+ }
/* Pipeline with 8KB segments */
//segsize = 1024 << 3;
return smpi_coll_tuned_bcast_flattree_pipeline (buff, count, datatype,
int communicator_size=0;
//int segsize = 0;
size_t message_size, dsize;
- //const double a1 = 0.6016 / 1024.0; /* [1/B] */
- //const double b1 = 1.3496;
- //const double a2 = 0.0410 / 1024.0; /* [1/B] */
- //const double b2 = 9.7128;
- //const double a3 = 0.0422 / 1024.0; /* [1/B] */
- //const double b3 = 1.1614;
+ const double a1 = 0.6016 / 1024.0; /* [1/B] */
+ const double b1 = 1.3496;
+ const double a2 = 0.0410 / 1024.0; /* [1/B] */
+ const double b2 = 9.7128;
+ const double a3 = 0.0422 / 1024.0; /* [1/B] */
+ const double b3 = 1.1614;
//const double a4 = 0.0033 / 1024.0; /* [1/B] */
//const double b4 = 1.6761;
* If the operation is non commutative we currently have choice of linear
* or in-order binary tree algorithm.
*/
-/* if( !ompi_op_is_commute(op) ) {
+ if( !smpi_op_is_commute(op) ) {
if ((communicator_size < 12) && (message_size < 2048)) {
- return smpi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm, module);
+ return smpi_coll_tuned_reduce_ompi_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm/*, module*/);
}
- return smpi_coll_tuned_reduce_intra_in_order_binary (sendbuf, recvbuf, count, datatype, op, root, comm, module,
- 0, max_requests);
- }*/
+ return smpi_coll_tuned_reduce_ompi_in_order_binary (sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
+ 0, max_requests*/);
+ }
if ((communicator_size < 8) && (message_size < 512)){
/* Linear_0K */
- return smpi_coll_tuned_reduce_flat_tree (sendbuf, recvbuf, count, datatype, op, root, comm);
+ return smpi_coll_tuned_reduce_ompi_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm);
} else if (((communicator_size < 8) && (message_size < 20480)) ||
(message_size < 2048) || (count <= 1)) {
/* Binomial_0K */
//segsize = 0;
- return smpi_coll_tuned_reduce_binomial(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
+ return smpi_coll_tuned_reduce_ompi_binomial(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
segsize, max_requests*/);
- } /*else if (communicator_size > (a1 * message_size + b1)) {
+ } else if (communicator_size > (a1 * message_size + b1)) {
// Binomial_1K
- segsize = 1024;
- return smpi_coll_tuned_reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm, module,
- segsize, max_requests);
+ //segsize = 1024;
+ return smpi_coll_tuned_reduce_ompi_binomial(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
+ segsize, max_requests*/);
} else if (communicator_size > (a2 * message_size + b2)) {
// Pipeline_1K
- segsize = 1024;
- return smpi_coll_tuned_reduce_NTSL (sendbuf, recvbuf, count, datatype, op, root, comm, module,
- segsize, max_requests);
+ //segsize = 1024;
+ return smpi_coll_tuned_reduce_ompi_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
+ segsize, max_requests*/);
} else if (communicator_size > (a3 * message_size + b3)) {
// Binary_32K
- segsize = 32*1024;
- return smpi_coll_tuned_reduce_intra_binary( sendbuf, recvbuf, count, datatype, op, root,
- comm, module, segsize, max_requests);
+ //segsize = 32*1024;
+ return smpi_coll_tuned_reduce_ompi_binary( sendbuf, recvbuf, count, datatype, op, root,
+ comm/*, module, segsize, max_requests*/);
}
- if (communicator_size > (a4 * message_size + b4)) {
+ /*if (communicator_size > (a4 * message_size + b4)) {
// Pipeline_32K
segsize = 32*1024;
} else {
// Pipeline_64K
segsize = 64*1024;
}*/
- return smpi_coll_tuned_reduce_NTSL (sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
+ return smpi_coll_tuned_reduce_ompi_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
segsize, max_requests*/);
#if 0
#endif /* 0 */
}
-/*int smpi_coll_tuned_reduce_scatter_ompi( void *sbuf, void *rbuf,
+int smpi_coll_tuned_reduce_scatter_ompi( void *sbuf, void *rbuf,
int *rcounts,
MPI_Datatype dtype,
MPI_Op op,
- MPI_Comm comm,
+ MPI_Comm comm
)
{
int comm_size, i, pow2;
const double b = 8.0;
const size_t small_message_size = 12 * 1024;
const size_t large_message_size = 256 * 1024;
- bool zerocounts = false;
-
- OPAL_OUTPUT((smpi_coll_tuned_stream, "smpi_coll_tuned_reduce_scatter_ompi"));
+ int zerocounts = 0;
+ XBT_DEBUG("smpi_coll_tuned_reduce_scatter_ompi");
+
comm_size = smpi_comm_size(comm);
// We need data size for decision function
- ompi_datatype_type_size(dtype, &dsize);
+ dsize=smpi_datatype_size(dtype);
total_message_size = 0;
for (i = 0; i < comm_size; i++) {
total_message_size += rcounts[i];
if (0 == rcounts[i]) {
- zerocounts = true;
+ zerocounts = 1;
}
}
- if( !ompi_op_is_commute(op) || (zerocounts)) {
- return smpi_coll_tuned_reduce_scatter_intra_nonoverlapping (sbuf, rbuf, rcounts,
+ if( !smpi_op_is_commute(op) || (zerocounts)) {
+ smpi_mpi_reduce_scatter (sbuf, rbuf, rcounts,
dtype, op,
- comm, module);
+ comm);
+ return MPI_SUCCESS;
}
total_message_size *= dsize;
((total_message_size <= large_message_size) && (pow2 == comm_size)) ||
(comm_size >= a * total_message_size + b)) {
return
- smpi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts,
+ smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(sbuf, rbuf, rcounts,
dtype, op,
- comm, module);
+ comm);
}
- return smpi_coll_tuned_reduce_scatter_intra_ring(sbuf, rbuf, rcounts,
+ return smpi_coll_tuned_reduce_scatter_ompi_ring(sbuf, rbuf, rcounts,
dtype, op,
- comm, module);
+ comm);
+
-
- return smpi_coll_tuned_reduce_scatter(sbuf, rbuf, rcounts,
- dtype, op,
- comm;
-}*/
+}
int smpi_coll_tuned_allgather_ompi(void *sbuf, int scount,
MPI_Datatype sdtype,
comm);
}
} else {
- //if (communicator_size % 2) {
+ if (communicator_size % 2) {
return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
- /*} else {
- return smpi_coll_tuned_allgather_intra_neighborexchange(sbuf, scount, sdtype,
+ } else {
+ return smpi_coll_tuned_allgather_ompi_neighborexchange(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
- comm, module);
- }*/
+ comm);
+ }
}
#if defined(USE_MPICH2_DECISION)
- for everything else use ring.
*/
if ((pow2_size == communicator_size) && (total_dsize < 524288)) {
- return smpi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype,
+ return smpi_coll_tuned_allgather_rdb(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
- comm, module);
+ comm);
} else if (total_dsize <= 81920) {
- return smpi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype,
+ return smpi_coll_tuned_allgather_bruck(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
- comm, module);
+ comm);
}
- return smpi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype,
+ return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
- comm, module);
+ comm);
#endif /* defined(USE_MPICH2_DECISION) */
}
comm);
} else {
-// if (communicator_size % 2) {
+ if (communicator_size % 2) {
return smpi_coll_tuned_allgatherv_ring(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm);
-/* } else {
- return smpi_coll_tuned_allgatherv_intra_neighborexchange(sbuf, scount, sdtype,
+ } else {
+ return smpi_coll_tuned_allgatherv_ompi_neighborexchange(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
- comm, module);
- }*/
+ comm);
+ }
}
}
-/*
+
int smpi_coll_tuned_gather_ompi(void *sbuf, int scount,
MPI_Datatype sdtype,
void* rbuf, int rcount,
MPI_Datatype rdtype,
int root,
- MPI_Comm comm,
+ MPI_Comm comm
)
{
- const int large_segment_size = 32768;
- const int small_segment_size = 1024;
+ //const int large_segment_size = 32768;
+ //const int small_segment_size = 1024;
- const size_t large_block_size = 92160;
+ //const size_t large_block_size = 92160;
const size_t intermediate_block_size = 6000;
const size_t small_block_size = 1024;
int communicator_size, rank;
size_t dsize, block_size;
- OPAL_OUTPUT((smpi_coll_tuned_stream,
- "smpi_coll_tuned_gather_ompi"));
+ XBT_DEBUG("smpi_coll_tuned_gather_ompi");
communicator_size = smpi_comm_size(comm);
- rank = ompi_comm_rank(comm);
+ rank = smpi_comm_rank(comm);
// Determine block size
if (rank == root) {
- ompi_datatype_type_size(rdtype, &dsize);
+ dsize = smpi_datatype_size(rdtype);
block_size = dsize * rcount;
} else {
- ompi_datatype_type_size(sdtype, &dsize);
+ dsize = smpi_datatype_size(sdtype);
block_size = dsize * scount;
}
- if (block_size > large_block_size) {
- return smpi_coll_tuned_gather_intra_linear_sync (sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
- root, comm, module,
- large_segment_size);
+/* if (block_size > large_block_size) {*/
+/* return smpi_coll_tuned_gather_ompi_linear_sync (sbuf, scount, sdtype, */
+/* rbuf, rcount, rdtype, */
+/* root, comm);*/
- } else if (block_size > intermediate_block_size) {
- return smpi_coll_tuned_gather_intra_linear_sync (sbuf, scount, sdtype,
+/* } else*/ if (block_size > intermediate_block_size) {
+ return smpi_coll_tuned_gather_ompi_linear_sync (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
- root, comm, module,
- small_segment_size);
+ root, comm);
} else if ((communicator_size > large_communicator_size) ||
((communicator_size > small_communicator_size) &&
(block_size < small_block_size))) {
- return smpi_coll_tuned_gather_intra_binomial (sbuf, scount, sdtype,
+ return smpi_coll_tuned_gather_ompi_binomial (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
- root, comm, module);
+ root, comm);
}
// Otherwise, use basic linear
- return smpi_coll_tuned_gather_intra_basic_linear (sbuf, scount, sdtype,
+ return smpi_coll_tuned_gather_ompi_basic_linear (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
- root, comm, module);
-}*/
-/*
+ root, comm);
+}
+
int smpi_coll_tuned_scatter_ompi(void *sbuf, int scount,
MPI_Datatype sdtype,
void* rbuf, int rcount,
MPI_Datatype rdtype,
- int root, MPI_Comm comm,
+ int root, MPI_Comm comm
)
{
const size_t small_block_size = 300;
int communicator_size, rank;
size_t dsize, block_size;
- OPAL_OUTPUT((smpi_coll_tuned_stream,
- "smpi_coll_tuned_scatter_ompi"));
+ XBT_DEBUG("smpi_coll_tuned_scatter_ompi");
communicator_size = smpi_comm_size(comm);
- rank = ompi_comm_rank(comm);
+ rank = smpi_comm_rank(comm);
// Determine block size
if (root == rank) {
- ompi_datatype_type_size(sdtype, &dsize);
+ dsize=smpi_datatype_size(sdtype);
block_size = dsize * scount;
} else {
- ompi_datatype_type_size(rdtype, &dsize);
+ dsize=smpi_datatype_size(rdtype);
block_size = dsize * rcount;
}
if ((communicator_size > small_comm_size) &&
(block_size < small_block_size)) {
- return smpi_coll_tuned_scatter_intra_binomial (sbuf, scount, sdtype,
- rbuf, rcount, rdtype,
- root, comm, module);
+ if(rank!=root){
+ sbuf=xbt_malloc(rcount*smpi_datatype_get_extent(rdtype));
+ scount=rcount;
+ sdtype=rdtype;
+ }
+ int ret=smpi_coll_tuned_scatter_ompi_binomial (sbuf, scount, sdtype,
+ rbuf, rcount, rdtype,
+ root, comm);
+ if(rank!=root){
+ xbt_free(sbuf);
+ }
+ return ret;
}
- return smpi_coll_tuned_scatter_intra_basic_linear (sbuf, scount, sdtype,
+ return smpi_coll_tuned_scatter_ompi_basic_linear (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
- root, comm, module);
-}*/
+ root, comm);
+}