XBT_DEBUG( "coll:tuned:reduce_generic count %d, msg size %ld, segsize %ld, max_requests %d", original_count, (unsigned long)(num_segments * segment_increment), (unsigned long)segment_increment, max_outstanding_reqs);
- rank = smpi_comm_rank(comm);
+ rank = comm->rank();
/* non-leaf nodes - wait for children to send me data & forward up
(if needed) */
/* If this is a non-commutative operation we must copy
sendbuf to the accumbuf, in order to simplify the loops */
- if (!smpi_op_is_commute(op)) {
+ if ( (op!=MPI_OP_NULL && !op->is_commutative())) {
smpi_datatype_copy(
(char*)sendtmpbuf, original_count, datatype,
(char*)accumbuf, original_count, datatype);
* BUT if the operation is non-commutative or
* we are root and are USING MPI_IN_PLACE this is wrong!
*/
- if( (smpi_op_is_commute(op)) &&
+ if( (op==MPI_OP_NULL || op->is_commutative()) &&
!((MPI_IN_PLACE == sendbuf) && (rank == tree->tree_root)) ) {
local_recvbuf = accumbuf + segindex * segment_increment;
}
}
- reqs[inbi]=smpi_mpi_irecv(local_recvbuf, recvcount, datatype,
+ reqs[inbi]=Request::irecv(local_recvbuf, recvcount, datatype,
tree->tree_next[i],
COLL_TAG_REDUCE, comm
);
if there are no requests reqs[inbi ^1] will be
MPI_REQUEST_NULL. */
/* wait on data from last child for previous segment */
- smpi_mpi_waitall( 1, &reqs[inbi ^ 1],
+ Request::waitall( 1, &reqs[inbi ^ 1],
MPI_STATUSES_IGNORE );
local_op_buffer = inbuf[inbi ^ 1];
if( i > 0 ) {
* not using MPI_IN_PLACE)
*/
if( 1 == i ) {
- if( (smpi_op_is_commute(op)) &&
+ if( (op==MPI_OP_NULL || op->is_commutative())&&
!((MPI_IN_PLACE == sendbuf) && (rank == tree->tree_root)) ) {
local_op_buffer = sendtmpbuf + segindex * segment_increment;
}
}
/* apply operation */
- smpi_op_apply(op, local_op_buffer,
+ if(op!=MPI_OP_NULL) op->apply( local_op_buffer,
accumbuf + segindex * segment_increment,
- &recvcount, &datatype );
+ &recvcount, datatype );
} else if ( segindex > 0 ) {
void* accumulator = accumbuf + (segindex-1) * segment_increment;
if( tree->tree_nextsize <= 1 ) {
- if( (smpi_op_is_commute(op)) &&
+ if( (op==MPI_OP_NULL || op->is_commutative()) &&
!((MPI_IN_PLACE == sendbuf) && (rank == tree->tree_root)) ) {
local_op_buffer = sendtmpbuf + (segindex-1) * segment_increment;
}
}
- smpi_op_apply(op, local_op_buffer, accumulator, &prevcount,
- &datatype );
+ if(op!=MPI_OP_NULL) op->apply( local_op_buffer, accumulator, &prevcount,
+ datatype );
/* all reduced on available data this step (i) complete,
* pass to the next process unless you are the root.
*/
if (rank != tree->tree_root) {
/* send combined/accumulated data to parent */
- smpi_mpi_send( accumulator, prevcount,
+ Request::send( accumulator, prevcount,
datatype, tree->tree_prev,
COLL_TAG_REDUCE,
comm);
if (original_count < count_by_segment) {
count_by_segment = original_count;
}
- smpi_mpi_send((char*)sendbuf +
+ Request::send((char*)sendbuf +
segindex * segment_increment,
count_by_segment, datatype,
tree->tree_prev,
/* post first group of requests */
for (segindex = 0; segindex < max_outstanding_reqs; segindex++) {
- sreq[segindex]=smpi_mpi_isend((char*)sendbuf +
+ sreq[segindex]=Request::isend((char*)sendbuf +
segindex * segment_increment,
count_by_segment, datatype,
tree->tree_prev,
creq = 0;
while ( original_count > 0 ) {
/* wait on a posted request to complete */
- smpi_mpi_wait(&sreq[creq], MPI_STATUS_IGNORE);
+ Request::wait(&sreq[creq], MPI_STATUS_IGNORE);
sreq[creq] = MPI_REQUEST_NULL;
if( original_count < count_by_segment ) {
count_by_segment = original_count;
}
- sreq[creq]=smpi_mpi_isend((char*)sendbuf +
+ sreq[creq]=Request::isend((char*)sendbuf +
segindex * segment_increment,
count_by_segment, datatype,
tree->tree_prev,
}
/* Wait on the remaining requests to complete */
- smpi_mpi_waitall( max_outstanding_reqs, sreq,
+ Request::waitall( max_outstanding_reqs, sreq,
MPI_STATUSES_IGNORE );
/* free requests */
uint32_t segsize=64*1024;
int segcount = count;
size_t typelng;
- int fanout = smpi_comm_size(comm)/2;
+ int fanout = comm->size()/2;
- XBT_DEBUG("coll:tuned:reduce_intra_chain rank %d fo %d ss %5d", smpi_comm_rank(comm), fanout, segsize);
+ XBT_DEBUG("coll:tuned:reduce_intra_chain rank %d fo %d ss %5d", comm->rank(), fanout, segsize);
/**
* Determine number of segments and number of elements
const double a4 = 0.0033 / 1024.0; /* [1/B] */
const double b4 = 1.6761;
typelng= smpi_datatype_size( datatype);
- int communicator_size = smpi_comm_size(comm);
+ int communicator_size = comm->size();
size_t message_size = typelng * count;
if (communicator_size > (a2 * message_size + b2)) {
}
XBT_DEBUG("coll:tuned:reduce_intra_pipeline rank %d ss %5d",
- smpi_comm_rank(comm), segsize);
+ comm->rank(), segsize);
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
segsize = 32*1024;
XBT_DEBUG("coll:tuned:reduce_intra_binary rank %d ss %5d",
- smpi_comm_rank(comm), segsize);
+ comm->rank(), segsize);
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
* sent per operation
*/
typelng= smpi_datatype_size( datatype);
- int communicator_size = smpi_comm_size(comm);
+ int communicator_size = comm->size();
size_t message_size = typelng * count;
if (((communicator_size < 8) && (message_size < 20480)) ||
(message_size < 2048) || (count <= 1)) {
}
XBT_DEBUG("coll:tuned:reduce_intra_binomial rank %d ss %5d",
- smpi_comm_rank(comm), segsize);
+ comm->rank(), segsize);
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
return smpi_coll_tuned_ompi_reduce_generic( sendbuf, recvbuf, count, datatype,
void *use_this_sendbuf = NULL, *use_this_recvbuf = NULL;
size_t typelng;
- rank = smpi_comm_rank(comm);
- size = smpi_comm_size(comm);
+ rank = comm->rank();
+ size = comm->size();
XBT_DEBUG("coll:tuned:reduce_intra_in_order_binary rank %d ss %5d",
rank, segsize);
if (io_root != root) {
if (root == rank) {
/* Receive result from rank io_root to recvbuf */
- smpi_mpi_recv(recvbuf, count, datatype, io_root,
+ Request::recv(recvbuf, count, datatype, io_root,
COLL_TAG_REDUCE, comm,
MPI_STATUS_IGNORE);
if (MPI_IN_PLACE == sendbuf) {
} else if (io_root == rank) {
/* Send result from use_this_recvbuf to root */
- smpi_mpi_send(use_this_recvbuf, count, datatype, root,
+ Request::send(use_this_recvbuf, count, datatype, root,
COLL_TAG_REDUCE,
comm);
smpi_free_tmp_buffer(use_this_recvbuf);
/* Initialize */
- rank = smpi_comm_rank(comm);
- size = smpi_comm_size(comm);
+ rank = comm->rank();
+ size = comm->size();
XBT_DEBUG("coll:tuned:reduce_intra_basic_linear rank %d", rank);
/* If not root, send data to the root. */
if (rank != root) {
- smpi_mpi_send(sbuf, count, dtype, root,
+ Request::send(sbuf, count, dtype, root,
COLL_TAG_REDUCE,
comm);
return MPI_SUCCESS;
if (rank == (size - 1)) {
smpi_datatype_copy((char*)sbuf, count, dtype,(char*)rbuf, count, dtype);
} else {
- smpi_mpi_recv(rbuf, count, dtype, size - 1,
+ Request::recv(rbuf, count, dtype, size - 1,
COLL_TAG_REDUCE, comm,
MPI_STATUS_IGNORE);
}
if (rank == i) {
inbuf = (char*)sbuf;
} else {
- smpi_mpi_recv(pml_buffer, count, dtype, i,
+ Request::recv(pml_buffer, count, dtype, i,
COLL_TAG_REDUCE, comm,
MPI_STATUS_IGNORE);
inbuf = pml_buffer;
}
/* Perform the reduction */
- smpi_op_apply(op, inbuf, rbuf, &count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( inbuf, rbuf, &count, dtype);
}
if (NULL != inplace_temp) {