ADD_TEST(smpi-replay ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi --cd ${CMAKE_BINARY_DIR}/examples/smpi ${CMAKE_HOME_DIRECTORY}/examples/smpi/replay/smpi_replay.tesh)
endif()
- FOREACH (ALLGATHER_COLL default GB loosely_lr lr NTSLR NTSLR_NB pair rdb rhv ring SMP_NTS
+ FOREACH (ALLGATHER_COLL default 2dmesh 3dmesh bruck GB loosely_lr lr
+ NTSLR NTSLR_NB pair rdb rhv ring SMP_NTS
smp_simple spreading_simple)
ADD_TEST(smpi-allgather-coll-${ALLGATHER_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/allgather:${ALLGATHER_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgather_coll.tesh)
ENDFOREACH()
FOREACH (ALLGATHERV_COLL default pair ring)
ADD_TEST(smpi-allgatherv-coll-${ALLGATHERV_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/allgatherv:${ALLGATHERV_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgatherv_coll.tesh)
ENDFOREACH()
- FOREACH (ALLREDUCE_COLL default lr NTS rab1 rab2 rab_rsag rdb smp_binomial smp_rdb smp_rsag
- smp_rsag_lr smp_rsag_rab redbcast)
+ FOREACH (ALLREDUCE_COLL default lr NTS rab1 rab2 rab_rdb
+ rab_rsag rdb smp_binomial smp_binomial_pipeline
+ smp_rdb smp_rsag smp_rsag_lr smp_rsag_rab redbcast)
ADD_TEST(smpi-allreduce-coll-${ALLREDUCE_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/allreduce:${ALLREDUCE_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allreduce_coll.tesh)
ENDFOREACH()
FOREACH (ALLTOALL_COLL 2dmesh 3dmesh pair pair_one_barrier pair_light_barrier
src/smpi/smpi_pmpi.c
src/smpi/smpi_replay.c
src/smpi/colls/colls_global.c
- #src/smpi/colls/allgather-2dmesh.c
- #src/smpi/colls/allgather-3dmesh.c
- #src/smpi/colls/allgather-bruck.c
+ src/smpi/colls/allgather-2dmesh.c
+ src/smpi/colls/allgather-3dmesh.c
+ src/smpi/colls/allgather-bruck.c
src/smpi/colls/allgather-GB.c
src/smpi/colls/allgather-loosely-lr.c
src/smpi/colls/allgather-lr.c
src/smpi/colls/allreduce-NTS.c
src/smpi/colls/allreduce-rab1.c
src/smpi/colls/allreduce-rab2.c
- #src/smpi/colls/allreduce-rab-rdb.c
+ src/smpi/colls/allreduce-rab-rdb.c
#src/smpi/colls/allreduce-rab-reduce-scatter.c
src/smpi/colls/allreduce-rab-rsag.c
src/smpi/colls/allreduce-rdb.c
src/smpi/colls/allreduce-redbcast.c
src/smpi/colls/allreduce-smp-binomial.c
- #src/smpi/colls/allreduce-smp-binomial-pipeline.c
+ src/smpi/colls/allreduce-smp-binomial-pipeline.c
src/smpi/colls/allreduce-smp-rdb.c
src/smpi/colls/allreduce-smp-rsag.c
src/smpi/colls/allreduce-smp-rsag-lr.c
int i, src, dst, rank, num_procs;
int X, Y, send_offset, recv_offset;
int my_row_base, my_col_base, src_row_base, block_size, num_reqs;
- int success = 0;
- int failure = 1;
int tag = 1;
rank = smpi_comm_rank(comm);
// do local allgather/local copy
recv_offset = rank * block_size;
- MPIR_Localcopy(send_buff, send_count, send_type, (char *)recv_buff + recv_offset,
+ smpi_datatype_copy(send_buff, send_count, send_type, (char *)recv_buff + recv_offset,
recv_count, recv_type);
// do row-wise comm
if (src == rank)
continue;
recv_offset = src * block_size;
- MPIC_Irecv((char *)recv_buff + recv_offset, recv_count, recv_type, src, tag,
- comm, req_ptr++);
+ *(req_ptr++) = smpi_mpi_irecv((char *)recv_buff + recv_offset, recv_count, recv_type, src, tag,
+ comm);
}
dst = i + my_row_base;
if (dst == rank)
continue;
- MPIC_Send(send_buff, send_count, send_type, dst, tag, comm);
+ smpi_mpi_send(send_buff, send_count, send_type, dst, tag, comm);
}
smpi_mpi_waitall(Y - 1, req, MPI_STATUSES_IGNORE);
continue;
src_row_base = (src / Y) * Y;
recv_offset = src_row_base * block_size;
- MPIC_Irecv((char *)recv_buff + recv_offset, recv_count * Y, recv_type, src, tag,
- comm, req_ptr++);
+ *(req_ptr++) = smpi_mpi_irecv((char *)recv_buff + recv_offset, recv_count * Y, recv_type, src, tag,
+ comm);
}
for (i = 0; i < X; i++) {
if (dst == rank)
continue;
send_offset = my_row_base * block_size;
- MPIC_Send((char *)recv_buff + send_offset, send_count * Y, send_type, dst, tag,
+ smpi_mpi_send((char *)recv_buff + send_offset, send_count * Y, send_type, dst, tag,
comm);
}
free(req);
- return success;
+ return MPI_SUCCESS;
}
int i, src, dst, rank, num_procs, block_size, my_z_base;
int my_z, X, Y, Z, send_offset, recv_offset;
int two_dsize, my_row_base, my_col_base, src_row_base, src_z_base, num_reqs;
- int success = 0;
- int failure = 1;
int tag = 1;
rank = smpi_comm_rank(comm);
block_size = extent * send_count;
req = (MPI_Request *) xbt_malloc(num_reqs * sizeof(MPI_Request));
- if (!req) {
- printf("allgather-3dmesh-shoot.c:85: cannot allocate memory\n");
- MPI_Finalize();
- exit(failure);
- }
req_ptr = req;
// do local allgather/local copy
recv_offset = rank * block_size;
- MPIR_Localcopy(send_buff, send_count, send_type, (char *)recv_buff + recv_offset,
+ smpi_datatype_copy(send_buff, send_count, send_type, (char *)recv_buff + recv_offset,
recv_count, recv_type);
// do rowwise comm
if (src == rank)
continue;
recv_offset = src * block_size;
- MPIC_Irecv((char *)recv_buff + recv_offset, send_count, recv_type, src, tag,
- comm, req_ptr++);
+ *(req_ptr++) = smpi_mpi_irecv((char *)recv_buff + recv_offset, send_count, recv_type, src, tag,
+ comm);
}
for (i = 0; i < Y; i++) {
dst = i + my_row_base;
if (dst == rank)
continue;
- MPIC_Send(send_buff, send_count, send_type, dst, tag, comm);
+ smpi_mpi_send(send_buff, send_count, send_type, dst, tag, comm);
}
smpi_mpi_waitall(Y - 1, req, MPI_STATUSES_IGNORE);
src_row_base = (src / X) * X;
recv_offset = src_row_base * block_size;
- MPIC_Irecv((char *)recv_buff + recv_offset, recv_count * Y, recv_type, src, tag,
- comm, req_ptr++);
+ *(req_ptr++) = smpi_mpi_irecv((char *)recv_buff + recv_offset, recv_count * Y, recv_type, src, tag,
+ comm);
}
send_offset = my_row_base * block_size;
dst = (i * Y + my_col_base);
if (dst == rank)
continue;
- MPIC_Send((char *)recv_buff + send_offset, send_count * Y, send_type, dst, tag,
+ smpi_mpi_send((char *)recv_buff + send_offset, send_count * Y, send_type, dst, tag,
comm);
}
recv_offset = (src_z_base * block_size);
- MPIC_Irecv((char *)recv_buff + recv_offset, recv_count * two_dsize, recv_type,
- src, tag, comm, req_ptr++);
+ *(req_ptr++) = smpi_mpi_irecv((char *)recv_buff + recv_offset, recv_count * two_dsize, recv_type,
+ src, tag, comm);
}
for (i = 1; i < Z; i++) {
dst = (rank + i * two_dsize) % num_procs;
send_offset = my_z_base * block_size;
- MPIC_Send((char *)recv_buff + send_offset, send_count * two_dsize, send_type,
+ smpi_mpi_send((char *)recv_buff + send_offset, send_count * two_dsize, send_type,
dst, tag, comm);
}
smpi_mpi_waitall(Z - 1, req, MPI_STATUSES_IGNORE);
free(req);
- return success;
+ return MPI_SUCCESS;
}
MPI_Aint recv_extent;
// local int variables
- int i, src, dst, rank, num_procs, count, remainder;
+ int src, dst, rank, num_procs, count, remainder;
int tag = 1;
int pof2 = 1;
- int success = 0;
// local string variables
char *tmp_buff;
count = recv_count;
tmp_buff = (char *) xbt_malloc(num_procs * recv_count * recv_extent);
- if (!tmp_buff) {
- printf("allgather-bruck:54: cannot allocate memory\n");
- MPI_Finalize();
- exit(0);
- }
- // perform a local copy
- MPIR_Localcopy(send_ptr, send_count, send_type, tmp_buff, recv_count,
- recv_type);
+ // perform a local copy
+ smpi_datatype_copy(send_ptr, send_count, send_type,
+ tmp_buff, recv_count, recv_type);
while (pof2 <= (num_procs / 2)) {
src = (rank + pof2) % num_procs;
dst = (rank - pof2 + num_procs) % num_procs;
- MPIC_Sendrecv(tmp_buff, count, recv_type, dst, tag,
+ smpi_mpi_sendrecv(tmp_buff, count, recv_type, dst, tag,
tmp_buff + count * recv_extent, count, recv_type,
src, tag, comm, &status);
count *= 2;
src = (rank + pof2) % num_procs;
dst = (rank - pof2 + num_procs) % num_procs;
- MPIC_Sendrecv(tmp_buff, remainder * recv_count, recv_type, dst, tag,
+ smpi_mpi_sendrecv(tmp_buff, remainder * recv_count, recv_type, dst, tag,
tmp_buff + count * recv_extent, remainder * recv_count,
recv_type, src, tag, comm, &status);
}
- MPIC_Sendrecv(tmp_buff, (num_procs - rank) * recv_count, recv_type, rank,
+ smpi_mpi_sendrecv(tmp_buff, (num_procs - rank) * recv_count, recv_type, rank,
tag, recv_ptr + rank * recv_count * recv_extent,
(num_procs - rank) * recv_count, recv_type, rank, tag, comm,
&status);
if (rank)
- MPIC_Sendrecv(tmp_buff + (num_procs - rank) * recv_count * recv_extent,
+ smpi_mpi_sendrecv(tmp_buff + (num_procs - rank) * recv_count * recv_extent,
rank * recv_count, recv_type, rank, tag, recv_ptr,
rank * recv_count, recv_type, rank, tag, comm, &status);
free(tmp_buff);
- return success;
-}
-
-/*#include "ompi_bindings.h"
-
-int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
- MPI_Datatype sdtype,
- void* rbuf, int rcount,
- MPI_Datatype rdtype,
- MPI_Comm comm)
-{
- int line = -1, err = 0;
- int rank, size, step;
- int sendto, recvfrom;
- void * tmpsend, *tmprecv;
- ptrdiff_t lb, sext, rext;
-
- size = ompi_comm_size(comm);
- rank = ompi_comm_rank(comm);
-
- OPAL_OUTPUT((ompi_coll_tuned_stream,
- "coll:tuned:alltoall_intra_pairwise rank %d", rank));
-
- err = ompi_datatype_get_extent (sdtype, &lb, &sext);
- if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
- err = ompi_datatype_get_extent (rdtype, &lb, &rext);
- if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
-
-
- // Perform pairwise exchange - starting from 1 so the local copy is last
- for (step = 1; step < size + 1; step++) {
-
- // Determine sender and receiver for this step.
- sendto = (rank + step) % size;
- recvfrom = (rank + size - step) % size;
-
- // Determine sending and receiving locations
- tmpsend = (char*)sbuf + sendto * sext * scount;
- tmprecv = (char*)rbuf + recvfrom * rext * rcount;
-
- // send and receive
- err = ompi_coll_tuned_sendrecv( tmpsend, scount, sdtype, sendto,
- MCA_COLL_BASE_TAG_ALLTOALL,
- tmprecv, rcount, rdtype, recvfrom,
- MCA_COLL_BASE_TAG_ALLTOALL,
- comm, MPI_STATUS_IGNORE, rank);
- if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
- }
-
- return MPI_SUCCESS;
-
- err_hndl:
- OPAL_OUTPUT((ompi_coll_tuned_stream,
- "%s:%4d\tError occurred %d, rank %2d", __FILE__, line,
- err, rank));
- return err;
+ return MPI_SUCCESS;
}
-*/
MPI_Datatype dtype, MPI_Op op,
MPI_Comm comm)
{
- int nprocs, rank, type_size, tag = 543;
+ int nprocs, rank, tag = 543;
int mask, dst, pof2, newrank, rem, newdst, i,
send_idx, recv_idx, last_idx, send_cnt, recv_cnt, *cnts, *disps;
- MPI_Aint lb, extent;
+ MPI_Aint extent;
MPI_Status status;
void *tmp_buf = NULL;
-#ifdef MPICH2_REDUCTION
- MPI_User_function *uop = MPIR_Op_table[op % 16 - 1];
-#else
- MPI_User_function *uop;
- struct MPIR_OP *op_ptr;
- op_ptr = (MPI_User_function *) MPIR_ToPointer(op);
- uop = op_ptr->op;
-#endif
-
nprocs = smpi_comm_size(comm);
rank = smpi_comm_rank(comm);
extent = smpi_datatype_get_extent(dtype);
tmp_buf = (void *) xbt_malloc(count * extent);
- MPIR_Localcopy(sbuff, count, dtype, rbuff, count, dtype);
-
- type_size = smpi_datatype_size(dtype);
+ smpi_datatype_copy(sbuff, count, dtype, rbuff, count, dtype);
// find nearest power-of-two less than or equal to comm_size
pof2 = 1;
// do the reduction on received data. since the
// ordering is right, it doesn't matter whether
// the operation is commutative or not.
- (*uop) (tmp_buf, rbuff, &count, &dtype);
+ smpi_op_apply(op, tmp_buf, rbuff, &count, &dtype);
// change the rank
newrank = rank / 2;
// This algorithm is used only for predefined ops
// and predefined ops are always commutative.
- (*uop) ((char *) tmp_buf + disps[recv_idx] * extent,
- (char *) rbuff + disps[recv_idx] * extent, &recv_cnt, &dtype);
+ smpi_op_apply(op, (char *) tmp_buf + disps[recv_idx] * extent,
+ (char *) rbuff + disps[recv_idx] * extent, &recv_cnt, &dtype);
// update send_idx for next iteration
send_idx = recv_idx;
This code assume commutative and associative reduce operator (MPI_SUM, MPI_MAX, etc).
*/
-#ifndef MPICH2
-extern void *MPIR_ToPointer();
-
-struct MPIR_OP {
- MPI_User_function *op;
- int commute, permanent;
-};
-
-#else
-extern MPI_User_function *MPIR_Op_table[];
-#endif
-
/*
This fucntion performs all-reduce operation as follow. ** in a pipeline fashion **
1) binomial_tree reduce inside each SMP node
MPI_Status status;
int num_core = NUM_CORE;
- MPI_User_function *uop;
-#ifndef MPICH2
- struct MPIR_OP *op_ptr = MPIR_ToPointer(op);
- uop = (MPI_User_function *) op_ptr->op;
-#else
- uop = MPIR_Op_table[op % 16 - 1];
-#endif
-
comm_size = smpi_comm_size(comm);
rank = smpi_comm_rank(comm);
MPI_Aint extent;
if (src < comm_size) {
recv_offset = phase * pcount * extent;
smpi_mpi_recv(tmp_buf, pcount, dtype, src, tag, comm, &status);
- (*uop) (tmp_buf, (char *)recv_buf + recv_offset, &pcount, &dtype);
+ smpi_op_apply(op, tmp_buf, (char *)recv_buf + recv_offset, &pcount, &dtype);
}
} else {
send_offset = phase * pcount * extent;
if (src < comm_size) {
recv_offset = (phase - 1) * pcount * extent;
smpi_mpi_recv(tmp_buf, pcount, dtype, src, tag, comm, &status);
- (*uop) (tmp_buf, (char *)recv_buf + recv_offset, &pcount, &dtype);
+ smpi_op_apply(op, tmp_buf, (char *)recv_buf + recv_offset, &pcount, &dtype);
}
} else {
dst = (inter_rank & (~mask)) * num_core;
int mask, src, dst;
int num_core = NUM_CORE;
MPI_Status status;
- /*
- #ifdef MPICH2_REDUCTION
- MPI_User_function * uop = MPIR_Op_table[op % 16 - 1];
- #else
- MPI_User_function *uop;
- struct MPIR_OP *op_ptr;
- op_ptr = MPIR_ToPointer(op);
- uop = op_ptr->op;
- #endif
- */
comm_size=smpi_comm_size(comm);
rank=smpi_comm_rank(comm);
int
smpi_coll_tuned_alltoallv_ring(void *send_buff, int *send_counts, int *send_disps,
MPI_Datatype send_type,
- void *recv_buff,int *recv_counts, int *recv_disps,
+ void *recv_buff, int *recv_counts, int *recv_disps,
MPI_Datatype recv_type,
MPI_Comm comm)
{
MPI_Comm comm)
#define COLL_ALLGATHERS(action, COLL_sep) \
-COLL_NOTHING(COLL_APPLY(action, COLL_ALLGATHER_SIG, 2dmesh) COLL_sep) \
-COLL_NOTHING(COLL_APPLY(action, COLL_ALLGATHER_SIG, 3dmesh) COLL_sep) \
-COLL_NOTHING(COLL_APPLY(action, COLL_ALLGATHER_SIG, bruck) COLL_sep) \
+COLL_APPLY(action, COLL_ALLGATHER_SIG, 2dmesh) COLL_sep \
+COLL_APPLY(action, COLL_ALLGATHER_SIG, 3dmesh) COLL_sep \
+COLL_APPLY(action, COLL_ALLGATHER_SIG, bruck) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHER_SIG, GB) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHER_SIG, loosely_lr) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHER_SIG, lr) COLL_sep \
COLL_APPLY(action, COLL_ALLREDUCE_SIG, NTS) COLL_sep \
COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab1) COLL_sep \
COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab2) COLL_sep \
-COLL_NOTHING(COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab_rdb) COLL_sep) \
+COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab_rdb) COLL_sep \
COLL_NOTHING(COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab_reduce_scatter) COLL_sep) \
COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab_rsag) COLL_sep \
COLL_APPLY(action, COLL_ALLREDUCE_SIG, rdb) COLL_sep \
COLL_APPLY(action, COLL_ALLREDUCE_SIG, smp_binomial) COLL_sep \
-COLL_NOTHING(COLL_APPLY(action, COLL_ALLREDUCE_SIG, smp_binomial_pipeline) COLL_sep) \
+COLL_APPLY(action, COLL_ALLREDUCE_SIG, smp_binomial_pipeline) COLL_sep \
COLL_APPLY(action, COLL_ALLREDUCE_SIG, smp_rdb) COLL_sep \
COLL_APPLY(action, COLL_ALLREDUCE_SIG, smp_rsag) COLL_sep \
COLL_APPLY(action, COLL_ALLREDUCE_SIG, smp_rsag_lr) COLL_sep \