From: Paul Bédaride Date: Mon, 8 Apr 2013 11:26:28 +0000 (+0200) Subject: Use simgrid function instead of MPI in collectives X-Git-Tag: v3_9_90~412^2~46 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/e7c0c67af63b3979a597a66e5e1c8b0435fc6e19 Use simgrid function instead of MPI in collectives --- diff --git a/buildtools/Cmake/DefinePackages.cmake b/buildtools/Cmake/DefinePackages.cmake index ec5e01cc36..efa7a3f4f7 100644 --- a/buildtools/Cmake/DefinePackages.cmake +++ b/buildtools/Cmake/DefinePackages.cmake @@ -109,6 +109,7 @@ set(SMPI_SRC src/smpi/smpi_mpi_dt.c src/smpi/smpi_pmpi.c src/smpi/smpi_replay.c + src/smpi/colls/colls_global.c #src/smpi/colls/allgather-2dmesh.c #src/smpi/colls/allgather-3dmesh.c #src/smpi/colls/allgather-bruck.c diff --git a/src/smpi/colls/allgather-2dmesh.c b/src/smpi/colls/allgather-2dmesh.c index 0a9f688e39..91c9cfbcb8 100644 --- a/src/smpi/colls/allgather-2dmesh.c +++ b/src/smpi/colls/allgather-2dmesh.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** @@ -114,10 +114,10 @@ smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype int failure = 1; int tag = 1; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); - MPI_Type_extent(send_type, &extent); + extent = smpi_datatype_get_extent(send_type); block_size = extent * send_count; @@ -156,7 +156,7 @@ smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype MPIC_Send(send_buff, send_count, send_type, dst, tag, comm); } - MPI_Waitall(Y - 1, req, MPI_STATUSES_IGNORE); + smpi_mpi_waitall(Y - 1, req, MPI_STATUSES_IGNORE); req_ptr = req; @@ -180,7 +180,7 @@ smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype comm); } - MPI_Waitall(X - 1, req, MPI_STATUSES_IGNORE); + smpi_mpi_waitall(X - 1, req, MPI_STATUSES_IGNORE); free(req); diff --git a/src/smpi/colls/allgather-3dmesh.c b/src/smpi/colls/allgather-3dmesh.c index 035e98103e..e75c9c3d8e 100644 --- a/src/smpi/colls/allgather-3dmesh.c +++ b/src/smpi/colls/allgather-3dmesh.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** @@ -101,9 +101,9 @@ int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, int failure = 1; int tag = 1; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); - MPI_Type_extent(send_type, &extent); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); + extent = smpi_datatype_get_extent(send_type); is_3dmesh(num_procs, &X, &Y, &Z); @@ -123,7 +123,7 @@ int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, block_size = extent * send_count; - req = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request)); + req = (MPI_Request *) xbt_malloc(num_reqs * sizeof(MPI_Request)); if (!req) { printf("allgather-3dmesh-shoot.c:85: cannot allocate memory\n"); MPI_Finalize(); @@ -154,7 +154,7 @@ int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, MPIC_Send(send_buff, send_count, send_type, dst, tag, comm); } - MPI_Waitall(Y - 1, req, MPI_STATUSES_IGNORE); + smpi_mpi_waitall(Y - 1, req, MPI_STATUSES_IGNORE); req_ptr = req; // do colwise comm, it does not matter here if i*X or i *Y since X == Y @@ -180,7 +180,7 @@ int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, comm); } - MPI_Waitall(X - 1, req, 
MPI_STATUSES_IGNORE); + smpi_mpi_waitall(X - 1, req, MPI_STATUSES_IGNORE); req_ptr = req; for (i = 1; i < Z; i++) { @@ -199,7 +199,7 @@ int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, MPIC_Send((char *)recv_buff + send_offset, send_count * two_dsize, send_type, dst, tag, comm); } - MPI_Waitall(Z - 1, req, MPI_STATUSES_IGNORE); + smpi_mpi_waitall(Z - 1, req, MPI_STATUSES_IGNORE); free(req); diff --git a/src/smpi/colls/allgather-GB.c b/src/smpi/colls/allgather-GB.c index b9df40e577..f9a1e072b1 100644 --- a/src/smpi/colls/allgather-GB.c +++ b/src/smpi/colls/allgather-GB.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" // Allgather - gather/bcast algorithm int smpi_coll_tuned_allgather_GB(void *send_buff, int send_count, @@ -7,10 +7,10 @@ int smpi_coll_tuned_allgather_GB(void *send_buff, int send_count, MPI_Comm comm) { int num_procs; - MPI_Comm_size(comm, &num_procs); - MPI_Gather(send_buff, send_count, send_type, recv_buff, recv_count, recv_type, + num_procs = smpi_comm_size(comm); + smpi_mpi_gather(send_buff, send_count, send_type, recv_buff, recv_count, recv_type, 0, comm); - MPI_Bcast(recv_buff, (recv_count * num_procs), recv_type, 0, comm); + mpi_coll_bcast_fun(recv_buff, (recv_count * num_procs), recv_type, 0, comm); return MPI_SUCCESS; } diff --git a/src/smpi/colls/allgather-NTSLR-NB.c b/src/smpi/colls/allgather-NTSLR-NB.c index 26fb9707a9..d0c6ef0646 100644 --- a/src/smpi/colls/allgather-NTSLR-NB.c +++ b/src/smpi/colls/allgather-NTSLR-NB.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" // Allgather-Non-Topoloty-Scecific-Logical-Ring algorithm int @@ -12,18 +12,21 @@ smpi_coll_tuned_allgather_NTSLR_NB(void *sbuf, int scount, MPI_Datatype stype, int send_offset, recv_offset; int tag = 500; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &size); - MPI_Type_extent(rtype, &rextent); - MPI_Type_extent(stype, &sextent); + rank = smpi_comm_rank(comm); + size = smpi_comm_size(comm); + rextent = smpi_datatype_get_extent(rtype); + sextent = smpi_datatype_get_extent(stype); MPI_Request *rrequest_array; MPI_Request *srequest_array; - rrequest_array = (MPI_Request *) malloc(size * sizeof(MPI_Request)); - srequest_array = (MPI_Request *) malloc(size * sizeof(MPI_Request)); + rrequest_array = (MPI_Request *) xbt_malloc(size * sizeof(MPI_Request)); + srequest_array = (MPI_Request *) xbt_malloc(size * sizeof(MPI_Request)); // irregular case use default MPI fucntions - if (scount * sextent != rcount * rextent) - MPI_Allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); + if (scount * sextent != rcount * rextent) { + XBT_WARN("MPI_allgather_NTSLR_NB use default MPI_allgather."); + smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); + return MPI_SUCCESS; + } // topo non-specific to = (rank + 1) % size; @@ -32,7 +35,7 @@ smpi_coll_tuned_allgather_NTSLR_NB(void *sbuf, int scount, MPI_Datatype stype, //copy a single segment from sbuf to rbuf send_offset = rank * scount * sextent; - MPI_Sendrecv(sbuf, scount, stype, rank, tag, + smpi_mpi_sendrecv(sbuf, scount, stype, rank, tag, (char *)rbuf + send_offset, rcount, rtype, rank, tag, comm, &status); @@ -42,17 +45,15 @@ smpi_coll_tuned_allgather_NTSLR_NB(void *sbuf, int scount, MPI_Datatype stype, //post all irecv first for (i = 0; i < size - 1; i++) { recv_offset = ((rank - i - 1 + size) % size) * increment; - MPI_Irecv((char *)rbuf + recv_offset, rcount, rtype, from, tag + i, comm, - &rrequest_array[i]); + rrequest_array[i] = smpi_mpi_irecv((char *)rbuf + recv_offset, rcount, 
rtype, from, tag + i, comm); } for (i = 0; i < size - 1; i++) { send_offset = ((rank - i + size) % size) * increment; - MPI_Isend((char *)rbuf + send_offset, scount, stype, to, tag + i, comm, - &srequest_array[i]); - MPI_Wait(&rrequest_array[i], &status); - MPI_Wait(&srequest_array[i], &status2); + srequest_array[i] = smpi_mpi_isend((char *)rbuf + send_offset, scount, stype, to, tag + i, comm); + smpi_mpi_wait(&rrequest_array[i], &status); + smpi_mpi_wait(&srequest_array[i], &status2); } free(rrequest_array); diff --git a/src/smpi/colls/allgather-NTSLR.c b/src/smpi/colls/allgather-NTSLR.c index 08be50f826..bcaabfd235 100644 --- a/src/smpi/colls/allgather-NTSLR.c +++ b/src/smpi/colls/allgather-NTSLR.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" // Allgather-Non-Topoloty-Scecific-Logical-Ring algorithm int @@ -12,14 +12,17 @@ smpi_coll_tuned_allgather_NTSLR(void *sbuf, int scount, MPI_Datatype stype, int send_offset, recv_offset; int tag = 500; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &size); - MPI_Type_extent(rtype, &rextent); - MPI_Type_extent(stype, &sextent); + rank = smpi_comm_rank(comm); + size = smpi_comm_size(comm); + rextent = smpi_datatype_get_extent(rtype); + sextent = smpi_datatype_get_extent(stype); // irregular case use default MPI fucntions - if (scount * sextent != rcount * rextent) - MPI_Allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); + if (scount * sextent != rcount * rextent) { + XBT_WARN("MPI_allgather_NTSLR use default MPI_allgather."); + smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); + return MPI_SUCCESS; + } // topo non-specific to = (rank + 1) % size; @@ -28,7 +31,7 @@ smpi_coll_tuned_allgather_NTSLR(void *sbuf, int scount, MPI_Datatype stype, //copy a single segment from sbuf to rbuf send_offset = rank * scount * sextent; - MPI_Sendrecv(sbuf, scount, stype, rank, tag, + smpi_mpi_sendrecv(sbuf, scount, stype, rank, tag, (char *)rbuf + send_offset, rcount, rtype, rank, tag, comm, &status); @@ -38,7 +41,7 @@ smpi_coll_tuned_allgather_NTSLR(void *sbuf, int scount, MPI_Datatype stype, for (i = 0; i < size - 1; i++) { send_offset = ((rank - i + size) % size) * increment; recv_offset = ((rank - i - 1 + size) % size) * increment; - MPI_Sendrecv((char *) rbuf + send_offset, scount, stype, to, tag + i, + smpi_mpi_sendrecv((char *) rbuf + send_offset, scount, stype, to, tag + i, (char *) rbuf + recv_offset, rcount, rtype, from, tag + i, comm, &status); } diff --git a/src/smpi/colls/allgather-SMP-NTS.c b/src/smpi/colls/allgather-SMP-NTS.c index 2f760f6312..1b49bd4be8 100644 --- a/src/smpi/colls/allgather-SMP-NTS.c +++ b/src/smpi/colls/allgather-SMP-NTS.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" #ifndef NUM_CORE #define NUM_CORE 8 #endif @@ -9,11 +9,11 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, MPI_Comm comm) { int src, dst, comm_size, rank; - MPI_Comm_size(comm, &comm_size); - MPI_Comm_rank(comm, &rank); + comm_size = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); MPI_Aint rextent, sextent; - MPI_Type_extent(rtype, &rextent); - MPI_Type_extent(stype, &sextent); + rextent = smpi_datatype_get_extent(rtype); + sextent = smpi_datatype_get_extent(stype); int tag = 50; MPI_Request request; MPI_Request rrequest_array[128]; @@ -29,15 +29,18 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, /* for too small number of processes, use default implementation */ if (comm_size <= NUM_CORE) { - return MPI_Allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); + 
XBT_WARN("MPI_allgather_SMP_NTS use default MPI_allgather."); + smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); + return MPI_SUCCESS; } + // the last SMP node may have fewer number of running processes than all others if (inter_rank == (inter_comm_size - 1)) { num_core_in_current_smp = comm_size - (inter_rank * NUM_CORE); } //copy corresponding message from sbuf to rbuf recv_offset = rank * rextent * rcount; - MPI_Sendrecv(sbuf, scount, stype, rank, tag, + smpi_mpi_sendrecv(sbuf, scount, stype, rank, tag, ((char *) rbuf + recv_offset), rcount, rtype, rank, tag, comm, &status); @@ -53,7 +56,7 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, (num_core_in_current_smp); recv_offset = src * rextent * rcount; - MPI_Sendrecv(sbuf, scount, stype, dst, tag, + smpi_mpi_sendrecv(sbuf, scount, stype, dst, tag, ((char *) rbuf + recv_offset), rcount, rtype, src, tag, comm, &status); @@ -73,28 +76,27 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, recv_offset = ((inter_rank - i - 1 + inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount; - MPI_Irecv((char *) rbuf + recv_offset, rcount * NUM_CORE, rtype, src, - tag + i, comm, &rrequest_array[i]); + rrequest_array[i] = smpi_mpi_irecv((char *)rbuf+recv_offset, rcount * NUM_CORE, rtype, src, tag+i, comm); } // send first message send_offset = ((inter_rank + inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount; - MPI_Isend((char *) rbuf + send_offset, scount * NUM_CORE, stype, dst, tag, - comm, &srequest_array[0]); + srequest_array[0] = smpi_mpi_isend((char *) rbuf + send_offset, scount * NUM_CORE, stype, dst, tag, + comm); // loop : recv-inter , send-inter, send-intra (linear-bcast) for (i = 0; i < inter_comm_size - 2; i++) { recv_offset = ((inter_rank - i - 1 + inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount; - MPI_Wait(&rrequest_array[i], &status); - MPI_Isend((char *) rbuf + recv_offset, scount * NUM_CORE, stype, dst, - tag + i + 1, comm, &srequest_array[i + 1]); + smpi_mpi_wait(&rrequest_array[i], &status); + srequest_array[i + 1] = smpi_mpi_isend((char *) rbuf + recv_offset, scount * NUM_CORE, stype, dst, + tag + i + 1, comm); if (num_core_in_current_smp > 1) { - MPI_Isend((char *) rbuf + recv_offset, scount * NUM_CORE, stype, - (rank + 1), tag + i + 1, comm, &request); + request = smpi_mpi_isend((char *) rbuf + recv_offset, scount * NUM_CORE, stype, + (rank + 1), tag + i + 1, comm); } } @@ -104,10 +106,10 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount; //recv_offset = ((inter_rank + 1) % inter_comm_size) * NUM_CORE * sextent * scount; //i=inter_comm_size-2; - MPI_Wait(&rrequest_array[i], &status); + smpi_mpi_wait(&rrequest_array[i], &status); if (num_core_in_current_smp > 1) { - MPI_Isend((char *) rbuf + recv_offset, scount * NUM_CORE, stype, - (rank + 1), tag + i + 1, comm, &request); + request = smpi_mpi_isend((char *) rbuf + recv_offset, scount * NUM_CORE, stype, + (rank + 1), tag + i + 1, comm); } } // last rank of each SMP @@ -116,9 +118,9 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, recv_offset = ((inter_rank - i - 1 + inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount; - MPI_Irecv((char *) rbuf + recv_offset, (rcount * NUM_CORE), rtype, - rank - 1, tag + i + 1, comm, &request); - MPI_Wait(&request, &status); + request = smpi_mpi_irecv((char *) rbuf + recv_offset, (rcount * NUM_CORE), rtype, + rank - 1, tag + i + 1, comm); + 
smpi_mpi_wait(&request, &status); } } // intermediate rank of each SMP @@ -127,11 +129,11 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, recv_offset = ((inter_rank - i - 1 + inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount; - MPI_Irecv((char *) rbuf + recv_offset, (rcount * NUM_CORE), rtype, - rank - 1, tag + i + 1, comm, &request); - MPI_Wait(&request, &status); - MPI_Isend((char *) rbuf + recv_offset, (scount * NUM_CORE), stype, - (rank + 1), tag + i + 1, comm, &request); + request = smpi_mpi_irecv((char *) rbuf + recv_offset, (rcount * NUM_CORE), rtype, + rank - 1, tag + i + 1, comm); + smpi_mpi_wait(&request, &status); + request = smpi_mpi_isend((char *) rbuf + recv_offset, (scount * NUM_CORE), stype, + (rank + 1), tag + i + 1, comm); } } diff --git a/src/smpi/colls/allgather-bruck.c b/src/smpi/colls/allgather-bruck.c index 29bb9cba48..4ca844c5ab 100644 --- a/src/smpi/colls/allgather-bruck.c +++ b/src/smpi/colls/allgather-bruck.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** @@ -82,15 +82,15 @@ int smpi_coll_tuned_allgather_bruck(void *send_buff, int send_count, char *recv_ptr = (char *) recv_buff; // get size of the communicator, followed by rank - MPI_Comm_size(comm, &num_procs); - MPI_Comm_rank(comm, &rank); + num_procs = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); // get size of single element's type for recv buffer - MPI_Type_extent(recv_type, &recv_extent); + recv_extent = smpi_datatype_get_extent(recv_type); count = recv_count; - tmp_buff = (char *) malloc(num_procs * recv_count * recv_extent); + tmp_buff = (char *) xbt_malloc(num_procs * recv_count * recv_extent); if (!tmp_buff) { printf("allgather-bruck:54: cannot allocate memory\n"); MPI_Finalize(); diff --git a/src/smpi/colls/allgather-loosely-lr.c b/src/smpi/colls/allgather-loosely-lr.c index 2074e2f820..6455db135a 100644 --- a/src/smpi/colls/allgather-loosely-lr.c +++ b/src/smpi/colls/allgather-loosely-lr.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" #ifndef NUM_CORE #define NUM_CORE 4 @@ -15,11 +15,11 @@ int smpi_coll_tuned_allgather_loosely_lr(void *sbuf, int scount, int intra_rank, inter_rank, inter_comm_size, intra_comm_size; int inter_dst, inter_src; - MPI_Comm_size(comm, &comm_size); - MPI_Comm_rank(comm, &rank); + comm_size = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); MPI_Aint rextent, sextent; - MPI_Type_extent(rtype, &rextent); - MPI_Type_extent(stype, &sextent); + rextent = smpi_datatype_get_extent(rtype); + sextent = smpi_datatype_get_extent(stype); MPI_Request inter_rrequest; MPI_Request rrequest_array[128]; MPI_Request srequest_array[128]; @@ -41,7 +41,7 @@ int smpi_coll_tuned_allgather_loosely_lr(void *sbuf, int scount, //copy corresponding message from sbuf to rbuf recv_offset = rank * rextent * rcount; - MPI_Sendrecv(sbuf, scount, stype, rank, tag, + smpi_mpi_sendrecv(sbuf, scount, stype, rank, tag, (char *)rbuf + recv_offset, rcount, rtype, rank, tag, comm, &status); int dst, src; @@ -74,11 +74,10 @@ int smpi_coll_tuned_allgather_loosely_lr(void *sbuf, int scount, if (intra_rank == j) { if (i != inter_comm_size - 1) { - MPI_Irecv((char *)rbuf + inter_recv_offset, rcount, rtype, inter_src, tag, - comm, &inter_rrequest); - MPI_Isend((char *)rbuf + inter_send_offset, scount, stype, inter_dst, tag, - comm, &inter_srequest_array[inter_srequest_count++]); - + inter_rrequest = smpi_mpi_irecv((char *)rbuf + inter_recv_offset, rcount, rtype, + 
inter_src, tag, comm); + inter_srequest_array[inter_srequest_count++] = smpi_mpi_isend((char *)rbuf + inter_send_offset, scount, stype, + inter_dst, tag, comm); } } //intra_communication @@ -98,10 +97,8 @@ int smpi_coll_tuned_allgather_loosely_lr(void *sbuf, int scount, if (j != intra_rank) { - MPI_Irecv((char *)rbuf + recv_offset, rcount, rtype, src, tag, comm, - &rrequest_array[rrequest_count++]); - MPI_Isend((char *)rbuf + send_offset, scount, stype, dst, tag, comm, - &srequest_array[srequest_count++]); + rrequest_array[rrequest_count++] = smpi_mpi_irecv((char *)rbuf + recv_offset, rcount, rtype, src, tag, comm); + srequest_array[srequest_count++] = smpi_mpi_isend((char *)rbuf + send_offset, scount, stype, dst, tag, comm); } } // intra loop @@ -109,14 +106,14 @@ int smpi_coll_tuned_allgather_loosely_lr(void *sbuf, int scount, // wait for inter communication to finish for these rounds (# of round equals NUM_CORE) if (i != inter_comm_size - 1) { - MPI_Wait(&inter_rrequest, &status); + smpi_mpi_wait(&inter_rrequest, &status); } } //inter loop - MPI_Waitall(rrequest_count, rrequest_array, MPI_STATUSES_IGNORE); - MPI_Waitall(srequest_count, srequest_array, MPI_STATUSES_IGNORE); - MPI_Waitall(inter_srequest_count, inter_srequest_array, MPI_STATUSES_IGNORE); + smpi_mpi_waitall(rrequest_count, rrequest_array, MPI_STATUSES_IGNORE); + smpi_mpi_waitall(srequest_count, srequest_array, MPI_STATUSES_IGNORE); + smpi_mpi_waitall(inter_srequest_count, inter_srequest_array, MPI_STATUSES_IGNORE); return MPI_SUCCESS; } diff --git a/src/smpi/colls/allgather-lr.c b/src/smpi/colls/allgather-lr.c index 407cc25755..7656069702 100644 --- a/src/smpi/colls/allgather-lr.c +++ b/src/smpi/colls/allgather-lr.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" // Allgather-Non-Topoloty-Scecific-Logical-Ring algorithm int @@ -12,14 +12,17 @@ smpi_coll_tuned_allgather_lr(void *sbuf, int scount, MPI_Datatype stype, int send_offset, recv_offset; int tag = 500; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &size); - MPI_Type_extent(rtype, &rextent); - MPI_Type_extent(stype, &sextent); + rank = smpi_comm_rank(comm); + size = smpi_comm_size(comm); + rextent = smpi_datatype_get_extent(rtype); + sextent = smpi_datatype_get_extent(stype); // irregular case use default MPI fucntions - if (scount * sextent != rcount * rextent) - MPI_Allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); + if (scount * sextent != rcount * rextent) { + XBT_WARN("MPI_allgather_lr use default MPI_allgather."); + smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); + return MPI_SUCCESS; + } // topo non-specific to = (rank + 1) % size; @@ -27,7 +30,7 @@ smpi_coll_tuned_allgather_lr(void *sbuf, int scount, MPI_Datatype stype, //copy a single segment from sbuf to rbuf send_offset = rank * scount * sextent; - MPI_Sendrecv(sbuf, scount, stype, rank, tag, + smpi_mpi_sendrecv(sbuf, scount, stype, rank, tag, (char *) rbuf + send_offset, rcount, rtype, rank, tag, comm, &status); @@ -36,7 +39,7 @@ smpi_coll_tuned_allgather_lr(void *sbuf, int scount, MPI_Datatype stype, for (i = 0; i < size - 1; i++) { send_offset = ((rank - i + size) % size) * increment; recv_offset = ((rank - i - 1 + size) % size) * increment; - MPI_Sendrecv((char *) rbuf + send_offset, scount, stype, to, tag + i, + smpi_mpi_sendrecv((char *) rbuf + send_offset, scount, stype, to, tag + i, (char *) rbuf + recv_offset, rcount, rtype, from, tag + i, comm, &status); } diff --git a/src/smpi/colls/allgather-pair.c b/src/smpi/colls/allgather-pair.c index 
a113ea9541..0dc4aec3c3 100644 --- a/src/smpi/colls/allgather-pair.c +++ b/src/smpi/colls/allgather-pair.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** @@ -73,18 +73,18 @@ smpi_coll_tuned_allgather_pair(void *send_buff, int send_count, char *send_ptr = (char *) send_buff; char *recv_ptr = (char *) recv_buff; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); - MPI_Type_extent(send_type, &extent); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); + extent = smpi_datatype_get_extent(send_type); // local send/recv - MPI_Sendrecv(send_ptr, send_count, send_type, rank, tag, + smpi_mpi_sendrecv(send_ptr, send_count, send_type, rank, tag, recv_ptr + rank * recv_count * extent, recv_count, recv_type, rank, tag, comm, &status); for (i = 1; i < num_procs; i++) { src = dst = rank ^ i; - MPI_Sendrecv(send_ptr, send_count, send_type, dst, tag, + smpi_mpi_sendrecv(send_ptr, send_count, send_type, dst, tag, recv_ptr + src * recv_count * extent, recv_count, recv_type, src, tag, comm, &status); } diff --git a/src/smpi/colls/allgather-rdb.c b/src/smpi/colls/allgather-rdb.c index fc1b36d621..520819a5f9 100644 --- a/src/smpi/colls/allgather-rdb.c +++ b/src/smpi/colls/allgather-rdb.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" int smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count, @@ -12,7 +12,7 @@ smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count, // local int variables int i, j, k, dst, rank, num_procs, send_offset, recv_offset, tree_root; - int dst_tree_root, rank_tree_root, last_recv_count, num_procs_completed; + int dst_tree_root, rank_tree_root, last_recv_count = 0, num_procs_completed; int offset, tmp_mask; int tag = 1; int mask = 1; @@ -24,19 +24,19 @@ smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count, char *recv_ptr = (char *) rbuf; // get size of the communicator, followed by rank - MPI_Comm_size(comm, &num_procs); - MPI_Comm_rank(comm, &rank); + num_procs = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); // get size of single element's type for send buffer and recv buffer - MPI_Type_extent(send_type, &send_chunk); - MPI_Type_extent(recv_type, &recv_chunk); + send_chunk = smpi_datatype_get_extent(send_type); + recv_chunk = smpi_datatype_get_extent(recv_type); // multiply size of each element by number of elements to send or recv send_chunk *= send_count; recv_chunk *= recv_count; // perform a local copy - MPI_Sendrecv(send_ptr, send_count, send_type, rank, tag, + smpi_mpi_sendrecv(send_ptr, send_count, send_type, rank, tag, recv_ptr + rank * recv_chunk, recv_count, recv_type, rank, tag, comm, &status); @@ -51,10 +51,10 @@ smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count, recv_offset = dst_tree_root * recv_chunk; if (dst < num_procs) { - MPI_Sendrecv(recv_ptr + send_offset, curr_count, send_type, dst, + smpi_mpi_sendrecv(recv_ptr + send_offset, curr_count, send_type, dst, tag, recv_ptr + recv_offset, mask * recv_count, recv_type, dst, tag, comm, &status); - MPI_Get_count(&status, recv_type, &last_recv_count); + last_recv_count = smpi_mpi_get_count(&status, recv_type); curr_count += last_recv_count; } @@ -90,7 +90,7 @@ smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count, if ((dst > rank) && (rank < tree_root + num_procs_completed) && (dst >= tree_root + num_procs_completed)) { - MPI_Send(recv_ptr + offset, last_recv_count, recv_type, dst, + smpi_mpi_send(recv_ptr + offset, last_recv_count, recv_type, dst, tag, comm); 
/* last_recv_cnt was set in the previous @@ -102,12 +102,12 @@ smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count, else if ((dst < rank) && (dst < tree_root + num_procs_completed) && (rank >= tree_root + num_procs_completed)) { - MPI_Recv(recv_ptr + offset, + smpi_mpi_recv(recv_ptr + offset, recv_count * num_procs_completed, recv_type, dst, tag, comm, &status); // num_procs_completed is also equal to the no. of processes // whose data we don't have - MPI_Get_count(&status, recv_type, &last_recv_count); + last_recv_count = smpi_mpi_get_count(&status, recv_type); curr_count += last_recv_count; } tmp_mask >>= 1; diff --git a/src/smpi/colls/allgather-rhv.c b/src/smpi/colls/allgather-rhv.c index dab0e6ee64..c3db821070 100644 --- a/src/smpi/colls/allgather-rhv.c +++ b/src/smpi/colls/allgather-rhv.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" // now only work with power of two processes @@ -20,20 +20,23 @@ smpi_coll_tuned_allgather_rhv(void *sbuf, int send_count, int curr_count; // get size of the communicator, followed by rank - MPI_Comm_size(comm, &num_procs); - MPI_Comm_rank(comm, &rank); + num_procs = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); // get size of single element's type for send buffer and recv buffer - MPI_Type_extent(send_type, &s_extent); - MPI_Type_extent(recv_type, &r_extent); + s_extent = smpi_datatype_get_extent(send_type); + r_extent = smpi_datatype_get_extent(recv_type); // multiply size of each element by number of elements to send or recv send_chunk = s_extent * send_count; recv_chunk = r_extent * recv_count; - if (send_chunk != recv_chunk) - return MPI_Allgather(sbuf, send_count, send_type, rbuf, recv_count, - recv_type, comm); + if (send_chunk != recv_chunk) { + XBT_WARN("MPI_allgather_rhv use default MPI_allgather."); + smpi_mpi_allgather(sbuf, send_count, send_type, rbuf, recv_count, + recv_type, comm); + return MPI_SUCCESS; + } // compute starting offset location to perform local copy int size = num_procs / 2; @@ -52,7 +55,7 @@ smpi_coll_tuned_allgather_rhv(void *sbuf, int send_count, //perform a remote copy dst = base_offset; - MPI_Sendrecv(sbuf, send_count, send_type, dst, tag, + smpi_mpi_sendrecv(sbuf, send_count, send_type, dst, tag, (char *)rbuf + base_offset * recv_chunk, recv_count, recv_type, dst, tag, comm, &status); @@ -78,7 +81,7 @@ smpi_coll_tuned_allgather_rhv(void *sbuf, int send_count, // printf("node %d send to %d in phase %d s_offset = %d r_offset = %d count = %d\n",rank,dst,phase, send_base_offset, recv_base_offset, curr_count); - MPI_Sendrecv((char *)rbuf + send_offset, curr_count, recv_type, dst, tag, + smpi_mpi_sendrecv((char *)rbuf + send_offset, curr_count, recv_type, dst, tag, (char *)rbuf + recv_offset, curr_count, recv_type, dst, tag, comm, &status); diff --git a/src/smpi/colls/allgather-ring.c b/src/smpi/colls/allgather-ring.c index 9e143ded7b..6108ae5be8 100644 --- a/src/smpi/colls/allgather-ring.c +++ b/src/smpi/colls/allgather-ring.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** @@ -72,19 +72,19 @@ smpi_coll_tuned_allgather_ring(void *send_buff, int send_count, char *sendptr = (char *) send_buff; char *recvptr = (char *) recv_buff; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); - MPI_Type_extent(send_type, &extent); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); + extent = smpi_datatype_get_extent(send_type); // local send/recv - MPI_Sendrecv(sendptr, send_count, 
send_type, rank, tag, + smpi_mpi_sendrecv(sendptr, send_count, send_type, rank, tag, recvptr + rank * recv_count * extent, recv_count, recv_type, rank, tag, comm, &status); for (i = 1; i < num_procs; i++) { src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; - MPI_Sendrecv(sendptr, send_count, send_type, dst, tag, + smpi_mpi_sendrecv(sendptr, send_count, send_type, dst, tag, recvptr + src * recv_count * extent, recv_count, recv_type, src, tag, comm, &status); } diff --git a/src/smpi/colls/allgather-smp-simple.c b/src/smpi/colls/allgather-smp-simple.c index c8f0c68d6b..21cab26034 100644 --- a/src/smpi/colls/allgather-smp-simple.c +++ b/src/smpi/colls/allgather-smp-simple.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" #ifndef NUM_CORE #define NUM_CORE 8 #endif @@ -9,11 +9,11 @@ int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount, MPI_Comm comm) { int src, dst, comm_size, rank; - MPI_Comm_size(comm, &comm_size); - MPI_Comm_rank(comm, &rank); + comm_size = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); MPI_Aint rextent, sextent; - MPI_Type_extent(rtype, &rextent); - MPI_Type_extent(stype, &sextent); + rextent = smpi_datatype_get_extent(rtype); + sextent = smpi_datatype_get_extent(stype); int tag = 50; MPI_Status status; int i, send_offset, recv_offset; @@ -30,7 +30,7 @@ int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount, } //INTRA-SMP-ALLGATHER recv_offset = rank * rextent * rcount; - MPI_Sendrecv(send_buf, scount, stype, rank, tag, + smpi_mpi_sendrecv(send_buf, scount, stype, rank, tag, ((char *) recv_buf + recv_offset), rcount, rtype, rank, tag, comm, &status); for (i = 1; i < num_core_in_current_smp; i++) { @@ -43,7 +43,7 @@ int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount, (num_core_in_current_smp); recv_offset = src * rextent * rcount; - MPI_Sendrecv(send_buf, scount, stype, dst, tag, + smpi_mpi_sendrecv(send_buf, scount, stype, dst, tag, ((char *) recv_buf + recv_offset), rcount, rtype, src, tag, comm, &status); @@ -57,10 +57,10 @@ int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount, if (intra_rank == 0) { MPI_Request *reqs, *req_ptr; int num_req = (inter_comm_size - 1) * 2; - reqs = (MPI_Request *) malloc(num_req * sizeof(MPI_Request)); + reqs = (MPI_Request *) xbt_malloc(num_req * sizeof(MPI_Request)); req_ptr = reqs; MPI_Status *stat; - stat = (MPI_Status *) malloc(num_req * sizeof(MPI_Status)); + stat = (MPI_Status *) xbt_malloc(num_req * sizeof(MPI_Status)); for (i = 1; i < inter_comm_size; i++) { @@ -68,11 +68,11 @@ int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount, src = ((inter_rank - i + inter_comm_size) % inter_comm_size) * num_core; //send_offset = (rank * sextent * scount); recv_offset = (src * sextent * scount); - // MPI_Sendrecv((recv_buf+send_offset), (scount * num_core), stype, dst, tag, + // smpi_mpi_sendrecv((recv_buf+send_offset), (scount * num_core), stype, dst, tag, // (recv_buf+recv_offset), (rcount * num_core), rtype, src, tag, comm, &status); //MPIC_Isend((recv_buf+send_offset), (scount * num_core), stype, dst, tag, comm, req_ptr++); - MPI_Irecv(((char *) recv_buf + recv_offset), (rcount * num_core), rtype, - src, tag, comm, req_ptr++); + *(req_ptr++) = smpi_mpi_irecv(((char *) recv_buf + recv_offset), (rcount * num_core), rtype, + src, tag, comm); } for (i = 1; i < inter_comm_size; i++) { @@ -80,13 +80,13 @@ int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount, //src = ((inter_rank-i+inter_comm_size)%inter_comm_size) * 
num_core; send_offset = (rank * sextent * scount); //recv_offset = (src * sextent * scount); - // MPI_Sendrecv((recv_buf+send_offset), (scount * num_core), stype, dst, tag, + // smpi_mpi_sendrecv((recv_buf+send_offset), (scount * num_core), stype, dst, tag, // (recv_buf+recv_offset), (rcount * num_core), rtype, src, tag, comm, &status); - MPI_Isend(((char *) recv_buf + send_offset), (scount * num_core), stype, - dst, tag, comm, req_ptr++); + *(req_ptr++) = smpi_mpi_isend(((char *) recv_buf + send_offset), (scount * num_core), stype, + dst, tag, comm); //MPIC_Irecv((recv_buf+recv_offset), (rcount * num_core), rtype, src, tag, comm, req_ptr++); } - MPI_Waitall(num_req, reqs, stat); + smpi_mpi_waitall(num_req, reqs, stat); free(reqs); free(stat); @@ -96,11 +96,11 @@ int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount, if (intra_rank == 0) { for (i = 1; i < num_core_in_current_smp; i++) { //printf("rank = %d, num = %d send to %d\n",rank, num_core_in_current_smp, (rank + i)); - MPI_Send(recv_buf, (scount * comm_size), stype, (rank + i), tag, comm); + smpi_mpi_send(recv_buf, (scount * comm_size), stype, (rank + i), tag, comm); } } else { //printf("rank = %d recv from %d\n",rank, (inter_rank * num_core)); - MPI_Recv(recv_buf, (rcount * comm_size), rtype, (inter_rank * num_core), + smpi_mpi_recv(recv_buf, (rcount * comm_size), rtype, (inter_rank * num_core), tag, comm, &status); } diff --git a/src/smpi/colls/allgather-spreading-simple.c b/src/smpi/colls/allgather-spreading-simple.c index 3b109e4177..822ab5346d 100644 --- a/src/smpi/colls/allgather-spreading-simple.c +++ b/src/smpi/colls/allgather-spreading-simple.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** @@ -74,12 +74,12 @@ smpi_coll_tuned_allgather_spreading_simple(void *send_buff, int send_count, MPI_Status status; char *recv_ptr = (char *) recv_buff; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); - MPI_Type_extent(send_type, &extent); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); + extent = smpi_datatype_get_extent(send_type); num_reqs = (2 * num_procs) - 2; - reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request)); + reqs = (MPI_Request *) xbt_malloc(num_reqs * sizeof(MPI_Request)); if (!reqs) { printf("allgather-spreading-simple.c:40: cannot allocate memory\n"); MPI_Finalize(); @@ -87,7 +87,7 @@ smpi_coll_tuned_allgather_spreading_simple(void *send_buff, int send_count, } req_ptr = reqs; - MPI_Sendrecv(send_buff, send_count, send_type, rank, tag, + smpi_mpi_sendrecv(send_buff, send_count, send_type, rank, tag, (char *) recv_buff + rank * recv_count * extent, recv_count, recv_type, rank, tag, comm, &status); @@ -95,18 +95,18 @@ smpi_coll_tuned_allgather_spreading_simple(void *send_buff, int send_count, src = (rank + i) % num_procs; if (src == rank) continue; - MPI_Irecv(recv_ptr + src * recv_count * extent, recv_count, recv_type, - src, tag, comm, req_ptr++); + *(req_ptr++) = smpi_mpi_irecv(recv_ptr + src * recv_count * extent, recv_count, recv_type, + src, tag, comm); } for (i = 0; i < num_procs; i++) { dst = (rank + i) % num_procs; if (dst == rank) continue; - MPI_Isend(send_buff, send_count, send_type, dst, tag, comm, req_ptr++); + *(req_ptr++) = smpi_mpi_isend(send_buff, send_count, send_type, dst, tag, comm); } - MPI_Waitall(num_reqs, reqs, MPI_STATUSES_IGNORE); + smpi_mpi_waitall(num_reqs, reqs, MPI_STATUSES_IGNORE); free(reqs); return MPI_SUCCESS; diff --git 
a/src/smpi/colls/allreduce-NTS.c b/src/smpi/colls/allreduce-NTS.c index 70c6924869..c7e145de61 100644 --- a/src/smpi/colls/allreduce-NTS.c +++ b/src/smpi/colls/allreduce-NTS.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /* IMPLEMENTED BY PITCH PATARASUK Non-topoloty-specific all-reduce operation designed bandwidth optimally */ @@ -20,16 +20,16 @@ smpi_coll_tuned_allreduce_NTS(void *sbuf, void *rbuf, int rcount, int send_offset, recv_offset; int remainder, remainder_flag, remainder_offset; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); + rank = smpi_comm_rank(MPI_COMM_WORLD); + size = smpi_comm_size(MPI_COMM_WORLD); /* make it compatible with all data type */ MPI_Aint extent; - MPI_Type_extent(dtype, &extent); + extent = smpi_datatype_get_extent(dtype); /* when communication size is smaller than number of process (not support) */ if (rcount < size) { - return MPI_Allreduce(sbuf, rbuf, rcount, dtype, op, comm); + return mpi_coll_allreduce_fun(sbuf, rbuf, rcount, dtype, op, comm); } /* when communication size is not divisible by number of process: @@ -56,7 +56,7 @@ smpi_coll_tuned_allreduce_NTS(void *sbuf, void *rbuf, int rcount, // copy partial data send_offset = ((rank - 1 + size) % size) * count * extent; recv_offset = ((rank - 1 + size) % size) * count * extent; - MPI_Sendrecv((char *) sbuf + send_offset, count, dtype, rank, tag - 1, + smpi_mpi_sendrecv((char *) sbuf + send_offset, count, dtype, rank, tag - 1, (char *) rbuf + recv_offset, count, dtype, rank, tag - 1, comm, &status); @@ -64,7 +64,7 @@ smpi_coll_tuned_allreduce_NTS(void *sbuf, void *rbuf, int rcount, for (i = 0; i < (size - 1); i++) { send_offset = ((rank - 1 - i + size) % size) * count * extent; recv_offset = ((rank - 2 - i + size) % size) * count * extent; - MPI_Sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size), + smpi_mpi_sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size), tag + i, (char *) rbuf + recv_offset, count, dtype, ((rank + size - 1) % size), tag + i, comm, &status); @@ -76,7 +76,7 @@ smpi_coll_tuned_allreduce_NTS(void *sbuf, void *rbuf, int rcount, for (i = 0; i < (size - 1); i++) { send_offset = ((rank - i + size) % size) * count * extent; recv_offset = ((rank - 1 - i + size) % size) * count * extent; - MPI_Sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size), + smpi_mpi_sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size), tag + i, (char *) rbuf + recv_offset, count, dtype, ((rank + size - 1) % size), tag + i, comm, &status); } @@ -84,9 +84,11 @@ smpi_coll_tuned_allreduce_NTS(void *sbuf, void *rbuf, int rcount, /* when communication size is not divisible by number of process: call the native implementation for the remain chunk at the end of the operation */ if (remainder_flag) { - return MPI_Allreduce((char *) sbuf + remainder_offset, + XBT_WARN("MPI_allreduce_NTS use default MPI_allreduce."); + smpi_mpi_allreduce((char *) sbuf + remainder_offset, (char *) rbuf + remainder_offset, remainder, dtype, op, comm); + return MPI_SUCCESS; } return MPI_SUCCESS; diff --git a/src/smpi/colls/allreduce-lr.c b/src/smpi/colls/allreduce-lr.c index d4bf82aae8..a7f733690c 100644 --- a/src/smpi/colls/allreduce-lr.c +++ b/src/smpi/colls/allreduce-lr.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /* IMPLEMENTED BY PITCH PATARASUK Non-topoloty-specific all-reduce operation designed bandwidth optimally @@ -23,16 +23,18 @@ smpi_coll_tuned_allreduce_lr(void *sbuf, void *rbuf, int 
rcount, int send_offset, recv_offset; int remainder, remainder_flag, remainder_offset; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); + rank = smpi_comm_rank(MPI_COMM_WORLD); + size = smpi_comm_size(MPI_COMM_WORLD); /* make it compatible with all data type */ MPI_Aint extent; - MPI_Type_extent(dtype, &extent); + extent = smpi_datatype_get_extent(dtype); /* when communication size is smaller than number of process (not support) */ if (rcount < size) { - return MPI_Allreduce(sbuf, rbuf, rcount, dtype, op, comm); + XBT_WARN("MPI_allreduce_lr use default MPI_allreduce."); + smpi_mpi_allreduce(sbuf, rbuf, rcount, dtype, op, comm); + return MPI_SUCCESS; } /* when communication size is not divisible by number of process: @@ -59,7 +61,7 @@ smpi_coll_tuned_allreduce_lr(void *sbuf, void *rbuf, int rcount, // copy partial data send_offset = ((rank - 1 + size) % size) * count * extent; recv_offset = ((rank - 1 + size) % size) * count * extent; - MPI_Sendrecv((char *) sbuf + send_offset, count, dtype, rank, tag - 1, + smpi_mpi_sendrecv((char *) sbuf + send_offset, count, dtype, rank, tag - 1, (char *) rbuf + recv_offset, count, dtype, rank, tag - 1, comm, &status); @@ -68,7 +70,7 @@ smpi_coll_tuned_allreduce_lr(void *sbuf, void *rbuf, int rcount, send_offset = ((rank - 1 - i + 2 * size) % size) * count * extent; recv_offset = ((rank - 2 - i + 2 * size) % size) * count * extent; // recv_offset = ((rank-i+2*size)%size)*count*extent; - MPI_Sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size), + smpi_mpi_sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size), tag + i, (char *) rbuf + recv_offset, count, dtype, ((rank + size - 1) % size), tag + i, comm, &status); @@ -81,7 +83,7 @@ smpi_coll_tuned_allreduce_lr(void *sbuf, void *rbuf, int rcount, for (i = 0; i < (size - 1); i++) { send_offset = ((rank - i + 2 * size) % size) * count * extent; recv_offset = ((rank - 1 - i + 2 * size) % size) * count * extent; - MPI_Sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size), + smpi_mpi_sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size), tag + i, (char *) rbuf + recv_offset, count, dtype, ((rank + size - 1) % size), tag + i, comm, &status); } @@ -89,7 +91,7 @@ smpi_coll_tuned_allreduce_lr(void *sbuf, void *rbuf, int rcount, /* when communication size is not divisible by number of process: call the native implementation for the remain chunk at the end of the operation */ if (remainder_flag) { - return MPI_Allreduce((char *) sbuf + remainder_offset, + return mpi_coll_allreduce_fun((char *) sbuf + remainder_offset, (char *) rbuf + remainder_offset, remainder, dtype, op, comm); } diff --git a/src/smpi/colls/allreduce-rab-rdb.c b/src/smpi/colls/allreduce-rab-rdb.c index 2c7b49bf92..6d49e711da 100644 --- a/src/smpi/colls/allreduce-rab-rdb.c +++ b/src/smpi/colls/allreduce-rab-rdb.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" int smpi_coll_tuned_allreduce_rab_rdb(void *sbuff, void *rbuff, int count, MPI_Datatype dtype, MPI_Op op, @@ -20,15 +20,15 @@ int smpi_coll_tuned_allreduce_rab_rdb(void *sbuff, void *rbuff, int count, uop = op_ptr->op; #endif - MPI_Comm_size(comm, &nprocs); - MPI_Comm_rank(comm, &rank); + nprocs = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); - MPI_Type_extent(dtype, &extent); + extent = smpi_datatype_get_extent(dtype); tmp_buf = (void *) xbt_malloc(count * extent); MPIR_Localcopy(sbuff, count, dtype, rbuff, count, dtype); - MPI_Type_size(dtype, &type_size); + type_size 
= smpi_datatype_size(dtype); // find nearest power-of-two less than or equal to comm_size pof2 = 1; @@ -48,7 +48,7 @@ int smpi_coll_tuned_allreduce_rab_rdb(void *sbuff, void *rbuff, int count, // even if (rank % 2 == 0) { - MPI_Send(rbuff, count, dtype, rank + 1, tag, comm); + smpi_mpi_send(rbuff, count, dtype, rank + 1, tag, comm); // temporarily set the rank to -1 so that this // process does not pariticipate in recursive @@ -56,7 +56,7 @@ int smpi_coll_tuned_allreduce_rab_rdb(void *sbuff, void *rbuff, int count, newrank = -1; } else // odd { - MPI_Recv(tmp_buf, count, dtype, rank - 1, tag, comm, &status); + smpi_mpi_recv(tmp_buf, count, dtype, rank - 1, tag, comm, &status); // do the reduction on received data. since the // ordering is right, it doesn't matter whether // the operation is commutative or not. @@ -84,8 +84,8 @@ int smpi_coll_tuned_allreduce_rab_rdb(void *sbuff, void *rbuff, int count, // reduce-scatter, calculate the count that each process receives // and the displacement within the buffer - cnts = (int *) malloc(pof2 * sizeof(int)); - disps = (int *) malloc(pof2 * sizeof(int)); + cnts = (int *) xbt_malloc(pof2 * sizeof(int)); + disps = (int *) xbt_malloc(pof2 * sizeof(int)); for (i = 0; i < (pof2 - 1); i++) cnts[i] = count / pof2; @@ -119,7 +119,7 @@ int smpi_coll_tuned_allreduce_rab_rdb(void *sbuff, void *rbuff, int count, } // Send data from recvbuf. Recv into tmp_buf - MPI_Sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt, + smpi_mpi_sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt, dtype, dst, tag, (char *) tmp_buf + disps[recv_idx] * extent, recv_cnt, dtype, dst, tag, comm, &status); @@ -169,7 +169,7 @@ int smpi_coll_tuned_allreduce_rab_rdb(void *sbuff, void *rbuff, int count, recv_cnt += cnts[i]; } - MPI_Sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt, + smpi_mpi_sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt, dtype, dst, tag, (char *) rbuff + disps[recv_idx] * extent, recv_cnt, dtype, dst, tag, comm, &status); @@ -190,9 +190,9 @@ int smpi_coll_tuned_allreduce_rab_rdb(void *sbuff, void *rbuff, int count, if (rank < 2 * rem) { if (rank % 2) // odd - MPI_Send(rbuff, count, dtype, rank - 1, tag, comm); + smpi_mpi_send(rbuff, count, dtype, rank - 1, tag, comm); else // even - MPI_Recv(rbuff, count, dtype, rank + 1, tag, comm, &status); + smpi_mpi_recv(rbuff, count, dtype, rank + 1, tag, comm, &status); } free(tmp_buf); diff --git a/src/smpi/colls/allreduce-rab-reduce-scatter.c b/src/smpi/colls/allreduce-rab-reduce-scatter.c index 29b38660cc..5650ef233d 100755 --- a/src/smpi/colls/allreduce-rab-reduce-scatter.c +++ b/src/smpi/colls/allreduce-rab-reduce-scatter.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" #ifndef REDUCE_STUFF #define REDUCE_STUFF /***************************************************************************** @@ -341,15 +341,15 @@ int smpi_coll_tuned_allreduce_rab_reduce_scatter(void *sbuff, void *rbuff, MPI_Status status; void *tmp_buf = NULL; MPI_User_function *func = get_op_func(op); - MPI_Comm_size(comm, &nprocs); - MPI_Comm_rank(comm, &rank); + nprocs = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); - MPI_Type_extent(dtype, &extent); + extent = smpi_datatype_get_extent(dtype); tmp_buf = (void *) xbt_malloc(count * extent); MPIR_Localcopy(sbuff, count, dtype, rbuff, count, dtype); - MPI_Type_size(dtype, &type_size); + type_size = smpi_datatype_size(dtype); // find nearest power-of-two less than or equal to comm_size pof2 = 1; @@ -405,8 +405,8 @@ int 
smpi_coll_tuned_allreduce_rab_reduce_scatter(void *sbuff, void *rbuff, // reduce-scatter, calculate the count that each process receives // and the displacement within the buffer - cnts = (int *) malloc(pof2 * sizeof(int)); - disps = (int *) malloc(pof2 * sizeof(int)); + cnts = (int *) xbt_malloc(pof2 * sizeof(int)); + disps = (int *) xbt_malloc(pof2 * sizeof(int)); for (i = 0; i < (pof2 - 1); i++) cnts[i] = count / pof2; diff --git a/src/smpi/colls/allreduce-rab-rsag.c b/src/smpi/colls/allreduce-rab-rsag.c index 54149d4db4..268ac8b80d 100644 --- a/src/smpi/colls/allreduce-rab-rsag.c +++ b/src/smpi/colls/allreduce-rab-rsag.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" //#include int smpi_coll_tuned_allreduce_rab_rsag(void *sbuff, void *rbuff, int count, @@ -11,16 +11,16 @@ int smpi_coll_tuned_allreduce_rab_rsag(void *sbuff, void *rbuff, int count, MPI_Aint extent; MPI_Status status; void *tmp_buf = NULL; - MPI_Comm_size(comm, &nprocs); - MPI_Comm_rank(comm, &rank); + nprocs = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); - MPI_Type_extent(dtype, &extent); + extent = smpi_datatype_get_extent(dtype); tmp_buf = (void *) xbt_malloc(count * extent); - MPI_Sendrecv(sbuff, count, dtype, rank, tag, rbuff, count, dtype, rank, tag, + smpi_mpi_sendrecv(sbuff, count, dtype, rank, tag, rbuff, count, dtype, rank, tag, comm, &status); - MPI_Type_size(dtype, &type_size); + type_size = smpi_datatype_size(dtype); // find nearest power-of-two less than or equal to comm_size pof2 = 1; @@ -40,7 +40,7 @@ int smpi_coll_tuned_allreduce_rab_rsag(void *sbuff, void *rbuff, int count, // even if (rank % 2 == 0) { - MPI_Send(rbuff, count, dtype, rank + 1, tag, comm); + smpi_mpi_send(rbuff, count, dtype, rank + 1, tag, comm); // temporarily set the rank to -1 so that this // process does not pariticipate in recursive @@ -48,7 +48,7 @@ int smpi_coll_tuned_allreduce_rab_rsag(void *sbuff, void *rbuff, int count, newrank = -1; } else // odd { - MPI_Recv(tmp_buf, count, dtype, rank - 1, tag, comm, &status); + smpi_mpi_recv(tmp_buf, count, dtype, rank - 1, tag, comm, &status); // do the reduction on received data. since the // ordering is right, it doesn't matter whether // the operation is commutative or not. @@ -76,8 +76,8 @@ int smpi_coll_tuned_allreduce_rab_rsag(void *sbuff, void *rbuff, int count, // reduce-scatter, calculate the count that each process receives // and the displacement within the buffer - cnts = (int *) malloc(pof2 * sizeof(int)); - disps = (int *) malloc(pof2 * sizeof(int)); + cnts = (int *) xbt_malloc(pof2 * sizeof(int)); + disps = (int *) xbt_malloc(pof2 * sizeof(int)); for (i = 0; i < (pof2 - 1); i++) cnts[i] = count / pof2; @@ -111,7 +111,7 @@ int smpi_coll_tuned_allreduce_rab_rsag(void *sbuff, void *rbuff, int count, } // Send data from recvbuf. 
Recv into tmp_buf - MPI_Sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt, + smpi_mpi_sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt, dtype, dst, tag, (char *) tmp_buf + disps[recv_idx] * extent, recv_cnt, dtype, dst, tag, comm, &status); @@ -162,7 +162,7 @@ int smpi_coll_tuned_allreduce_rab_rsag(void *sbuff, void *rbuff, int count, recv_cnt += cnts[i]; } - MPI_Sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt, + smpi_mpi_sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt, dtype, dst, tag, (char *) rbuff + disps[recv_idx] * extent, recv_cnt, dtype, dst, tag, comm, &status); @@ -183,9 +183,9 @@ int smpi_coll_tuned_allreduce_rab_rsag(void *sbuff, void *rbuff, int count, if (rank < 2 * rem) { if (rank % 2) // odd - MPI_Send(rbuff, count, dtype, rank - 1, tag, comm); + smpi_mpi_send(rbuff, count, dtype, rank - 1, tag, comm); else // even - MPI_Recv(rbuff, count, dtype, rank + 1, tag, comm, &status); + smpi_mpi_recv(rbuff, count, dtype, rank + 1, tag, comm, &status); } free(tmp_buf); diff --git a/src/smpi/colls/allreduce-rab1.c b/src/smpi/colls/allreduce-rab1.c index ec6afc74e5..e57ac2fcd0 100644 --- a/src/smpi/colls/allreduce-rab1.c +++ b/src/smpi/colls/allreduce-rab1.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" //#include // NP pow of 2 for now @@ -13,10 +13,10 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, void *recv, *tmp_buf; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &nprocs); + rank = smpi_comm_rank(comm); + nprocs = smpi_comm_size(comm); - MPI_Type_extent(dtype, &extent); + extent = smpi_datatype_get_extent(dtype); pof2 = 1; while (pof2 <= nprocs) @@ -31,8 +31,8 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, send_size = (count + nprocs) / nprocs; newcnt = send_size * nprocs; - recv = (void *) malloc(extent * newcnt); - tmp_buf = (void *) malloc(extent * newcnt); + recv = (void *) xbt_malloc(extent * newcnt); + tmp_buf = (void *) xbt_malloc(extent * newcnt); memcpy(recv, sbuff, extent * count); @@ -47,7 +47,7 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, else recv_idx = send_idx + (mask * share); - MPI_Sendrecv((char *) recv + send_idx * extent, send_cnt, dtype, dst, tag, + smpi_mpi_sendrecv((char *) recv + send_idx * extent, send_cnt, dtype, dst, tag, tmp_buf, recv_cnt, dtype, dst, tag, comm, &status); star_reduction(op, tmp_buf, (char *) recv + recv_idx * extent, &recv_cnt, @@ -59,7 +59,7 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, } memcpy(tmp_buf, (char *) recv + recv_idx * extent, recv_cnt * extent); - MPI_Allgather(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm); + mpi_coll_allgather_fun(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm); memcpy(rbuff, recv, count * extent); free(recv); @@ -68,7 +68,7 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, } else { - tmp_buf = (void *) malloc(extent * count); + tmp_buf = (void *) xbt_malloc(extent * count); memcpy(rbuff, sbuff, count * extent); mask = pof2 / 2; share = count / pof2; @@ -81,7 +81,7 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, else recv_idx = send_idx + (mask * share); - MPI_Sendrecv((char *) rbuff + send_idx * extent, send_cnt, dtype, dst, + smpi_mpi_sendrecv((char *) rbuff + send_idx * extent, send_cnt, dtype, dst, tag, tmp_buf, recv_cnt, dtype, dst, tag, comm, &status); star_reduction(op, tmp_buf, (char *) rbuff + recv_idx * extent, &recv_cnt, @@ -93,7 +93,7 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, } 
memcpy(tmp_buf, (char *) rbuff + recv_idx * extent, recv_cnt * extent); - MPI_Allgather(tmp_buf, recv_cnt, dtype, rbuff, recv_cnt, dtype, comm); + mpi_coll_allgather_fun(tmp_buf, recv_cnt, dtype, rbuff, recv_cnt, dtype, comm); free(tmp_buf); } diff --git a/src/smpi/colls/allreduce-rab2.c b/src/smpi/colls/allreduce-rab2.c index b3c2c74eac..ca1dee9147 100644 --- a/src/smpi/colls/allreduce-rab2.c +++ b/src/smpi/colls/allreduce-rab2.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" //#include // this requires that count >= NP @@ -20,11 +20,11 @@ int smpi_coll_tuned_allreduce_rab2(void *sbuff, void *rbuff, uop = op_ptr->op; #endif */ - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &nprocs); + rank = smpi_comm_rank(comm); + nprocs = smpi_comm_size(comm); - MPI_Type_extent(dtype, &s_extent); + s_extent = smpi_datatype_get_extent(dtype); // uneven count if (count % nprocs) { @@ -34,20 +34,20 @@ int smpi_coll_tuned_allreduce_rab2(void *sbuff, void *rbuff, send_size = (count + nprocs) / nprocs; nbytes = send_size * s_extent; - send = (void *) malloc(s_extent * send_size * nprocs); - recv = (void *) malloc(s_extent * send_size * nprocs); - tmp = (void *) malloc(nbytes); + send = (void *) xbt_malloc(s_extent * send_size * nprocs); + recv = (void *) xbt_malloc(s_extent * send_size * nprocs); + tmp = (void *) xbt_malloc(nbytes); memcpy(send, sbuff, s_extent * count); - MPI_Alltoall(send, send_size, dtype, recv, send_size, dtype, comm); + mpi_coll_alltoall_fun(send, send_size, dtype, recv, send_size, dtype, comm); memcpy(tmp, recv, nbytes); for (i = 1, s_offset = nbytes; i < nprocs; i++, s_offset = i * nbytes) star_reduction(op, (char *) recv + s_offset, tmp, &send_size, &dtype); - MPI_Allgather(tmp, send_size, dtype, recv, send_size, dtype, comm); + mpi_coll_allgather_fun(tmp, send_size, dtype, recv, send_size, dtype, comm); memcpy(rbuff, recv, count * s_extent); free(recv); @@ -59,9 +59,9 @@ int smpi_coll_tuned_allreduce_rab2(void *sbuff, void *rbuff, nbytes = send_size * s_extent; r_offset = rank * nbytes; - recv = (void *) malloc(s_extent * send_size * nprocs); + recv = (void *) xbt_malloc(s_extent * send_size * nprocs); - MPI_Alltoall(send, send_size, dtype, recv, send_size, dtype, comm); + mpi_coll_alltoall_fun(send, send_size, dtype, recv, send_size, dtype, comm); memcpy((char *) rbuff + r_offset, recv, nbytes); @@ -69,7 +69,7 @@ int smpi_coll_tuned_allreduce_rab2(void *sbuff, void *rbuff, star_reduction(op, (char *) recv + s_offset, (char *) rbuff + r_offset, &send_size, &dtype); - MPI_Allgather((char *) rbuff + r_offset, send_size, dtype, rbuff, send_size, + mpi_coll_allgather_fun((char *) rbuff + r_offset, send_size, dtype, rbuff, send_size, dtype, comm); free(recv); } diff --git a/src/smpi/colls/allreduce-rdb.c b/src/smpi/colls/allreduce-rdb.c index 204de0291f..bb9c82e3d1 100644 --- a/src/smpi/colls/allreduce-rdb.c +++ b/src/smpi/colls/allreduce-rdb.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" //#include int smpi_coll_tuned_allreduce_rdb(void *sbuff, void *rbuff, int count, diff --git a/src/smpi/colls/allreduce-redbcast.c b/src/smpi/colls/allreduce-redbcast.c index 88d35617ee..69c552dfe2 100644 --- a/src/smpi/colls/allreduce-redbcast.c +++ b/src/smpi/colls/allreduce-redbcast.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" int smpi_coll_tuned_allreduce_redbcast(void *buf, void *buf2, int count, MPI_Datatype datatype, MPI_Op op, diff --git a/src/smpi/colls/allreduce-smp-binomial-pipeline.c 
b/src/smpi/colls/allreduce-smp-binomial-pipeline.c index b5efc1eb29..05ecfd6cf8 100644 --- a/src/smpi/colls/allreduce-smp-binomial-pipeline.c +++ b/src/smpi/colls/allreduce-smp-binomial-pipeline.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /* IMPLEMENTED BY PITCH PATARASUK Non-topoloty-specific (however, number of cores/node need to be changed) all-reduce operation designed for smp clusters @@ -68,11 +68,11 @@ int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf, uop = MPIR_Op_table[op % 16 - 1]; #endif - MPI_Comm_size(comm, &comm_size); - MPI_Comm_rank(comm, &rank); + comm_size = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); MPI_Aint extent; - MPI_Type_extent(dtype, &extent); - tmp_buf = (void *) malloc(count * extent); + extent = smpi_datatype_get_extent(dtype); + tmp_buf = (void *) xbt_malloc(count * extent); int intra_rank, inter_rank; intra_rank = rank % num_core; @@ -91,7 +91,7 @@ int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf, int inter_comm_size = (comm_size + num_core - 1) / num_core; /* copy input buffer to output buffer */ - MPI_Sendrecv(send_buf, count, dtype, rank, tag, + smpi_mpi_sendrecv(send_buf, count, dtype, rank, tag, recv_buf, count, dtype, rank, tag, comm, &status); /* compute pipe length */ @@ -110,13 +110,13 @@ int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf, src = (inter_rank * num_core) + (intra_rank | mask); if (src < comm_size) { recv_offset = phase * pcount * extent; - MPI_Recv(tmp_buf, pcount, dtype, src, tag, comm, &status); + smpi_mpi_recv(tmp_buf, pcount, dtype, src, tag, comm, &status); (*uop) (tmp_buf, (char *)recv_buf + recv_offset, &pcount, &dtype); } } else { send_offset = phase * pcount * extent; dst = (inter_rank * num_core) + (intra_rank & (~mask)); - MPI_Send((char *)recv_buf + send_offset, pcount, dtype, dst, tag, comm); + smpi_mpi_send((char *)recv_buf + send_offset, pcount, dtype, dst, tag, comm); break; } mask <<= 1; @@ -134,13 +134,13 @@ int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf, src = (inter_rank | mask) * num_core; if (src < comm_size) { recv_offset = (phase - 1) * pcount * extent; - MPI_Recv(tmp_buf, pcount, dtype, src, tag, comm, &status); + smpi_mpi_recv(tmp_buf, pcount, dtype, src, tag, comm, &status); (*uop) (tmp_buf, (char *)recv_buf + recv_offset, &pcount, &dtype); } } else { dst = (inter_rank & (~mask)) * num_core; send_offset = (phase - 1) * pcount * extent; - MPI_Send((char *)recv_buf + send_offset, pcount, dtype, dst, tag, comm); + smpi_mpi_send((char *)recv_buf + send_offset, pcount, dtype, dst, tag, comm); break; } mask <<= 1; @@ -157,7 +157,7 @@ int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf, if (inter_rank & mask) { src = (inter_rank - mask) * num_core; recv_offset = (phase - 2) * pcount * extent; - MPI_Recv((char *)recv_buf + recv_offset, pcount, dtype, src, tag, comm, + smpi_mpi_recv((char *)recv_buf + recv_offset, pcount, dtype, src, tag, comm, &status); break; } @@ -171,7 +171,7 @@ int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf, if (dst < comm_size) { //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); send_offset = (phase - 2) * pcount * extent; - MPI_Send((char *)recv_buf + send_offset, pcount, dtype, dst, tag, comm); + smpi_mpi_send((char *)recv_buf + send_offset, pcount, dtype, dst, tag, comm); } } mask >>= 1; @@ -190,7 +190,7 @@ int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf, if (intra_rank & mask) { src = (inter_rank * num_core) + 
(intra_rank - mask); recv_offset = (phase - 3) * pcount * extent; - MPI_Recv((char *)recv_buf + recv_offset, pcount, dtype, src, tag, comm, + smpi_mpi_recv((char *)recv_buf + recv_offset, pcount, dtype, src, tag, comm, &status); break; } @@ -202,7 +202,7 @@ int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf, dst = (inter_rank * num_core) + (intra_rank + mask); if (dst < comm_size) { send_offset = (phase - 3) * pcount * extent; - MPI_Send((char *)recv_buf + send_offset, pcount, dtype, dst, tag, comm); + smpi_mpi_send((char *)recv_buf + send_offset, pcount, dtype, dst, tag, comm); } mask >>= 1; } diff --git a/src/smpi/colls/allreduce-smp-binomial.c b/src/smpi/colls/allreduce-smp-binomial.c index c73a6c6dab..49bf0ab49c 100644 --- a/src/smpi/colls/allreduce-smp-binomial.c +++ b/src/smpi/colls/allreduce-smp-binomial.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /* IMPLEMENTED BY PITCH PATARASUK Non-topoloty-specific (however, number of cores/node need to be changed) all-reduce operation designed for smp clusters @@ -51,7 +51,7 @@ int smpi_coll_tuned_allreduce_smp_binomial(void *send_buf, void *recv_buf, rank=smpi_comm_rank(comm); MPI_Aint extent, lb; smpi_datatype_extent(dtype, &lb, &extent); - tmp_buf = (void *) malloc(count * extent); + tmp_buf = (void *) xbt_malloc(count * extent); /* compute intra and inter ranking */ int intra_rank, inter_rank; diff --git a/src/smpi/colls/allreduce-smp-rdb.c b/src/smpi/colls/allreduce-smp-rdb.c index 2bdfacdd67..ee6037917e 100644 --- a/src/smpi/colls/allreduce-smp-rdb.c +++ b/src/smpi/colls/allreduce-smp-rdb.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /* IMPLEMENTED BY PITCH PATARASUK Non-topoloty-specific (however, number of cores/node need to be changed) all-reduce operation designed for smp clusters @@ -45,11 +45,11 @@ int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, uop = op_ptr->op; #endif */ - MPI_Comm_size(comm, &comm_size); - MPI_Comm_rank(comm, &rank); + comm_size = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); MPI_Aint extent; - MPI_Type_extent(dtype, &extent); - tmp_buf = (void *) malloc(count * extent); + extent = smpi_datatype_get_extent(dtype); + tmp_buf = (void *) xbt_malloc(count * extent); /* compute intra and inter ranking */ int intra_rank, inter_rank; @@ -61,7 +61,7 @@ int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, int inter_comm_size = (comm_size + num_core - 1) / num_core; /* copy input buffer to output buffer */ - MPI_Sendrecv(send_buf, count, dtype, rank, tag, + smpi_mpi_sendrecv(send_buf, count, dtype, rank, tag, recv_buf, count, dtype, rank, tag, comm, &status); /* start binomial reduce intra communication inside each SMP node */ @@ -70,12 +70,12 @@ int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, if ((mask & intra_rank) == 0) { src = (inter_rank * num_core) + (intra_rank | mask); if (src < comm_size) { - MPI_Recv(tmp_buf, count, dtype, src, tag, comm, &status); + smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status); star_reduction(op, tmp_buf, recv_buf, &count, &dtype); } } else { dst = (inter_rank * num_core) + (intra_rank & (~mask)); - MPI_Send(recv_buf, count, dtype, dst, tag, comm); + smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); break; } mask <<= 1; @@ -103,11 +103,11 @@ int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, if (inter_rank < 2 * rem) { if (inter_rank % 2 == 0) { dst = rank + num_core; - MPI_Send(recv_buf, 
count, dtype, dst, tag, comm); + smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); newrank = -1; } else { src = rank - num_core; - MPI_Recv(tmp_buf, count, dtype, src, tag, comm, &status); + smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status); star_reduction(op, tmp_buf, recv_buf, &count, &dtype); newrank = inter_rank / 2; } @@ -132,7 +132,7 @@ int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, dst *= num_core; /* exchange data in rdb manner */ - MPI_Sendrecv(recv_buf, count, dtype, dst, tag, tmp_buf, count, dtype, + smpi_mpi_sendrecv(recv_buf, count, dtype, dst, tag, tmp_buf, count, dtype, dst, tag, comm, &status); star_reduction(op, tmp_buf, recv_buf, &count, &dtype); mask <<= 1; @@ -144,9 +144,9 @@ int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, */ if (inter_rank < 2 * rem) { if (inter_rank % 2) { - MPI_Send(recv_buf, count, dtype, rank - num_core, tag, comm); + smpi_mpi_send(recv_buf, count, dtype, rank - num_core, tag, comm); } else { - MPI_Recv(recv_buf, count, dtype, rank + num_core, tag, comm, &status); + smpi_mpi_recv(recv_buf, count, dtype, rank + num_core, tag, comm, &status); } } } @@ -160,7 +160,7 @@ int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, while (mask < num_core_in_current_smp) { if (intra_rank & mask) { src = (inter_rank * num_core) + (intra_rank - mask); - MPI_Recv(recv_buf, count, dtype, src, tag, comm, &status); + smpi_mpi_recv(recv_buf, count, dtype, src, tag, comm, &status); break; } mask <<= 1; @@ -170,7 +170,7 @@ int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, while (mask > 0) { dst = (inter_rank * num_core) + (intra_rank + mask); if (dst < comm_size) { - MPI_Send(recv_buf, count, dtype, dst, tag, comm); + smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); } mask >>= 1; } diff --git a/src/smpi/colls/allreduce-smp-rsag-lr.c b/src/smpi/colls/allreduce-smp-rsag-lr.c index 713ae314e7..dba0a065ff 100644 --- a/src/smpi/colls/allreduce-smp-rsag-lr.c +++ b/src/smpi/colls/allreduce-smp-rsag-lr.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" //#include /* change number of core per smp-node @@ -34,11 +34,11 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, uop = op_ptr->op; #endif */ - MPI_Comm_size(comm, &comm_size); - MPI_Comm_rank(comm, &rank); + comm_size = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); MPI_Aint extent; - MPI_Type_extent(dtype, &extent); - tmp_buf = (void *) malloc(count * extent); + extent = smpi_datatype_get_extent(dtype); + tmp_buf = (void *) xbt_malloc(count * extent); int intra_rank, inter_rank; intra_rank = rank % num_core; @@ -54,7 +54,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, } - MPI_Sendrecv(send_buf, count, dtype, rank, tag, + smpi_mpi_sendrecv(send_buf, count, dtype, rank, tag, recv_buf, count, dtype, rank, tag, comm, &status); @@ -65,14 +65,14 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, src = (inter_rank * num_core) + (intra_rank | mask); // if (src < ((inter_rank + 1) * num_core)) { if (src < comm_size) { - MPI_Recv(tmp_buf, count, dtype, src, tag, comm, &status); + smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status); star_reduction(op, tmp_buf, recv_buf, &count, &dtype); //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); } } else { dst = (inter_rank * num_core) + (intra_rank & (~mask)); - MPI_Send(recv_buf, count, dtype, dst, tag, comm); + 
smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); break; } @@ -119,7 +119,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, else recv_count = curr_size + curr_remainder; - MPI_Sendrecv((char *) recv_buf + send_offset, send_count, dtype, to, + smpi_mpi_sendrecv((char *) recv_buf + send_offset, send_count, dtype, to, tag + i, tmp_buf, recv_count, dtype, from, tag + i, comm, &status); @@ -149,7 +149,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, else recv_count = curr_size + curr_remainder; - MPI_Sendrecv((char *) recv_buf + send_offset, send_count, dtype, to, + smpi_mpi_sendrecv((char *) recv_buf + send_offset, send_count, dtype, to, tag + i, (char *) recv_buf + recv_offset, recv_count, dtype, from, tag + i, comm, &status); @@ -169,14 +169,14 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, if ((mask & inter_rank) == 0) { src = (inter_rank | mask) * num_core; if (src < comm_size) { - MPI_Recv(tmp_buf, count, dtype, src, tag, comm, &status); + smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status); (* uop) (tmp_buf, recv_buf, &count, &dtype); //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); } } else { dst = (inter_rank & (~mask)) * num_core; - MPI_Send(recv_buf, count, dtype, dst, tag, comm); + smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); break; } @@ -195,7 +195,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, if (inter_rank & mask) { src = (inter_rank - mask) * num_core; //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); - MPI_Recv(recv_buf, count, dtype, src, tag, comm, &status); + smpi_mpi_recv(recv_buf, count, dtype, src, tag, comm, &status); break; } mask <<= 1; @@ -209,7 +209,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, dst = (inter_rank + mask) * num_core; if (dst < comm_size) { //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); - MPI_Send(recv_buf, count, dtype, dst, tag, comm); + smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); } } mask >>= 1; @@ -230,7 +230,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, if (intra_rank & mask) { src = (inter_rank * num_core) + (intra_rank - mask); //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); - MPI_Recv(recv_buf, count, dtype, src, tag, comm, &status); + smpi_mpi_recv(recv_buf, count, dtype, src, tag, comm, &status); break; } mask <<= 1; @@ -243,7 +243,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, dst = (inter_rank * num_core) + (intra_rank + mask); if (dst < comm_size) { //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); - MPI_Send(recv_buf, count, dtype, dst, tag, comm); + smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); } mask >>= 1; } diff --git a/src/smpi/colls/allreduce-smp-rsag-rab.c b/src/smpi/colls/allreduce-smp-rsag-rab.c index 8ccb479e18..af6871acaf 100644 --- a/src/smpi/colls/allreduce-smp-rsag-rab.c +++ b/src/smpi/colls/allreduce-smp-rsag-rab.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /* * implemented by Pitch Patarasuk, 07/01/2007 */ @@ -28,11 +28,11 @@ int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count, MPI_Status status; int num_core = NUM_CORE; - MPI_Comm_size(comm, &comm_size); - 
MPI_Comm_rank(comm, &rank); + comm_size = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); MPI_Aint extent; - MPI_Type_extent(dtype, &extent); - tmp_buf = (void *) malloc(count * extent); + extent = smpi_datatype_get_extent(dtype); + tmp_buf = (void *) xbt_malloc(count * extent); int intra_rank, inter_rank; intra_rank = rank % num_core; @@ -42,7 +42,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count, int inter_comm_size = (comm_size + num_core - 1) / num_core; - MPI_Sendrecv(sbuf, count, dtype, rank, tag, + smpi_mpi_sendrecv(sbuf, count, dtype, rank, tag, rbuf, count, dtype, rank, tag, comm, &status); // SMP_binomial_reduce @@ -52,14 +52,14 @@ int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count, src = (inter_rank * num_core) + (intra_rank | mask); // if (src < ((inter_rank + 1) * num_core)) { if (src < comm_size) { - MPI_Recv(tmp_buf, count, dtype, src, tag, comm, &status); + smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status); star_reduction(op, tmp_buf, rbuf, &count, &dtype); //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); } } else { dst = (inter_rank * num_core) + (intra_rank & (~mask)); - MPI_Send(rbuf, count, dtype, dst, tag, comm); + smpi_mpi_send(rbuf, count, dtype, dst, tag, comm); //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); break; } @@ -108,7 +108,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count, // if (rank==7) // printf("node %d send to %d in phase %d s_offset = %d r_offset = %d count = %d\n",rank,dst,phase, send_offset, recv_offset, curr_count); - MPI_Sendrecv((char *)rbuf + send_offset, curr_count, dtype, (dst * num_core), tag, + smpi_mpi_sendrecv((char *)rbuf + send_offset, curr_count, dtype, (dst * num_core), tag, tmp_buf, curr_count, dtype, (dst * num_core), tag, comm, &status); @@ -155,7 +155,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count, // if (rank==7) //printf("node %d send to %d in phase %d s_offset = %d r_offset = %d count = %d\n",rank,dst,phase, send_offset, recv_offset, curr_count); - MPI_Sendrecv((char *)rbuf + send_offset, curr_count, dtype, (dst * num_core), tag, + smpi_mpi_sendrecv((char *)rbuf + send_offset, curr_count, dtype, (dst * num_core), tag, (char *)rbuf + recv_offset, curr_count, dtype, (dst * num_core), tag, comm, &status); @@ -181,7 +181,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count, if (intra_rank & mask) { src = (inter_rank * num_core) + (intra_rank - mask); //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); - MPI_Recv(rbuf, count, dtype, src, tag, comm, &status); + smpi_mpi_recv(rbuf, count, dtype, src, tag, comm, &status); break; } mask <<= 1; @@ -194,7 +194,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count, dst = (inter_rank * num_core) + (intra_rank + mask); if (dst < comm_size) { //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); - MPI_Send(rbuf, count, dtype, dst, tag, comm); + smpi_mpi_send(rbuf, count, dtype, dst, tag, comm); } mask >>= 1; } diff --git a/src/smpi/colls/allreduce-smp-rsag.c b/src/smpi/colls/allreduce-smp-rsag.c index a40f18979d..53d1a7f410 100644 --- a/src/smpi/colls/allreduce-smp-rsag.c +++ b/src/smpi/colls/allreduce-smp-rsag.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /* change number of core per smp-node we assume that number of core per process will be the same for all implementations */ @@ -33,11 
+33,11 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, uop = op_ptr->op; #endif */ - MPI_Comm_size(comm, &comm_size); - MPI_Comm_rank(comm, &rank); + comm_size = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); MPI_Aint extent; - MPI_Type_extent(dtype, &extent); - tmp_buf = (void *) malloc(count * extent); + extent = smpi_datatype_get_extent(dtype); + tmp_buf = (void *) xbt_malloc(count * extent); int intra_rank, inter_rank; intra_rank = rank % num_core; @@ -53,7 +53,7 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, } - MPI_Sendrecv(send_buf, count, dtype, rank, tag, + smpi_mpi_sendrecv(send_buf, count, dtype, rank, tag, recv_buf, count, dtype, rank, tag, comm, &status); @@ -64,14 +64,14 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, src = (inter_rank * num_core) + (intra_rank | mask); // if (src < ((inter_rank + 1) * num_core)) { if (src < comm_size) { - MPI_Recv(tmp_buf, count, dtype, src, tag, comm, &status); + smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status); star_reduction(op, tmp_buf, recv_buf, &count, &dtype); //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); } } else { dst = (inter_rank * num_core) + (intra_rank & (~mask)); - MPI_Send(recv_buf, count, dtype, dst, tag, comm); + smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); break; } @@ -100,7 +100,7 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, ((inter_rank - 2 - i + inter_comm_size) % inter_comm_size) * seg_count * extent; - MPI_Sendrecv((char *) recv_buf + send_offset, seg_count, dtype, to, + smpi_mpi_sendrecv((char *) recv_buf + send_offset, seg_count, dtype, to, tag + i, tmp_buf, seg_count, dtype, from, tag + i, comm, &status); @@ -119,7 +119,7 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, ((inter_rank - 1 - i + inter_comm_size) % inter_comm_size) * seg_count * extent; - MPI_Sendrecv((char *) recv_buf + send_offset, seg_count, dtype, to, + smpi_mpi_sendrecv((char *) recv_buf + send_offset, seg_count, dtype, to, tag + i, (char *) recv_buf + recv_offset, seg_count, dtype, from, tag + i, comm, &status); @@ -139,14 +139,14 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, if ((mask & inter_rank) == 0) { src = (inter_rank | mask) * num_core; if (src < comm_size) { - MPI_Recv(tmp_buf, count, dtype, src, tag, comm, &status); + smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status); (* uop) (tmp_buf, recv_buf, &count, &dtype); //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); } } else { dst = (inter_rank & (~mask)) * num_core; - MPI_Send(recv_buf, count, dtype, dst, tag, comm); + smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); break; } @@ -165,7 +165,7 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, if (inter_rank & mask) { src = (inter_rank - mask) * num_core; //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); - MPI_Recv(recv_buf, count, dtype, src, tag, comm, &status); + smpi_mpi_recv(recv_buf, count, dtype, src, tag, comm, &status); break; } mask <<= 1; @@ -179,7 +179,7 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, dst = (inter_rank + mask) * num_core; if (dst < comm_size) { //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); - MPI_Send(recv_buf, count, 
dtype, dst, tag, comm); + smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); } } mask >>= 1; @@ -200,7 +200,7 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, if (intra_rank & mask) { src = (inter_rank * num_core) + (intra_rank - mask); //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); - MPI_Recv(recv_buf, count, dtype, src, tag, comm, &status); + smpi_mpi_recv(recv_buf, count, dtype, src, tag, comm, &status); break; } mask <<= 1; @@ -213,7 +213,7 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, dst = (inter_rank * num_core) + (intra_rank + mask); if (dst < comm_size) { //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); - MPI_Send(recv_buf, count, dtype, dst, tag, comm); + smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); } mask >>= 1; } diff --git a/src/smpi/colls/alltoall-2dmesh.c b/src/smpi/colls/alltoall-2dmesh.c index 644e6b9962..61ea21efe8 100644 --- a/src/smpi/colls/alltoall-2dmesh.c +++ b/src/smpi/colls/alltoall-2dmesh.c @@ -1,9 +1,6 @@ -#include "colls.h" +#include "colls_private.h" #include -XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_colls, smpi, - "Logging specific to SMPI collectives"); - /***************************************************************************** * Function: alltoall_2dmesh_shoot @@ -65,9 +62,9 @@ int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count, int my_row_base, my_col_base, src_row_base, block_size; int tag = 1; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); - MPI_Type_extent(send_type, &extent); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); + extent = smpi_datatype_get_extent(send_type); if (!alltoall_check_is_2dmesh(num_procs, &X, &Y)) return MPI_ERR_OTHER; @@ -77,33 +74,15 @@ int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count, block_size = extent * send_count; - tmp_buff1 = (char *) malloc(block_size * num_procs * Y); - if (!tmp_buff1) { - XBT_DEBUG("alltoall-2dmesh_shoot.c:88: cannot allocate memory"); - MPI_Finalize(); - exit(MPI_ERR_OTHER); - } - - tmp_buff2 = (char *) malloc(block_size * Y); - if (!tmp_buff2) { - XBT_WARN("alltoall-2dmesh_shoot.c:88: cannot allocate memory"); - MPI_Finalize(); - exit(MPI_ERR_OTHER); - } - - + tmp_buff1 = (char *) xbt_malloc(block_size * num_procs * Y); + tmp_buff2 = (char *) xbt_malloc(block_size * Y); num_reqs = X; if (Y > X) num_reqs = Y; - statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status)); - reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request)); - if (!reqs) { - XBT_WARN("alltoall-2dmesh_shoot.c:88: cannot allocate memory"); - MPI_Finalize(); - exit(MPI_ERR_OTHER); - } + statuses = (MPI_Status *) xbt_malloc(num_reqs * sizeof(MPI_Status)); + reqs = (MPI_Request *) xbt_malloc(num_reqs * sizeof(MPI_Request)); req_ptr = reqs; @@ -117,18 +96,17 @@ int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count, continue; recv_offset = (src % Y) * block_size * num_procs; - MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm, - req_ptr++); + *(req_ptr++) = smpi_mpi_irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm); } for (i = 0; i < Y; i++) { dst = i + my_row_base; if (dst == rank) continue; - MPI_Send(send_buff, count, send_type, dst, tag, comm); + smpi_mpi_send(send_buff, count, send_type, dst, tag, comm); } - MPI_Waitall(Y - 1, reqs, statuses); + smpi_mpi_waitall(Y - 1, reqs, statuses); req_ptr = reqs; for (i = 0; i < Y; i++) { @@ -136,13 +114,13 @@ int smpi_coll_tuned_alltoall_2dmesh(void 
*send_buff, int send_count, recv_offset = (my_row_base * block_size) + (i * block_size); if (i + my_row_base == rank) - MPI_Sendrecv((char *) send_buff + recv_offset, send_count, send_type, + smpi_mpi_sendrecv((char *) send_buff + recv_offset, send_count, send_type, rank, tag, (char *) recv_buff + recv_offset, recv_count, recv_type, rank, tag, comm, &s); else - MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, + smpi_mpi_sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag, (char *) recv_buff + recv_offset, recv_count, recv_type, rank, tag, comm, &s); @@ -155,8 +133,8 @@ int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count, continue; src_row_base = (src / Y) * Y; - MPI_Irecv((char *) recv_buff + src_row_base * block_size, recv_count * Y, - recv_type, src, tag, comm, req_ptr++); + *(req_ptr++) = smpi_mpi_irecv((char *) recv_buff + src_row_base * block_size, recv_count * Y, + recv_type, src, tag, comm); } for (i = 0; i < X; i++) { @@ -169,11 +147,11 @@ int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count, send_offset = (dst + j * num_procs) * block_size; if (j + my_row_base == rank) - MPI_Sendrecv((char *) send_buff + dst * block_size, send_count, + smpi_mpi_sendrecv((char *) send_buff + dst * block_size, send_count, send_type, rank, tag, tmp_buff2 + recv_offset, recv_count, recv_type, rank, tag, comm, &s); else - MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, + smpi_mpi_sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag, tmp_buff2 + recv_offset, recv_count, recv_type, rank, tag, comm, &s); @@ -181,9 +159,9 @@ int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count, recv_offset += block_size; } - MPI_Send(tmp_buff2, send_count * Y, send_type, dst, tag, comm); + smpi_mpi_send(tmp_buff2, send_count * Y, send_type, dst, tag, comm); } - MPI_Waitall(X - 1, reqs, statuses); + smpi_mpi_waitall(X - 1, reqs, statuses); free(reqs); free(statuses); free(tmp_buff1); diff --git a/src/smpi/colls/alltoall-3dmesh.c b/src/smpi/colls/alltoall-3dmesh.c index 7ffac2bb9b..ca10de7191 100644 --- a/src/smpi/colls/alltoall-3dmesh.c +++ b/src/smpi/colls/alltoall-3dmesh.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" #include /***************************************************************************** @@ -54,9 +54,9 @@ int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, char *tmp_buff1, *tmp_buff2; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); - MPI_Type_extent(send_type, &extent); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); + extent = smpi_datatype_get_extent(send_type); if (!alltoall_check_is_3dmesh(num_procs, &X, &Y, &Z)) return MPI_ERR_OTHER; @@ -86,7 +86,7 @@ int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, send_offset = recv_offset = (rank % two_dsize) * block_size * num_procs; - MPI_Sendrecv(send_buff, send_count * num_procs, send_type, rank, tag, + smpi_mpi_sendrecv(send_buff, send_count * num_procs, send_type, rank, tag, tmp_buff1 + recv_offset, num_procs * recv_count, recv_type, rank, tag, comm, &status); @@ -97,18 +97,17 @@ int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, if (src == rank) continue; recv_offset = (src % two_dsize) * block_size * num_procs; - MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm, - req_ptr++); + *(req_ptr++) = smpi_mpi_irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm); } for (i = 0; i < Y; i++) { dst = i + my_row_base; if (dst == rank) 
continue; - MPI_Send(send_buff, count, send_type, dst, tag, comm); + smpi_mpi_send(send_buff, count, send_type, dst, tag, comm); } - MPI_Waitall(Y - 1, reqs, statuses); + smpi_mpi_waitall(Y - 1, reqs, statuses); req_ptr = reqs; @@ -120,8 +119,8 @@ int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, src_row_base = (src / X) * X; recv_offset = (src_row_base % two_dsize) * block_size * num_procs; - MPI_Irecv(tmp_buff1 + recv_offset, recv_count * num_procs * Y, - recv_type, src, tag, comm, req_ptr++); + *(req_ptr++) = smpi_mpi_irecv(tmp_buff1 + recv_offset, recv_count * num_procs * Y, + recv_type, src, tag, comm); } send_offset = (my_row_base % two_dsize) * block_size * num_procs; @@ -129,17 +128,17 @@ int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, dst = (i * Y + my_col_base); if (dst == rank) continue; - MPI_Send(tmp_buff1 + send_offset, send_count * num_procs * Y, send_type, + smpi_mpi_send(tmp_buff1 + send_offset, send_count * num_procs * Y, send_type, dst, tag, comm); } - MPI_Waitall(X - 1, reqs, statuses); + smpi_mpi_waitall(X - 1, reqs, statuses); req_ptr = reqs; for (i = 0; i < two_dsize; i++) { send_offset = (rank * block_size) + (i * block_size * num_procs); recv_offset = (my_z_base * block_size) + (i * block_size); - MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag, + smpi_mpi_sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag, (char *) recv_buff + recv_offset, recv_count, recv_type, rank, tag, comm, &status); } @@ -150,8 +149,8 @@ int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, recv_offset = (src_z_base * block_size); - MPI_Irecv((char *) recv_buff + recv_offset, recv_count * two_dsize, - recv_type, src, tag, comm, req_ptr++); + *(req_ptr++) = smpi_mpi_irecv((char *) recv_buff + recv_offset, recv_count * two_dsize, + recv_type, src, tag, comm); } for (i = 1; i < Z; i++) { @@ -160,18 +159,18 @@ int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, recv_offset = 0; for (j = 0; j < two_dsize; j++) { send_offset = (dst + j * num_procs) * block_size; - MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, + smpi_mpi_sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag, tmp_buff2 + recv_offset, recv_count, recv_type, rank, tag, comm, &status); recv_offset += block_size; } - MPI_Send(tmp_buff2, send_count * two_dsize, send_type, dst, tag, comm); + smpi_mpi_send(tmp_buff2, send_count * two_dsize, send_type, dst, tag, comm); } - MPI_Waitall(Z - 1, reqs, statuses); + smpi_mpi_waitall(Z - 1, reqs, statuses); free(reqs); free(statuses); diff --git a/src/smpi/colls/alltoall-bruck.c b/src/smpi/colls/alltoall-bruck.c index d27539521d..d38b3e59cc 100644 --- a/src/smpi/colls/alltoall-bruck.c +++ b/src/smpi/colls/alltoall-bruck.c @@ -37,21 +37,21 @@ smpi_coll_tuned_alltoall_bruck(void *send_buff, int send_count, char *send_ptr = (char *) send_buff; char *recv_ptr = (char *) recv_buff; - MPI_Comm_size(comm, &num_procs); - MPI_Comm_rank(comm, &rank); + num_procs = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); - MPI_Type_extent(recv_type, &extent); + extent = smpi_datatype_get_extent(recv_type); tmp_buff = (char *) xbt_malloc(num_procs * recv_count * extent); disps = (int *) xbt_malloc(sizeof(int) * num_procs); blocks_length = (int *) xbt_malloc(sizeof(int) * num_procs); - MPI_Sendrecv(send_ptr + rank * send_count * extent, + smpi_mpi_sendrecv(send_ptr + rank * send_count * extent, (num_procs - rank) * send_count, send_type, rank, tag, recv_ptr, (num_procs - rank) 
* recv_count, recv_type, rank, tag, comm, &status); - MPI_Sendrecv(send_ptr, rank * send_count, send_type, rank, tag, + smpi_mpi_sendrecv(send_ptr, rank * send_count, send_type, rank, tag, recv_ptr + (num_procs - rank) * recv_count * extent, rank * recv_count, recv_type, rank, tag, comm, &status); @@ -73,14 +73,14 @@ smpi_coll_tuned_alltoall_bruck(void *send_buff, int send_count, } MPI_Type_indexed(count, blocks_length, disps, recv_type, &new_type); - MPI_Type_commit(&new_type); + smpi_datatype_commit(&new_type); position = 0; MPI_Pack(recv_buff, 1, new_type, tmp_buff, pack_size, &position, comm); - MPI_Sendrecv(tmp_buff, position, MPI_PACKED, dst, tag, recv_buff, 1, + smpi_mpi_sendrecv(tmp_buff, position, MPI_PACKED, dst, tag, recv_buff, 1, new_type, src, tag, comm, &status); - MPI_Type_free(&new_type); + smpi_datatype_free(&new_type); pof2 *= 2; } @@ -88,18 +88,18 @@ smpi_coll_tuned_alltoall_bruck(void *send_buff, int send_count, free(disps); free(blocks_length); - MPI_Sendrecv(recv_ptr + (rank + 1) * recv_count * extent, + smpi_mpi_sendrecv(recv_ptr + (rank + 1) * recv_count * extent, (num_procs - rank - 1) * recv_count, send_type, rank, tag, tmp_buff, (num_procs - rank - 1) * recv_count, recv_type, rank, tag, comm, &status); - MPI_Sendrecv(recv_ptr, (rank + 1) * recv_count, send_type, rank, tag, + smpi_mpi_sendrecv(recv_ptr, (rank + 1) * recv_count, send_type, rank, tag, tmp_buff + (num_procs - rank - 1) * recv_count * extent, (rank + 1) * recv_count, recv_type, rank, tag, comm, &status); for (i = 0; i < num_procs; i++) - MPI_Sendrecv(tmp_buff + i * recv_count * extent, recv_count, send_type, + smpi_mpi_sendrecv(tmp_buff + i * recv_count * extent, recv_count, send_type, rank, tag, recv_ptr + (num_procs - i - 1) * recv_count * extent, recv_count, recv_type, rank, tag, comm, &status); diff --git a/src/smpi/colls/alltoall-pair-light-barrier.c b/src/smpi/colls/alltoall-pair-light-barrier.c index bde809bdc0..96cf68d0f9 100644 --- a/src/smpi/colls/alltoall-pair-light-barrier.c +++ b/src/smpi/colls/alltoall-pair-light-barrier.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** * Function: alltoall_pair_light_barrier @@ -37,28 +37,28 @@ smpi_coll_tuned_alltoall_pair_light_barrier(void *send_buff, int send_count, char *send_ptr = (char *) send_buff; char *recv_ptr = (char *) recv_buff; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); - MPI_Type_extent(send_type, &send_chunk); - MPI_Type_extent(recv_type, &recv_chunk); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); + send_chunk = smpi_datatype_get_extent(send_type); + recv_chunk = smpi_datatype_get_extent(recv_type); send_chunk *= send_count; recv_chunk *= recv_count; - MPI_Sendrecv(send_ptr + rank * send_chunk, send_count, send_type, rank, tag, + smpi_mpi_sendrecv(send_ptr + rank * send_chunk, send_count, send_type, rank, tag, recv_ptr + rank * recv_chunk, recv_count, recv_type, rank, tag, comm, &s); for (i = 1; i < num_procs; i++) { src = dst = rank ^ i; - MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, + smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag, comm, &s); if ((i + 1) < num_procs) { next_partner = rank ^ (i + 1); - MPI_Sendrecv(&send_sync, 1, MPI_CHAR, next_partner, tag, + smpi_mpi_sendrecv(&send_sync, 1, MPI_CHAR, next_partner, tag, &recv_sync, 1, MPI_CHAR, next_partner, tag, comm, &s); } } diff 
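The mesh and ring alltoalls above also switch the nonblocking calls: smpi_mpi_irecv and smpi_mpi_isend return the MPI_Request instead of filling an out-parameter, so the request arrays are populated by assignment (*(req_ptr++) = smpi_mpi_irecv(...)) and completed with smpi_mpi_waitall. A minimal sketch of that idiom under the signatures shown in these hunks; ring_exchange_sketch, its fixed tag and the omitted self-copy are illustrative only:

    #include "colls_private.h"

    /* Illustrative only: post all receives, send blocking, then wait — the
     * request array is filled from the return values of smpi_mpi_irecv().  */
    static void ring_exchange_sketch(void *sbuf, void *rbuf, int count,
                                     MPI_Datatype dtype, MPI_Comm comm)
    {
      int rank        = smpi_comm_rank(comm);
      int size        = smpi_comm_size(comm);
      MPI_Aint extent = smpi_datatype_get_extent(dtype);
      int tag = 1, i;

      MPI_Request *reqs    = (MPI_Request *) xbt_malloc((size - 1) * sizeof(MPI_Request));
      MPI_Status *statuses = (MPI_Status *)  xbt_malloc((size - 1) * sizeof(MPI_Status));

      for (i = 1; i < size; i++) {                  /* was MPI_Irecv(..., &reqs[i - 1]) */
        int src = (rank - i + size) % size;
        reqs[i - 1] = smpi_mpi_irecv((char *) rbuf + src * count * extent,
                                     count, dtype, src, tag, comm);
      }
      for (i = 1; i < size; i++) {                  /* was MPI_Send(...)                */
        int dst = (rank + i) % size;
        smpi_mpi_send((char *) sbuf + dst * count * extent, count, dtype, dst, tag, comm);
      }
      smpi_mpi_waitall(size - 1, reqs, statuses);   /* was MPI_Waitall(...)             */

      free(statuses);
      free(reqs);
    }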
--git a/src/smpi/colls/alltoall-pair-mpi-barrier.c b/src/smpi/colls/alltoall-pair-mpi-barrier.c index 78d3caa8da..f12c3f92a6 100644 --- a/src/smpi/colls/alltoall-pair-mpi-barrier.c +++ b/src/smpi/colls/alltoall-pair-mpi-barrier.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** * Function: alltoall_pair_mpi_barrier @@ -34,18 +34,18 @@ smpi_coll_tuned_alltoall_pair_mpi_barrier(void *send_buff, int send_count, char *send_ptr = (char *) send_buff; char *recv_ptr = (char *) recv_buff; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); - MPI_Type_extent(send_type, &send_chunk); - MPI_Type_extent(recv_type, &recv_chunk); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); + send_chunk = smpi_datatype_get_extent(send_type); + recv_chunk = smpi_datatype_get_extent(recv_type); send_chunk *= send_count; recv_chunk *= recv_count; for (i = 0; i < num_procs; i++) { src = dst = rank ^ i; - MPI_Barrier(comm); - MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, + smpi_mpi_barrier(comm); + smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag, comm, &s); } diff --git a/src/smpi/colls/alltoall-pair-one-barrier.c b/src/smpi/colls/alltoall-pair-one-barrier.c index 195f455705..19e693d447 100644 --- a/src/smpi/colls/alltoall-pair-one-barrier.c +++ b/src/smpi/colls/alltoall-pair-one-barrier.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** * Function: alltoall_pair @@ -35,18 +35,18 @@ smpi_coll_tuned_alltoall_pair_one_barrier(void *send_buff, int send_count, char *send_ptr = (char *) send_buff; char *recv_ptr = (char *) recv_buff; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); - MPI_Type_extent(send_type, &send_chunk); - MPI_Type_extent(recv_type, &recv_chunk); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); + send_chunk = smpi_datatype_get_extent(send_type); + recv_chunk = smpi_datatype_get_extent(recv_type); send_chunk *= send_count; recv_chunk *= recv_count; - MPI_Barrier(comm); + smpi_mpi_barrier(comm); for (i = 0; i < num_procs; i++) { src = dst = rank ^ i; - MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, + smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag, comm, &s); } diff --git a/src/smpi/colls/alltoall-pair.c b/src/smpi/colls/alltoall-pair.c index 6c5d8eed09..24ef5f769c 100644 --- a/src/smpi/colls/alltoall-pair.c +++ b/src/smpi/colls/alltoall-pair.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** @@ -37,10 +37,10 @@ int alltoall_pair(void *send_buff, int send_count, MPI_Datatype send_type, char *send_ptr = (char *) send_buff; char *recv_ptr = (char *) recv_buff; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); - MPI_Type_extent(send_type, &send_chunk); - MPI_Type_extent(recv_type, &recv_chunk); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); + send_chunk = smpi_datatype_get_extent(send_type); + recv_chunk = smpi_datatype_get_extent(recv_type); MPI_Win_create(recv_buff, num_procs * recv_chunk * send_count, recv_chunk, 0, comm, &win); @@ -72,17 +72,17 @@ int smpi_coll_tuned_alltoall_pair(void *send_buff, int 
send_count, char *send_ptr = (char *) send_buff; char *recv_ptr = (char *) recv_buff; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); - MPI_Type_extent(send_type, &send_chunk); - MPI_Type_extent(recv_type, &recv_chunk); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); + send_chunk = smpi_datatype_get_extent(send_type); + recv_chunk = smpi_datatype_get_extent(recv_type); send_chunk *= send_count; recv_chunk *= recv_count; for (i = 0; i < num_procs; i++) { src = dst = rank ^ i; - MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, + smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag, comm, &s); } diff --git a/src/smpi/colls/alltoall-rdb.c b/src/smpi/colls/alltoall-rdb.c index edee61aa0f..45b529f16a 100644 --- a/src/smpi/colls/alltoall-rdb.c +++ b/src/smpi/colls/alltoall-rdb.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** @@ -32,18 +32,18 @@ int smpi_coll_tuned_alltoall_rdb(void *send_buff, int send_count, int dst_tree_root, rank_tree_root, send_offset, recv_offset; int rank, num_procs, j, k, dst, curr_size, max_size; - int last_recv_count, tmp_mask, tree_root, num_procs_completed; + int last_recv_count = 0, tmp_mask, tree_root, num_procs_completed; int tag = 1, mask = 1, i = 0; char *tmp_buff; char *send_ptr = (char *) send_buff; char *recv_ptr = (char *) recv_buff; - MPI_Comm_size(comm, &num_procs); - MPI_Comm_rank(comm, &rank); - MPI_Type_extent(send_type, &send_increment); - MPI_Type_extent(recv_type, &recv_increment); - MPI_Type_extent(recv_type, &extent); + num_procs = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); + send_increment = smpi_datatype_get_extent(send_type); + recv_increment = smpi_datatype_get_extent(recv_type); + extent = smpi_datatype_get_extent(recv_type); send_increment *= (send_count * num_procs); recv_increment *= (recv_count * num_procs); @@ -54,7 +54,7 @@ int smpi_coll_tuned_alltoall_rdb(void *send_buff, int send_count, curr_size = send_count * num_procs; - MPI_Sendrecv(send_ptr, curr_size, send_type, rank, tag, + smpi_mpi_sendrecv(send_ptr, curr_size, send_type, rank, tag, tmp_buff + (rank * recv_increment), curr_size, recv_type, rank, tag, comm, &status); @@ -68,11 +68,11 @@ int smpi_coll_tuned_alltoall_rdb(void *send_buff, int send_count, recv_offset = dst_tree_root * recv_increment; if (dst < num_procs) { - MPI_Sendrecv(tmp_buff + send_offset, curr_size, send_type, dst, tag, + smpi_mpi_sendrecv(tmp_buff + send_offset, curr_size, send_type, dst, tag, tmp_buff + recv_offset, mask * recv_count * num_procs, recv_type, dst, tag, comm, &status); - MPI_Get_count(&status, recv_type, &last_recv_count); + last_recv_count = smpi_mpi_get_count(&status, recv_type); curr_size += last_recv_count; } @@ -110,7 +110,7 @@ int smpi_coll_tuned_alltoall_rdb(void *send_buff, int send_count, if ((dst > rank) && (rank < tree_root + num_procs_completed) && (dst >= tree_root + num_procs_completed)) { - MPI_Send(tmp_buff + dst_tree_root * send_increment, + smpi_mpi_send(tmp_buff + dst_tree_root * send_increment, last_recv_count, send_type, dst, tag, comm); } @@ -121,11 +121,11 @@ int smpi_coll_tuned_alltoall_rdb(void *send_buff, int send_count, else if ((dst < rank) && (dst < tree_root + num_procs_completed) && (rank >= tree_root + num_procs_completed)) { - MPI_Recv(tmp_buff + dst_tree_root * send_increment, + smpi_mpi_recv(tmp_buff + 
dst_tree_root * send_increment, mask * num_procs * send_count, send_type, dst, tag, comm, &status); - MPI_Get_count(&status, send_type, &last_recv_count); + last_recv_count = smpi_mpi_get_count(&status, send_type); curr_size += last_recv_count; } @@ -139,7 +139,7 @@ int smpi_coll_tuned_alltoall_rdb(void *send_buff, int send_count, } for (i = 0; i < num_procs; i++) - MPI_Sendrecv(tmp_buff + (rank + i * num_procs) * send_count * extent, + smpi_mpi_sendrecv(tmp_buff + (rank + i * num_procs) * send_count * extent, send_count, send_type, rank, tag, recv_ptr + (i * recv_count * extent), recv_count, recv_type, rank, tag, comm, &status); diff --git a/src/smpi/colls/alltoall-ring-light-barrier.c b/src/smpi/colls/alltoall-ring-light-barrier.c index a721c7782d..d45912bc6a 100644 --- a/src/smpi/colls/alltoall-ring-light-barrier.c +++ b/src/smpi/colls/alltoall-ring-light-barrier.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** * Function: alltoall_ring_light_barrier @@ -37,15 +37,15 @@ smpi_coll_tuned_alltoall_ring_light_barrier(void *send_buff, int send_count, char *send_ptr = (char *) send_buff; char *recv_ptr = (char *) recv_buff; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); - MPI_Type_extent(send_type, &send_chunk); - MPI_Type_extent(recv_type, &recv_chunk); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); + send_chunk = smpi_datatype_get_extent(send_type); + recv_chunk = smpi_datatype_get_extent(recv_type); send_chunk *= send_count; recv_chunk *= recv_count; - MPI_Sendrecv(send_ptr + rank * send_chunk, send_count, send_type, rank, tag, + smpi_mpi_sendrecv(send_ptr + rank * send_chunk, send_count, send_type, rank, tag, recv_ptr + rank * recv_chunk, recv_count, recv_type, rank, tag, comm, &s); @@ -53,14 +53,14 @@ smpi_coll_tuned_alltoall_ring_light_barrier(void *send_buff, int send_count, src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; - MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, + smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag, comm, &s); if ((i + 1) < num_procs) { next_src = (rank - (i + 1) + num_procs) % num_procs; next_dst = (rank + (i + 1) + num_procs) % num_procs; - MPI_Sendrecv(&send_sync, 1, MPI_CHAR, next_src, tag, + smpi_mpi_sendrecv(&send_sync, 1, MPI_CHAR, next_src, tag, &recv_sync, 1, MPI_CHAR, next_dst, tag, comm, &s); } diff --git a/src/smpi/colls/alltoall-ring-mpi-barrier.c b/src/smpi/colls/alltoall-ring-mpi-barrier.c index 94a5fb950d..e3a3f2da8f 100644 --- a/src/smpi/colls/alltoall-ring-mpi-barrier.c +++ b/src/smpi/colls/alltoall-ring-mpi-barrier.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** * Function: alltoall_ring_mpi_barrier @@ -34,10 +34,10 @@ smpi_coll_tuned_alltoall_ring_mpi_barrier(void *send_buff, int send_count, char *send_ptr = (char *) send_buff; char *recv_ptr = (char *) recv_buff; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); - MPI_Type_extent(send_type, &send_chunk); - MPI_Type_extent(recv_type, &recv_chunk); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); + send_chunk = smpi_datatype_get_extent(send_type); + recv_chunk = smpi_datatype_get_extent(recv_type); send_chunk *= send_count; recv_chunk *= recv_count; @@ -46,8 +46,8 @@ 
smpi_coll_tuned_alltoall_ring_mpi_barrier(void *send_buff, int send_count, src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; - MPI_Barrier(comm); - MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, + smpi_mpi_barrier(comm); + smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag, comm, &s); } diff --git a/src/smpi/colls/alltoall-ring-one-barrier.c b/src/smpi/colls/alltoall-ring-one-barrier.c index 73ce56188e..97b912b28c 100644 --- a/src/smpi/colls/alltoall-ring-one-barrier.c +++ b/src/smpi/colls/alltoall-ring-one-barrier.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** * Function: alltoall_ring @@ -33,20 +33,20 @@ smpi_coll_tuned_alltoall_ring_one_barrier(void *send_buff, int send_count, char *send_ptr = (char *) send_buff; char *recv_ptr = (char *) recv_buff; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); - MPI_Type_extent(send_type, &send_chunk); - MPI_Type_extent(recv_type, &recv_chunk); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); + send_chunk = smpi_datatype_get_extent(send_type); + recv_chunk = smpi_datatype_get_extent(recv_type); send_chunk *= send_count; recv_chunk *= recv_count; - MPI_Barrier(comm); + smpi_mpi_barrier(comm); for (i = 0; i < num_procs; i++) { src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; - MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, + smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag, comm, &s); } diff --git a/src/smpi/colls/alltoall-ring.c b/src/smpi/colls/alltoall-ring.c index 88edd6ed7a..622fd9dbda 100644 --- a/src/smpi/colls/alltoall-ring.c +++ b/src/smpi/colls/alltoall-ring.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** * Function: alltoall_ring @@ -33,10 +33,10 @@ smpi_coll_tuned_alltoall_ring(void *send_buff, int send_count, char *send_ptr = (char *) send_buff; char *recv_ptr = (char *) recv_buff; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); - MPI_Type_extent(send_type, &send_chunk); - MPI_Type_extent(recv_type, &recv_chunk); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); + send_chunk = smpi_datatype_get_extent(send_type); + recv_chunk = smpi_datatype_get_extent(recv_type); send_chunk *= send_count; recv_chunk *= recv_count; @@ -45,7 +45,7 @@ smpi_coll_tuned_alltoall_ring(void *send_buff, int send_count, src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; - MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, + smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag, comm, &s); } diff --git a/src/smpi/colls/alltoall-simple.c b/src/smpi/colls/alltoall-simple.c index d2988a70d9..bb9d87fa26 100644 --- a/src/smpi/colls/alltoall-simple.c +++ b/src/smpi/colls/alltoall-simple.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** @@ -27,7 +27,7 @@ int smpi_coll_tuned_alltoall_simple(void *send_buff, int send_count, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) { - int i, rank, size, nreqs, err, 
src, dst, tag = 101; + int i, rank, size, nreqs, src, dst, tag = 101; char *psnd; char *prcv; MPI_Aint sndinc; @@ -38,10 +38,10 @@ int smpi_coll_tuned_alltoall_simple(void *send_buff, int send_count, MPI_Status s, *statuses; - MPI_Comm_size(comm, &size); - MPI_Comm_rank(comm, &rank); - MPI_Type_extent(send_type, &sndinc); - MPI_Type_extent(recv_type, &rcvinc); + size = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); + sndinc = smpi_datatype_get_extent(send_type); + rcvinc = smpi_datatype_get_extent(recv_type); sndinc *= send_count; rcvinc *= recv_count; @@ -60,7 +60,7 @@ int smpi_coll_tuned_alltoall_simple(void *send_buff, int send_count, psnd = ((char *) send_buff) + (rank * sndinc); prcv = ((char *) recv_buff) + (rank * rcvinc); - MPI_Sendrecv(psnd, send_count, send_type, rank, tag, + smpi_mpi_sendrecv(psnd, send_count, send_type, rank, tag, prcv, recv_count, recv_type, rank, tag, comm, &s); @@ -76,35 +76,22 @@ int smpi_coll_tuned_alltoall_simple(void *send_buff, int send_count, continue; if (dst == rank) continue; - MPI_Recv_init(prcv + (src * rcvinc), recv_count, recv_type, src, - tag, comm, preq++); - MPI_Send_init(psnd + (dst * sndinc), send_count, send_type, dst, - tag, comm, qreq++); + *(preq++) = smpi_mpi_recv_init(prcv + (src * rcvinc), recv_count, recv_type, src, + tag, comm); + *(qreq++) = smpi_mpi_send_init(psnd + (dst * sndinc), send_count, send_type, dst, + tag, comm); } /* Start all the requests. */ - err = MPI_Startall(nreqs, req); + smpi_mpi_startall(nreqs, req); /* Wait for them all. */ - err = MPI_Waitall(nreqs, req, statuses); - - if (err != MPI_SUCCESS) { - if (req) - free((char *) req); - return err; - } + smpi_mpi_waitall(nreqs, req, statuses); for (i = 0, preq = req; i < nreqs; ++i, ++preq) { - err = MPI_Request_free(preq); - if (err != MPI_SUCCESS) { - if (req) - free((char *) req); - if (statuses) - free(statuses); - return err; - } + smpi_mpi_request_free(preq); } /* All done */ diff --git a/src/smpi/colls/bcast-NTSB.c b/src/smpi/colls/bcast-NTSB.c index f7a7e4e73d..93ace1d3e8 100644 --- a/src/smpi/colls/bcast-NTSB.c +++ b/src/smpi/colls/bcast-NTSB.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" int bcast_NTSB_segment_size_in_byte = 8192; @@ -16,10 +16,10 @@ int smpi_coll_tuned_bcast_NTSB(void *buf, int count, MPI_Datatype datatype, MPI_Status *recv_status_array; MPI_Aint extent; - MPI_Type_extent(datatype, &extent); + extent = smpi_datatype_get_extent(datatype); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); + rank = smpi_comm_rank(MPI_COMM_WORLD); + size = smpi_comm_size(MPI_COMM_WORLD); /* source node and destination nodes (same through out the functions) */ int from = (rank - 1) / 2; @@ -46,9 +46,9 @@ int smpi_coll_tuned_bcast_NTSB(void *buf, int count, MPI_Datatype datatype, /* if root is not zero send to rank zero first */ if (root != 0) { if (rank == root) { - MPI_Send(buf, count, datatype, 0, tag, comm); + smpi_mpi_send(buf, count, datatype, 0, tag, comm); } else if (rank == 0) { - MPI_Recv(buf, count, datatype, root, tag, comm, &status); + smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status); } } @@ -59,31 +59,31 @@ int smpi_coll_tuned_bcast_NTSB(void *buf, int count, MPI_Datatype datatype, if (rank == 0) { /* case root has only a left child */ if (to_right == -1) { - MPI_Send(buf, count, datatype, to_left, tag, comm); + smpi_mpi_send(buf, count, datatype, to_left, tag, comm); } /* case root has both left and right children */ else { - MPI_Send(buf, count, datatype, to_left, tag, comm); - 
MPI_Send(buf, count, datatype, to_right, tag, comm); + smpi_mpi_send(buf, count, datatype, to_left, tag, comm); + smpi_mpi_send(buf, count, datatype, to_right, tag, comm); } } /* case: leaf ==> receive only */ else if (to_left == -1) { - MPI_Recv(buf, count, datatype, from, tag, comm, &status); + smpi_mpi_recv(buf, count, datatype, from, tag, comm, &status); } /* case: intermidiate node with only left child ==> relay message */ else if (to_right == -1) { - MPI_Recv(buf, count, datatype, from, tag, comm, &status); - MPI_Send(buf, count, datatype, to_left, tag, comm); + smpi_mpi_recv(buf, count, datatype, from, tag, comm, &status); + smpi_mpi_send(buf, count, datatype, to_left, tag, comm); } /* case: intermidiate node with both left and right children ==> relay message */ else { - MPI_Recv(buf, count, datatype, from, tag, comm, &status); - MPI_Send(buf, count, datatype, to_left, tag, comm); - MPI_Send(buf, count, datatype, to_right, tag, comm); + smpi_mpi_recv(buf, count, datatype, from, tag, comm, &status); + smpi_mpi_send(buf, count, datatype, to_left, tag, comm); + smpi_mpi_send(buf, count, datatype, to_right, tag, comm); } return MPI_SUCCESS; } @@ -91,13 +91,13 @@ int smpi_coll_tuned_bcast_NTSB(void *buf, int count, MPI_Datatype datatype, else { send_request_array = - (MPI_Request *) malloc(2 * (size + pipe_length) * sizeof(MPI_Request)); + (MPI_Request *) xbt_malloc(2 * (size + pipe_length) * sizeof(MPI_Request)); recv_request_array = - (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request)); + (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request)); send_status_array = - (MPI_Status *) malloc(2 * (size + pipe_length) * sizeof(MPI_Status)); + (MPI_Status *) xbt_malloc(2 * (size + pipe_length) * sizeof(MPI_Status)); recv_status_array = - (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status)); + (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status)); @@ -106,60 +106,60 @@ int smpi_coll_tuned_bcast_NTSB(void *buf, int count, MPI_Datatype datatype, /* case root has only a left child */ if (to_right == -1) { for (i = 0; i < pipe_length; i++) { - MPI_Isend((char *) buf + (i * increment), segment, datatype, to_left, - tag + i, comm, &send_request_array[i]); + send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to_left, + tag + i, comm); } - MPI_Waitall((pipe_length), send_request_array, send_status_array); + smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); } /* case root has both left and right children */ else { for (i = 0; i < pipe_length; i++) { - MPI_Isend((char *) buf + (i * increment), segment, datatype, to_left, - tag + i, comm, &send_request_array[i]); - MPI_Isend((char *) buf + (i * increment), segment, datatype, to_right, - tag + i, comm, &send_request_array[i + pipe_length]); + send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to_left, + tag + i, comm); + send_request_array[i + pipe_length] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to_right, + tag + i, comm); } - MPI_Waitall((2 * pipe_length), send_request_array, send_status_array); + smpi_mpi_waitall((2 * pipe_length), send_request_array, send_status_array); } } /* case: leaf ==> receive only */ else if (to_left == -1) { for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *) buf + (i * increment), segment, datatype, from, - tag + i, comm, &recv_request_array[i]); + recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, 
from, + tag + i, comm); } - MPI_Waitall((pipe_length), recv_request_array, recv_status_array); + smpi_mpi_waitall((pipe_length), recv_request_array, recv_status_array); } /* case: intermidiate node with only left child ==> relay message */ else if (to_right == -1) { for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *) buf + (i * increment), segment, datatype, from, - tag + i, comm, &recv_request_array[i]); + recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from, + tag + i, comm); } for (i = 0; i < pipe_length; i++) { - MPI_Wait(&recv_request_array[i], &status); - MPI_Isend((char *) buf + (i * increment), segment, datatype, to_left, - tag + i, comm, &send_request_array[i]); + smpi_mpi_wait(&recv_request_array[i], &status); + send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to_left, + tag + i, comm); } - MPI_Waitall(pipe_length, send_request_array, send_status_array); + smpi_mpi_waitall(pipe_length, send_request_array, send_status_array); } /* case: intermidiate node with both left and right children ==> relay message */ else { for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *) buf + (i * increment), segment, datatype, from, - tag + i, comm, &recv_request_array[i]); + recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from, + tag + i, comm); } for (i = 0; i < pipe_length; i++) { - MPI_Wait(&recv_request_array[i], &status); - MPI_Isend((char *) buf + (i * increment), segment, datatype, to_left, - tag + i, comm, &send_request_array[i]); - MPI_Isend((char *) buf + (i * increment), segment, datatype, to_right, - tag + i, comm, &send_request_array[i + pipe_length]); + smpi_mpi_wait(&recv_request_array[i], &status); + send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to_left, + tag + i, comm); + send_request_array[i + pipe_length] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to_right, + tag + i, comm); } - MPI_Waitall((2 * pipe_length), send_request_array, send_status_array); + smpi_mpi_waitall((2 * pipe_length), send_request_array, send_status_array); } free(send_request_array); @@ -170,7 +170,8 @@ int smpi_coll_tuned_bcast_NTSB(void *buf, int count, MPI_Datatype datatype, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { - MPI_Bcast((char *) buf + (pipe_length * increment), remainder, datatype, + XBT_WARN("MPI_bcast_NTSB use default MPI_bcast."); + smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype, root, comm); } diff --git a/src/smpi/colls/bcast-NTSL-Isend.c b/src/smpi/colls/bcast-NTSL-Isend.c index c817759c57..89f8f5c351 100644 --- a/src/smpi/colls/bcast-NTSL-Isend.c +++ b/src/smpi/colls/bcast-NTSL-Isend.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" static int bcast_NTSL_segment_size_in_byte = 8192; @@ -18,10 +18,10 @@ int smpi_coll_tuned_bcast_NTSL_Isend(void *buf, int count, MPI_Datatype datatype int rank, size; int i; MPI_Aint extent; - MPI_Type_extent(datatype, &extent); + extent = smpi_datatype_get_extent(datatype); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); + rank = smpi_comm_rank(MPI_COMM_WORLD); + size = smpi_comm_size(MPI_COMM_WORLD); /* source node and destination nodes (same through out the functions) */ int to = (rank + 1) % size; @@ -45,23 +45,23 @@ int smpi_coll_tuned_bcast_NTSL_Isend(void *buf, int count, MPI_Datatype datatype 
*/ if (root != 0) { if (rank == root) { - MPI_Send(buf, count, datatype, 0, tag, comm); + smpi_mpi_send(buf, count, datatype, 0, tag, comm); } else if (rank == 0) { - MPI_Recv(buf, count, datatype, root, tag, comm, &status); + smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status); } } /* when a message is smaller than a block size => no pipeline */ if (count <= segment) { if (rank == 0) { - MPI_Send(buf, count, datatype, to, tag, comm); + smpi_mpi_send(buf, count, datatype, to, tag, comm); } else if (rank == (size - 1)) { - MPI_Irecv(buf, count, datatype, from, tag, comm, &request); - MPI_Wait(&request, &status); + request = smpi_mpi_irecv(buf, count, datatype, from, tag, comm); + smpi_mpi_wait(&request, &status); } else { - MPI_Irecv(buf, count, datatype, from, tag, comm, &request); - MPI_Wait(&request, &status); - MPI_Send(buf, count, datatype, to, tag, comm); + request = smpi_mpi_irecv(buf, count, datatype, from, tag, comm); + smpi_mpi_wait(&request, &status); + smpi_mpi_send(buf, count, datatype, to, tag, comm); } return MPI_SUCCESS; } @@ -69,44 +69,44 @@ int smpi_coll_tuned_bcast_NTSL_Isend(void *buf, int count, MPI_Datatype datatype /* pipeline bcast */ else { send_request_array = - (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request)); + (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request)); recv_request_array = - (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request)); + (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request)); send_status_array = - (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status)); + (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status)); recv_status_array = - (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status)); + (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status)); /* root send data */ if (rank == 0) { for (i = 0; i < pipe_length; i++) { - MPI_Isend((char *) buf + (i * increment), segment, datatype, to, - (tag + i), comm, &send_request_array[i]); + send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to, + (tag + i), comm); } - MPI_Waitall((pipe_length), send_request_array, send_status_array); + smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); } /* last node only receive data */ else if (rank == (size - 1)) { for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *) buf + (i * increment), segment, datatype, from, - (tag + i), comm, &recv_request_array[i]); + recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from, + (tag + i), comm); } - MPI_Waitall((pipe_length), recv_request_array, recv_status_array); + smpi_mpi_waitall((pipe_length), recv_request_array, recv_status_array); } /* intermediate nodes relay (receive, then send) data */ else { for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *) buf + (i * increment), segment, datatype, from, - (tag + i), comm, &recv_request_array[i]); + recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from, + (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - MPI_Wait(&recv_request_array[i], &status); - MPI_Isend((char *) buf + (i * increment), segment, datatype, to, - (tag + i), comm, &send_request_array[i]); + smpi_mpi_wait(&recv_request_array[i], &status); + send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to, + (tag + i), comm); } - MPI_Waitall((pipe_length), send_request_array, send_status_array); + 
smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); } free(send_request_array); @@ -117,7 +117,8 @@ int smpi_coll_tuned_bcast_NTSL_Isend(void *buf, int count, MPI_Datatype datatype /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { - MPI_Bcast((char *) buf + (pipe_length * increment), remainder, datatype, + XBT_WARN("MPI_bcast_NTSL_Isend_nb use default MPI_bcast."); + smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype, root, comm); } diff --git a/src/smpi/colls/bcast-NTSL.c b/src/smpi/colls/bcast-NTSL.c index 23293f1c60..090edc7595 100644 --- a/src/smpi/colls/bcast-NTSL.c +++ b/src/smpi/colls/bcast-NTSL.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" static int bcast_NTSL_segment_size_in_byte = 8192; @@ -18,10 +18,10 @@ int smpi_coll_tuned_bcast_NTSL(void *buf, int count, MPI_Datatype datatype, int rank, size; int i; MPI_Aint extent; - MPI_Type_extent(datatype, &extent); + extent = smpi_datatype_get_extent(datatype); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); + rank = smpi_comm_rank(MPI_COMM_WORLD); + size = smpi_comm_size(MPI_COMM_WORLD); /* source node and destination nodes (same through out the functions) */ int to = (rank + 1) % size; @@ -45,23 +45,23 @@ int smpi_coll_tuned_bcast_NTSL(void *buf, int count, MPI_Datatype datatype, */ if (root != 0) { if (rank == root) { - MPI_Send(buf, count, datatype, 0, tag, comm); + smpi_mpi_send(buf, count, datatype, 0, tag, comm); } else if (rank == 0) { - MPI_Recv(buf, count, datatype, root, tag, comm, &status); + smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status); } } /* when a message is smaller than a block size => no pipeline */ if (count <= segment) { if (rank == 0) { - MPI_Send(buf, count, datatype, to, tag, comm); + smpi_mpi_send(buf, count, datatype, to, tag, comm); } else if (rank == (size - 1)) { - MPI_Irecv(buf, count, datatype, from, tag, comm, &request); - MPI_Wait(&request, &status); + request = smpi_mpi_irecv(buf, count, datatype, from, tag, comm); + smpi_mpi_wait(&request, &status); } else { - MPI_Irecv(buf, count, datatype, from, tag, comm, &request); - MPI_Wait(&request, &status); - MPI_Send(buf, count, datatype, to, tag, comm); + request = smpi_mpi_irecv(buf, count, datatype, from, tag, comm); + smpi_mpi_wait(&request, &status); + smpi_mpi_send(buf, count, datatype, to, tag, comm); } return MPI_SUCCESS; } @@ -69,44 +69,44 @@ int smpi_coll_tuned_bcast_NTSL(void *buf, int count, MPI_Datatype datatype, /* pipeline bcast */ else { send_request_array = - (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request)); + (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request)); recv_request_array = - (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request)); + (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request)); send_status_array = - (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status)); + (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status)); recv_status_array = - (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status)); + (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status)); /* root send data */ if (rank == 0) { for (i = 0; i < pipe_length; i++) { - MPI_Isend((char *) buf + (i * increment), segment, datatype, to, - (tag + i), comm, &send_request_array[i]); + send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to, + 
(tag + i), comm); } - MPI_Waitall((pipe_length), send_request_array, send_status_array); + smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); } /* last node only receive data */ else if (rank == (size - 1)) { for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *) buf + (i * increment), segment, datatype, from, - (tag + i), comm, &recv_request_array[i]); + recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from, + (tag + i), comm); } - MPI_Waitall((pipe_length), recv_request_array, recv_status_array); + smpi_mpi_waitall((pipe_length), recv_request_array, recv_status_array); } /* intermediate nodes relay (receive, then send) data */ else { for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *) buf + (i * increment), segment, datatype, from, - (tag + i), comm, &recv_request_array[i]); + recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from, + (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - MPI_Wait(&recv_request_array[i], &status); - MPI_Isend((char *) buf + (i * increment), segment, datatype, to, - (tag + i), comm, &send_request_array[i]); + smpi_mpi_wait(&recv_request_array[i], &status); + send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to, + (tag + i), comm); } - MPI_Waitall((pipe_length), send_request_array, send_status_array); + smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); } free(send_request_array); @@ -117,7 +117,8 @@ int smpi_coll_tuned_bcast_NTSL(void *buf, int count, MPI_Datatype datatype, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { - MPI_Bcast((char *) buf + (pipe_length * increment), remainder, datatype, + XBT_WARN("MPI_bcast_arrival_NTSL use default MPI_bcast."); + smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype, root, comm); } diff --git a/src/smpi/colls/bcast-SMP-binary.c b/src/smpi/colls/bcast-SMP-binary.c index 68ce0fe694..ec717e276b 100644 --- a/src/smpi/colls/bcast-SMP-binary.c +++ b/src/smpi/colls/bcast-SMP-binary.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" #ifndef NUM_CORE #define NUM_CORE 8 #endif @@ -17,10 +17,10 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, int rank, size; int i; MPI_Aint extent; - MPI_Type_extent(datatype, &extent); + extent = smpi_datatype_get_extent(datatype); - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &size); + rank = smpi_comm_rank(comm); + size = smpi_comm_size(comm); int segment = bcast_SMP_binary_segment_byte / extent; int pipe_length = count / segment; @@ -42,9 +42,9 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, // if root is not zero send to rank zero first if (root != 0) { if (rank == root) - MPI_Send(buf, count, datatype, 0, tag, comm); + smpi_mpi_send(buf, count, datatype, 0, tag, comm); else if (rank == 0) - MPI_Recv(buf, count, datatype, root, tag, comm, &status); + smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status); } // when a message is smaller than a block size => no pipeline if (count <= segment) { @@ -54,52 +54,52 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, if (rank == 0) { //printf("node %d left %d right %d\n",rank,to_inter_left,to_inter_right); if (to_inter_left < size) - MPI_Send(buf, count, datatype, to_inter_left, tag, comm); + smpi_mpi_send(buf, count, datatype, to_inter_left, tag, comm); if (to_inter_right < size) - MPI_Send(buf, count, 
datatype, to_inter_right, tag, comm); + smpi_mpi_send(buf, count, datatype, to_inter_right, tag, comm); if ((to_intra_left - base) < num_core) - MPI_Send(buf, count, datatype, to_intra_left, tag, comm); + smpi_mpi_send(buf, count, datatype, to_intra_left, tag, comm); if ((to_intra_right - base) < num_core) - MPI_Send(buf, count, datatype, to_intra_right, tag, comm); + smpi_mpi_send(buf, count, datatype, to_intra_right, tag, comm); } // case LEAVES ROOT-of-eash-SMP else if (to_inter_left >= size) { //printf("node %d from %d\n",rank,from_inter); - MPI_Irecv(buf, count, datatype, from_inter, tag, comm, &request); - MPI_Wait(&request, &status); + request = smpi_mpi_irecv(buf, count, datatype, from_inter, tag, comm); + smpi_mpi_wait(&request, &status); if ((to_intra_left - base) < num_core) - MPI_Send(buf, count, datatype, to_intra_left, tag, comm); + smpi_mpi_send(buf, count, datatype, to_intra_left, tag, comm); if ((to_intra_right - base) < num_core) - MPI_Send(buf, count, datatype, to_intra_right, tag, comm); + smpi_mpi_send(buf, count, datatype, to_intra_right, tag, comm); } // case INTERMEDIAT ROOT-of-each-SMP else { //printf("node %d left %d right %d from %d\n",rank,to_inter_left,to_inter_right,from_inter); - MPI_Irecv(buf, count, datatype, from_inter, tag, comm, &request); - MPI_Wait(&request, &status); - MPI_Send(buf, count, datatype, to_inter_left, tag, comm); + request = smpi_mpi_irecv(buf, count, datatype, from_inter, tag, comm); + smpi_mpi_wait(&request, &status); + smpi_mpi_send(buf, count, datatype, to_inter_left, tag, comm); if (to_inter_right < size) - MPI_Send(buf, count, datatype, to_inter_right, tag, comm); + smpi_mpi_send(buf, count, datatype, to_inter_right, tag, comm); if ((to_intra_left - base) < num_core) - MPI_Send(buf, count, datatype, to_intra_left, tag, comm); + smpi_mpi_send(buf, count, datatype, to_intra_left, tag, comm); if ((to_intra_right - base) < num_core) - MPI_Send(buf, count, datatype, to_intra_right, tag, comm); + smpi_mpi_send(buf, count, datatype, to_intra_right, tag, comm); } } // case non ROOT-of-each-SMP else { // case leaves if ((to_intra_left - base) >= num_core) { - MPI_Irecv(buf, count, datatype, from_intra, tag, comm, &request); - MPI_Wait(&request, &status); + request = smpi_mpi_irecv(buf, count, datatype, from_intra, tag, comm); + smpi_mpi_wait(&request, &status); } // case intermediate else { - MPI_Irecv(buf, count, datatype, from_intra, tag, comm, &request); - MPI_Wait(&request, &status); - MPI_Send(buf, count, datatype, to_intra_left, tag, comm); + request = smpi_mpi_irecv(buf, count, datatype, from_intra, tag, comm); + smpi_mpi_wait(&request, &status); + smpi_mpi_send(buf, count, datatype, to_intra_left, tag, comm); if ((to_intra_right - base) < num_core) - MPI_Send(buf, count, datatype, to_intra_right, tag, comm); + smpi_mpi_send(buf, count, datatype, to_intra_right, tag, comm); } } @@ -109,9 +109,9 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, // pipeline bcast else { request_array = - (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request)); + (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request)); status_array = - (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status)); + (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status)); // case ROOT-of-each-SMP if (rank % NUM_CORE == 0) { @@ -120,16 +120,16 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, for (i = 0; i < pipe_length; i++) { //printf("node %d left %d right %d\n",rank,to_inter_left,to_inter_right); if 
(to_inter_left < size) - MPI_Send((char *) buf + (i * increment), segment, datatype, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_inter_left, (tag + i), comm); if (to_inter_right < size) - MPI_Send((char *) buf + (i * increment), segment, datatype, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_inter_right, (tag + i), comm); if ((to_intra_left - base) < num_core) - MPI_Send((char *) buf + (i * increment), segment, datatype, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra_left, (tag + i), comm); if ((to_intra_right - base) < num_core) - MPI_Send((char *) buf + (i * increment), segment, datatype, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra_right, (tag + i), comm); } } @@ -137,16 +137,16 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, else if (to_inter_left >= size) { //printf("node %d from %d\n",rank,from_inter); for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *) buf + (i * increment), segment, datatype, - from_inter, (tag + i), comm, &request_array[i]); + request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, + from_inter, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - MPI_Wait(&request_array[i], &status); + smpi_mpi_wait(&request_array[i], &status); if ((to_intra_left - base) < num_core) - MPI_Send((char *) buf + (i * increment), segment, datatype, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra_left, (tag + i), comm); if ((to_intra_right - base) < num_core) - MPI_Send((char *) buf + (i * increment), segment, datatype, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra_right, (tag + i), comm); } } @@ -154,21 +154,21 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, else { //printf("node %d left %d right %d from %d\n",rank,to_inter_left,to_inter_right,from_inter); for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *) buf + (i * increment), segment, datatype, - from_inter, (tag + i), comm, &request_array[i]); + request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, + from_inter, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - MPI_Wait(&request_array[i], &status); - MPI_Send((char *) buf + (i * increment), segment, datatype, + smpi_mpi_wait(&request_array[i], &status); + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_inter_left, (tag + i), comm); if (to_inter_right < size) - MPI_Send((char *) buf + (i * increment), segment, datatype, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_inter_right, (tag + i), comm); if ((to_intra_left - base) < num_core) - MPI_Send((char *) buf + (i * increment), segment, datatype, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra_left, (tag + i), comm); if ((to_intra_right - base) < num_core) - MPI_Send((char *) buf + (i * increment), segment, datatype, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra_right, (tag + i), comm); } } @@ -178,23 +178,23 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, // case leaves if ((to_intra_left - base) >= num_core) { for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *) buf + (i * increment), segment, datatype, - from_intra, (tag + i), comm, &request_array[i]); + request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, + from_intra, (tag + i), comm); } - MPI_Waitall((pipe_length), request_array, status_array); + 
smpi_mpi_waitall((pipe_length), request_array, status_array); } // case intermediate else { for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *) buf + (i * increment), segment, datatype, - from_intra, (tag + i), comm, &request_array[i]); + request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, + from_intra, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - MPI_Wait(&request_array[i], &status); - MPI_Send((char *) buf + (i * increment), segment, datatype, + smpi_mpi_wait(&request_array[i], &status); + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra_left, (tag + i), comm); if ((to_intra_right - base) < num_core) - MPI_Send((char *) buf + (i * increment), segment, datatype, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra_right, (tag + i), comm); } } @@ -206,7 +206,8 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, // when count is not divisible by block size, use default BCAST for the remainder if ((remainder != 0) && (count > segment)) { - MPI_Bcast((char *) buf + (pipe_length * increment), remainder, datatype, + XBT_WARN("MPI_bcast_SMP_binary use default MPI_bcast."); + smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype, root, comm); } diff --git a/src/smpi/colls/bcast-SMP-binomial.c b/src/smpi/colls/bcast-SMP-binomial.c index 0e26eef54b..99193022b1 100644 --- a/src/smpi/colls/bcast-SMP-binomial.c +++ b/src/smpi/colls/bcast-SMP-binomial.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" #ifndef NUM_CORE #define NUM_CORE 8 #endif @@ -13,8 +13,8 @@ int smpi_coll_tuned_bcast_SMP_binomial(void *buf, int count, MPI_Status status; int tag = 50; - MPI_Comm_size(comm, &size); - MPI_Comm_rank(comm, &rank); + size = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); int to_intra, to_inter; int from_intra, from_inter; @@ -28,9 +28,9 @@ int smpi_coll_tuned_bcast_SMP_binomial(void *buf, int count, // if root is not zero send to rank zero first if (root != 0) { if (rank == root) - MPI_Send(buf, count, datatype, 0, tag, comm); + smpi_mpi_send(buf, count, datatype, 0, tag, comm); else if (rank == 0) - MPI_Recv(buf, count, datatype, root, tag, comm, &status); + smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status); } //FIRST STEP node 0 send to every root-of-each-SMP with binomial tree @@ -42,7 +42,7 @@ int smpi_coll_tuned_bcast_SMP_binomial(void *buf, int count, if (inter_rank & mask) { from_inter = (inter_rank - mask) * NUM_CORE; //printf("Node %d recv from node %d when mask is %d\n", rank, from_inter, mask); - MPI_Recv(buf, count, datatype, from_inter, tag, comm, &status); + smpi_mpi_recv(buf, count, datatype, from_inter, tag, comm, &status); break; } mask <<= 1; @@ -56,7 +56,7 @@ int smpi_coll_tuned_bcast_SMP_binomial(void *buf, int count, to_inter = (inter_rank + mask) * NUM_CORE; if (to_inter < size) { //printf("Node %d send to node %d when mask is %d\n", rank, to_inter, mask); - MPI_Send(buf, count, datatype, to_inter, tag, comm); + smpi_mpi_send(buf, count, datatype, to_inter, tag, comm); } } mask >>= 1; @@ -70,7 +70,7 @@ int smpi_coll_tuned_bcast_SMP_binomial(void *buf, int count, if (intra_rank & mask) { from_intra = base + (intra_rank - mask); //printf("Node %d recv from node %d when mask is %d\n", rank, from_inter, mask); - MPI_Recv(buf, count, datatype, from_intra, tag, comm, &status); + smpi_mpi_recv(buf, count, datatype, from_intra, tag, comm, &status); break; } mask <<= 1; @@ -85,7 +85,7 @@ int smpi_coll_tuned_bcast_SMP_binomial(void 
*buf, int count, to_intra = base + (intra_rank + mask); if (to_intra < size) { //printf("Node %d send to node %d when mask is %d\n", rank, to_inter, mask); - MPI_Send(buf, count, datatype, to_intra, tag, comm); + smpi_mpi_send(buf, count, datatype, to_intra, tag, comm); } } mask >>= 1; diff --git a/src/smpi/colls/bcast-SMP-linear.c b/src/smpi/colls/bcast-SMP-linear.c index 9320464dd4..673f6cf392 100644 --- a/src/smpi/colls/bcast-SMP-linear.c +++ b/src/smpi/colls/bcast-SMP-linear.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" #ifndef NUM_CORE #define NUM_CORE 8 #endif @@ -17,10 +17,10 @@ int smpi_coll_tuned_bcast_SMP_linear(void *buf, int count, int rank, size; int i; MPI_Aint extent; - MPI_Type_extent(datatype, &extent); + extent = smpi_datatype_get_extent(datatype); - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &size); + rank = smpi_comm_rank(comm); + size = smpi_comm_size(comm); int segment = bcast_SMP_linear_segment_byte / extent; int pipe_length = count / segment; @@ -37,111 +37,113 @@ int smpi_coll_tuned_bcast_SMP_linear(void *buf, int count, // call native when MPI communication size is too small if (size <= NUM_CORE) { - return MPI_Bcast(buf, count, datatype, root, comm); + XBT_WARN("MPI_bcast_SMP_linear use default MPI_bcast."); + smpi_mpi_bcast(buf, count, datatype, root, comm); + return MPI_SUCCESS; } // if root is not zero send to rank zero first if (root != 0) { if (rank == root) - MPI_Send(buf, count, datatype, 0, tag, comm); + smpi_mpi_send(buf, count, datatype, 0, tag, comm); else if (rank == 0) - MPI_Recv(buf, count, datatype, root, tag, comm, &status); + smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status); } // when a message is smaller than a block size => no pipeline if (count <= segment) { // case ROOT if (rank == 0) { - MPI_Send(buf, count, datatype, to_inter, tag, comm); - MPI_Send(buf, count, datatype, to_intra, tag, comm); + smpi_mpi_send(buf, count, datatype, to_inter, tag, comm); + smpi_mpi_send(buf, count, datatype, to_intra, tag, comm); } // case last ROOT of each SMP else if (rank == (((size - 1) / NUM_CORE) * NUM_CORE)) { - MPI_Irecv(buf, count, datatype, from_inter, tag, comm, &request); - MPI_Wait(&request, &status); - MPI_Send(buf, count, datatype, to_intra, tag, comm); + request = smpi_mpi_irecv(buf, count, datatype, from_inter, tag, comm); + smpi_mpi_wait(&request, &status); + smpi_mpi_send(buf, count, datatype, to_intra, tag, comm); } // case intermediate ROOT of each SMP else if (rank % NUM_CORE == 0) { - MPI_Irecv(buf, count, datatype, from_inter, tag, comm, &request); - MPI_Wait(&request, &status); - MPI_Send(buf, count, datatype, to_inter, tag, comm); - MPI_Send(buf, count, datatype, to_intra, tag, comm); + request = smpi_mpi_irecv(buf, count, datatype, from_inter, tag, comm); + smpi_mpi_wait(&request, &status); + smpi_mpi_send(buf, count, datatype, to_inter, tag, comm); + smpi_mpi_send(buf, count, datatype, to_intra, tag, comm); } // case last non-ROOT of each SMP else if (((rank + 1) % NUM_CORE == 0) || (rank == (size - 1))) { - MPI_Irecv(buf, count, datatype, from_intra, tag, comm, &request); - MPI_Wait(&request, &status); + request = smpi_mpi_irecv(buf, count, datatype, from_intra, tag, comm); + smpi_mpi_wait(&request, &status); } // case intermediate non-ROOT of each SMP else { - MPI_Irecv(buf, count, datatype, from_intra, tag, comm, &request); - MPI_Wait(&request, &status); - MPI_Send(buf, count, datatype, to_intra, tag, comm); + request = smpi_mpi_irecv(buf, count, datatype, from_intra, tag, comm); + 
smpi_mpi_wait(&request, &status); + smpi_mpi_send(buf, count, datatype, to_intra, tag, comm); } return MPI_SUCCESS; } // pipeline bcast else { request_array = - (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request)); + (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request)); status_array = - (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status)); + (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status)); // case ROOT of each SMP if (rank % NUM_CORE == 0) { // case real root if (rank == 0) { for (i = 0; i < pipe_length; i++) { - MPI_Send((char *) buf + (i * increment), segment, datatype, to_inter, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_inter, (tag + i), comm); - MPI_Send((char *) buf + (i * increment), segment, datatype, to_intra, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra, (tag + i), comm); } } // case last ROOT of each SMP else if (rank == (((size - 1) / NUM_CORE) * NUM_CORE)) { for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *) buf + (i * increment), segment, datatype, - from_inter, (tag + i), comm, &request_array[i]); + request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, + from_inter, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - MPI_Wait(&request_array[i], &status); - MPI_Send((char *) buf + (i * increment), segment, datatype, to_intra, + smpi_mpi_wait(&request_array[i], &status); + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra, (tag + i), comm); } } // case intermediate ROOT of each SMP else { for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *) buf + (i * increment), segment, datatype, - from_inter, (tag + i), comm, &request_array[i]); + request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, + from_inter, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - MPI_Wait(&request_array[i], &status); - MPI_Send((char *) buf + (i * increment), segment, datatype, to_inter, + smpi_mpi_wait(&request_array[i], &status); + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_inter, (tag + i), comm); - MPI_Send((char *) buf + (i * increment), segment, datatype, to_intra, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra, (tag + i), comm); } } } else { // case last non-ROOT of each SMP if (((rank + 1) % NUM_CORE == 0) || (rank == (size - 1))) { for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *) buf + (i * increment), segment, datatype, - from_intra, (tag + i), comm, &request_array[i]); + request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, + from_intra, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - MPI_Wait(&request_array[i], &status); + smpi_mpi_wait(&request_array[i], &status); } } // case intermediate non-ROOT of each SMP else { for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *) buf + (i * increment), segment, datatype, - from_intra, (tag + i), comm, &request_array[i]); + request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, + from_intra, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - MPI_Wait(&request_array[i], &status); - MPI_Send((char *) buf + (i * increment), segment, datatype, to_intra, + smpi_mpi_wait(&request_array[i], &status); + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra, (tag + i), comm); } } @@ -152,9 +154,10 @@ int smpi_coll_tuned_bcast_SMP_linear(void *buf, int count, // when count is 
not divisible by block size, use default BCAST for the remainder if ((remainder != 0) && (count > segment)) { - MPI_Bcast((char *) buf + (pipe_length * increment), remainder, datatype, + XBT_WARN("MPI_bcast_SMP_linear use default MPI_bcast."); + smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype, root, comm); } - return 1; + return MPI_SUCCESS; } diff --git a/src/smpi/colls/bcast-TSB.c b/src/smpi/colls/bcast-TSB.c index d701522cee..650acc30b2 100644 --- a/src/smpi/colls/bcast-TSB.c +++ b/src/smpi/colls/bcast-TSB.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" int binary_pipeline_bcast_tree_height = 10; int binary_pipeline_bcast_send_to[2][128] = { @@ -44,10 +44,10 @@ int smpi_coll_tuned_bcast_TSB(void *buf, int count, MPI_Datatype datatype, int i; MPI_Aint extent; - MPI_Type_extent(datatype, &extent); + extent = smpi_datatype_get_extent(datatype); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); + rank = smpi_comm_rank(MPI_COMM_WORLD); + size = smpi_comm_size(MPI_COMM_WORLD); /* source node and destination nodes (same through out the functions) */ int to_left = binary_pipeline_bcast_send_to[0][rank]; @@ -70,9 +70,9 @@ int smpi_coll_tuned_bcast_TSB(void *buf, int count, MPI_Datatype datatype, /* if root is not zero send to rank zero first */ if (root != 0) { if (rank == root) { - MPI_Send(buf, count, datatype, 0, tag, comm); + smpi_mpi_send(buf, count, datatype, 0, tag, comm); } else if (rank == 0) { - MPI_Recv(buf, count, datatype, root, tag, comm, &status); + smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status); } } @@ -83,31 +83,31 @@ int smpi_coll_tuned_bcast_TSB(void *buf, int count, MPI_Datatype datatype, if (rank == 0) { /* case root has only a left child */ if (to_right == -1) { - MPI_Send(buf, count, datatype, to_left, tag, comm); + smpi_mpi_send(buf, count, datatype, to_left, tag, comm); } /* case root has both left and right children */ else { - MPI_Send(buf, count, datatype, to_left, tag, comm); - MPI_Send(buf, count, datatype, to_right, tag, comm); + smpi_mpi_send(buf, count, datatype, to_left, tag, comm); + smpi_mpi_send(buf, count, datatype, to_right, tag, comm); } } /* case: leaf ==> receive only */ else if (to_left == -1) { - MPI_Recv(buf, count, datatype, from, tag, comm, &status); + smpi_mpi_recv(buf, count, datatype, from, tag, comm, &status); } /* case: intermidiate node with only left child ==> relay message */ else if (to_right == -1) { - MPI_Recv(buf, count, datatype, from, tag, comm, &status); - MPI_Send(buf, count, datatype, to_left, tag, comm); + smpi_mpi_recv(buf, count, datatype, from, tag, comm, &status); + smpi_mpi_send(buf, count, datatype, to_left, tag, comm); } /* case: intermidiate node with both left and right children ==> relay message */ else { - MPI_Recv(buf, count, datatype, from, tag, comm, &status); - MPI_Send(buf, count, datatype, to_left, tag, comm); - MPI_Send(buf, count, datatype, to_right, tag, comm); + smpi_mpi_recv(buf, count, datatype, from, tag, comm, &status); + smpi_mpi_send(buf, count, datatype, to_left, tag, comm); + smpi_mpi_send(buf, count, datatype, to_right, tag, comm); } return MPI_SUCCESS; } @@ -119,16 +119,16 @@ int smpi_coll_tuned_bcast_TSB(void *buf, int count, MPI_Datatype datatype, /* case root has only a left child */ if (to_right == -1) { for (i = 0; i < pipe_length; i++) { - MPI_Send((char *) buf + (i * increment), segment, datatype, to_left, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_left, tag + i, comm); } } /* 
case root has both left and right children */ else { for (i = 0; i < pipe_length; i++) { - MPI_Send((char *) buf + (i * increment), segment, datatype, to_left, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_left, tag + i, comm); - MPI_Send((char *) buf + (i * increment), segment, datatype, to_right, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_right, tag + i, comm); } } @@ -137,7 +137,7 @@ int smpi_coll_tuned_bcast_TSB(void *buf, int count, MPI_Datatype datatype, /* case: leaf ==> receive only */ else if (to_left == -1) { for (i = 0; i < pipe_length; i++) { - MPI_Recv((char *) buf + (i * increment), segment, datatype, from, + smpi_mpi_recv((char *) buf + (i * increment), segment, datatype, from, tag + i, comm, &status); } } @@ -145,20 +145,20 @@ int smpi_coll_tuned_bcast_TSB(void *buf, int count, MPI_Datatype datatype, /* case: intermidiate node with only left child ==> relay message */ else if (to_right == -1) { for (i = 0; i < pipe_length; i++) { - MPI_Recv((char *) buf + (i * increment), segment, datatype, from, + smpi_mpi_recv((char *) buf + (i * increment), segment, datatype, from, tag + i, comm, &status); - MPI_Send((char *) buf + (i * increment), segment, datatype, to_left, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_left, tag + i, comm); } } /* case: intermidiate node with both left and right children ==> relay message */ else { for (i = 0; i < pipe_length; i++) { - MPI_Recv((char *) buf + (i * increment), segment, datatype, from, + smpi_mpi_recv((char *) buf + (i * increment), segment, datatype, from, tag + i, comm, &status); - MPI_Send((char *) buf + (i * increment), segment, datatype, to_left, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_left, tag + i, comm); - MPI_Send((char *) buf + (i * increment), segment, datatype, to_right, + smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_right, tag + i, comm); } } @@ -166,7 +166,8 @@ int smpi_coll_tuned_bcast_TSB(void *buf, int count, MPI_Datatype datatype, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { - MPI_Bcast((char *) buf + (pipe_length * increment), remainder, datatype, + XBT_WARN("MPI_bcast_TSB use default MPI_bcast."); + smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype, root, comm); } diff --git a/src/smpi/colls/bcast-arrival-nb.c b/src/smpi/colls/bcast-arrival-nb.c index a4246c4656..9ff27b4607 100644 --- a/src/smpi/colls/bcast-arrival-nb.c +++ b/src/smpi/colls/bcast-arrival-nb.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" static int bcast_NTSL_segment_size_in_byte = 8192; @@ -32,15 +32,15 @@ int smpi_coll_tuned_bcast_arrival_nb(void *buf, int count, char temp_buf[MAX_NODE]; MPI_Aint extent; - MPI_Type_extent(datatype, &extent); + extent = smpi_datatype_get_extent(datatype); /* destination */ int to; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); + rank = smpi_comm_rank(MPI_COMM_WORLD); + size = smpi_comm_size(MPI_COMM_WORLD); /* segment is segment size in number of elements (not bytes) */ @@ -61,9 +61,9 @@ int smpi_coll_tuned_bcast_arrival_nb(void *buf, int count, */ if (root != 0) { if (rank == root) { - MPI_Send(buf, count, datatype, 0, tag, comm); + smpi_mpi_send(buf, count, datatype, 0, tag, comm); } else if (rank == 0) { - MPI_Recv(buf, count, datatype, root, tag, comm, &status); + smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status); } 
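/*
 * Illustrative sketch, not part of this patch: every pipelined variant ends
 * with the same tail handling, where the elements that do not fill a whole
 * segment are broadcast through the default SMPI bcast and an XBT_WARN()
 * records the fallback.  Assuming the helpers as they are used in the hunks
 * above (and a log category provided via colls_private.h):
 */
static void bcast_remainder_sketch(void *buf, int count, MPI_Datatype datatype,
                                   int root, MPI_Comm comm, int segment)
{
  MPI_Aint extent = smpi_datatype_get_extent(datatype);
  int pipe_length = count / segment;   /* whole segments handled by the pipeline */
  int remainder = count % segment;     /* leftover elements */
  int increment = segment * extent;    /* byte length of one segment */

  if ((remainder != 0) && (count > segment)) {
    XBT_WARN("count not divisible by segment size: use default MPI_bcast for the tail.");
    smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder,
                   datatype, root, comm);
  }
}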
} @@ -83,7 +83,7 @@ int smpi_coll_tuned_bcast_arrival_nb(void *buf, int count, // for (j=0;j<1000;j++) { for (i = 1; i < size; i++) { if (already_sent[i] == 0) - MPI_Iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i], + smpi_mpi_iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i], MPI_STATUSES_IGNORE); } //} @@ -94,7 +94,7 @@ int smpi_coll_tuned_bcast_arrival_nb(void *buf, int count, /* message arrive */ if ((flag_array[i] == 1) && (already_sent[i] == 0)) { - MPI_Recv(temp_buf, 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); + smpi_mpi_recv(temp_buf, 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); header_buf[header_index] = i; header_index++; sent_count++; @@ -108,8 +108,8 @@ int smpi_coll_tuned_bcast_arrival_nb(void *buf, int count, if (header_index != 0) { header_buf[header_index] = -1; to = header_buf[0]; - MPI_Send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); - MPI_Send(buf, count, datatype, to, tag, comm); + smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); + smpi_mpi_send(buf, count, datatype, to, tag, comm); } /* randomly MPI_Send to one */ @@ -119,8 +119,8 @@ int smpi_coll_tuned_bcast_arrival_nb(void *buf, int count, if (already_sent[i] == 0) { header_buf[0] = i; header_buf[1] = -1; - MPI_Send(header_buf, HEADER_SIZE, MPI_INT, i, tag, comm); - MPI_Send(buf, count, datatype, i, tag, comm); + smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, i, tag, comm); + smpi_mpi_send(buf, count, datatype, i, tag, comm); already_sent[i] = 1; sent_count++; break; @@ -136,12 +136,12 @@ int smpi_coll_tuned_bcast_arrival_nb(void *buf, int count, else { /* send 1-byte message to root */ - MPI_Send(temp_buf, 1, MPI_CHAR, 0, tag, comm); + smpi_mpi_send(temp_buf, 1, MPI_CHAR, 0, tag, comm); /* wait for header and data, forward when required */ - MPI_Recv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm, + smpi_mpi_recv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm, &status); - MPI_Recv(buf, count, datatype, MPI_ANY_SOURCE, tag, comm, &status); + smpi_mpi_recv(buf, count, datatype, MPI_ANY_SOURCE, tag, comm, &status); /* search for where it is */ int myordering = 0; @@ -151,22 +151,22 @@ int smpi_coll_tuned_bcast_arrival_nb(void *buf, int count, /* send header followed by data */ if (header_buf[myordering + 1] != -1) { - MPI_Send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1], + smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1], tag, comm); - MPI_Send(buf, count, datatype, header_buf[myordering + 1], tag, comm); + smpi_mpi_send(buf, count, datatype, header_buf[myordering + 1], tag, comm); } } } /* pipeline bcast */ else { send_request_array = - (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request)); + (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request)); recv_request_array = - (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request)); + (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request)); send_status_array = - (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status)); + (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status)); recv_status_array = - (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status)); + (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status)); if (rank == 0) { sent_count = 0; @@ -183,11 +183,11 @@ int smpi_coll_tuned_bcast_arrival_nb(void *buf, int count, for (k = 0; k < 3; k++) { for (i = 1; i < size; i++) { if ((already_sent[i] == 0) && (will_send[i] == 0)) { - MPI_Iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, 
&flag_array[i], + smpi_mpi_iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i], &temp_status_array[i]); if (flag_array[i] == 1) { will_send[i] = 1; - MPI_Recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, + smpi_mpi_recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); i = 1; } @@ -238,7 +238,7 @@ int smpi_coll_tuned_bcast_arrival_nb(void *buf, int count, //start = MPI_Wtime(); /* send header */ - MPI_Send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); + smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); //total = MPI_Wtime() - start; //total *= 1000; @@ -250,16 +250,16 @@ int smpi_coll_tuned_bcast_arrival_nb(void *buf, int count, if (0 == 1) { //if (header_index == 1) { - MPI_Send(buf, count, datatype, to, tag, comm); + smpi_mpi_send(buf, count, datatype, to, tag, comm); } /* send data - pipeline */ else { for (i = 0; i < pipe_length; i++) { - MPI_Send((char *)buf + (i * increment), segment, datatype, to, tag, comm); + smpi_mpi_send((char *)buf + (i * increment), segment, datatype, to, tag, comm); } - //MPI_Waitall((pipe_length), send_request_array, send_status_array); + //smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); } //total = MPI_Wtime() - start; //total *= 1000; @@ -279,16 +279,16 @@ int smpi_coll_tuned_bcast_arrival_nb(void *buf, int count, to = i; //start = MPI_Wtime(); - MPI_Send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); + smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); /* still need to chop data so that we can use the same non-root code */ for (j = 0; j < pipe_length; j++) { - MPI_Send((char *)buf + (j * increment), segment, datatype, to, tag, + smpi_mpi_send((char *)buf + (j * increment), segment, datatype, to, tag, comm); } - //MPI_Send(buf,count,datatype,to,tag,comm); - //MPI_Wait(&request,MPI_STATUS_IGNORE); + //smpi_mpi_send(buf,count,datatype,to,tag,comm); + //smpi_mpi_wait(&request,MPI_STATUS_IGNORE); //total = MPI_Wtime() - start; //total *= 1000; @@ -310,10 +310,10 @@ int smpi_coll_tuned_bcast_arrival_nb(void *buf, int count, /* probe before exit in case there are messages to recv */ for (i = 1; i < size; i++) { - MPI_Iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i], + smpi_mpi_iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i], &temp_status_array[i]); if (flag_array[i] == 1) - MPI_Recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); + smpi_mpi_recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); } } @@ -322,16 +322,15 @@ int smpi_coll_tuned_bcast_arrival_nb(void *buf, int count, else { /* if root already send a message to this node, don't send one-byte message */ - MPI_Iprobe(0, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[0], &status); + smpi_mpi_iprobe(0, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[0], &status); /* send 1-byte message to root */ if (flag_array[0] == 0) - MPI_Send(temp_buf, 1, MPI_CHAR, 0, tag, comm); + smpi_mpi_send(temp_buf, 1, MPI_CHAR, 0, tag, comm); /* wait for header forward when required */ - MPI_Irecv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm, - &request); - MPI_Wait(&request, MPI_STATUS_IGNORE); + request = smpi_mpi_irecv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm); + smpi_mpi_wait(&request, MPI_STATUS_IGNORE); /* search for where it is */ int myordering = 0; @@ -341,7 +340,7 @@ int smpi_coll_tuned_bcast_arrival_nb(void *buf, int count, /* send header when required */ if (header_buf[myordering + 1] != -1) { - MPI_Send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1], + 
smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1], tag, comm); } @@ -349,25 +348,24 @@ int smpi_coll_tuned_bcast_arrival_nb(void *buf, int count, if (0 == -1) { //if (header_buf[1] == -1) { - MPI_Irecv(buf, count, datatype, 0, tag, comm, &request); - MPI_Wait(&request, MPI_STATUS_IGNORE); + request = smpi_mpi_irecv(buf, count, datatype, 0, tag, comm); + smpi_mpi_wait(&request, MPI_STATUS_IGNORE); //printf("\t\tnode %d ordering = %d receive data from root\n",rank,myordering); } else { for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *)buf + (i * increment), segment, datatype, MPI_ANY_SOURCE, - tag, comm, &recv_request_array[i]); + recv_request_array[i] = smpi_mpi_irecv((char *)buf + (i * increment), segment, datatype, MPI_ANY_SOURCE, + tag, comm); } } /* send data */ if (header_buf[myordering + 1] != -1) { for (i = 0; i < pipe_length; i++) { - MPI_Wait(&recv_request_array[i], MPI_STATUS_IGNORE); - MPI_Isend((char *)buf + (i * increment), segment, datatype, - header_buf[myordering + 1], tag, comm, - &send_request_array[i]); + smpi_mpi_wait(&recv_request_array[i], MPI_STATUS_IGNORE); + send_request_array[i] = smpi_mpi_isend((char *)buf + (i * increment), segment, datatype, + header_buf[myordering + 1], tag, comm); } - MPI_Waitall((pipe_length), send_request_array, send_status_array); + smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); } } @@ -380,7 +378,8 @@ int smpi_coll_tuned_bcast_arrival_nb(void *buf, int count, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { - MPI_Bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm); + XBT_WARN("MPI_bcast_arrival_nb use default MPI_bcast."); + smpi_mpi_bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm); } return MPI_SUCCESS; diff --git a/src/smpi/colls/bcast-arrival-pattern-aware-wait.c b/src/smpi/colls/bcast-arrival-pattern-aware-wait.c index 7a91dd821f..0a1224608f 100644 --- a/src/smpi/colls/bcast-arrival-pattern-aware-wait.c +++ b/src/smpi/colls/bcast-arrival-pattern-aware-wait.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" int bcast_arrival_pattern_aware_wait_segment_size_in_byte = 8192; @@ -42,15 +42,15 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, int header_size = BCAST_ARRIVAL_PATTERN_AWARE_HEADER_SIZE; MPI_Aint extent; - MPI_Type_extent(datatype, &extent); + extent = smpi_datatype_get_extent(datatype); /* source and destination */ int to, from; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); + rank = smpi_comm_rank(MPI_COMM_WORLD); + size = smpi_comm_size(MPI_COMM_WORLD); /* segment is segment size in number of elements (not bytes) */ @@ -71,9 +71,9 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, */ if (root != 0) { if (rank == root) { - MPI_Send(buf, count, datatype, 0, tag, comm); + smpi_mpi_send(buf, count, datatype, 0, tag, comm); } else if (rank == 0) { - MPI_Recv(buf, count, datatype, root, tag, comm, &status); + smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status); } } @@ -92,13 +92,13 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, /* start pipeline bcast */ send_request_array = - (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request)); + (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request)); recv_request_array = - (MPI_Request *) malloc((size + pipe_length) * 
sizeof(MPI_Request)); + (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request)); send_status_array = - (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status)); + (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status)); recv_status_array = - (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status)); + (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status)); /* root */ if (rank == 0) { @@ -114,11 +114,11 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, for (k = 0; k < 3; k++) { for (i = 1; i < size; i++) { if ((already_sent[i] == 0) && (will_send[i] == 0)) { - MPI_Iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i], + smpi_mpi_iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i], &temp_status_array[i]); if (flag_array[i] == 1) { will_send[i] = 1; - MPI_Recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, + smpi_mpi_recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); i = 0; } @@ -147,14 +147,13 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, to = header_buf[0]; /* send header */ - MPI_Send(header_buf, header_size, MPI_INT, to, tag, comm); + smpi_mpi_send(header_buf, header_size, MPI_INT, to, tag, comm); /* send data - pipeline */ for (i = 0; i < pipe_length; i++) { - MPI_Isend((char *)buf + (i * increment), segment, datatype, to, tag, comm, - &send_request_array[i]); + send_request_array[i] = smpi_mpi_isend((char *)buf + (i * increment), segment, datatype, to, tag, comm); } - MPI_Waitall((pipe_length), send_request_array, send_status_array); + smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); } @@ -171,11 +170,11 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, header_buf[1] = -1; to = i; - MPI_Send(header_buf, header_size, MPI_INT, to, tag, comm); + smpi_mpi_send(header_buf, header_size, MPI_INT, to, tag, comm); /* still need to chop data so that we can use the same non-root code */ for (j = 0; j < pipe_length; j++) { - MPI_Send((char *)buf + (j * increment), segment, datatype, to, tag, comm); + smpi_mpi_send((char *)buf + (j * increment), segment, datatype, to, tag, comm); } } } @@ -188,12 +187,11 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, else { /* send 1-byte message to root */ - MPI_Send(temp_buf, 1, MPI_CHAR, 0, tag, comm); + smpi_mpi_send(temp_buf, 1, MPI_CHAR, 0, tag, comm); /* wait for header forward when required */ - MPI_Irecv(header_buf, header_size, MPI_INT, MPI_ANY_SOURCE, tag, comm, - &request); - MPI_Wait(&request, MPI_STATUS_IGNORE); + request = smpi_mpi_irecv(header_buf, header_size, MPI_INT, MPI_ANY_SOURCE, tag, comm); + smpi_mpi_wait(&request, MPI_STATUS_IGNORE); /* search for where it is */ int myordering = 0; @@ -210,29 +208,27 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, /* send header when required */ if (to != -1) { - MPI_Send(header_buf, header_size, MPI_INT, to, tag, comm); + smpi_mpi_send(header_buf, header_size, MPI_INT, to, tag, comm); } /* receive data */ for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *)buf + (i * increment), segment, datatype, from, tag, comm, - &recv_request_array[i]); + recv_request_array[i] = smpi_mpi_irecv((char *)buf + (i * increment), segment, datatype, from, tag, comm); } /* forward data */ if (to != -1) { for (i = 0; i < pipe_length; i++) { - MPI_Wait(&recv_request_array[i], MPI_STATUS_IGNORE); - MPI_Isend((char *)buf + (i * increment), segment, datatype, to, tag, comm, - 
&send_request_array[i]); + smpi_mpi_wait(&recv_request_array[i], MPI_STATUS_IGNORE); + send_request_array[i] = smpi_mpi_isend((char *)buf + (i * increment), segment, datatype, to, tag, comm); } - MPI_Waitall((pipe_length), send_request_array, send_status_array); + smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); } /* recv only */ else { - MPI_Waitall((pipe_length), recv_request_array, recv_status_array); + smpi_mpi_waitall((pipe_length), recv_request_array, recv_status_array); } } @@ -244,7 +240,8 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { - MPI_Bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm); + XBT_WARN("MPI_bcast_arrival_pattern_aware_wait use default MPI_bcast."); + smpi_mpi_bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm); } return MPI_SUCCESS; diff --git a/src/smpi/colls/bcast-arrival-pattern-aware.c b/src/smpi/colls/bcast-arrival-pattern-aware.c index 9c84a49271..f4a482cab3 100644 --- a/src/smpi/colls/bcast-arrival-pattern-aware.c +++ b/src/smpi/colls/bcast-arrival-pattern-aware.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" static int bcast_NTSL_segment_size_in_byte = 8192; @@ -32,15 +32,15 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, char temp_buf[MAX_NODE]; MPI_Aint extent; - MPI_Type_extent(datatype, &extent); + extent = smpi_datatype_get_extent(datatype); /* destination */ int to; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); + rank = smpi_comm_rank(MPI_COMM_WORLD); + size = smpi_comm_size(MPI_COMM_WORLD); /* segment is segment size in number of elements (not bytes) */ @@ -61,9 +61,9 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, */ if (root != 0) { if (rank == root) { - MPI_Send(buf, count, datatype, 0, tag, comm); + smpi_mpi_send(buf, count, datatype, 0, tag, comm); } else if (rank == 0) { - MPI_Recv(buf, count, datatype, root, tag, comm, &status); + smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status); } } @@ -79,7 +79,7 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, while (sent_count < (size - 1)) { for (i = 1; i < size; i++) { - MPI_Iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i], + smpi_mpi_iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i], MPI_STATUSES_IGNORE); } @@ -89,7 +89,7 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, /* message arrive */ if ((flag_array[i] == 1) && (already_sent[i] == 0)) { - MPI_Recv(temp_buf, 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); + smpi_mpi_recv(temp_buf, 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); header_buf[header_index] = i; header_index++; sent_count++; @@ -103,8 +103,8 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, if (header_index != 0) { header_buf[header_index] = -1; to = header_buf[0]; - MPI_Send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); - MPI_Send(buf, count, datatype, to, tag, comm); + smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); + smpi_mpi_send(buf, count, datatype, to, tag, comm); } /* randomly MPI_Send to one */ @@ -114,8 +114,8 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, if (already_sent[i] == 0) { header_buf[0] = i; header_buf[1] = -1; - MPI_Send(header_buf, HEADER_SIZE, MPI_INT, i, tag, comm); - MPI_Send(buf, count, 
datatype, i, tag, comm); + smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, i, tag, comm); + smpi_mpi_send(buf, count, datatype, i, tag, comm); already_sent[i] = 1; sent_count++; break; @@ -131,12 +131,12 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, else { /* send 1-byte message to root */ - MPI_Send(temp_buf, 1, MPI_CHAR, 0, tag, comm); + smpi_mpi_send(temp_buf, 1, MPI_CHAR, 0, tag, comm); /* wait for header and data, forward when required */ - MPI_Recv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm, + smpi_mpi_recv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm, &status); - MPI_Recv(buf, count, datatype, MPI_ANY_SOURCE, tag, comm, &status); + smpi_mpi_recv(buf, count, datatype, MPI_ANY_SOURCE, tag, comm, &status); /* search for where it is */ int myordering = 0; @@ -146,22 +146,22 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, /* send header followed by data */ if (header_buf[myordering + 1] != -1) { - MPI_Send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1], + smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1], tag, comm); - MPI_Send(buf, count, datatype, header_buf[myordering + 1], tag, comm); + smpi_mpi_send(buf, count, datatype, header_buf[myordering + 1], tag, comm); } } } /* pipeline bcast */ else { send_request_array = - (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request)); + (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request)); recv_request_array = - (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request)); + (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request)); send_status_array = - (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status)); + (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status)); recv_status_array = - (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status)); + (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status)); if (rank == 0) { //double start2 = MPI_Wtime(); @@ -171,7 +171,7 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, //iteration++; //start = MPI_Wtime(); for (i = 1; i < size; i++) { - MPI_Iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i], + smpi_mpi_iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i], &temp_status_array[i]); } //total = MPI_Wtime() - start; @@ -184,7 +184,7 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, for (i = 1; i < size; i++) { /* message arrive */ if ((flag_array[i] == 1) && (already_sent[i] == 0)) { - MPI_Recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, + smpi_mpi_recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); header_buf[header_index] = i; header_index++; @@ -216,7 +216,7 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, //start = MPI_Wtime(); /* send header */ - MPI_Send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); + smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); //total = MPI_Wtime() - start; //total *= 1000; @@ -228,16 +228,16 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, if (0 == 1) { //if (header_index == 1) { - MPI_Send(buf, count, datatype, to, tag, comm); + smpi_mpi_send(buf, count, datatype, to, tag, comm); } /* send data - pipeline */ else { for (i = 0; i < pipe_length; i++) { - MPI_Send((char *)buf + (i * increment), segment, datatype, to, tag, comm); + smpi_mpi_send((char *)buf + (i * increment), segment, datatype, to, tag, comm); } - 
//MPI_Waitall((pipe_length), send_request_array, send_status_array); + //smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); } //total = MPI_Wtime() - start; //total *= 1000; @@ -257,16 +257,16 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, to = i; //start = MPI_Wtime(); - MPI_Send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); + smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); /* still need to chop data so that we can use the same non-root code */ for (j = 0; j < pipe_length; j++) { - MPI_Send((char *)buf + (j * increment), segment, datatype, to, tag, + smpi_mpi_send((char *)buf + (j * increment), segment, datatype, to, tag, comm); } - //MPI_Send(buf,count,datatype,to,tag,comm); - //MPI_Wait(&request,MPI_STATUS_IGNORE); + //smpi_mpi_send(buf,count,datatype,to,tag,comm); + //smpi_mpi_wait(&request,MPI_STATUS_IGNORE); //total = MPI_Wtime() - start; //total *= 1000; @@ -291,12 +291,11 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, /* none root */ else { /* send 1-byte message to root */ - MPI_Send(temp_buf, 1, MPI_CHAR, 0, tag, comm); + smpi_mpi_send(temp_buf, 1, MPI_CHAR, 0, tag, comm); /* wait for header forward when required */ - MPI_Irecv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm, - &request); - MPI_Wait(&request, MPI_STATUS_IGNORE); + request = smpi_mpi_irecv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm); + smpi_mpi_wait(&request, MPI_STATUS_IGNORE); /* search for where it is */ int myordering = 0; @@ -306,7 +305,7 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, /* send header when required */ if (header_buf[myordering + 1] != -1) { - MPI_Send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1], + smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1], tag, comm); } @@ -314,25 +313,24 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, if (0 == -1) { //if (header_buf[1] == -1) { - MPI_Irecv(buf, count, datatype, 0, tag, comm, &request); - MPI_Wait(&request, MPI_STATUS_IGNORE); + request = smpi_mpi_irecv(buf, count, datatype, 0, tag, comm); + smpi_mpi_wait(&request, MPI_STATUS_IGNORE); //printf("\t\tnode %d ordering = %d receive data from root\n",rank,myordering); } else { for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *)buf + (i * increment), segment, datatype, MPI_ANY_SOURCE, - tag, comm, &recv_request_array[i]); + recv_request_array[i] = smpi_mpi_irecv((char *)buf + (i * increment), segment, datatype, MPI_ANY_SOURCE, + tag, comm); } } /* send data */ if (header_buf[myordering + 1] != -1) { for (i = 0; i < pipe_length; i++) { - MPI_Wait(&recv_request_array[i], MPI_STATUS_IGNORE); - MPI_Isend((char *)buf + (i * increment), segment, datatype, - header_buf[myordering + 1], tag, comm, - &send_request_array[i]); + smpi_mpi_wait(&recv_request_array[i], MPI_STATUS_IGNORE); + send_request_array[i] = smpi_mpi_isend((char *)buf + (i * increment), segment, datatype, + header_buf[myordering + 1], tag, comm); } - MPI_Waitall((pipe_length), send_request_array, send_status_array); + smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); } } @@ -345,7 +343,8 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { - MPI_Bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm); + 
XBT_WARN("MPI_bcast_arrival_pattern_aware use default MPI_bcast."); + smpi_mpi_bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm); } return MPI_SUCCESS; diff --git a/src/smpi/colls/bcast-arrival-scatter.c b/src/smpi/colls/bcast-arrival-scatter.c index 5c1df67440..6ad805d6e6 100644 --- a/src/smpi/colls/bcast-arrival-scatter.c +++ b/src/smpi/colls/bcast-arrival-scatter.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" #ifndef BCAST_ARRIVAL_PATTERN_AWARE_HEADER_SIZE #define BCAST_ARRIVAL_PATTERN_AWARE_HEADER_SIZE 128 @@ -41,18 +41,20 @@ int smpi_coll_tuned_bcast_arrival_scatter(void *buf, int count, int header_size = BCAST_ARRIVAL_PATTERN_AWARE_HEADER_SIZE; MPI_Aint extent; - MPI_Type_extent(datatype, &extent); + extent = smpi_datatype_get_extent(datatype); /* source and destination */ int to, from; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); + rank = smpi_comm_rank(MPI_COMM_WORLD); + size = smpi_comm_size(MPI_COMM_WORLD); /* message too small */ if (count < size) { - return MPI_Bcast(buf, count, datatype, root, comm); + XBT_WARN("MPI_bcast_arrival_scatter use default MPI_bcast."); + smpi_mpi_bcast(buf, count, datatype, root, comm); + return MPI_SUCCESS; } @@ -62,9 +64,9 @@ int smpi_coll_tuned_bcast_arrival_scatter(void *buf, int count, */ if (root != 0) { if (rank == root) { - MPI_Send(buf, count, datatype, 0, tag - 1, comm); + smpi_mpi_send(buf, count, datatype, 0, tag - 1, comm); } else if (rank == 0) { - MPI_Recv(buf, count, datatype, root, tag - 1, comm, &status); + smpi_mpi_recv(buf, count, datatype, root, tag - 1, comm, &status); } } @@ -88,11 +90,11 @@ int smpi_coll_tuned_bcast_arrival_scatter(void *buf, int count, for (k = 0; k < 3; k++) { for (i = 1; i < size; i++) { if ((already_sent[i] == 0) && (will_send[i] == 0)) { - MPI_Iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i], + smpi_mpi_iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i], &temp_status_array[i]); if (flag_array[i] == 1) { will_send[i] = 1; - MPI_Recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, + smpi_mpi_recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); i = 0; } @@ -131,7 +133,7 @@ int smpi_coll_tuned_bcast_arrival_scatter(void *buf, int count, /* send header */ for (i = 0; i < header_index; i++) { to = header_buf[i]; - MPI_Send(header_buf, header_size, MPI_INT, to, header_tag, comm); + smpi_mpi_send(header_buf, header_size, MPI_INT, to, header_tag, comm); } curr_remainder = count % header_index; @@ -145,7 +147,7 @@ int smpi_coll_tuned_bcast_arrival_scatter(void *buf, int count, if ((i == (header_index - 1)) || (curr_size == 0)) curr_size += curr_remainder; //printf("Root send to %d index %d\n",to,(i*curr_increment)); - MPI_Send((char *) buf + (i * curr_increment), curr_size, datatype, to, + smpi_mpi_send((char *) buf + (i * curr_increment), curr_size, datatype, to, tag, comm); } } @@ -156,10 +158,10 @@ int smpi_coll_tuned_bcast_arrival_scatter(void *buf, int count, /* none root */ else { /* send 1-byte message to root */ - MPI_Send(temp_buf, 1, MPI_CHAR, 0, tag, comm); + smpi_mpi_send(temp_buf, 1, MPI_CHAR, 0, tag, comm); /* wait for header forward when required */ - MPI_Recv(header_buf, header_size, MPI_INT, 0, header_tag, comm, &status); + smpi_mpi_recv(header_buf, header_size, MPI_INT, 0, header_tag, comm, &status); /* search for where it is */ int myordering = 0; @@ -180,7 +182,7 @@ int smpi_coll_tuned_bcast_arrival_scatter(void *buf, int count, /* receive data */ if (myordering == (total_nodes - 1)) 
recv_size += curr_remainder; - MPI_Recv((char *) buf + (myordering * curr_increment), recv_size, datatype, + smpi_mpi_recv((char *) buf + (myordering * curr_increment), recv_size, datatype, 0, tag, comm, &status); /* at this point all nodes in this set perform all-gather operation */ @@ -218,7 +220,7 @@ int smpi_coll_tuned_bcast_arrival_scatter(void *buf, int count, //printf("\tnode %d sent_offset %d send_count %d\n",rank,send_offset,send_count); - MPI_Sendrecv((char *) buf + send_offset, send_count, datatype, to, + smpi_mpi_sendrecv((char *) buf + send_offset, send_count, datatype, to, tag + i, (char *) buf + recv_offset, recv_count, datatype, from, tag + i, comm, &status); } diff --git a/src/smpi/colls/bcast-binomial-tree.c b/src/smpi/colls/bcast-binomial-tree.c index 284f2c4efd..e840c027dc 100644 --- a/src/smpi/colls/bcast-binomial-tree.c +++ b/src/smpi/colls/bcast-binomial-tree.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** @@ -70,8 +70,8 @@ smpi_coll_tuned_bcast_binomial_tree(void *buff, int count, int src, dst, rank, num_procs, mask, relative_rank; int tag = 1; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); relative_rank = (rank >= root) ? rank - root : rank - root + num_procs; @@ -81,7 +81,7 @@ smpi_coll_tuned_bcast_binomial_tree(void *buff, int count, src = rank - mask; if (src < 0) src += num_procs; - MPI_Recv(buff, count, data_type, src, tag, comm, MPI_STATUS_IGNORE); + smpi_mpi_recv(buff, count, data_type, src, tag, comm, MPI_STATUS_IGNORE); break; } mask <<= 1; @@ -93,7 +93,7 @@ smpi_coll_tuned_bcast_binomial_tree(void *buff, int count, dst = rank + mask; if (dst >= num_procs) dst -= num_procs; - MPI_Send(buff, count, data_type, dst, tag, comm); + smpi_mpi_send(buff, count, data_type, dst, tag, comm); } mask >>= 1; } diff --git a/src/smpi/colls/bcast-flattree-pipeline.c b/src/smpi/colls/bcast-flattree-pipeline.c index 521203247d..f01bbae76d 100644 --- a/src/smpi/colls/bcast-flattree-pipeline.c +++ b/src/smpi/colls/bcast-flattree-pipeline.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" int flattree_segment_in_byte = 8192; @@ -11,27 +11,26 @@ smpi_coll_tuned_bcast_flattree_pipeline(void *buff, int count, int tag = 1; MPI_Aint extent; - MPI_Type_extent(data_type, &extent); + extent = smpi_datatype_get_extent(data_type); int segment = flattree_segment_in_byte / extent; int pipe_length = count / segment; int increment = segment * extent; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); MPI_Request *request_array; MPI_Status *status_array; - request_array = (MPI_Request *) malloc(pipe_length * sizeof(MPI_Request)); - status_array = (MPI_Status *) malloc(pipe_length * sizeof(MPI_Status)); + request_array = (MPI_Request *) xbt_malloc(pipe_length * sizeof(MPI_Request)); + status_array = (MPI_Status *) xbt_malloc(pipe_length * sizeof(MPI_Status)); if (rank != root) { for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *)buff + (i * increment), segment, data_type, root, tag, comm, - &request_array[i]); + request_array[i] = smpi_mpi_irecv((char *)buff + (i * increment), segment, data_type, root, tag, comm); } - MPI_Waitall(pipe_length, request_array, status_array); + smpi_mpi_waitall(pipe_length, request_array, status_array); } else { @@ -41,7 +40,7 @@ smpi_coll_tuned_bcast_flattree_pipeline(void *buff, int 
count, continue; else { for (i = 0; i < pipe_length; i++) { - MPI_Send((char *)buff + (i * increment), segment, data_type, j, tag, comm); + smpi_mpi_send((char *)buff + (i * increment), segment, data_type, j, tag, comm); } } } diff --git a/src/smpi/colls/bcast-flattree.c b/src/smpi/colls/bcast-flattree.c index 693d83d555..626c20381c 100644 --- a/src/smpi/colls/bcast-flattree.c +++ b/src/smpi/colls/bcast-flattree.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" int smpi_coll_tuned_bcast_flattree(void *buff, int count, MPI_Datatype data_type, @@ -10,26 +10,26 @@ smpi_coll_tuned_bcast_flattree(void *buff, int count, MPI_Datatype data_type, int i, rank, num_procs; int tag = 1; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); if (rank != root) { - MPI_Recv(buff, count, data_type, root, tag, comm, MPI_STATUS_IGNORE); + smpi_mpi_recv(buff, count, data_type, root, tag, comm, MPI_STATUS_IGNORE); } else { - reqs = (MPI_Request *) malloc((num_procs - 1) * sizeof(MPI_Request)); + reqs = (MPI_Request *) xbt_malloc((num_procs - 1) * sizeof(MPI_Request)); req_ptr = reqs; // Root sends data to all others for (i = 0; i < num_procs; i++) { if (i == rank) continue; - MPI_Isend(buff, count, data_type, i, tag, comm, req_ptr++); + *(req_ptr++) = smpi_mpi_isend(buff, count, data_type, i, tag, comm); } // wait on all requests - MPI_Waitall(num_procs - 1, reqs, MPI_STATUSES_IGNORE); + smpi_mpi_waitall(num_procs - 1, reqs, MPI_STATUSES_IGNORE); free(reqs); } diff --git a/src/smpi/colls/bcast-scatter-LR-allgather.c b/src/smpi/colls/bcast-scatter-LR-allgather.c index 759f979853..ebd55391ac 100644 --- a/src/smpi/colls/bcast-scatter-LR-allgather.c +++ b/src/smpi/colls/bcast-scatter-LR-allgather.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** @@ -73,9 +73,9 @@ smpi_coll_tuned_bcast_scatter_LR_allgather(void *buff, int count, int scatter_size, left, right, next_src, *recv_counts, *disps; int tag = 1; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); - MPI_Type_extent(data_type, &extent); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); + extent = smpi_datatype_get_extent(data_type); nbytes = extent * count; @@ -97,9 +97,9 @@ smpi_coll_tuned_bcast_scatter_LR_allgather(void *buff, int count, curr_size = 0; // this process doesn't receive any data // because of uneven division else { - MPI_Recv((char *) buff + relative_rank * scatter_size, recv_size, + smpi_mpi_recv((char *) buff + relative_rank * scatter_size, recv_size, MPI_BYTE, src, tag, comm, &status); - MPI_Get_count(&status, MPI_BYTE, &curr_size); + curr_size = smpi_mpi_get_count(&status, MPI_BYTE); } break; } @@ -121,7 +121,7 @@ smpi_coll_tuned_bcast_scatter_LR_allgather(void *buff, int count, dst = rank + mask; if (dst >= num_procs) dst -= num_procs; - MPI_Send((char *) buff + scatter_size * (relative_rank + mask), + smpi_mpi_send((char *) buff + scatter_size * (relative_rank + mask), send_size, MPI_BYTE, dst, tag, comm); curr_size -= send_size; @@ -153,7 +153,7 @@ smpi_coll_tuned_bcast_scatter_LR_allgather(void *buff, int count, next_src = left; for (i = 1; i < num_procs; i++) { - MPI_Sendrecv((char *) buff + disps[(src - root + num_procs) % num_procs], + smpi_mpi_sendrecv((char *) buff + disps[(src - root + num_procs) % num_procs], recv_counts[(src - root + num_procs) % num_procs], MPI_BYTE, right, tag, (char *) buff + diff --git 
a/src/smpi/colls/bcast-scatter-rdb-allgather.c b/src/smpi/colls/bcast-scatter-rdb-allgather.c index db3402c15e..ab458e46b3 100644 --- a/src/smpi/colls/bcast-scatter-rdb-allgather.c +++ b/src/smpi/colls/bcast-scatter-rdb-allgather.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /***************************************************************************** @@ -70,14 +70,14 @@ smpi_coll_tuned_bcast_scatter_rdb_allgather(void *buff, int count, MPI_Datatype MPI_Status status; int i, j, k, src, dst, rank, num_procs, send_offset, recv_offset; - int mask, relative_rank, curr_size, recv_size, send_size, nbytes; + int mask, relative_rank, curr_size, recv_size = 0, send_size, nbytes; int scatter_size, tree_root, relative_dst, dst_tree_root; int my_tree_root, offset, tmp_mask, num_procs_completed; int tag = 1; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &num_procs); - MPI_Type_extent(data_type, &extent); + rank = smpi_comm_rank(comm); + num_procs = smpi_comm_size(comm); + extent = smpi_datatype_get_extent(data_type); nbytes = extent * count; scatter_size = (nbytes + num_procs - 1) / num_procs; // ceiling division @@ -98,9 +98,9 @@ smpi_coll_tuned_bcast_scatter_rdb_allgather(void *buff, int count, MPI_Datatype curr_size = 0; // this process doesn't receive any data // because of uneven division else { - MPI_Recv((char *)buff + relative_rank * scatter_size, recv_size, + smpi_mpi_recv((char *)buff + relative_rank * scatter_size, recv_size, MPI_BYTE, src, tag, comm, &status); - MPI_Get_count(&status, MPI_BYTE, &curr_size); + curr_size = smpi_mpi_get_count(&status, MPI_BYTE); } break; } @@ -122,7 +122,7 @@ smpi_coll_tuned_bcast_scatter_rdb_allgather(void *buff, int count, MPI_Datatype dst = rank + mask; if (dst >= num_procs) dst -= num_procs; - MPI_Send((char *)buff + scatter_size * (relative_rank + mask), + smpi_mpi_send((char *)buff + scatter_size * (relative_rank + mask), send_size, MPI_BYTE, dst, tag, comm); curr_size -= send_size; @@ -157,10 +157,10 @@ smpi_coll_tuned_bcast_scatter_rdb_allgather(void *buff, int count, MPI_Datatype recv_offset = dst_tree_root * scatter_size; if (relative_dst < num_procs) { - MPI_Sendrecv((char *)buff + send_offset, curr_size, MPI_BYTE, dst, tag, + smpi_mpi_sendrecv((char *)buff + send_offset, curr_size, MPI_BYTE, dst, tag, (char *)buff + recv_offset, scatter_size * mask, MPI_BYTE, dst, tag, comm, &status); - MPI_Get_count(&status, MPI_BYTE, &recv_size); + recv_size = smpi_mpi_get_count(&status, MPI_BYTE); curr_size += recv_size; } @@ -204,7 +204,7 @@ smpi_coll_tuned_bcast_scatter_rdb_allgather(void *buff, int count, MPI_Datatype if ((relative_dst > relative_rank) && (relative_rank < tree_root + num_procs_completed) && (relative_dst >= tree_root + num_procs_completed)) { - MPI_Send((char *)buff + offset, recv_size, MPI_BYTE, dst, tag, comm); + smpi_mpi_send((char *)buff + offset, recv_size, MPI_BYTE, dst, tag, comm); /* recv_size was set in the previous receive. that's the amount of data to be @@ -216,12 +216,12 @@ smpi_coll_tuned_bcast_scatter_rdb_allgather(void *buff, int count, MPI_Datatype && (relative_dst < tree_root + num_procs_completed) && (relative_rank >= tree_root + num_procs_completed)) { - MPI_Recv((char *)buff + offset, scatter_size * num_procs_completed, + smpi_mpi_recv((char *)buff + offset, scatter_size * num_procs_completed, MPI_BYTE, dst, tag, comm, &status); /* num_procs_completed is also equal to the no. 
of processes whose data we don't have */ - MPI_Get_count(&status, MPI_BYTE, &recv_size); + recv_size = smpi_mpi_get_count(&status, MPI_BYTE); curr_size += recv_size; } tmp_mask >>= 1; diff --git a/src/smpi/colls/colls.h b/src/smpi/colls/colls.h index 2a4b16f6ca..5efba4a590 100644 --- a/src/smpi/colls/colls.h +++ b/src/smpi/colls/colls.h @@ -6,8 +6,6 @@ #include "smpi/private.h" #include "xbt.h" -void star_reduction(MPI_Op op, void *src, void *target, int *count, MPI_Datatype *dtype); - #define COLL_DESCRIPTION(cat, ret, args, name) \ {# name,\ # cat " " # name " collective",\ diff --git a/src/smpi/colls/colls_global.c b/src/smpi/colls/colls_global.c new file mode 100644 index 0000000000..65d5285d0a --- /dev/null +++ b/src/smpi/colls/colls_global.c @@ -0,0 +1,4 @@ +#include "xbt.h" + +XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_colls, smpi, + "Logging specific to SMPI collectives"); diff --git a/src/smpi/colls/colls_private.h b/src/smpi/colls/colls_private.h new file mode 100644 index 0000000000..5686a71282 --- /dev/null +++ b/src/smpi/colls/colls_private.h @@ -0,0 +1,9 @@ +#ifndef SMPI_COLLS_PRIVATE_H +#define SMPI_COLLS_PRIVATE_H + +#include "colls.h" + +XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(smpi_colls); +void star_reduction(MPI_Op op, void *src, void *target, int *count, MPI_Datatype *dtype); + +#endif diff --git a/src/smpi/colls/reduce-NTSL.c b/src/smpi/colls/reduce-NTSL.c index 0dfc39b1bd..a4625335b2 100644 --- a/src/smpi/colls/reduce-NTSL.c +++ b/src/smpi/colls/reduce-NTSL.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" //#include int reduce_NTSL_segment_size_in_byte = 8192; @@ -19,10 +19,10 @@ int smpi_coll_tuned_reduce_NTSL(void *buf, void *rbuf, int count, int rank, size; int i; MPI_Aint extent; - MPI_Type_extent(datatype, &extent); + extent = smpi_datatype_get_extent(datatype); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); + rank = smpi_comm_rank(MPI_COMM_WORLD); + size = smpi_comm_size(MPI_COMM_WORLD); /* source node and destination nodes (same through out the functions) */ int to = (rank - 1 + size) % size; @@ -48,31 +48,31 @@ int smpi_coll_tuned_reduce_NTSL(void *buf, void *rbuf, int count, /* if (root != 0) { if (rank == root){ - MPI_Send(buf,count,datatype,0,tag,comm); + smpi_mpi_send(buf,count,datatype,0,tag,comm); } else if (rank == 0) { - MPI_Recv(buf,count,datatype,root,tag,comm,&status); + smpi_mpi_recv(buf,count,datatype,root,tag,comm,&status); } } */ char *tmp_buf; - tmp_buf = (char *) malloc(count * extent); + tmp_buf = (char *) xbt_malloc(count * extent); - MPI_Sendrecv(buf, count, datatype, rank, tag, rbuf, count, datatype, rank, + smpi_mpi_sendrecv(buf, count, datatype, rank, tag, rbuf, count, datatype, rank, tag, comm, &status); /* when a message is smaller than a block size => no pipeline */ if (count <= segment) { if (rank == root) { - MPI_Recv(tmp_buf, count, datatype, from, tag, comm, &status); + smpi_mpi_recv(tmp_buf, count, datatype, from, tag, comm, &status); star_reduction(op, tmp_buf, rbuf, &count, &datatype); } else if (rank == ((root - 1 + size) % size)) { - MPI_Send(rbuf, count, datatype, to, tag, comm); + smpi_mpi_send(rbuf, count, datatype, to, tag, comm); } else { - MPI_Recv(tmp_buf, count, datatype, from, tag, comm, &status); + smpi_mpi_recv(tmp_buf, count, datatype, from, tag, comm, &status); star_reduction(op, tmp_buf, rbuf, &count, &datatype); - MPI_Send(rbuf, count, datatype, to, tag, comm); + smpi_mpi_send(rbuf, count, datatype, to, tag, comm); } free(tmp_buf); return MPI_SUCCESS; @@ -81,22 +81,22 @@ 
int smpi_coll_tuned_reduce_NTSL(void *buf, void *rbuf, int count, /* pipeline */ else { send_request_array = - (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request)); + (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request)); recv_request_array = - (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request)); + (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request)); send_status_array = - (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status)); + (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status)); recv_status_array = - (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status)); + (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status)); /* root recv data */ if (rank == root) { for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *) tmp_buf + (i * increment), segment, datatype, from, - (tag + i), comm, &recv_request_array[i]); + recv_request_array[i] = smpi_mpi_irecv((char *) tmp_buf + (i * increment), segment, datatype, from, + (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - MPI_Wait(&recv_request_array[i], &status); + smpi_mpi_wait(&recv_request_array[i], &status); star_reduction(op, tmp_buf + (i * increment), (char *)rbuf + (i * increment), &segment, &datatype); } @@ -105,26 +105,26 @@ int smpi_coll_tuned_reduce_NTSL(void *buf, void *rbuf, int count, /* last node only sends data */ else if (rank == ((root - 1 + size) % size)) { for (i = 0; i < pipe_length; i++) { - MPI_Isend((char *)rbuf + (i * increment), segment, datatype, to, (tag + i), - comm, &send_request_array[i]); + send_request_array[i] = smpi_mpi_isend((char *)rbuf + (i * increment), segment, datatype, to, (tag + i), + comm); } - MPI_Waitall((pipe_length), send_request_array, send_status_array); + smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); } /* intermediate nodes relay (receive, reduce, then send) data */ else { for (i = 0; i < pipe_length; i++) { - MPI_Irecv((char *) tmp_buf + (i * increment), segment, datatype, from, - (tag + i), comm, &recv_request_array[i]); + recv_request_array[i] = smpi_mpi_irecv((char *) tmp_buf + (i * increment), segment, datatype, from, + (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - MPI_Wait(&recv_request_array[i], &status); + smpi_mpi_wait(&recv_request_array[i], &status); star_reduction(op, tmp_buf + (i * increment), (char *)rbuf + (i * increment), &segment, &datatype); - MPI_Isend((char *) rbuf + (i * increment), segment, datatype, to, - (tag + i), comm, &send_request_array[i]); + send_request_array[i] = smpi_mpi_isend((char *) rbuf + (i * increment), segment, datatype, to, + (tag + i), comm); } - MPI_Waitall((pipe_length), send_request_array, send_status_array); + smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); } free(send_request_array); @@ -135,7 +135,8 @@ int smpi_coll_tuned_reduce_NTSL(void *buf, void *rbuf, int count, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { - MPI_Reduce((char *)buf + (pipe_length * increment), + XBT_WARN("MPI_reduce_NTSL use default MPI_reduce."); + smpi_mpi_reduce((char *)buf + (pipe_length * increment), (char *)rbuf + (pipe_length * increment), remainder, datatype, op, root, comm); } diff --git a/src/smpi/colls/reduce-arrival-pattern-aware.c b/src/smpi/colls/reduce-arrival-pattern-aware.c index cf5df9eae6..db6c0dc47b 100644 --- a/src/smpi/colls/reduce-arrival-pattern-aware.c +++ 
b/src/smpi/colls/reduce-arrival-pattern-aware.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" //#include int reduce_arrival_pattern_aware_segment_size_in_byte = 8192; @@ -19,7 +19,7 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, MPI_Comm comm) { int rank; - MPI_Comm_rank(comm, &rank); + rank = smpi_comm_rank(comm); int tag = 50; MPI_Status status; @@ -72,7 +72,7 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, } char *tmp_buf; - tmp_buf = (char *) malloc(count * extent); + tmp_buf = (char *) xbt_malloc(count * extent); smpi_mpi_sendrecv(buf, count, datatype, rank, tag, rbuf, count, datatype, rank, tag, comm, &status); @@ -142,7 +142,7 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, /* wait for header and data, forward when required */ smpi_mpi_recv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm, &status); - // MPI_Recv(buf,count,datatype,MPI_ANY_SOURCE,tag,comm,&status); + // smpi_mpi_recv(buf,count,datatype,MPI_ANY_SOURCE,tag,comm,&status); /* search for where it is */ int myordering = 0; @@ -189,13 +189,13 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, // printf("node %d start\n",rank); send_request_array = - (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request)); + (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request)); recv_request_array = - (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request)); + (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request)); send_status_array = - (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status)); + (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status)); recv_status_array = - (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status)); + (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status)); if (rank == 0) { sent_count = 0; diff --git a/src/smpi/colls/reduce-binomial.c b/src/smpi/colls/reduce-binomial.c index 580e3dbbb5..76fef91981 100644 --- a/src/smpi/colls/reduce-binomial.c +++ b/src/smpi/colls/reduce-binomial.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" //#include @@ -16,14 +16,14 @@ int smpi_coll_tuned_reduce_binomial(void *sendbuf, void *recvbuf, int count, if (count == 0) return 0; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &comm_size); + rank = smpi_comm_rank(comm); + comm_size = smpi_comm_size(comm); - MPI_Type_extent(datatype, &extent); + extent = smpi_datatype_get_extent(datatype); - tmp_buf = (void *) malloc(count * extent); + tmp_buf = (void *) xbt_malloc(count * extent); - MPI_Sendrecv(sendbuf, count, datatype, rank, tag, + smpi_mpi_sendrecv(sendbuf, count, datatype, rank, tag, recvbuf, count, datatype, rank, tag, comm, &status); mask = 1; relrank = (rank - root + comm_size) % comm_size; @@ -34,12 +34,12 @@ int smpi_coll_tuned_reduce_binomial(void *sendbuf, void *recvbuf, int count, source = (relrank | mask); if (source < comm_size) { source = (source + root) % comm_size; - MPI_Recv(tmp_buf, count, datatype, source, tag, comm, &status); + smpi_mpi_recv(tmp_buf, count, datatype, source, tag, comm, &status); star_reduction(op, tmp_buf, recvbuf, &count, &datatype); } } else { dst = ((relrank & (~mask)) + root) % comm_size; - MPI_Send(recvbuf, count, datatype, dst, tag, comm); + smpi_mpi_send(recvbuf, count, datatype, dst, tag, comm); break; } mask <<= 1; diff --git a/src/smpi/colls/reduce-flat-tree.c b/src/smpi/colls/reduce-flat-tree.c index e6434c1144..7d178e1451 100644 --- 
a/src/smpi/colls/reduce-flat-tree.c +++ b/src/smpi/colls/reduce-flat-tree.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" //#include int @@ -14,14 +14,14 @@ smpi_coll_tuned_reduce_flat_tree(void *sbuf, void *rbuf, int count, char *inbuf; MPI_Status status; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &size); + rank = smpi_comm_rank(comm); + size = smpi_comm_size(comm); /* If not root, send data to the root. */ - MPI_Type_extent(dtype, &extent); + extent = smpi_datatype_get_extent(dtype); if (rank != root) { - MPI_Send(sbuf, count, dtype, root, tag, comm); + smpi_mpi_send(sbuf, count, dtype, root, tag, comm); return 0; } @@ -29,15 +29,15 @@ smpi_coll_tuned_reduce_flat_tree(void *sbuf, void *rbuf, int count, messages. */ if (size > 1) - origin = (char *) malloc(count * extent); + origin = (char *) xbt_malloc(count * extent); /* Initialize the receive buffer. */ if (rank == (size - 1)) - MPI_Sendrecv(sbuf, count, dtype, rank, tag, + smpi_mpi_sendrecv(sbuf, count, dtype, rank, tag, rbuf, count, dtype, rank, tag, comm, &status); else - MPI_Recv(rbuf, count, dtype, size - 1, tag, comm, &status); + smpi_mpi_recv(rbuf, count, dtype, size - 1, tag, comm, &status); /* Loop receiving and calling reduction function (C or Fortran). */ @@ -45,7 +45,7 @@ smpi_coll_tuned_reduce_flat_tree(void *sbuf, void *rbuf, int count, if (rank == i) inbuf = sbuf; else { - MPI_Recv(origin, count, dtype, i, tag, comm, &status); + smpi_mpi_recv(origin, count, dtype, i, tag, comm, &status); inbuf = origin; } diff --git a/src/smpi/colls/reduce-scatter-gather.c b/src/smpi/colls/reduce-scatter-gather.c index 481079adac..4dbcbf1327 100644 --- a/src/smpi/colls/reduce-scatter-gather.c +++ b/src/smpi/colls/reduce-scatter-gather.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /* reduce @@ -25,11 +25,11 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, if (count == 0) return 0; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &comm_size); + rank = smpi_comm_rank(comm); + comm_size = smpi_comm_size(comm); - MPI_Type_extent(datatype, &extent); - MPI_Type_size(datatype, &type_size); + extent = smpi_datatype_get_extent(datatype); + type_size = smpi_datatype_size(datatype); /* find nearest power-of-two less than or equal to comm_size */ pof2 = 1; @@ -39,31 +39,31 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, if (count < comm_size) { new_count = comm_size; - send_ptr = (void *) malloc(new_count * extent); - recv_ptr = (void *) malloc(new_count * extent); - tmp_buf = (void *) malloc(new_count * extent); + send_ptr = (void *) xbt_malloc(new_count * extent); + recv_ptr = (void *) xbt_malloc(new_count * extent); + tmp_buf = (void *) xbt_malloc(new_count * extent); memcpy(send_ptr, sendbuf, extent * new_count); //if ((rank != root)) - MPI_Sendrecv(send_ptr, new_count, datatype, rank, tag, + smpi_mpi_sendrecv(send_ptr, new_count, datatype, rank, tag, recv_ptr, new_count, datatype, rank, tag, comm, &status); rem = comm_size - pof2; if (rank < 2 * rem) { if (rank % 2 != 0) { /* odd */ - MPI_Send(recv_ptr, new_count, datatype, rank - 1, tag, comm); + smpi_mpi_send(recv_ptr, new_count, datatype, rank - 1, tag, comm); newrank = -1; } else { - MPI_Recv(tmp_buf, count, datatype, rank + 1, tag, comm, &status); + smpi_mpi_recv(tmp_buf, count, datatype, rank + 1, tag, comm, &status); star_reduction(op, tmp_buf, recv_ptr, &new_count, &datatype); newrank = rank / 2; } } else /* rank >= 2*rem */ newrank = rank - rem; - cnts = (int *) malloc(pof2 * 
sizeof(int)); - disps = (int *) malloc(pof2 * sizeof(int)); + cnts = (int *) xbt_malloc(pof2 * sizeof(int)); + disps = (int *) xbt_malloc(pof2 * sizeof(int)); if (newrank != -1) { for (i = 0; i < (pof2 - 1); i++) @@ -98,7 +98,7 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, } /* Send data from recvbuf. Recv into tmp_buf */ - MPI_Sendrecv((char *) recv_ptr + + smpi_mpi_sendrecv((char *) recv_ptr + disps[send_idx] * extent, send_cnt, datatype, dst, tag, @@ -136,13 +136,13 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, for (i = 1; i < pof2; i++) disps[i] = disps[i - 1] + cnts[i - 1]; - MPI_Recv(recv_ptr, cnts[0], datatype, 0, tag, comm, &status); + smpi_mpi_recv(recv_ptr, cnts[0], datatype, 0, tag, comm, &status); newrank = 0; send_idx = 0; last_idx = 2; } else if (newrank == 0) { - MPI_Send(recv_ptr, cnts[0], datatype, root, tag, comm); + smpi_mpi_send(recv_ptr, cnts[0], datatype, root, tag, comm); newrank = -1; } newroot = 0; @@ -194,12 +194,12 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, } if (newdst_tree_root == newroot_tree_root) { - MPI_Send((char *) recv_ptr + + smpi_mpi_send((char *) recv_ptr + disps[send_idx] * extent, send_cnt, datatype, dst, tag, comm); break; } else { - MPI_Recv((char *) recv_ptr + + smpi_mpi_recv((char *) recv_ptr + disps[recv_idx] * extent, recv_cnt, datatype, dst, tag, comm, &status); } @@ -218,29 +218,29 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, else if (count >= comm_size) { - tmp_buf = (void *) malloc(count * extent); + tmp_buf = (void *) xbt_malloc(count * extent); //if ((rank != root)) - MPI_Sendrecv(sendbuf, count, datatype, rank, tag, + smpi_mpi_sendrecv(sendbuf, count, datatype, rank, tag, recvbuf, count, datatype, rank, tag, comm, &status); rem = comm_size - pof2; if (rank < 2 * rem) { if (rank % 2 != 0) { /* odd */ - MPI_Send(recvbuf, count, datatype, rank - 1, tag, comm); + smpi_mpi_send(recvbuf, count, datatype, rank - 1, tag, comm); newrank = -1; } else { - MPI_Recv(tmp_buf, count, datatype, rank + 1, tag, comm, &status); + smpi_mpi_recv(tmp_buf, count, datatype, rank + 1, tag, comm, &status); star_reduction(op, tmp_buf, recvbuf, &count, &datatype); newrank = rank / 2; } } else /* rank >= 2*rem */ newrank = rank - rem; - cnts = (int *) malloc(pof2 * sizeof(int)); - disps = (int *) malloc(pof2 * sizeof(int)); + cnts = (int *) xbt_malloc(pof2 * sizeof(int)); + disps = (int *) xbt_malloc(pof2 * sizeof(int)); if (newrank != -1) { for (i = 0; i < (pof2 - 1); i++) @@ -275,7 +275,7 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, } /* Send data from recvbuf. 
Recv into tmp_buf */ - MPI_Sendrecv((char *) recvbuf + + smpi_mpi_sendrecv((char *) recvbuf + disps[send_idx] * extent, send_cnt, datatype, dst, tag, @@ -312,13 +312,13 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, for (i = 1; i < pof2; i++) disps[i] = disps[i - 1] + cnts[i - 1]; - MPI_Recv(recvbuf, cnts[0], datatype, 0, tag, comm, &status); + smpi_mpi_recv(recvbuf, cnts[0], datatype, 0, tag, comm, &status); newrank = 0; send_idx = 0; last_idx = 2; } else if (newrank == 0) { - MPI_Send(recvbuf, cnts[0], datatype, root, tag, comm); + smpi_mpi_send(recvbuf, cnts[0], datatype, root, tag, comm); newrank = -1; } newroot = 0; @@ -370,12 +370,12 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, } if (newdst_tree_root == newroot_tree_root) { - MPI_Send((char *) recvbuf + + smpi_mpi_send((char *) recvbuf + disps[send_idx] * extent, send_cnt, datatype, dst, tag, comm); break; } else { - MPI_Recv((char *) recvbuf + + smpi_mpi_recv((char *) recvbuf + disps[recv_idx] * extent, recv_cnt, datatype, dst, tag, comm, &status); } diff --git a/src/smpi/colls/star-reduction.c b/src/smpi/colls/star-reduction.c index 14281f57c1..2ca8149813 100644 --- a/src/smpi/colls/star-reduction.c +++ b/src/smpi/colls/star-reduction.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" /* * created by Pitch Patarasuk diff --git a/src/smpi/smpi_global.c b/src/smpi/smpi_global.c index ab6e4b21e6..7b5a844ed1 100644 --- a/src/smpi/smpi_global.c +++ b/src/smpi/smpi_global.c @@ -315,7 +315,7 @@ int __attribute__((weak)) MAIN__(){ int smpi_main(int (*realmain) (int argc, char *argv[]),int argc, char *argv[]) { srand(SMPI_RAND_SEED); - + if(getenv("SMPI_PRETEND_CC") != NULL) { /* Hack to ensure that smpicc can pretend to be a simple compiler. 
Particularly handy to pass it to the configuration tools */ return 0; @@ -360,6 +360,36 @@ int smpi_main(int (*realmain) (int argc, char *argv[]),int argc, char *argv[]) SIMIX_function_register_default(realmain); SIMIX_launch_application(argv[2]); + int allgather_id = find_coll_description(mpi_coll_allgather_description, + sg_cfg_get_string("smpi/allgather")); + mpi_coll_allgather_fun = (int (*)(void *, int, MPI_Datatype, + void*, int, MPI_Datatype, MPI_Comm)) + mpi_coll_allgather_description[allgather_id].coll; + + int allreduce_id = find_coll_description(mpi_coll_allreduce_description, + sg_cfg_get_string("smpi/allreduce")); + mpi_coll_allreduce_fun = (int (*)(void *sbuf, void *rbuf, int rcount, \ + MPI_Datatype dtype, MPI_Op op, MPI_Comm comm)) + mpi_coll_allreduce_description[allreduce_id].coll; + + int alltoall_id = find_coll_description(mpi_coll_alltoall_description, + sg_cfg_get_string("smpi/alltoall")); + mpi_coll_alltoall_fun = (int (*)(void *, int, MPI_Datatype, + void*, int, MPI_Datatype, MPI_Comm)) + mpi_coll_alltoall_description[alltoall_id].coll; + + int bcast_id = find_coll_description(mpi_coll_bcast_description, + sg_cfg_get_string("smpi/bcast")); + mpi_coll_bcast_fun = (int (*)(void *buf, int count, MPI_Datatype datatype, \ + int root, MPI_Comm com)) + mpi_coll_bcast_description[bcast_id].coll; + + int reduce_id = find_coll_description(mpi_coll_reduce_description, + sg_cfg_get_string("smpi/reduce")); + mpi_coll_reduce_fun = (int (*)(void *buf, void *rbuf, int count, MPI_Datatype datatype, \ + MPI_Op op, int root, MPI_Comm comm)) + mpi_coll_reduce_description[reduce_id].coll; + smpi_global_init(); /* Clean IO before the run */ diff --git a/src/smpi/smpi_pmpi.c b/src/smpi/smpi_pmpi.c index bf9551c1a8..52de7c9d6b 100644 --- a/src/smpi/smpi_pmpi.c +++ b/src/smpi/smpi_pmpi.c @@ -1558,7 +1558,7 @@ int PMPI_Bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm c if (comm == MPI_COMM_NULL) { retval = MPI_ERR_COMM; } else { - smpi_mpi_bcast(buf, count, datatype, root, comm); + mpi_coll_bcast_fun(buf, count, datatype, root, comm); retval = MPI_SUCCESS; } #ifdef HAVE_TRACING @@ -1675,8 +1675,8 @@ int PMPI_Allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype, || recvtype == MPI_DATATYPE_NULL) { retval = MPI_ERR_TYPE; } else { - smpi_mpi_allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, comm); + mpi_coll_allgather_fun(sendbuf, sendcount, sendtype, recvbuf, recvcount, + recvtype, comm); retval = MPI_SUCCESS; } #ifdef HAVE_TRACING @@ -1800,7 +1800,7 @@ int PMPI_Reduce(void *sendbuf, void *recvbuf, int count, } else if (datatype == MPI_DATATYPE_NULL || op == MPI_OP_NULL) { retval = MPI_ERR_ARG; } else { - smpi_mpi_reduce(sendbuf, recvbuf, count, datatype, op, root, comm); + mpi_coll_reduce_fun(sendbuf, recvbuf, count, datatype, op, root, comm); retval = MPI_SUCCESS; } #ifdef HAVE_TRACING @@ -1899,7 +1899,7 @@ int PMPI_Reduce_scatter(void *sendbuf, void *recvbuf, int *recvcounts, count += recvcounts[i]; displs[i] = 0; } - smpi_mpi_reduce(sendbuf, recvbuf, count, datatype, op, 0, comm); + mpi_coll_reduce_fun(sendbuf, recvbuf, count, datatype, op, 0, comm); smpi_mpi_scatterv(recvbuf, recvcounts, displs, datatype, recvbuf, recvcounts[rank], datatype, 0, comm); xbt_free(displs);
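
Note on the conversion pattern (illustration only, not part of the patch): the hunks above all apply the same translation. MPI_Comm_rank/MPI_Comm_size become smpi_comm_rank/smpi_comm_size, MPI_Type_extent becomes smpi_datatype_get_extent, blocking and non-blocking point-to-point calls become their smpi_mpi_* counterparts (with MPI_Request values returned directly rather than through an output argument), malloc becomes xbt_malloc, and fallbacks to the default algorithm go through smpi_mpi_bcast/smpi_mpi_reduce/smpi_mpi_allgather plus an XBT_WARN on the new smpi_colls log category declared in colls_global.c. The sketch below is a minimal ring allgather written directly against that internal API; the function name example_allgather_ring is hypothetical, while every smpi_* call and its argument order are taken from the hunks above.

/* Illustrative sketch only: a ring allgather using the internal SMPI API
 * adopted throughout this patch.  Not one of the algorithms modified above. */
#include "colls_private.h"

int example_allgather_ring(void *sbuf, int scount, MPI_Datatype stype,
                           void *rbuf, int rcount, MPI_Datatype rtype,
                           MPI_Comm comm)
{
  MPI_Status status;
  int tag = 1;
  int i, to, from, send_block, recv_block;
  int rank = smpi_comm_rank(comm);                  /* was MPI_Comm_rank()    */
  int size = smpi_comm_size(comm);                  /* was MPI_Comm_size()    */
  MPI_Aint rext = smpi_datatype_get_extent(rtype);  /* was MPI_Type_extent()  */

  to = (rank + 1) % size;
  from = (rank - 1 + size) % size;

  /* local copy of own block into rbuf, mirroring the self-sendrecv pattern
   * used by the converted algorithms above */
  smpi_mpi_sendrecv(sbuf, scount, stype, rank, tag,
                    (char *) rbuf + rank * rcount * rext, rcount, rtype,
                    rank, tag, comm, &status);

  /* pass blocks around the ring; after size-1 steps every rank holds all blocks */
  for (i = 0; i < size - 1; i++) {
    send_block = (rank - i + size) % size;
    recv_block = (rank - i - 1 + size) % size;
    smpi_mpi_sendrecv((char *) rbuf + send_block * rcount * rext, rcount, rtype,
                      to, tag + i,
                      (char *) rbuf + recv_block * rcount * rext, rcount, rtype,
                      from, tag + i, comm, &status);
  }
  return MPI_SUCCESS;
}

New algorithms added under src/smpi/colls/ are expected to include colls_private.h rather than colls.h, so they pick up the smpi_colls log category and the star_reduction prototype, and they become selectable at runtime through the mpi_coll_*_fun pointers bound in smpi_main() from the smpi/allgather, smpi/allreduce, smpi/alltoall, smpi/bcast and smpi/reduce configuration keys.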