From 004b932fe967a47a2ded3795af9dc069c3de9671 Mon Sep 17 00:00:00 2001 From: degomme Date: Tue, 7 Mar 2017 22:49:50 +0100 Subject: [PATCH] welcome simgrid::smpi::Request --- include/smpi/forward.hpp | 11 +- include/smpi/smpi.h | 3 +- src/smpi/colls/allgather-2dmesh.cpp | 12 +- src/smpi/colls/allgather-3dmesh.cpp | 18 +- src/smpi/colls/allgather-NTSLR-NB.cpp | 10 +- src/smpi/colls/allgather-NTSLR.cpp | 4 +- src/smpi/colls/allgather-SMP-NTS.cpp | 26 +- src/smpi/colls/allgather-bruck.cpp | 8 +- src/smpi/colls/allgather-loosely-lr.cpp | 18 +- .../colls/allgather-ompi-neighborexchange.cpp | 4 +- src/smpi/colls/allgather-pair.cpp | 4 +- src/smpi/colls/allgather-rdb.cpp | 8 +- src/smpi/colls/allgather-rhv.cpp | 4 +- src/smpi/colls/allgather-ring.cpp | 4 +- src/smpi/colls/allgather-smp-simple.cpp | 18 +- src/smpi/colls/allgather-spreading-simple.cpp | 8 +- src/smpi/colls/allgatherv-mpich-rdb.cpp | 6 +- src/smpi/colls/allgatherv-mpich-ring.cpp | 6 +- src/smpi/colls/allgatherv-ompi-bruck.cpp | 2 +- .../allgatherv-ompi-neighborexchange.cpp | 4 +- src/smpi/colls/allgatherv-pair.cpp | 4 +- src/smpi/colls/allgatherv-ring.cpp | 4 +- src/smpi/colls/allreduce-lr.cpp | 6 +- src/smpi/colls/allreduce-mvapich-rs.cpp | 14 +- .../colls/allreduce-ompi-ring-segmented.cpp | 14 +- src/smpi/colls/allreduce-rab-rdb.cpp | 12 +- src/smpi/colls/allreduce-rab1.cpp | 4 +- src/smpi/colls/allreduce-rdb.cpp | 14 +- .../colls/allreduce-smp-binomial-pipeline.cpp | 18 +- src/smpi/colls/allreduce-smp-binomial.cpp | 18 +- src/smpi/colls/allreduce-smp-rdb.cpp | 20 +- src/smpi/colls/allreduce-smp-rsag-lr.cpp | 22 +- src/smpi/colls/allreduce-smp-rsag-rab.cpp | 14 +- src/smpi/colls/allreduce-smp-rsag.cpp | 22 +- src/smpi/colls/alltoall-2dmesh.cpp | 20 +- src/smpi/colls/alltoall-3dmesh.cpp | 24 +- src/smpi/colls/alltoall-bruck.cpp | 12 +- .../colls/alltoall-mvapich-scatter-dest.cpp | 6 +- .../colls/alltoall-pair-light-barrier.cpp | 6 +- src/smpi/colls/alltoall-pair-mpi-barrier.cpp | 2 +- 
src/smpi/colls/alltoall-pair-one-barrier.cpp | 2 +- src/smpi/colls/alltoall-pair.cpp | 2 +- src/smpi/colls/alltoall-rdb.cpp | 10 +- .../colls/alltoall-ring-light-barrier.cpp | 6 +- src/smpi/colls/alltoall-ring-mpi-barrier.cpp | 2 +- src/smpi/colls/alltoall-ring-one-barrier.cpp | 2 +- src/smpi/colls/alltoall-ring.cpp | 2 +- src/smpi/colls/alltoallv-bruck.cpp | 6 +- .../colls/alltoallv-ompi-basic-linear.cpp | 11 +- .../colls/alltoallv-pair-light-barrier.cpp | 6 +- src/smpi/colls/alltoallv-pair-mpi-barrier.cpp | 2 +- src/smpi/colls/alltoallv-pair-one-barrier.cpp | 2 +- src/smpi/colls/alltoallv-pair.cpp | 2 +- .../colls/alltoallv-ring-light-barrier.cpp | 6 +- src/smpi/colls/alltoallv-ring-mpi-barrier.cpp | 2 +- src/smpi/colls/alltoallv-ring-one-barrier.cpp | 2 +- src/smpi/colls/alltoallv-ring.cpp | 2 +- src/smpi/colls/barrier-mvapich2-pair.cpp | 8 +- src/smpi/colls/barrier-ompi.cpp | 44 +- src/smpi/colls/bcast-NTSB.cpp | 54 +- src/smpi/colls/bcast-NTSL-Isend.cpp | 32 +- src/smpi/colls/bcast-NTSL.cpp | 32 +- src/smpi/colls/bcast-SMP-binary.cpp | 84 +- src/smpi/colls/bcast-SMP-binomial.cpp | 12 +- src/smpi/colls/bcast-SMP-linear.cpp | 60 +- .../bcast-arrival-pattern-aware-wait.cpp | 36 +- .../colls/bcast-arrival-pattern-aware.cpp | 70 +- src/smpi/colls/bcast-arrival-scatter.cpp | 20 +- src/smpi/colls/bcast-binomial-tree.cpp | 4 +- src/smpi/colls/bcast-flattree-pipeline.cpp | 6 +- src/smpi/colls/bcast-flattree.cpp | 6 +- src/smpi/colls/bcast-mvapich-smp.cpp | 10 +- src/smpi/colls/bcast-ompi-pipeline.cpp | 28 +- src/smpi/colls/bcast-ompi-split-bintree.cpp | 24 +- src/smpi/colls/bcast-scatter-LR-allgather.cpp | 6 +- .../colls/bcast-scatter-rdb-allgather.cpp | 10 +- src/smpi/colls/gather-mvapich.cpp | 4 +- src/smpi/colls/gather-ompi.cpp | 24 +- src/smpi/colls/reduce-NTSL.cpp | 30 +- .../colls/reduce-arrival-pattern-aware.cpp | 56 +- src/smpi/colls/reduce-binomial.cpp | 8 +- src/smpi/colls/reduce-flat-tree.cpp | 8 +- src/smpi/colls/reduce-mvapich-knomial.cpp | 8 +- 
src/smpi/colls/reduce-mvapich-two-level.cpp | 8 +- src/smpi/colls/reduce-ompi.cpp | 26 +- src/smpi/colls/reduce-rab.cpp | 28 +- src/smpi/colls/reduce-scatter-gather.cpp | 32 +- src/smpi/colls/reduce_scatter-mpich.cpp | 12 +- src/smpi/colls/reduce_scatter-ompi.cpp | 26 +- src/smpi/colls/scatter-mvapich-two-level.cpp | 8 +- src/smpi/colls/scatter-ompi.cpp | 10 +- src/smpi/private.h | 72 +- src/smpi/smpi_base.cpp | 1068 +---------------- src/smpi/smpi_coll.cpp | 30 +- src/smpi/smpi_comm.cpp | 6 +- src/smpi/smpi_global.cpp | 6 - src/smpi/smpi_pmpi.cpp | 67 +- src/smpi/smpi_replay.cpp | 38 +- src/smpi/smpi_request.cpp | 1000 +++++++++++++++ src/smpi/smpi_request.hpp | 94 ++ src/smpi/smpi_win.cpp | 74 +- tools/cmake/DefinePackages.cmake | 2 + 102 files changed, 1942 insertions(+), 1850 deletions(-) create mode 100644 src/smpi/smpi_request.cpp create mode 100644 src/smpi/smpi_request.hpp diff --git a/include/smpi/forward.hpp b/include/smpi/forward.hpp index 9097819bce..7b812078ff 100644 --- a/include/smpi/forward.hpp +++ b/include/smpi/forward.hpp @@ -13,19 +13,23 @@ namespace simgrid { namespace smpi { -class Group; class Comm; +class Group; +class Request; class Topo; class Win; +//TODO : rename these class Cart; class Graph; class Dist_Graph; + } } -typedef simgrid::smpi::Group SMPI_Group; typedef simgrid::smpi::Comm SMPI_Comm; +typedef simgrid::smpi::Group SMPI_Group; +typedef simgrid::smpi::Request SMPI_Request; typedef simgrid::smpi::Topo SMPI_Topology; typedef simgrid::smpi::Win SMPI_Win; typedef simgrid::smpi::Graph SMPI_Graph_topology; @@ -34,8 +38,9 @@ typedef simgrid::smpi::Dist_Graph SMPI_Dist_Graph_topology; #else -typedef struct SMPI_Group SMPI_Group; typedef struct SMPI_Comm SMPI_Comm; +typedef struct SMPI_Group SMPI_Group; +typedef struct SMPI_Request SMPI_Request; typedef struct SMPI_Topology SMPI_Topology; typedef struct SMPI_Win SMPI_Win; typedef struct SMPI_Graph_topology SMPI_Graph_topology; diff --git a/include/smpi/smpi.h b/include/smpi/smpi.h index 
e7f73fcfea..3d2ec2b9d0 100644 --- a/include/smpi/smpi.h +++ b/include/smpi/smpi.h @@ -376,8 +376,7 @@ typedef SMPI_Comm *MPI_Comm; XBT_PUBLIC_DATA( MPI_Comm ) MPI_COMM_WORLD; #define MPI_COMM_SELF smpi_process_comm_self() -struct s_smpi_mpi_request; -typedef struct s_smpi_mpi_request *MPI_Request; +typedef SMPI_Request *MPI_Request; #define MPI_REQUEST_NULL ((MPI_Request)NULL) #define MPI_FORTRAN_REQUEST_NULL -1 diff --git a/src/smpi/colls/allgather-2dmesh.cpp b/src/smpi/colls/allgather-2dmesh.cpp index 7452654f66..df642932db 100644 --- a/src/smpi/colls/allgather-2dmesh.cpp +++ b/src/smpi/colls/allgather-2dmesh.cpp @@ -150,7 +150,7 @@ smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype if (src == rank) continue; recv_offset = src * block_size; - *(req_ptr++) = smpi_mpi_irecv((char *)recv_buff + recv_offset, recv_count, recv_type, src, tag, + *(req_ptr++) = Request::irecv((char *)recv_buff + recv_offset, recv_count, recv_type, src, tag, comm); } @@ -159,10 +159,10 @@ smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype dst = i + my_row_base; if (dst == rank) continue; - smpi_mpi_send(send_buff, send_count, send_type, dst, tag, comm); + Request::send(send_buff, send_count, send_type, dst, tag, comm); } - smpi_mpi_waitall(Y - 1, req, MPI_STATUSES_IGNORE); + Request::waitall(Y - 1, req, MPI_STATUSES_IGNORE); req_ptr = req; @@ -173,7 +173,7 @@ smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype continue; src_row_base = (src / Y) * Y; recv_offset = src_row_base * block_size; - *(req_ptr++) = smpi_mpi_irecv((char *)recv_buff + recv_offset, recv_count * Y, recv_type, src, tag, + *(req_ptr++) = Request::irecv((char *)recv_buff + recv_offset, recv_count * Y, recv_type, src, tag, comm); } @@ -182,11 +182,11 @@ smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype if (dst == rank) continue; send_offset = my_row_base * block_size; - smpi_mpi_send((char *)recv_buff + 
send_offset, send_count * Y, send_type, dst, tag, + Request::send((char *)recv_buff + send_offset, send_count * Y, send_type, dst, tag, comm); } - smpi_mpi_waitall(X - 1, req, MPI_STATUSES_IGNORE); + Request::waitall(X - 1, req, MPI_STATUSES_IGNORE); free(req); diff --git a/src/smpi/colls/allgather-3dmesh.cpp b/src/smpi/colls/allgather-3dmesh.cpp index 7e57df13d6..8ec64ca6c6 100644 --- a/src/smpi/colls/allgather-3dmesh.cpp +++ b/src/smpi/colls/allgather-3dmesh.cpp @@ -144,7 +144,7 @@ int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, if (src == rank) continue; recv_offset = src * block_size; - *(req_ptr++) = smpi_mpi_irecv((char *)recv_buff + recv_offset, send_count, recv_type, src, tag, + *(req_ptr++) = Request::irecv((char *)recv_buff + recv_offset, send_count, recv_type, src, tag, comm); } @@ -152,10 +152,10 @@ int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, dst = i + my_row_base; if (dst == rank) continue; - smpi_mpi_send(send_buff, send_count, send_type, dst, tag, comm); + Request::send(send_buff, send_count, send_type, dst, tag, comm); } - smpi_mpi_waitall(Y - 1, req, MPI_STATUSES_IGNORE); + Request::waitall(Y - 1, req, MPI_STATUSES_IGNORE); req_ptr = req; // do colwise comm, it does not matter here if i*X or i *Y since X == Y @@ -167,7 +167,7 @@ int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, src_row_base = (src / X) * X; recv_offset = src_row_base * block_size; - *(req_ptr++) = smpi_mpi_irecv((char *)recv_buff + recv_offset, recv_count * Y, recv_type, src, tag, + *(req_ptr++) = Request::irecv((char *)recv_buff + recv_offset, recv_count * Y, recv_type, src, tag, comm); } @@ -177,11 +177,11 @@ int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, dst = (i * Y + my_col_base); if (dst == rank) continue; - smpi_mpi_send((char *)recv_buff + send_offset, send_count * Y, send_type, dst, tag, + Request::send((char *)recv_buff + send_offset, send_count * Y, send_type, dst, tag, comm); } - 
smpi_mpi_waitall(X - 1, req, MPI_STATUSES_IGNORE); + Request::waitall(X - 1, req, MPI_STATUSES_IGNORE); req_ptr = req; for (i = 1; i < Z; i++) { @@ -190,17 +190,17 @@ int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, recv_offset = (src_z_base * block_size); - *(req_ptr++) = smpi_mpi_irecv((char *)recv_buff + recv_offset, recv_count * two_dsize, recv_type, + *(req_ptr++) = Request::irecv((char *)recv_buff + recv_offset, recv_count * two_dsize, recv_type, src, tag, comm); } for (i = 1; i < Z; i++) { dst = (rank + i * two_dsize) % num_procs; send_offset = my_z_base * block_size; - smpi_mpi_send((char *)recv_buff + send_offset, send_count * two_dsize, send_type, + Request::send((char *)recv_buff + send_offset, send_count * two_dsize, send_type, dst, tag, comm); } - smpi_mpi_waitall(Z - 1, req, MPI_STATUSES_IGNORE); + Request::waitall(Z - 1, req, MPI_STATUSES_IGNORE); free(req); diff --git a/src/smpi/colls/allgather-NTSLR-NB.cpp b/src/smpi/colls/allgather-NTSLR-NB.cpp index cfb643f35b..7ae8569207 100644 --- a/src/smpi/colls/allgather-NTSLR-NB.cpp +++ b/src/smpi/colls/allgather-NTSLR-NB.cpp @@ -41,7 +41,7 @@ smpi_coll_tuned_allgather_NTSLR_NB(void *sbuf, int scount, MPI_Datatype stype, //copy a single segment from sbuf to rbuf send_offset = rank * scount * sextent; - smpi_mpi_sendrecv(sbuf, scount, stype, rank, tag, + Request::sendrecv(sbuf, scount, stype, rank, tag, (char *)rbuf + send_offset, rcount, rtype, rank, tag, comm, &status); @@ -51,15 +51,15 @@ smpi_coll_tuned_allgather_NTSLR_NB(void *sbuf, int scount, MPI_Datatype stype, //post all irecv first for (i = 0; i < size - 1; i++) { recv_offset = ((rank - i - 1 + size) % size) * increment; - rrequest_array[i] = smpi_mpi_irecv((char *)rbuf + recv_offset, rcount, rtype, from, tag + i, comm); + rrequest_array[i] = Request::irecv((char *)rbuf + recv_offset, rcount, rtype, from, tag + i, comm); } for (i = 0; i < size - 1; i++) { send_offset = ((rank - i + size) % size) * increment; - srequest_array[i] 
= smpi_mpi_isend((char *)rbuf + send_offset, scount, stype, to, tag + i, comm); - smpi_mpi_wait(&rrequest_array[i], &status); - smpi_mpi_wait(&srequest_array[i], &status2); + srequest_array[i] = Request::isend((char *)rbuf + send_offset, scount, stype, to, tag + i, comm); + Request::wait(&rrequest_array[i], &status); + Request::wait(&srequest_array[i], &status2); } free(rrequest_array); diff --git a/src/smpi/colls/allgather-NTSLR.cpp b/src/smpi/colls/allgather-NTSLR.cpp index 234ceeac56..2c16c930d5 100644 --- a/src/smpi/colls/allgather-NTSLR.cpp +++ b/src/smpi/colls/allgather-NTSLR.cpp @@ -37,7 +37,7 @@ smpi_coll_tuned_allgather_NTSLR(void *sbuf, int scount, MPI_Datatype stype, //copy a single segment from sbuf to rbuf send_offset = rank * scount * sextent; - smpi_mpi_sendrecv(sbuf, scount, stype, rank, tag, + Request::sendrecv(sbuf, scount, stype, rank, tag, (char *)rbuf + send_offset, rcount, rtype, rank, tag, comm, &status); @@ -47,7 +47,7 @@ smpi_coll_tuned_allgather_NTSLR(void *sbuf, int scount, MPI_Datatype stype, for (i = 0; i < size - 1; i++) { send_offset = ((rank - i + size) % size) * increment; recv_offset = ((rank - i - 1 + size) % size) * increment; - smpi_mpi_sendrecv((char *) rbuf + send_offset, scount, stype, to, tag + i, + Request::sendrecv((char *) rbuf + send_offset, scount, stype, to, tag + i, (char *) rbuf + recv_offset, rcount, rtype, from, tag + i, comm, &status); } diff --git a/src/smpi/colls/allgather-SMP-NTS.cpp b/src/smpi/colls/allgather-SMP-NTS.cpp index f64ca508a9..91f64911f7 100644 --- a/src/smpi/colls/allgather-SMP-NTS.cpp +++ b/src/smpi/colls/allgather-SMP-NTS.cpp @@ -52,7 +52,7 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, } //copy corresponding message from sbuf to rbuf recv_offset = rank * rextent * rcount; - smpi_mpi_sendrecv(sbuf, scount, stype, rank, tag, + Request::sendrecv(sbuf, scount, stype, rank, tag, ((char *) rbuf + recv_offset), rcount, rtype, rank, tag, comm, MPI_STATUS_IGNORE); @@ -68,7 +68,7 @@ int 
smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, (num_core_in_current_smp); recv_offset = src * rextent * rcount; - smpi_mpi_sendrecv(sbuf, scount, stype, dst, tag, + Request::sendrecv(sbuf, scount, stype, dst, tag, ((char *) rbuf + recv_offset), rcount, rtype, src, tag, comm, MPI_STATUS_IGNORE); @@ -91,7 +91,7 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, recv_offset = ((inter_rank - i - 1 + inter_comm_size) % inter_comm_size) * num_core * sextent * scount; - rrequest_array[i] = smpi_mpi_irecv((char *)rbuf + recv_offset, rcount * num_core, + rrequest_array[i] = Request::irecv((char *)rbuf + recv_offset, rcount * num_core, rtype, src, tag + i, comm); } @@ -99,7 +99,7 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, send_offset = ((inter_rank + inter_comm_size) % inter_comm_size) * num_core * sextent * scount; - srequest_array[0] = smpi_mpi_isend((char *)rbuf + send_offset, scount * num_core, + srequest_array[0] = Request::isend((char *)rbuf + send_offset, scount * num_core, stype, dst, tag, comm); // loop : recv-inter , send-inter, send-intra (linear-bcast) @@ -107,11 +107,11 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, recv_offset = ((inter_rank - i - 1 + inter_comm_size) % inter_comm_size) * num_core * sextent * scount; - smpi_mpi_wait(&rrequest_array[i], MPI_STATUS_IGNORE); - srequest_array[i + 1] = smpi_mpi_isend((char *)rbuf + recv_offset, scount * num_core, + Request::wait(&rrequest_array[i], MPI_STATUS_IGNORE); + srequest_array[i + 1] = Request::isend((char *)rbuf + recv_offset, scount * num_core, stype, dst, tag + i + 1, comm); if (num_core_in_current_smp > 1) { - smpi_mpi_send((char *)rbuf + recv_offset, scount * num_core, + Request::send((char *)rbuf + recv_offset, scount * num_core, stype, (rank + 1), tag + i + 1, comm); } } @@ -122,13 +122,13 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, inter_comm_size) % inter_comm_size) * num_core * sextent * scount; //recv_offset = 
((inter_rank + 1) % inter_comm_size) * num_core * sextent * scount; //i=inter_comm_size-2; - smpi_mpi_wait(&rrequest_array[i], MPI_STATUS_IGNORE); + Request::wait(&rrequest_array[i], MPI_STATUS_IGNORE); if (num_core_in_current_smp > 1) { - smpi_mpi_send((char *)rbuf + recv_offset, scount * num_core, + Request::send((char *)rbuf + recv_offset, scount * num_core, stype, (rank + 1), tag + i + 1, comm); } - smpi_mpi_waitall(inter_comm_size - 1, srequest_array, MPI_STATUSES_IGNORE); + Request::waitall(inter_comm_size - 1, srequest_array, MPI_STATUSES_IGNORE); xbt_free(rrequest_array); xbt_free(srequest_array); } @@ -138,7 +138,7 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, recv_offset = ((inter_rank - i - 1 + inter_comm_size) % inter_comm_size) * num_core * sextent * scount; - smpi_mpi_recv((char *) rbuf + recv_offset, (rcount * num_core), rtype, + Request::recv((char *) rbuf + recv_offset, (rcount * num_core), rtype, rank - 1, tag + i + 1, comm, MPI_STATUS_IGNORE); } } @@ -148,9 +148,9 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, recv_offset = ((inter_rank - i - 1 + inter_comm_size) % inter_comm_size) * num_core * sextent * scount; - smpi_mpi_recv((char *) rbuf + recv_offset, (rcount * num_core), rtype, + Request::recv((char *) rbuf + recv_offset, (rcount * num_core), rtype, rank - 1, tag + i + 1, comm, MPI_STATUS_IGNORE); - smpi_mpi_send((char *) rbuf + recv_offset, (scount * num_core), stype, + Request::send((char *) rbuf + recv_offset, (scount * num_core), stype, (rank + 1), tag + i + 1, comm); } } diff --git a/src/smpi/colls/allgather-bruck.cpp b/src/smpi/colls/allgather-bruck.cpp index aaab5e38de..60a6c2cbe3 100644 --- a/src/smpi/colls/allgather-bruck.cpp +++ b/src/smpi/colls/allgather-bruck.cpp @@ -104,7 +104,7 @@ int smpi_coll_tuned_allgather_bruck(void *send_buff, int send_count, src = (rank + pof2) % num_procs; dst = (rank - pof2 + num_procs) % num_procs; - smpi_mpi_sendrecv(tmp_buff, count, recv_type, dst, tag, + 
Request::sendrecv(tmp_buff, count, recv_type, dst, tag, tmp_buff + count * recv_extent, count, recv_type, src, tag, comm, &status); count *= 2; @@ -116,18 +116,18 @@ int smpi_coll_tuned_allgather_bruck(void *send_buff, int send_count, src = (rank + pof2) % num_procs; dst = (rank - pof2 + num_procs) % num_procs; - smpi_mpi_sendrecv(tmp_buff, remainder * recv_count, recv_type, dst, tag, + Request::sendrecv(tmp_buff, remainder * recv_count, recv_type, dst, tag, tmp_buff + count * recv_extent, remainder * recv_count, recv_type, src, tag, comm, &status); } - smpi_mpi_sendrecv(tmp_buff, (num_procs - rank) * recv_count, recv_type, rank, + Request::sendrecv(tmp_buff, (num_procs - rank) * recv_count, recv_type, rank, tag, recv_ptr + rank * recv_count * recv_extent, (num_procs - rank) * recv_count, recv_type, rank, tag, comm, &status); if (rank) - smpi_mpi_sendrecv(tmp_buff + (num_procs - rank) * recv_count * recv_extent, + Request::sendrecv(tmp_buff + (num_procs - rank) * recv_count * recv_extent, rank * recv_count, recv_type, rank, tag, recv_ptr, rank * recv_count, recv_type, rank, tag, comm, &status); smpi_free_tmp_buffer(tmp_buff); diff --git a/src/smpi/colls/allgather-loosely-lr.cpp b/src/smpi/colls/allgather-loosely-lr.cpp index 8724ddafa1..832d65bb3e 100644 --- a/src/smpi/colls/allgather-loosely-lr.cpp +++ b/src/smpi/colls/allgather-loosely-lr.cpp @@ -55,7 +55,7 @@ if(comm->get_leaders_comm()==MPI_COMM_NULL){ //copy corresponding message from sbuf to rbuf recv_offset = rank * rextent * rcount; - smpi_mpi_sendrecv(sbuf, scount, stype, rank, tag, + Request::sendrecv(sbuf, scount, stype, rank, tag, (char *)rbuf + recv_offset, rcount, rtype, rank, tag, comm, &status); int dst, src; @@ -88,9 +88,9 @@ if(comm->get_leaders_comm()==MPI_COMM_NULL){ if (intra_rank == j) { if (i != inter_comm_size - 1) { - inter_rrequest = smpi_mpi_irecv((char *)rbuf + inter_recv_offset, rcount, rtype, + inter_rrequest = Request::irecv((char *)rbuf + inter_recv_offset, rcount, rtype, inter_src, 
tag, comm); - inter_srequest_array[inter_srequest_count++] = smpi_mpi_isend((char *)rbuf + inter_send_offset, scount, stype, + inter_srequest_array[inter_srequest_count++] = Request::isend((char *)rbuf + inter_send_offset, scount, stype, inter_dst, tag, comm); } } @@ -111,8 +111,8 @@ if(comm->get_leaders_comm()==MPI_COMM_NULL){ if (j != intra_rank) { - rrequest_array[rrequest_count++] = smpi_mpi_irecv((char *)rbuf + recv_offset, rcount, rtype, src, tag, comm); - srequest_array[srequest_count++] = smpi_mpi_isend((char *)rbuf + send_offset, scount, stype, dst, tag, comm); + rrequest_array[rrequest_count++] = Request::irecv((char *)rbuf + recv_offset, rcount, rtype, src, tag, comm); + srequest_array[srequest_count++] = Request::isend((char *)rbuf + send_offset, scount, stype, dst, tag, comm); } } // intra loop @@ -120,14 +120,14 @@ if(comm->get_leaders_comm()==MPI_COMM_NULL){ // wait for inter communication to finish for these rounds (# of round equals num_core) if (i != inter_comm_size - 1) { - smpi_mpi_wait(&inter_rrequest, &status); + Request::wait(&inter_rrequest, &status); } } //inter loop - smpi_mpi_waitall(rrequest_count, rrequest_array, MPI_STATUSES_IGNORE); - smpi_mpi_waitall(srequest_count, srequest_array, MPI_STATUSES_IGNORE); - smpi_mpi_waitall(inter_srequest_count, inter_srequest_array, MPI_STATUSES_IGNORE); + Request::waitall(rrequest_count, rrequest_array, MPI_STATUSES_IGNORE); + Request::waitall(srequest_count, srequest_array, MPI_STATUSES_IGNORE); + Request::waitall(inter_srequest_count, inter_srequest_array, MPI_STATUSES_IGNORE); return MPI_SUCCESS; } diff --git a/src/smpi/colls/allgather-ompi-neighborexchange.cpp b/src/smpi/colls/allgather-ompi-neighborexchange.cpp index 9227d51dff..248651ef94 100644 --- a/src/smpi/colls/allgather-ompi-neighborexchange.cpp +++ b/src/smpi/colls/allgather-ompi-neighborexchange.cpp @@ -138,7 +138,7 @@ smpi_coll_tuned_allgather_ompi_neighborexchange(void *sbuf, int scount, tmprecv = (char*)rbuf + neighbor[0] * rcount * 
rext; tmpsend = (char*)rbuf + rank * rcount * rext; /* Sendreceive */ - smpi_mpi_sendrecv(tmpsend, rcount, rdtype, neighbor[0], + Request::sendrecv(tmpsend, rcount, rdtype, neighbor[0], COLL_TAG_ALLGATHER, tmprecv, rcount, rdtype, neighbor[0], COLL_TAG_ALLGATHER, @@ -160,7 +160,7 @@ smpi_coll_tuned_allgather_ompi_neighborexchange(void *sbuf, int scount, tmpsend = (char*)rbuf + send_data_from * rcount * rext; /* Sendreceive */ - smpi_mpi_sendrecv(tmpsend, 2 * rcount, rdtype, + Request::sendrecv(tmpsend, 2 * rcount, rdtype, neighbor[i_parity], COLL_TAG_ALLGATHER, tmprecv, 2 * rcount, rdtype, diff --git a/src/smpi/colls/allgather-pair.cpp b/src/smpi/colls/allgather-pair.cpp index 1d1d2b0e52..74f801b1d4 100644 --- a/src/smpi/colls/allgather-pair.cpp +++ b/src/smpi/colls/allgather-pair.cpp @@ -88,13 +88,13 @@ smpi_coll_tuned_allgather_pair(void *send_buff, int send_count, extent = smpi_datatype_get_extent(send_type); // local send/recv - smpi_mpi_sendrecv(send_ptr, send_count, send_type, rank, tag, + Request::sendrecv(send_ptr, send_count, send_type, rank, tag, recv_ptr + rank * recv_count * extent, recv_count, recv_type, rank, tag, comm, &status); for (i = 1; i < num_procs; i++) { src = dst = rank ^ i; - smpi_mpi_sendrecv(send_ptr, send_count, send_type, dst, tag, + Request::sendrecv(send_ptr, send_count, send_type, dst, tag, recv_ptr + src * recv_count * extent, recv_count, recv_type, src, tag, comm, &status); } diff --git a/src/smpi/colls/allgather-rdb.cpp b/src/smpi/colls/allgather-rdb.cpp index 1d43ca2e38..2490663648 100644 --- a/src/smpi/colls/allgather-rdb.cpp +++ b/src/smpi/colls/allgather-rdb.cpp @@ -42,7 +42,7 @@ smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count, recv_chunk *= recv_count; // perform a local copy - smpi_mpi_sendrecv(send_ptr, send_count, send_type, rank, tag, + Request::sendrecv(send_ptr, send_count, send_type, rank, tag, recv_ptr + rank * recv_chunk, recv_count, recv_type, rank, tag, comm, &status); @@ -57,7 +57,7 @@ 
smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count, recv_offset = dst_tree_root * recv_chunk; if (dst < num_procs) { - smpi_mpi_sendrecv(recv_ptr + send_offset, curr_count, send_type, dst, + Request::sendrecv(recv_ptr + send_offset, curr_count, send_type, dst, tag, recv_ptr + recv_offset, mask * recv_count, recv_type, dst, tag, comm, &status); last_recv_count = smpi_mpi_get_count(&status, recv_type); @@ -96,7 +96,7 @@ smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count, if ((dst > rank) && (rank < tree_root + num_procs_completed) && (dst >= tree_root + num_procs_completed)) { - smpi_mpi_send(recv_ptr + offset, last_recv_count, recv_type, dst, + Request::send(recv_ptr + offset, last_recv_count, recv_type, dst, tag, comm); /* last_recv_cnt was set in the previous @@ -108,7 +108,7 @@ smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count, else if ((dst < rank) && (dst < tree_root + num_procs_completed) && (rank >= tree_root + num_procs_completed)) { - smpi_mpi_recv(recv_ptr + offset, + Request::recv(recv_ptr + offset, recv_count * num_procs_completed, recv_type, dst, tag, comm, &status); // num_procs_completed is also equal to the no. 
of processes diff --git a/src/smpi/colls/allgather-rhv.cpp b/src/smpi/colls/allgather-rhv.cpp index 08edb89fa2..7adc09b233 100644 --- a/src/smpi/colls/allgather-rhv.cpp +++ b/src/smpi/colls/allgather-rhv.cpp @@ -64,7 +64,7 @@ smpi_coll_tuned_allgather_rhv(void *sbuf, int send_count, //perform a remote copy dst = base_offset; - smpi_mpi_sendrecv(sbuf, send_count, send_type, dst, tag, + Request::sendrecv(sbuf, send_count, send_type, dst, tag, (char *)rbuf + base_offset * recv_chunk, recv_count, recv_type, dst, tag, comm, &status); @@ -90,7 +90,7 @@ smpi_coll_tuned_allgather_rhv(void *sbuf, int send_count, // printf("node %d send to %d in phase %d s_offset = %d r_offset = %d count = %d\n",rank,dst,phase, send_base_offset, recv_base_offset, curr_count); - smpi_mpi_sendrecv((char *)rbuf + send_offset, curr_count, recv_type, dst, tag, + Request::sendrecv((char *)rbuf + send_offset, curr_count, recv_type, dst, tag, (char *)rbuf + recv_offset, curr_count, recv_type, dst, tag, comm, &status); diff --git a/src/smpi/colls/allgather-ring.cpp b/src/smpi/colls/allgather-ring.cpp index 2a6d84a63d..f728fef72e 100644 --- a/src/smpi/colls/allgather-ring.cpp +++ b/src/smpi/colls/allgather-ring.cpp @@ -83,14 +83,14 @@ smpi_coll_tuned_allgather_ring(void *send_buff, int send_count, extent = smpi_datatype_get_extent(send_type); // local send/recv - smpi_mpi_sendrecv(sendptr, send_count, send_type, rank, tag, + Request::sendrecv(sendptr, send_count, send_type, rank, tag, recvptr + rank * recv_count * extent, recv_count, recv_type, rank, tag, comm, &status); for (i = 1; i < num_procs; i++) { src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; - smpi_mpi_sendrecv(sendptr, send_count, send_type, dst, tag, + Request::sendrecv(sendptr, send_count, send_type, dst, tag, recvptr + src * recv_count * extent, recv_count, recv_type, src, tag, comm, &status); } diff --git a/src/smpi/colls/allgather-smp-simple.cpp b/src/smpi/colls/allgather-smp-simple.cpp index 
1db7bc2ff7..4dd59a0273 100644 --- a/src/smpi/colls/allgather-smp-simple.cpp +++ b/src/smpi/colls/allgather-smp-simple.cpp @@ -44,7 +44,7 @@ int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount, } //INTRA-SMP-ALLGATHER recv_offset = rank * rextent * rcount; - smpi_mpi_sendrecv(send_buf, scount, stype, rank, tag, + Request::sendrecv(send_buf, scount, stype, rank, tag, ((char *) recv_buf + recv_offset), rcount, rtype, rank, tag, comm, &status); for (i = 1; i < num_core_in_current_smp; i++) { @@ -57,7 +57,7 @@ int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount, (num_core_in_current_smp); recv_offset = src * rextent * rcount; - smpi_mpi_sendrecv(send_buf, scount, stype, dst, tag, + Request::sendrecv(send_buf, scount, stype, dst, tag, ((char *) recv_buf + recv_offset), rcount, rtype, src, tag, comm, &status); @@ -82,10 +82,10 @@ int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount, src = ((inter_rank - i + inter_comm_size) % inter_comm_size) * num_core; //send_offset = (rank * sextent * scount); recv_offset = (src * sextent * scount); - // smpi_mpi_sendrecv((recv_buf+send_offset), (scount * num_core), stype, dst, tag, + // Request::sendrecv((recv_buf+send_offset), (scount * num_core), stype, dst, tag, // (recv_buf+recv_offset), (rcount * num_core), rtype, src, tag, comm, &status); //MPIC_Isend((recv_buf+send_offset), (scount * num_core), stype, dst, tag, comm, req_ptr++); - *(req_ptr++) = smpi_mpi_irecv(((char *) recv_buf + recv_offset), (rcount * num_core), rtype, + *(req_ptr++) = Request::irecv(((char *) recv_buf + recv_offset), (rcount * num_core), rtype, src, tag, comm); } for (i = 1; i < inter_comm_size; i++) { @@ -94,13 +94,13 @@ int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount, //src = ((inter_rank-i+inter_comm_size)%inter_comm_size) * num_core; send_offset = (rank * sextent * scount); //recv_offset = (src * sextent * scount); - // smpi_mpi_sendrecv((recv_buf+send_offset), (scount * num_core), 
stype, dst, tag, + // Request::sendrecv((recv_buf+send_offset), (scount * num_core), stype, dst, tag, // (recv_buf+recv_offset), (rcount * num_core), rtype, src, tag, comm, &status); - *(req_ptr++) = smpi_mpi_isend(((char *) recv_buf + send_offset), (scount * num_core), stype, + *(req_ptr++) = Request::isend(((char *) recv_buf + send_offset), (scount * num_core), stype, dst, tag, comm); //MPIC_Irecv((recv_buf+recv_offset), (rcount * num_core), rtype, src, tag, comm, req_ptr++); } - smpi_mpi_waitall(num_req, reqs, stat); + Request::waitall(num_req, reqs, stat); free(reqs); free(stat); @@ -110,11 +110,11 @@ int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount, if (intra_rank == 0) { for (i = 1; i < num_core_in_current_smp; i++) { //printf("rank = %d, num = %d send to %d\n",rank, num_core_in_current_smp, (rank + i)); - smpi_mpi_send(recv_buf, (scount * comm_size), stype, (rank + i), tag, comm); + Request::send(recv_buf, (scount * comm_size), stype, (rank + i), tag, comm); } } else { //printf("rank = %d recv from %d\n",rank, (inter_rank * num_core)); - smpi_mpi_recv(recv_buf, (rcount * comm_size), rtype, (inter_rank * num_core), + Request::recv(recv_buf, (rcount * comm_size), rtype, (inter_rank * num_core), tag, comm, &status); } diff --git a/src/smpi/colls/allgather-spreading-simple.cpp b/src/smpi/colls/allgather-spreading-simple.cpp index 671f9fcb96..053c0cbcc2 100644 --- a/src/smpi/colls/allgather-spreading-simple.cpp +++ b/src/smpi/colls/allgather-spreading-simple.cpp @@ -93,7 +93,7 @@ smpi_coll_tuned_allgather_spreading_simple(void *send_buff, int send_count, } req_ptr = reqs; - smpi_mpi_sendrecv(send_buff, send_count, send_type, rank, tag, + Request::sendrecv(send_buff, send_count, send_type, rank, tag, (char *) recv_buff + rank * recv_count * extent, recv_count, recv_type, rank, tag, comm, &status); @@ -101,7 +101,7 @@ smpi_coll_tuned_allgather_spreading_simple(void *send_buff, int send_count, src = (rank + i) % num_procs; if (src == rank) 
continue; - *(req_ptr++) = smpi_mpi_irecv(recv_ptr + src * recv_count * extent, recv_count, recv_type, + *(req_ptr++) = Request::irecv(recv_ptr + src * recv_count * extent, recv_count, recv_type, src, tag, comm); } @@ -109,10 +109,10 @@ smpi_coll_tuned_allgather_spreading_simple(void *send_buff, int send_count, dst = (rank + i) % num_procs; if (dst == rank) continue; - *(req_ptr++) = smpi_mpi_isend(send_buff, send_count, send_type, dst, tag, comm); + *(req_ptr++) = Request::isend(send_buff, send_count, send_type, dst, tag, comm); } - smpi_mpi_waitall(num_reqs, reqs, MPI_STATUSES_IGNORE); + Request::waitall(num_reqs, reqs, MPI_STATUSES_IGNORE); free(reqs); return MPI_SUCCESS; diff --git a/src/smpi/colls/allgatherv-mpich-rdb.cpp b/src/smpi/colls/allgatherv-mpich-rdb.cpp index b2ccc5d9eb..651dbd7dc1 100644 --- a/src/smpi/colls/allgatherv-mpich-rdb.cpp +++ b/src/smpi/colls/allgatherv-mpich-rdb.cpp @@ -96,7 +96,7 @@ int smpi_coll_tuned_allgatherv_mpich_rdb ( for (j=0; j>= 1; @@ -178,7 +178,7 @@ int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf, if (intra_rank & mask) { src = (inter_rank * num_core) + (intra_rank - mask); recv_offset = (phase - 3) * pcount * extent; - smpi_mpi_recv((char *)recv_buf + recv_offset, pcount, dtype, src, tag, comm, + Request::recv((char *)recv_buf + recv_offset, pcount, dtype, src, tag, comm, &status); break; } @@ -190,7 +190,7 @@ int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf, dst = (inter_rank * num_core) + (intra_rank + mask); if (dst < comm_size) { send_offset = (phase - 3) * pcount * extent; - smpi_mpi_send((char *)recv_buf + send_offset, pcount, dtype, dst, tag, comm); + Request::send((char *)recv_buf + send_offset, pcount, dtype, dst, tag, comm); } mask >>= 1; } diff --git a/src/smpi/colls/allreduce-smp-binomial.cpp b/src/smpi/colls/allreduce-smp-binomial.cpp index 37829934e4..b4ec04f6cf 100644 --- a/src/smpi/colls/allreduce-smp-binomial.cpp +++ b/src/smpi/colls/allreduce-smp-binomial.cpp @@ -61,7 
+61,7 @@ int smpi_coll_tuned_allreduce_smp_binomial(void *send_buf, void *recv_buf, int inter_comm_size = (comm_size + num_core - 1) / num_core; /* copy input buffer to output buffer */ - smpi_mpi_sendrecv(send_buf, count, dtype, rank, tag, + Request::sendrecv(send_buf, count, dtype, rank, tag, recv_buf, count, dtype, rank, tag, comm, &status); /* start binomial reduce intra communication inside each SMP node */ @@ -70,12 +70,12 @@ int smpi_coll_tuned_allreduce_smp_binomial(void *send_buf, void *recv_buf, if ((mask & intra_rank) == 0) { src = (inter_rank * num_core) + (intra_rank | mask); if (src < comm_size) { - smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status); + Request::recv(tmp_buf, count, dtype, src, tag, comm, &status); smpi_op_apply(op, tmp_buf, recv_buf, &count, &dtype); } } else { dst = (inter_rank * num_core) + (intra_rank & (~mask)); - smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); + Request::send(recv_buf, count, dtype, dst, tag, comm); break; } mask <<= 1; @@ -89,12 +89,12 @@ int smpi_coll_tuned_allreduce_smp_binomial(void *send_buf, void *recv_buf, if ((mask & inter_rank) == 0) { src = (inter_rank | mask) * num_core; if (src < comm_size) { - smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status); + Request::recv(tmp_buf, count, dtype, src, tag, comm, &status); smpi_op_apply(op, tmp_buf, recv_buf, &count, &dtype); } } else { dst = (inter_rank & (~mask)) * num_core; - smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); + Request::send(recv_buf, count, dtype, dst, tag, comm); break; } mask <<= 1; @@ -108,7 +108,7 @@ int smpi_coll_tuned_allreduce_smp_binomial(void *send_buf, void *recv_buf, while (mask < inter_comm_size) { if (inter_rank & mask) { src = (inter_rank - mask) * num_core; - smpi_mpi_recv(recv_buf, count, dtype, src, tag, comm, &status); + Request::recv(recv_buf, count, dtype, src, tag, comm, &status); break; } mask <<= 1; @@ -119,7 +119,7 @@ int smpi_coll_tuned_allreduce_smp_binomial(void *send_buf, void 
*recv_buf, if (inter_rank < inter_comm_size) { dst = (inter_rank + mask) * num_core; if (dst < comm_size) { - smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); + Request::send(recv_buf, count, dtype, dst, tag, comm); } } mask >>= 1; @@ -135,7 +135,7 @@ int smpi_coll_tuned_allreduce_smp_binomial(void *send_buf, void *recv_buf, while (mask < num_core_in_current_smp) { if (intra_rank & mask) { src = (inter_rank * num_core) + (intra_rank - mask); - smpi_mpi_recv(recv_buf, count, dtype, src, tag, comm, &status); + Request::recv(recv_buf, count, dtype, src, tag, comm, &status); break; } mask <<= 1; @@ -145,7 +145,7 @@ int smpi_coll_tuned_allreduce_smp_binomial(void *send_buf, void *recv_buf, while (mask > 0) { dst = (inter_rank * num_core) + (intra_rank + mask); if (dst < comm_size) { - smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); + Request::send(recv_buf, count, dtype, dst, tag, comm); } mask >>= 1; } diff --git a/src/smpi/colls/allreduce-smp-rdb.cpp b/src/smpi/colls/allreduce-smp-rdb.cpp index b95bf7f443..a330eef93d 100644 --- a/src/smpi/colls/allreduce-smp-rdb.cpp +++ b/src/smpi/colls/allreduce-smp-rdb.cpp @@ -68,7 +68,7 @@ int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, int inter_comm_size = (comm_size + num_core - 1) / num_core; /* copy input buffer to output buffer */ - smpi_mpi_sendrecv(send_buf, count, dtype, rank, tag, + Request::sendrecv(send_buf, count, dtype, rank, tag, recv_buf, count, dtype, rank, tag, comm, &status); /* start binomial reduce intra communication inside each SMP node */ @@ -77,12 +77,12 @@ int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, if ((mask & intra_rank) == 0) { src = (inter_rank * num_core) + (intra_rank | mask); if (src < comm_size) { - smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status); + Request::recv(tmp_buf, count, dtype, src, tag, comm, &status); smpi_op_apply(op, tmp_buf, recv_buf, &count, &dtype); } } else { dst = (inter_rank * 
num_core) + (intra_rank & (~mask)); - smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); + Request::send(recv_buf, count, dtype, dst, tag, comm); break; } mask <<= 1; @@ -110,11 +110,11 @@ int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, if (inter_rank < 2 * rem) { if (inter_rank % 2 == 0) { dst = rank + num_core; - smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); + Request::send(recv_buf, count, dtype, dst, tag, comm); newrank = -1; } else { src = rank - num_core; - smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status); + Request::recv(tmp_buf, count, dtype, src, tag, comm, &status); smpi_op_apply(op, tmp_buf, recv_buf, &count, &dtype); newrank = inter_rank / 2; } @@ -139,7 +139,7 @@ int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, dst *= num_core; /* exchange data in rdb manner */ - smpi_mpi_sendrecv(recv_buf, count, dtype, dst, tag, tmp_buf, count, dtype, + Request::sendrecv(recv_buf, count, dtype, dst, tag, tmp_buf, count, dtype, dst, tag, comm, &status); smpi_op_apply(op, tmp_buf, recv_buf, &count, &dtype); mask <<= 1; @@ -151,9 +151,9 @@ int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, */ if (inter_rank < 2 * rem) { if (inter_rank % 2) { - smpi_mpi_send(recv_buf, count, dtype, rank - num_core, tag, comm); + Request::send(recv_buf, count, dtype, rank - num_core, tag, comm); } else { - smpi_mpi_recv(recv_buf, count, dtype, rank + num_core, tag, comm, &status); + Request::recv(recv_buf, count, dtype, rank + num_core, tag, comm, &status); } } } @@ -167,7 +167,7 @@ int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, while (mask < num_core_in_current_smp) { if (intra_rank & mask) { src = (inter_rank * num_core) + (intra_rank - mask); - smpi_mpi_recv(recv_buf, count, dtype, src, tag, comm, &status); + Request::recv(recv_buf, count, dtype, src, tag, comm, &status); break; } mask <<= 1; @@ -177,7 +177,7 @@ int 
smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, while (mask > 0) { dst = (inter_rank * num_core) + (intra_rank + mask); if (dst < comm_size) { - smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); + Request::send(recv_buf, count, dtype, dst, tag, comm); } mask >>= 1; } diff --git a/src/smpi/colls/allreduce-smp-rsag-lr.cpp b/src/smpi/colls/allreduce-smp-rsag-lr.cpp index 7e3dfefb1d..253376a460 100644 --- a/src/smpi/colls/allreduce-smp-rsag-lr.cpp +++ b/src/smpi/colls/allreduce-smp-rsag-lr.cpp @@ -60,7 +60,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, } - smpi_mpi_sendrecv(send_buf, count, dtype, rank, tag, + Request::sendrecv(send_buf, count, dtype, rank, tag, recv_buf, count, dtype, rank, tag, comm, &status); @@ -71,14 +71,14 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, src = (inter_rank * num_core) + (intra_rank | mask); // if (src < ((inter_rank + 1) * num_core)) { if (src < comm_size) { - smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status); + Request::recv(tmp_buf, count, dtype, src, tag, comm, &status); smpi_op_apply(op, tmp_buf, recv_buf, &count, &dtype); //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); } } else { dst = (inter_rank * num_core) + (intra_rank & (~mask)); - smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); + Request::send(recv_buf, count, dtype, dst, tag, comm); //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); break; } @@ -125,7 +125,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, else recv_count = curr_size + curr_remainder; - smpi_mpi_sendrecv((char *) recv_buf + send_offset, send_count, dtype, to, + Request::sendrecv((char *) recv_buf + send_offset, send_count, dtype, to, tag + i, tmp_buf, recv_count, dtype, from, tag + i, comm, &status); @@ -155,7 +155,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, else recv_count = 
curr_size + curr_remainder; - smpi_mpi_sendrecv((char *) recv_buf + send_offset, send_count, dtype, to, + Request::sendrecv((char *) recv_buf + send_offset, send_count, dtype, to, tag + i, (char *) recv_buf + recv_offset, recv_count, dtype, from, tag + i, comm, &status); @@ -175,14 +175,14 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, if ((mask & inter_rank) == 0) { src = (inter_rank | mask) * num_core; if (src < comm_size) { - smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status); + Request::recv(tmp_buf, count, dtype, src, tag, comm, &status); (* uop) (tmp_buf, recv_buf, &count, &dtype); //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); } } else { dst = (inter_rank & (~mask)) * num_core; - smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); + Request::send(recv_buf, count, dtype, dst, tag, comm); //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); break; } @@ -200,7 +200,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, // if (inter_rank & mask) { // src = (inter_rank - mask) * num_core; //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); -// smpi_mpi_recv(recv_buf, count, dtype, src, tag, comm, &status); +// Request::recv(recv_buf, count, dtype, src, tag, comm, &status); // break; // } // mask <<= 1; @@ -214,7 +214,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, // dst = (inter_rank + mask) * num_core; // if (dst < comm_size) { // //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); -// smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); +// Request::send(recv_buf, count, dtype, dst, tag, comm); // } // } // mask >>= 1; @@ -233,7 +233,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, if (intra_rank & mask) { src = (inter_rank * num_core) + (intra_rank - mask); //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); - 
smpi_mpi_recv(recv_buf, count, dtype, src, tag, comm, &status); + Request::recv(recv_buf, count, dtype, src, tag, comm, &status); break; } mask <<= 1; @@ -246,7 +246,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, dst = (inter_rank * num_core) + (intra_rank + mask); if (dst < comm_size) { //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); - smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); + Request::send(recv_buf, count, dtype, dst, tag, comm); } mask >>= 1; } diff --git a/src/smpi/colls/allreduce-smp-rsag-rab.cpp b/src/smpi/colls/allreduce-smp-rsag-rab.cpp index 64c17294a6..c3da37812f 100644 --- a/src/smpi/colls/allreduce-smp-rsag-rab.cpp +++ b/src/smpi/colls/allreduce-smp-rsag-rab.cpp @@ -53,7 +53,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count, int inter_comm_size = (comm_size + num_core - 1) / num_core; - smpi_mpi_sendrecv(sbuf, count, dtype, rank, tag, + Request::sendrecv(sbuf, count, dtype, rank, tag, rbuf, count, dtype, rank, tag, comm, &status); // SMP_binomial_reduce @@ -63,14 +63,14 @@ int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count, src = (inter_rank * num_core) + (intra_rank | mask); // if (src < ((inter_rank + 1) * num_core)) { if (src < comm_size) { - smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status); + Request::recv(tmp_buf, count, dtype, src, tag, comm, &status); smpi_op_apply(op, tmp_buf, rbuf, &count, &dtype); //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); } } else { dst = (inter_rank * num_core) + (intra_rank & (~mask)); - smpi_mpi_send(rbuf, count, dtype, dst, tag, comm); + Request::send(rbuf, count, dtype, dst, tag, comm); //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); break; } @@ -114,7 +114,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count, // if (rank==7) // printf("node %d send to %d in phase %d s_offset = %d r_offset = %d 
count = %d\n",rank,dst,phase, send_offset, recv_offset, curr_count); - smpi_mpi_sendrecv((char *)rbuf + send_offset, curr_count, dtype, (dst * num_core), tag, + Request::sendrecv((char *)rbuf + send_offset, curr_count, dtype, (dst * num_core), tag, tmp_buf, curr_count, dtype, (dst * num_core), tag, comm, &status); @@ -161,7 +161,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count, // if (rank==7) //printf("node %d send to %d in phase %d s_offset = %d r_offset = %d count = %d\n",rank,dst,phase, send_offset, recv_offset, curr_count); - smpi_mpi_sendrecv((char *)rbuf + send_offset, curr_count, dtype, (dst * num_core), tag, + Request::sendrecv((char *)rbuf + send_offset, curr_count, dtype, (dst * num_core), tag, (char *)rbuf + recv_offset, curr_count, dtype, (dst * num_core), tag, comm, &status); @@ -187,7 +187,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count, if (intra_rank & mask) { src = (inter_rank * num_core) + (intra_rank - mask); //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); - smpi_mpi_recv(rbuf, count, dtype, src, tag, comm, &status); + Request::recv(rbuf, count, dtype, src, tag, comm, &status); break; } mask <<= 1; @@ -200,7 +200,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count, dst = (inter_rank * num_core) + (intra_rank + mask); if (dst < comm_size) { //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); - smpi_mpi_send(rbuf, count, dtype, dst, tag, comm); + Request::send(rbuf, count, dtype, dst, tag, comm); } mask >>= 1; } diff --git a/src/smpi/colls/allreduce-smp-rsag.cpp b/src/smpi/colls/allreduce-smp-rsag.cpp index 298c9784d0..8cb4532435 100644 --- a/src/smpi/colls/allreduce-smp-rsag.cpp +++ b/src/smpi/colls/allreduce-smp-rsag.cpp @@ -59,7 +59,7 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, } - smpi_mpi_sendrecv(send_buf, count, dtype, rank, tag, + Request::sendrecv(send_buf, count, 
dtype, rank, tag, recv_buf, count, dtype, rank, tag, comm, &status); @@ -70,14 +70,14 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, src = (inter_rank * num_core) + (intra_rank | mask); // if (src < ((inter_rank + 1) * num_core)) { if (src < comm_size) { - smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status); + Request::recv(tmp_buf, count, dtype, src, tag, comm, &status); smpi_op_apply(op, tmp_buf, recv_buf, &count, &dtype); //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); } } else { dst = (inter_rank * num_core) + (intra_rank & (~mask)); - smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); + Request::send(recv_buf, count, dtype, dst, tag, comm); //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); break; } @@ -106,7 +106,7 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, ((inter_rank - 2 - i + inter_comm_size) % inter_comm_size) * seg_count * extent; - smpi_mpi_sendrecv((char *) recv_buf + send_offset, seg_count, dtype, to, + Request::sendrecv((char *) recv_buf + send_offset, seg_count, dtype, to, tag + i, tmp_buf, seg_count, dtype, from, tag + i, comm, &status); @@ -125,7 +125,7 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, ((inter_rank - 1 - i + inter_comm_size) % inter_comm_size) * seg_count * extent; - smpi_mpi_sendrecv((char *) recv_buf + send_offset, seg_count, dtype, to, + Request::sendrecv((char *) recv_buf + send_offset, seg_count, dtype, to, tag + i, (char *) recv_buf + recv_offset, seg_count, dtype, from, tag + i, comm, &status); @@ -144,14 +144,14 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, // if ((mask & inter_rank) == 0) { // src = (inter_rank | mask) * num_core; // if (src < comm_size) { -// smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status); +// Request::recv(tmp_buf, count, dtype, src, tag, comm, &status); // (* uop) (tmp_buf, recv_buf, &count, &dtype); //printf("Node %d 
recv from node %d when mask is %d\n", rank, src, mask); // } // } // else { // dst = (inter_rank & (~mask)) * num_core; -// smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); +// Request::send(recv_buf, count, dtype, dst, tag, comm); //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); // break; // } @@ -168,7 +168,7 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, // if (inter_rank & mask) { // src = (inter_rank - mask) * num_core; //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); -// smpi_mpi_recv(recv_buf, count, dtype, src, tag, comm, &status); +// Request::recv(recv_buf, count, dtype, src, tag, comm, &status); // break; // } // mask <<= 1; @@ -182,7 +182,7 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, // dst = (inter_rank + mask) * num_core; // if (dst < comm_size) { //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); -// smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); +// Request::send(recv_buf, count, dtype, dst, tag, comm); // } // } // mask >>= 1; @@ -202,7 +202,7 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, if (intra_rank & mask) { src = (inter_rank * num_core) + (intra_rank - mask); //printf("Node %d recv from node %d when mask is %d\n", rank, src, mask); - smpi_mpi_recv(recv_buf, count, dtype, src, tag, comm, &status); + Request::recv(recv_buf, count, dtype, src, tag, comm, &status); break; } mask <<= 1; @@ -215,7 +215,7 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, dst = (inter_rank * num_core) + (intra_rank + mask); if (dst < comm_size) { //printf("Node %d send to node %d when mask is %d\n", rank, dst, mask); - smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm); + Request::send(recv_buf, count, dtype, dst, tag, comm); } mask >>= 1; } diff --git a/src/smpi/colls/alltoall-2dmesh.cpp b/src/smpi/colls/alltoall-2dmesh.cpp index 784552ab26..3451012cfa 100644 --- 
a/src/smpi/colls/alltoall-2dmesh.cpp +++ b/src/smpi/colls/alltoall-2dmesh.cpp @@ -100,17 +100,17 @@ int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count, continue; recv_offset = (src % Y) * block_size * num_procs; - *(req_ptr++) = smpi_mpi_irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm); + *(req_ptr++) = Request::irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm); } for (i = 0; i < Y; i++) { dst = i + my_row_base; if (dst == rank) continue; - smpi_mpi_send(send_buff, count, send_type, dst, tag, comm); + Request::send(send_buff, count, send_type, dst, tag, comm); } - smpi_mpi_waitall(Y - 1, reqs, statuses); + Request::waitall(Y - 1, reqs, statuses); req_ptr = reqs; for (i = 0; i < Y; i++) { @@ -118,13 +118,13 @@ int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count, recv_offset = (my_row_base * block_size) + (i * block_size); if (i + my_row_base == rank) - smpi_mpi_sendrecv((char *) send_buff + recv_offset, send_count, send_type, + Request::sendrecv((char *) send_buff + recv_offset, send_count, send_type, rank, tag, (char *) recv_buff + recv_offset, recv_count, recv_type, rank, tag, comm, &s); else - smpi_mpi_sendrecv(tmp_buff1 + send_offset, send_count, send_type, + Request::sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag, (char *) recv_buff + recv_offset, recv_count, recv_type, rank, tag, comm, &s); @@ -137,7 +137,7 @@ int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count, continue; src_row_base = (src / Y) * Y; - *(req_ptr++) = smpi_mpi_irecv((char *) recv_buff + src_row_base * block_size, recv_count * Y, + *(req_ptr++) = Request::irecv((char *) recv_buff + src_row_base * block_size, recv_count * Y, recv_type, src, tag, comm); } @@ -151,11 +151,11 @@ int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count, send_offset = (dst + j * num_procs) * block_size; if (j + my_row_base == rank) - smpi_mpi_sendrecv((char *) send_buff + dst * block_size, send_count, + 
Request::sendrecv((char *) send_buff + dst * block_size, send_count, send_type, rank, tag, tmp_buff2 + recv_offset, recv_count, recv_type, rank, tag, comm, &s); else - smpi_mpi_sendrecv(tmp_buff1 + send_offset, send_count, send_type, + Request::sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag, tmp_buff2 + recv_offset, recv_count, recv_type, rank, tag, comm, &s); @@ -163,9 +163,9 @@ int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count, recv_offset += block_size; } - smpi_mpi_send(tmp_buff2, send_count * Y, send_type, dst, tag, comm); + Request::send(tmp_buff2, send_count * Y, send_type, dst, tag, comm); } - smpi_mpi_waitall(X - 1, reqs, statuses); + Request::waitall(X - 1, reqs, statuses); free(reqs); free(statuses); smpi_free_tmp_buffer(tmp_buff1); diff --git a/src/smpi/colls/alltoall-3dmesh.cpp b/src/smpi/colls/alltoall-3dmesh.cpp index 2c4b10c8f9..695f6d738d 100644 --- a/src/smpi/colls/alltoall-3dmesh.cpp +++ b/src/smpi/colls/alltoall-3dmesh.cpp @@ -92,7 +92,7 @@ int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, recv_offset = (rank % two_dsize) * block_size * num_procs; - smpi_mpi_sendrecv(send_buff, send_count * num_procs, send_type, rank, tag, + Request::sendrecv(send_buff, send_count * num_procs, send_type, rank, tag, tmp_buff1 + recv_offset, num_procs * recv_count, recv_type, rank, tag, comm, &status); @@ -103,17 +103,17 @@ int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, if (src == rank) continue; recv_offset = (src % two_dsize) * block_size * num_procs; - *(req_ptr++) = smpi_mpi_irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm); + *(req_ptr++) = Request::irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm); } for (i = 0; i < Y; i++) { dst = i + my_row_base; if (dst == rank) continue; - smpi_mpi_send(send_buff, count, send_type, dst, tag, comm); + Request::send(send_buff, count, send_type, dst, tag, comm); } - smpi_mpi_waitall(Y - 1, reqs, statuses); + 
Request::waitall(Y - 1, reqs, statuses); req_ptr = reqs; @@ -125,7 +125,7 @@ int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, src_row_base = (src / X) * X; recv_offset = (src_row_base % two_dsize) * block_size * num_procs; - *(req_ptr++) = smpi_mpi_irecv(tmp_buff1 + recv_offset, recv_count * num_procs * Y, + *(req_ptr++) = Request::irecv(tmp_buff1 + recv_offset, recv_count * num_procs * Y, recv_type, src, tag, comm); } @@ -134,17 +134,17 @@ int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, dst = (i * Y + my_col_base); if (dst == rank) continue; - smpi_mpi_send(tmp_buff1 + send_offset, send_count * num_procs * Y, send_type, + Request::send(tmp_buff1 + send_offset, send_count * num_procs * Y, send_type, dst, tag, comm); } - smpi_mpi_waitall(X - 1, reqs, statuses); + Request::waitall(X - 1, reqs, statuses); req_ptr = reqs; for (i = 0; i < two_dsize; i++) { send_offset = (rank * block_size) + (i * block_size * num_procs); recv_offset = (my_z_base * block_size) + (i * block_size); - smpi_mpi_sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag, + Request::sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag, (char *) recv_buff + recv_offset, recv_count, recv_type, rank, tag, comm, &status); } @@ -155,7 +155,7 @@ int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, recv_offset = (src_z_base * block_size); - *(req_ptr++) = smpi_mpi_irecv((char *) recv_buff + recv_offset, recv_count * two_dsize, + *(req_ptr++) = Request::irecv((char *) recv_buff + recv_offset, recv_count * two_dsize, recv_type, src, tag, comm); } @@ -165,18 +165,18 @@ int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, recv_offset = 0; for (j = 0; j < two_dsize; j++) { send_offset = (dst + j * num_procs) * block_size; - smpi_mpi_sendrecv(tmp_buff1 + send_offset, send_count, send_type, + Request::sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag, tmp_buff2 + recv_offset, recv_count, 
recv_type, rank, tag, comm, &status); recv_offset += block_size; } - smpi_mpi_send(tmp_buff2, send_count * two_dsize, send_type, dst, tag, comm); + Request::send(tmp_buff2, send_count * two_dsize, send_type, dst, tag, comm); } - smpi_mpi_waitall(Z - 1, reqs, statuses); + Request::waitall(Z - 1, reqs, statuses); free(reqs); free(statuses); diff --git a/src/smpi/colls/alltoall-bruck.cpp b/src/smpi/colls/alltoall-bruck.cpp index 903931ba37..b4922c8944 100644 --- a/src/smpi/colls/alltoall-bruck.cpp +++ b/src/smpi/colls/alltoall-bruck.cpp @@ -52,12 +52,12 @@ smpi_coll_tuned_alltoall_bruck(void *send_buff, int send_count, disps = (int *) xbt_malloc(sizeof(int) * num_procs); blocks_length = (int *) xbt_malloc(sizeof(int) * num_procs); - smpi_mpi_sendrecv(send_ptr + rank * send_count * extent, + Request::sendrecv(send_ptr + rank * send_count * extent, (num_procs - rank) * send_count, send_type, rank, tag, recv_ptr, (num_procs - rank) * recv_count, recv_type, rank, tag, comm, &status); - smpi_mpi_sendrecv(send_ptr, rank * send_count, send_type, rank, tag, + Request::sendrecv(send_ptr, rank * send_count, send_type, rank, tag, recv_ptr + (num_procs - rank) * recv_count * extent, rank * recv_count, recv_type, rank, tag, comm, &status); @@ -84,7 +84,7 @@ smpi_coll_tuned_alltoall_bruck(void *send_buff, int send_count, position = 0; MPI_Pack(recv_buff, 1, new_type, tmp_buff, pack_size, &position, comm); - smpi_mpi_sendrecv(tmp_buff, position, MPI_PACKED, dst, tag, recv_buff, 1, + Request::sendrecv(tmp_buff, position, MPI_PACKED, dst, tag, recv_buff, 1, new_type, src, tag, comm, &status); smpi_datatype_unuse(new_type); @@ -94,18 +94,18 @@ smpi_coll_tuned_alltoall_bruck(void *send_buff, int send_count, free(disps); free(blocks_length); - smpi_mpi_sendrecv(recv_ptr + (rank + 1) * recv_count * extent, + Request::sendrecv(recv_ptr + (rank + 1) * recv_count * extent, (num_procs - rank - 1) * recv_count, send_type, rank, tag, tmp_buff, (num_procs - rank - 1) * recv_count, recv_type, 
rank, tag, comm, &status); - smpi_mpi_sendrecv(recv_ptr, (rank + 1) * recv_count, send_type, rank, tag, + Request::sendrecv(recv_ptr, (rank + 1) * recv_count, send_type, rank, tag, tmp_buff + (num_procs - rank - 1) * recv_count * extent, (rank + 1) * recv_count, recv_type, rank, tag, comm, &status); for (i = 0; i < num_procs; i++) - smpi_mpi_sendrecv(tmp_buff + i * recv_count * extent, recv_count, send_type, + Request::sendrecv(tmp_buff + i * recv_count * extent, recv_count, send_type, rank, tag, recv_ptr + (num_procs - i - 1) * recv_count * extent, recv_count, recv_type, rank, tag, comm, &status); diff --git a/src/smpi/colls/alltoall-mvapich-scatter-dest.cpp b/src/smpi/colls/alltoall-mvapich-scatter-dest.cpp index 92b80663ef..3cc984fc14 100644 --- a/src/smpi/colls/alltoall-mvapich-scatter-dest.cpp +++ b/src/smpi/colls/alltoall-mvapich-scatter-dest.cpp @@ -101,7 +101,7 @@ int smpi_coll_tuned_alltoall_mvapich2_scatter_dest( /* do the communication -- post ss sends and receives: */ for ( i=0; i rank) && (rank < tree_root + num_procs_completed) && (dst >= tree_root + num_procs_completed)) { - smpi_mpi_send(tmp_buff + dst_tree_root * send_increment, + Request::send(tmp_buff + dst_tree_root * send_increment, last_recv_count, send_type, dst, tag, comm); } @@ -127,7 +127,7 @@ int smpi_coll_tuned_alltoall_rdb(void *send_buff, int send_count, else if ((dst < rank) && (dst < tree_root + num_procs_completed) && (rank >= tree_root + num_procs_completed)) { - smpi_mpi_recv(tmp_buff + dst_tree_root * send_increment, + Request::recv(tmp_buff + dst_tree_root * send_increment, mask * num_procs * send_count, send_type, dst, tag, comm, &status); @@ -145,7 +145,7 @@ int smpi_coll_tuned_alltoall_rdb(void *send_buff, int send_count, } for (i = 0; i < num_procs; i++) - smpi_mpi_sendrecv(tmp_buff + (rank + i * num_procs) * send_count * extent, + Request::sendrecv(tmp_buff + (rank + i * num_procs) * send_count * extent, send_count, send_type, rank, tag, recv_ptr + (i * recv_count * 
extent), recv_count, recv_type, rank, tag, comm, &status); diff --git a/src/smpi/colls/alltoall-ring-light-barrier.cpp b/src/smpi/colls/alltoall-ring-light-barrier.cpp index 26e91c2466..04749d1aa9 100644 --- a/src/smpi/colls/alltoall-ring-light-barrier.cpp +++ b/src/smpi/colls/alltoall-ring-light-barrier.cpp @@ -51,7 +51,7 @@ smpi_coll_tuned_alltoall_ring_light_barrier(void *send_buff, int send_count, send_chunk *= send_count; recv_chunk *= recv_count; - smpi_mpi_sendrecv(send_ptr + rank * send_chunk, send_count, send_type, rank, tag, + Request::sendrecv(send_ptr + rank * send_chunk, send_count, send_type, rank, tag, recv_ptr + rank * recv_chunk, recv_count, recv_type, rank, tag, comm, &s); @@ -59,14 +59,14 @@ smpi_coll_tuned_alltoall_ring_light_barrier(void *send_buff, int send_count, src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; - smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type, + Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag, comm, &s); if ((i + 1) < num_procs) { next_src = (rank - (i + 1) + num_procs) % num_procs; next_dst = (rank + (i + 1) + num_procs) % num_procs; - smpi_mpi_sendrecv(&send_sync, 1, MPI_CHAR, next_src, tag, + Request::sendrecv(&send_sync, 1, MPI_CHAR, next_src, tag, &recv_sync, 1, MPI_CHAR, next_dst, tag, comm, &s); } diff --git a/src/smpi/colls/alltoall-ring-mpi-barrier.cpp b/src/smpi/colls/alltoall-ring-mpi-barrier.cpp index 2c8a9dc5e4..b94671d484 100644 --- a/src/smpi/colls/alltoall-ring-mpi-barrier.cpp +++ b/src/smpi/colls/alltoall-ring-mpi-barrier.cpp @@ -53,7 +53,7 @@ smpi_coll_tuned_alltoall_ring_mpi_barrier(void *send_buff, int send_count, dst = (rank + i) % num_procs; mpi_coll_barrier_fun(comm); - smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, + Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, recv_ptr + src * recv_chunk, recv_count, 
recv_type, src, tag, comm, &s); } diff --git a/src/smpi/colls/alltoall-ring-one-barrier.cpp b/src/smpi/colls/alltoall-ring-one-barrier.cpp index ca0a770203..83f023756f 100644 --- a/src/smpi/colls/alltoall-ring-one-barrier.cpp +++ b/src/smpi/colls/alltoall-ring-one-barrier.cpp @@ -52,7 +52,7 @@ smpi_coll_tuned_alltoall_ring_one_barrier(void *send_buff, int send_count, src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; - smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, + Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag, comm, &s); } diff --git a/src/smpi/colls/alltoall-ring.cpp b/src/smpi/colls/alltoall-ring.cpp index b87117378f..b0230019cd 100644 --- a/src/smpi/colls/alltoall-ring.cpp +++ b/src/smpi/colls/alltoall-ring.cpp @@ -51,7 +51,7 @@ smpi_coll_tuned_alltoall_ring(void *send_buff, int send_count, src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; - smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, + Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag, comm, &s); } diff --git a/src/smpi/colls/alltoallv-bruck.cpp b/src/smpi/colls/alltoallv-bruck.cpp index 9d18f95042..5cef5c778f 100644 --- a/src/smpi/colls/alltoallv-bruck.cpp +++ b/src/smpi/colls/alltoallv-bruck.cpp @@ -63,7 +63,7 @@ int smpi_coll_tuned_alltoallv_bruck(void *sendbuf, int *sendcounts, int *senddis continue; } - requests[count]=smpi_mpi_irecv((char *)recvbuf + recvdisps[dst] * recvext, recvcounts[dst], + requests[count]=Request::irecv((char *)recvbuf + recvdisps[dst] * recvext, recvcounts[dst], recvtype, dst, system_tag, comm ); count++; } @@ -75,14 +75,14 @@ int smpi_coll_tuned_alltoallv_bruck(void *sendbuf, int *sendcounts, int *senddis rank, i, sendcounts[dst]); continue; } - requests[count]=smpi_mpi_isend((char *)sendbuf + 
senddisps[dst] * sendext, sendcounts[dst], + requests[count]=Request::isend((char *)sendbuf + senddisps[dst] * sendext, sendcounts[dst], sendtype, dst, system_tag, comm); count++; } /* Wait for them all. */ //smpi_mpi_startall(count, requests); XBT_DEBUG("<%d> wait for %d requests", rank, count); - smpi_mpi_waitall(count, requests, MPI_STATUSES_IGNORE); + Request::waitall(count, requests, MPI_STATUSES_IGNORE); xbt_free(requests); } diff --git a/src/smpi/colls/alltoallv-ompi-basic-linear.cpp b/src/smpi/colls/alltoallv-ompi-basic-linear.cpp index 0f55bda157..e7d7da7e02 100644 --- a/src/smpi/colls/alltoallv-ompi-basic-linear.cpp +++ b/src/smpi/colls/alltoallv-ompi-basic-linear.cpp @@ -60,7 +60,7 @@ smpi_coll_tuned_alltoallv_ompi_basic_linear(void *sbuf, int *scounts, int *sdisp prcv = ((char *) rbuf) + (rdisps[i] * rext); - *preq = smpi_irecv_init(prcv, rcounts[i], rdtype, + *preq = Request::irecv_init(prcv, rcounts[i], rdtype, i, COLL_TAG_ALLTOALLV, comm ); preq++; @@ -75,7 +75,7 @@ smpi_coll_tuned_alltoallv_ompi_basic_linear(void *sbuf, int *scounts, int *sdisp } psnd = ((char *) sbuf) + (sdisps[i] * sext); - *preq=smpi_isend_init(psnd, scounts[i], sdtype, + *preq=Request::isend_init(psnd, scounts[i], sdtype, i, COLL_TAG_ALLTOALLV, comm ); preq++; @@ -83,7 +83,7 @@ smpi_coll_tuned_alltoallv_ompi_basic_linear(void *sbuf, int *scounts, int *sdisp } /* Start your engines. This will never return an error. */ - smpi_mpi_startall(nreqs, ireqs); + Request::startall(nreqs, ireqs); /* Wait for them all. If there's an error, note that we don't care * what the error was -- just that there *was* an error. The PML @@ -91,12 +91,13 @@ smpi_coll_tuned_alltoallv_ompi_basic_linear(void *sbuf, int *scounts, int *sdisp * i.e., by the end of this call, all the requests are free-able. * So free them anyway -- even if there was an error, and return the * error after we free everything. 
*/ - smpi_mpi_waitall(nreqs, ireqs, + Request::waitall(nreqs, ireqs, MPI_STATUSES_IGNORE); /* Free the requests. */ for (i = 0; i < nreqs; ++i) { - if(ireqs[i]!=MPI_REQUEST_NULL)smpi_mpi_request_free(&ireqs[i]); + if(ireqs[i]!=MPI_REQUEST_NULL) + Request::unuse(&ireqs[i]); } free(ireqs); diff --git a/src/smpi/colls/alltoallv-pair-light-barrier.cpp b/src/smpi/colls/alltoallv-pair-light-barrier.cpp index ea05e33ebe..6637187ebe 100644 --- a/src/smpi/colls/alltoallv-pair-light-barrier.cpp +++ b/src/smpi/colls/alltoallv-pair-light-barrier.cpp @@ -52,20 +52,20 @@ smpi_coll_tuned_alltoallv_pair_light_barrier(void *send_buff, int *send_counts, send_chunk = smpi_datatype_get_extent(send_type); recv_chunk = smpi_datatype_get_extent(recv_type); - smpi_mpi_sendrecv(send_ptr + send_disps[rank] * send_chunk, send_counts[rank], send_type, rank, tag, + Request::sendrecv(send_ptr + send_disps[rank] * send_chunk, send_counts[rank], send_type, rank, tag, recv_ptr + recv_disps[rank] * recv_chunk, recv_counts[rank], recv_type, rank, tag, comm, &s); for (i = 1; i < num_procs; i++) { src = dst = rank ^ i; - smpi_mpi_sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, + Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, tag, recv_ptr + recv_disps[src] *recv_chunk, recv_counts[dst], recv_type, src, tag, comm, &s); if ((i + 1) < num_procs) { next_partner = rank ^ (i + 1); - smpi_mpi_sendrecv(&send_sync, 1, MPI_CHAR, next_partner, tag, + Request::sendrecv(&send_sync, 1, MPI_CHAR, next_partner, tag, &recv_sync, 1, MPI_CHAR, next_partner, tag, comm, &s); } } diff --git a/src/smpi/colls/alltoallv-pair-mpi-barrier.cpp b/src/smpi/colls/alltoallv-pair-mpi-barrier.cpp index 95231a187a..8d4dfb57e7 100644 --- a/src/smpi/colls/alltoallv-pair-mpi-barrier.cpp +++ b/src/smpi/colls/alltoallv-pair-mpi-barrier.cpp @@ -52,7 +52,7 @@ smpi_coll_tuned_alltoallv_pair_mpi_barrier(void *send_buff, int *send_counts, in for (i = 0; i < num_procs; 
i++) { src = dst = rank ^ i; smpi_mpi_barrier(comm); - smpi_mpi_sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, + Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, tag, recv_ptr + recv_disps[src] * recv_chunk, recv_counts[src], recv_type, src, tag, comm, &s); } diff --git a/src/smpi/colls/alltoallv-pair-one-barrier.cpp b/src/smpi/colls/alltoallv-pair-one-barrier.cpp index c53ddde2dd..0ab3b284d5 100644 --- a/src/smpi/colls/alltoallv-pair-one-barrier.cpp +++ b/src/smpi/colls/alltoallv-pair-one-barrier.cpp @@ -52,7 +52,7 @@ smpi_coll_tuned_alltoallv_pair_one_barrier(void *send_buff, int *send_counts, in smpi_mpi_barrier(comm); for (i = 0; i < num_procs; i++) { src = dst = rank ^ i; - smpi_mpi_sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, + Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, tag, recv_ptr + recv_disps[src] * recv_chunk, recv_counts[src], recv_type, src, tag, comm, &s); } diff --git a/src/smpi/colls/alltoallv-pair.cpp b/src/smpi/colls/alltoallv-pair.cpp index cb2551de37..d127c04fbf 100644 --- a/src/smpi/colls/alltoallv-pair.cpp +++ b/src/smpi/colls/alltoallv-pair.cpp @@ -51,7 +51,7 @@ int smpi_coll_tuned_alltoallv_pair(void *send_buff, int *send_counts, int *send_ for (i = 0; i < num_procs; i++) { src = dst = rank ^ i; - smpi_mpi_sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, tag, + Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, tag, recv_ptr + recv_disps[src] * recv_chunk, recv_counts[src], recv_type, src, tag, comm, &s); } diff --git a/src/smpi/colls/alltoallv-ring-light-barrier.cpp b/src/smpi/colls/alltoallv-ring-light-barrier.cpp index 0e3086f59e..48cc255131 100644 --- a/src/smpi/colls/alltoallv-ring-light-barrier.cpp +++ b/src/smpi/colls/alltoallv-ring-light-barrier.cpp @@ -48,7 +48,7 @@ 
smpi_coll_tuned_alltoallv_ring_light_barrier(void *send_buff, int *send_counts, send_chunk = smpi_datatype_get_extent(send_type); recv_chunk = smpi_datatype_get_extent(recv_type); - smpi_mpi_sendrecv(send_ptr + send_disps[rank] * send_chunk, send_counts[rank], send_type, rank, tag, + Request::sendrecv(send_ptr + send_disps[rank] * send_chunk, send_counts[rank], send_type, rank, tag, recv_ptr + recv_disps[rank] * recv_chunk, recv_counts[rank], recv_type, rank, tag, comm, &s); @@ -56,14 +56,14 @@ smpi_coll_tuned_alltoallv_ring_light_barrier(void *send_buff, int *send_counts, src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; - smpi_mpi_sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, + Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, tag, recv_ptr + recv_disps[src] * recv_chunk, recv_counts[src], recv_type, src, tag, comm, &s); if ((i + 1) < num_procs) { next_src = (rank - (i + 1) + num_procs) % num_procs; next_dst = (rank + (i + 1) + num_procs) % num_procs; - smpi_mpi_sendrecv(&send_sync, 1, MPI_CHAR, next_src, tag, + Request::sendrecv(&send_sync, 1, MPI_CHAR, next_src, tag, &recv_sync, 1, MPI_CHAR, next_dst, tag, comm, &s); } diff --git a/src/smpi/colls/alltoallv-ring-mpi-barrier.cpp b/src/smpi/colls/alltoallv-ring-mpi-barrier.cpp index ed7030f0e8..69204c3aee 100644 --- a/src/smpi/colls/alltoallv-ring-mpi-barrier.cpp +++ b/src/smpi/colls/alltoallv-ring-mpi-barrier.cpp @@ -50,7 +50,7 @@ smpi_coll_tuned_alltoallv_ring_mpi_barrier(void *send_buff, int *send_counts, in dst = (rank + i) % num_procs; smpi_mpi_barrier(comm); - smpi_mpi_sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, + Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, tag, recv_ptr + recv_disps[src] * recv_chunk, recv_counts[src], recv_type, src, tag, comm, &s); } diff --git a/src/smpi/colls/alltoallv-ring-one-barrier.cpp 
b/src/smpi/colls/alltoallv-ring-one-barrier.cpp index 443bebe74d..46e4dfce11 100644 --- a/src/smpi/colls/alltoallv-ring-one-barrier.cpp +++ b/src/smpi/colls/alltoallv-ring-one-barrier.cpp @@ -49,7 +49,7 @@ smpi_coll_tuned_alltoallv_ring_one_barrier(void *send_buff, int *send_counts, in src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; - smpi_mpi_sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, + Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, tag, recv_ptr + recv_disps[src] * recv_chunk, recv_counts[src], recv_type, src, tag, comm, &s); } diff --git a/src/smpi/colls/alltoallv-ring.cpp b/src/smpi/colls/alltoallv-ring.cpp index cc46d4ccd9..c8937a2747 100644 --- a/src/smpi/colls/alltoallv-ring.cpp +++ b/src/smpi/colls/alltoallv-ring.cpp @@ -55,7 +55,7 @@ smpi_coll_tuned_alltoallv_ring(void *send_buff, int *send_counts, int *send_disp dst = (rank + i) % num_procs; } - smpi_mpi_sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, + Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, tag, recv_ptr + recv_disps[src] * recv_chunk, recv_counts[src], recv_type, src, tag, comm, &s); diff --git a/src/smpi/colls/barrier-mvapich2-pair.cpp b/src/smpi/colls/barrier-mvapich2-pair.cpp index 9683162cdb..67b3f7b368 100644 --- a/src/smpi/colls/barrier-mvapich2-pair.cpp +++ b/src/smpi/colls/barrier-mvapich2-pair.cpp @@ -67,14 +67,14 @@ int smpi_coll_tuned_barrier_mvapich2_pair(MPI_Comm comm) if (rank < surfeit) { /* get the fanin letter from the upper "half" process: */ dst = N2_prev + rank; - smpi_mpi_recv(NULL, 0, MPI_BYTE, dst, COLL_TAG_BARRIER, + Request::recv(NULL, 0, MPI_BYTE, dst, COLL_TAG_BARRIER, comm, MPI_STATUS_IGNORE); } /* combine on embedded N2_prev power-of-two processes */ for (d = 1; d < N2_prev; d <<= 1) { dst = (rank ^ d); - smpi_mpi_sendrecv(NULL, 0, MPI_BYTE, dst, COLL_TAG_BARRIER, NULL, + 
Request::sendrecv(NULL, 0, MPI_BYTE, dst, COLL_TAG_BARRIER, NULL, 0, MPI_BYTE, dst, COLL_TAG_BARRIER, comm, MPI_STATUS_IGNORE); } @@ -82,13 +82,13 @@ int smpi_coll_tuned_barrier_mvapich2_pair(MPI_Comm comm) /* fanout data to nodes above N2_prev... */ if (rank < surfeit) { dst = N2_prev + rank; - smpi_mpi_send(NULL, 0, MPI_BYTE, dst, COLL_TAG_BARRIER, + Request::send(NULL, 0, MPI_BYTE, dst, COLL_TAG_BARRIER, comm); } } else { /* fanin data to power of 2 subset */ src = rank - N2_prev; - smpi_mpi_sendrecv(NULL, 0, MPI_BYTE, src, COLL_TAG_BARRIER, + Request::sendrecv(NULL, 0, MPI_BYTE, src, COLL_TAG_BARRIER, NULL, 0, MPI_BYTE, src, COLL_TAG_BARRIER, comm, MPI_STATUS_IGNORE); } diff --git a/src/smpi/colls/barrier-ompi.cpp b/src/smpi/colls/barrier-ompi.cpp index 3d9f943acc..69a9a94d03 100644 --- a/src/smpi/colls/barrier-ompi.cpp +++ b/src/smpi/colls/barrier-ompi.cpp @@ -59,38 +59,38 @@ int smpi_coll_tuned_barrier_ompi_doublering(MPI_Comm comm right = ((rank+1)%size); if (rank > 0) { /* receive message from the left */ - smpi_mpi_recv((void*)NULL, 0, MPI_BYTE, left, + Request::recv((void*)NULL, 0, MPI_BYTE, left, COLL_TAG_BARRIER, comm, MPI_STATUS_IGNORE); } /* Send message to the right */ - smpi_mpi_send((void*)NULL, 0, MPI_BYTE, right, + Request::send((void*)NULL, 0, MPI_BYTE, right, COLL_TAG_BARRIER, comm); /* root needs to receive from the last node */ if (rank == 0) { - smpi_mpi_recv((void*)NULL, 0, MPI_BYTE, left, + Request::recv((void*)NULL, 0, MPI_BYTE, left, COLL_TAG_BARRIER, comm, MPI_STATUS_IGNORE); } /* Allow nodes to exit */ if (rank > 0) { /* post Receive from left */ - smpi_mpi_recv((void*)NULL, 0, MPI_BYTE, left, + Request::recv((void*)NULL, 0, MPI_BYTE, left, COLL_TAG_BARRIER, comm, MPI_STATUS_IGNORE); } /* send message to the right one */ - smpi_mpi_send((void*)NULL, 0, MPI_BYTE, right, + Request::send((void*)NULL, 0, MPI_BYTE, right, COLL_TAG_BARRIER, comm); /* rank 0 post receive from the last node */ if (rank == 0) { - smpi_mpi_recv((void*)NULL, 0, 
MPI_BYTE, left, + Request::recv((void*)NULL, 0, MPI_BYTE, left, COLL_TAG_BARRIER, comm, MPI_STATUS_IGNORE); } @@ -125,7 +125,7 @@ int smpi_coll_tuned_barrier_ompi_recursivedoubling(MPI_Comm comm if (rank >= adjsize) { /* send message to lower ranked node */ remote = rank - adjsize; - smpi_mpi_sendrecv(NULL, 0, MPI_BYTE, remote, + Request::sendrecv(NULL, 0, MPI_BYTE, remote, COLL_TAG_BARRIER, NULL, 0, MPI_BYTE, remote, COLL_TAG_BARRIER, @@ -134,7 +134,7 @@ int smpi_coll_tuned_barrier_ompi_recursivedoubling(MPI_Comm comm } else if (rank < (size - adjsize)) { /* receive message from high level rank */ - smpi_mpi_recv((void*)NULL, 0, MPI_BYTE, rank+adjsize, + Request::recv((void*)NULL, 0, MPI_BYTE, rank+adjsize, COLL_TAG_BARRIER, comm, MPI_STATUS_IGNORE); @@ -150,7 +150,7 @@ int smpi_coll_tuned_barrier_ompi_recursivedoubling(MPI_Comm comm if (remote >= adjsize) continue; /* post receive from the remote node */ - smpi_mpi_sendrecv(NULL, 0, MPI_BYTE, remote, + Request::sendrecv(NULL, 0, MPI_BYTE, remote, COLL_TAG_BARRIER, NULL, 0, MPI_BYTE, remote, COLL_TAG_BARRIER, @@ -163,7 +163,7 @@ int smpi_coll_tuned_barrier_ompi_recursivedoubling(MPI_Comm comm if (rank < (size - adjsize)) { /* send enter message to higher ranked node */ remote = rank + adjsize; - smpi_mpi_send((void*)NULL, 0, MPI_BYTE, remote, + Request::send((void*)NULL, 0, MPI_BYTE, remote, COLL_TAG_BARRIER, comm); @@ -196,7 +196,7 @@ int smpi_coll_tuned_barrier_ompi_bruck(MPI_Comm comm to = (rank + distance) % size; /* send message to lower ranked node */ - smpi_mpi_sendrecv(NULL, 0, MPI_BYTE, to, + Request::sendrecv(NULL, 0, MPI_BYTE, to, COLL_TAG_BARRIER, NULL, 0, MPI_BYTE, from, COLL_TAG_BARRIER, @@ -222,7 +222,7 @@ int smpi_coll_tuned_barrier_ompi_two_procs(MPI_Comm comm "ompi_coll_tuned_barrier_ompi_two_procs rank %d", remote); remote = (remote + 1) & 0x1; - smpi_mpi_sendrecv(NULL, 0, MPI_BYTE, remote, + Request::sendrecv(NULL, 0, MPI_BYTE, remote, COLL_TAG_BARRIER, NULL, 0, MPI_BYTE, remote, 
COLL_TAG_BARRIER, @@ -254,11 +254,11 @@ int smpi_coll_tuned_barrier_ompi_basic_linear(MPI_Comm comm) /* All non-root send & receive zero-length message. */ if (rank > 0) { - smpi_mpi_send (NULL, 0, MPI_BYTE, 0, + Request::send (NULL, 0, MPI_BYTE, 0, COLL_TAG_BARRIER, comm); - smpi_mpi_recv (NULL, 0, MPI_BYTE, 0, + Request::recv (NULL, 0, MPI_BYTE, 0, COLL_TAG_BARRIER, comm, MPI_STATUS_IGNORE); } @@ -270,19 +270,19 @@ int smpi_coll_tuned_barrier_ompi_basic_linear(MPI_Comm comm) requests = (MPI_Request*)malloc( size * sizeof(MPI_Request) ); for (i = 1; i < size; ++i) { - requests[i] = smpi_mpi_irecv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE, + requests[i] = Request::irecv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE, COLL_TAG_BARRIER, comm ); } - smpi_mpi_waitall( size-1, requests+1, MPI_STATUSES_IGNORE ); + Request::waitall( size-1, requests+1, MPI_STATUSES_IGNORE ); for (i = 1; i < size; ++i) { - requests[i] = smpi_mpi_isend(NULL, 0, MPI_BYTE, i, + requests[i] = Request::isend(NULL, 0, MPI_BYTE, i, COLL_TAG_BARRIER, comm ); } - smpi_mpi_waitall( size-1, requests+1, MPI_STATUSES_IGNORE ); + Request::waitall( size-1, requests+1, MPI_STATUSES_IGNORE ); free( requests ); } @@ -315,11 +315,11 @@ int smpi_coll_tuned_barrier_ompi_tree(MPI_Comm comm) partner = rank ^ jump; if (!(partner & (jump-1)) && partner < size) { if (partner > rank) { - smpi_mpi_recv (NULL, 0, MPI_BYTE, partner, + Request::recv (NULL, 0, MPI_BYTE, partner, COLL_TAG_BARRIER, comm, MPI_STATUS_IGNORE); } else if (partner < rank) { - smpi_mpi_send (NULL, 0, MPI_BYTE, partner, + Request::send (NULL, 0, MPI_BYTE, partner, COLL_TAG_BARRIER, comm); } @@ -331,11 +331,11 @@ int smpi_coll_tuned_barrier_ompi_tree(MPI_Comm comm) partner = rank ^ jump; if (!(partner & (jump-1)) && partner < size) { if (partner > rank) { - smpi_mpi_send (NULL, 0, MPI_BYTE, partner, + Request::send (NULL, 0, MPI_BYTE, partner, COLL_TAG_BARRIER, comm); } else if (partner < rank) { - smpi_mpi_recv (NULL, 0, MPI_BYTE, partner, + Request::recv (NULL, 0, 
MPI_BYTE, partner, COLL_TAG_BARRIER, comm, MPI_STATUS_IGNORE); } diff --git a/src/smpi/colls/bcast-NTSB.cpp b/src/smpi/colls/bcast-NTSB.cpp index 5869f15082..c5ec8cddf9 100644 --- a/src/smpi/colls/bcast-NTSB.cpp +++ b/src/smpi/colls/bcast-NTSB.cpp @@ -52,9 +52,9 @@ int smpi_coll_tuned_bcast_NTSB(void *buf, int count, MPI_Datatype datatype, /* if root is not zero send to rank zero first */ if (root != 0) { if (rank == root) { - smpi_mpi_send(buf, count, datatype, 0, tag, comm); + Request::send(buf, count, datatype, 0, tag, comm); } else if (rank == 0) { - smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status); + Request::recv(buf, count, datatype, root, tag, comm, &status); } } @@ -65,31 +65,31 @@ int smpi_coll_tuned_bcast_NTSB(void *buf, int count, MPI_Datatype datatype, if (rank == 0) { /* case root has only a left child */ if (to_right == -1) { - smpi_mpi_send(buf, count, datatype, to_left, tag, comm); + Request::send(buf, count, datatype, to_left, tag, comm); } /* case root has both left and right children */ else { - smpi_mpi_send(buf, count, datatype, to_left, tag, comm); - smpi_mpi_send(buf, count, datatype, to_right, tag, comm); + Request::send(buf, count, datatype, to_left, tag, comm); + Request::send(buf, count, datatype, to_right, tag, comm); } } /* case: leaf ==> receive only */ else if (to_left == -1) { - smpi_mpi_recv(buf, count, datatype, from, tag, comm, &status); + Request::recv(buf, count, datatype, from, tag, comm, &status); } /* case: intermidiate node with only left child ==> relay message */ else if (to_right == -1) { - smpi_mpi_recv(buf, count, datatype, from, tag, comm, &status); - smpi_mpi_send(buf, count, datatype, to_left, tag, comm); + Request::recv(buf, count, datatype, from, tag, comm, &status); + Request::send(buf, count, datatype, to_left, tag, comm); } /* case: intermidiate node with both left and right children ==> relay message */ else { - smpi_mpi_recv(buf, count, datatype, from, tag, comm, &status); - smpi_mpi_send(buf, 
count, datatype, to_left, tag, comm); - smpi_mpi_send(buf, count, datatype, to_right, tag, comm); + Request::recv(buf, count, datatype, from, tag, comm, &status); + Request::send(buf, count, datatype, to_left, tag, comm); + Request::send(buf, count, datatype, to_right, tag, comm); } return MPI_SUCCESS; } @@ -112,60 +112,60 @@ int smpi_coll_tuned_bcast_NTSB(void *buf, int count, MPI_Datatype datatype, /* case root has only a left child */ if (to_right == -1) { for (i = 0; i < pipe_length; i++) { - send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to_left, + send_request_array[i] = Request::isend((char *) buf + (i * increment), segment, datatype, to_left, tag + i, comm); } - smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); + Request::waitall((pipe_length), send_request_array, send_status_array); } /* case root has both left and right children */ else { for (i = 0; i < pipe_length; i++) { - send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to_left, + send_request_array[i] = Request::isend((char *) buf + (i * increment), segment, datatype, to_left, tag + i, comm); - send_request_array[i + pipe_length] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to_right, + send_request_array[i + pipe_length] = Request::isend((char *) buf + (i * increment), segment, datatype, to_right, tag + i, comm); } - smpi_mpi_waitall((2 * pipe_length), send_request_array, send_status_array); + Request::waitall((2 * pipe_length), send_request_array, send_status_array); } } /* case: leaf ==> receive only */ else if (to_left == -1) { for (i = 0; i < pipe_length; i++) { - recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from, + recv_request_array[i] = Request::irecv((char *) buf + (i * increment), segment, datatype, from, tag + i, comm); } - smpi_mpi_waitall((pipe_length), recv_request_array, recv_status_array); + 
Request::waitall((pipe_length), recv_request_array, recv_status_array); } /* case: intermidiate node with only left child ==> relay message */ else if (to_right == -1) { for (i = 0; i < pipe_length; i++) { - recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from, + recv_request_array[i] = Request::irecv((char *) buf + (i * increment), segment, datatype, from, tag + i, comm); } for (i = 0; i < pipe_length; i++) { - smpi_mpi_wait(&recv_request_array[i], &status); - send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to_left, + Request::wait(&recv_request_array[i], &status); + send_request_array[i] = Request::isend((char *) buf + (i * increment), segment, datatype, to_left, tag + i, comm); } - smpi_mpi_waitall(pipe_length, send_request_array, send_status_array); + Request::waitall(pipe_length, send_request_array, send_status_array); } /* case: intermidiate node with both left and right children ==> relay message */ else { for (i = 0; i < pipe_length; i++) { - recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from, + recv_request_array[i] = Request::irecv((char *) buf + (i * increment), segment, datatype, from, tag + i, comm); } for (i = 0; i < pipe_length; i++) { - smpi_mpi_wait(&recv_request_array[i], &status); - send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to_left, + Request::wait(&recv_request_array[i], &status); + send_request_array[i] = Request::isend((char *) buf + (i * increment), segment, datatype, to_left, tag + i, comm); - send_request_array[i + pipe_length] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to_right, + send_request_array[i + pipe_length] = Request::isend((char *) buf + (i * increment), segment, datatype, to_right, tag + i, comm); } - smpi_mpi_waitall((2 * pipe_length), send_request_array, send_status_array); + Request::waitall((2 * pipe_length), 
send_request_array, send_status_array); } free(send_request_array); diff --git a/src/smpi/colls/bcast-NTSL-Isend.cpp b/src/smpi/colls/bcast-NTSL-Isend.cpp index 5304f31c98..31d0e0a3fd 100644 --- a/src/smpi/colls/bcast-NTSL-Isend.cpp +++ b/src/smpi/colls/bcast-NTSL-Isend.cpp @@ -51,23 +51,23 @@ int smpi_coll_tuned_bcast_NTSL_Isend(void *buf, int count, MPI_Datatype datatype */ if (root != 0) { if (rank == root) { - smpi_mpi_send(buf, count, datatype, 0, tag, comm); + Request::send(buf, count, datatype, 0, tag, comm); } else if (rank == 0) { - smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status); + Request::recv(buf, count, datatype, root, tag, comm, &status); } } /* when a message is smaller than a block size => no pipeline */ if (count <= segment) { if (rank == 0) { - smpi_mpi_send(buf, count, datatype, to, tag, comm); + Request::send(buf, count, datatype, to, tag, comm); } else if (rank == (size - 1)) { - request = smpi_mpi_irecv(buf, count, datatype, from, tag, comm); - smpi_mpi_wait(&request, &status); + request = Request::irecv(buf, count, datatype, from, tag, comm); + Request::wait(&request, &status); } else { - request = smpi_mpi_irecv(buf, count, datatype, from, tag, comm); - smpi_mpi_wait(&request, &status); - smpi_mpi_send(buf, count, datatype, to, tag, comm); + request = Request::irecv(buf, count, datatype, from, tag, comm); + Request::wait(&request, &status); + Request::send(buf, count, datatype, to, tag, comm); } return MPI_SUCCESS; } @@ -86,33 +86,33 @@ int smpi_coll_tuned_bcast_NTSL_Isend(void *buf, int count, MPI_Datatype datatype /* root send data */ if (rank == 0) { for (i = 0; i < pipe_length; i++) { - send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to, + send_request_array[i] = Request::isend((char *) buf + (i * increment), segment, datatype, to, (tag + i), comm); } - smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); + Request::waitall((pipe_length), send_request_array, 
send_status_array); } /* last node only receive data */ else if (rank == (size - 1)) { for (i = 0; i < pipe_length; i++) { - recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from, + recv_request_array[i] = Request::irecv((char *) buf + (i * increment), segment, datatype, from, (tag + i), comm); } - smpi_mpi_waitall((pipe_length), recv_request_array, recv_status_array); + Request::waitall((pipe_length), recv_request_array, recv_status_array); } /* intermediate nodes relay (receive, then send) data */ else { for (i = 0; i < pipe_length; i++) { - recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from, + recv_request_array[i] = Request::irecv((char *) buf + (i * increment), segment, datatype, from, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - smpi_mpi_wait(&recv_request_array[i], &status); - send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to, + Request::wait(&recv_request_array[i], &status); + send_request_array[i] = Request::isend((char *) buf + (i * increment), segment, datatype, to, (tag + i), comm); } - smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); + Request::waitall((pipe_length), send_request_array, send_status_array); } free(send_request_array); diff --git a/src/smpi/colls/bcast-NTSL.cpp b/src/smpi/colls/bcast-NTSL.cpp index 146d840326..773f097ee0 100644 --- a/src/smpi/colls/bcast-NTSL.cpp +++ b/src/smpi/colls/bcast-NTSL.cpp @@ -51,23 +51,23 @@ int smpi_coll_tuned_bcast_NTSL(void *buf, int count, MPI_Datatype datatype, */ if (root != 0) { if (rank == root) { - smpi_mpi_send(buf, count, datatype, 0, tag, comm); + Request::send(buf, count, datatype, 0, tag, comm); } else if (rank == 0) { - smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status); + Request::recv(buf, count, datatype, root, tag, comm, &status); } } /* when a message is smaller than a block size => no pipeline */ if (count <= segment) 
{ if (rank == 0) { - smpi_mpi_send(buf, count, datatype, to, tag, comm); + Request::send(buf, count, datatype, to, tag, comm); } else if (rank == (size - 1)) { - request = smpi_mpi_irecv(buf, count, datatype, from, tag, comm); - smpi_mpi_wait(&request, &status); + request = Request::irecv(buf, count, datatype, from, tag, comm); + Request::wait(&request, &status); } else { - request = smpi_mpi_irecv(buf, count, datatype, from, tag, comm); - smpi_mpi_wait(&request, &status); - smpi_mpi_send(buf, count, datatype, to, tag, comm); + request = Request::irecv(buf, count, datatype, from, tag, comm); + Request::wait(&request, &status); + Request::send(buf, count, datatype, to, tag, comm); } return MPI_SUCCESS; } @@ -86,33 +86,33 @@ int smpi_coll_tuned_bcast_NTSL(void *buf, int count, MPI_Datatype datatype, /* root send data */ if (rank == 0) { for (i = 0; i < pipe_length; i++) { - send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to, + send_request_array[i] = Request::isend((char *) buf + (i * increment), segment, datatype, to, (tag + i), comm); } - smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); + Request::waitall((pipe_length), send_request_array, send_status_array); } /* last node only receive data */ else if (rank == (size - 1)) { for (i = 0; i < pipe_length; i++) { - recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from, + recv_request_array[i] = Request::irecv((char *) buf + (i * increment), segment, datatype, from, (tag + i), comm); } - smpi_mpi_waitall((pipe_length), recv_request_array, recv_status_array); + Request::waitall((pipe_length), recv_request_array, recv_status_array); } /* intermediate nodes relay (receive, then send) data */ else { for (i = 0; i < pipe_length; i++) { - recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from, + recv_request_array[i] = Request::irecv((char *) buf + (i * increment), segment, 
datatype, from, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - smpi_mpi_wait(&recv_request_array[i], &status); - send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to, + Request::wait(&recv_request_array[i], &status); + send_request_array[i] = Request::isend((char *) buf + (i * increment), segment, datatype, to, (tag + i), comm); } - smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); + Request::waitall((pipe_length), send_request_array, send_status_array); } free(send_request_array); diff --git a/src/smpi/colls/bcast-SMP-binary.cpp b/src/smpi/colls/bcast-SMP-binary.cpp index b551d84d18..05cacd5aaa 100644 --- a/src/smpi/colls/bcast-SMP-binary.cpp +++ b/src/smpi/colls/bcast-SMP-binary.cpp @@ -57,9 +57,9 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, // if root is not zero send to rank zero first if (root != 0) { if (rank == root) - smpi_mpi_send(buf, count, datatype, 0, tag, comm); + Request::send(buf, count, datatype, 0, tag, comm); else if (rank == 0) - smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status); + Request::recv(buf, count, datatype, root, tag, comm, &status); } // when a message is smaller than a block size => no pipeline if (count <= segment) { @@ -69,52 +69,52 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, if (rank == 0) { //printf("node %d left %d right %d\n",rank,to_inter_left,to_inter_right); if (to_inter_left < size) - smpi_mpi_send(buf, count, datatype, to_inter_left, tag, comm); + Request::send(buf, count, datatype, to_inter_left, tag, comm); if (to_inter_right < size) - smpi_mpi_send(buf, count, datatype, to_inter_right, tag, comm); + Request::send(buf, count, datatype, to_inter_right, tag, comm); if ((to_intra_left - base) < num_core) - smpi_mpi_send(buf, count, datatype, to_intra_left, tag, comm); + Request::send(buf, count, datatype, to_intra_left, tag, comm); if ((to_intra_right - base) < num_core) - smpi_mpi_send(buf, count, 
datatype, to_intra_right, tag, comm); + Request::send(buf, count, datatype, to_intra_right, tag, comm); } // case LEAVES ROOT-of-eash-SMP else if (to_inter_left >= size) { //printf("node %d from %d\n",rank,from_inter); - request = smpi_mpi_irecv(buf, count, datatype, from_inter, tag, comm); - smpi_mpi_wait(&request, &status); + request = Request::irecv(buf, count, datatype, from_inter, tag, comm); + Request::wait(&request, &status); if ((to_intra_left - base) < num_core) - smpi_mpi_send(buf, count, datatype, to_intra_left, tag, comm); + Request::send(buf, count, datatype, to_intra_left, tag, comm); if ((to_intra_right - base) < num_core) - smpi_mpi_send(buf, count, datatype, to_intra_right, tag, comm); + Request::send(buf, count, datatype, to_intra_right, tag, comm); } // case INTERMEDIAT ROOT-of-each-SMP else { //printf("node %d left %d right %d from %d\n",rank,to_inter_left,to_inter_right,from_inter); - request = smpi_mpi_irecv(buf, count, datatype, from_inter, tag, comm); - smpi_mpi_wait(&request, &status); - smpi_mpi_send(buf, count, datatype, to_inter_left, tag, comm); + request = Request::irecv(buf, count, datatype, from_inter, tag, comm); + Request::wait(&request, &status); + Request::send(buf, count, datatype, to_inter_left, tag, comm); if (to_inter_right < size) - smpi_mpi_send(buf, count, datatype, to_inter_right, tag, comm); + Request::send(buf, count, datatype, to_inter_right, tag, comm); if ((to_intra_left - base) < num_core) - smpi_mpi_send(buf, count, datatype, to_intra_left, tag, comm); + Request::send(buf, count, datatype, to_intra_left, tag, comm); if ((to_intra_right - base) < num_core) - smpi_mpi_send(buf, count, datatype, to_intra_right, tag, comm); + Request::send(buf, count, datatype, to_intra_right, tag, comm); } } // case non ROOT-of-each-SMP else { // case leaves if ((to_intra_left - base) >= num_core) { - request = smpi_mpi_irecv(buf, count, datatype, from_intra, tag, comm); - smpi_mpi_wait(&request, &status); + request = 
Request::irecv(buf, count, datatype, from_intra, tag, comm); + Request::wait(&request, &status); } // case intermediate else { - request = smpi_mpi_irecv(buf, count, datatype, from_intra, tag, comm); - smpi_mpi_wait(&request, &status); - smpi_mpi_send(buf, count, datatype, to_intra_left, tag, comm); + request = Request::irecv(buf, count, datatype, from_intra, tag, comm); + Request::wait(&request, &status); + Request::send(buf, count, datatype, to_intra_left, tag, comm); if ((to_intra_right - base) < num_core) - smpi_mpi_send(buf, count, datatype, to_intra_right, tag, comm); + Request::send(buf, count, datatype, to_intra_right, tag, comm); } } @@ -135,16 +135,16 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, for (i = 0; i < pipe_length; i++) { //printf("node %d left %d right %d\n",rank,to_inter_left,to_inter_right); if (to_inter_left < size) - smpi_mpi_send((char *) buf + (i * increment), segment, datatype, + Request::send((char *) buf + (i * increment), segment, datatype, to_inter_left, (tag + i), comm); if (to_inter_right < size) - smpi_mpi_send((char *) buf + (i * increment), segment, datatype, + Request::send((char *) buf + (i * increment), segment, datatype, to_inter_right, (tag + i), comm); if ((to_intra_left - base) < num_core) - smpi_mpi_send((char *) buf + (i * increment), segment, datatype, + Request::send((char *) buf + (i * increment), segment, datatype, to_intra_left, (tag + i), comm); if ((to_intra_right - base) < num_core) - smpi_mpi_send((char *) buf + (i * increment), segment, datatype, + Request::send((char *) buf + (i * increment), segment, datatype, to_intra_right, (tag + i), comm); } } @@ -152,16 +152,16 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, else if (to_inter_left >= size) { //printf("node %d from %d\n",rank,from_inter); for (i = 0; i < pipe_length; i++) { - request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, + request_array[i] = Request::irecv((char *) buf + (i * 
increment), segment, datatype, from_inter, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - smpi_mpi_wait(&request_array[i], &status); + Request::wait(&request_array[i], &status); if ((to_intra_left - base) < num_core) - smpi_mpi_send((char *) buf + (i * increment), segment, datatype, + Request::send((char *) buf + (i * increment), segment, datatype, to_intra_left, (tag + i), comm); if ((to_intra_right - base) < num_core) - smpi_mpi_send((char *) buf + (i * increment), segment, datatype, + Request::send((char *) buf + (i * increment), segment, datatype, to_intra_right, (tag + i), comm); } } @@ -169,21 +169,21 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, else { //printf("node %d left %d right %d from %d\n",rank,to_inter_left,to_inter_right,from_inter); for (i = 0; i < pipe_length; i++) { - request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, + request_array[i] = Request::irecv((char *) buf + (i * increment), segment, datatype, from_inter, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - smpi_mpi_wait(&request_array[i], &status); - smpi_mpi_send((char *) buf + (i * increment), segment, datatype, + Request::wait(&request_array[i], &status); + Request::send((char *) buf + (i * increment), segment, datatype, to_inter_left, (tag + i), comm); if (to_inter_right < size) - smpi_mpi_send((char *) buf + (i * increment), segment, datatype, + Request::send((char *) buf + (i * increment), segment, datatype, to_inter_right, (tag + i), comm); if ((to_intra_left - base) < num_core) - smpi_mpi_send((char *) buf + (i * increment), segment, datatype, + Request::send((char *) buf + (i * increment), segment, datatype, to_intra_left, (tag + i), comm); if ((to_intra_right - base) < num_core) - smpi_mpi_send((char *) buf + (i * increment), segment, datatype, + Request::send((char *) buf + (i * increment), segment, datatype, to_intra_right, (tag + i), comm); } } @@ -193,23 +193,23 @@ int smpi_coll_tuned_bcast_SMP_binary(void 
*buf, int count, // case leaves if ((to_intra_left - base) >= num_core) { for (i = 0; i < pipe_length; i++) { - request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, + request_array[i] = Request::irecv((char *) buf + (i * increment), segment, datatype, from_intra, (tag + i), comm); } - smpi_mpi_waitall((pipe_length), request_array, status_array); + Request::waitall((pipe_length), request_array, status_array); } // case intermediate else { for (i = 0; i < pipe_length; i++) { - request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, + request_array[i] = Request::irecv((char *) buf + (i * increment), segment, datatype, from_intra, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - smpi_mpi_wait(&request_array[i], &status); - smpi_mpi_send((char *) buf + (i * increment), segment, datatype, + Request::wait(&request_array[i], &status); + Request::send((char *) buf + (i * increment), segment, datatype, to_intra_left, (tag + i), comm); if ((to_intra_right - base) < num_core) - smpi_mpi_send((char *) buf + (i * increment), segment, datatype, + Request::send((char *) buf + (i * increment), segment, datatype, to_intra_right, (tag + i), comm); } } diff --git a/src/smpi/colls/bcast-SMP-binomial.cpp b/src/smpi/colls/bcast-SMP-binomial.cpp index 911721926d..57c0b4cfa1 100644 --- a/src/smpi/colls/bcast-SMP-binomial.cpp +++ b/src/smpi/colls/bcast-SMP-binomial.cpp @@ -43,9 +43,9 @@ int smpi_coll_tuned_bcast_SMP_binomial(void *buf, int count, // if root is not zero send to rank zero first if (root != 0) { if (rank == root) - smpi_mpi_send(buf, count, datatype, 0, tag, comm); + Request::send(buf, count, datatype, 0, tag, comm); else if (rank == 0) - smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status); + Request::recv(buf, count, datatype, root, tag, comm, &status); } //FIRST STEP node 0 send to every root-of-each-SMP with binomial tree @@ -57,7 +57,7 @@ int smpi_coll_tuned_bcast_SMP_binomial(void *buf, int 
count, if (inter_rank & mask) { from_inter = (inter_rank - mask) * num_core; //printf("Node %d recv from node %d when mask is %d\n", rank, from_inter, mask); - smpi_mpi_recv(buf, count, datatype, from_inter, tag, comm, &status); + Request::recv(buf, count, datatype, from_inter, tag, comm, &status); break; } mask <<= 1; @@ -71,7 +71,7 @@ int smpi_coll_tuned_bcast_SMP_binomial(void *buf, int count, to_inter = (inter_rank + mask) * num_core; if (to_inter < size) { //printf("Node %d send to node %d when mask is %d\n", rank, to_inter, mask); - smpi_mpi_send(buf, count, datatype, to_inter, tag, comm); + Request::send(buf, count, datatype, to_inter, tag, comm); } } mask >>= 1; @@ -85,7 +85,7 @@ int smpi_coll_tuned_bcast_SMP_binomial(void *buf, int count, if (intra_rank & mask) { from_intra = base + (intra_rank - mask); //printf("Node %d recv from node %d when mask is %d\n", rank, from_inter, mask); - smpi_mpi_recv(buf, count, datatype, from_intra, tag, comm, &status); + Request::recv(buf, count, datatype, from_intra, tag, comm, &status); break; } mask <<= 1; @@ -100,7 +100,7 @@ int smpi_coll_tuned_bcast_SMP_binomial(void *buf, int count, to_intra = base + (intra_rank + mask); if (to_intra < size) { //printf("Node %d send to node %d when mask is %d\n", rank, to_inter, mask); - smpi_mpi_send(buf, count, datatype, to_intra, tag, comm); + Request::send(buf, count, datatype, to_intra, tag, comm); } } mask >>= 1; diff --git a/src/smpi/colls/bcast-SMP-linear.cpp b/src/smpi/colls/bcast-SMP-linear.cpp index 049682de39..70390f62d0 100644 --- a/src/smpi/colls/bcast-SMP-linear.cpp +++ b/src/smpi/colls/bcast-SMP-linear.cpp @@ -59,40 +59,40 @@ int smpi_coll_tuned_bcast_SMP_linear(void *buf, int count, // if root is not zero send to rank zero first if (root != 0) { if (rank == root) - smpi_mpi_send(buf, count, datatype, 0, tag, comm); + Request::send(buf, count, datatype, 0, tag, comm); else if (rank == 0) - smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status); + 
Request::recv(buf, count, datatype, root, tag, comm, &status); } // when a message is smaller than a block size => no pipeline if (count <= segment) { // case ROOT if (rank == 0) { - smpi_mpi_send(buf, count, datatype, to_inter, tag, comm); - smpi_mpi_send(buf, count, datatype, to_intra, tag, comm); + Request::send(buf, count, datatype, to_inter, tag, comm); + Request::send(buf, count, datatype, to_intra, tag, comm); } // case last ROOT of each SMP else if (rank == (((size - 1) / num_core) * num_core)) { - request = smpi_mpi_irecv(buf, count, datatype, from_inter, tag, comm); - smpi_mpi_wait(&request, &status); - smpi_mpi_send(buf, count, datatype, to_intra, tag, comm); + request = Request::irecv(buf, count, datatype, from_inter, tag, comm); + Request::wait(&request, &status); + Request::send(buf, count, datatype, to_intra, tag, comm); } // case intermediate ROOT of each SMP else if (rank % num_core == 0) { - request = smpi_mpi_irecv(buf, count, datatype, from_inter, tag, comm); - smpi_mpi_wait(&request, &status); - smpi_mpi_send(buf, count, datatype, to_inter, tag, comm); - smpi_mpi_send(buf, count, datatype, to_intra, tag, comm); + request = Request::irecv(buf, count, datatype, from_inter, tag, comm); + Request::wait(&request, &status); + Request::send(buf, count, datatype, to_inter, tag, comm); + Request::send(buf, count, datatype, to_intra, tag, comm); } // case last non-ROOT of each SMP else if (((rank + 1) % num_core == 0) || (rank == (size - 1))) { - request = smpi_mpi_irecv(buf, count, datatype, from_intra, tag, comm); - smpi_mpi_wait(&request, &status); + request = Request::irecv(buf, count, datatype, from_intra, tag, comm); + Request::wait(&request, &status); } // case intermediate non-ROOT of each SMP else { - request = smpi_mpi_irecv(buf, count, datatype, from_intra, tag, comm); - smpi_mpi_wait(&request, &status); - smpi_mpi_send(buf, count, datatype, to_intra, tag, comm); + request = Request::irecv(buf, count, datatype, from_intra, tag, comm); + 
Request::wait(&request, &status); + Request::send(buf, count, datatype, to_intra, tag, comm); } return MPI_SUCCESS; } @@ -108,57 +108,57 @@ int smpi_coll_tuned_bcast_SMP_linear(void *buf, int count, // case real root if (rank == 0) { for (i = 0; i < pipe_length; i++) { - smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_inter, + Request::send((char *) buf + (i * increment), segment, datatype, to_inter, (tag + i), comm); - smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra, + Request::send((char *) buf + (i * increment), segment, datatype, to_intra, (tag + i), comm); } } // case last ROOT of each SMP else if (rank == (((size - 1) / num_core) * num_core)) { for (i = 0; i < pipe_length; i++) { - request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, + request_array[i] = Request::irecv((char *) buf + (i * increment), segment, datatype, from_inter, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - smpi_mpi_wait(&request_array[i], &status); - smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra, + Request::wait(&request_array[i], &status); + Request::send((char *) buf + (i * increment), segment, datatype, to_intra, (tag + i), comm); } } // case intermediate ROOT of each SMP else { for (i = 0; i < pipe_length; i++) { - request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, + request_array[i] = Request::irecv((char *) buf + (i * increment), segment, datatype, from_inter, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - smpi_mpi_wait(&request_array[i], &status); - smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_inter, + Request::wait(&request_array[i], &status); + Request::send((char *) buf + (i * increment), segment, datatype, to_inter, (tag + i), comm); - smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra, + Request::send((char *) buf + (i * increment), segment, datatype, to_intra, (tag + i), 
comm); } } } else { // case last non-ROOT of each SMP if (((rank + 1) % num_core == 0) || (rank == (size - 1))) { for (i = 0; i < pipe_length; i++) { - request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, + request_array[i] = Request::irecv((char *) buf + (i * increment), segment, datatype, from_intra, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - smpi_mpi_wait(&request_array[i], &status); + Request::wait(&request_array[i], &status); } } // case intermediate non-ROOT of each SMP else { for (i = 0; i < pipe_length; i++) { - request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, + request_array[i] = Request::irecv((char *) buf + (i * increment), segment, datatype, from_intra, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - smpi_mpi_wait(&request_array[i], &status); - smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra, + Request::wait(&request_array[i], &status); + Request::send((char *) buf + (i * increment), segment, datatype, to_intra, (tag + i), comm); } } diff --git a/src/smpi/colls/bcast-arrival-pattern-aware-wait.cpp b/src/smpi/colls/bcast-arrival-pattern-aware-wait.cpp index 2b3a0a6262..2d3449ec2f 100644 --- a/src/smpi/colls/bcast-arrival-pattern-aware-wait.cpp +++ b/src/smpi/colls/bcast-arrival-pattern-aware-wait.cpp @@ -77,9 +77,9 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, */ if (root != 0) { if (rank == root) { - smpi_mpi_send(buf, count, datatype, 0, tag, comm); + Request::send(buf, count, datatype, 0, tag, comm); } else if (rank == 0) { - smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status); + Request::recv(buf, count, datatype, root, tag, comm, &status); } } @@ -120,11 +120,11 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, for (k = 0; k < 3; k++) { for (i = 1; i < size; i++) { if ((already_sent[i] == 0) && (will_send[i] == 0)) { - smpi_mpi_iprobe(i, MPI_ANY_TAG, comm, 
&flag_array[i], + Request::iprobe(i, MPI_ANY_TAG, comm, &flag_array[i], &temp_status_array[i]); if (flag_array[i] == 1) { will_send[i] = 1; - smpi_mpi_recv(&temp_buf[i], 1, MPI_CHAR, i, tag, comm, + Request::recv(&temp_buf[i], 1, MPI_CHAR, i, tag, comm, &status); i = 0; } @@ -153,13 +153,13 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, to = header_buf[0]; /* send header */ - smpi_mpi_send(header_buf, header_size, MPI_INT, to, tag, comm); + Request::send(header_buf, header_size, MPI_INT, to, tag, comm); /* send data - pipeline */ for (i = 0; i < pipe_length; i++) { - send_request_array[i] = smpi_mpi_isend((char *)buf + (i * increment), segment, datatype, to, tag, comm); + send_request_array[i] = Request::isend((char *)buf + (i * increment), segment, datatype, to, tag, comm); } - smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); + Request::waitall((pipe_length), send_request_array, send_status_array); } @@ -176,11 +176,11 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, header_buf[1] = -1; to = i; - smpi_mpi_send(header_buf, header_size, MPI_INT, to, tag, comm); + Request::send(header_buf, header_size, MPI_INT, to, tag, comm); /* still need to chop data so that we can use the same non-root code */ for (j = 0; j < pipe_length; j++) { - smpi_mpi_send((char *)buf + (j * increment), segment, datatype, to, tag, comm); + Request::send((char *)buf + (j * increment), segment, datatype, to, tag, comm); } } } @@ -193,11 +193,11 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, else { /* send 1-byte message to root */ - smpi_mpi_send(temp_buf, 1, MPI_CHAR, 0, tag, comm); + Request::send(temp_buf, 1, MPI_CHAR, 0, tag, comm); /* wait for header forward when required */ - request = smpi_mpi_irecv(header_buf, header_size, MPI_INT, MPI_ANY_SOURCE, tag, comm); - smpi_mpi_wait(&request, MPI_STATUS_IGNORE); + request = Request::irecv(header_buf, header_size, MPI_INT, 
MPI_ANY_SOURCE, tag, comm); + Request::wait(&request, MPI_STATUS_IGNORE); /* search for where it is */ int myordering = 0; @@ -214,27 +214,27 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, /* send header when required */ if (to != -1) { - smpi_mpi_send(header_buf, header_size, MPI_INT, to, tag, comm); + Request::send(header_buf, header_size, MPI_INT, to, tag, comm); } /* receive data */ for (i = 0; i < pipe_length; i++) { - recv_request_array[i] = smpi_mpi_irecv((char *)buf + (i * increment), segment, datatype, from, tag, comm); + recv_request_array[i] = Request::irecv((char *)buf + (i * increment), segment, datatype, from, tag, comm); } /* forward data */ if (to != -1) { for (i = 0; i < pipe_length; i++) { - smpi_mpi_wait(&recv_request_array[i], MPI_STATUS_IGNORE); - send_request_array[i] = smpi_mpi_isend((char *)buf + (i * increment), segment, datatype, to, tag, comm); + Request::wait(&recv_request_array[i], MPI_STATUS_IGNORE); + send_request_array[i] = Request::isend((char *)buf + (i * increment), segment, datatype, to, tag, comm); } - smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); + Request::waitall((pipe_length), send_request_array, send_status_array); } /* recv only */ else { - smpi_mpi_waitall((pipe_length), recv_request_array, recv_status_array); + Request::waitall((pipe_length), recv_request_array, recv_status_array); } } diff --git a/src/smpi/colls/bcast-arrival-pattern-aware.cpp b/src/smpi/colls/bcast-arrival-pattern-aware.cpp index 03e4f300eb..93dc6a3602 100644 --- a/src/smpi/colls/bcast-arrival-pattern-aware.cpp +++ b/src/smpi/colls/bcast-arrival-pattern-aware.cpp @@ -67,9 +67,9 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, */ if (root != 0) { if (rank == root) { - smpi_mpi_send(buf, count, datatype, 0, tag, comm); + Request::send(buf, count, datatype, 0, tag, comm); } else if (rank == 0) { - smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status); + 
Request::recv(buf, count, datatype, root, tag, comm, &status); } } @@ -86,7 +86,7 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, while (sent_count < (size - 1)) { for (i = 1; i < size; i++) { - smpi_mpi_iprobe(i, MPI_ANY_TAG, comm, &flag_array[i], + Request::iprobe(i, MPI_ANY_TAG, comm, &flag_array[i], MPI_STATUSES_IGNORE); } @@ -96,7 +96,7 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, /* message arrive */ if ((flag_array[i] == 1) && (already_sent[i] == 0)) { - smpi_mpi_recv(temp_buf, 1, MPI_CHAR, i, tag, comm, &status); + Request::recv(temp_buf, 1, MPI_CHAR, i, tag, comm, &status); header_buf[header_index] = i; header_index++; sent_count++; @@ -110,8 +110,8 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, if (header_index != 0) { header_buf[header_index] = -1; to = header_buf[0]; - smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); - smpi_mpi_send(buf, count, datatype, to, tag, comm); + Request::send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); + Request::send(buf, count, datatype, to, tag, comm); } /* randomly MPI_Send to one */ @@ -121,8 +121,8 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, if (already_sent[i] == 0) { header_buf[0] = i; header_buf[1] = -1; - smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, i, tag, comm); - smpi_mpi_send(buf, count, datatype, i, tag, comm); + Request::send(header_buf, HEADER_SIZE, MPI_INT, i, tag, comm); + Request::send(buf, count, datatype, i, tag, comm); already_sent[i] = 1; sent_count++; break; @@ -138,12 +138,12 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, else { /* send 1-byte message to root */ - smpi_mpi_send(temp_buf, 1, MPI_CHAR, 0, tag, comm); + Request::send(temp_buf, 1, MPI_CHAR, 0, tag, comm); /* wait for header and data, forward when required */ - smpi_mpi_recv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm, + Request::recv(header_buf, HEADER_SIZE, MPI_INT, 
MPI_ANY_SOURCE, tag, comm, &status); - smpi_mpi_recv(buf, count, datatype, MPI_ANY_SOURCE, tag, comm, &status); + Request::recv(buf, count, datatype, MPI_ANY_SOURCE, tag, comm, &status); /* search for where it is */ int myordering = 0; @@ -153,9 +153,9 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, /* send header followed by data */ if (header_buf[myordering + 1] != -1) { - smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1], + Request::send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1], tag, comm); - smpi_mpi_send(buf, count, datatype, header_buf[myordering + 1], tag, comm); + Request::send(buf, count, datatype, header_buf[myordering + 1], tag, comm); } } } @@ -178,7 +178,7 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, //iteration++; //start = MPI_Wtime(); for (i = 1; i < size; i++) { - smpi_mpi_iprobe(i, MPI_ANY_TAG, comm, &flag_array[i], + Request::iprobe(i, MPI_ANY_TAG, comm, &flag_array[i], &temp_status_array[i]); } //total = MPI_Wtime() - start; @@ -191,7 +191,7 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, for (i = 1; i < size; i++) { /* message arrive */ if ((flag_array[i] == 1) && (already_sent[i] == 0)) { - smpi_mpi_recv(&temp_buf[i], 1, MPI_CHAR, i, tag, comm, + Request::recv(&temp_buf[i], 1, MPI_CHAR, i, tag, comm, &status); header_buf[header_index] = i; header_index++; @@ -223,7 +223,7 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, //start = MPI_Wtime(); /* send header */ - smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); + Request::send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); //total = MPI_Wtime() - start; //total *= 1000; @@ -235,16 +235,16 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, if (0 == 1) { //if (header_index == 1) { - smpi_mpi_send(buf, count, datatype, to, tag, comm); + Request::send(buf, count, datatype, to, tag, comm); } /* send data - pipeline 
*/ else { for (i = 0; i < pipe_length; i++) { - smpi_mpi_send((char *)buf + (i * increment), segment, datatype, to, tag, comm); + Request::send((char *)buf + (i * increment), segment, datatype, to, tag, comm); } - //smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); + //Request::waitall((pipe_length), send_request_array, send_status_array); } //total = MPI_Wtime() - start; //total *= 1000; @@ -264,16 +264,16 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, to = i; //start = MPI_Wtime(); - smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); + Request::send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); /* still need to chop data so that we can use the same non-root code */ for (j = 0; j < pipe_length; j++) { - smpi_mpi_send((char *)buf + (j * increment), segment, datatype, to, tag, + Request::send((char *)buf + (j * increment), segment, datatype, to, tag, comm); } - //smpi_mpi_send(buf,count,datatype,to,tag,comm); - //smpi_mpi_wait(&request,MPI_STATUS_IGNORE); + //Request::send(buf,count,datatype,to,tag,comm); + //Request::wait(&request,MPI_STATUS_IGNORE); //total = MPI_Wtime() - start; //total *= 1000; @@ -291,7 +291,7 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, } /* while loop */ for(i=0; i= num_procs) dst -= num_procs; - smpi_mpi_send(buff, count, data_type, dst, tag, comm); + Request::send(buff, count, data_type, dst, tag, comm); } mask >>= 1; } diff --git a/src/smpi/colls/bcast-flattree-pipeline.cpp b/src/smpi/colls/bcast-flattree-pipeline.cpp index 9246b51841..29701ba592 100644 --- a/src/smpi/colls/bcast-flattree-pipeline.cpp +++ b/src/smpi/colls/bcast-flattree-pipeline.cpp @@ -38,9 +38,9 @@ smpi_coll_tuned_bcast_flattree_pipeline(void *buff, int count, if (rank != root) { for (i = 0; i < pipe_length; i++) { - request_array[i] = smpi_mpi_irecv((char *)buff + (i * increment), segment, data_type, root, tag, comm); + request_array[i] = Request::irecv((char *)buff + (i * 
increment), segment, data_type, root, tag, comm); } - smpi_mpi_waitall(pipe_length, request_array, status_array); + Request::waitall(pipe_length, request_array, status_array); } else { @@ -50,7 +50,7 @@ smpi_coll_tuned_bcast_flattree_pipeline(void *buff, int count, continue; else { for (i = 0; i < pipe_length; i++) { - smpi_mpi_send((char *)buff + (i * increment), segment, data_type, j, tag, comm); + Request::send((char *)buff + (i * increment), segment, data_type, j, tag, comm); } } } diff --git a/src/smpi/colls/bcast-flattree.cpp b/src/smpi/colls/bcast-flattree.cpp index bafa90d3b4..5fe60c16d2 100644 --- a/src/smpi/colls/bcast-flattree.cpp +++ b/src/smpi/colls/bcast-flattree.cpp @@ -20,7 +20,7 @@ smpi_coll_tuned_bcast_flattree(void *buff, int count, MPI_Datatype data_type, num_procs = comm->size(); if (rank != root) { - smpi_mpi_recv(buff, count, data_type, root, tag, comm, MPI_STATUS_IGNORE); + Request::recv(buff, count, data_type, root, tag, comm, MPI_STATUS_IGNORE); } else { @@ -31,11 +31,11 @@ smpi_coll_tuned_bcast_flattree(void *buff, int count, MPI_Datatype data_type, for (i = 0; i < num_procs; i++) { if (i == rank) continue; - *(req_ptr++) = smpi_mpi_isend(buff, count, data_type, i, tag, comm); + *(req_ptr++) = Request::isend(buff, count, data_type, i, tag, comm); } // wait on all requests - smpi_mpi_waitall(num_procs - 1, reqs, MPI_STATUSES_IGNORE); + Request::waitall(num_procs - 1, reqs, MPI_STATUSES_IGNORE); free(reqs); } diff --git a/src/smpi/colls/bcast-mvapich-smp.cpp b/src/smpi/colls/bcast-mvapich-smp.cpp index 083110b9fa..886f48e8d6 100644 --- a/src/smpi/colls/bcast-mvapich-smp.cpp +++ b/src/smpi/colls/bcast-mvapich-smp.cpp @@ -119,11 +119,11 @@ int smpi_coll_tuned_bcast_mvapich2_inter_node(void *buffer, if (local_size > 1) { if ((local_rank == 0) && (root != rank) && (leader_root == global_rank)) { - smpi_mpi_recv(buffer, count, datatype, root, + Request::recv(buffer, count, datatype, root, COLL_TAG_BCAST, comm, MPI_STATUS_IGNORE); } if 
((local_rank != 0) && (root == rank)) { - smpi_mpi_send(buffer, count, datatype, + Request::send(buffer, count, datatype, leader_of_root, COLL_TAG_BCAST, comm); } } @@ -212,7 +212,7 @@ int smpi_coll_tuned_bcast_mvapich2_knomial_intra_node(void *buffer, src -= local_size; } - smpi_mpi_recv(buffer, count, datatype, src, + Request::recv(buffer, count, datatype, src, COLL_TAG_BCAST, comm, MPI_STATUS_IGNORE); break; @@ -229,11 +229,11 @@ int smpi_coll_tuned_bcast_mvapich2_knomial_intra_node(void *buffer, if (dst >= local_size) { dst -= local_size; } - reqarray[reqs++]=smpi_mpi_isend(buffer, count, datatype, dst, + reqarray[reqs++]=Request::isend(buffer, count, datatype, dst, COLL_TAG_BCAST, comm); } } - smpi_mpi_waitall(reqs, reqarray, starray); + Request::waitall(reqs, reqarray, starray); mask /= mv2_intra_node_knomial_factor; } diff --git a/src/smpi/colls/bcast-ompi-pipeline.cpp b/src/smpi/colls/bcast-ompi-pipeline.cpp index a55825009d..802389d1c5 100644 --- a/src/smpi/colls/bcast-ompi-pipeline.cpp +++ b/src/smpi/colls/bcast-ompi-pipeline.cpp @@ -100,13 +100,13 @@ int smpi_coll_tuned_bcast_ompi_pipeline( void* buffer, sendcount = original_count - segindex * count_by_segment; } for( i = 0; i < tree->tree_nextsize; i++ ) { - send_reqs[i] = smpi_mpi_isend(tmpbuf, sendcount, datatype, + send_reqs[i] = Request::isend(tmpbuf, sendcount, datatype, tree->tree_next[i], COLL_TAG_BCAST, comm); } /* complete the sends before starting the next sends */ - smpi_mpi_waitall( tree->tree_nextsize, send_reqs, + Request::waitall( tree->tree_nextsize, send_reqs, MPI_STATUSES_IGNORE ); /* update tmp buffer */ @@ -129,7 +129,7 @@ int smpi_coll_tuned_bcast_ompi_pipeline( void* buffer, 5) Send the last segment to children */ req_index = 0; - recv_reqs[req_index]=smpi_mpi_irecv(tmpbuf, count_by_segment, datatype, + recv_reqs[req_index]=Request::irecv(tmpbuf, count_by_segment, datatype, tree->tree_prev, COLL_TAG_BCAST, comm); @@ -138,23 +138,23 @@ int smpi_coll_tuned_bcast_ompi_pipeline( void* 
buffer, req_index = req_index ^ 0x1; /* post new irecv */ - recv_reqs[req_index]= smpi_mpi_irecv( tmpbuf + realsegsize, count_by_segment, + recv_reqs[req_index]= Request::irecv( tmpbuf + realsegsize, count_by_segment, datatype, tree->tree_prev, COLL_TAG_BCAST, comm); /* wait for and forward the previous segment to children */ - smpi_mpi_wait( &recv_reqs[req_index ^ 0x1], + Request::wait( &recv_reqs[req_index ^ 0x1], MPI_STATUSES_IGNORE ); for( i = 0; i < tree->tree_nextsize; i++ ) { - send_reqs[i]=smpi_mpi_isend(tmpbuf, count_by_segment, datatype, + send_reqs[i]=Request::isend(tmpbuf, count_by_segment, datatype, tree->tree_next[i], COLL_TAG_BCAST, comm ); } /* complete the sends before starting the next iteration */ - smpi_mpi_waitall( tree->tree_nextsize, send_reqs, + Request::waitall( tree->tree_nextsize, send_reqs, MPI_STATUSES_IGNORE ); /* Update the receive buffer */ @@ -162,15 +162,15 @@ int smpi_coll_tuned_bcast_ompi_pipeline( void* buffer, } /* Process the last segment */ - smpi_mpi_wait( &recv_reqs[req_index], MPI_STATUSES_IGNORE ); + Request::wait( &recv_reqs[req_index], MPI_STATUSES_IGNORE ); sendcount = original_count - (num_segments - 1) * count_by_segment; for( i = 0; i < tree->tree_nextsize; i++ ) { - send_reqs[i] = smpi_mpi_isend(tmpbuf, sendcount, datatype, + send_reqs[i] = Request::isend(tmpbuf, sendcount, datatype, tree->tree_next[i], COLL_TAG_BCAST, comm); } - smpi_mpi_waitall( tree->tree_nextsize, send_reqs, + Request::waitall( tree->tree_nextsize, send_reqs, MPI_STATUSES_IGNORE ); } @@ -185,7 +185,7 @@ int smpi_coll_tuned_bcast_ompi_pipeline( void* buffer, 3) wait for the last segment */ req_index = 0; - recv_reqs[req_index] = smpi_mpi_irecv(tmpbuf, count_by_segment, datatype, + recv_reqs[req_index] = Request::irecv(tmpbuf, count_by_segment, datatype, tree->tree_prev, COLL_TAG_BCAST, comm); @@ -193,15 +193,15 @@ int smpi_coll_tuned_bcast_ompi_pipeline( void* buffer, req_index = req_index ^ 0x1; tmpbuf += realsegsize; /* post receive for the 
next segment */ - recv_reqs[req_index] = smpi_mpi_irecv(tmpbuf, count_by_segment, datatype, + recv_reqs[req_index] = Request::irecv(tmpbuf, count_by_segment, datatype, tree->tree_prev, COLL_TAG_BCAST, comm); /* wait on the previous segment */ - smpi_mpi_wait( &recv_reqs[req_index ^ 0x1], + Request::wait( &recv_reqs[req_index ^ 0x1], MPI_STATUS_IGNORE ); } - smpi_mpi_wait( &recv_reqs[req_index], MPI_STATUS_IGNORE ); + Request::wait( &recv_reqs[req_index], MPI_STATUS_IGNORE ); } if( NULL != send_reqs ) free(send_reqs); diff --git a/src/smpi/colls/bcast-ompi-split-bintree.cpp b/src/smpi/colls/bcast-ompi-split-bintree.cpp index fdf9b1f08a..044d3a7ceb 100644 --- a/src/smpi/colls/bcast-ompi-split-bintree.cpp +++ b/src/smpi/colls/bcast-ompi-split-bintree.cpp @@ -173,7 +173,7 @@ smpi_coll_tuned_bcast_ompi_split_bintree ( void* buffer, if(segindex == (num_segments[i] - 1)) sendcount[i] = counts[i] - segindex*segcount[i]; /* send data */ - smpi_mpi_send(tmpbuf[i], sendcount[i], datatype, + Request::send(tmpbuf[i], sendcount[i], datatype, tree->tree_next[i], COLL_TAG_BCAST, comm); /* update tmp buffer */ tmpbuf[i] += realsegsize[i]; @@ -195,7 +195,7 @@ smpi_coll_tuned_bcast_ompi_split_bintree ( void* buffer, * and we disseminating the data to all children. 
*/ sendcount[lr] = segcount[lr]; - base_req=smpi_mpi_irecv(tmpbuf[lr], sendcount[lr], datatype, + base_req=Request::irecv(tmpbuf[lr], sendcount[lr], datatype, tree->tree_prev, COLL_TAG_BCAST, comm); @@ -204,14 +204,14 @@ smpi_coll_tuned_bcast_ompi_split_bintree ( void* buffer, if( segindex == (num_segments[lr] - 1)) sendcount[lr] = counts[lr] - segindex*segcount[lr]; /* post new irecv */ - new_req = smpi_mpi_irecv( tmpbuf[lr] + realsegsize[lr], sendcount[lr], + new_req = Request::irecv( tmpbuf[lr] + realsegsize[lr], sendcount[lr], datatype, tree->tree_prev, COLL_TAG_BCAST, comm); /* wait for and forward current segment */ - smpi_mpi_waitall( 1, &base_req, MPI_STATUSES_IGNORE ); + Request::waitall( 1, &base_req, MPI_STATUSES_IGNORE ); for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children (segcount[lr]) */ - smpi_mpi_send( tmpbuf[lr], segcount[lr], datatype, + Request::send( tmpbuf[lr], segcount[lr], datatype, tree->tree_next[i], COLL_TAG_BCAST, comm); } /* end of for each child */ @@ -223,9 +223,9 @@ smpi_coll_tuned_bcast_ompi_split_bintree ( void* buffer, } /* end of for segindex */ /* wait for the last segment and forward current segment */ - smpi_mpi_waitall( 1, &base_req, MPI_STATUSES_IGNORE ); + Request::waitall( 1, &base_req, MPI_STATUSES_IGNORE ); for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children */ - smpi_mpi_send(tmpbuf[lr], sendcount[lr], datatype, + Request::send(tmpbuf[lr], sendcount[lr], datatype, tree->tree_next[i], COLL_TAG_BCAST, comm); } /* end of for each child */ } @@ -238,7 +238,7 @@ smpi_coll_tuned_bcast_ompi_split_bintree ( void* buffer, /* determine how many elements to expect in this round */ if (segindex == (num_segments[lr] - 1)) sendcount[lr] = counts[lr] - segindex*segcount[lr]; /* receive segments */ - smpi_mpi_recv(tmpbuf[lr], sendcount[lr], datatype, + Request::recv(tmpbuf[lr], sendcount[lr], datatype, tree->tree_prev, COLL_TAG_BCAST, comm, MPI_STATUS_IGNORE); /* update the initial pointer to the 
buffer */ @@ -267,7 +267,7 @@ smpi_coll_tuned_bcast_ompi_split_bintree ( void* buffer, if ( (size%2) != 0 && rank != root) { - smpi_mpi_sendrecv( tmpbuf[lr], counts[lr], datatype, + Request::sendrecv( tmpbuf[lr], counts[lr], datatype, pair, COLL_TAG_BCAST, tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype, pair, COLL_TAG_BCAST, @@ -275,19 +275,19 @@ smpi_coll_tuned_bcast_ompi_split_bintree ( void* buffer, } else if ( (size%2) == 0 ) { /* root sends right buffer to the last node */ if( rank == root ) { - smpi_mpi_send(tmpbuf[1], counts[1], datatype, + Request::send(tmpbuf[1], counts[1], datatype, (root+size-1)%size, COLL_TAG_BCAST, comm); } /* last node receives right buffer from the root */ else if (rank == (root+size-1)%size) { - smpi_mpi_recv(tmpbuf[1], counts[1], datatype, + Request::recv(tmpbuf[1], counts[1], datatype, root, COLL_TAG_BCAST, comm, MPI_STATUS_IGNORE); } /* everyone else exchanges buffers */ else { - smpi_mpi_sendrecv( tmpbuf[lr], counts[lr], datatype, + Request::sendrecv( tmpbuf[lr], counts[lr], datatype, pair, COLL_TAG_BCAST, tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype, pair, COLL_TAG_BCAST, diff --git a/src/smpi/colls/bcast-scatter-LR-allgather.cpp b/src/smpi/colls/bcast-scatter-LR-allgather.cpp index d8091d9e98..96ed610fd3 100644 --- a/src/smpi/colls/bcast-scatter-LR-allgather.cpp +++ b/src/smpi/colls/bcast-scatter-LR-allgather.cpp @@ -103,7 +103,7 @@ smpi_coll_tuned_bcast_scatter_LR_allgather(void *buff, int count, curr_size = 0; // this process doesn't receive any data // because of uneven division else { - smpi_mpi_recv((char *) buff + relative_rank * scatter_size, recv_size, + Request::recv((char *) buff + relative_rank * scatter_size, recv_size, MPI_BYTE, src, tag, comm, &status); curr_size = smpi_mpi_get_count(&status, MPI_BYTE); } @@ -127,7 +127,7 @@ smpi_coll_tuned_bcast_scatter_LR_allgather(void *buff, int count, dst = rank + mask; if (dst >= num_procs) dst -= num_procs; - smpi_mpi_send((char *) buff + scatter_size * (relative_rank + 
mask), + Request::send((char *) buff + scatter_size * (relative_rank + mask), send_size, MPI_BYTE, dst, tag, comm); curr_size -= send_size; @@ -159,7 +159,7 @@ smpi_coll_tuned_bcast_scatter_LR_allgather(void *buff, int count, next_src = left; for (i = 1; i < num_procs; i++) { - smpi_mpi_sendrecv((char *) buff + disps[(src - root + num_procs) % num_procs], + Request::sendrecv((char *) buff + disps[(src - root + num_procs) % num_procs], recv_counts[(src - root + num_procs) % num_procs], MPI_BYTE, right, tag, (char *) buff + diff --git a/src/smpi/colls/bcast-scatter-rdb-allgather.cpp b/src/smpi/colls/bcast-scatter-rdb-allgather.cpp index fa49229a1f..dd4e565465 100644 --- a/src/smpi/colls/bcast-scatter-rdb-allgather.cpp +++ b/src/smpi/colls/bcast-scatter-rdb-allgather.cpp @@ -51,7 +51,7 @@ static int scatter_for_bcast( } else { - smpi_mpi_recv(((char *)tmp_buf + + Request::recv(((char *)tmp_buf + relative_rank*scatter_size), recv_size, MPI_BYTE, src, COLL_TAG_BCAST, comm, &status); @@ -80,7 +80,7 @@ static int scatter_for_bcast( { dst = rank + mask; if (dst >= comm_size) dst -= comm_size; - smpi_mpi_send(((char *)tmp_buf + + Request::send(((char *)tmp_buf + scatter_size*(relative_rank+mask)), send_size, MPI_BYTE, dst, COLL_TAG_BCAST, comm); @@ -206,7 +206,7 @@ smpi_coll_tuned_bcast_scatter_rdb_allgather ( if (relative_dst < comm_size) { - smpi_mpi_sendrecv(((char *)tmp_buf + send_offset), + Request::sendrecv(((char *)tmp_buf + send_offset), curr_size, MPI_BYTE, dst, COLL_TAG_BCAST, ((char *)tmp_buf + recv_offset), (nbytes-recv_offset < 0 ? 
0 : nbytes-recv_offset), @@ -273,7 +273,7 @@ smpi_coll_tuned_bcast_scatter_rdb_allgather ( /* printf("Rank %d, send to %d, offset %d, size %d\n", rank, dst, offset, recv_size); fflush(stdout); */ - smpi_mpi_send(((char *)tmp_buf + offset), + Request::send(((char *)tmp_buf + offset), recv_size, MPI_BYTE, dst, COLL_TAG_BCAST, comm); /* recv_size was set in the previous @@ -288,7 +288,7 @@ smpi_coll_tuned_bcast_scatter_rdb_allgather ( { /* printf("Rank %d waiting to recv from rank %d\n", relative_rank, dst); */ - smpi_mpi_recv(((char *)tmp_buf + offset), + Request::recv(((char *)tmp_buf + offset), nbytes - offset, MPI_BYTE, dst, COLL_TAG_BCAST, comm, &status); diff --git a/src/smpi/colls/gather-mvapich.cpp b/src/smpi/colls/gather-mvapich.cpp index 4baf4db0da..a2ea22d077 100644 --- a/src/smpi/colls/gather-mvapich.cpp +++ b/src/smpi/colls/gather-mvapich.cpp @@ -385,7 +385,7 @@ int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf, } if ((local_rank == 0) && (root != rank) && (leader_of_root == rank)) { - smpi_mpi_send(leader_gather_buf, + Request::send(leader_gather_buf, nbytes * comm_size, MPI_BYTE, root, COLL_TAG_GATHER, comm); } @@ -393,7 +393,7 @@ int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf, if (rank == root && local_rank != 0) { /* The root of the gather operation is not the node leader. 
Receive y* data from the node leader */ - smpi_mpi_recv(recvbuf, recvcnt * comm_size, recvtype, + Request::recv(recvbuf, recvcnt * comm_size, recvtype, leader_of_root, COLL_TAG_GATHER, comm, &status); } diff --git a/src/smpi/colls/gather-ompi.cpp b/src/smpi/colls/gather-ompi.cpp index e7d1cff536..0e1ea4a2e3 100644 --- a/src/smpi/colls/gather-ompi.cpp +++ b/src/smpi/colls/gather-ompi.cpp @@ -138,7 +138,7 @@ smpi_coll_tuned_gather_ompi_binomial(void *sbuf, int scount, "smpi_coll_tuned_gather_ompi_binomial rank %d recv %d mycount = %d", rank, bmtree->tree_next[i], mycount); - smpi_mpi_recv(ptmp + total_recv*rextent, mycount, rdtype, + Request::recv(ptmp + total_recv*rextent, mycount, rdtype, bmtree->tree_next[i], COLL_TAG_GATHER, comm, &status); @@ -152,7 +152,7 @@ smpi_coll_tuned_gather_ompi_binomial(void *sbuf, int scount, "smpi_coll_tuned_gather_ompi_binomial rank %d send %d count %d\n", rank, bmtree->tree_prev, total_recv); - smpi_mpi_send(ptmp, total_recv, sdtype, + Request::send(ptmp, total_recv, sdtype, bmtree->tree_prev, COLL_TAG_GATHER, comm); @@ -245,15 +245,15 @@ smpi_coll_tuned_gather_ompi_linear_sync(void *sbuf, int scount, COLL_TUNED_COMPUTED_SEGCOUNT( (size_t) first_segment_size, typelng, first_segment_count ); - smpi_mpi_recv(sbuf, 0, MPI_BYTE, root, + Request::recv(sbuf, 0, MPI_BYTE, root, COLL_TAG_GATHER, comm, MPI_STATUS_IGNORE); - smpi_mpi_send(sbuf, first_segment_count, sdtype, root, + Request::send(sbuf, first_segment_count, sdtype, root, COLL_TAG_GATHER, comm); - smpi_mpi_send((char*)sbuf + extent * first_segment_count, + Request::send((char*)sbuf + extent * first_segment_count, (scount - first_segment_count), sdtype, root, COLL_TAG_GATHER, comm); @@ -289,23 +289,23 @@ smpi_coll_tuned_gather_ompi_linear_sync(void *sbuf, int scount, /* irecv for the first segment from i */ ptmp = (char*)rbuf + i * rcount * extent; - first_segment_req = smpi_mpi_irecv(ptmp, first_segment_count, rdtype, i, + first_segment_req = Request::irecv(ptmp, 
first_segment_count, rdtype, i, COLL_TAG_GATHER, comm ); /* send sync message */ - smpi_mpi_send(rbuf, 0, MPI_BYTE, i, + Request::send(rbuf, 0, MPI_BYTE, i, COLL_TAG_GATHER, comm); /* irecv for the second segment */ ptmp = (char*)rbuf + (i * rcount + first_segment_count) * extent; - reqs[i]=smpi_mpi_irecv(ptmp, (rcount - first_segment_count), + reqs[i]=Request::irecv(ptmp, (rcount - first_segment_count), rdtype, i, COLL_TAG_GATHER, comm ); /* wait on the first segment to complete */ - smpi_mpi_wait(&first_segment_req, MPI_STATUS_IGNORE); + Request::wait(&first_segment_req, MPI_STATUS_IGNORE); } /* copy local data if necessary */ @@ -317,7 +317,7 @@ smpi_coll_tuned_gather_ompi_linear_sync(void *sbuf, int scount, } /* wait all second segments to complete */ - ret = smpi_mpi_waitall(size, reqs, MPI_STATUSES_IGNORE); + ret = Request::waitall(size, reqs, MPI_STATUSES_IGNORE); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } free(reqs); @@ -378,7 +378,7 @@ smpi_coll_tuned_gather_ompi_basic_linear(void *sbuf, int scount, "ompi_coll_tuned_gather_intra_basic_linear rank %d", rank); if (rank != root) { - smpi_mpi_send(sbuf, scount, sdtype, root, + Request::send(sbuf, scount, sdtype, root, COLL_TAG_GATHER, comm); return MPI_SUCCESS; @@ -397,7 +397,7 @@ smpi_coll_tuned_gather_ompi_basic_linear(void *sbuf, int scount, err = MPI_SUCCESS; } } else { - smpi_mpi_recv(ptmp, rcount, rdtype, i, + Request::recv(ptmp, rcount, rdtype, i, COLL_TAG_GATHER, comm, MPI_STATUS_IGNORE); err = MPI_SUCCESS; diff --git a/src/smpi/colls/reduce-NTSL.cpp b/src/smpi/colls/reduce-NTSL.cpp index c2560e5ebe..c7a3e0c130 100644 --- a/src/smpi/colls/reduce-NTSL.cpp +++ b/src/smpi/colls/reduce-NTSL.cpp @@ -54,10 +54,10 @@ int smpi_coll_tuned_reduce_NTSL(void *buf, void *rbuf, int count, /* if (root != 0) { if (rank == root){ - smpi_mpi_send(buf,count,datatype,0,tag,comm); + Request::send(buf,count,datatype,0,tag,comm); } else if (rank == 0) { - 
smpi_mpi_recv(buf,count,datatype,root,tag,comm,&status); + Request::recv(buf,count,datatype,root,tag,comm,&status); } } */ @@ -65,20 +65,20 @@ int smpi_coll_tuned_reduce_NTSL(void *buf, void *rbuf, int count, char *tmp_buf; tmp_buf = (char *) smpi_get_tmp_sendbuffer(count * extent); - smpi_mpi_sendrecv(buf, count, datatype, rank, tag, rbuf, count, datatype, rank, + Request::sendrecv(buf, count, datatype, rank, tag, rbuf, count, datatype, rank, tag, comm, &status); /* when a message is smaller than a block size => no pipeline */ if (count <= segment) { if (rank == root) { - smpi_mpi_recv(tmp_buf, count, datatype, from, tag, comm, &status); + Request::recv(tmp_buf, count, datatype, from, tag, comm, &status); smpi_op_apply(op, tmp_buf, rbuf, &count, &datatype); } else if (rank == ((root - 1 + size) % size)) { - smpi_mpi_send(rbuf, count, datatype, to, tag, comm); + Request::send(rbuf, count, datatype, to, tag, comm); } else { - smpi_mpi_recv(tmp_buf, count, datatype, from, tag, comm, &status); + Request::recv(tmp_buf, count, datatype, from, tag, comm, &status); smpi_op_apply(op, tmp_buf, rbuf, &count, &datatype); - smpi_mpi_send(rbuf, count, datatype, to, tag, comm); + Request::send(rbuf, count, datatype, to, tag, comm); } smpi_free_tmp_buffer(tmp_buf); return MPI_SUCCESS; @@ -98,11 +98,11 @@ int smpi_coll_tuned_reduce_NTSL(void *buf, void *rbuf, int count, /* root recv data */ if (rank == root) { for (i = 0; i < pipe_length; i++) { - recv_request_array[i] = smpi_mpi_irecv((char *) tmp_buf + (i * increment), segment, datatype, from, + recv_request_array[i] = Request::irecv((char *) tmp_buf + (i * increment), segment, datatype, from, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - smpi_mpi_wait(&recv_request_array[i], &status); + Request::wait(&recv_request_array[i], &status); smpi_op_apply(op, tmp_buf + (i * increment), (char *)rbuf + (i * increment), &segment, &datatype); } @@ -111,26 +111,26 @@ int smpi_coll_tuned_reduce_NTSL(void *buf, void *rbuf, int 
count, /* last node only sends data */ else if (rank == ((root - 1 + size) % size)) { for (i = 0; i < pipe_length; i++) { - send_request_array[i] = smpi_mpi_isend((char *)rbuf + (i * increment), segment, datatype, to, (tag + i), + send_request_array[i] = Request::isend((char *)rbuf + (i * increment), segment, datatype, to, (tag + i), comm); } - smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); + Request::waitall((pipe_length), send_request_array, send_status_array); } /* intermediate nodes relay (receive, reduce, then send) data */ else { for (i = 0; i < pipe_length; i++) { - recv_request_array[i] = smpi_mpi_irecv((char *) tmp_buf + (i * increment), segment, datatype, from, + recv_request_array[i] = Request::irecv((char *) tmp_buf + (i * increment), segment, datatype, from, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { - smpi_mpi_wait(&recv_request_array[i], &status); + Request::wait(&recv_request_array[i], &status); smpi_op_apply(op, tmp_buf + (i * increment), (char *)rbuf + (i * increment), &segment, &datatype); - send_request_array[i] = smpi_mpi_isend((char *) rbuf + (i * increment), segment, datatype, to, + send_request_array[i] = Request::isend((char *) rbuf + (i * increment), segment, datatype, to, (tag + i), comm); } - smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); + Request::waitall((pipe_length), send_request_array, send_status_array); } free(send_request_array); diff --git a/src/smpi/colls/reduce-arrival-pattern-aware.cpp b/src/smpi/colls/reduce-arrival-pattern-aware.cpp index 7aaf9043cf..9778ecc141 100644 --- a/src/smpi/colls/reduce-arrival-pattern-aware.cpp +++ b/src/smpi/colls/reduce-arrival-pattern-aware.cpp @@ -74,7 +74,7 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, char *tmp_buf; tmp_buf = (char *) smpi_get_tmp_sendbuffer(count * extent); - smpi_mpi_sendrecv(buf, count, datatype, rank, tag, rbuf, count, datatype, rank, + Request::sendrecv(buf, count, datatype, 
rank, tag, rbuf, count, datatype, rank, tag, comm, &status); @@ -89,7 +89,7 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, for (i = 1; i < size; i++) { if (already_received[i] == 0) { - smpi_mpi_iprobe(i, MPI_ANY_TAG, comm, &flag_array[i], + Request::iprobe(i, MPI_ANY_TAG, comm, &flag_array[i], MPI_STATUSES_IGNORE); simcall_process_sleep(0.0001); } @@ -103,7 +103,7 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, /* 1-byte message arrive */ if ((flag_array[i] == 1) && (already_received[i] == 0)) { - smpi_mpi_recv(temp_buf, 1, MPI_CHAR, i, tag, comm, &status); + Request::recv(temp_buf, 1, MPI_CHAR, i, tag, comm, &status); header_buf[header_index] = i; header_index++; sent_count++; @@ -127,8 +127,8 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, to = header_buf[0]; from = header_buf[header_index - 1]; - smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); - smpi_mpi_recv(tmp_buf, count, datatype, from, tag, comm, &status); + Request::send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); + Request::recv(tmp_buf, count, datatype, from, tag, comm, &status); smpi_op_apply(op, tmp_buf, rbuf, &count, &datatype); } } /* while loop */ @@ -139,12 +139,12 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, else { /* send 1-byte message to root */ - smpi_mpi_send(temp_buf, 1, MPI_CHAR, 0, tag, comm); + Request::send(temp_buf, 1, MPI_CHAR, 0, tag, comm); /* wait for header and data, forward when required */ - smpi_mpi_recv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm, + Request::recv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm, &status); - // smpi_mpi_recv(buf,count,datatype,MPI_ANY_SOURCE,tag,comm,&status); + // Request::recv(buf,count,datatype,MPI_ANY_SOURCE,tag,comm,&status); /* search for where it is */ int myordering = 0; @@ -154,7 +154,7 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, /* forward header 
*/ if (header_buf[myordering + 1] != -1) { - smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1], + Request::send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1], tag, comm); } //printf("node %d ordering %d\n",rank,myordering); @@ -168,7 +168,7 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, } else { to = header_buf[myordering + 1]; } - smpi_mpi_send(rbuf, count, datatype, to, tag, comm); + Request::send(rbuf, count, datatype, to, tag, comm); } /* recv, reduce, send */ @@ -179,9 +179,9 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, to = header_buf[myordering + 1]; } from = header_buf[myordering - 1]; - smpi_mpi_recv(tmp_buf, count, datatype, from, tag, comm, &status); + Request::recv(tmp_buf, count, datatype, from, tag, comm, &status); smpi_op_apply(op, tmp_buf, rbuf, &count, &datatype); - smpi_mpi_send(rbuf, count, datatype, to, tag, comm); + Request::send(rbuf, count, datatype, to, tag, comm); } } /* non-root */ } @@ -213,11 +213,11 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, //if (i == rank) //continue; if ((already_received[i] == 0) && (will_send[i] == 0)) { - smpi_mpi_iprobe(i, MPI_ANY_TAG, comm, &flag_array[i], + Request::iprobe(i, MPI_ANY_TAG, comm, &flag_array[i], &temp_status_array[i]); if (flag_array[i] == 1) { will_send[i] = 1; - smpi_mpi_recv(&temp_buf[i], 1, MPI_CHAR, i, tag, comm, + Request::recv(&temp_buf[i], 1, MPI_CHAR, i, tag, comm, &status); //printf("recv from %d\n",i); i = 1; @@ -249,12 +249,12 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, to = header_buf[0]; /* send header */ - smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); + Request::send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm); /* recv data - pipeline */ from = header_buf[header_index - 1]; for (i = 0; i < pipe_length; i++) { - smpi_mpi_recv(tmp_buf + (i * increment), segment, datatype, from, tag, + 
Request::recv(tmp_buf + (i * increment), segment, datatype, from, tag, comm, &status); smpi_op_apply(op, tmp_buf + (i * increment), (char *)rbuf + (i * increment), &segment, &datatype); @@ -267,12 +267,12 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, /* none root */ else { /* send 1-byte message to root */ - smpi_mpi_send(temp_buf, 1, MPI_CHAR, 0, tag, comm); + Request::send(temp_buf, 1, MPI_CHAR, 0, tag, comm); /* wait for header forward when required */ - request=smpi_mpi_irecv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm); - smpi_mpi_wait(&request, MPI_STATUS_IGNORE); + request=Request::irecv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm); + Request::wait(&request, MPI_STATUS_IGNORE); /* search for where it is */ int myordering = 0; @@ -283,7 +283,7 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, /* send header when required */ if (header_buf[myordering + 1] != -1) { - smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1], + Request::send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1], tag, comm); } @@ -297,24 +297,24 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, /* send only */ if (myordering == 0) { for (i = 0; i < pipe_length; i++) { - send_request_array[i]= smpi_mpi_isend((char *)rbuf + (i * increment), segment, datatype, to, tag, comm); + send_request_array[i]= Request::isend((char *)rbuf + (i * increment), segment, datatype, to, tag, comm); } - smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); + Request::waitall((pipe_length), send_request_array, send_status_array); } /* receive, reduce, and send */ else { from = header_buf[myordering - 1]; for (i = 0; i < pipe_length; i++) { - recv_request_array[i]=smpi_mpi_irecv(tmp_buf + (i * increment), segment, datatype, from, tag, comm); + recv_request_array[i]=Request::irecv(tmp_buf + (i * increment), segment, datatype, from, tag, comm); } for (i = 
0; i < pipe_length; i++) { - smpi_mpi_wait(&recv_request_array[i], MPI_STATUS_IGNORE); + Request::wait(&recv_request_array[i], MPI_STATUS_IGNORE); smpi_op_apply(op, tmp_buf + (i * increment), (char *)rbuf + (i * increment), &segment, &datatype); - send_request_array[i]=smpi_mpi_isend((char *)rbuf + (i * increment), segment, datatype, to, tag, comm); + send_request_array[i]=Request::isend((char *)rbuf + (i * increment), segment, datatype, to, tag, comm); } - smpi_mpi_waitall((pipe_length), send_request_array, send_status_array); + Request::waitall((pipe_length), send_request_array, send_status_array); } } /* non-root */ @@ -335,9 +335,9 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, */ if (root != 0) { if (rank == 0) { - smpi_mpi_send(rbuf, count, datatype, root, tag, comm); + Request::send(rbuf, count, datatype, root, tag, comm); } else if (rank == root) { - smpi_mpi_recv(rbuf, count, datatype, 0, tag, comm, &status); + Request::recv(rbuf, count, datatype, 0, tag, comm, &status); } } diff --git a/src/smpi/colls/reduce-binomial.cpp b/src/smpi/colls/reduce-binomial.cpp index d9255a8555..c35390d5c8 100644 --- a/src/smpi/colls/reduce-binomial.cpp +++ b/src/smpi/colls/reduce-binomial.cpp @@ -59,7 +59,7 @@ int smpi_coll_tuned_reduce_binomial(void *sendbuf, void *recvbuf, int count, source = (relrank | mask); if (source < comm_size) { source = (source + lroot) % comm_size; - smpi_mpi_recv(tmp_buf, count, datatype, source, tag, comm, &status); + Request::recv(tmp_buf, count, datatype, source, tag, comm, &status); if (is_commutative) { smpi_op_apply(op, tmp_buf, recvbuf, &count, &datatype); @@ -70,7 +70,7 @@ int smpi_coll_tuned_reduce_binomial(void *sendbuf, void *recvbuf, int count, } } else { dst = ((relrank & (~mask)) + lroot) % comm_size; - smpi_mpi_send(recvbuf, count, datatype, dst, tag, comm); + Request::send(recvbuf, count, datatype, dst, tag, comm); break; } mask <<= 1; @@ -78,9 +78,9 @@ int smpi_coll_tuned_reduce_binomial(void *sendbuf, 
void *recvbuf, int count, if (!is_commutative && (root != 0)){ if (rank == 0){ - smpi_mpi_send(recvbuf, count, datatype, root,tag, comm); + Request::send(recvbuf, count, datatype, root,tag, comm); }else if (rank == root){ - smpi_mpi_recv(recvbuf, count, datatype, 0, tag, comm, &status); + Request::recv(recvbuf, count, datatype, 0, tag, comm, &status); } } diff --git a/src/smpi/colls/reduce-flat-tree.cpp b/src/smpi/colls/reduce-flat-tree.cpp index a12009c131..33999addd8 100644 --- a/src/smpi/colls/reduce-flat-tree.cpp +++ b/src/smpi/colls/reduce-flat-tree.cpp @@ -27,7 +27,7 @@ smpi_coll_tuned_reduce_flat_tree(void *sbuf, void *rbuf, int count, extent = smpi_datatype_get_extent(dtype); if (rank != root) { - smpi_mpi_send(sbuf, count, dtype, root, tag, comm); + Request::send(sbuf, count, dtype, root, tag, comm); return 0; } @@ -40,10 +40,10 @@ smpi_coll_tuned_reduce_flat_tree(void *sbuf, void *rbuf, int count, /* Initialize the receive buffer. */ if (rank == (size - 1)) - smpi_mpi_sendrecv(sbuf, count, dtype, rank, tag, + Request::sendrecv(sbuf, count, dtype, rank, tag, rbuf, count, dtype, rank, tag, comm, &status); else - smpi_mpi_recv(rbuf, count, dtype, size - 1, tag, comm, &status); + Request::recv(rbuf, count, dtype, size - 1, tag, comm, &status); /* Loop receiving and calling reduction function (C or Fortran). 
*/ @@ -51,7 +51,7 @@ smpi_coll_tuned_reduce_flat_tree(void *sbuf, void *rbuf, int count, if (rank == i) inbuf = static_cast<char*>(sbuf); else { - smpi_mpi_recv(origin, count, dtype, i, tag, comm, &status); + Request::recv(origin, count, dtype, i, tag, comm, &status); inbuf = origin; } diff --git a/src/smpi/colls/reduce-mvapich-knomial.cpp b/src/smpi/colls/reduce-mvapich-knomial.cpp index 39f69bb753..837da1e5c9 100644 --- a/src/smpi/colls/reduce-mvapich-knomial.cpp +++ b/src/smpi/colls/reduce-mvapich-knomial.cpp @@ -182,7 +182,7 @@ int smpi_coll_tuned_reduce_mvapich2_knomial ( while(recv_iter < expected_recv_count) { src = src_array[expected_recv_count - (recv_iter+1)]; - requests[recv_iter]=smpi_mpi_irecv (tmp_buf[recv_iter], count, datatype ,src, + requests[recv_iter]=Request::irecv (tmp_buf[recv_iter], count, datatype ,src, COLL_TAG_REDUCE, comm); recv_iter++; @@ -190,7 +190,7 @@ int smpi_coll_tuned_reduce_mvapich2_knomial ( recv_iter=0; while(recv_iter < expected_recv_count) { - index=smpi_mpi_waitany(expected_recv_count, requests, + index=Request::waitany(expected_recv_count, requests, &status); recv_iter++; @@ -211,10 +211,10 @@ int smpi_coll_tuned_reduce_mvapich2_knomial ( } if(rank != root) { - send_request=smpi_mpi_isend(recvbuf,count, datatype, dst, + send_request=Request::isend(recvbuf,count, datatype, dst, COLL_TAG_REDUCE,comm); - smpi_mpi_waitall(1, &send_request, &status); + Request::waitall(1, &send_request, &status); smpi_free_tmp_buffer((void *)((char*)recvbuf + true_lb)); } diff --git a/src/smpi/colls/reduce-mvapich-two-level.cpp b/src/smpi/colls/reduce-mvapich-two-level.cpp index 548c5ca986..45c5cd7d7e 100644 --- a/src/smpi/colls/reduce-mvapich-two-level.cpp +++ b/src/smpi/colls/reduce-mvapich-two-level.cpp @@ -161,11 +161,11 @@ int smpi_coll_tuned_reduce_mvapich2_two_level( void *sendbuf, } if (local_rank == 0 && root != my_rank) { - smpi_mpi_send(out_buf, count, datatype, root, + Request::send(out_buf, count, datatype, root, COLL_TAG_REDUCE+1, comm);
} if ((local_rank != 0) && (root == my_rank)) { - smpi_mpi_recv(recvbuf, count, datatype, + Request::recv(recvbuf, count, datatype, leader_of_root, COLL_TAG_REDUCE+1, comm, MPI_STATUS_IGNORE); } @@ -286,11 +286,11 @@ int smpi_coll_tuned_reduce_mvapich2_two_level( void *sendbuf, * root of the reduce operation. The reduced data is in tmp_buf */ if ((local_rank == 0) && (root != my_rank) && (leader_root == leader_comm_rank)) { - smpi_mpi_send(tmp_buf, count, datatype, root, + Request::send(tmp_buf, count, datatype, root, COLL_TAG_REDUCE+1, comm); } if ((local_rank != 0) && (root == my_rank)) { - smpi_mpi_recv(recvbuf, count, datatype, + Request::recv(recvbuf, count, datatype, leader_of_root, COLL_TAG_REDUCE+1, comm, MPI_STATUS_IGNORE); diff --git a/src/smpi/colls/reduce-ompi.cpp b/src/smpi/colls/reduce-ompi.cpp index faf37ef7d5..100023bd92 100644 --- a/src/smpi/colls/reduce-ompi.cpp +++ b/src/smpi/colls/reduce-ompi.cpp @@ -151,7 +151,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi } } - reqs[inbi]=smpi_mpi_irecv(local_recvbuf, recvcount, datatype, + reqs[inbi]=Request::irecv(local_recvbuf, recvcount, datatype, tree->tree_next[i], COLL_TAG_REDUCE, comm ); @@ -160,7 +160,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi if there are no requests reqs[inbi ^1] will be MPI_REQUEST_NULL. 
*/ /* wait on data from last child for previous segment */ - smpi_mpi_waitall( 1, &reqs[inbi ^ 1], + Request::waitall( 1, &reqs[inbi ^ 1], MPI_STATUSES_IGNORE ); local_op_buffer = inbuf[inbi ^ 1]; if( i > 0 ) { @@ -195,7 +195,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi */ if (rank != tree->tree_root) { /* send combined/accumulated data to parent */ - smpi_mpi_send( accumulator, prevcount, + Request::send( accumulator, prevcount, datatype, tree->tree_prev, COLL_TAG_REDUCE, comm); @@ -240,7 +240,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi if (original_count < count_by_segment) { count_by_segment = original_count; } - smpi_mpi_send((char*)sendbuf + + Request::send((char*)sendbuf + segindex * segment_increment, count_by_segment, datatype, tree->tree_prev, @@ -268,7 +268,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi /* post first group of requests */ for (segindex = 0; segindex < max_outstanding_reqs; segindex++) { - sreq[segindex]=smpi_mpi_isend((char*)sendbuf + + sreq[segindex]=Request::isend((char*)sendbuf + segindex * segment_increment, count_by_segment, datatype, tree->tree_prev, @@ -280,13 +280,13 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi creq = 0; while ( original_count > 0 ) { /* wait on a posted request to complete */ - smpi_mpi_wait(&sreq[creq], MPI_STATUS_IGNORE); + Request::wait(&sreq[creq], MPI_STATUS_IGNORE); sreq[creq] = MPI_REQUEST_NULL; if( original_count < count_by_segment ) { count_by_segment = original_count; } - sreq[creq]=smpi_mpi_isend((char*)sendbuf + + sreq[creq]=Request::isend((char*)sendbuf + segindex * segment_increment, count_by_segment, datatype, tree->tree_prev, @@ -298,7 +298,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi } /* Wait on the remaining request to complete */ - smpi_mpi_waitall( max_outstanding_reqs, sreq, + Request::waitall( 
max_outstanding_reqs, sreq, MPI_STATUSES_IGNORE ); /* free requests */ @@ -547,7 +547,7 @@ int smpi_coll_tuned_reduce_ompi_in_order_binary( void *sendbuf, void *recvbuf, if (io_root != root) { if (root == rank) { /* Receive result from rank io_root to recvbuf */ - smpi_mpi_recv(recvbuf, count, datatype, io_root, + Request::recv(recvbuf, count, datatype, io_root, COLL_TAG_REDUCE, comm, MPI_STATUS_IGNORE); if (MPI_IN_PLACE == sendbuf) { @@ -556,7 +556,7 @@ int smpi_coll_tuned_reduce_ompi_in_order_binary( void *sendbuf, void *recvbuf, } else if (io_root == rank) { /* Send result from use_this_recvbuf to root */ - smpi_mpi_send(use_this_recvbuf, count, datatype, root, + Request::send(use_this_recvbuf, count, datatype, root, COLL_TAG_REDUCE, comm); smpi_free_tmp_buffer(use_this_recvbuf); @@ -612,7 +612,7 @@ smpi_coll_tuned_reduce_ompi_basic_linear(void *sbuf, void *rbuf, int count, /* If not root, send data to the root. */ if (rank != root) { - smpi_mpi_send(sbuf, count, dtype, root, + Request::send(sbuf, count, dtype, root, COLL_TAG_REDUCE, comm); return MPI_SUCCESS; @@ -644,7 +644,7 @@ smpi_coll_tuned_reduce_ompi_basic_linear(void *sbuf, void *rbuf, int count, if (rank == (size - 1)) { smpi_datatype_copy((char*)sbuf, count, dtype,(char*)rbuf, count, dtype); } else { - smpi_mpi_recv(rbuf, count, dtype, size - 1, + Request::recv(rbuf, count, dtype, size - 1, COLL_TAG_REDUCE, comm, MPI_STATUS_IGNORE); } @@ -655,7 +655,7 @@ smpi_coll_tuned_reduce_ompi_basic_linear(void *sbuf, void *rbuf, int count, if (rank == i) { inbuf = (char*)sbuf; } else { - smpi_mpi_recv(pml_buffer, count, dtype, i, + Request::recv(pml_buffer, count, dtype, i, COLL_TAG_REDUCE, comm, MPI_STATUS_IGNORE); inbuf = pml_buffer; diff --git a/src/smpi/colls/reduce-rab.cpp b/src/smpi/colls/reduce-rab.cpp index f529bd8364..46ac70b50c 100644 --- a/src/smpi/colls/reduce-rab.cpp +++ b/src/smpi/colls/reduce-rab.cpp @@ -391,21 +391,21 @@ Benchmark results on CRAY T3E #ifdef USE_Irecv #define 
MPI_I_Sendrecv(sb,sc,sd,dest,st,rb,rc,rd,source,rt,comm,stat) \ { MPI_Request req; \ - req=smpi_mpi_irecv(rb,rc,rd,source,rt,comm); \ - smpi_mpi_send(sb,sc,sd,dest,st,comm); \ - smpi_mpi_wait(&req,stat); \ + req=Request::irecv(rb,rc,rd,source,rt,comm); \ + Request::send(sb,sc,sd,dest,st,comm); \ + Request::wait(&req,stat); \ } #else #ifdef USE_Isend #define MPI_I_Sendrecv(sb,sc,sd,dest,st,rb,rc,rd,source,rt,comm,stat) \ { MPI_Request req; \ - req=mpi_mpi_isend(sb,sc,sd,dest,st,comm); \ - smpi_mpi_recv(rb,rc,rd,source,rt,comm,stat); \ - smpi_mpi_wait(&req,stat); \ + req=Request::isend(sb,sc,sd,dest,st,comm); \ + Request::recv(rb,rc,rd,source,rt,comm,stat); \ + Request::wait(&req,stat); \ } #else #define MPI_I_Sendrecv(sb,sc,sd,dest,st,rb,rc,rd,source,rt,comm,stat) \ - smpi_mpi_sendrecv(sb,sc,sd,dest,st,rb,rc,rd,source,rt,comm,stat) + Request::sendrecv(sb,sc,sd,dest,st,rb,rc,rd,source,rt,comm,stat) #endif #endif @@ -634,7 +634,7 @@ static int MPI_I_anyReduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype comm, &status); MPI_I_do_op(sendbuf, scr2buf, scr1buf, count/2, datatype, op); - smpi_mpi_recv(scr1buf + (count/2)*typelng, count - count/2, + Request::recv(scr1buf + (count/2)*typelng, count - count/2, mpi_datatype, myrank+1, 1223, comm, &status); computed = 1; # ifdef DEBUG @@ -656,7 +656,7 @@ static int MPI_I_anyReduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype sendbuf + (count/2)*typelng, scr1buf + (count/2)*typelng, count - count/2, datatype, op); - smpi_mpi_send(scr1buf + (count/2)*typelng, count - count/2, + Request::send(scr1buf + (count/2)*typelng, count - count/2, mpi_datatype, myrank-1, 1223, comm); } } @@ -812,9 +812,9 @@ static int MPI_I_anyReduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype printf("[%2d] step 7 begin\n",myrank); fflush(stdout); # endif if (myrank%2 == 0 /*even*/) - smpi_mpi_send(recvbuf, count, mpi_datatype, myrank+1, 1253, comm); + Request::send(recvbuf, count, mpi_datatype, myrank+1, 1253, comm); else /*odd*/ - smpi_mpi_recv(recvbuf, count, mpi_datatype,
myrank-1, 1253, comm, &status); + Request::recv(recvbuf, count, mpi_datatype, myrank-1, 1253, comm, &status); } } @@ -831,7 +831,7 @@ static int MPI_I_anyReduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype if (myrank == 0) /* then mynewrank==0, x_start==0 x_count == count/x_size */ { - smpi_mpi_send(scr1buf,x_count,mpi_datatype,root,1241,comm); + Request::send(scr1buf,x_count,mpi_datatype,root,1241,comm); mynewrank = -1; } @@ -850,7 +850,7 @@ static int MPI_I_anyReduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype x_start = start_even[idx]; x_count = count_even[idx]; } - smpi_mpi_recv(recvbuf,x_count,mpi_datatype,0,1241,comm,&status); + Request::recv(recvbuf,x_count,mpi_datatype,0,1241,comm,&status); } newroot = 0; } @@ -880,7 +880,7 @@ static int MPI_I_anyReduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype else { x_start = start_odd[idx]; x_count = count_odd[idx]; partner = mynewrank-x_base; } - smpi_mpi_send(scr1buf + x_start*typelng, x_count, mpi_datatype, + Request::send(scr1buf + x_start*typelng, x_count, mpi_datatype, OLDRANK(partner), 1244, comm); } else /*odd*/ @@ -891,7 +891,7 @@ static int MPI_I_anyReduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype else { x_start = start_even[idx]; x_count = count_even[idx]; partner = mynewrank-x_base; } - smpi_mpi_recv((myrank==root ? recvbuf : scr1buf) + Request::recv((myrank==root ? recvbuf : scr1buf) + x_start*typelng, x_count, mpi_datatype, OLDRANK(partner), 1244, comm, &status); # ifdef DEBUG diff --git a/src/smpi/colls/reduce-scatter-gather.cpp b/src/smpi/colls/reduce-scatter-gather.cpp index 6256be704c..65279e5f26 100644 --- a/src/smpi/colls/reduce-scatter-gather.cpp +++ b/src/smpi/colls/reduce-scatter-gather.cpp @@ -57,17 +57,17 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, memcpy(send_ptr, sendbuf != MPI_IN_PLACE ? 
sendbuf : recvbuf, extent * count); //if ((rank != root)) - smpi_mpi_sendrecv(send_ptr, new_count, datatype, rank, tag, + Request::sendrecv(send_ptr, new_count, datatype, rank, tag, recv_ptr, new_count, datatype, rank, tag, comm, &status); rem = comm_size - pof2; if (rank < 2 * rem) { if (rank % 2 != 0) { /* odd */ - smpi_mpi_send(recv_ptr, new_count, datatype, rank - 1, tag, comm); + Request::send(recv_ptr, new_count, datatype, rank - 1, tag, comm); newrank = -1; } else { - smpi_mpi_recv(tmp_buf, count, datatype, rank + 1, tag, comm, &status); + Request::recv(tmp_buf, count, datatype, rank + 1, tag, comm, &status); smpi_op_apply(op, tmp_buf, recv_ptr, &new_count, &datatype); newrank = rank / 2; } @@ -110,7 +110,7 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, } /* Send data from recvbuf. Recv into tmp_buf */ - smpi_mpi_sendrecv((char *) recv_ptr + + Request::sendrecv((char *) recv_ptr + disps[send_idx] * extent, send_cnt, datatype, dst, tag, @@ -148,13 +148,13 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, for (i = 1; i < pof2; i++) disps[i] = disps[i - 1] + cnts[i - 1]; - smpi_mpi_recv(recv_ptr, cnts[0], datatype, 0, tag, comm, &status); + Request::recv(recv_ptr, cnts[0], datatype, 0, tag, comm, &status); newrank = 0; send_idx = 0; last_idx = 2; } else if (newrank == 0) { - smpi_mpi_send(recv_ptr, cnts[0], datatype, root, tag, comm); + Request::send(recv_ptr, cnts[0], datatype, root, tag, comm); newrank = -1; } newroot = 0; @@ -206,12 +206,12 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, } if (newdst_tree_root == newroot_tree_root) { - smpi_mpi_send((char *) recv_ptr + + Request::send((char *) recv_ptr + disps[send_idx] * extent, send_cnt, datatype, dst, tag, comm); break; } else { - smpi_mpi_recv((char *) recv_ptr + + Request::recv((char *) recv_ptr + disps[recv_idx] * extent, recv_cnt, datatype, dst, tag, comm, &status); } @@ -233,18 +233,18 @@ int 
smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); //if ((rank != root)) - smpi_mpi_sendrecv(sendbuf != MPI_IN_PLACE ? sendbuf : recvbuf, count, datatype, rank, tag, + Request::sendrecv(sendbuf != MPI_IN_PLACE ? sendbuf : recvbuf, count, datatype, rank, tag, recvbuf, count, datatype, rank, tag, comm, &status); rem = comm_size - pof2; if (rank < 2 * rem) { if (rank % 2 != 0) { /* odd */ - smpi_mpi_send(recvbuf, count, datatype, rank - 1, tag, comm); + Request::send(recvbuf, count, datatype, rank - 1, tag, comm); newrank = -1; } else { - smpi_mpi_recv(tmp_buf, count, datatype, rank + 1, tag, comm, &status); + Request::recv(tmp_buf, count, datatype, rank + 1, tag, comm, &status); smpi_op_apply(op, tmp_buf, recvbuf, &count, &datatype); newrank = rank / 2; } @@ -287,7 +287,7 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, } /* Send data from recvbuf. Recv into tmp_buf */ - smpi_mpi_sendrecv((char *) recvbuf + + Request::sendrecv((char *) recvbuf + disps[send_idx] * extent, send_cnt, datatype, dst, tag, @@ -324,13 +324,13 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, for (i = 1; i < pof2; i++) disps[i] = disps[i - 1] + cnts[i - 1]; - smpi_mpi_recv(recvbuf, cnts[0], datatype, 0, tag, comm, &status); + Request::recv(recvbuf, cnts[0], datatype, 0, tag, comm, &status); newrank = 0; send_idx = 0; last_idx = 2; } else if (newrank == 0) { - smpi_mpi_send(recvbuf, cnts[0], datatype, root, tag, comm); + Request::send(recvbuf, cnts[0], datatype, root, tag, comm); newrank = -1; } newroot = 0; @@ -382,12 +382,12 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, } if (newdst_tree_root == newroot_tree_root) { - smpi_mpi_send((char *) recvbuf + + Request::send((char *) recvbuf + disps[send_idx] * extent, send_cnt, datatype, dst, tag, comm); break; } else { - smpi_mpi_recv((char *) recvbuf + + Request::recv((char *) recvbuf + 
disps[recv_idx] * extent, recv_cnt, datatype, dst, tag, comm, &status); } diff --git a/src/smpi/colls/reduce_scatter-mpich.cpp b/src/smpi/colls/reduce_scatter-mpich.cpp index 2692b50a3e..00cd986e9a 100644 --- a/src/smpi/colls/reduce_scatter-mpich.cpp +++ b/src/smpi/colls/reduce_scatter-mpich.cpp @@ -74,14 +74,14 @@ int smpi_coll_tuned_reduce_scatter_mpich_pair(void *sendbuf, void *recvbuf, int /* send the data that dst needs. recv data that this process needs from src into tmp_recvbuf */ if (sendbuf != MPI_IN_PLACE) - smpi_mpi_sendrecv(((char *)sendbuf+disps[dst]*extent), + Request::sendrecv(((char *)sendbuf+disps[dst]*extent), recvcounts[dst], datatype, dst, COLL_TAG_SCATTER, tmp_recvbuf, recvcounts[rank], datatype, src, COLL_TAG_SCATTER, comm, MPI_STATUS_IGNORE); else - smpi_mpi_sendrecv(((char *)recvbuf+disps[dst]*extent), + Request::sendrecv(((char *)recvbuf+disps[dst]*extent), recvcounts[dst], datatype, dst, COLL_TAG_SCATTER, tmp_recvbuf, recvcounts[rank], datatype, src, @@ -223,7 +223,7 @@ int smpi_coll_tuned_reduce_scatter_mpich_noncomm(void *sendbuf, void *recvbuf, i send_offset += size; } - smpi_mpi_sendrecv(outgoing_data + send_offset*true_extent, + Request::sendrecv(outgoing_data + send_offset*true_extent, size, datatype, peer, COLL_TAG_SCATTER, incoming_data + recv_offset*true_extent, size, datatype, peer, COLL_TAG_SCATTER, @@ -380,7 +380,7 @@ int smpi_coll_tuned_reduce_scatter_mpich_rdb(void *sendbuf, void *recvbuf, int r received in tmp_recvbuf and then accumulated into tmp_results. accumulation is done later below. 
*/ - smpi_mpi_sendrecv(tmp_results, 1, sendtype, dst, + Request::sendrecv(tmp_results, 1, sendtype, dst, COLL_TAG_SCATTER, tmp_recvbuf, 1, recvtype, dst, COLL_TAG_SCATTER, comm, @@ -424,7 +424,7 @@ int smpi_coll_tuned_reduce_scatter_mpich_rdb(void *sendbuf, void *recvbuf, int r (rank < tree_root + nprocs_completed) && (dst >= tree_root + nprocs_completed)) { /* send the current result */ - smpi_mpi_send(tmp_recvbuf, 1, recvtype, + Request::send(tmp_recvbuf, 1, recvtype, dst, COLL_TAG_SCATTER, comm); } @@ -433,7 +433,7 @@ int smpi_coll_tuned_reduce_scatter_mpich_rdb(void *sendbuf, void *recvbuf, int r else if ((dst < rank) && (dst < tree_root + nprocs_completed) && (rank >= tree_root + nprocs_completed)) { - smpi_mpi_recv(tmp_recvbuf, 1, recvtype, dst, + Request::recv(tmp_recvbuf, 1, recvtype, dst, COLL_TAG_SCATTER, comm, MPI_STATUS_IGNORE); received = 1; diff --git a/src/smpi/colls/reduce_scatter-ompi.cpp b/src/smpi/colls/reduce_scatter-ompi.cpp index 81862299dd..2838220cea 100644 --- a/src/smpi/colls/reduce_scatter-ompi.cpp +++ b/src/smpi/colls/reduce_scatter-ompi.cpp @@ -121,13 +121,13 @@ smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(void *sbuf, two procs to do the rest of the algorithm */ if (rank < 2 * remain) { if ((rank & 1) == 0) { - smpi_mpi_send(result_buf, count, dtype, rank + 1, + Request::send(result_buf, count, dtype, rank + 1, COLL_TAG_REDUCE_SCATTER, comm); /* we don't participate from here on out */ tmp_rank = -1; } else { - smpi_mpi_recv(recv_buf, count, dtype, rank - 1, + Request::recv(recv_buf, count, dtype, rank - 1, COLL_TAG_REDUCE_SCATTER, comm, MPI_STATUS_IGNORE); @@ -215,7 +215,7 @@ smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(void *sbuf, /* actual data transfer. 
Send from result_buf, receive into recv_buf */ if (send_count > 0 && recv_count != 0) { - request=smpi_mpi_irecv(recv_buf + (ptrdiff_t)tmp_disps[recv_index] * extent, + request=Request::irecv(recv_buf + (ptrdiff_t)tmp_disps[recv_index] * extent, recv_count, dtype, peer, COLL_TAG_REDUCE_SCATTER, comm); @@ -226,7 +226,7 @@ smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(void *sbuf, } } if (recv_count > 0 && send_count != 0) { - smpi_mpi_send(result_buf + (ptrdiff_t)tmp_disps[send_index] * extent, + Request::send(result_buf + (ptrdiff_t)tmp_disps[send_index] * extent, send_count, dtype, peer, COLL_TAG_REDUCE_SCATTER, comm); @@ -237,7 +237,7 @@ smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(void *sbuf, } } if (send_count > 0 && recv_count != 0) { - smpi_mpi_wait(&request, MPI_STATUS_IGNORE); + Request::wait(&request, MPI_STATUS_IGNORE); } /* if we received something on this step, push it into @@ -276,13 +276,13 @@ smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(void *sbuf, if (rank < (2 * remain)) { if ((rank & 1) == 0) { if (rcounts[rank]) { - smpi_mpi_recv(rbuf, rcounts[rank], dtype, rank + 1, + Request::recv(rbuf, rcounts[rank], dtype, rank + 1, COLL_TAG_REDUCE_SCATTER, comm, MPI_STATUS_IGNORE); } } else { if (rcounts[rank - 1]) { - smpi_mpi_send(result_buf + disps[rank - 1] * extent, + Request::send(result_buf + disps[rank - 1] * extent, rcounts[rank - 1], dtype, rank - 1, COLL_TAG_REDUCE_SCATTER, comm); @@ -457,11 +457,11 @@ smpi_coll_tuned_reduce_scatter_ompi_ring(void *sbuf, void *rbuf, int *rcounts, inbi = 0; /* Initialize first receive from the neighbor on the left */ - reqs[inbi]=smpi_mpi_irecv(inbuf[inbi], max_block_count, dtype, recv_from, + reqs[inbi]=Request::irecv(inbuf[inbi], max_block_count, dtype, recv_from, COLL_TAG_REDUCE_SCATTER, comm ); tmpsend = accumbuf + (ptrdiff_t)displs[recv_from] * extent; - smpi_mpi_send(tmpsend, rcounts[recv_from], dtype, send_to, + Request::send(tmpsend, rcounts[recv_from], dtype, 
send_to, COLL_TAG_REDUCE_SCATTER, comm); @@ -471,12 +471,12 @@ smpi_coll_tuned_reduce_scatter_ompi_ring(void *sbuf, void *rbuf, int *rcounts, inbi = inbi ^ 0x1; /* Post irecv for the current block */ - reqs[inbi]=smpi_mpi_irecv(inbuf[inbi], max_block_count, dtype, recv_from, + reqs[inbi]=Request::irecv(inbuf[inbi], max_block_count, dtype, recv_from, COLL_TAG_REDUCE_SCATTER, comm ); /* Wait on previous block to arrive */ - smpi_mpi_wait(&reqs[inbi ^ 0x1], MPI_STATUS_IGNORE); + Request::wait(&reqs[inbi ^ 0x1], MPI_STATUS_IGNORE); /* Apply operation on previous block: result goes to rbuf rbuf[prevblock] = inbuf[inbi ^ 0x1] (op) rbuf[prevblock] @@ -485,13 +485,13 @@ smpi_coll_tuned_reduce_scatter_ompi_ring(void *sbuf, void *rbuf, int *rcounts, smpi_op_apply(op, inbuf[inbi ^ 0x1], tmprecv, &(rcounts[prevblock]), &dtype); /* send previous block to send_to */ - smpi_mpi_send(tmprecv, rcounts[prevblock], dtype, send_to, + Request::send(tmprecv, rcounts[prevblock], dtype, send_to, COLL_TAG_REDUCE_SCATTER, comm); } /* Wait on the last block to arrive */ - smpi_mpi_wait(&reqs[inbi], MPI_STATUS_IGNORE); + Request::wait(&reqs[inbi], MPI_STATUS_IGNORE); /* Apply operation on the last block (my block) rbuf[rank] = inbuf[inbi] (op) rbuf[rank] */ diff --git a/src/smpi/colls/scatter-mvapich-two-level.cpp b/src/smpi/colls/scatter-mvapich-two-level.cpp index 1edba3662c..aeeae235f5 100644 --- a/src/smpi/colls/scatter-mvapich-two-level.cpp +++ b/src/smpi/colls/scatter-mvapich-two-level.cpp @@ -124,7 +124,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_direct(void *sendbuf, /* The root of the scatter operation is not the node leader. 
Recv * data from the node leader */ leader_scatter_buf = smpi_get_tmp_sendbuffer(nbytes * comm_size); - smpi_mpi_recv(leader_scatter_buf, nbytes * comm_size, MPI_BYTE, + Request::recv(leader_scatter_buf, nbytes * comm_size, MPI_BYTE, root, COLL_TAG_SCATTER, comm, &status); } @@ -132,7 +132,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_direct(void *sendbuf, if (rank == root && local_rank != 0) { /* The root of the scatter operation is not the node leader. Send * data to the node leader */ - smpi_mpi_send(sendbuf, sendcnt * comm_size, sendtype, + Request::send(sendbuf, sendcnt * comm_size, sendtype, leader_of_root, COLL_TAG_SCATTER, comm ); } @@ -307,14 +307,14 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_binomial(void *sendbuf, /* The root of the scatter operation is not the node leader. Recv * data from the node leader */ leader_scatter_buf = smpi_get_tmp_sendbuffer(nbytes * comm_size); - smpi_mpi_recv(leader_scatter_buf, nbytes * comm_size, MPI_BYTE, + Request::recv(leader_scatter_buf, nbytes * comm_size, MPI_BYTE, root, COLL_TAG_SCATTER, comm, &status); } if (rank == root && local_rank != 0) { /* The root of the scatter operation is not the node leader. 
Send * data to the node leader */ - smpi_mpi_send(sendbuf, sendcnt * comm_size, sendtype, + Request::send(sendbuf, sendcnt * comm_size, sendtype, leader_of_root, COLL_TAG_SCATTER, comm); } diff --git a/src/smpi/colls/scatter-ompi.cpp b/src/smpi/colls/scatter-ompi.cpp index 6fb5e0c6fc..ed6b5809d4 100644 --- a/src/smpi/colls/scatter-ompi.cpp +++ b/src/smpi/colls/scatter-ompi.cpp @@ -124,7 +124,7 @@ smpi_coll_tuned_scatter_ompi_binomial(void *sbuf, int scount, if (!(vrank % 2)) { if (rank != root) { /* recv from parent on non-root */ - smpi_mpi_recv(ptmp, rcount*size, rdtype, bmtree->tree_prev, + Request::recv(ptmp, rcount*size, rdtype, bmtree->tree_prev, COLL_TAG_SCATTER, comm, &status); /* local copy to rbuf */ smpi_datatype_copy(ptmp, scount, sdtype, rbuf, rcount, rdtype); @@ -139,7 +139,7 @@ smpi_coll_tuned_scatter_ompi_binomial(void *sbuf, int scount, mycount = size - vkid; mycount *= scount; - smpi_mpi_send(ptmp + total_send*sextent, mycount, sdtype, + Request::send(ptmp + total_send*sextent, mycount, sdtype, bmtree->tree_next[i], COLL_TAG_SCATTER, comm); @@ -150,7 +150,7 @@ smpi_coll_tuned_scatter_ompi_binomial(void *sbuf, int scount, } else { /* recv from parent on leaf nodes */ - smpi_mpi_recv(ptmp, rcount, rdtype, bmtree->tree_prev, + Request::recv(ptmp, rcount, rdtype, bmtree->tree_prev, COLL_TAG_SCATTER, comm, &status); } @@ -211,7 +211,7 @@ smpi_coll_tuned_scatter_ompi_basic_linear(void *sbuf, int scount, /* If not root, receive data. 
*/ if (rank != root) { - smpi_mpi_recv(rbuf, rcount, rdtype, root, + Request::recv(rbuf, rcount, rdtype, root, COLL_TAG_SCATTER, comm, MPI_STATUS_IGNORE); return MPI_SUCCESS; @@ -236,7 +236,7 @@ smpi_coll_tuned_scatter_ompi_basic_linear(void *sbuf, int scount, rdtype); } } else { - smpi_mpi_send(ptmp, scount, sdtype, i, + Request::send(ptmp, scount, sdtype, i, COLL_TAG_SCATTER, comm); } diff --git a/src/smpi/private.h b/src/smpi/private.h index 3f65868a7e..bbf58b7c27 100644 --- a/src/smpi/private.h +++ b/src/smpi/private.h @@ -18,10 +18,14 @@ #include "xbt/xbt_os_time.h" #include "src/smpi/smpi_group.hpp" #include "src/smpi/smpi_comm.hpp" +#include "src/smpi/smpi_request.hpp" #include "src/smpi/smpi_topo.hpp" #include "src/smpi/smpi_win.hpp" SG_BEGIN_DECL() + +using namespace simgrid::smpi; + struct s_smpi_process_data; typedef struct s_smpi_process_data *smpi_process_data_t; @@ -84,34 +88,7 @@ typedef struct s_smpi_mpi_datatype{ extern XBT_PRIVATE MPI_Comm MPI_COMM_UNINITIALIZED; -typedef struct s_smpi_mpi_request { - void *buf; - /* in the case of non-contiguous memory the user address should be keep - * to unserialize the data inside the user memory*/ - void *old_buf; - /* this let us know how to unserialize at the end of - * the communication*/ - MPI_Datatype old_type; - size_t size; - int src; - int dst; - int tag; - //to handle cases where we have an unknown sender - //We can't override src, tag, and size, because the request may be reused later - int real_src; - int real_tag; - int truncated; - size_t real_size; - MPI_Comm comm; - smx_activity_t action; - unsigned flags; - int detached; - MPI_Request detached_sender; - int refcount; - MPI_Op op; - int send; - int recv; -} s_smpi_mpi_request_t; + typedef struct s_smpi_mpi_comm_key_elem { MPI_Comm_copy_attr_function* copy_fn; @@ -221,6 +198,7 @@ XBT_PRIVATE int smpi_mpi_pack(void* inbuf, int incount, MPI_Datatype type, void* MPI_Comm comm); XBT_PRIVATE void smpi_empty_status(MPI_Status * status); +XBT_PRIVATE 
int smpi_mpi_get_count(MPI_Status * status, MPI_Datatype datatype); XBT_PRIVATE MPI_Op smpi_op_new(MPI_User_function * function, bool commute); XBT_PRIVATE bool smpi_op_is_commute(MPI_Op op); XBT_PRIVATE void smpi_op_destroy(MPI_Op op); @@ -250,43 +228,7 @@ XBT_PRIVATE int smpi_info_c2f(MPI_Info info); XBT_PRIVATE int smpi_info_add_f(MPI_Info info); XBT_PRIVATE MPI_Info smpi_info_f2c(int info); -XBT_PRIVATE MPI_Request smpi_mpi_send_init(void *buf, int count, MPI_Datatype datatype, int dst, int tag, - MPI_Comm comm); -XBT_PRIVATE MPI_Request smpi_mpi_recv_init(void *buf, int count, MPI_Datatype datatype, int src, int tag, - MPI_Comm comm); -XBT_PRIVATE MPI_Request smpi_mpi_ssend_init(void *buf, int count, MPI_Datatype datatype, int dst, int tag, - MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_start(MPI_Request request); -XBT_PRIVATE void smpi_mpi_startall(int count, MPI_Request * requests); -XBT_PRIVATE void smpi_mpi_request_free(MPI_Request * request); -XBT_PRIVATE MPI_Request smpi_isend_init(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm); -XBT_PRIVATE MPI_Request smpi_mpi_isend(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm); -XBT_PRIVATE MPI_Request smpi_issend_init(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm); -XBT_PRIVATE MPI_Request smpi_mpi_issend(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm); -XBT_PRIVATE MPI_Request smpi_irecv_init(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm); -XBT_PRIVATE MPI_Request smpi_mpi_irecv(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm); -XBT_PRIVATE MPI_Request smpi_rma_send_init(void *buf, int count, MPI_Datatype datatype, int src, int dst, int tag, - MPI_Comm comm, MPI_Op op); -XBT_PRIVATE MPI_Request smpi_rma_recv_init(void *buf, int count, MPI_Datatype datatype, int src, int dst, int tag, - MPI_Comm comm, MPI_Op op); -XBT_PRIVATE void 
smpi_mpi_recv(void *buf, int count, MPI_Datatype datatype, int src,int tag, MPI_Comm comm, - MPI_Status * status); -XBT_PRIVATE void smpi_mpi_send(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_ssend(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype, int dst, int sendtag, - void *recvbuf, int recvcount, MPI_Datatype recvtype, int src, int recvtag, - MPI_Comm comm, MPI_Status * status); -XBT_PRIVATE int smpi_mpi_test(MPI_Request * request, MPI_Status * status); -XBT_PRIVATE int smpi_mpi_testany(int count, MPI_Request requests[], int *index, MPI_Status * status); -XBT_PRIVATE int smpi_mpi_testall(int count, MPI_Request requests[], MPI_Status status[]); -XBT_PRIVATE void smpi_mpi_probe(int source, int tag, MPI_Comm comm, MPI_Status* status); -XBT_PRIVATE void smpi_mpi_iprobe(int source, int tag, MPI_Comm comm, int* flag, MPI_Status* status); -XBT_PRIVATE int smpi_mpi_get_count(MPI_Status * status, MPI_Datatype datatype); -XBT_PRIVATE void smpi_mpi_wait(MPI_Request * request, MPI_Status * status); -XBT_PRIVATE int smpi_mpi_waitany(int count, MPI_Request requests[], MPI_Status * status); -XBT_PRIVATE int smpi_mpi_waitall(int count, MPI_Request requests[], MPI_Status status[]); -XBT_PRIVATE int smpi_mpi_waitsome(int incount, MPI_Request requests[], int *indices, MPI_Status status[]); -XBT_PRIVATE int smpi_mpi_testsome(int incount, MPI_Request requests[], int *indices, MPI_Status status[]); + XBT_PRIVATE void smpi_mpi_bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm); XBT_PRIVATE void smpi_mpi_barrier(MPI_Comm comm); XBT_PRIVATE void smpi_mpi_gather(void *sendbuf, int sendcount, MPI_Datatype sendtype, diff --git a/src/smpi/smpi_base.cpp b/src/smpi/smpi_base.cpp index 7201d37d2f..6efaa9b234 100644 --- a/src/smpi/smpi_base.cpp +++ b/src/smpi/smpi_base.cpp @@ -24,143 +24,11 @@ 
XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_base, smpi, "Logging specific to SMPI (base)"); -extern void (*smpi_comm_copy_data_callback) (smx_activity_t, void*, size_t); - - -static int match_recv(void* a, void* b, smx_activity_t ignored) { - MPI_Request ref = static_cast(a); - MPI_Request req = static_cast(b); - XBT_DEBUG("Trying to match a recv of src %d against %d, tag %d against %d",ref->src,req->src, ref->tag, req->tag); - - xbt_assert(ref, "Cannot match recv against null reference"); - xbt_assert(req, "Cannot match recv against null request"); - if((ref->src == MPI_ANY_SOURCE || req->src == ref->src) - && ((ref->tag == MPI_ANY_TAG && req->tag >=0) || req->tag == ref->tag)){ - //we match, we can transfer some values - if(ref->src == MPI_ANY_SOURCE) - ref->real_src = req->src; - if(ref->tag == MPI_ANY_TAG) - ref->real_tag = req->tag; - if(ref->real_size < req->real_size) - ref->truncated = 1; - if(req->detached==1) - ref->detached_sender=req; //tie the sender to the receiver, as it is detached and has to be freed in the receiver - XBT_DEBUG("match succeeded"); - return 1; - }else return 0; -} - -static int match_send(void* a, void* b,smx_activity_t ignored) { - MPI_Request ref = static_cast(a); - MPI_Request req = static_cast(b); - XBT_DEBUG("Trying to match a send of src %d against %d, tag %d against %d",ref->src,req->src, ref->tag, req->tag); - xbt_assert(ref, "Cannot match send against null reference"); - xbt_assert(req, "Cannot match send against null request"); - - if((req->src == MPI_ANY_SOURCE || req->src == ref->src) - && ((req->tag == MPI_ANY_TAG && ref->tag >=0)|| req->tag == ref->tag)){ - if(req->src == MPI_ANY_SOURCE) - req->real_src = ref->src; - if(req->tag == MPI_ANY_TAG) - req->real_tag = ref->tag; - if(req->real_size < ref->real_size) - req->truncated = 1; - if(ref->detached==1) - req->detached_sender=ref; //tie the sender to the receiver, as it is detached and has to be freed in the receiver - XBT_DEBUG("match succeeded"); - return 1; - } else - 
return 0; -} - -std::vector smpi_os_values; -std::vector smpi_or_values; -std::vector smpi_ois_values; static simgrid::config::Flag smpi_wtime_sleep( "smpi/wtime", "Minimum time to inject inside a call to MPI_Wtime", 0.0); static simgrid::config::Flag smpi_init_sleep( "smpi/init", "Time to inject inside a call to MPI_Init", 0.0); -static simgrid::config::Flag smpi_iprobe_sleep( - "smpi/iprobe", "Minimum time to inject inside a call to MPI_Iprobe", 1e-4); -static simgrid::config::Flag smpi_test_sleep( - "smpi/test", "Minimum time to inject inside a call to MPI_Test", 1e-4); - - -static double smpi_os(size_t size) -{ - if (smpi_os_values.empty()) { - smpi_os_values = parse_factor(xbt_cfg_get_string("smpi/os")); - } - double current=smpi_os_values.empty()?0.0:smpi_os_values[0].values[0]+smpi_os_values[0].values[1]*size; - // Iterate over all the sections that were specified and find the right - // value. (fact.factor represents the interval sizes; we want to find the - // section that has fact.factor <= size and no other such fact.factor <= size) - // Note: parse_factor() (used before) already sorts the vector we iterate over! - for (auto& fact : smpi_os_values) { - if (size <= fact.factor) { // Values already too large, use the previously computed value of current! - XBT_DEBUG("os : %zu <= %zu return %.10f", size, fact.factor, current); - return current; - }else{ - // If the next section is too large, the current section must be used. - // Hence, save the cost, as we might have to use it. 
- current = fact.values[0]+fact.values[1]*size; - } - } - XBT_DEBUG("Searching for smpi/os: %zu is larger than the largest boundary, return %.10f", size, current); - - return current; -} - -static double smpi_ois(size_t size) -{ - if (smpi_ois_values.empty()) { - smpi_ois_values = parse_factor(xbt_cfg_get_string("smpi/ois")); - } - double current=smpi_ois_values.empty()?0.0:smpi_ois_values[0].values[0]+smpi_ois_values[0].values[1]*size; - // Iterate over all the sections that were specified and find the right value. (fact.factor represents the interval - // sizes; we want to find the section that has fact.factor <= size and no other such fact.factor <= size) - // Note: parse_factor() (used before) already sorts the vector we iterate over! - for (auto& fact : smpi_ois_values) { - if (size <= fact.factor) { // Values already too large, use the previously computed value of current! - XBT_DEBUG("ois : %zu <= %zu return %.10f", size, fact.factor, current); - return current; - }else{ - // If the next section is too large, the current section must be used. - // Hence, save the cost, as we might have to use it. - current = fact.values[0]+fact.values[1]*size; - } - } - XBT_DEBUG("Searching for smpi/ois: %zu is larger than the largest boundary, return %.10f", size, current); - - return current; -} - -static double smpi_or(size_t size) -{ - if (smpi_or_values.empty()) { - smpi_or_values = parse_factor(xbt_cfg_get_string("smpi/or")); - } - - double current=smpi_or_values.empty()?0.0:smpi_or_values.front().values[0]+smpi_or_values.front().values[1]*size; - - // Iterate over all the sections that were specified and find the right value. (fact.factor represents the interval - // sizes; we want to find the section that has fact.factor <= size and no other such fact.factor <= size) - // Note: parse_factor() (used before) already sorts the vector we iterate over! 
- for (auto fact : smpi_or_values) { - if (size <= fact.factor) { // Values already too large, use the previously computed value of current! - XBT_DEBUG("or : %zu <= %zu return %.10f", size, fact.factor, current); - return current; - } else { - // If the next section is too large, the current section must be used. - // Hence, save the cost, as we might have to use it. - current=fact.values[0]+fact.values[1]*size; - } - } - XBT_DEBUG("smpi_or: %zu is larger than largest boundary, return %.10f", size, current); - - return current; -} void smpi_mpi_init() { if(smpi_init_sleep > 0) @@ -187,831 +55,6 @@ double smpi_mpi_wtime(){ return time; } -static MPI_Request build_request(void *buf, int count, MPI_Datatype datatype, int src, int dst, int tag, MPI_Comm comm, - unsigned flags) -{ - MPI_Request request = nullptr; - - void *old_buf = nullptr; - - request = xbt_new(s_smpi_mpi_request_t, 1); - - s_smpi_subtype_t *subtype = static_cast(datatype->substruct); - - if((((flags & RECV) != 0) && ((flags & ACCUMULATE) !=0)) || (datatype->sizeof_substruct != 0)){ - // This part handles the problem of non-contiguous memory - old_buf = buf; - buf = count==0 ? 
nullptr : xbt_malloc(count*smpi_datatype_size(datatype)); - if ((datatype->sizeof_substruct != 0) && ((flags & SEND) != 0)) { - subtype->serialize(old_buf, buf, count, datatype->substruct); - } - } - - request->buf = buf; - // This part handles the problem of non-contiguous memory (for the unserialisation at the reception) - request->old_buf = old_buf; - request->old_type = datatype; - - request->size = smpi_datatype_size(datatype) * count; - smpi_datatype_use(datatype); - request->src = src; - request->dst = dst; - request->tag = tag; - request->comm = comm; - request->comm->use(); - request->action = nullptr; - request->flags = flags; - request->detached = 0; - request->detached_sender = nullptr; - request->real_src = 0; - request->truncated = 0; - request->real_size = 0; - request->real_tag = 0; - if (flags & PERSISTENT) - request->refcount = 1; - else - request->refcount = 0; - request->op = MPI_REPLACE; - request->send = 0; - request->recv = 0; - - return request; -} - -void smpi_empty_status(MPI_Status * status) -{ - if(status != MPI_STATUS_IGNORE) { - status->MPI_SOURCE = MPI_ANY_SOURCE; - status->MPI_TAG = MPI_ANY_TAG; - status->MPI_ERROR = MPI_SUCCESS; - status->count=0; - } -} - -static void smpi_mpi_request_free_voidp(void* request) -{ - MPI_Request req = static_cast(request); - smpi_mpi_request_free(&req); -} - -/* MPI Low level calls */ -MPI_Request smpi_mpi_send_init(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm) -{ - MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ - request = build_request(buf==MPI_BOTTOM ? 
nullptr : buf, count, datatype, smpi_process_index(), - comm->group()->index(dst), tag, comm, PERSISTENT | SEND | PREPARED); - return request; -} - -MPI_Request smpi_mpi_ssend_init(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm) -{ - MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ - request = build_request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, smpi_process_index(), - comm->group()->index(dst), tag, comm, PERSISTENT | SSEND | SEND | PREPARED); - return request; -} - -MPI_Request smpi_mpi_recv_init(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm) -{ - MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ - request = build_request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, - src == MPI_ANY_SOURCE ? MPI_ANY_SOURCE : comm->group()->index(src), - smpi_process_index(), tag, comm, PERSISTENT | RECV | PREPARED); - return request; -} - -void smpi_mpi_start(MPI_Request request) -{ - smx_mailbox_t mailbox; - - xbt_assert(request->action == nullptr, "Cannot (re-)start unfinished communication"); - request->flags &= ~PREPARED; - request->flags &= ~FINISHED; - request->refcount++; - - if ((request->flags & RECV) != 0) { - print_request("New recv", request); - - int async_small_thresh = xbt_cfg_get_int("smpi/async-small-thresh"); - - xbt_mutex_t mut = smpi_process_mailboxes_mutex(); - if (async_small_thresh != 0 || (request->flags & RMA) != 0) - xbt_mutex_acquire(mut); - - if (async_small_thresh == 0 && (request->flags & RMA) == 0 ) { - mailbox = smpi_process_mailbox(); - } - else if (((request->flags & RMA) != 0) || static_cast(request->size) < async_small_thresh) { - //We have to check both mailboxes (because SSEND messages are sent to the large mbox). - //begin with the more appropriate one : the small one. 
- mailbox = smpi_process_mailbox_small(); - XBT_DEBUG("Is there a corresponding send already posted in the small mailbox %p (in case of SSEND)?", mailbox); - smx_activity_t action = simcall_comm_iprobe(mailbox, 0, request->src,request->tag, &match_recv, - static_cast(request)); - - if (action == nullptr) { - mailbox = smpi_process_mailbox(); - XBT_DEBUG("No, nothing in the small mailbox test the other one : %p", mailbox); - action = simcall_comm_iprobe(mailbox, 0, request->src,request->tag, &match_recv, static_cast(request)); - if (action == nullptr) { - XBT_DEBUG("Still nothing, switch back to the small mailbox : %p", mailbox); - mailbox = smpi_process_mailbox_small(); - } - } else { - XBT_DEBUG("yes there was something for us in the large mailbox"); - } - } else { - mailbox = smpi_process_mailbox_small(); - XBT_DEBUG("Is there a corresponding send already posted the small mailbox?"); - smx_activity_t action = simcall_comm_iprobe(mailbox, 0, request->src,request->tag, &match_recv, (void*)request); - - if (action == nullptr) { - XBT_DEBUG("No, nothing in the permanent receive mailbox"); - mailbox = smpi_process_mailbox(); - } else { - XBT_DEBUG("yes there was something for us in the small mailbox"); - } - } - - // we make a copy here, as the size is modified by simix, and we may reuse the request in another receive later - request->real_size=request->size; - request->action = simcall_comm_irecv(SIMIX_process_self(), mailbox, request->buf, &request->real_size, &match_recv, - ! smpi_process_get_replaying()? 
smpi_comm_copy_data_callback - : &smpi_comm_null_copy_buffer_callback, request, -1.0); - XBT_DEBUG("recv simcall posted"); - - if (async_small_thresh != 0 || (request->flags & RMA) != 0 ) - xbt_mutex_release(mut); - } else { /* the RECV flag was not set, so this is a send */ - int receiver = request->dst; - - int rank = request->src; - if (TRACE_smpi_view_internals()) { - TRACE_smpi_send(rank, rank, receiver, request->tag, request->size); - } - print_request("New send", request); - - void* buf = request->buf; - if ((request->flags & SSEND) == 0 && ( (request->flags & RMA) != 0 - || static_cast(request->size) < xbt_cfg_get_int("smpi/send-is-detached-thresh") ) ) { - void *oldbuf = nullptr; - request->detached = 1; - XBT_DEBUG("Send request %p is detached", request); - request->refcount++; - if(request->old_type->sizeof_substruct == 0){ - oldbuf = request->buf; - if (!smpi_process_get_replaying() && oldbuf != nullptr && request->size!=0){ - if((smpi_privatize_global_variables != 0) - && (static_cast(request->buf) >= smpi_start_data_exe) - && (static_cast(request->buf) < smpi_start_data_exe + smpi_size_data_exe )){ - XBT_DEBUG("Privatization : We are sending from a zone inside global memory. Switch data segment "); - smpi_switch_data_segment(request->src); - } - buf = xbt_malloc(request->size); - memcpy(buf,oldbuf,request->size); - XBT_DEBUG("buf %p copied into %p",oldbuf,buf); - } - } - } - - //if we are giving back the control to the user without waiting for completion, we have to inject timings - double sleeptime = 0.0; - if(request->detached != 0 || ((request->flags & (ISEND|SSEND)) != 0)){// issend should be treated as isend - //isend and send timings may be different - sleeptime = ((request->flags & ISEND) != 0) ? 
smpi_ois(request->size) : smpi_os(request->size); - } - - if(sleeptime > 0.0){ - simcall_process_sleep(sleeptime); - XBT_DEBUG("sending size of %zu : sleep %f ", request->size, sleeptime); - } - - int async_small_thresh = xbt_cfg_get_int("smpi/async-small-thresh"); - - xbt_mutex_t mut=smpi_process_remote_mailboxes_mutex(receiver); - - if (async_small_thresh != 0 || (request->flags & RMA) != 0) - xbt_mutex_acquire(mut); - - if (!(async_small_thresh != 0 || (request->flags & RMA) !=0)) { - mailbox = smpi_process_remote_mailbox(receiver); - } else if (((request->flags & RMA) != 0) || static_cast(request->size) < async_small_thresh) { // eager mode - mailbox = smpi_process_remote_mailbox(receiver); - XBT_DEBUG("Is there a corresponding recv already posted in the large mailbox %p?", mailbox); - smx_activity_t action = simcall_comm_iprobe(mailbox, 1,request->dst, request->tag, &match_send, - static_cast(request)); - if (action == nullptr) { - if ((request->flags & SSEND) == 0){ - mailbox = smpi_process_remote_mailbox_small(receiver); - XBT_DEBUG("No, nothing in the large mailbox, message is to be sent on the small one %p", mailbox); - } else { - mailbox = smpi_process_remote_mailbox_small(receiver); - XBT_DEBUG("SSEND : Is there a corresponding recv already posted in the small mailbox %p?", mailbox); - action = simcall_comm_iprobe(mailbox, 1,request->dst, request->tag, &match_send, static_cast(request)); - if (action == nullptr) { - XBT_DEBUG("No, we are first, send to large mailbox"); - mailbox = smpi_process_remote_mailbox(receiver); - } - } - } else { - XBT_DEBUG("Yes there was something for us in the large mailbox"); - } - } else { - mailbox = smpi_process_remote_mailbox(receiver); - XBT_DEBUG("Send request %p is in the large mailbox %p (buf: %p)",mailbox, request,request->buf); - } - - // we make a copy here, as the size is modified by simix, and we may reuse the request in another receive later - request->real_size=request->size; - request->action = 
simcall_comm_isend(SIMIX_process_from_PID(request->src+1), mailbox, request->size, -1.0, - buf, request->real_size, &match_send, - &xbt_free_f, // how to free the userdata if a detached send fails - !smpi_process_get_replaying() ? smpi_comm_copy_data_callback - : &smpi_comm_null_copy_buffer_callback, request, - // detach if msg size < eager/rdv switch limit - request->detached); - XBT_DEBUG("send simcall posted"); - - /* FIXME: detached sends are not traceable (request->action == nullptr) */ - if (request->action != nullptr) - simcall_set_category(request->action, TRACE_internal_smpi_get_category()); - - if (async_small_thresh != 0 || ((request->flags & RMA)!=0)) - xbt_mutex_release(mut); - } -} - -void smpi_mpi_startall(int count, MPI_Request * requests) -{ - if(requests== nullptr) - return; - - for(int i = 0; i < count; i++) { - smpi_mpi_start(requests[i]); - } -} - -void smpi_mpi_request_free(MPI_Request * request) -{ - if((*request) != MPI_REQUEST_NULL){ - (*request)->refcount--; - if((*request)->refcount<0) xbt_die("wrong refcount"); - - if((*request)->refcount==0){ - smpi_datatype_unuse((*request)->old_type); - (*request)->comm->unuse(); - print_request("Destroying", (*request)); - xbt_free(*request); - *request = MPI_REQUEST_NULL; - }else{ - print_request("Decrementing", (*request)); - } - }else{ - xbt_die("freeing an already free request"); - } -} - -MPI_Request smpi_rma_send_init(void *buf, int count, MPI_Datatype datatype, int src, int dst, int tag, MPI_Comm comm, - MPI_Op op) -{ - MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ - if(op==MPI_OP_NULL){ - request = build_request(buf==MPI_BOTTOM ? nullptr : buf , count, datatype, src, dst, tag, - comm, RMA | NON_PERSISTENT | ISEND | SEND | PREPARED); - }else{ - request = build_request(buf==MPI_BOTTOM ? 
nullptr : buf, count, datatype, src, dst, tag, - comm, RMA | NON_PERSISTENT | ISEND | SEND | PREPARED | ACCUMULATE); - request->op = op; - } - return request; -} - -MPI_Request smpi_rma_recv_init(void *buf, int count, MPI_Datatype datatype, int src, int dst, int tag, MPI_Comm comm, - MPI_Op op) -{ - MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ - if(op==MPI_OP_NULL){ - request = build_request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, src, dst, tag, - comm, RMA | NON_PERSISTENT | RECV | PREPARED); - }else{ - request = build_request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, src, dst, tag, - comm, RMA | NON_PERSISTENT | RECV | PREPARED | ACCUMULATE); - request->op = op; - } - return request; -} - -MPI_Request smpi_isend_init(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm) -{ - MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ - request = build_request(buf==MPI_BOTTOM ? nullptr : buf , count, datatype, smpi_process_index(), - comm->group()->index(dst), tag,comm, PERSISTENT | ISEND | SEND | PREPARED); - return request; -} - -MPI_Request smpi_mpi_isend(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm) -{ - MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ - request = build_request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, smpi_process_index(), - comm->group()->index(dst), tag, comm, NON_PERSISTENT | ISEND | SEND); - smpi_mpi_start(request); - return request; -} - -MPI_Request smpi_mpi_issend(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm) -{ - MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ - request = build_request(buf==MPI_BOTTOM ? 
nullptr : buf, count, datatype, smpi_process_index(), - comm->group()->index(dst), tag,comm, NON_PERSISTENT | ISEND | SSEND | SEND); - smpi_mpi_start(request); - return request; -} - -MPI_Request smpi_irecv_init(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm) -{ - MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ - request = build_request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, src == MPI_ANY_SOURCE ? MPI_ANY_SOURCE : - comm->group()->index(src), smpi_process_index(), tag, - comm, PERSISTENT | RECV | PREPARED); - return request; -} - -MPI_Request smpi_mpi_irecv(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm) -{ - MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ - request = build_request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, src == MPI_ANY_SOURCE ? MPI_ANY_SOURCE : - comm->group()->index(src), smpi_process_index(), tag, comm, - NON_PERSISTENT | RECV); - smpi_mpi_start(request); - return request; -} - -void smpi_mpi_recv(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm, MPI_Status * status) -{ - MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ - request = smpi_mpi_irecv(buf, count, datatype, src, tag, comm); - smpi_mpi_wait(&request, status); - request = nullptr; -} - -void smpi_mpi_send(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm) -{ - MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ - request = build_request(buf==MPI_BOTTOM ? 
nullptr : buf, count, datatype, smpi_process_index(), - comm->group()->index(dst), tag, comm, NON_PERSISTENT | SEND); - - smpi_mpi_start(request); - smpi_mpi_wait(&request, MPI_STATUS_IGNORE); - request = nullptr; -} - -void smpi_mpi_ssend(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm) -{ - MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ - request = build_request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, smpi_process_index(), - comm->group()->index(dst), tag, comm, NON_PERSISTENT | SSEND | SEND); - - smpi_mpi_start(request); - smpi_mpi_wait(&request, MPI_STATUS_IGNORE); - request = nullptr; -} - -void smpi_mpi_sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype,int dst, int sendtag, - void *recvbuf, int recvcount, MPI_Datatype recvtype, int src, int recvtag, - MPI_Comm comm, MPI_Status * status) -{ - MPI_Request requests[2]; - MPI_Status stats[2]; - int myid=smpi_process_index(); - if ((comm->group()->index(dst) == myid) && (comm->group()->index(src) == myid)){ - smpi_datatype_copy(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype); - return; - } - requests[0] = smpi_isend_init(sendbuf, sendcount, sendtype, dst, sendtag, comm); - requests[1] = smpi_irecv_init(recvbuf, recvcount, recvtype, src, recvtag, comm); - smpi_mpi_startall(2, requests); - smpi_mpi_waitall(2, requests, stats); - smpi_mpi_request_free(&requests[0]); - smpi_mpi_request_free(&requests[1]); - if(status != MPI_STATUS_IGNORE) { - // Copy receive status - *status = stats[1]; - } -} - -int smpi_mpi_get_count(MPI_Status * status, MPI_Datatype datatype) -{ - return status->count / smpi_datatype_size(datatype); -} - -static void finish_wait(MPI_Request * request, MPI_Status * status) -{ - MPI_Request req = *request; - smpi_empty_status(status); - - if(!((req->detached != 0) && ((req->flags & SEND) != 0)) && ((req->flags & PREPARED) == 0)){ - if(status != MPI_STATUS_IGNORE) { - int src = req->src == 
MPI_ANY_SOURCE ? req->real_src : req->src; - status->MPI_SOURCE = req->comm->group()->rank(src); - status->MPI_TAG = req->tag == MPI_ANY_TAG ? req->real_tag : req->tag; - status->MPI_ERROR = req->truncated != 0 ? MPI_ERR_TRUNCATE : MPI_SUCCESS; - // this handles the case were size in receive differs from size in send - status->count = req->real_size; - } - - print_request("Finishing", req); - MPI_Datatype datatype = req->old_type; - - if(((req->flags & ACCUMULATE) != 0) || (datatype->sizeof_substruct != 0)){ - if (!smpi_process_get_replaying()){ - if( smpi_privatize_global_variables != 0 && (static_cast(req->old_buf) >= smpi_start_data_exe) - && ((char*)req->old_buf < smpi_start_data_exe + smpi_size_data_exe )){ - XBT_VERB("Privatization : We are unserializing to a zone in global memory - Switch data segment "); - smpi_switch_data_segment(smpi_process_index()); - } - } - - if(datatype->sizeof_substruct != 0){ - // This part handles the problem of non-contignous memory the unserialization at the reception - s_smpi_subtype_t *subtype = static_cast(datatype->substruct); - if(req->flags & RECV) - subtype->unserialize(req->buf, req->old_buf, req->real_size/smpi_datatype_size(datatype) , - datatype->substruct, req->op); - xbt_free(req->buf); - }else if(req->flags & RECV){//apply op on contiguous buffer for accumulate - int n =req->real_size/smpi_datatype_size(datatype); - smpi_op_apply(req->op, req->buf, req->old_buf, &n, &datatype); - xbt_free(req->buf); - } - } - } - - if (TRACE_smpi_view_internals() && ((req->flags & RECV) != 0)){ - int rank = smpi_process_index(); - int src_traced = (req->src == MPI_ANY_SOURCE ? 
req->real_src : req->src); - TRACE_smpi_recv(rank, src_traced, rank,req->tag); - } - - if(req->detached_sender != nullptr){ - //integrate pseudo-timing for buffering of small messages, do not bother to execute the simcall if 0 - double sleeptime = smpi_or(req->real_size); - if(sleeptime > 0.0){ - simcall_process_sleep(sleeptime); - XBT_DEBUG("receiving size of %zu : sleep %f ", req->real_size, sleeptime); - } - smpi_mpi_request_free(&(req->detached_sender)); - } - if(req->flags & PERSISTENT) - req->action = nullptr; - req->flags |= FINISHED; - - smpi_mpi_request_free(request); -} - -int smpi_mpi_test(MPI_Request * request, MPI_Status * status) { - //assume that request is not MPI_REQUEST_NULL (filtered in PMPI_Test or smpi_mpi_testall before) - - // to avoid deadlocks if used as a break condition, such as - // while (MPI_Test(request, flag, status) && flag) { - // } - // because the time will not normally advance when only calls to MPI_Test are made -> deadlock - // multiplier to the sleeptime, to increase speed of execution, each failed test will increase it - static int nsleeps = 1; - if(smpi_test_sleep > 0) - simcall_process_sleep(nsleeps*smpi_test_sleep); - - smpi_empty_status(status); - int flag = 1; - if (((*request)->flags & PREPARED) == 0) { - if ((*request)->action != nullptr) - flag = simcall_comm_test((*request)->action); - if (flag) { - finish_wait(request, status); - nsleeps=1;//reset the number of sleeps we will do next time - if (*request != MPI_REQUEST_NULL && ((*request)->flags & PERSISTENT)==0) - *request = MPI_REQUEST_NULL; - } else if (xbt_cfg_get_boolean("smpi/grow-injected-times")){ - nsleeps++; - } - } - return flag; -} - -int smpi_mpi_testany(int count, MPI_Request requests[], int *index, MPI_Status * status) -{ - std::vector comms; - comms.reserve(count); - - int i; - int flag = 0; - - *index = MPI_UNDEFINED; - - std::vector map; /** Maps all matching comms back to their location in requests **/ - for(i = 0; i < count; i++) { - if 
((requests[i] != MPI_REQUEST_NULL) && requests[i]->action && !(requests[i]->flags & PREPARED)) { - comms.push_back(requests[i]->action); - map.push_back(i); - } - } - if(!map.empty()) { - //multiplier to the sleeptime, to increase speed of execution, each failed testany will increase it - static int nsleeps = 1; - if(smpi_test_sleep > 0) - simcall_process_sleep(nsleeps*smpi_test_sleep); - - i = simcall_comm_testany(comms.data(), comms.size()); // The i-th element in comms matches! - if (i != -1) { // -1 is not MPI_UNDEFINED but a SIMIX return code. (nothing matches) - *index = map[i]; - finish_wait(&requests[*index], status); - flag = 1; - nsleeps = 1; - if (requests[*index] != MPI_REQUEST_NULL && (requests[*index]->flags & NON_PERSISTENT)) { - requests[*index] = MPI_REQUEST_NULL; - } - } else { - nsleeps++; - } - } else { - //all requests are null or inactive, return true - flag = 1; - smpi_empty_status(status); - } - - return flag; -} - -int smpi_mpi_testall(int count, MPI_Request requests[], MPI_Status status[]) -{ - MPI_Status stat; - MPI_Status *pstat = status == MPI_STATUSES_IGNORE ? MPI_STATUS_IGNORE : &stat; - int flag=1; - for(int i=0; iflags & PREPARED)) { - if (smpi_mpi_test(&requests[i], pstat)!=1){ - flag=0; - }else{ - requests[i]=MPI_REQUEST_NULL; - } - }else{ - smpi_empty_status(pstat); - } - if(status != MPI_STATUSES_IGNORE) { - status[i] = *pstat; - } - } - return flag; -} - -void smpi_mpi_probe(int source, int tag, MPI_Comm comm, MPI_Status* status){ - int flag=0; - //FIXME find another way to avoid busy waiting ? - // the issue here is that we have to wait on a nonexistent comm - while(flag==0){ - smpi_mpi_iprobe(source, tag, comm, &flag, status); - XBT_DEBUG("Busy Waiting on probing : %d", flag); - } -} - -void smpi_mpi_iprobe(int source, int tag, MPI_Comm comm, int* flag, MPI_Status* status){ - MPI_Request request = build_request(nullptr, 0, MPI_CHAR, source == MPI_ANY_SOURCE ? 
MPI_ANY_SOURCE : - comm->group()->index(source), comm->rank(), tag, comm, PERSISTENT | RECV); - - // to avoid deadlock, we have to sleep some time here, or the timer won't advance and we will only do iprobe simcalls - // (especially when used as a break condition, such as while(MPI_Iprobe(...)) ... ) - // nsleeps is a multiplier to the sleeptime, to increase speed of execution, each failed iprobe will increase it - // (This can speed up the execution of certain applications by an order of magnitude, such as HPL) - static int nsleeps = 1; - double speed = simgrid::s4u::Actor::self()->host()->speed(); - double maxrate = xbt_cfg_get_double("smpi/iprobe-cpu-usage"); - if (smpi_iprobe_sleep > 0) { - smx_activity_t iprobe_sleep = simcall_execution_start("iprobe", /* flops to executek*/nsleeps*smpi_iprobe_sleep*speed*maxrate, /* priority */1.0, /* performance bound */maxrate*speed); - simcall_execution_wait(iprobe_sleep); - } - // behave like a receive, but don't do it - smx_mailbox_t mailbox; - - print_request("New iprobe", request); - // We have to test both mailboxes as we don't know if we will receive one one or another - if (xbt_cfg_get_int("smpi/async-small-thresh") > 0){ - mailbox = smpi_process_mailbox_small(); - XBT_DEBUG("Trying to probe the perm recv mailbox"); - request->action = simcall_comm_iprobe(mailbox, 0, request->src, request->tag, &match_recv, - static_cast(request)); - } - - if (request->action == nullptr){ - mailbox = smpi_process_mailbox(); - XBT_DEBUG("trying to probe the other mailbox"); - request->action = simcall_comm_iprobe(mailbox, 0, request->src,request->tag, &match_recv, - static_cast(request)); - } - - if (request->action != nullptr){ - simgrid::kernel::activity::Comm *sync_comm = static_cast(request->action); - MPI_Request req = static_cast(sync_comm->src_data); - *flag = 1; - if(status != MPI_STATUS_IGNORE && (req->flags & PREPARED) == 0) { - status->MPI_SOURCE = comm->group()->rank(req->src); - status->MPI_TAG = req->tag; - 
status->MPI_ERROR = MPI_SUCCESS; - status->count = req->real_size; - } - nsleeps = 1;//reset the number of sleeps we will do next time - } - else { - *flag = 0; - if (xbt_cfg_get_boolean("smpi/grow-injected-times")) - nsleeps++; - } - smpi_mpi_request_free(&request); -} - -void smpi_mpi_wait(MPI_Request * request, MPI_Status * status) -{ - print_request("Waiting", *request); - if ((*request)->flags & PREPARED) { - smpi_empty_status(status); - return; - } - - if ((*request)->action != nullptr) - // this is not a detached send - simcall_comm_wait((*request)->action, -1.0); - - finish_wait(request, status); - if (*request != MPI_REQUEST_NULL && (((*request)->flags & NON_PERSISTENT)!=0)) - *request = MPI_REQUEST_NULL; -} - -static int sort_accumulates(MPI_Request a, MPI_Request b) -{ - return (a->tag < b->tag); -} - -int smpi_mpi_waitany(int count, MPI_Request requests[], MPI_Status * status) -{ - s_xbt_dynar_t comms; // Keep it on stack to save some extra mallocs - int i; - int size = 0; - int index = MPI_UNDEFINED; - int *map; - - if(count > 0) { - // Wait for a request to complete - xbt_dynar_init(&comms, sizeof(smx_activity_t), nullptr); - map = xbt_new(int, count); - XBT_DEBUG("Wait for one of %d", count); - for(i = 0; i < count; i++) { - if (requests[i] != MPI_REQUEST_NULL && !(requests[i]->flags & PREPARED) && !(requests[i]->flags & FINISHED)) { - if (requests[i]->action != nullptr) { - XBT_DEBUG("Waiting any %p ", requests[i]); - xbt_dynar_push(&comms, &requests[i]->action); - map[size] = i; - size++; - } else { - // This is a finished detached request, let's return this one - size = 0; // so we free the dynar but don't do the waitany call - index = i; - finish_wait(&requests[i], status); // cleanup if refcount = 0 - if (requests[i] != MPI_REQUEST_NULL && (requests[i]->flags & NON_PERSISTENT)) - requests[i] = MPI_REQUEST_NULL; // set to null - break; - } - } - } - if(size > 0) { - i = simcall_comm_waitany(&comms, -1); - - // not MPI_UNDEFINED, as this is a 
simix return code - if (i != -1) { - index = map[i]; - //in case of an accumulate, we have to wait the end of all requests to apply the operation, ordered correctly. - if ((requests[index] == MPI_REQUEST_NULL) - || (!((requests[index]->flags & ACCUMULATE) && (requests[index]->flags & RECV)))){ - finish_wait(&requests[index], status); - if (requests[i] != MPI_REQUEST_NULL && (requests[i]->flags & NON_PERSISTENT)) - requests[index] = MPI_REQUEST_NULL; - }else{ - XBT_WARN("huu?"); - } - } - } - - xbt_dynar_free_data(&comms); - xbt_free(map); - } - - if (index==MPI_UNDEFINED) - smpi_empty_status(status); - - return index; -} - -int smpi_mpi_waitall(int count, MPI_Request requests[], MPI_Status status[]) -{ - std::vector accumulates; - int index; - MPI_Status stat; - MPI_Status *pstat = status == MPI_STATUSES_IGNORE ? MPI_STATUS_IGNORE : &stat; - int retvalue = MPI_SUCCESS; - //tag invalid requests in the set - if (status != MPI_STATUSES_IGNORE) { - for (int c = 0; c < count; c++) { - if (requests[c] == MPI_REQUEST_NULL || requests[c]->dst == MPI_PROC_NULL || (requests[c]->flags & PREPARED)) { - smpi_empty_status(&status[c]); - } else if (requests[c]->src == MPI_PROC_NULL) { - smpi_empty_status(&status[c]); - status[c].MPI_SOURCE = MPI_PROC_NULL; - } - } - } - for (int c = 0; c < count; c++) { - if (MC_is_active() || MC_record_replay_is_active()) { - smpi_mpi_wait(&requests[c], pstat); - index = c; - } else { - index = smpi_mpi_waitany(count, requests, pstat); - if (index == MPI_UNDEFINED) - break; - - if (requests[index] != MPI_REQUEST_NULL - && (requests[index]->flags & RECV) - && (requests[index]->flags & ACCUMULATE)) - accumulates.push_back(requests[index]); - if (requests[index] != MPI_REQUEST_NULL && (requests[index]->flags & NON_PERSISTENT)) - requests[index] = MPI_REQUEST_NULL; - } - if (status != MPI_STATUSES_IGNORE) { - status[index] = *pstat; - if (status[index].MPI_ERROR == MPI_ERR_TRUNCATE) - retvalue = MPI_ERR_IN_STATUS; - } - } - - if 
(!accumulates.empty()) { - std::sort(accumulates.begin(), accumulates.end(), sort_accumulates); - for (auto req : accumulates) { - finish_wait(&req, status); - } - } - - return retvalue; -} - -int smpi_mpi_waitsome(int incount, MPI_Request requests[], int *indices, MPI_Status status[]) -{ - int i; - int count = 0; - int index; - MPI_Status stat; - MPI_Status *pstat = status == MPI_STATUSES_IGNORE ? MPI_STATUS_IGNORE : &stat; - - for(i = 0; i < incount; i++) - { - index=smpi_mpi_waitany(incount, requests, pstat); - if(index!=MPI_UNDEFINED){ - indices[count] = index; - count++; - if(status != MPI_STATUSES_IGNORE) { - status[index] = *pstat; - } - if (requests[index] != MPI_REQUEST_NULL && (requests[index]->flags & NON_PERSISTENT)) - requests[index]=MPI_REQUEST_NULL; - }else{ - return MPI_UNDEFINED; - } - } - return count; -} - -int smpi_mpi_testsome(int incount, MPI_Request requests[], int *indices, MPI_Status status[]) -{ - int i; - int count = 0; - int count_dead = 0; - MPI_Status stat; - MPI_Status *pstat = status == MPI_STATUSES_IGNORE ? 
MPI_STATUS_IGNORE : &stat; - - for(i = 0; i < incount; i++) { - if((requests[i] != MPI_REQUEST_NULL)) { - if(smpi_mpi_test(&requests[i], pstat)) { - indices[i] = 1; - count++; - if(status != MPI_STATUSES_IGNORE) { - status[i] = *pstat; - } - if ((requests[i] != MPI_REQUEST_NULL) && requests[i]->flags & NON_PERSISTENT) - requests[i]=MPI_REQUEST_NULL; - } - }else{ - count_dead++; - } - } - if(count_dead==incount) - return MPI_UNDEFINED; - else return count; -} void smpi_mpi_bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { @@ -1034,7 +77,7 @@ void smpi_mpi_gather(void *sendbuf, int sendcount, MPI_Datatype sendtype, int size = comm->size(); if(rank != root) { // Send buffer to root - smpi_mpi_send(sendbuf, sendcount, sendtype, root, system_tag, comm); + Request::send(sendbuf, sendcount, sendtype, root, system_tag, comm); } else { smpi_datatype_extent(recvtype, &lb, &recvext); // Local copy from root @@ -1045,16 +88,16 @@ void smpi_mpi_gather(void *sendbuf, int sendcount, MPI_Datatype sendtype, int index = 0; for (int src = 0; src < size; src++) { if(src != root) { - requests[index] = smpi_irecv_init(static_cast(recvbuf) + src * recvcount * recvext, recvcount, recvtype, + requests[index] = Request::irecv_init(static_cast(recvbuf) + src * recvcount * recvext, recvcount, recvtype, src, system_tag, comm); index++; } } // Wait for completion of irecv's. 
- smpi_mpi_startall(size - 1, requests); - smpi_mpi_waitall(size - 1, requests, MPI_STATUS_IGNORE); + Request::startall(size - 1, requests); + Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); for (int src = 0; src < size-1; src++) { - smpi_mpi_request_free(&requests[src]); + Request::unuse(&requests[src]); } xbt_free(requests); } @@ -1092,7 +135,7 @@ void smpi_mpi_gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void int size = comm->size(); if (rank != root) { // Send buffer to root - smpi_mpi_send(sendbuf, sendcount, sendtype, root, system_tag, comm); + Request::send(sendbuf, sendcount, sendtype, root, system_tag, comm); } else { smpi_datatype_extent(recvtype, &lb, &recvext); // Local copy from root @@ -1103,16 +146,16 @@ void smpi_mpi_gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void int index = 0; for (int src = 0; src < size; src++) { if(src != root) { - requests[index] = smpi_irecv_init(static_cast(recvbuf) + displs[src] * recvext, + requests[index] = Request::irecv_init(static_cast(recvbuf) + displs[src] * recvext, recvcounts[src], recvtype, src, system_tag, comm); index++; } } // Wait for completion of irecv's. 
- smpi_mpi_startall(size - 1, requests); - smpi_mpi_waitall(size - 1, requests, MPI_STATUS_IGNORE); + Request::startall(size - 1, requests); + Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); for (int src = 0; src < size-1; src++) { - smpi_mpi_request_free(&requests[src]); + Request::unuse(&requests[src]); } xbt_free(requests); } @@ -1138,18 +181,18 @@ void smpi_mpi_allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype, int index = 0; for (int other = 0; other < size; other++) { if(other != rank) { - requests[index] = smpi_isend_init(sendbuf, sendcount, sendtype, other, system_tag,comm); + requests[index] = Request::isend_init(sendbuf, sendcount, sendtype, other, system_tag,comm); index++; - requests[index] = smpi_irecv_init(static_cast(recvbuf) + other * recvcount * recvext, recvcount, recvtype, + requests[index] = Request::irecv_init(static_cast(recvbuf) + other * recvcount * recvext, recvcount, recvtype, other, system_tag, comm); index++; } } // Wait for completion of all comms. 
- smpi_mpi_startall(2 * (size - 1), requests); - smpi_mpi_waitall(2 * (size - 1), requests, MPI_STATUS_IGNORE); + Request::startall(2 * (size - 1), requests); + Request::waitall(2 * (size - 1), requests, MPI_STATUS_IGNORE); for (int other = 0; other < 2*(size-1); other++) { - smpi_mpi_request_free(&requests[other]); + Request::unuse(&requests[other]); } xbt_free(requests); } @@ -1173,18 +216,18 @@ void smpi_mpi_allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, vo for (int other = 0; other < size; other++) { if(other != rank) { requests[index] = - smpi_isend_init(sendbuf, sendcount, sendtype, other, system_tag, comm); + Request::isend_init(sendbuf, sendcount, sendtype, other, system_tag, comm); index++; - requests[index] = smpi_irecv_init(static_cast(recvbuf) + displs[other] * recvext, recvcounts[other], + requests[index] = Request::irecv_init(static_cast(recvbuf) + displs[other] * recvext, recvcounts[other], recvtype, other, system_tag, comm); index++; } } // Wait for completion of all comms. 
- smpi_mpi_startall(2 * (size - 1), requests); - smpi_mpi_waitall(2 * (size - 1), requests, MPI_STATUS_IGNORE); + Request::startall(2 * (size - 1), requests); + Request::waitall(2 * (size - 1), requests, MPI_STATUS_IGNORE); for (int other = 0; other < 2*(size-1); other++) { - smpi_mpi_request_free(&requests[other]); + Request::unuse(&requests[other]); } xbt_free(requests); } @@ -1201,7 +244,7 @@ void smpi_mpi_scatter(void *sendbuf, int sendcount, MPI_Datatype sendtype, int size = comm->size(); if(rank != root) { // Recv buffer from root - smpi_mpi_recv(recvbuf, recvcount, recvtype, root, system_tag, comm, MPI_STATUS_IGNORE); + Request::recv(recvbuf, recvcount, recvtype, root, system_tag, comm, MPI_STATUS_IGNORE); } else { smpi_datatype_extent(sendtype, &lb, &sendext); // Local copy from root @@ -1214,16 +257,16 @@ void smpi_mpi_scatter(void *sendbuf, int sendcount, MPI_Datatype sendtype, int index = 0; for(int dst = 0; dst < size; dst++) { if(dst != root) { - requests[index] = smpi_isend_init(static_cast(sendbuf) + dst * sendcount * sendext, sendcount, sendtype, + requests[index] = Request::isend_init(static_cast(sendbuf) + dst * sendcount * sendext, sendcount, sendtype, dst, system_tag, comm); index++; } } // Wait for completion of isend's. 
- smpi_mpi_startall(size - 1, requests); - smpi_mpi_waitall(size - 1, requests, MPI_STATUS_IGNORE); + Request::startall(size - 1, requests); + Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); for (int dst = 0; dst < size-1; dst++) { - smpi_mpi_request_free(&requests[dst]); + Request::unuse(&requests[dst]); } xbt_free(requests); } @@ -1240,7 +283,7 @@ void smpi_mpi_scatterv(void *sendbuf, int *sendcounts, int *displs, MPI_Datatype int size = comm->size(); if(rank != root) { // Recv buffer from root - smpi_mpi_recv(recvbuf, recvcount, recvtype, root, system_tag, comm, MPI_STATUS_IGNORE); + Request::recv(recvbuf, recvcount, recvtype, root, system_tag, comm, MPI_STATUS_IGNORE); } else { smpi_datatype_extent(sendtype, &lb, &sendext); // Local copy from root @@ -1253,16 +296,16 @@ void smpi_mpi_scatterv(void *sendbuf, int *sendcounts, int *displs, MPI_Datatype int index = 0; for (int dst = 0; dst < size; dst++) { if (dst != root) { - requests[index] = smpi_isend_init(static_cast(sendbuf) + displs[dst] * sendext, sendcounts[dst], + requests[index] = Request::isend_init(static_cast(sendbuf) + displs[dst] * sendext, sendcounts[dst], sendtype, dst, system_tag, comm); index++; } } // Wait for completion of isend's. 
- smpi_mpi_startall(size - 1, requests); - smpi_mpi_waitall(size - 1, requests, MPI_STATUS_IGNORE); + Request::startall(size - 1, requests); + Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); for (int dst = 0; dst < size-1; dst++) { - smpi_mpi_request_free(&requests[dst]); + Request::unuse(&requests[dst]); } xbt_free(requests); } @@ -1292,7 +335,7 @@ void smpi_mpi_reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datat if(rank != root) { // Send buffer to root - smpi_mpi_send(sendtmpbuf, count, datatype, root, system_tag, comm); + Request::send(sendtmpbuf, count, datatype, root, system_tag, comm); } else { smpi_datatype_extent(datatype, &lb, &dataext); // Local copy from root @@ -1309,19 +352,19 @@ void smpi_mpi_reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datat else tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); requests[index] = - smpi_irecv_init(tmpbufs[index], count, datatype, src, system_tag, comm); + Request::irecv_init(tmpbufs[index], count, datatype, src, system_tag, comm); index++; } } // Wait for completion of irecv's. 
- smpi_mpi_startall(size - 1, requests); + Request::startall(size - 1, requests); for (int src = 0; src < size - 1; src++) { - index = smpi_mpi_waitany(size - 1, requests, MPI_STATUS_IGNORE); + index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); XBT_DEBUG("finished waiting any request with index %d", index); if(index == MPI_UNDEFINED) { break; }else{ - smpi_mpi_request_free(&requests[index]); + Request::unuse(&requests[index]); } if(op) /* op can be MPI_OP_NULL that does nothing */ smpi_op_apply(op, tmpbufs[index], recvbuf, &count, &datatype); @@ -1364,19 +407,19 @@ void smpi_mpi_scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatyp int index = 0; for (int other = 0; other < rank; other++) { tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); - requests[index] = smpi_irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm); + requests[index] = Request::irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm); index++; } for (int other = rank + 1; other < size; other++) { - requests[index] = smpi_isend_init(sendbuf, count, datatype, other, system_tag, comm); + requests[index] = Request::isend_init(sendbuf, count, datatype, other, system_tag, comm); index++; } // Wait for completion of all comms. 
- smpi_mpi_startall(size - 1, requests); + Request::startall(size - 1, requests); if(smpi_op_is_commute(op)){ for (int other = 0; other < size - 1; other++) { - index = smpi_mpi_waitany(size - 1, requests, MPI_STATUS_IGNORE); + index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); if(index == MPI_UNDEFINED) { break; } @@ -1388,7 +431,7 @@ void smpi_mpi_scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatyp }else{ //non commutative case, wait in order for (int other = 0; other < size - 1; other++) { - smpi_mpi_wait(&(requests[other]), MPI_STATUS_IGNORE); + Request::wait(&(requests[other]), MPI_STATUS_IGNORE); if(index < rank) { smpi_op_apply(op, tmpbufs[other], recvbuf, &count, &datatype); } @@ -1398,7 +441,7 @@ void smpi_mpi_scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatyp smpi_free_tmp_buffer(tmpbufs[index]); } for(index = 0; index < size-1; index++) { - smpi_mpi_request_free(&requests[index]); + Request::unuse(&requests[index]); } xbt_free(tmpbufs); xbt_free(requests); @@ -1421,19 +464,19 @@ void smpi_mpi_exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datat int index = 0; for (int other = 0; other < rank; other++) { tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); - requests[index] = smpi_irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm); + requests[index] = Request::irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm); index++; } for (int other = rank + 1; other < size; other++) { - requests[index] = smpi_isend_init(sendbuf, count, datatype, other, system_tag, comm); + requests[index] = Request::isend_init(sendbuf, count, datatype, other, system_tag, comm); index++; } // Wait for completion of all comms. 
- smpi_mpi_startall(size - 1, requests); + Request::startall(size - 1, requests); if(smpi_op_is_commute(op)){ for (int other = 0; other < size - 1; other++) { - index = smpi_mpi_waitany(size - 1, requests, MPI_STATUS_IGNORE); + index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); if(index == MPI_UNDEFINED) { break; } @@ -1449,7 +492,7 @@ void smpi_mpi_exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datat }else{ //non commutative case, wait in order for (int other = 0; other < size - 1; other++) { - smpi_mpi_wait(&(requests[other]), MPI_STATUS_IGNORE); + Request::wait(&(requests[other]), MPI_STATUS_IGNORE); if(index < rank) { if (recvbuf_is_empty) { smpi_datatype_copy(tmpbufs[other], count, datatype, recvbuf, count, datatype); @@ -1463,8 +506,25 @@ void smpi_mpi_exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datat smpi_free_tmp_buffer(tmpbufs[index]); } for(index = 0; index < size-1; index++) { - smpi_mpi_request_free(&requests[index]); + Request::unuse(&requests[index]); } xbt_free(tmpbufs); xbt_free(requests); } + +void smpi_empty_status(MPI_Status * status) +{ + if(status != MPI_STATUS_IGNORE) { + status->MPI_SOURCE = MPI_ANY_SOURCE; + status->MPI_TAG = MPI_ANY_TAG; + status->MPI_ERROR = MPI_SUCCESS; + status->count=0; + } +} + +int smpi_mpi_get_count(MPI_Status * status, MPI_Datatype datatype) +{ + return status->count / smpi_datatype_size(datatype); +} + + diff --git a/src/smpi/smpi_coll.cpp b/src/smpi/smpi_coll.cpp index 1fbc744d2a..960af87bcd 100644 --- a/src/smpi/smpi_coll.cpp +++ b/src/smpi/smpi_coll.cpp @@ -169,7 +169,7 @@ int smpi_coll_tuned_alltoall_bruck(void *sendbuf, int sendcount, MPI_Datatype se /* Create all receives that will be posted first */ for (i = 0; i < size; ++i) { if (i != rank) { - requests[count] = smpi_irecv_init(static_cast(recvbuf) + i * recvcount * recvext, recvcount, + requests[count] = Request::irecv_init(static_cast(recvbuf) + i * recvcount * recvext, recvcount, recvtype, i, system_tag, 
comm); count++; }else{ @@ -179,7 +179,7 @@ int smpi_coll_tuned_alltoall_bruck(void *sendbuf, int sendcount, MPI_Datatype se /* Now create all sends */ for (i = 0; i < size; ++i) { if (i != rank) { - requests[count] = smpi_isend_init(static_cast(sendbuf) + i * sendcount * sendext, sendcount, + requests[count] = Request::isend_init(static_cast(sendbuf) + i * sendcount * sendext, sendcount, sendtype, i, system_tag, comm); count++; }else{ @@ -187,12 +187,12 @@ int smpi_coll_tuned_alltoall_bruck(void *sendbuf, int sendcount, MPI_Datatype se } } /* Wait for them all. */ - smpi_mpi_startall(count, requests); + Request::startall(count, requests); XBT_DEBUG("<%d> wait for %d requests", rank, count); - smpi_mpi_waitall(count, requests, MPI_STATUS_IGNORE); + Request::waitall(count, requests, MPI_STATUS_IGNORE); for(i = 0; i < count; i++) { if(requests[i]!=MPI_REQUEST_NULL) - smpi_mpi_request_free(&requests[i]); + Request::unuse(&requests[i]); } xbt_free(requests); } @@ -226,7 +226,7 @@ int smpi_coll_tuned_alltoall_basic_linear(void *sendbuf, int sendcount, MPI_Data /* Post all receives first -- a simple optimization */ count = 0; for (i = (rank + 1) % size; i != rank; i = (i + 1) % size) { - requests[count] = smpi_irecv_init(static_cast(recvbuf) + i * recvcount * recvext, recvcount, + requests[count] = Request::irecv_init(static_cast(recvbuf) + i * recvcount * recvext, recvcount, recvtype, i, system_tag, comm); count++; } @@ -236,17 +236,17 @@ int smpi_coll_tuned_alltoall_basic_linear(void *sendbuf, int sendcount, MPI_Data * TODO: check the previous assertion */ for (i = (rank + size - 1) % size; i != rank; i = (i + size - 1) % size) { - requests[count] = smpi_isend_init(static_cast(sendbuf) + i * sendcount * sendext, sendcount, + requests[count] = Request::isend_init(static_cast(sendbuf) + i * sendcount * sendext, sendcount, sendtype, i, system_tag, comm); count++; } /* Wait for them all. 
*/ - smpi_mpi_startall(count, requests); + Request::startall(count, requests); XBT_DEBUG("<%d> wait for %d requests", rank, count); - smpi_mpi_waitall(count, requests, MPI_STATUS_IGNORE); + Request::waitall(count, requests, MPI_STATUS_IGNORE); for(i = 0; i < count; i++) { if(requests[i]!=MPI_REQUEST_NULL) - smpi_mpi_request_free(&requests[i]); + Request::unuse(&requests[i]); } xbt_free(requests); } @@ -280,7 +280,7 @@ int smpi_coll_basic_alltoallv(void *sendbuf, int *sendcounts, int *senddisps, MP /* Create all receives that will be posted first */ for (i = 0; i < size; ++i) { if (i != rank && recvcounts[i] != 0) { - requests[count] = smpi_irecv_init(static_cast(recvbuf) + recvdisps[i] * recvext, + requests[count] = Request::irecv_init(static_cast(recvbuf) + recvdisps[i] * recvext, recvcounts[i], recvtype, i, system_tag, comm); count++; }else{ @@ -290,7 +290,7 @@ int smpi_coll_basic_alltoallv(void *sendbuf, int *sendcounts, int *senddisps, MP /* Now create all sends */ for (i = 0; i < size; ++i) { if (i != rank && sendcounts[i] != 0) { - requests[count] = smpi_isend_init(static_cast(sendbuf) + senddisps[i] * sendext, + requests[count] = Request::isend_init(static_cast(sendbuf) + senddisps[i] * sendext, sendcounts[i], sendtype, i, system_tag, comm); count++; }else{ @@ -298,12 +298,12 @@ int smpi_coll_basic_alltoallv(void *sendbuf, int *sendcounts, int *senddisps, MP } } /* Wait for them all. 
*/ - smpi_mpi_startall(count, requests); + Request::startall(count, requests); XBT_DEBUG("<%d> wait for %d requests", rank, count); - smpi_mpi_waitall(count, requests, MPI_STATUS_IGNORE); + Request::waitall(count, requests, MPI_STATUS_IGNORE); for(i = 0; i < count; i++) { if(requests[i]!=MPI_REQUEST_NULL) - smpi_mpi_request_free(&requests[i]); + Request::unuse(&requests[i]); } xbt_free(requests); } diff --git a/src/smpi/smpi_comm.cpp b/src/smpi/smpi_comm.cpp index 489af18bc0..5c094cd55c 100644 --- a/src/smpi/smpi_comm.cpp +++ b/src/smpi/smpi_comm.cpp @@ -256,14 +256,14 @@ MPI_Comm Comm::split(int color, int key) for (int j = 0; j < count; j++) { if(rankmap[2 * j] != 0) { group_snd[reqs]=new simgrid::smpi::Group(group_out); - requests[reqs] = smpi_mpi_isend(&(group_snd[reqs]), 1, MPI_PTR, rankmap[2 * j], system_tag, this); + requests[reqs] = Request::isend(&(group_snd[reqs]), 1, MPI_PTR, rankmap[2 * j], system_tag, this); reqs++; } } if(i != 0) { group_out->destroy(); } - smpi_mpi_waitall(reqs, requests, MPI_STATUS_IGNORE); + Request::waitall(reqs, requests, MPI_STATUS_IGNORE); xbt_free(requests); } } @@ -273,7 +273,7 @@ MPI_Comm Comm::split(int color, int key) group_out = group_root; /* exit with root's group */ } else { if(color != MPI_UNDEFINED) { - smpi_mpi_recv(&group_out, 1, MPI_PTR, 0, system_tag, this, MPI_STATUS_IGNORE); + Request::recv(&group_out, 1, MPI_PTR, 0, system_tag, this, MPI_STATUS_IGNORE); } /* otherwise, exit with group_out == nullptr */ } return group_out!=nullptr ? 
new simgrid::smpi::Comm(group_out, nullptr) : MPI_COMM_NULL; diff --git a/src/smpi/smpi_global.cpp b/src/smpi/smpi_global.cpp index 4190569e45..f67ffe0b86 100644 --- a/src/smpi/smpi_global.cpp +++ b/src/smpi/smpi_global.cpp @@ -384,12 +384,6 @@ int smpi_process_get_sampling() return data->sampling; } -void print_request(const char *message, MPI_Request request) -{ - XBT_VERB("%s request %p [buf = %p, size = %zu, src = %d, dst = %d, tag = %d, flags = %x]", - message, request, request->buf, request->size, request->src, request->dst, request->tag, request->flags); -} - void smpi_comm_set_copy_data_callback(void (*callback) (smx_activity_t, void*, size_t)) { smpi_comm_copy_data_callback = callback; diff --git a/src/smpi/smpi_pmpi.cpp b/src/smpi/smpi_pmpi.cpp index cffccf4e81..d032edff5f 100644 --- a/src/smpi/smpi_pmpi.cpp +++ b/src/smpi/smpi_pmpi.cpp @@ -606,7 +606,7 @@ int PMPI_Send_init(void *buf, int count, MPI_Datatype datatype, int dst, int tag } else if (dst == MPI_PROC_NULL) { retval = MPI_SUCCESS; } else { - *request = smpi_mpi_send_init(buf, count, datatype, dst, tag, comm); + *request = Request::send_init(buf, count, datatype, dst, tag, comm); retval = MPI_SUCCESS; } smpi_bench_begin(); @@ -629,7 +629,7 @@ int PMPI_Recv_init(void *buf, int count, MPI_Datatype datatype, int src, int tag } else if (src == MPI_PROC_NULL) { retval = MPI_SUCCESS; } else { - *request = smpi_mpi_recv_init(buf, count, datatype, src, tag, comm); + *request = Request::recv_init(buf, count, datatype, src, tag, comm); retval = MPI_SUCCESS; } smpi_bench_begin(); @@ -652,7 +652,7 @@ int PMPI_Ssend_init(void* buf, int count, MPI_Datatype datatype, int dst, int ta } else if (dst == MPI_PROC_NULL) { retval = MPI_SUCCESS; } else { - *request = smpi_mpi_ssend_init(buf, count, datatype, dst, tag, comm); + *request = Request::ssend_init(buf, count, datatype, dst, tag, comm); retval = MPI_SUCCESS; } smpi_bench_begin(); @@ -669,7 +669,7 @@ int PMPI_Start(MPI_Request * request) if (request == 
nullptr || *request == MPI_REQUEST_NULL) { retval = MPI_ERR_REQUEST; } else { - smpi_mpi_start(*request); + (*request)->start(); retval = MPI_SUCCESS; } smpi_bench_begin(); @@ -690,7 +690,7 @@ int PMPI_Startall(int count, MPI_Request * requests) } } if(retval != MPI_ERR_REQUEST) { - smpi_mpi_startall(count, requests); + Request::startall(count, requests); } } smpi_bench_begin(); @@ -705,7 +705,7 @@ int PMPI_Request_free(MPI_Request * request) if (*request == MPI_REQUEST_NULL) { retval = MPI_ERR_ARG; } else { - smpi_mpi_request_free(request); + Request::unuse(request); retval = MPI_SUCCESS; } smpi_bench_begin(); @@ -750,11 +750,10 @@ int PMPI_Irecv(void *buf, int count, MPI_Datatype datatype, int src, int tag, MP extra->send_size = count*dt_size_send; TRACE_smpi_ptp_in(rank, src_traced, rank, __FUNCTION__, extra); - *request = smpi_mpi_irecv(buf, count, datatype, src, tag, comm); + *request = Request::irecv(buf, count, datatype, src, tag, comm); retval = MPI_SUCCESS; TRACE_smpi_ptp_out(rank, src_traced, rank, __FUNCTION__); - (*request)->recv = 1; } smpi_bench_begin(); @@ -800,11 +799,10 @@ int PMPI_Isend(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MP TRACE_smpi_ptp_in(rank, rank, dst_traced, __FUNCTION__, extra); TRACE_smpi_send(rank, rank, dst_traced, tag, count*smpi_datatype_size(datatype)); - *request = smpi_mpi_isend(buf, count, datatype, dst, tag, comm); + *request = Request::isend(buf, count, datatype, dst, tag, comm); retval = MPI_SUCCESS; TRACE_smpi_ptp_out(rank, rank, dst_traced, __FUNCTION__); - (*request)->send = 1; } smpi_bench_begin(); @@ -849,11 +847,10 @@ int PMPI_Issend(void* buf, int count, MPI_Datatype datatype, int dst, int tag, M TRACE_smpi_ptp_in(rank, rank, dst_traced, __FUNCTION__, extra); TRACE_smpi_send(rank, rank, dst_traced, tag, count*smpi_datatype_size(datatype)); - *request = smpi_mpi_issend(buf, count, datatype, dst, tag, comm); + *request = Request::issend(buf, count, datatype, dst, tag, comm); retval = 
MPI_SUCCESS; TRACE_smpi_ptp_out(rank, rank, dst_traced, __FUNCTION__); - (*request)->send = 1; } smpi_bench_begin(); @@ -896,7 +893,7 @@ int PMPI_Recv(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI extra->send_size = count * dt_size_send; TRACE_smpi_ptp_in(rank, src_traced, rank, __FUNCTION__, extra); - smpi_mpi_recv(buf, count, datatype, src, tag, comm, status); + Request::recv(buf, count, datatype, src, tag, comm, status); retval = MPI_SUCCESS; // the src may not have been known at the beginning of the recv (MPI_ANY_SOURCE) @@ -950,7 +947,7 @@ int PMPI_Send(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI TRACE_smpi_send(rank, rank, dst_traced, tag,count*smpi_datatype_size(datatype)); } - smpi_mpi_send(buf, count, datatype, dst, tag, comm); + Request::send(buf, count, datatype, dst, tag, comm); retval = MPI_SUCCESS; TRACE_smpi_ptp_out(rank, rank, dst_traced, __FUNCTION__); @@ -994,7 +991,7 @@ int PMPI_Ssend(void* buf, int count, MPI_Datatype datatype, int dst, int tag, MP TRACE_smpi_ptp_in(rank, rank, dst_traced, __FUNCTION__, extra); TRACE_smpi_send(rank, rank, dst_traced, tag,count*smpi_datatype_size(datatype)); - smpi_mpi_ssend(buf, count, datatype, dst, tag, comm); + Request::ssend(buf, count, datatype, dst, tag, comm); retval = MPI_SUCCESS; TRACE_smpi_ptp_out(rank, rank, dst_traced, __FUNCTION__); @@ -1051,7 +1048,7 @@ int PMPI_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype, int dst, TRACE_smpi_ptp_in(rank, src_traced, dst_traced, __FUNCTION__, extra); TRACE_smpi_send(rank, rank, dst_traced, sendtag,sendcount*smpi_datatype_size(sendtype)); - smpi_mpi_sendrecv(sendbuf, sendcount, sendtype, dst, sendtag, recvbuf, recvcount, recvtype, src, recvtag, comm, + Request::sendrecv(sendbuf, sendcount, sendtype, dst, sendtag, recvbuf, recvcount, recvtype, src, recvtag, comm, status); retval = MPI_SUCCESS; @@ -1095,13 +1092,13 @@ int PMPI_Test(MPI_Request * request, int *flag, MPI_Status * status) 
smpi_empty_status(status); retval = MPI_SUCCESS; } else { - int rank = ((*request)->comm != MPI_COMM_NULL) ? smpi_process_index() : -1; + int rank = ((*request)->comm() != MPI_COMM_NULL) ? smpi_process_index() : -1; instr_extra_data extra = xbt_new0(s_instr_extra_data_t,1); extra->type = TRACING_TEST; TRACE_smpi_testing_in(rank, extra); - *flag = smpi_mpi_test(request, status); + *flag = Request::test(request,status); TRACE_smpi_testing_out(rank); retval = MPI_SUCCESS; @@ -1118,7 +1115,7 @@ int PMPI_Testany(int count, MPI_Request requests[], int *index, int *flag, MPI_S if (index == nullptr || flag == nullptr) { retval = MPI_ERR_ARG; } else { - *flag = smpi_mpi_testany(count, requests, index, status); + *flag = Request::testany(count, requests, index, status); retval = MPI_SUCCESS; } smpi_bench_begin(); @@ -1133,7 +1130,7 @@ int PMPI_Testall(int count, MPI_Request* requests, int* flag, MPI_Status* status if (flag == nullptr) { retval = MPI_ERR_ARG; } else { - *flag = smpi_mpi_testall(count, requests, statuses); + *flag = Request::testall(count, requests, statuses); retval = MPI_SUCCESS; } smpi_bench_begin(); @@ -1153,7 +1150,7 @@ int PMPI_Probe(int source, int tag, MPI_Comm comm, MPI_Status* status) { status->MPI_SOURCE = MPI_PROC_NULL; retval = MPI_SUCCESS; } else { - smpi_mpi_probe(source, tag, comm, status); + Request::probe(source, tag, comm, status); retval = MPI_SUCCESS; } smpi_bench_begin(); @@ -1174,7 +1171,7 @@ int PMPI_Iprobe(int source, int tag, MPI_Comm comm, int* flag, MPI_Status* statu status->MPI_SOURCE = MPI_PROC_NULL; retval = MPI_SUCCESS; } else { - smpi_mpi_iprobe(source, tag, comm, flag, status); + Request::iprobe(source, tag, comm, flag, status); retval = MPI_SUCCESS; } smpi_bench_begin(); @@ -1195,18 +1192,18 @@ int PMPI_Wait(MPI_Request * request, MPI_Status * status) retval = MPI_SUCCESS; } else { - int rank = (request!=nullptr && (*request)->comm != MPI_COMM_NULL) ? 
smpi_process_index() : -1; + int rank = (request!=nullptr && (*request)->comm() != MPI_COMM_NULL) ? smpi_process_index() : -1; - int src_traced = (*request)->src; - int dst_traced = (*request)->dst; - int tag_traced= (*request)->tag; - MPI_Comm comm = (*request)->comm; - int is_wait_for_receive = (*request)->recv; + int src_traced = (*request)->src(); + int dst_traced = (*request)->dst(); + int tag_traced= (*request)->tag(); + MPI_Comm comm = (*request)->comm(); + int is_wait_for_receive = ((*request)->flags() & RECV); instr_extra_data extra = xbt_new0(s_instr_extra_data_t,1); extra->type = TRACING_WAIT; TRACE_smpi_ptp_in(rank, src_traced, dst_traced, __FUNCTION__, extra); - smpi_mpi_wait(request, status); + Request::wait(request, status); retval = MPI_SUCCESS; //the src may not have been known at the beginning of the recv (MPI_ANY_SOURCE) @@ -1245,7 +1242,7 @@ int PMPI_Waitany(int count, MPI_Request requests[], int *index, MPI_Status * sta for (int i = 0; i < count; i++) { MPI_Request req = requests[i]; //already received requests are no longer valid if (req) { - savedvals[i]=(savedvalstype){req->src, req->dst, req->recv, req->tag, req->comm}; + savedvals[i]=(savedvalstype){req->src(), req->dst(), (req->flags() & RECV), req->tag(), req->comm()}; } } int rank_traced = smpi_process_index(); @@ -1254,7 +1251,7 @@ int PMPI_Waitany(int count, MPI_Request requests[], int *index, MPI_Status * sta extra->send_size=count; TRACE_smpi_ptp_in(rank_traced, -1, -1, __FUNCTION__,extra); - *index = smpi_mpi_waitany(count, requests, status); + *index = Request::waitany(count, requests, status); if(*index!=MPI_UNDEFINED){ int src_traced = savedvals[*index].src; @@ -1293,7 +1290,7 @@ int PMPI_Waitall(int count, MPI_Request requests[], MPI_Status status[]) for (int i = 0; i < count; i++) { MPI_Request req = requests[i]; if(req!=MPI_REQUEST_NULL){ - savedvals[i]=(savedvalstype){req->src, req->dst, req->recv, req->tag, 1, req->comm}; + savedvals[i]=(savedvalstype){req->src(), 
req->dst(), (req->flags() & RECV), req->tag(), 1, req->comm()}; }else{ savedvals[i].valid=0; } @@ -1304,7 +1301,7 @@ int PMPI_Waitall(int count, MPI_Request requests[], MPI_Status status[]) extra->send_size=count; TRACE_smpi_ptp_in(rank_traced, -1, -1, __FUNCTION__,extra); - int retval = smpi_mpi_waitall(count, requests, status); + int retval =Request::waitall(count, requests, status); for (int i = 0; i < count; i++) { if(savedvals[i].valid){ @@ -1335,7 +1332,7 @@ int PMPI_Waitsome(int incount, MPI_Request requests[], int *outcount, int *indic if (outcount == nullptr) { retval = MPI_ERR_ARG; } else { - *outcount = smpi_mpi_waitsome(incount, requests, indices, status); + *outcount = Request::waitsome(incount, requests, indices, status); retval = MPI_SUCCESS; } smpi_bench_begin(); @@ -1350,7 +1347,7 @@ int PMPI_Testsome(int incount, MPI_Request requests[], int* outcount, int* indic if (outcount == nullptr) { retval = MPI_ERR_ARG; } else { - *outcount = smpi_mpi_testsome(incount, requests, indices, status); + *outcount = Request::testsome(incount, requests, indices, status); retval = MPI_SUCCESS; } smpi_bench_begin(); diff --git a/src/smpi/smpi_replay.cpp b/src/smpi/smpi_replay.cpp index 6d7d878e8d..44720cf570 100644 --- a/src/smpi/smpi_replay.cpp +++ b/src/smpi/smpi_replay.cpp @@ -228,7 +228,7 @@ static void action_send(const char *const *action) if (!TRACE_smpi_view_internals()) TRACE_smpi_send(rank, rank, dst_traced, 0, size*smpi_datatype_size(MPI_CURRENT_TYPE)); - smpi_mpi_send(nullptr, size, MPI_CURRENT_TYPE, to , 0, MPI_COMM_WORLD); + Request::send(nullptr, size, MPI_CURRENT_TYPE, to , 0, MPI_COMM_WORLD); log_timed_action (action, clock); @@ -259,10 +259,9 @@ static void action_Isend(const char *const *action) if (!TRACE_smpi_view_internals()) TRACE_smpi_send(rank, rank, dst_traced, 0, size*smpi_datatype_size(MPI_CURRENT_TYPE)); - MPI_Request request = smpi_mpi_isend(nullptr, size, MPI_CURRENT_TYPE, to, 0,MPI_COMM_WORLD); + MPI_Request request = 
Request::isend(nullptr, size, MPI_CURRENT_TYPE, to, 0,MPI_COMM_WORLD); TRACE_smpi_ptp_out(rank, rank, dst_traced, __FUNCTION__); - request->send = 1; get_reqq_self()->push_back(request); @@ -294,11 +293,11 @@ static void action_recv(const char *const *action) { //unknown size from the receiver point of view if(size<=0.0){ - smpi_mpi_probe(from, 0, MPI_COMM_WORLD, &status); + Request::probe(from, 0, MPI_COMM_WORLD, &status); size=status.count; } - smpi_mpi_recv(nullptr, size, MPI_CURRENT_TYPE, from, 0, MPI_COMM_WORLD, &status); + Request::recv(nullptr, size, MPI_CURRENT_TYPE, from, 0, MPI_COMM_WORLD, &status); TRACE_smpi_ptp_out(rank, src_traced, rank, __FUNCTION__); if (!TRACE_smpi_view_internals()) { @@ -332,14 +331,13 @@ static void action_Irecv(const char *const *action) MPI_Status status; //unknow size from the receiver pov if(size<=0.0){ - smpi_mpi_probe(from, 0, MPI_COMM_WORLD, &status); + Request::probe(from, 0, MPI_COMM_WORLD, &status); size=status.count; } - MPI_Request request = smpi_mpi_irecv(nullptr, size, MPI_CURRENT_TYPE, from, 0, MPI_COMM_WORLD); + MPI_Request request = Request::irecv(nullptr, size, MPI_CURRENT_TYPE, from, 0, MPI_COMM_WORLD); TRACE_smpi_ptp_out(rank, src_traced, rank, __FUNCTION__); - request->recv = 1; get_reqq_self()->push_back(request); log_timed_action (action, clock); @@ -361,7 +359,7 @@ static void action_test(const char *const *action){ extra->type=TRACING_TEST; TRACE_smpi_testing_in(rank, extra); - int flag = smpi_mpi_test(&request, &status); + int flag = Request::test(&request, &status); XBT_DEBUG("MPI_Test result: %d", flag); /* push back request in vector to be caught by a subsequent wait. if the test did succeed, the request is now nullptr.*/ @@ -387,17 +385,17 @@ static void action_wait(const char *const *action){ return; } - int rank = request->comm != MPI_COMM_NULL ? request->comm->rank() : -1; + int rank = request->comm() != MPI_COMM_NULL ? 
request->comm()->rank() : -1; - MPI_Group group = request->comm->group(); - int src_traced = group->rank(request->src); - int dst_traced = group->rank(request->dst); - int is_wait_for_receive = request->recv; + MPI_Group group = request->comm()->group(); + int src_traced = group->rank(request->src()); + int dst_traced = group->rank(request->dst()); + int is_wait_for_receive = (request->flags() & RECV); instr_extra_data extra = xbt_new0(s_instr_extra_data_t,1); extra->type = TRACING_WAIT; TRACE_smpi_ptp_in(rank, src_traced, dst_traced, __FUNCTION__, extra); - smpi_mpi_wait(&request, &status); + Request::wait(&request, &status); TRACE_smpi_ptp_out(rank, src_traced, dst_traced, __FUNCTION__); if (is_wait_for_receive) @@ -422,14 +420,14 @@ static void action_waitall(const char *const *action){ int recvs_rcv[count_requests]; unsigned int i=0; for (auto req : *(get_reqq_self())){ - if (req && req->recv){ - recvs_snd[i]=req->src; - recvs_rcv[i]=req->dst; + if (req && (req->flags () & RECV)){ + recvs_snd[i]=req->src(); + recvs_rcv[i]=req->dst(); }else recvs_snd[i]=-100; i++; } - smpi_mpi_waitall(count_requests, &(*get_reqq_self())[0], status); + Request::waitall(count_requests, &(*get_reqq_self())[0], status); for (i=0; i +#include + +#include "private.h" +#include "xbt/replay.h" +#include "mc/mc.h" +#include "src/mc/mc_replay.h" +#include "src/simix/smx_private.h" +#include "simgrid/sg_config.h" +#include "smpi/smpi_utils.hpp" +#include + +#include "src/kernel/activity/SynchroComm.hpp" + +XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_request, smpi, "Logging specific to SMPI (reques)"); + +static simgrid::config::Flag smpi_iprobe_sleep( + "smpi/iprobe", "Minimum time to inject inside a call to MPI_Iprobe", 1e-4); +static simgrid::config::Flag smpi_test_sleep( + "smpi/test", "Minimum time to inject inside a call to MPI_Test", 1e-4); + +std::vector smpi_os_values; +std::vector smpi_or_values; +std::vector smpi_ois_values; + +extern void (*smpi_comm_copy_data_callback) 
(smx_activity_t, void*, size_t); + +static double smpi_os(size_t size) +{ + if (smpi_os_values.empty()) { + smpi_os_values = parse_factor(xbt_cfg_get_string("smpi/os")); + } + double current=smpi_os_values.empty()?0.0:smpi_os_values[0].values[0]+smpi_os_values[0].values[1]*size; + // Iterate over all the sections that were specified and find the right + // value. (fact.factor represents the interval sizes; we want to find the + // section that has fact.factor <= size and no other such fact.factor <= size) + // Note: parse_factor() (used before) already sorts the vector we iterate over! + for (auto& fact : smpi_os_values) { + if (size <= fact.factor) { // Values already too large, use the previously computed value of current! + XBT_DEBUG("os : %zu <= %zu return %.10f", size, fact.factor, current); + return current; + }else{ + // If the next section is too large, the current section must be used. + // Hence, save the cost, as we might have to use it. + current = fact.values[0]+fact.values[1]*size; + } + } + XBT_DEBUG("Searching for smpi/os: %zu is larger than the largest boundary, return %.10f", size, current); + + return current; +} + +static double smpi_ois(size_t size) +{ + if (smpi_ois_values.empty()) { + smpi_ois_values = parse_factor(xbt_cfg_get_string("smpi/ois")); + } + double current=smpi_ois_values.empty()?0.0:smpi_ois_values[0].values[0]+smpi_ois_values[0].values[1]*size; + // Iterate over all the sections that were specified and find the right value. (fact.factor represents the interval + // sizes; we want to find the section that has fact.factor <= size and no other such fact.factor <= size) + // Note: parse_factor() (used before) already sorts the vector we iterate over! + for (auto& fact : smpi_ois_values) { + if (size <= fact.factor) { // Values already too large, use the previously computed value of current! 
+ XBT_DEBUG("ois : %zu <= %zu return %.10f", size, fact.factor, current); + return current; + }else{ + // If the next section is too large, the current section must be used. + // Hence, save the cost, as we might have to use it. + current = fact.values[0]+fact.values[1]*size; + } + } + XBT_DEBUG("Searching for smpi/ois: %zu is larger than the largest boundary, return %.10f", size, current); + + return current; +} + +static double smpi_or(size_t size) +{ + if (smpi_or_values.empty()) { + smpi_or_values = parse_factor(xbt_cfg_get_string("smpi/or")); + } + + double current=smpi_or_values.empty()?0.0:smpi_or_values.front().values[0]+smpi_or_values.front().values[1]*size; + + // Iterate over all the sections that were specified and find the right value. (fact.factor represents the interval + // sizes; we want to find the section that has fact.factor <= size and no other such fact.factor <= size) + // Note: parse_factor() (used before) already sorts the vector we iterate over! + for (auto fact : smpi_or_values) { + if (size <= fact.factor) { // Values already too large, use the previously computed value of current! + XBT_DEBUG("or : %zu <= %zu return %.10f", size, fact.factor, current); + return current; + } else { + // If the next section is too large, the current section must be used. + // Hence, save the cost, as we might have to use it. 
+ current=fact.values[0]+fact.values[1]*size; + } + } + XBT_DEBUG("smpi_or: %zu is larger than largest boundary, return %.10f", size, current); + + return current; +} + + +namespace simgrid{ +namespace smpi{ + +Request::Request(void *buf, int count, MPI_Datatype datatype, int src, int dst, int tag, MPI_Comm comm, unsigned flags) : src_(src), dst_(dst), tag_(tag), comm_(comm), flags_(flags) +{ + void *old_buf = nullptr; + s_smpi_subtype_t *subtype = static_cast(datatype->substruct); + + if((((flags & RECV) != 0) && ((flags & ACCUMULATE) !=0)) || (datatype->sizeof_substruct != 0)){ + // This part handles the problem of non-contiguous memory + old_buf = buf; + buf = count==0 ? nullptr : xbt_malloc(count*smpi_datatype_size(datatype)); + if ((datatype->sizeof_substruct != 0) && ((flags & SEND) != 0)) { + subtype->serialize(old_buf, buf, count, datatype->substruct); + } + } + buf_ = buf; + // This part handles the problem of non-contiguous memory (for the unserialisation at the reception) + old_buf_ = old_buf; + old_type_ = datatype; + size_ = smpi_datatype_size(datatype) * count; + smpi_datatype_use(datatype); + comm_->use(); + action_ = nullptr; + detached_ = 0; + detached_sender_ = nullptr; + real_src_ = 0; + truncated_ = 0; + real_size_ = 0; + real_tag_ = 0; + if (flags & PERSISTENT) + refcount_ = 1; + else + refcount_ = 0; + op_ = MPI_REPLACE; +} + + +//Request::destroy(void* request) +//{ +// MPI_Request req = static_cast(request); +// delete(req); +//} + +MPI_Comm Request::comm(){ + return comm_; +} + +int Request::src(){ + return src_; +} + +int Request::dst(){ + return dst_; +} + +int Request::tag(){ + return tag_; +} + +int Request::flags(){ + return flags_; +} + +void Request::unuse(MPI_Request* request) +{ + if((*request) != MPI_REQUEST_NULL){ + (*request)->refcount_--; + if((*request)->refcount_<0) xbt_die("wrong refcount"); + + if((*request)->refcount_==0){ + smpi_datatype_unuse((*request)->old_type_); + (*request)->comm_->unuse(); + 
(*request)->print_request("Destroying"); + delete *request; + *request = MPI_REQUEST_NULL; + }else{ + (*request)->print_request("Decrementing"); + } + }else{ + xbt_die("freeing an already free request"); + } +} + + +int Request::match_recv(void* a, void* b, smx_activity_t ignored) { + MPI_Request ref = static_cast(a); + MPI_Request req = static_cast(b); + XBT_DEBUG("Trying to match a recv of src %d against %d, tag %d against %d",ref->src_,req->src_, ref->tag_, req->tag_); + + xbt_assert(ref, "Cannot match recv against null reference"); + xbt_assert(req, "Cannot match recv against null request"); + if((ref->src_ == MPI_ANY_SOURCE || req->src_ == ref->src_) + && ((ref->tag_ == MPI_ANY_TAG && req->tag_ >=0) || req->tag_ == ref->tag_)){ + //we match, we can transfer some values + if(ref->src_ == MPI_ANY_SOURCE) + ref->real_src_ = req->src_; + if(ref->tag_ == MPI_ANY_TAG) + ref->real_tag_ = req->tag_; + if(ref->real_size_ < req->real_size_) + ref->truncated_ = 1; + if(req->detached_==1) + ref->detached_sender_=req; //tie the sender to the receiver, as it is detached and has to be freed in the receiver + XBT_DEBUG("match succeeded"); + return 1; + }else return 0; +} + +int Request::match_send(void* a, void* b,smx_activity_t ignored) { + MPI_Request ref = static_cast(a); + MPI_Request req = static_cast(b); + XBT_DEBUG("Trying to match a send of src %d against %d, tag %d against %d",ref->src_,req->src_, ref->tag_, req->tag_); + xbt_assert(ref, "Cannot match send against null reference"); + xbt_assert(req, "Cannot match send against null request"); + + if((req->src_ == MPI_ANY_SOURCE || req->src_ == ref->src_) + && ((req->tag_ == MPI_ANY_TAG && ref->tag_ >=0)|| req->tag_ == ref->tag_)){ + if(req->src_ == MPI_ANY_SOURCE) + req->real_src_ = ref->src_; + if(req->tag_ == MPI_ANY_TAG) + req->real_tag_ = ref->tag_; + if(req->real_size_ < ref->real_size_) + req->truncated_ = 1; + if(ref->detached_==1) + req->detached_sender_=ref; //tie the sender to the receiver, as it is detached 
and has to be freed in the receiver + XBT_DEBUG("match succeeded"); + return 1; + } else + return 0; +} + +void Request::print_request(const char *message) +{ + XBT_VERB("%s request %p [buf = %p, size = %zu, src = %d, dst = %d, tag = %d, flags = %x]", + message, this, buf_, size_, src_, dst_, tag_, flags_); +} + + +/* factories, to hide the internal flags from the caller */ +MPI_Request Request::send_init(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm) +{ + MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ + request = new Request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, smpi_process_index(), + comm->group()->index(dst), tag, comm, PERSISTENT | SEND | PREPARED); + return request; +} + +MPI_Request Request::ssend_init(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm) +{ + MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ + request = new Request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, smpi_process_index(), + comm->group()->index(dst), tag, comm, PERSISTENT | SSEND | SEND | PREPARED); + return request; +} + +MPI_Request Request::isend_init(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm) +{ + MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ + request = new Request(buf==MPI_BOTTOM ? nullptr : buf , count, datatype, smpi_process_index(), + comm->group()->index(dst), tag,comm, PERSISTENT | ISEND | SEND | PREPARED); + return request; +} + + +MPI_Request Request::rma_send_init(void *buf, int count, MPI_Datatype datatype, int src, int dst, int tag, MPI_Comm comm, + MPI_Op op) +{ + MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ + if(op==MPI_OP_NULL){ + request = new Request(buf==MPI_BOTTOM ? 
nullptr : buf , count, datatype, src, dst, tag, + comm, RMA | NON_PERSISTENT | ISEND | SEND | PREPARED); + }else{ + request = new Request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, src, dst, tag, + comm, RMA | NON_PERSISTENT | ISEND | SEND | PREPARED | ACCUMULATE); + request->op_ = op; + } + return request; +} + +MPI_Request Request::recv_init(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm) +{ + MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ + request = new Request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, + src == MPI_ANY_SOURCE ? MPI_ANY_SOURCE : comm->group()->index(src), + smpi_process_index(), tag, comm, PERSISTENT | RECV | PREPARED); + return request; +} + +MPI_Request Request::rma_recv_init(void *buf, int count, MPI_Datatype datatype, int src, int dst, int tag, MPI_Comm comm, + MPI_Op op) +{ + MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ + if(op==MPI_OP_NULL){ + request = new Request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, src, dst, tag, + comm, RMA | NON_PERSISTENT | RECV | PREPARED); + }else{ + request = new Request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, src, dst, tag, + comm, RMA | NON_PERSISTENT | RECV | PREPARED | ACCUMULATE); + request->op_ = op; + } + return request; +} + +MPI_Request Request::irecv_init(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm) +{ + MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ + request = new Request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, src == MPI_ANY_SOURCE ? 
MPI_ANY_SOURCE : + comm->group()->index(src), smpi_process_index(), tag, + comm, PERSISTENT | RECV | PREPARED); + return request; +} + +MPI_Request Request::isend(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm) +{ + MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ + request = new Request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, smpi_process_index(), + comm->group()->index(dst), tag, comm, NON_PERSISTENT | ISEND | SEND); + request->start(); + return request; +} + +MPI_Request Request::issend(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm) +{ + MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ + request = new Request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, smpi_process_index(), + comm->group()->index(dst), tag,comm, NON_PERSISTENT | ISEND | SSEND | SEND); + request->start(); + return request; +} + + +MPI_Request Request::irecv(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm) +{ + MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ + request = new Request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, src == MPI_ANY_SOURCE ? MPI_ANY_SOURCE : + comm->group()->index(src), smpi_process_index(), tag, comm, + NON_PERSISTENT | RECV); + request->start(); + return request; +} + +void Request::recv(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm, MPI_Status * status) +{ + MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ + request = irecv(buf, count, datatype, src, tag, comm); + wait(&request,status); + request = nullptr; +} + +void Request::send(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm) +{ + MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ + request = new Request(buf==MPI_BOTTOM ? 
nullptr : buf, count, datatype, smpi_process_index(), + comm->group()->index(dst), tag, comm, NON_PERSISTENT | SEND); + + request->start(); + wait(&request, MPI_STATUS_IGNORE); + request = nullptr; +} + +void Request::ssend(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm) +{ + MPI_Request request = nullptr; /* MC needs the comm to be set to nullptr during the call */ + request = new Request(buf==MPI_BOTTOM ? nullptr : buf, count, datatype, smpi_process_index(), + comm->group()->index(dst), tag, comm, NON_PERSISTENT | SSEND | SEND); + + request->start(); + wait(&request,MPI_STATUS_IGNORE); + request = nullptr; +} + + + +void Request::sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype,int dst, int sendtag, + void *recvbuf, int recvcount, MPI_Datatype recvtype, int src, int recvtag, + MPI_Comm comm, MPI_Status * status) +{ + MPI_Request requests[2]; + MPI_Status stats[2]; + int myid=smpi_process_index(); + if ((comm->group()->index(dst) == myid) && (comm->group()->index(src) == myid)){ + smpi_datatype_copy(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype); + return; + } + requests[0] = isend_init(sendbuf, sendcount, sendtype, dst, sendtag, comm); + requests[1] = irecv_init(recvbuf, recvcount, recvtype, src, recvtag, comm); + startall(2, requests); + waitall(2, requests, stats); + unuse(&requests[0]); + unuse(&requests[1]); + if(status != MPI_STATUS_IGNORE) { + // Copy receive status + *status = stats[1]; + } +} + + + +void Request::start() +{ + smx_mailbox_t mailbox; + + xbt_assert(action_ == nullptr, "Cannot (re-)start unfinished communication"); + flags_ &= ~PREPARED; + flags_ &= ~FINISHED; + refcount_++; + + if ((flags_ & RECV) != 0) { + this->print_request("New recv"); + + int async_small_thresh = xbt_cfg_get_int("smpi/async-small-thresh"); + + xbt_mutex_t mut = smpi_process_mailboxes_mutex(); + if (async_small_thresh != 0 || (flags_ & RMA) != 0) + xbt_mutex_acquire(mut); + + if (async_small_thresh == 0 && (flags_ & 
RMA) == 0 ) { + mailbox = smpi_process_mailbox(); + } + else if (((flags_ & RMA) != 0) || static_cast(size_) < async_small_thresh) { + //We have to check both mailboxes (because SSEND messages are sent to the large mbox). + //begin with the more appropriate one : the small one. + mailbox = smpi_process_mailbox_small(); + XBT_DEBUG("Is there a corresponding send already posted in the small mailbox %p (in case of SSEND)?", mailbox); + smx_activity_t action = simcall_comm_iprobe(mailbox, 0, src_,tag_, &match_recv, + static_cast(this)); + + if (action == nullptr) { + mailbox = smpi_process_mailbox(); + XBT_DEBUG("No, nothing in the small mailbox test the other one : %p", mailbox); + action = simcall_comm_iprobe(mailbox, 0, src_,tag_, &match_recv, static_cast(this)); + if (action == nullptr) { + XBT_DEBUG("Still nothing, switch back to the small mailbox : %p", mailbox); + mailbox = smpi_process_mailbox_small(); + } + } else { + XBT_DEBUG("yes there was something for us in the large mailbox"); + } + } else { + mailbox = smpi_process_mailbox_small(); + XBT_DEBUG("Is there a corresponding send already posted the small mailbox?"); + smx_activity_t action = simcall_comm_iprobe(mailbox, 0, src_,tag_, &match_recv, static_cast(this)); + + if (action == nullptr) { + XBT_DEBUG("No, nothing in the permanent receive mailbox"); + mailbox = smpi_process_mailbox(); + } else { + XBT_DEBUG("yes there was something for us in the small mailbox"); + } + } + + // we make a copy here, as the size is modified by simix, and we may reuse the request in another receive later + real_size_=size_; + action_ = simcall_comm_irecv(SIMIX_process_self(), mailbox, buf_, &real_size_, &match_recv, + ! smpi_process_get_replaying()? 
smpi_comm_copy_data_callback + : &smpi_comm_null_copy_buffer_callback, this, -1.0); + XBT_DEBUG("recv simcall posted"); + + if (async_small_thresh != 0 || (flags_ & RMA) != 0 ) + xbt_mutex_release(mut); + } else { /* the RECV flag was not set, so this is a send */ + int receiver = dst_; + + int rank = src_; + if (TRACE_smpi_view_internals()) { + TRACE_smpi_send(rank, rank, receiver, tag_, size_); + } + this->print_request("New send"); + + void* buf = buf_; + if ((flags_ & SSEND) == 0 && ( (flags_ & RMA) != 0 + || static_cast(size_) < xbt_cfg_get_int("smpi/send-is-detached-thresh") ) ) { + void *oldbuf = nullptr; + detached_ = 1; + XBT_DEBUG("Send request %p is detached", this); + refcount_++; + if(old_type_->sizeof_substruct == 0){ + oldbuf = buf_; + if (!smpi_process_get_replaying() && oldbuf != nullptr && size_!=0){ + if((smpi_privatize_global_variables != 0) + && (static_cast(buf_) >= smpi_start_data_exe) + && (static_cast(buf_) < smpi_start_data_exe + smpi_size_data_exe )){ + XBT_DEBUG("Privatization : We are sending from a zone inside global memory. Switch data segment "); + smpi_switch_data_segment(src_); + } + buf = xbt_malloc(size_); + memcpy(buf,oldbuf,size_); + XBT_DEBUG("buf %p copied into %p",oldbuf,buf); + } + } + } + + //if we are giving back the control to the user without waiting for completion, we have to inject timings + double sleeptime = 0.0; + if(detached_ != 0 || ((flags_ & (ISEND|SSEND)) != 0)){// issend should be treated as isend + //isend and send timings may be different + sleeptime = ((flags_ & ISEND) != 0) ? 
smpi_ois(size_) : smpi_os(size_); + } + + if(sleeptime > 0.0){ + simcall_process_sleep(sleeptime); + XBT_DEBUG("sending size of %zu : sleep %f ", size_, sleeptime); + } + + int async_small_thresh = xbt_cfg_get_int("smpi/async-small-thresh"); + + xbt_mutex_t mut=smpi_process_remote_mailboxes_mutex(receiver); + + if (async_small_thresh != 0 || (flags_ & RMA) != 0) + xbt_mutex_acquire(mut); + + if (!(async_small_thresh != 0 || (flags_ & RMA) !=0)) { + mailbox = smpi_process_remote_mailbox(receiver); + } else if (((flags_ & RMA) != 0) || static_cast(size_) < async_small_thresh) { // eager mode + mailbox = smpi_process_remote_mailbox(receiver); + XBT_DEBUG("Is there a corresponding recv already posted in the large mailbox %p?", mailbox); + smx_activity_t action = simcall_comm_iprobe(mailbox, 1,dst_, tag_, &match_send, + static_cast(this)); + if (action == nullptr) { + if ((flags_ & SSEND) == 0){ + mailbox = smpi_process_remote_mailbox_small(receiver); + XBT_DEBUG("No, nothing in the large mailbox, message is to be sent on the small one %p", mailbox); + } else { + mailbox = smpi_process_remote_mailbox_small(receiver); + XBT_DEBUG("SSEND : Is there a corresponding recv already posted in the small mailbox %p?", mailbox); + action = simcall_comm_iprobe(mailbox, 1,dst_, tag_, &match_send, static_cast(this)); + if (action == nullptr) { + XBT_DEBUG("No, we are first, send to large mailbox"); + mailbox = smpi_process_remote_mailbox(receiver); + } + } + } else { + XBT_DEBUG("Yes there was something for us in the large mailbox"); + } + } else { + mailbox = smpi_process_remote_mailbox(receiver); + XBT_DEBUG("Send request %p is in the large mailbox %p (buf: %p)",mailbox, this,buf_); + } + + // we make a copy here, as the size is modified by simix, and we may reuse the request in another receive later + real_size_=size_; + action_ = simcall_comm_isend(SIMIX_process_from_PID(src_+1), mailbox, size_, -1.0, + buf, real_size_, &match_send, + &xbt_free_f, // how to free the userdata if a 
detached send fails + !smpi_process_get_replaying() ? smpi_comm_copy_data_callback + : &smpi_comm_null_copy_buffer_callback, this, + // detach if msg size < eager/rdv switch limit + detached_); + XBT_DEBUG("send simcall posted"); + + /* FIXME: detached sends are not traceable (action_ == nullptr) */ + if (action_ != nullptr) + simcall_set_category(action_, TRACE_internal_smpi_get_category()); + + if (async_small_thresh != 0 || ((flags_ & RMA)!=0)) + xbt_mutex_release(mut); + } +} + + +void Request::startall(int count, MPI_Request * requests) +{ + if(requests== nullptr) + return; + + for(int i = 0; i < count; i++) { + requests[i]->start(); + } +} + +int Request::test(MPI_Request * request, MPI_Status * status) { + //assume that request is not MPI_REQUEST_NULL (filtered in PMPI_Test or testall before) + + // to avoid deadlocks if used as a break condition, such as + // while (MPI_Test(request, flag, status) && flag) { + // } + // because the time will not normally advance when only calls to MPI_Test are made -> deadlock + // multiplier to the sleeptime, to increase speed of execution, each failed test will increase it + static int nsleeps = 1; + if(smpi_test_sleep > 0) + simcall_process_sleep(nsleeps*smpi_test_sleep); + + smpi_empty_status(status); + int flag = 1; + if (((*request)->flags_ & PREPARED) == 0) { + if ((*request)->action_ != nullptr) + flag = simcall_comm_test((*request)->action_); + if (flag) { + finish_wait(request,status); + nsleeps=1;//reset the number of sleeps we will do next time + if (*request != MPI_REQUEST_NULL && ((*request)->flags_ & PERSISTENT)==0) + *request = MPI_REQUEST_NULL; + } else if (xbt_cfg_get_boolean("smpi/grow-injected-times")){ + nsleeps++; + } + } + return flag; +} + + +int Request::testsome(int incount, MPI_Request requests[], int *indices, MPI_Status status[]) +{ + int i; + int count = 0; + int count_dead = 0; + MPI_Status stat; + MPI_Status *pstat = status == MPI_STATUSES_IGNORE ? 
MPI_STATUS_IGNORE : &stat; + + for(i = 0; i < incount; i++) { + if((requests[i] != MPI_REQUEST_NULL)) { + if(test(&requests[i], pstat)) { + indices[i] = 1; + count++; + if(status != MPI_STATUSES_IGNORE) { + status[i] = *pstat; + } + if ((requests[i] != MPI_REQUEST_NULL) && requests[i]->flags_ & NON_PERSISTENT) + requests[i]=MPI_REQUEST_NULL; + } + }else{ + count_dead++; + } + } + if(count_dead==incount) + return MPI_UNDEFINED; + else return count; +} + + +int Request::testany(int count, MPI_Request requests[], int *index, MPI_Status * status) +{ + std::vector comms; + comms.reserve(count); + + int i; + int flag = 0; + + *index = MPI_UNDEFINED; + + std::vector map; /** Maps all matching comms back to their location in requests **/ + for(i = 0; i < count; i++) { + if ((requests[i] != MPI_REQUEST_NULL) && requests[i]->action_ && !(requests[i]->flags_ & PREPARED)) { + comms.push_back(requests[i]->action_); + map.push_back(i); + } + } + if(!map.empty()) { + //multiplier to the sleeptime, to increase speed of execution, each failed testany will increase it + static int nsleeps = 1; + if(smpi_test_sleep > 0) + simcall_process_sleep(nsleeps*smpi_test_sleep); + + i = simcall_comm_testany(comms.data(), comms.size()); // The i-th element in comms matches! + if (i != -1) { // -1 is not MPI_UNDEFINED but a SIMIX return code. (nothing matches) + *index = map[i]; + finish_wait(&requests[*index],status); + flag = 1; + nsleeps = 1; + if (requests[*index] != MPI_REQUEST_NULL && (requests[*index]->flags_ & NON_PERSISTENT)) { + requests[*index] = MPI_REQUEST_NULL; + } + } else { + nsleeps++; + } + } else { + //all requests are null or inactive, return true + flag = 1; + smpi_empty_status(status); + } + + return flag; +} + + +int Request::testall(int count, MPI_Request requests[], MPI_Status status[]) +{ + MPI_Status stat; + MPI_Status *pstat = status == MPI_STATUSES_IGNORE ? 
MPI_STATUS_IGNORE : &stat; + int flag=1; + for(int i=0; iflags_ & PREPARED)) { + if (test(&requests[i], pstat)!=1){ + flag=0; + }else{ + requests[i]=MPI_REQUEST_NULL; + } + }else{ + smpi_empty_status(pstat); + } + if(status != MPI_STATUSES_IGNORE) { + status[i] = *pstat; + } + } + return flag; +} + + + + +void Request::probe(int source, int tag, MPI_Comm comm, MPI_Status* status){ + int flag=0; + //FIXME find another way to avoid busy waiting ? + // the issue here is that we have to wait on a nonexistent comm + while(flag==0){ + iprobe(source, tag, comm, &flag, status); + XBT_DEBUG("Busy Waiting on probing : %d", flag); + } +} + +void Request::iprobe(int source, int tag, MPI_Comm comm, int* flag, MPI_Status* status){ + MPI_Request request = new Request(nullptr, 0, MPI_CHAR, source == MPI_ANY_SOURCE ? MPI_ANY_SOURCE : + comm->group()->index(source), comm->rank(), tag, comm, PERSISTENT | RECV); + + // to avoid deadlock, we have to sleep some time here, or the timer won't advance and we will only do iprobe simcalls + // (especially when used as a break condition, such as while(MPI_Iprobe(...)) ... 
) + // nsleeps is a multiplier to the sleeptime, to increase speed of execution, each failed iprobe will increase it + // (This can speed up the execution of certain applications by an order of magnitude, such as HPL) + static int nsleeps = 1; + double speed = simgrid::s4u::Actor::self()->host()->speed(); + double maxrate = xbt_cfg_get_double("smpi/iprobe-cpu-usage"); + if (smpi_iprobe_sleep > 0) { + smx_activity_t iprobe_sleep = simcall_execution_start("iprobe", /* flops to executek*/nsleeps*smpi_iprobe_sleep*speed*maxrate, /* priority */1.0, /* performance bound */maxrate*speed); + simcall_execution_wait(iprobe_sleep); + } + // behave like a receive, but don't do it + smx_mailbox_t mailbox; + + request->print_request("New iprobe"); + // We have to test both mailboxes as we don't know if we will receive one one or another + if (xbt_cfg_get_int("smpi/async-small-thresh") > 0){ + mailbox = smpi_process_mailbox_small(); + XBT_DEBUG("Trying to probe the perm recv mailbox"); + request->action_ = simcall_comm_iprobe(mailbox, 0, request->src_, request->tag_, &match_recv, + static_cast(request)); + } + + if (request->action_ == nullptr){ + mailbox = smpi_process_mailbox(); + XBT_DEBUG("trying to probe the other mailbox"); + request->action_ = simcall_comm_iprobe(mailbox, 0, request->src_,request->tag_, &match_recv, + static_cast(request)); + } + + if (request->action_ != nullptr){ + simgrid::kernel::activity::Comm *sync_comm = static_cast(request->action_); + MPI_Request req = static_cast(sync_comm->src_data); + *flag = 1; + if(status != MPI_STATUS_IGNORE && (req->flags_ & PREPARED) == 0) { + status->MPI_SOURCE = comm->group()->rank(req->src_); + status->MPI_TAG = req->tag_; + status->MPI_ERROR = MPI_SUCCESS; + status->count = req->real_size_; + } + nsleeps = 1;//reset the number of sleeps we will do next time + } + else { + *flag = 0; + if (xbt_cfg_get_boolean("smpi/grow-injected-times")) + nsleeps++; + } + unuse(&request); +} + + +void Request::finish_wait(MPI_Request* 
request, MPI_Status * status) +{ + MPI_Request req = *request; + smpi_empty_status(status); + + if(!((req->detached_ != 0) && ((req->flags_ & SEND) != 0)) && ((req->flags_ & PREPARED) == 0)){ + if(status != MPI_STATUS_IGNORE) { + int src = req->src_ == MPI_ANY_SOURCE ? req->real_src_ : req->src_; + status->MPI_SOURCE = req->comm_->group()->rank(src); + status->MPI_TAG = req->tag_ == MPI_ANY_TAG ? req->real_tag_ : req->tag_; + status->MPI_ERROR = req->truncated_ != 0 ? MPI_ERR_TRUNCATE : MPI_SUCCESS; + // this handles the case were size in receive differs from size in send + status->count = req->real_size_; + } + + req->print_request("Finishing"); + MPI_Datatype datatype = req->old_type_; + + if(((req->flags_ & ACCUMULATE) != 0) || (datatype->sizeof_substruct != 0)){ + if (!smpi_process_get_replaying()){ + if( smpi_privatize_global_variables != 0 && (static_cast(req->old_buf_) >= smpi_start_data_exe) + && ((char*)req->old_buf_ < smpi_start_data_exe + smpi_size_data_exe )){ + XBT_VERB("Privatization : We are unserializing to a zone in global memory Switch data segment "); + smpi_switch_data_segment(smpi_process_index()); + } + } + + if(datatype->sizeof_substruct != 0){ + // This part handles the problem of non-contignous memory the unserialization at the reception + s_smpi_subtype_t *subtype = static_cast(datatype->substruct); + if(req->flags_ & RECV) + subtype->unserialize(req->buf_, req->old_buf_, req->real_size_/smpi_datatype_size(datatype) , + datatype->substruct, req->op_); + xbt_free(req->buf_); + }else if(req->flags_ & RECV){//apply op on contiguous buffer for accumulate + int n =req->real_size_/smpi_datatype_size(datatype); + smpi_op_apply(req->op_, req->buf_, req->old_buf_, &n, &datatype); + xbt_free(req->buf_); + } + } + } + + if (TRACE_smpi_view_internals() && ((req->flags_ & RECV) != 0)){ + int rank = smpi_process_index(); + int src_traced = (req->src_ == MPI_ANY_SOURCE ? 
req->real_src_ : req->src_); + TRACE_smpi_recv(rank, src_traced, rank,req->tag_); + } + if(req->detached_sender_ != nullptr){ + //integrate pseudo-timing for buffering of small messages, do not bother to execute the simcall if 0 + double sleeptime = smpi_or(req->real_size_); + if(sleeptime > 0.0){ + simcall_process_sleep(sleeptime); + XBT_DEBUG("receiving size of %zu : sleep %f ", req->real_size_, sleeptime); + } + unuse(&(req->detached_sender_)); + } + if(req->flags_ & PERSISTENT) + req->action_ = nullptr; + req->flags_ |= FINISHED; + unuse(request); +} + + +void Request::wait(MPI_Request * request, MPI_Status * status) +{ + (*request)->print_request("Waiting"); + if ((*request)->flags_ & PREPARED) { + smpi_empty_status(status); + return; + } + + if ((*request)->action_ != nullptr) + // this is not a detached send + simcall_comm_wait((*request)->action_, -1.0); + + finish_wait(request,status); + if (*request != MPI_REQUEST_NULL && (((*request)->flags_ & NON_PERSISTENT)!=0)) + *request = MPI_REQUEST_NULL; +} + +int Request::waitany(int count, MPI_Request requests[], MPI_Status * status) +{ + s_xbt_dynar_t comms; // Keep it on stack to save some extra mallocs + int i; + int size = 0; + int index = MPI_UNDEFINED; + int *map; + + if(count > 0) { + // Wait for a request to complete + xbt_dynar_init(&comms, sizeof(smx_activity_t), nullptr); + map = xbt_new(int, count); + XBT_DEBUG("Wait for one of %d", count); + for(i = 0; i < count; i++) { + if (requests[i] != MPI_REQUEST_NULL && !(requests[i]->flags_ & PREPARED) && !(requests[i]->flags_ & FINISHED)) { + if (requests[i]->action_ != nullptr) { + XBT_DEBUG("Waiting any %p ", requests[i]); + xbt_dynar_push(&comms, &requests[i]->action_); + map[size] = i; + size++; + } else { + // This is a finished detached request, let's return this one + size = 0; // so we free the dynar but don't do the waitany call + index = i; + finish_wait(&requests[i], status); // cleanup if refcount = 0 + if (requests[i] != MPI_REQUEST_NULL && 
(requests[i]->flags_ & NON_PERSISTENT)) + requests[i] = MPI_REQUEST_NULL; // set to null + break; + } + } + } + if(size > 0) { + i = simcall_comm_waitany(&comms, -1); + + // not MPI_UNDEFINED, as this is a simix return code + if (i != -1) { + index = map[i]; + //in case of an accumulate, we have to wait the end of all requests to apply the operation, ordered correctly. + if ((requests[index] == MPI_REQUEST_NULL) + || (!((requests[index]->flags_ & ACCUMULATE) && (requests[index]->flags_ & RECV)))){ + finish_wait(&requests[index],status); + if (requests[i] != MPI_REQUEST_NULL && (requests[i]->flags_ & NON_PERSISTENT)) + requests[index] = MPI_REQUEST_NULL; + }else{ + XBT_WARN("huu?"); + } + } + } + + xbt_dynar_free_data(&comms); + xbt_free(map); + } + + if (index==MPI_UNDEFINED) + smpi_empty_status(status); + + return index; +} + +static int sort_accumulates(MPI_Request a, MPI_Request b) +{ + return (a->tag() < b->tag()); +} + +int Request::waitall(int count, MPI_Request requests[], MPI_Status status[]) +{ + std::vector accumulates; + int index; + MPI_Status stat; + MPI_Status *pstat = status == MPI_STATUSES_IGNORE ? 
MPI_STATUS_IGNORE : &stat; + int retvalue = MPI_SUCCESS; + //tag invalid requests in the set + if (status != MPI_STATUSES_IGNORE) { + for (int c = 0; c < count; c++) { + if (requests[c] == MPI_REQUEST_NULL || requests[c]->dst_ == MPI_PROC_NULL || (requests[c]->flags_ & PREPARED)) { + smpi_empty_status(&status[c]); + } else if (requests[c]->src_ == MPI_PROC_NULL) { + smpi_empty_status(&status[c]); + status[c].MPI_SOURCE = MPI_PROC_NULL; + } + } + } + for (int c = 0; c < count; c++) { + if (MC_is_active() || MC_record_replay_is_active()) { + wait(&requests[c],pstat); + index = c; + } else { + index = waitany(count, requests, pstat); + if (index == MPI_UNDEFINED) + break; + + if (requests[index] != MPI_REQUEST_NULL + && (requests[index]->flags_ & RECV) + && (requests[index]->flags_ & ACCUMULATE)) + accumulates.push_back(requests[index]); + if (requests[index] != MPI_REQUEST_NULL && (requests[index]->flags_ & NON_PERSISTENT)) + requests[index] = MPI_REQUEST_NULL; + } + if (status != MPI_STATUSES_IGNORE) { + status[index] = *pstat; + if (status[index].MPI_ERROR == MPI_ERR_TRUNCATE) + retvalue = MPI_ERR_IN_STATUS; + } + } + + if (!accumulates.empty()) { + std::sort(accumulates.begin(), accumulates.end(), sort_accumulates); + for (auto req : accumulates) { + finish_wait(&req, status); + } + } + + return retvalue; +} + +int Request::waitsome(int incount, MPI_Request requests[], int *indices, MPI_Status status[]) +{ + int i; + int count = 0; + int index; + MPI_Status stat; + MPI_Status *pstat = status == MPI_STATUSES_IGNORE ? 
MPI_STATUS_IGNORE : &stat; + + for(i = 0; i < incount; i++) + { + index=waitany(incount, requests, pstat); + if(index!=MPI_UNDEFINED){ + indices[count] = index; + count++; + if(status != MPI_STATUSES_IGNORE) { + status[index] = *pstat; + } + if (requests[index] != MPI_REQUEST_NULL && (requests[index]->flags_ & NON_PERSISTENT)) + requests[index]=MPI_REQUEST_NULL; + }else{ + return MPI_UNDEFINED; + } + } + return count; +} + + +} +} + + + diff --git a/src/smpi/smpi_request.hpp b/src/smpi/smpi_request.hpp new file mode 100644 index 0000000000..fa33611dac --- /dev/null +++ b/src/smpi/smpi_request.hpp @@ -0,0 +1,94 @@ +/* Copyright (c) 2010, 2013-2015. The SimGrid Team. + * All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +#ifndef SMPI_REQUEST_HPP_INCLUDED +#define SMPI_REQUEST_HPP_INCLUDED + +#include "private.h" + +namespace simgrid{ +namespace smpi{ + +class Request { + private : + void *buf_; + /* in the case of non-contiguous memory the user address should be keep + * to unserialize the data inside the user memory*/ + void *old_buf_; + /* this let us know how to unserialize at the end of + * the communication*/ + MPI_Datatype old_type_; + size_t size_; + int src_; + int dst_; + int tag_; + //to handle cases where we have an unknown sender + //We can't override src, tag, and size, because the request may be reused later + int real_src_; + int real_tag_; + int truncated_; + size_t real_size_; + MPI_Comm comm_; + smx_activity_t action_; + unsigned flags_; + int detached_; + MPI_Request detached_sender_; + int refcount_; + MPI_Op op_; + public: + Request(void *buf, int count, MPI_Datatype datatype, int src, int dst, int tag, MPI_Comm comm, unsigned flags); + MPI_Comm comm(); + int src(); + int dst(); + int tag(); + int flags(); + void print_request(const char *message); + void start(); + + static void finish_wait(MPI_Request* request, 
MPI_Status * status); + static void unuse(MPI_Request* request); + static void wait(MPI_Request* req, MPI_Status * status); + static MPI_Request send_init(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm); + static MPI_Request isend_init(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm); + static MPI_Request ssend_init(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm); + static MPI_Request rma_send_init(void *buf, int count, MPI_Datatype datatype, int src, int dst, int tag, MPI_Comm comm,MPI_Op op); + static MPI_Request recv_init(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm); + static MPI_Request rma_recv_init(void *buf, int count, MPI_Datatype datatype, int src, int dst, int tag, MPI_Comm comm,MPI_Op op); + static MPI_Request irecv_init(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm); + static MPI_Request isend(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm); + static MPI_Request issend(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm); + static MPI_Request irecv(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm); + + static void recv(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm, MPI_Status * status); + static void send(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm); + static void ssend(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm); + + static void sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype,int dst, int sendtag, + void *recvbuf, int recvcount, MPI_Datatype recvtype, int src, int recvtag, + MPI_Comm comm, MPI_Status * status); + + static void startall(int count, MPI_Request * requests); + + static int test(MPI_Request * request,MPI_Status * status); + static int testsome(int incount, MPI_Request requests[], int *indices, 
MPI_Status status[]); + static int testany(int count, MPI_Request requests[], int *index, MPI_Status * status); + static int testall(int count, MPI_Request requests[], MPI_Status status[]); + + static void probe(int source, int tag, MPI_Comm comm, MPI_Status* status); + static void iprobe(int source, int tag, MPI_Comm comm, int* flag, MPI_Status* status); + + static int waitany(int count, MPI_Request requests[], MPI_Status * status); + static int waitall(int count, MPI_Request requests[], MPI_Status status[]); + static int waitsome(int incount, MPI_Request requests[], int *indices, MPI_Status status[]); + + static int match_send(void* a, void* b,smx_activity_t ignored); + static int match_recv(void* a, void* b,smx_activity_t ignored); +}; + +} +} + +#endif diff --git a/src/smpi/smpi_win.cpp b/src/smpi/smpi_win.cpp index 803776fe93..0f8f5c2707 100644 --- a/src/smpi/smpi_win.cpp +++ b/src/smpi/smpi_win.cpp @@ -91,20 +91,20 @@ int Win::fence(int assert) xbt_mutex_acquire(mut_); // This (simulated) mutex ensures that no process pushes to the vector of requests during the waitall. // Without this, the vector could get redimensionned when another process pushes. - // This would result in the array used by smpi_mpi_waitall() to be invalidated. - // Another solution would be to copy the data and cleanup the vector *before* smpi_mpi_waitall + // This would result in the array used by Request::waitall() to be invalidated. 
+ // Another solution would be to copy the data and cleanup the vector *before* Request::waitall std::vector *reqs = requests_; int size = static_cast(reqs->size()); // start all requests that have been prepared by another process if (size > 0) { for (const auto& req : *reqs) { - if (req && (req->flags & PREPARED)) - smpi_mpi_start(req); + if (req && (req->flags() & PREPARED)) + req->start(); } MPI_Request* treqs = &(*reqs)[0]; - smpi_mpi_waitall(size, treqs, MPI_STATUSES_IGNORE); + Request::waitall(size, treqs, MPI_STATUSES_IGNORE); } count_=0; xbt_mutex_release(mut_); @@ -130,11 +130,11 @@ int Win::put( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, if(target_rank != comm_->rank()){ //prepare send_request - MPI_Request sreq = smpi_rma_send_init(origin_addr, origin_count, origin_datatype, smpi_process_index(), + MPI_Request sreq = Request::rma_send_init(origin_addr, origin_count, origin_datatype, smpi_process_index(), comm_->group()->index(target_rank), SMPI_RMA_TAG+1, comm_, MPI_OP_NULL); //prepare receiver request - MPI_Request rreq = smpi_rma_recv_init(recv_addr, target_count, target_datatype, smpi_process_index(), + MPI_Request rreq = Request::rma_recv_init(recv_addr, target_count, target_datatype, smpi_process_index(), comm_->group()->index(target_rank), SMPI_RMA_TAG+1, recv_win->comm_, MPI_OP_NULL); //push request to receiver's win @@ -142,7 +142,7 @@ int Win::put( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, recv_win->requests_->push_back(rreq); xbt_mutex_release(recv_win->mut_); //start send - smpi_mpi_start(sreq); + sreq->start(); //push request to sender's win xbt_mutex_acquire(mut_); @@ -168,24 +168,24 @@ int Win::get( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, if(target_rank != comm_->rank()){ //prepare send_request - MPI_Request sreq = smpi_rma_send_init(send_addr, target_count, target_datatype, + MPI_Request sreq = Request::rma_send_init(send_addr, target_count, target_datatype, 
comm_->group()->index(target_rank), smpi_process_index(), SMPI_RMA_TAG+2, send_win->comm_, MPI_OP_NULL); //prepare receiver request - MPI_Request rreq = smpi_rma_recv_init(origin_addr, origin_count, origin_datatype, + MPI_Request rreq = Request::rma_recv_init(origin_addr, origin_count, origin_datatype, comm_->group()->index(target_rank), smpi_process_index(), SMPI_RMA_TAG+2, comm_, MPI_OP_NULL); //start the send, with another process than us as sender. - smpi_mpi_start(sreq); + sreq->start(); //push request to receiver's win xbt_mutex_acquire(send_win->mut_); send_win->requests_->push_back(sreq); xbt_mutex_release(send_win->mut_); //start recv - smpi_mpi_start(rreq); + rreq->start(); //push request to sender's win xbt_mutex_acquire(mut_); requests_->push_back(rreq); @@ -211,11 +211,11 @@ int Win::accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_da XBT_DEBUG("Entering MPI_Accumulate to %d", target_rank); //As the tag will be used for ordering of the operations, add count to it //prepare send_request - MPI_Request sreq = smpi_rma_send_init(origin_addr, origin_count, origin_datatype, + MPI_Request sreq = Request::rma_send_init(origin_addr, origin_count, origin_datatype, smpi_process_index(), comm_->group()->index(target_rank), SMPI_RMA_TAG+3+count_, comm_, op); //prepare receiver request - MPI_Request rreq = smpi_rma_recv_init(recv_addr, target_count, target_datatype, + MPI_Request rreq = Request::rma_recv_init(recv_addr, target_count, target_datatype, smpi_process_index(), comm_->group()->index(target_rank), SMPI_RMA_TAG+3+count_, recv_win->comm_, op); count_++; @@ -224,7 +224,7 @@ int Win::accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_da recv_win->requests_->push_back(rreq); xbt_mutex_release(recv_win->mut_); //start send - smpi_mpi_start(sreq); + sreq->start(); //push request to sender's win xbt_mutex_acquire(mut_); @@ -256,16 +256,16 @@ int Win::start(MPI_Group group, int assert){ while (j != size) { int src = 
group->index(j); if (src != smpi_process_index() && src != MPI_UNDEFINED) { - reqs[i] = smpi_irecv_init(nullptr, 0, MPI_CHAR, src, SMPI_RMA_TAG + 4, MPI_COMM_WORLD); + reqs[i] = Request::irecv_init(nullptr, 0, MPI_CHAR, src, SMPI_RMA_TAG + 4, MPI_COMM_WORLD); i++; } j++; } size=i; - smpi_mpi_startall(size, reqs); - smpi_mpi_waitall(size, reqs, MPI_STATUSES_IGNORE); + Request::startall(size, reqs); + Request::waitall(size, reqs, MPI_STATUSES_IGNORE); for(i=0;iindex(j); if(dst!=smpi_process_index() && dst!=MPI_UNDEFINED){ - reqs[i]=smpi_mpi_send_init(nullptr, 0, MPI_CHAR, dst, SMPI_RMA_TAG+4, MPI_COMM_WORLD); + reqs[i]=Request::send_init(nullptr, 0, MPI_CHAR, dst, SMPI_RMA_TAG+4, MPI_COMM_WORLD); i++; } j++; } size=i; - smpi_mpi_startall(size, reqs); - smpi_mpi_waitall(size, reqs, MPI_STATUSES_IGNORE); + Request::startall(size, reqs); + Request::waitall(size, reqs, MPI_STATUSES_IGNORE); for(i=0;iindex(j); if(dst!=smpi_process_index() && dst!=MPI_UNDEFINED){ - reqs[i]=smpi_mpi_send_init(nullptr, 0, MPI_CHAR, dst, SMPI_RMA_TAG+5, MPI_COMM_WORLD); + reqs[i]=Request::send_init(nullptr, 0, MPI_CHAR, dst, SMPI_RMA_TAG+5, MPI_COMM_WORLD); i++; } j++; } size=i; XBT_DEBUG("Win_complete - Sending sync messages to %d processes", size); - smpi_mpi_startall(size, reqs); - smpi_mpi_waitall(size, reqs, MPI_STATUSES_IGNORE); + Request::startall(size, reqs); + Request::waitall(size, reqs, MPI_STATUSES_IGNORE); for(i=0;i 0) { // start all requests that have been prepared by another process for (const auto& req : *reqqs) { - if (req && (req->flags & PREPARED)) - smpi_mpi_start(req); + if (req && (req->flags() & PREPARED)) + req->start(); } MPI_Request* treqs = &(*reqqs)[0]; - smpi_mpi_waitall(size, treqs, MPI_STATUSES_IGNORE); + Request::waitall(size, treqs, MPI_STATUSES_IGNORE); reqqs->clear(); } xbt_mutex_release(mut_); @@ -365,17 +365,17 @@ int Win::wait(){ while(j!=size){ int src=group_->index(j); if(src!=smpi_process_index() && src!=MPI_UNDEFINED){ - 
reqs[i]=smpi_irecv_init(nullptr, 0, MPI_CHAR, src,SMPI_RMA_TAG+5, MPI_COMM_WORLD); + reqs[i]=Request::irecv_init(nullptr, 0, MPI_CHAR, src,SMPI_RMA_TAG+5, MPI_COMM_WORLD); i++; } j++; } size=i; XBT_DEBUG("Win_wait - Receiving sync messages from %d processes", size); - smpi_mpi_startall(size, reqs); - smpi_mpi_waitall(size, reqs, MPI_STATUSES_IGNORE); + Request::startall(size, reqs); + Request::waitall(size, reqs, MPI_STATUSES_IGNORE); for(i=0;i 0) { // start all requests that have been prepared by another process for (const auto& req : *reqqs) { - if (req && (req->flags & PREPARED)) - smpi_mpi_start(req); + if (req && (req->flags() & PREPARED)) + req->start(); } MPI_Request* treqs = &(*reqqs)[0]; - smpi_mpi_waitall(size, treqs, MPI_STATUSES_IGNORE); + Request::waitall(size, treqs, MPI_STATUSES_IGNORE); reqqs->clear(); } xbt_mutex_release(mut_); diff --git a/tools/cmake/DefinePackages.cmake b/tools/cmake/DefinePackages.cmake index 9552e15f20..9ec0dbb819 100644 --- a/tools/cmake/DefinePackages.cmake +++ b/tools/cmake/DefinePackages.cmake @@ -222,6 +222,8 @@ set(SMPI_SRC src/smpi/smpi_mpi_dt.cpp src/smpi/smpi_pmpi.cpp src/smpi/smpi_replay.cpp + src/smpi/smpi_request.cpp + src/smpi/smpi_request.hpp src/smpi/smpi_win.cpp src/smpi/smpi_win.hpp src/smpi/smpi_topo.cpp -- 2.20.1