From: Martin Quinson Date: Mon, 18 Nov 2019 14:57:30 +0000 (+0100) Subject: Change smpi::Colls static class into a namespace of functions X-Git-Tag: v3.25~385 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/efa1f86e31adca3c57e9d0135b2f6ebdbabb0878 Change smpi::Colls static class into a namespace of functions --- diff --git a/src/smpi/bindings/smpi_pmpi_coll.cpp b/src/smpi/bindings/smpi_pmpi_coll.cpp index bf4952b2b9..81535ee456 100644 --- a/src/smpi/bindings/smpi_pmpi_coll.cpp +++ b/src/smpi/bindings/smpi_pmpi_coll.cpp @@ -38,11 +38,11 @@ int PMPI_Ibarrier(MPI_Comm comm, MPI_Request *request) TRACE_smpi_comm_in(rank, request == MPI_REQUEST_IGNORED ? "PMPI_Barrier" : "PMPI_Ibarrier", new simgrid::instr::NoOpTIData(request == MPI_REQUEST_IGNORED ? "barrier" : "ibarrier")); if (request == MPI_REQUEST_IGNORED) { - simgrid::smpi::Colls::barrier(comm); + simgrid::smpi::colls::barrier(comm); // Barrier can be used to synchronize RMA calls. Finish all requests from comm before. comm->finish_rma_calls(); } else - simgrid::smpi::Colls::ibarrier(comm, request); + simgrid::smpi::colls::ibarrier(comm, request); TRACE_smpi_comm_out(rank); smpi_bench_begin(); @@ -78,9 +78,9 @@ int PMPI_Ibcast(void *buf, int count, MPI_Datatype datatype, simgrid::smpi::Datatype::encode(datatype), "")); if (comm->size() > 1) { if (request == MPI_REQUEST_IGNORED) - simgrid::smpi::Colls::bcast(buf, count, datatype, root, comm); + simgrid::smpi::colls::bcast(buf, count, datatype, root, comm); else - simgrid::smpi::Colls::ibcast(buf, count, datatype, root, comm, request); + simgrid::smpi::colls::ibcast(buf, count, datatype, root, comm, request); } else { if (request != MPI_REQUEST_IGNORED) *request = MPI_REQUEST_NULL; @@ -130,9 +130,9 @@ int PMPI_Igather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void (comm->rank() != root || recvtype->is_replayable()) ? recvcount : recvcount * recvtype->size(), simgrid::smpi::Datatype::encode(real_sendtype), simgrid::smpi::Datatype::encode(recvtype))); if (request == MPI_REQUEST_IGNORED) - simgrid::smpi::Colls::gather(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcount, recvtype, root, comm); + simgrid::smpi::colls::gather(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcount, recvtype, root, comm); else - simgrid::smpi::Colls::igather(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcount, recvtype, root, comm, + simgrid::smpi::colls::igather(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcount, recvtype, root, comm, request); TRACE_smpi_comm_out(rank); @@ -194,10 +194,10 @@ int PMPI_Igatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, voi nullptr, dt_size_recv, trace_recvcounts, simgrid::smpi::Datatype::encode(real_sendtype), simgrid::smpi::Datatype::encode(recvtype))); if (request == MPI_REQUEST_IGNORED) - simgrid::smpi::Colls::gatherv(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcounts, displs, recvtype, + simgrid::smpi::colls::gatherv(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcounts, displs, recvtype, root, comm); else - simgrid::smpi::Colls::igatherv(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcounts, displs, recvtype, + simgrid::smpi::colls::igatherv(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcounts, displs, recvtype, root, comm, request); TRACE_smpi_comm_out(rank); @@ -240,9 +240,9 @@ int PMPI_Iallgather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, v recvtype->is_replayable() ? recvcount : recvcount * recvtype->size(), simgrid::smpi::Datatype::encode(sendtype), simgrid::smpi::Datatype::encode(recvtype))); if (request == MPI_REQUEST_IGNORED) - simgrid::smpi::Colls::allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); + simgrid::smpi::colls::allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); else - simgrid::smpi::Colls::iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request); + simgrid::smpi::colls::iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request); TRACE_smpi_comm_out(rank); smpi_bench_begin(); @@ -298,9 +298,9 @@ int PMPI_Iallgatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, dt_size_recv, trace_recvcounts, simgrid::smpi::Datatype::encode(sendtype), simgrid::smpi::Datatype::encode(recvtype))); if (request == MPI_REQUEST_IGNORED) - simgrid::smpi::Colls::allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm); + simgrid::smpi::colls::allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm); else - simgrid::smpi::Colls::iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, + simgrid::smpi::colls::iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, request); TRACE_smpi_comm_out(rank); @@ -345,9 +345,9 @@ int PMPI_Iscatter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, voi recvtype->is_replayable() ? recvcount : recvcount * recvtype->size(), simgrid::smpi::Datatype::encode(sendtype), simgrid::smpi::Datatype::encode(recvtype))); if (request == MPI_REQUEST_IGNORED) - simgrid::smpi::Colls::scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm); + simgrid::smpi::colls::scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm); else - simgrid::smpi::Colls::iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request); + simgrid::smpi::colls::iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request); TRACE_smpi_comm_out(rank); smpi_bench_begin(); @@ -407,9 +407,9 @@ int PMPI_Iscatterv(const void* sendbuf, const int* sendcounts, const int* displs nullptr, simgrid::smpi::Datatype::encode(sendtype), simgrid::smpi::Datatype::encode(recvtype))); if (request == MPI_REQUEST_IGNORED) - simgrid::smpi::Colls::scatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm); + simgrid::smpi::colls::scatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm); else - simgrid::smpi::Colls::iscatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, + simgrid::smpi::colls::iscatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, request); TRACE_smpi_comm_out(rank); @@ -447,9 +447,9 @@ int PMPI_Ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dat datatype->is_replayable() ? count : count * datatype->size(), -1, simgrid::smpi::Datatype::encode(datatype), "")); if (request == MPI_REQUEST_IGNORED) - simgrid::smpi::Colls::reduce(sendbuf, recvbuf, count, datatype, op, root, comm); + simgrid::smpi::colls::reduce(sendbuf, recvbuf, count, datatype, op, root, comm); else - simgrid::smpi::Colls::ireduce(sendbuf, recvbuf, count, datatype, op, root, comm, request); + simgrid::smpi::colls::ireduce(sendbuf, recvbuf, count, datatype, op, root, comm, request); TRACE_smpi_comm_out(rank); smpi_bench_begin(); @@ -507,9 +507,9 @@ int PMPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype simgrid::smpi::Datatype::encode(datatype), "")); if (request == MPI_REQUEST_IGNORED) - simgrid::smpi::Colls::allreduce(real_sendbuf, recvbuf, count, datatype, op, comm); + simgrid::smpi::colls::allreduce(real_sendbuf, recvbuf, count, datatype, op, comm); else - simgrid::smpi::Colls::iallreduce(real_sendbuf, recvbuf, count, datatype, op, comm, request); + simgrid::smpi::colls::iallreduce(real_sendbuf, recvbuf, count, datatype, op, comm, request); TRACE_smpi_comm_out(rank); smpi_bench_begin(); @@ -551,9 +551,9 @@ int PMPI_Iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datat int retval; if (request == MPI_REQUEST_IGNORED) - retval = simgrid::smpi::Colls::scan(real_sendbuf, recvbuf, count, datatype, op, comm); + retval = simgrid::smpi::colls::scan(real_sendbuf, recvbuf, count, datatype, op, comm); else - retval = simgrid::smpi::Colls::iscan(real_sendbuf, recvbuf, count, datatype, op, comm, request); + retval = simgrid::smpi::colls::iscan(real_sendbuf, recvbuf, count, datatype, op, comm, request); TRACE_smpi_comm_out(rank); smpi_bench_begin(); @@ -595,9 +595,9 @@ int PMPI_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dat int retval; if (request == MPI_REQUEST_IGNORED) - retval = simgrid::smpi::Colls::exscan(real_sendbuf, recvbuf, count, datatype, op, comm); + retval = simgrid::smpi::colls::exscan(real_sendbuf, recvbuf, count, datatype, op, comm); else - retval = simgrid::smpi::Colls::iexscan(real_sendbuf, recvbuf, count, datatype, op, comm, request); + retval = simgrid::smpi::colls::iexscan(real_sendbuf, recvbuf, count, datatype, op, comm, request); TRACE_smpi_comm_out(rank); smpi_bench_begin(); @@ -653,9 +653,9 @@ int PMPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int *recvcoun -1, trace_recvcounts, simgrid::smpi::Datatype::encode(datatype), "")); if (request == MPI_REQUEST_IGNORED) - simgrid::smpi::Colls::reduce_scatter(real_sendbuf, recvbuf, recvcounts, datatype, op, comm); + simgrid::smpi::colls::reduce_scatter(real_sendbuf, recvbuf, recvcounts, datatype, op, comm); else - simgrid::smpi::Colls::ireduce_scatter(real_sendbuf, recvbuf, recvcounts, datatype, op, comm, request); + simgrid::smpi::colls::ireduce_scatter(real_sendbuf, recvbuf, recvcounts, datatype, op, comm, request); TRACE_smpi_comm_out(rank); smpi_bench_begin(); @@ -705,9 +705,9 @@ int PMPI_Ireduce_scatter_block(const void* sendbuf, void* recvbuf, int recvcount for (int i = 0; i < count; i++) recvcounts[i] = recvcount; if (request == MPI_REQUEST_IGNORED) - simgrid::smpi::Colls::reduce_scatter(real_sendbuf, recvbuf, recvcounts, datatype, op, comm); + simgrid::smpi::colls::reduce_scatter(real_sendbuf, recvbuf, recvcounts, datatype, op, comm); else - simgrid::smpi::Colls::ireduce_scatter(real_sendbuf, recvbuf, recvcounts, datatype, op, comm, request); + simgrid::smpi::colls::ireduce_scatter(real_sendbuf, recvbuf, recvcounts, datatype, op, comm, request); delete[] recvcounts; TRACE_smpi_comm_out(rank); @@ -759,9 +759,9 @@ int PMPI_Ialltoall(const void* sendbuf, int sendcount, MPI_Datatype sendtype, vo int retval; if (request == MPI_REQUEST_IGNORED) retval = - simgrid::smpi::Colls::alltoall(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcount, recvtype, comm); + simgrid::smpi::colls::alltoall(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcount, recvtype, comm); else - retval = simgrid::smpi::Colls::ialltoall(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcount, recvtype, + retval = simgrid::smpi::colls::ialltoall(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcount, recvtype, comm, request); TRACE_smpi_comm_out(rank); @@ -846,10 +846,10 @@ int PMPI_Ialltoallv(const void* sendbuf, const int* sendcounts, const int* sendd int retval; if (request == MPI_REQUEST_IGNORED) - retval = simgrid::smpi::Colls::alltoallv(real_sendbuf, real_sendcounts, real_senddisps, real_sendtype, recvbuf, + retval = simgrid::smpi::colls::alltoallv(real_sendbuf, real_sendcounts, real_senddisps, real_sendtype, recvbuf, recvcounts, recvdisps, recvtype, comm); else - retval = simgrid::smpi::Colls::ialltoallv(real_sendbuf, real_sendcounts, real_senddisps, real_sendtype, recvbuf, + retval = simgrid::smpi::colls::ialltoallv(real_sendbuf, real_sendcounts, real_senddisps, real_sendtype, recvbuf, recvcounts, recvdisps, recvtype, comm, request); TRACE_smpi_comm_out(rank); @@ -938,10 +938,10 @@ int PMPI_Ialltoallw(const void* sendbuf, const int* sendcounts, const int* sendd int retval; if (request == MPI_REQUEST_IGNORED) - retval = simgrid::smpi::Colls::alltoallw(real_sendbuf, real_sendcounts, real_senddisps, real_sendtypes, recvbuf, + retval = simgrid::smpi::colls::alltoallw(real_sendbuf, real_sendcounts, real_senddisps, real_sendtypes, recvbuf, recvcounts, recvdisps, recvtypes, comm); else - retval = simgrid::smpi::Colls::ialltoallw(real_sendbuf, real_sendcounts, real_senddisps, real_sendtypes, recvbuf, + retval = simgrid::smpi::colls::ialltoallw(real_sendbuf, real_sendcounts, real_senddisps, real_sendtypes, recvbuf, recvcounts, recvdisps, recvtypes, comm, request); TRACE_smpi_comm_out(rank); diff --git a/src/smpi/bindings/smpi_pmpi_win.cpp b/src/smpi/bindings/smpi_pmpi_win.cpp index 32abd4ef65..6ff061d9c7 100644 --- a/src/smpi/bindings/smpi_pmpi_win.cpp +++ b/src/smpi/bindings/smpi_pmpi_win.cpp @@ -64,10 +64,10 @@ int PMPI_Win_allocate_shared( MPI_Aint size, int disp_unit, MPI_Info info, MPI_C if(ptr==nullptr) return MPI_ERR_NO_MEM; } - - simgrid::smpi::Colls::bcast(&ptr, sizeof(void*), MPI_BYTE, 0, comm); - simgrid::smpi::Colls::barrier(comm); - + + simgrid::smpi::colls::bcast(&ptr, sizeof(void*), MPI_BYTE, 0, comm); + simgrid::smpi::colls::barrier(comm); + *static_cast(base) = (char*)ptr+rank*size; *win = new simgrid::smpi::Win( ptr, size, disp_unit, info, comm,rank==0); retval = MPI_SUCCESS; diff --git a/src/smpi/colls/allgather/allgather-GB.cpp b/src/smpi/colls/allgather/allgather-GB.cpp index a0a320aab2..63d5f33a68 100644 --- a/src/smpi/colls/allgather/allgather-GB.cpp +++ b/src/smpi/colls/allgather/allgather-GB.cpp @@ -16,9 +16,8 @@ int allgather__GB(const void *send_buff, int send_count, { int num_procs; num_procs = comm->size(); - Colls::gather(send_buff, send_count, send_type, recv_buff, recv_count, recv_type, - 0, comm); - Colls::bcast(recv_buff, (recv_count * num_procs), recv_type, 0, comm); + colls::gather(send_buff, send_count, send_type, recv_buff, recv_count, recv_type, 0, comm); + colls::bcast(recv_buff, (recv_count * num_procs), recv_type, 0, comm); return MPI_SUCCESS; } diff --git a/src/smpi/colls/allgather/allgather-mvapich-smp.cpp b/src/smpi/colls/allgather/allgather-mvapich-smp.cpp index b53f6f9e69..c8e06001b3 100644 --- a/src/smpi/colls/allgather/allgather-mvapich-smp.cpp +++ b/src/smpi/colls/allgather/allgather-mvapich-smp.cpp @@ -84,22 +84,17 @@ int allgather__mvapich2_smp(const void *sendbuf,int sendcnt, MPI_Datatype sendty /*If there is just one node, after gather itself, * root has all the data and it can do bcast*/ if(local_rank == 0) { - mpi_errno = Colls::gather(sendbuf, sendcnt,sendtype, - (void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)), - recvcnt, recvtype, - 0, shmem_comm); + mpi_errno = + colls::gather(sendbuf, sendcnt, sendtype, (void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)), + recvcnt, recvtype, 0, shmem_comm); } else { /*Since in allgather all the processes could have * its own data in place*/ if(sendbuf == MPI_IN_PLACE) { - mpi_errno = Colls::gather((void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)), - recvcnt , recvtype, - recvbuf, recvcnt, recvtype, - 0, shmem_comm); + mpi_errno = colls::gather((void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)), recvcnt, recvtype, + recvbuf, recvcnt, recvtype, 0, shmem_comm); } else { - mpi_errno = Colls::gather(sendbuf, sendcnt,sendtype, - recvbuf, recvcnt, recvtype, - 0, shmem_comm); + mpi_errno = colls::gather(sendbuf, sendcnt, sendtype, recvbuf, recvcnt, recvtype, 0, shmem_comm); } } /* Exchange the data between the node leaders*/ @@ -125,12 +120,8 @@ int allgather__mvapich2_smp(const void *sendbuf,int sendcnt, MPI_Datatype sendty void* sendbuf=((char*)recvbuf)+recvtype->get_extent()*displs[leader_comm->rank()]; - mpi_errno = Colls::allgatherv(sendbuf, - (recvcnt*local_size), - recvtype, - recvbuf, recvcnts, - displs, recvtype, - leader_comm); + mpi_errno = colls::allgatherv(sendbuf, (recvcnt * local_size), recvtype, recvbuf, recvcnts, displs, + recvtype, leader_comm); delete[] displs; delete[] recvcnts; } else { @@ -148,7 +139,7 @@ int allgather__mvapich2_smp(const void *sendbuf,int sendcnt, MPI_Datatype sendty } /*Bcast the entire data from node leaders to all other cores*/ - mpi_errno = Colls::bcast (recvbuf, recvcnt * size, recvtype, 0, shmem_comm); + mpi_errno = colls::bcast(recvbuf, recvcnt * size, recvtype, 0, shmem_comm); return mpi_errno; } diff --git a/src/smpi/colls/allgatherv/allgatherv-GB.cpp b/src/smpi/colls/allgatherv/allgatherv-GB.cpp index d6f9e517aa..4123cfecf9 100644 --- a/src/smpi/colls/allgatherv/allgatherv-GB.cpp +++ b/src/smpi/colls/allgatherv/allgatherv-GB.cpp @@ -15,7 +15,7 @@ int allgatherv__GB(const void *send_buff, int send_count, const int *recv_counts, const int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm) { - Colls::gatherv(send_buff, send_count, send_type, recv_buff, recv_counts, recv_disps, recv_type, 0, comm); + colls::gatherv(send_buff, send_count, send_type, recv_buff, recv_counts, recv_disps, recv_type, 0, comm); int num_procs, i, current, max = 0; num_procs = comm->size(); for (i = 0; i < num_procs; i++) { @@ -23,7 +23,7 @@ int allgatherv__GB(const void *send_buff, int send_count, if (current > max) max = current; } - Colls::bcast(recv_buff, max, recv_type, 0, comm); + colls::bcast(recv_buff, max, recv_type, 0, comm); return MPI_SUCCESS; } diff --git a/src/smpi/colls/allreduce/allreduce-lr.cpp b/src/smpi/colls/allreduce/allreduce-lr.cpp index 5cd9bb684b..1920b14038 100644 --- a/src/smpi/colls/allreduce/allreduce-lr.cpp +++ b/src/smpi/colls/allreduce/allreduce-lr.cpp @@ -97,9 +97,7 @@ int allreduce__lr(const void *sbuf, void *rbuf, int rcount, /* when communication size is not divisible by number of process: call the native implementation for the remain chunk at the end of the operation */ if (remainder_flag) { - return Colls::allreduce((char *) sbuf + remainder_offset, - (char *) rbuf + remainder_offset, remainder, dtype, op, - comm); + return colls::allreduce((char*)sbuf + remainder_offset, (char*)rbuf + remainder_offset, remainder, dtype, op, comm); } return 0; diff --git a/src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp b/src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp index 61ee55d5ed..393dba7309 100644 --- a/src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp +++ b/src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp @@ -62,7 +62,7 @@ static int MPIR_Allreduce_reduce_p2p_MV2(const void *sendbuf, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm); + colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm); return MPI_SUCCESS; } @@ -72,7 +72,7 @@ static int MPIR_Allreduce_reduce_shmem_MV2(const void *sendbuf, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm); + colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm); return MPI_SUCCESS; } @@ -166,7 +166,7 @@ int allreduce__mvapich2_two_level(const void *sendbuf, /* Broadcasting the mesage from leader to the rest */ /* Note: shared memory broadcast could improve the performance */ - mpi_errno = Colls::bcast(recvbuf, count, datatype, 0, shmem_comm); + mpi_errno = colls::bcast(recvbuf, count, datatype, 0, shmem_comm); return (mpi_errno); diff --git a/src/smpi/colls/allreduce/allreduce-rab1.cpp b/src/smpi/colls/allreduce/allreduce-rab1.cpp index 355b6fe781..e2808ec563 100644 --- a/src/smpi/colls/allreduce/allreduce-rab1.cpp +++ b/src/smpi/colls/allreduce/allreduce-rab1.cpp @@ -67,7 +67,7 @@ int allreduce__rab1(const void *sbuff, void *rbuff, } memcpy(tmp_buf, recv + recv_idx * extent, recv_cnt * extent); - Colls::allgather(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm); + colls::allgather(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm); memcpy(rbuff, recv, count * extent); smpi_free_tmp_buffer(recv); @@ -101,7 +101,7 @@ int allreduce__rab1(const void *sbuff, void *rbuff, } memcpy(tmp_buf, (char *) rbuff + recv_idx * extent, recv_cnt * extent); - Colls::allgather(tmp_buf, recv_cnt, dtype, rbuff, recv_cnt, dtype, comm); + colls::allgather(tmp_buf, recv_cnt, dtype, rbuff, recv_cnt, dtype, comm); smpi_free_tmp_buffer(tmp_buf); } diff --git a/src/smpi/colls/allreduce/allreduce-rab2.cpp b/src/smpi/colls/allreduce/allreduce-rab2.cpp index b060f474b9..e1f09dc7c7 100644 --- a/src/smpi/colls/allreduce/allreduce-rab2.cpp +++ b/src/smpi/colls/allreduce/allreduce-rab2.cpp @@ -47,7 +47,7 @@ int allreduce__rab2(const void *sbuff, void *rbuff, memcpy(send, sbuff, s_extent * count); - Colls::alltoall(send, send_size, dtype, recv, send_size, dtype, comm); + colls::alltoall(send, send_size, dtype, recv, send_size, dtype, comm); memcpy(tmp, recv, nbytes); @@ -55,7 +55,7 @@ int allreduce__rab2(const void *sbuff, void *rbuff, if (op != MPI_OP_NULL) op->apply(recv + s_offset, tmp, &send_size, dtype); - Colls::allgather(tmp, send_size, dtype, recv, send_size, dtype, comm); + colls::allgather(tmp, send_size, dtype, recv, send_size, dtype, comm); memcpy(rbuff, recv, count * s_extent); smpi_free_tmp_buffer(recv); @@ -69,7 +69,7 @@ int allreduce__rab2(const void *sbuff, void *rbuff, unsigned char* recv = smpi_get_tmp_recvbuffer(s_extent * send_size * nprocs); - Colls::alltoall(send, send_size, dtype, recv, send_size, dtype, comm); + colls::alltoall(send, send_size, dtype, recv, send_size, dtype, comm); memcpy((char *) rbuff + r_offset, recv, nbytes); @@ -77,8 +77,7 @@ int allreduce__rab2(const void *sbuff, void *rbuff, if (op != MPI_OP_NULL) op->apply(recv + s_offset, static_cast(rbuff) + r_offset, &send_size, dtype); - Colls::allgather((char *) rbuff + r_offset, send_size, dtype, rbuff, send_size, - dtype, comm); + colls::allgather((char*)rbuff + r_offset, send_size, dtype, rbuff, send_size, dtype, comm); smpi_free_tmp_buffer(recv); } diff --git a/src/smpi/colls/allreduce/allreduce-redbcast.cpp b/src/smpi/colls/allreduce/allreduce-redbcast.cpp index 8c967ce14c..4607c6ecda 100644 --- a/src/smpi/colls/allreduce/allreduce-redbcast.cpp +++ b/src/smpi/colls/allreduce/allreduce-redbcast.cpp @@ -11,8 +11,8 @@ int allreduce__redbcast(const void *buf, void *buf2, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - Colls::reduce(buf, buf2, count, datatype, op, 0, comm); - Colls::bcast(buf2, count, datatype, 0, comm); + colls::reduce(buf, buf2, count, datatype, op, 0, comm); + colls::bcast(buf2, count, datatype, 0, comm); return MPI_SUCCESS; } } diff --git a/src/smpi/colls/alltoall/alltoall-pair-mpi-barrier.cpp b/src/smpi/colls/alltoall/alltoall-pair-mpi-barrier.cpp index b113714b8c..a2cdc87f6d 100644 --- a/src/smpi/colls/alltoall/alltoall-pair-mpi-barrier.cpp +++ b/src/smpi/colls/alltoall/alltoall-pair-mpi-barrier.cpp @@ -56,7 +56,7 @@ alltoall__pair_mpi_barrier(const void *send_buff, int send_count, for (i = 0; i < num_procs; i++) { src = dst = rank ^ i; - Colls::barrier(comm); + colls::barrier(comm); Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag, comm, &s); diff --git a/src/smpi/colls/alltoall/alltoall-pair-one-barrier.cpp b/src/smpi/colls/alltoall/alltoall-pair-one-barrier.cpp index 79c5133e44..2daafb4e59 100644 --- a/src/smpi/colls/alltoall/alltoall-pair-one-barrier.cpp +++ b/src/smpi/colls/alltoall/alltoall-pair-one-barrier.cpp @@ -55,7 +55,7 @@ alltoall__pair_one_barrier(const void *send_buff, int send_count, send_chunk *= send_count; recv_chunk *= recv_count; - Colls::barrier(comm); + colls::barrier(comm); for (i = 0; i < num_procs; i++) { src = dst = rank ^ i; Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, diff --git a/src/smpi/colls/alltoall/alltoall-ring-mpi-barrier.cpp b/src/smpi/colls/alltoall/alltoall-ring-mpi-barrier.cpp index 1d72cc6998..29ef3ca318 100644 --- a/src/smpi/colls/alltoall/alltoall-ring-mpi-barrier.cpp +++ b/src/smpi/colls/alltoall/alltoall-ring-mpi-barrier.cpp @@ -54,7 +54,7 @@ alltoall__ring_mpi_barrier(const void *send_buff, int send_count, src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; - Colls::barrier(comm); + colls::barrier(comm); Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag, comm, &s); diff --git a/src/smpi/colls/alltoall/alltoall-ring-one-barrier.cpp b/src/smpi/colls/alltoall/alltoall-ring-one-barrier.cpp index b184b186bb..43cb7d7daa 100644 --- a/src/smpi/colls/alltoall/alltoall-ring-one-barrier.cpp +++ b/src/smpi/colls/alltoall/alltoall-ring-one-barrier.cpp @@ -49,7 +49,7 @@ alltoall__ring_one_barrier(const void *send_buff, int send_count, send_chunk *= send_count; recv_chunk *= recv_count; - Colls::barrier(comm); + colls::barrier(comm); for (i = 0; i < num_procs; i++) { src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; diff --git a/src/smpi/colls/alltoallv/alltoallv-bruck.cpp b/src/smpi/colls/alltoallv/alltoallv-bruck.cpp index d32380b8f4..d625e29e5a 100644 --- a/src/smpi/colls/alltoallv/alltoallv-bruck.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-bruck.cpp @@ -78,7 +78,7 @@ int alltoallv__bruck(const void *sendbuf, const int *sendcounts, const int *send count++; } /* Wait for them all. */ - // Colls::startall(count, requests); + // colls::startall(count, requests); XBT_DEBUG("<%d> wait for %d requests", rank, count); Request::waitall(count, requests, MPI_STATUSES_IGNORE); delete[] requests; diff --git a/src/smpi/colls/alltoallv/alltoallv-pair-mpi-barrier.cpp b/src/smpi/colls/alltoallv/alltoallv-pair-mpi-barrier.cpp index 68d9852666..c209f5bc51 100644 --- a/src/smpi/colls/alltoallv/alltoallv-pair-mpi-barrier.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-pair-mpi-barrier.cpp @@ -52,7 +52,7 @@ int alltoallv__pair_mpi_barrier(const void *send_buff, const int *send_counts, c for (i = 0; i < num_procs; i++) { src = dst = rank ^ i; - Colls::barrier(comm); + colls::barrier(comm); Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, tag, recv_ptr + recv_disps[src] * recv_chunk, recv_counts[src], recv_type, src, tag, comm, &s); diff --git a/src/smpi/colls/alltoallv/alltoallv-pair-one-barrier.cpp b/src/smpi/colls/alltoallv/alltoallv-pair-one-barrier.cpp index 7ef7a10e17..7bac365ac6 100644 --- a/src/smpi/colls/alltoallv/alltoallv-pair-one-barrier.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-pair-one-barrier.cpp @@ -50,7 +50,7 @@ int alltoallv__pair_one_barrier(const void *send_buff, const int *send_counts, c send_chunk = send_type->get_extent(); recv_chunk = recv_type->get_extent(); - Colls::barrier(comm); + colls::barrier(comm); for (i = 0; i < num_procs; i++) { src = dst = rank ^ i; Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, diff --git a/src/smpi/colls/alltoallv/alltoallv-ring-mpi-barrier.cpp b/src/smpi/colls/alltoallv/alltoallv-ring-mpi-barrier.cpp index 44a32f4adf..60e3ebfa52 100644 --- a/src/smpi/colls/alltoallv/alltoallv-ring-mpi-barrier.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-ring-mpi-barrier.cpp @@ -50,7 +50,7 @@ int alltoallv__ring_mpi_barrier(const void *send_buff, const int *send_counts, c src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; - Colls::barrier(comm); + colls::barrier(comm); Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, tag, recv_ptr + recv_disps[src] * recv_chunk, recv_counts[src], recv_type, src, tag, comm, &s); diff --git a/src/smpi/colls/alltoallv/alltoallv-ring-one-barrier.cpp b/src/smpi/colls/alltoallv/alltoallv-ring-one-barrier.cpp index f265fba054..d40bfa1d8a 100644 --- a/src/smpi/colls/alltoallv/alltoallv-ring-one-barrier.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-ring-one-barrier.cpp @@ -45,7 +45,7 @@ int alltoallv__ring_one_barrier(const void *send_buff, const int *send_counts, c send_chunk = send_type->get_extent(); recv_chunk = recv_type->get_extent(); - Colls::barrier(comm); + colls::barrier(comm); for (i = 0; i < num_procs; i++) { src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; diff --git a/src/smpi/colls/bcast/bcast-NTSB.cpp b/src/smpi/colls/bcast/bcast-NTSB.cpp index 632eda44fa..fa5aab1e80 100644 --- a/src/smpi/colls/bcast/bcast-NTSB.cpp +++ b/src/smpi/colls/bcast/bcast-NTSB.cpp @@ -167,8 +167,7 @@ int bcast__NTSB(void *buf, int count, MPI_Datatype datatype, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_NTSB use default MPI_bcast."); - Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype, - root, comm); + colls::bcast((char*)buf + (pipe_length * increment), remainder, datatype, root, comm); } return MPI_SUCCESS; diff --git a/src/smpi/colls/bcast/bcast-NTSL-Isend.cpp b/src/smpi/colls/bcast/bcast-NTSL-Isend.cpp index 0ceb9d8168..535b208d57 100644 --- a/src/smpi/colls/bcast/bcast-NTSL-Isend.cpp +++ b/src/smpi/colls/bcast/bcast-NTSL-Isend.cpp @@ -118,8 +118,7 @@ int bcast__NTSL_Isend(void *buf, int count, MPI_Datatype datatype, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_NTSL_Isend_nb use default MPI_bcast."); - Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype, - root, comm); + colls::bcast((char*)buf + (pipe_length * increment), remainder, datatype, root, comm); } return MPI_SUCCESS; diff --git a/src/smpi/colls/bcast/bcast-NTSL.cpp b/src/smpi/colls/bcast/bcast-NTSL.cpp index 115abd967a..59312664c5 100644 --- a/src/smpi/colls/bcast/bcast-NTSL.cpp +++ b/src/smpi/colls/bcast/bcast-NTSL.cpp @@ -118,8 +118,7 @@ int bcast__NTSL(void *buf, int count, MPI_Datatype datatype, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_arrival_NTSL use default MPI_bcast."); - Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype, - root, comm); + colls::bcast((char*)buf + (pipe_length * increment), remainder, datatype, root, comm); } return MPI_SUCCESS; diff --git a/src/smpi/colls/bcast/bcast-SMP-binary.cpp b/src/smpi/colls/bcast/bcast-SMP-binary.cpp index 259b90d370..1690c72817 100644 --- a/src/smpi/colls/bcast/bcast-SMP-binary.cpp +++ b/src/smpi/colls/bcast/bcast-SMP-binary.cpp @@ -217,8 +217,7 @@ int bcast__SMP_binary(void *buf, int count, // when count is not divisible by block size, use default BCAST for the remainder if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_SMP_binary use default MPI_bcast."); - Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype, - root, comm); + colls::bcast((char*)buf + (pipe_length * increment), remainder, datatype, root, comm); } return 1; diff --git a/src/smpi/colls/bcast/bcast-SMP-linear.cpp b/src/smpi/colls/bcast/bcast-SMP-linear.cpp index ea576febf3..a423e2d6d7 100644 --- a/src/smpi/colls/bcast/bcast-SMP-linear.cpp +++ b/src/smpi/colls/bcast/bcast-SMP-linear.cpp @@ -166,8 +166,7 @@ int bcast__SMP_linear(void *buf, int count, // when count is not divisible by block size, use default BCAST for the remainder if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_SMP_linear use default MPI_bcast."); - Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype, - root, comm); + colls::bcast((char*)buf + (pipe_length * increment), remainder, datatype, root, comm); } return MPI_SUCCESS; diff --git a/src/smpi/colls/bcast/bcast-arrival-pattern-aware-wait.cpp b/src/smpi/colls/bcast/bcast-arrival-pattern-aware-wait.cpp index 6f1dc20cd9..6d331df2ab 100644 --- a/src/smpi/colls/bcast/bcast-arrival-pattern-aware-wait.cpp +++ b/src/smpi/colls/bcast/bcast-arrival-pattern-aware-wait.cpp @@ -239,7 +239,7 @@ int bcast__arrival_pattern_aware_wait(void *buf, int count, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_arrival_pattern_aware_wait use default MPI_bcast."); - Colls::bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm); + colls::bcast((char*)buf + (pipe_length * increment), remainder, datatype, root, comm); } return MPI_SUCCESS; diff --git a/src/smpi/colls/bcast/bcast-arrival-pattern-aware.cpp b/src/smpi/colls/bcast/bcast-arrival-pattern-aware.cpp index fab160a980..b9a70b54c3 100644 --- a/src/smpi/colls/bcast/bcast-arrival-pattern-aware.cpp +++ b/src/smpi/colls/bcast/bcast-arrival-pattern-aware.cpp @@ -351,7 +351,7 @@ int bcast__arrival_pattern_aware(void *buf, int count, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_arrival_pattern_aware use default MPI_bcast."); - Colls::bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm); + colls::bcast((char*)buf + (pipe_length * increment), remainder, datatype, root, comm); } return MPI_SUCCESS; diff --git a/src/smpi/colls/bcast/bcast-arrival-scatter.cpp b/src/smpi/colls/bcast/bcast-arrival-scatter.cpp index 0e13cfa5f2..bc56af46e8 100644 --- a/src/smpi/colls/bcast/bcast-arrival-scatter.cpp +++ b/src/smpi/colls/bcast/bcast-arrival-scatter.cpp @@ -60,7 +60,7 @@ int bcast__arrival_scatter(void *buf, int count, /* message too small */ if (count < size) { XBT_WARN("MPI_bcast_arrival_scatter use default MPI_bcast."); - Colls::bcast(buf, count, datatype, root, comm); + colls::bcast(buf, count, datatype, root, comm); return MPI_SUCCESS; } diff --git a/src/smpi/colls/gather/gather-mvapich.cpp b/src/smpi/colls/gather/gather-mvapich.cpp index c5ca478959..127ca1bcac 100644 --- a/src/smpi/colls/gather/gather-mvapich.cpp +++ b/src/smpi/colls/gather/gather-mvapich.cpp @@ -318,7 +318,7 @@ int gather__mvapich2_two_level(const void *sendbuf, recvcnts[i] = node_sizes[i] * recvcnt; } } - Colls::gatherv(tmp_buf, local_size * nbytes, MPI_BYTE, recvbuf, recvcnts, displs, recvtype, leader_root, + colls::gatherv(tmp_buf, local_size * nbytes, MPI_BYTE, recvbuf, recvcnts, displs, recvtype, leader_root, leader_comm); } else { /* The root of the gather operation is not the node leader. @@ -333,7 +333,7 @@ int gather__mvapich2_two_level(const void *sendbuf, recvcnts[i] = node_sizes[i] * nbytes; } } - Colls::gatherv(tmp_buf, local_size * nbytes, MPI_BYTE, leader_gather_buf, recvcnts, displs, MPI_BYTE, + colls::gatherv(tmp_buf, local_size * nbytes, MPI_BYTE, leader_gather_buf, recvcnts, displs, MPI_BYTE, leader_root, leader_comm); } if (leader_comm_rank == leader_root) { diff --git a/src/smpi/colls/reduce/reduce-rab.cpp b/src/smpi/colls/reduce/reduce-rab.cpp index 4e8cdb0166..86bb34ae45 100644 --- a/src/smpi/colls/reduce/reduce-rab.cpp +++ b/src/smpi/colls/reduce/reduce-rab.cpp @@ -65,7 +65,7 @@ Exa.: size=13 ==> n=3, r=5 (i.e. size == 13 == 2**n+r == 2**3 + 5) - The algorithm needs for the execution of one Colls::reduce + The algorithm needs for the execution of one colls::reduce - for r==0 exec_time = n*(L1+L2) + buf_lng * (1-1/2**n) * (T1 + T2 + O/d) @@ -207,7 +207,7 @@ Step 5.n) 7: { [(a+b)+(c+d)] + [(e+f)+(g+h)] } + { [(i+j)+k] + [l+m] } for H -For Colls::allreduce: +For colls::allreduce: ------------------ Step 6.1) @@ -249,7 +249,7 @@ Step 7) on all nodes 0..12 -For Colls::reduce: +For colls::reduce: --------------- Step 6.0) @@ -942,9 +942,9 @@ static int MPI_I_anyReduce(const void* Sendbuf, void* Recvbuf, int count, MPI_Da } /* new_prot */ /*otherwise:*/ if (is_all) - return( Colls::allreduce(Sendbuf, Recvbuf, count, mpi_datatype, mpi_op, comm) ); + return (colls::allreduce(Sendbuf, Recvbuf, count, mpi_datatype, mpi_op, comm)); else - return( Colls::reduce(Sendbuf,Recvbuf, count,mpi_datatype,mpi_op, root, comm) ); + return (colls::reduce(Sendbuf, Recvbuf, count, mpi_datatype, mpi_op, root, comm)); } #endif /*REDUCE_LIMITS*/ diff --git a/src/smpi/colls/scatter/scatter-mvapich-two-level.cpp b/src/smpi/colls/scatter/scatter-mvapich-two-level.cpp index ec056faff5..399d94f091 100644 --- a/src/smpi/colls/scatter/scatter-mvapich-two-level.cpp +++ b/src/smpi/colls/scatter/scatter-mvapich-two-level.cpp @@ -160,7 +160,7 @@ int scatter__mvapich2_two_level_direct(const void *sendbuf, sendcnts[i] = node_sizes[i] * nbytes; } } - Colls::scatterv(leader_scatter_buf, sendcnts, displs, MPI_BYTE, tmp_buf, nbytes * local_size, MPI_BYTE, + colls::scatterv(leader_scatter_buf, sendcnts, displs, MPI_BYTE, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root, leader_comm); } else { if (leader_comm_rank == leader_root) { @@ -174,7 +174,7 @@ int scatter__mvapich2_two_level_direct(const void *sendbuf, sendcnts[i] = node_sizes[i] * sendcnt; } } - Colls::scatterv(sendbuf, sendcnts, displs, sendtype, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root, + colls::scatterv(sendbuf, sendcnts, displs, sendtype, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root, leader_comm); } if (leader_comm_rank == leader_root) { @@ -336,7 +336,7 @@ int scatter__mvapich2_two_level_binomial(const void *sendbuf, sendcnts[i] = node_sizes[i] * nbytes; } } - Colls::scatterv(leader_scatter_buf, sendcnts, displs, MPI_BYTE, tmp_buf, nbytes * local_size, MPI_BYTE, + colls::scatterv(leader_scatter_buf, sendcnts, displs, MPI_BYTE, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root, leader_comm); } else { if (leader_comm_rank == leader_root) { @@ -350,7 +350,7 @@ int scatter__mvapich2_two_level_binomial(const void *sendbuf, sendcnts[i] = node_sizes[i] * sendcnt; } } - Colls::scatterv(sendbuf, sendcnts, displs, sendtype, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root, + colls::scatterv(sendbuf, sendcnts, displs, sendtype, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root, leader_comm); } if (leader_comm_rank == leader_root) { diff --git a/src/smpi/colls/smpi_automatic_selector.cpp b/src/smpi/colls/smpi_automatic_selector.cpp index f01efcf422..a4ba840276 100644 --- a/src/smpi/colls/smpi_automatic_selector.cpp +++ b/src/smpi/colls/smpi_automatic_selector.cpp @@ -10,35 +10,33 @@ #include "src/smpi/include/smpi_actor.hpp" //attempt to do a quick autotuning version of the collective, -#define TRACE_AUTO_COLL(cat) \ - if (TRACE_is_enabled()) { \ - simgrid::instr::EventType* type = \ - simgrid::instr::Container::get_root()->type_->by_name_or_create( \ - _XBT_STRINGIFY(cat)); \ - \ - std::string cont_name = std::string("rank-" + std::to_string(simgrid::s4u::this_actor::get_pid())); \ - type->add_entity_value(Colls::_XBT_CONCAT3(mpi_coll_, cat, _description)[i].name, "1.0 1.0 1.0"); \ - new simgrid::instr::NewEvent(SIMIX_get_clock(), simgrid::instr::Container::by_name(cont_name), type, \ - type->get_entity_value(Colls::_XBT_CONCAT3(mpi_coll_, cat, _description)[i].name)); \ - } - #define AUTOMATIC_COLL_BENCH(cat, ret, args, args2) \ ret _XBT_CONCAT2(cat, __automatic)(COLL_UNPAREN args) \ { \ double time1, time2, time_min = DBL_MAX; \ int min_coll = -1, global_coll = -1; \ - int i; \ + int i = 0; \ double buf_in, buf_out, max_min = DBL_MAX; \ - for (i = 0; not Colls::_XBT_CONCAT3(mpi_coll_, cat, _description)[i].name.empty(); i++) { \ - if (Colls::_XBT_CONCAT3(mpi_coll_, cat, _description)[i].name == "automatic") \ - continue; \ - if (Colls::_XBT_CONCAT3(mpi_coll_, cat, _description)[i].name == "default") \ - continue; \ + auto desc = simgrid::smpi::colls::get_smpi_coll_description(_XBT_STRINGIFY(cat), i); \ + while (not desc->name.empty()) { \ + if (desc->name == "automatic") \ + goto next_iteration; \ + if (desc->name == "default") \ + goto next_iteration; \ barrier__default(comm); \ - TRACE_AUTO_COLL(cat) \ + if (TRACE_is_enabled()) { \ + simgrid::instr::EventType* type = \ + simgrid::instr::Container::get_root()->type_->by_name_or_create( \ + _XBT_STRINGIFY(cat)); \ + \ + std::string cont_name = std::string("rank-" + std::to_string(simgrid::s4u::this_actor::get_pid())); \ + type->add_entity_value(desc->name, "1.0 1.0 1.0"); \ + new simgrid::instr::NewEvent(SIMIX_get_clock(), simgrid::instr::Container::by_name(cont_name), type, \ + type->get_entity_value(desc->name)); \ + } \ time1 = SIMIX_get_clock(); \ try { \ - ((int(*) args)Colls::_XBT_CONCAT3(mpi_coll_, cat, _description)[i].coll) args2; \ + ((int(*) args)desc->coll) args2; \ } catch (std::exception & ex) { \ continue; \ } \ @@ -55,14 +53,19 @@ global_coll = i; \ } \ } \ + next_iteration: \ + i++; \ + desc = simgrid::smpi::colls::get_smpi_coll_description(_XBT_STRINGIFY(cat), i); \ } \ if (comm->rank() == 0) { \ XBT_WARN("For rank 0, the quickest was %s : %f , but global was %s : %f at max", \ - Colls::_XBT_CONCAT3(mpi_coll_, cat, _description)[min_coll].name.c_str(), time_min, \ - Colls::_XBT_CONCAT3(mpi_coll_, cat, _description)[global_coll].name.c_str(), max_min); \ + simgrid::smpi::colls::get_smpi_coll_description(_XBT_STRINGIFY(cat), min_coll)->name.c_str(), time_min, \ + simgrid::smpi::colls::get_smpi_coll_description(_XBT_STRINGIFY(cat), global_coll)->name.c_str(), \ + max_min); \ } else \ XBT_WARN("The quickest " _XBT_STRINGIFY(cat) " was %s on rank %d and took %f", \ - Colls::_XBT_CONCAT3(mpi_coll_, cat, _description)[min_coll].name.c_str(), comm->rank(), time_min); \ + simgrid::smpi::colls::get_smpi_coll_description(_XBT_STRINGIFY(cat), min_coll)->name.c_str(), \ + comm->rank(), time_min); \ return (min_coll != -1) ? MPI_SUCCESS : MPI_ERR_INTERN; \ } diff --git a/src/smpi/colls/smpi_coll.cpp b/src/smpi/colls/smpi_coll.cpp index fdc25eea73..2dce7e7838 100644 --- a/src/smpi/colls/smpi_coll.cpp +++ b/src/smpi/colls/smpi_coll.cpp @@ -16,8 +16,7 @@ XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_coll, smpi, "Logging specific to SMPI collectives."); #define COLL_SETTER(cat, ret, args, args2) \ - int(*Colls::cat) args; \ - void Colls::_XBT_CONCAT(set_, cat)(const std::string& name) \ + void colls::_XBT_CONCAT(set_, cat)(const std::string& name) \ { \ int id = find_coll_description(_XBT_CONCAT3(mpi_coll_, cat, _description), name, _XBT_STRINGIFY(cat)); \ cat = reinterpret_cast(_XBT_CONCAT3(mpi_coll_, cat, _description)[id].coll); \ @@ -28,41 +27,67 @@ XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_coll, smpi, "Logging specific to SMPI colle namespace simgrid{ namespace smpi{ -void (*Colls::smpi_coll_cleanup_callback)(); - /* these arrays must be nullptr terminated */ -s_mpi_coll_description_t Colls::mpi_coll_gather_description[] = { - COLL_GATHERS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} }; -s_mpi_coll_description_t Colls::mpi_coll_allgather_description[] = { - COLL_ALLGATHERS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} }; -s_mpi_coll_description_t Colls::mpi_coll_allgatherv_description[] = { - COLL_ALLGATHERVS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} }; -s_mpi_coll_description_t Colls::mpi_coll_allreduce_description[] ={ - COLL_ALLREDUCES(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} }; -s_mpi_coll_description_t Colls::mpi_coll_reduce_scatter_description[] = { - COLL_REDUCE_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} }; -s_mpi_coll_description_t Colls::mpi_coll_scatter_description[] ={ - COLL_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} }; -s_mpi_coll_description_t Colls::mpi_coll_barrier_description[] ={ - COLL_BARRIERS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} }; -s_mpi_coll_description_t Colls::mpi_coll_alltoall_description[] = { - COLL_ALLTOALLS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} }; -s_mpi_coll_description_t Colls::mpi_coll_alltoallv_description[] = { - COLL_ALLTOALLVS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} }; -s_mpi_coll_description_t Colls::mpi_coll_bcast_description[] = { - COLL_BCASTS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} }; -s_mpi_coll_description_t Colls::mpi_coll_reduce_description[] = { - COLL_REDUCES(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} }; +s_mpi_coll_description_t mpi_coll_gather_description[] = {COLL_GATHERS(COLL_DESCRIPTION, COLL_COMMA), + {"", "", nullptr}}; +s_mpi_coll_description_t mpi_coll_allgather_description[] = {COLL_ALLGATHERS(COLL_DESCRIPTION, COLL_COMMA), + {"", "", nullptr}}; +s_mpi_coll_description_t mpi_coll_allgatherv_description[] = {COLL_ALLGATHERVS(COLL_DESCRIPTION, COLL_COMMA), + {"", "", nullptr}}; +s_mpi_coll_description_t mpi_coll_allreduce_description[] = {COLL_ALLREDUCES(COLL_DESCRIPTION, COLL_COMMA), + {"", "", nullptr}}; +s_mpi_coll_description_t mpi_coll_reduce_scatter_description[] = {COLL_REDUCE_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), + {"", "", nullptr}}; +s_mpi_coll_description_t mpi_coll_scatter_description[] = {COLL_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), + {"", "", nullptr}}; +s_mpi_coll_description_t mpi_coll_barrier_description[] = {COLL_BARRIERS(COLL_DESCRIPTION, COLL_COMMA), + {"", "", nullptr}}; +s_mpi_coll_description_t mpi_coll_alltoall_description[] = {COLL_ALLTOALLS(COLL_DESCRIPTION, COLL_COMMA), + {"", "", nullptr}}; +s_mpi_coll_description_t mpi_coll_alltoallv_description[] = {COLL_ALLTOALLVS(COLL_DESCRIPTION, COLL_COMMA), + {"", "", nullptr}}; +s_mpi_coll_description_t mpi_coll_bcast_description[] = {COLL_BCASTS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr}}; +s_mpi_coll_description_t mpi_coll_reduce_description[] = {COLL_REDUCES(COLL_DESCRIPTION, COLL_COMMA), + {"", "", nullptr}}; + +// Needed by the automatic selector weird implementation +s_mpi_coll_description_t* colls::get_smpi_coll_description(const char* name, int rank) +{ + if (strcmp(name, "gather") == 0) + return &mpi_coll_gather_description[rank]; + if (strcmp(name, "allgather") == 0) + return &mpi_coll_allgather_description[rank]; + if (strcmp(name, "allgatherv") == 0) + return &mpi_coll_allgatherv_description[rank]; + if (strcmp(name, "allreduce") == 0) + return &mpi_coll_allreduce_description[rank]; + if (strcmp(name, "reduce_scatter") == 0) + return &mpi_coll_reduce_scatter_description[rank]; + if (strcmp(name, "scatter") == 0) + return &mpi_coll_scatter_description[rank]; + if (strcmp(name, "barrier") == 0) + return &mpi_coll_barrier_description[rank]; + if (strcmp(name, "alltoall") == 0) + return &mpi_coll_alltoall_description[rank]; + if (strcmp(name, "alltoallv") == 0) + return &mpi_coll_alltoallv_description[rank]; + if (strcmp(name, "bcast") == 0) + return &mpi_coll_bcast_description[rank]; + if (strcmp(name, "reduce") == 0) + return &mpi_coll_reduce_description[rank]; + XBT_INFO("You requested an unknown collective: %s", name); + return nullptr; +} /** Displays the long description of all registered models, and quit */ -void Colls::coll_help(const char *category, s_mpi_coll_description_t * table) +void colls::coll_help(const char* category, s_mpi_coll_description_t* table) { XBT_WARN("Long description of the %s models accepted by this simulator:\n", category); for (int i = 0; not table[i].name.empty(); i++) XBT_WARN(" %s: %s\n", table[i].name.c_str(), table[i].description.c_str()); } -int Colls::find_coll_description(s_mpi_coll_description_t* table, const std::string& name, const char* desc) +int colls::find_coll_description(s_mpi_coll_description_t* table, const std::string& name, const char* desc) { for (int i = 0; not table[i].name.empty(); i++) if (name == table[i].name) { @@ -81,10 +106,29 @@ int Colls::find_coll_description(s_mpi_coll_description_t* table, const std::str return -1; } -int(*Colls::gather) (const void *send_buff, int send_count, MPI_Datatype send_type, - void *recv_buff, int recv_count, MPI_Datatype recv_type, - int root, MPI_Comm comm); -void Colls::set_gather(const std::string& name) +int (*colls::gather)(const void* send_buff, int send_count, MPI_Datatype send_type, void* recv_buff, int recv_count, + MPI_Datatype recv_type, int root, MPI_Comm comm); +int (*colls::allgather)(const void* send_buff, int send_count, MPI_Datatype send_type, void* recv_buff, int recv_count, + MPI_Datatype recv_type, MPI_Comm comm); +int (*colls::allgatherv)(const void* send_buff, int send_count, MPI_Datatype send_type, void* recv_buff, + const int* recv_count, const int* recv_disps, MPI_Datatype recv_type, MPI_Comm comm); +int (*colls::alltoall)(const void* send_buff, int send_count, MPI_Datatype send_type, void* recv_buff, int recv_count, + MPI_Datatype recv_type, MPI_Comm comm); +int (*colls::alltoallv)(const void* send_buff, const int* send_counts, const int* send_disps, MPI_Datatype send_type, + void* recv_buff, const int* recv_counts, const int* recv_disps, MPI_Datatype recv_type, + MPI_Comm comm); +int (*colls::bcast)(void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm); +int (*colls::reduce)(const void* buf, void* rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); +int (*colls::allreduce)(const void* sbuf, void* rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm); +int (*colls::reduce_scatter)(const void* sbuf, void* rbuf, const int* rcounts, MPI_Datatype dtype, MPI_Op op, + MPI_Comm comm); +int (*colls::scatter)(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, + MPI_Datatype recvtype, int root, MPI_Comm comm); +int (*colls::barrier)(MPI_Comm comm); + +void (*colls::smpi_coll_cleanup_callback)(); + +void colls::set_gather(const std::string& name) { int id = find_coll_description(mpi_coll_gather_description, name, "gather"); gather = reinterpret_cast("smpi/coll-selector"); if (selector_name.empty()) selector_name = "default"; std::pair> setter_callbacks[] = { - {"gather", &Colls::set_gather}, {"allgather", &Colls::set_allgather}, - {"allgatherv", &Colls::set_allgatherv}, {"allreduce", &Colls::set_allreduce}, - {"alltoall", &Colls::set_alltoall}, {"alltoallv", &Colls::set_alltoallv}, - {"reduce", &Colls::set_reduce}, {"reduce_scatter", &Colls::set_reduce_scatter}, - {"scatter", &Colls::set_scatter}, {"bcast", &Colls::set_bcast}, - {"barrier", &Colls::set_barrier}}; + {"gather", &colls::set_gather}, {"allgather", &colls::set_allgather}, + {"allgatherv", &colls::set_allgatherv}, {"allreduce", &colls::set_allreduce}, + {"alltoall", &colls::set_alltoall}, {"alltoallv", &colls::set_alltoallv}, + {"reduce", &colls::set_reduce}, {"reduce_scatter", &colls::set_reduce_scatter}, + {"scatter", &colls::set_scatter}, {"bcast", &colls::set_bcast}, + {"barrier", &colls::set_barrier}}; for (auto& elem : setter_callbacks) { std::string name = simgrid::config::get_value(("smpi/" + elem.first).c_str()); @@ -130,25 +175,23 @@ void Colls::set_collectives(){ //Implementations of the single algorithm collectives -int Colls::gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, - MPI_Datatype recvtype, int root, MPI_Comm comm) +int colls::gatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts, + const int* displs, MPI_Datatype recvtype, int root, MPI_Comm comm) { MPI_Request request; - Colls::igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm, &request, 0); + colls::igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm, &request, 0); return Request::wait(&request, MPI_STATUS_IGNORE); } - -int Colls::scatterv(const void *sendbuf, const int *sendcounts, const int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, - MPI_Datatype recvtype, int root, MPI_Comm comm) +int colls::scatterv(const void* sendbuf, const int* sendcounts, const int* displs, MPI_Datatype sendtype, void* recvbuf, + int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) { MPI_Request request; - Colls::iscatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, &request, 0); + colls::iscatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, &request, 0); return Request::wait(&request, MPI_STATUS_IGNORE); } - -int Colls::scan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) +int colls::scan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { int system_tag = -888; MPI_Aint lb = 0; @@ -209,7 +252,7 @@ int Colls::scan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype data return MPI_SUCCESS; } -int Colls::exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) +int colls::exscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { int system_tag = -888; MPI_Aint lb = 0; @@ -276,11 +319,13 @@ int Colls::exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype da return MPI_SUCCESS; } -int Colls::alltoallw(const void *sendbuf, const int *sendcounts, const int *senddisps, const MPI_Datatype* sendtypes, - void *recvbuf, const int *recvcounts, const int *recvdisps, const MPI_Datatype* recvtypes, MPI_Comm comm) +int colls::alltoallw(const void* sendbuf, const int* sendcounts, const int* senddisps, const MPI_Datatype* sendtypes, + void* recvbuf, const int* recvcounts, const int* recvdisps, const MPI_Datatype* recvtypes, + MPI_Comm comm) { MPI_Request request; - Colls::ialltoallw(sendbuf, sendcounts, senddisps, sendtypes, recvbuf, recvcounts, recvdisps, recvtypes, comm, &request, 0); + colls::ialltoallw(sendbuf, sendcounts, senddisps, sendtypes, recvbuf, recvcounts, recvdisps, recvtypes, comm, + &request, 0); return Request::wait(&request, MPI_STATUS_IGNORE); } diff --git a/src/smpi/colls/smpi_default_selector.cpp b/src/smpi/colls/smpi_default_selector.cpp index 4b00cd1839..4ca403e633 100644 --- a/src/smpi/colls/smpi_default_selector.cpp +++ b/src/smpi/colls/smpi_default_selector.cpp @@ -26,7 +26,7 @@ int gather__default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) { MPI_Request request; - Colls::igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, &request, 0); + colls::igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, &request, 0); return Request::wait(&request, MPI_STATUS_IGNORE); } @@ -47,7 +47,7 @@ int reduce_scatter__default(const void *sendbuf, void *recvbuf, const int *recvc int ret = reduce__default(sendbuf, tmpbuf, count, datatype, op, 0, comm); if(ret==MPI_SUCCESS) - ret = Colls::scatterv(tmpbuf, recvcounts, displs, datatype, recvbuf, recvcounts[rank], datatype, 0, comm); + ret = colls::scatterv(tmpbuf, recvcounts, displs, datatype, recvbuf, recvcounts[rank], datatype, 0, comm); delete[] displs; smpi_free_tmp_buffer(tmpbuf); return ret; @@ -58,7 +58,7 @@ int allgather__default(const void *sendbuf, int sendcount, MPI_Datatype sendtype void *recvbuf,int recvcount, MPI_Datatype recvtype, MPI_Comm comm) { MPI_Request request; - Colls::iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, &request); + colls::iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, &request); return Request::wait(&request, MPI_STATUS_IGNORE); } @@ -66,7 +66,7 @@ int allgatherv__default(const void *sendbuf, int sendcount, MPI_Datatype sendtyp const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPI_Comm comm) { MPI_Request request; - Colls::iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, &request, 0); + colls::iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, &request, 0); MPI_Request* requests = request->get_nbc_requests(); int count = request->get_nbc_requests_size(); Request::waitall(count, requests, MPI_STATUS_IGNORE); @@ -82,7 +82,7 @@ int scatter__default(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) { MPI_Request request; - Colls::iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, &request, 0); + colls::iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, &request, 0); return Request::wait(&request, MPI_STATUS_IGNORE); } @@ -94,7 +94,7 @@ int reduce__default(const void *sendbuf, void *recvbuf, int count, MPI_Datatype return reduce__ompi_basic_linear(sendbuf, recvbuf, count, datatype, op, root, comm); } MPI_Request request; - Colls::ireduce(sendbuf, recvbuf, count, datatype, op, root, comm, &request, 0); + colls::ireduce(sendbuf, recvbuf, count, datatype, op, root, comm, &request, 0); return Request::wait(&request, MPI_STATUS_IGNORE); } @@ -119,7 +119,8 @@ int alltoallv__default(const void *sendbuf, const int *sendcounts, const int *se void *recvbuf, const int *recvcounts, const int *recvdisps, MPI_Datatype recvtype, MPI_Comm comm) { MPI_Request request; - Colls::ialltoallv(sendbuf, sendcounts, senddisps, sendtype, recvbuf, recvcounts, recvdisps, recvtype, comm, &request, 0); + colls::ialltoallv(sendbuf, sendcounts, senddisps, sendtype, recvbuf, recvcounts, recvdisps, recvtype, comm, &request, + 0); return Request::wait(&request, MPI_STATUS_IGNORE); } diff --git a/src/smpi/colls/smpi_mvapich2_selector_stampede.hpp b/src/smpi/colls/smpi_mvapich2_selector_stampede.hpp index 25df8f1647..e859efc44c 100644 --- a/src/smpi/colls/smpi_mvapich2_selector_stampede.hpp +++ b/src/smpi/colls/smpi_mvapich2_selector_stampede.hpp @@ -53,8 +53,8 @@ static void init_mv2_alltoall_tables_stampede() int agg_table_sum = 0; mv2_alltoall_tuning_table** table_ptrs = NULL; mv2_alltoall_num_ppn_conf = 3; - if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL) - simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2; + if (simgrid::smpi::colls::smpi_coll_cleanup_callback == NULL) + simgrid::smpi::colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2; mv2_alltoall_thresholds_table = new mv2_alltoall_tuning_table*[mv2_alltoall_num_ppn_conf]; table_ptrs = new mv2_alltoall_tuning_table*[mv2_alltoall_num_ppn_conf]; mv2_size_alltoall_tuning_table = new int[mv2_alltoall_num_ppn_conf]; @@ -367,8 +367,8 @@ static void init_mv2_allgather_tables_stampede() { int agg_table_sum = 0; - if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL) - simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2; + if (simgrid::smpi::colls::smpi_coll_cleanup_callback == NULL) + simgrid::smpi::colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2; mv2_allgather_num_ppn_conf = 3; mv2_allgather_thresholds_table = new mv2_allgather_tuning_table*[mv2_allgather_num_ppn_conf]; mv2_allgather_tuning_table** table_ptrs = new mv2_allgather_tuning_table*[mv2_allgather_num_ppn_conf]; @@ -590,8 +590,8 @@ MV2_Gather_function_ptr MV2_Gather_intra_node_function = NULL; static void init_mv2_gather_tables_stampede() { - if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL) - simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2; + if (simgrid::smpi::colls::smpi_coll_cleanup_callback == NULL) + simgrid::smpi::colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2; mv2_size_gather_tuning_table = 7; mv2_gather_thresholds_table = new mv2_gather_tuning_table[mv2_size_gather_tuning_table]; mv2_gather_tuning_table mv2_tmp_gather_thresholds_table[] = { @@ -674,8 +674,8 @@ mv2_allgatherv_tuning_table* mv2_allgatherv_thresholds_table = NULL; static void init_mv2_allgatherv_tables_stampede() { - if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL) - simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2; + if (simgrid::smpi::colls::smpi_coll_cleanup_callback == NULL) + simgrid::smpi::colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2; mv2_size_allgatherv_tuning_table = 6; mv2_allgatherv_thresholds_table = new mv2_allgatherv_tuning_table[mv2_size_allgatherv_tuning_table]; mv2_allgatherv_tuning_table mv2_tmp_allgatherv_thresholds_table[] = { @@ -769,14 +769,14 @@ static int MPIR_Allreduce_mcst_reduce_redscat_gather_MV2(const void* sendbuf, vo static int MPIR_Allreduce_reduce_p2p_MV2(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - simgrid::smpi::Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm); + simgrid::smpi::colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm); return MPI_SUCCESS; } static int MPIR_Allreduce_reduce_shmem_MV2(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - simgrid::smpi::Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm); + simgrid::smpi::colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm); return MPI_SUCCESS; } @@ -786,8 +786,8 @@ static int MPIR_Allreduce_reduce_shmem_MV2(const void* sendbuf, void* recvbuf, i static void init_mv2_allreduce_tables_stampede() { - if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL) - simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2; + if (simgrid::smpi::colls::smpi_coll_cleanup_callback == NULL) + simgrid::smpi::colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2; mv2_size_allreduce_tuning_table = 8; mv2_allreduce_thresholds_table = new mv2_allreduce_tuning_table[mv2_size_allreduce_tuning_table]; mv2_allreduce_tuning_table mv2_tmp_allreduce_thresholds_table[] = { @@ -970,8 +970,8 @@ int mv2_intra_node_knomial_factor = 4; static void init_mv2_bcast_tables_stampede() { // Stampede, - if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL) - simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2; + if (simgrid::smpi::colls::smpi_coll_cleanup_callback == NULL) + simgrid::smpi::colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2; mv2_size_bcast_tuning_table = 8; mv2_bcast_thresholds_table = new mv2_bcast_tuning_table[mv2_size_bcast_tuning_table]; @@ -1185,8 +1185,8 @@ int (*MV2_Reduce_intra_function)(const void* sendbuf, void* recvbuf, int count, static void init_mv2_reduce_tables_stampede() { - if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL) - simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2; + if (simgrid::smpi::colls::smpi_coll_cleanup_callback == NULL) + simgrid::smpi::colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2; /*Stampede*/ mv2_size_reduce_tuning_table = 8; mv2_reduce_thresholds_table = new mv2_reduce_tuning_table[mv2_size_reduce_tuning_table]; @@ -1409,8 +1409,8 @@ static int MPIR_Reduce_Scatter_Basic_MV2(const void* sendbuf, void* recvbuf, con static void init_mv2_reduce_scatter_tables_stampede() { - if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL) - simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2; + if (simgrid::smpi::colls::smpi_coll_cleanup_callback == NULL) + simgrid::smpi::colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2; mv2_size_red_scat_tuning_table = 6; mv2_red_scat_thresholds_table = new mv2_red_scat_tuning_table[mv2_size_red_scat_tuning_table]; mv2_red_scat_tuning_table mv2_tmp_red_scat_thresholds_table[] = { @@ -1510,8 +1510,8 @@ int MPIR_Scatter_mcst_wrap_MV2(const void* sendbuf, int sendcnt, MPI_Datatype se static void init_mv2_scatter_tables_stampede() { - if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL) - simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2; + if (simgrid::smpi::colls::smpi_coll_cleanup_callback == NULL) + simgrid::smpi::colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2; int agg_table_sum = 0; mv2_scatter_num_ppn_conf = 3; diff --git a/src/smpi/colls/smpi_nbc_impl.cpp b/src/smpi/colls/smpi_nbc_impl.cpp index 22301c0fcc..a56f631acf 100644 --- a/src/smpi/colls/smpi_nbc_impl.cpp +++ b/src/smpi/colls/smpi_nbc_impl.cpp @@ -11,8 +11,7 @@ namespace simgrid{ namespace smpi{ - -int Colls::ibarrier(MPI_Comm comm, MPI_Request* request, int external) +int colls::ibarrier(MPI_Comm comm, MPI_Request* request, int external) { int size = comm->size(); int rank = comm->rank(); @@ -40,7 +39,8 @@ int Colls::ibarrier(MPI_Comm comm, MPI_Request* request, int external) return MPI_SUCCESS; } -int Colls::ibcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm, MPI_Request* request, int external) +int colls::ibcast(void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm, MPI_Request* request, + int external) { int size = comm->size(); int rank = comm->rank(); @@ -71,8 +71,8 @@ int Colls::ibcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Com return MPI_SUCCESS; } -int Colls::iallgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf,int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, int external) +int colls::iallgather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, + MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, int external) { const int system_tag = COLL_TAG_ALLGATHER-external; @@ -105,8 +105,8 @@ int Colls::iallgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, return MPI_SUCCESS; } -int Colls::iscatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request, int external) +int colls::iscatter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, + MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request, int external) { const int system_tag = COLL_TAG_SCATTER-external; MPI_Aint lb = 0; @@ -145,8 +145,8 @@ int Colls::iscatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, return MPI_SUCCESS; } -int Colls::iallgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, - const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, int external) +int colls::iallgatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts, + const int* displs, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, int external) { const int system_tag = COLL_TAG_ALLGATHERV-external; MPI_Aint lb = 0; @@ -179,7 +179,9 @@ int Colls::iallgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype return MPI_SUCCESS; } -int Colls::ialltoall( const void *sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, int external){ +int colls::ialltoall(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, + MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, int external) +{ int system_tag = COLL_TAG_ALLTOALL-external; MPI_Aint lb = 0; MPI_Aint sendext = 0; @@ -222,8 +224,10 @@ int Colls::ialltoall( const void *sendbuf, int sendcount, MPI_Datatype sendtype, return MPI_SUCCESS; } -int Colls::ialltoallv(const void *sendbuf, const int *sendcounts, const int *senddisps, MPI_Datatype sendtype, - void *recvbuf, const int *recvcounts, const int *recvdisps, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request, int external){ +int colls::ialltoallv(const void* sendbuf, const int* sendcounts, const int* senddisps, MPI_Datatype sendtype, + void* recvbuf, const int* recvcounts, const int* recvdisps, MPI_Datatype recvtype, MPI_Comm comm, + MPI_Request* request, int external) +{ const int system_tag = COLL_TAG_ALLTOALLV-external; MPI_Aint lb = 0; MPI_Aint sendext = 0; @@ -270,8 +274,10 @@ int Colls::ialltoallv(const void *sendbuf, const int *sendcounts, const int *sen return err; } -int Colls::ialltoallw(const void *sendbuf, const int *sendcounts, const int *senddisps, const MPI_Datatype* sendtypes, - void *recvbuf, const int *recvcounts, const int *recvdisps, const MPI_Datatype* recvtypes, MPI_Comm comm, MPI_Request *request, int external){ +int colls::ialltoallw(const void* sendbuf, const int* sendcounts, const int* senddisps, const MPI_Datatype* sendtypes, + void* recvbuf, const int* recvcounts, const int* recvdisps, const MPI_Datatype* recvtypes, + MPI_Comm comm, MPI_Request* request, int external) +{ const int system_tag = COLL_TAG_ALLTOALLW-external; /* Initialize. */ @@ -313,8 +319,8 @@ int Colls::ialltoallw(const void *sendbuf, const int *sendcounts, const int *sen return err; } -int Colls::igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request, int external) +int colls::igather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, + MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request, int external) { const int system_tag = COLL_TAG_GATHER-external; MPI_Aint lb = 0; @@ -351,8 +357,9 @@ int Colls::igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, return MPI_SUCCESS; } -int Colls::igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs, - MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request, int external) +int colls::igatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts, + const int* displs, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request, + int external) { int system_tag = COLL_TAG_GATHERV-external; MPI_Aint lb = 0; @@ -388,8 +395,9 @@ int Colls::igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, v } return MPI_SUCCESS; } -int Colls::iscatterv(const void *sendbuf, const int *sendcounts, const int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, - MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request, int external) +int colls::iscatterv(const void* sendbuf, const int* sendcounts, const int* displs, MPI_Datatype sendtype, + void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request, + int external) { int system_tag = COLL_TAG_SCATTERV-external; MPI_Aint lb = 0; @@ -428,8 +436,8 @@ int Colls::iscatterv(const void *sendbuf, const int *sendcounts, const int *disp return MPI_SUCCESS; } -int Colls::ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, - MPI_Comm comm, MPI_Request* request, int external) +int colls::ireduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, + MPI_Comm comm, MPI_Request* request, int external) { const int system_tag = COLL_TAG_REDUCE-external; MPI_Aint lb = 0; @@ -488,8 +496,8 @@ int Colls::ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype d return MPI_SUCCESS; } -int Colls::iallreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, - MPI_Op op, MPI_Comm comm, MPI_Request* request, int external) +int colls::iallreduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, + MPI_Request* request, int external) { const int system_tag = COLL_TAG_ALLREDUCE-external; @@ -521,7 +529,8 @@ int Colls::iallreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatyp return MPI_SUCCESS; } -int Colls::iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request* request, int external) +int colls::iscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, + MPI_Request* request, int external) { int system_tag = -888-external; MPI_Aint lb = 0; @@ -553,7 +562,8 @@ int Colls::iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dat return MPI_SUCCESS; } -int Colls::iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request* request, int external) +int colls::iexscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, + MPI_Request* request, int external) { int system_tag = -888-external; MPI_Aint lb = 0; @@ -583,9 +593,10 @@ int Colls::iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype d return MPI_SUCCESS; } -int Colls::ireduce_scatter(const void *sendbuf, void *recvbuf, const int *recvcounts, MPI_Datatype datatype, MPI_Op op, - MPI_Comm comm, MPI_Request* request, int external){ -//Version where each process performs the reduce for its own part. Alltoall pattern for comms. +int colls::ireduce_scatter(const void* sendbuf, void* recvbuf, const int* recvcounts, MPI_Datatype datatype, MPI_Op op, + MPI_Comm comm, MPI_Request* request, int external) +{ + // Version where each process performs the reduce for its own part. Alltoall pattern for comms. const int system_tag = COLL_TAG_REDUCE_SCATTER-external; MPI_Aint lb = 0; MPI_Aint dataext = 0; diff --git a/src/smpi/include/smpi_coll.hpp b/src/smpi/include/smpi_coll.hpp index 02644788e8..98df0b530c 100644 --- a/src/smpi/include/smpi_coll.hpp +++ b/src/smpi/include/smpi_coll.hpp @@ -14,12 +14,10 @@ /** @brief MPI collective description */ #define COLL_DEFS(cat, ret, args, args2) \ - static void _XBT_CONCAT(set_, cat)(const std::string& name); \ - static s_mpi_coll_description_t _XBT_CONCAT3(mpi_coll_, cat, _description)[]; \ - static int(*cat) args; + void _XBT_CONCAT(set_, cat)(const std::string& name); \ + extern int(*cat) args; -#define COLL_SIG(cat, ret, args, args2)\ - static int cat args; +#define COLL_SIG(cat, ret, args, args2) int cat args; #define COLL_DESCRIPTION(cat, ret, args, name) \ { \ @@ -85,80 +83,77 @@ struct s_mpi_coll_description_t { void *coll; }; -class Colls{ -public: - static XBT_PUBLIC void coll_help(const char* category, s_mpi_coll_description_t* table); - static XBT_PUBLIC int find_coll_description(s_mpi_coll_description_t* table, const std::string& name, - const char* desc); - static void set_collectives(); - - // for each collective type, create the set_* prototype, the description array and the function pointer -// static void set_gather(const std::string& name); -// static s_mpi_coll_description_t mpi_coll_gather_description[]; -// static int(*gather)(const void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, +namespace colls { +XBT_PUBLIC void coll_help(const char* category, s_mpi_coll_description_t* table); +XBT_PUBLIC int find_coll_description(s_mpi_coll_description_t* table, const std::string& name, const char* desc); +void set_collectives(); +XBT_PUBLIC s_mpi_coll_description_t* get_smpi_coll_description(const char* name, int rank); + +// for each collective type, create the set_* prototype, the description array and the function pointer +// void set_gather(const std::string& name); +// extern int(*gather)(const void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, +// MPI_Datatype recv_type, // int root, MPI_Comm comm); - COLL_APPLY(COLL_DEFS, COLL_GATHER_SIG, "") - COLL_APPLY(COLL_DEFS, COLL_ALLGATHER_SIG, "") - COLL_APPLY(COLL_DEFS, COLL_ALLGATHERV_SIG, "") - COLL_APPLY(COLL_DEFS, COLL_REDUCE_SIG, "") - COLL_APPLY(COLL_DEFS, COLL_ALLREDUCE_SIG, "") - COLL_APPLY(COLL_DEFS, COLL_REDUCE_SCATTER_SIG, "") - COLL_APPLY(COLL_DEFS, COLL_SCATTER_SIG, "") - COLL_APPLY(COLL_DEFS, COLL_BARRIER_SIG, "") - COLL_APPLY(COLL_DEFS, COLL_BCAST_SIG, "") - COLL_APPLY(COLL_DEFS, COLL_ALLTOALL_SIG, "") - COLL_APPLY(COLL_DEFS, COLL_ALLTOALLV_SIG, "") - - // These fairly unused collectives only have one implementation in SMPI - static int gatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts, const int* displs, - MPI_Datatype recvtype, int root, MPI_Comm comm); - static int scatterv(const void* sendbuf, const int* sendcounts, const int* displs, MPI_Datatype sendtype, void* recvbuf, int recvcount, - MPI_Datatype recvtype, int root, MPI_Comm comm); - static int scan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); - static int exscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); - static int alltoallw - (const void* sendbuf, const int* sendcounts, const int* senddisps, const MPI_Datatype* sendtypes, void* recvbuf, const int* recvcounts, - const int* recvdisps, const MPI_Datatype* recvtypes, MPI_Comm comm); - - //async collectives - static int ibarrier(MPI_Comm comm, MPI_Request* request, int external=1); - static int ibcast(void *buf, int count, MPI_Datatype datatype, - int root, MPI_Comm comm, MPI_Request* request, int external=1); - static int igather (const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, - MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request, int external=1); - static int igatherv (const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, - const int* recvcounts, const int* displs, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request, int external=1); - static int iallgather (const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, - int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request, int external=1); - static int iallgatherv (const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, - const int* recvcounts, const int* displs, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request, int external=1); - static int iscatter (const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, - int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request, int external=1); - static int iscatterv (const void* sendbuf, const int* sendcounts, const int* displs, MPI_Datatype sendtype, - void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request, int external=1); - static int ireduce - (const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm, MPI_Request *request, int external=1); - static int iallreduce - (const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request, int external=1); - static int iscan - (const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request, int external=1); - static int iexscan - (const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request, int external=1); - static int ireduce_scatter - (const void* sendbuf, void* recvbuf, const int* recvcounts, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request, int external=1); - static int ireduce_scatter_block - (const void* sendbuf, void* recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request, int external=1); - static int ialltoall (const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, - int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request, int external=1); - static int ialltoallv - (const void* sendbuf, const int* sendcounts, const int* senddisps, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts, - const int* recvdisps, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request, int external=1); - static int ialltoallw - (const void* sendbuf, const int* sendcounts, const int* senddisps, const MPI_Datatype* sendtypes, void* recvbuf, const int* recvcounts, - const int* recvdisps, const MPI_Datatype* recvtypes, MPI_Comm comm, MPI_Request *request, int external=1); - - - static void (*smpi_coll_cleanup_callback)(); +COLL_APPLY(COLL_DEFS, COLL_GATHER_SIG, "") +COLL_APPLY(COLL_DEFS, COLL_ALLGATHER_SIG, "") +COLL_APPLY(COLL_DEFS, COLL_ALLGATHERV_SIG, "") +COLL_APPLY(COLL_DEFS, COLL_REDUCE_SIG, "") +COLL_APPLY(COLL_DEFS, COLL_ALLREDUCE_SIG, "") +COLL_APPLY(COLL_DEFS, COLL_REDUCE_SCATTER_SIG, "") +COLL_APPLY(COLL_DEFS, COLL_SCATTER_SIG, "") +COLL_APPLY(COLL_DEFS, COLL_BARRIER_SIG, "") +COLL_APPLY(COLL_DEFS, COLL_BCAST_SIG, "") +COLL_APPLY(COLL_DEFS, COLL_ALLTOALL_SIG, "") +COLL_APPLY(COLL_DEFS, COLL_ALLTOALLV_SIG, "") + +// These fairly unused collectives only have one implementation in SMPI +int gatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts, + const int* displs, MPI_Datatype recvtype, int root, MPI_Comm comm); +int scatterv(const void* sendbuf, const int* sendcounts, const int* displs, MPI_Datatype sendtype, void* recvbuf, + int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); +int scan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); +int exscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); +int alltoallw(const void* sendbuf, const int* sendcounts, const int* senddisps, const MPI_Datatype* sendtypes, + void* recvbuf, const int* recvcounts, const int* recvdisps, const MPI_Datatype* recvtypes, MPI_Comm comm); + +// async collectives +int ibarrier(MPI_Comm comm, MPI_Request* request, int external = 1); +int ibcast(void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm, MPI_Request* request, + int external = 1); +int igather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, + MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request, int external = 1); +int igatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts, + const int* displs, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request, int external = 1); +int iallgather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, + MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, int external = 1); +int iallgatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts, + const int* displs, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, int external = 1); +int iscatter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, + MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request, int external = 1); +int iscatterv(const void* sendbuf, const int* sendcounts, const int* displs, MPI_Datatype sendtype, void* recvbuf, + int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request, int external = 1); +int ireduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm, + MPI_Request* request, int external = 1); +int iallreduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, + MPI_Request* request, int external = 1); +int iscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, + MPI_Request* request, int external = 1); +int iexscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, + MPI_Request* request, int external = 1); +int ireduce_scatter(const void* sendbuf, void* recvbuf, const int* recvcounts, MPI_Datatype datatype, MPI_Op op, + MPI_Comm comm, MPI_Request* request, int external = 1); +int ireduce_scatter_block(const void* sendbuf, void* recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, + MPI_Comm comm, MPI_Request* request, int external = 1); +int ialltoall(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, + MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, int external = 1); +int ialltoallv(const void* sendbuf, const int* sendcounts, const int* senddisps, MPI_Datatype sendtype, void* recvbuf, + const int* recvcounts, const int* recvdisps, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, + int external = 1); +int ialltoallw(const void* sendbuf, const int* sendcounts, const int* senddisps, const MPI_Datatype* sendtypes, + void* recvbuf, const int* recvcounts, const int* recvdisps, const MPI_Datatype* recvtypes, MPI_Comm comm, + MPI_Request* request, int external = 1); + +extern void (*smpi_coll_cleanup_callback)(); }; /************* diff --git a/src/smpi/include/smpi_file.hpp b/src/smpi/include/smpi_file.hpp index ccb8f7fa96..1584a22b32 100644 --- a/src/smpi/include/smpi_file.hpp +++ b/src/smpi/include/smpi_file.hpp @@ -74,8 +74,8 @@ class File{ MPI_Offset max_offset = (min_offset + count * datatype->size());//cheating, as we don't care about exact data location, we can skip extent MPI_Offset* min_offsets = new MPI_Offset[size]; MPI_Offset* max_offsets = new MPI_Offset[size]; - simgrid::smpi::Colls::allgather(&min_offset, 1, MPI_OFFSET, min_offsets, 1, MPI_OFFSET, comm_); - simgrid::smpi::Colls::allgather(&max_offset, 1, MPI_OFFSET, max_offsets, 1, MPI_OFFSET, comm_); + simgrid::smpi::colls::allgather(&min_offset, 1, MPI_OFFSET, min_offsets, 1, MPI_OFFSET, comm_); + simgrid::smpi::colls::allgather(&max_offset, 1, MPI_OFFSET, max_offsets, 1, MPI_OFFSET, comm_); MPI_Offset min=min_offset; MPI_Offset max=max_offset; MPI_Offset tot= 0; @@ -171,15 +171,15 @@ class File{ seek(min_offset, MPI_SEEK_SET); T(this,sendbuf,totreads/datatype->size(),datatype, status); } - simgrid::smpi::Colls::alltoall(send_sizes, 1, MPI_INT, recv_sizes, 1, MPI_INT, comm_); + simgrid::smpi::colls::alltoall(send_sizes, 1, MPI_INT, recv_sizes, 1, MPI_INT, comm_); int total_recv=0; for(int i=0;icount=count * datatype->size(); smpi_free_tmp_buffer(sendbuf); delete[] send_sizes; diff --git a/src/smpi/internals/smpi_global.cpp b/src/smpi/internals/smpi_global.cpp index 38d58de5c2..1f64139005 100644 --- a/src/smpi/internals/smpi_global.cpp +++ b/src/smpi/internals/smpi_global.cpp @@ -330,8 +330,8 @@ static void smpi_init_options(){ // return if already called if (smpi_cpu_threshold > -1) return; - simgrid::smpi::Colls::set_collectives(); - simgrid::smpi::Colls::smpi_coll_cleanup_callback = nullptr; + simgrid::smpi::colls::set_collectives(); + simgrid::smpi::colls::smpi_coll_cleanup_callback = nullptr; smpi_cpu_threshold = simgrid::config::get_value("smpi/cpu-threshold"); if (smpi_cpu_threshold < 0) smpi_cpu_threshold = DBL_MAX; @@ -718,8 +718,8 @@ void SMPI_finalize() smpi_shared_destroy(); smpi_deployment_cleanup_instances(); - if (simgrid::smpi::Colls::smpi_coll_cleanup_callback != nullptr) - simgrid::smpi::Colls::smpi_coll_cleanup_callback(); + if (simgrid::smpi::colls::smpi_coll_cleanup_callback != nullptr) + simgrid::smpi::colls::smpi_coll_cleanup_callback(); MPI_COMM_WORLD = MPI_COMM_NULL; diff --git a/src/smpi/internals/smpi_replay.cpp b/src/smpi/internals/smpi_replay.cpp index 2af987f919..f0f6dd2520 100644 --- a/src/smpi/internals/smpi_replay.cpp +++ b/src/smpi/internals/smpi_replay.cpp @@ -577,7 +577,7 @@ void WaitAllAction::kernel(simgrid::xbt::ReplayAction&) void BarrierAction::kernel(simgrid::xbt::ReplayAction&) { TRACE_smpi_comm_in(my_proc_id, __func__, new simgrid::instr::NoOpTIData("barrier")); - Colls::barrier(MPI_COMM_WORLD); + colls::barrier(MPI_COMM_WORLD); TRACE_smpi_comm_out(my_proc_id); } @@ -587,7 +587,7 @@ void BcastAction::kernel(simgrid::xbt::ReplayAction&) new simgrid::instr::CollTIData("bcast", MPI_COMM_WORLD->group()->actor(args.root)->get_pid(), -1.0, args.size, -1, Datatype::encode(args.datatype1), "")); - Colls::bcast(send_buffer(args.size * args.datatype1->size()), args.size, args.datatype1, args.root, MPI_COMM_WORLD); + colls::bcast(send_buffer(args.size * args.datatype1->size()), args.size, args.datatype1, args.root, MPI_COMM_WORLD); TRACE_smpi_comm_out(my_proc_id); } @@ -599,8 +599,9 @@ void ReduceAction::kernel(simgrid::xbt::ReplayAction&) args.comp_size, args.comm_size, -1, Datatype::encode(args.datatype1), "")); - Colls::reduce(send_buffer(args.comm_size * args.datatype1->size()), - recv_buffer(args.comm_size * args.datatype1->size()), args.comm_size, args.datatype1, MPI_OP_NULL, args.root, MPI_COMM_WORLD); + colls::reduce(send_buffer(args.comm_size * args.datatype1->size()), + recv_buffer(args.comm_size * args.datatype1->size()), args.comm_size, args.datatype1, MPI_OP_NULL, + args.root, MPI_COMM_WORLD); private_execute_flops(args.comp_size); TRACE_smpi_comm_out(my_proc_id); @@ -611,8 +612,9 @@ void AllReduceAction::kernel(simgrid::xbt::ReplayAction&) TRACE_smpi_comm_in(my_proc_id, "action_allreduce", new simgrid::instr::CollTIData("allreduce", -1, args.comp_size, args.comm_size, -1, Datatype::encode(args.datatype1), "")); - Colls::allreduce(send_buffer(args.comm_size * args.datatype1->size()), - recv_buffer(args.comm_size * args.datatype1->size()), args.comm_size, args.datatype1, MPI_OP_NULL, MPI_COMM_WORLD); + colls::allreduce(send_buffer(args.comm_size * args.datatype1->size()), + recv_buffer(args.comm_size * args.datatype1->size()), args.comm_size, args.datatype1, MPI_OP_NULL, + MPI_COMM_WORLD); private_execute_flops(args.comp_size); TRACE_smpi_comm_out(my_proc_id); @@ -625,9 +627,9 @@ void AllToAllAction::kernel(simgrid::xbt::ReplayAction&) Datatype::encode(args.datatype1), Datatype::encode(args.datatype2))); - Colls::alltoall(send_buffer(args.send_size * args.comm_size * args.datatype1->size()), args.send_size, - args.datatype1, recv_buffer(args.recv_size * args.comm_size * args.datatype2->size()), - args.recv_size, args.datatype2, MPI_COMM_WORLD); + colls::alltoall(send_buffer(args.send_size * args.comm_size * args.datatype1->size()), args.send_size, args.datatype1, + recv_buffer(args.recv_size * args.comm_size * args.datatype2->size()), args.recv_size, args.datatype2, + MPI_COMM_WORLD); TRACE_smpi_comm_out(my_proc_id); } @@ -639,12 +641,14 @@ void GatherAction::kernel(simgrid::xbt::ReplayAction&) if (name == "gather") { int rank = MPI_COMM_WORLD->rank(); - Colls::gather(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1, - (rank == args.root) ? recv_buffer(args.recv_size * args.comm_size * args.datatype2->size()) : nullptr, args.recv_size, args.datatype2, args.root, MPI_COMM_WORLD); + colls::gather(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1, + (rank == args.root) ? recv_buffer(args.recv_size * args.comm_size * args.datatype2->size()) : nullptr, + args.recv_size, args.datatype2, args.root, MPI_COMM_WORLD); } else - Colls::allgather(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1, - recv_buffer(args.recv_size * args.datatype2->size()), args.recv_size, args.datatype2, MPI_COMM_WORLD); + colls::allgather(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1, + recv_buffer(args.recv_size * args.datatype2->size()), args.recv_size, args.datatype2, + MPI_COMM_WORLD); TRACE_smpi_comm_out(my_proc_id); } @@ -658,14 +662,14 @@ void GatherVAction::kernel(simgrid::xbt::ReplayAction&) Datatype::encode(args.datatype1), Datatype::encode(args.datatype2))); if (name == "gatherv") { - Colls::gatherv(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1, - (rank == args.root) ? recv_buffer(args.recv_size_sum * args.datatype2->size()) : nullptr, - args.recvcounts->data(), args.disps.data(), args.datatype2, args.root, MPI_COMM_WORLD); + colls::gatherv(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1, + (rank == args.root) ? recv_buffer(args.recv_size_sum * args.datatype2->size()) : nullptr, + args.recvcounts->data(), args.disps.data(), args.datatype2, args.root, MPI_COMM_WORLD); } else { - Colls::allgatherv(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1, - recv_buffer(args.recv_size_sum * args.datatype2->size()), args.recvcounts->data(), - args.disps.data(), args.datatype2, MPI_COMM_WORLD); + colls::allgatherv(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1, + recv_buffer(args.recv_size_sum * args.datatype2->size()), args.recvcounts->data(), + args.disps.data(), args.datatype2, MPI_COMM_WORLD); } TRACE_smpi_comm_out(my_proc_id); @@ -678,8 +682,9 @@ void ScatterAction::kernel(simgrid::xbt::ReplayAction&) Datatype::encode(args.datatype1), Datatype::encode(args.datatype2))); - Colls::scatter(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1, - (rank == args.root) ? recv_buffer(args.recv_size * args.datatype2->size()) : nullptr, args.recv_size, args.datatype2, args.root, MPI_COMM_WORLD); + colls::scatter(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1, + (rank == args.root) ? recv_buffer(args.recv_size * args.datatype2->size()) : nullptr, args.recv_size, + args.datatype2, args.root, MPI_COMM_WORLD); TRACE_smpi_comm_out(my_proc_id); } @@ -691,10 +696,10 @@ void ScatterVAction::kernel(simgrid::xbt::ReplayAction&) nullptr, Datatype::encode(args.datatype1), Datatype::encode(args.datatype2))); - Colls::scatterv((rank == args.root) ? send_buffer(args.send_size_sum * args.datatype1->size()) : nullptr, - args.sendcounts->data(), args.disps.data(), args.datatype1, - recv_buffer(args.recv_size * args.datatype2->size()), args.recv_size, args.datatype2, args.root, - MPI_COMM_WORLD); + colls::scatterv((rank == args.root) ? send_buffer(args.send_size_sum * args.datatype1->size()) : nullptr, + args.sendcounts->data(), args.disps.data(), args.datatype1, + recv_buffer(args.recv_size * args.datatype2->size()), args.recv_size, args.datatype2, args.root, + MPI_COMM_WORLD); TRACE_smpi_comm_out(my_proc_id); } @@ -706,9 +711,9 @@ void ReduceScatterAction::kernel(simgrid::xbt::ReplayAction&) std::to_string(args.comp_size), /* ugly hack to print comp_size */ Datatype::encode(args.datatype1))); - Colls::reduce_scatter(send_buffer(args.recv_size_sum * args.datatype1->size()), - recv_buffer(args.recv_size_sum * args.datatype1->size()), args.recvcounts->data(), - args.datatype1, MPI_OP_NULL, MPI_COMM_WORLD); + colls::reduce_scatter(send_buffer(args.recv_size_sum * args.datatype1->size()), + recv_buffer(args.recv_size_sum * args.datatype1->size()), args.recvcounts->data(), + args.datatype1, MPI_OP_NULL, MPI_COMM_WORLD); private_execute_flops(args.comp_size); TRACE_smpi_comm_out(my_proc_id); @@ -721,8 +726,9 @@ void AllToAllVAction::kernel(simgrid::xbt::ReplayAction&) "alltoallv", -1, args.send_size_sum, args.sendcounts, args.recv_size_sum, args.recvcounts, Datatype::encode(args.datatype1), Datatype::encode(args.datatype2))); - Colls::alltoallv(send_buffer(args.send_buf_size * args.datatype1->size()), args.sendcounts->data(), args.senddisps.data(), args.datatype1, - recv_buffer(args.recv_buf_size * args.datatype2->size()), args.recvcounts->data(), args.recvdisps.data(), args.datatype2, MPI_COMM_WORLD); + colls::alltoallv(send_buffer(args.send_buf_size * args.datatype1->size()), args.sendcounts->data(), + args.senddisps.data(), args.datatype1, recv_buffer(args.recv_buf_size * args.datatype2->size()), + args.recvcounts->data(), args.recvdisps.data(), args.datatype2, MPI_COMM_WORLD); TRACE_smpi_comm_out(my_proc_id); } diff --git a/src/smpi/mpi/smpi_comm.cpp b/src/smpi/mpi/smpi_comm.cpp index fdd8b69d2a..3507333c9c 100644 --- a/src/smpi/mpi/smpi_comm.cpp +++ b/src/smpi/mpi/smpi_comm.cpp @@ -48,10 +48,10 @@ Comm::Comm(MPI_Group group, MPI_Topology topo, int smp, int in_id) : group_(grou id=global_id_; global_id_++; } - Colls::bcast(&id, 1, MPI_INT, 0, this); + colls::bcast(&id, 1, MPI_INT, 0, this); XBT_DEBUG("Communicator %p has id %d", this, id); id_=id;//only set here, as we don't want to change it in the middle of the bcast - Colls::barrier(this); + colls::barrier(this); } } diff --git a/src/smpi/mpi/smpi_file.cpp b/src/smpi/mpi/smpi_file.cpp index b47984eec6..b0596cb1ae 100644 --- a/src/smpi/mpi/smpi_file.cpp +++ b/src/smpi/mpi/smpi_file.cpp @@ -39,8 +39,8 @@ namespace smpi{ }else{ win_=new Win(list_, 0, 1, MPI_INFO_NULL, comm_); } - simgrid::smpi::Colls::bcast(&shared_file_pointer_, 1, MPI_AINT, 0, comm); - simgrid::smpi::Colls::bcast(&shared_mutex_, 1, MPI_AINT, 0, comm); + simgrid::smpi::colls::bcast(&shared_file_pointer_, 1, MPI_AINT, 0, comm); + simgrid::smpi::colls::bcast(&shared_mutex_, 1, MPI_AINT, 0, comm); if(comm_->rank() != 0) intrusive_ptr_add_ref(&*shared_mutex_); } @@ -157,7 +157,7 @@ namespace smpi{ } MPI_Offset result; - simgrid::smpi::Colls::scan(&val, &result, 1, MPI_OFFSET, MPI_SUM, fh->comm_); + simgrid::smpi::colls::scan(&val, &result, 1, MPI_OFFSET, MPI_SUM, fh->comm_); fh->seek(result, MPI_SEEK_SET); int ret = fh->op_all(buf, count, datatype, status); if(fh->comm_->rank()==fh->comm_->size()-1){ @@ -166,7 +166,7 @@ namespace smpi{ fh->shared_mutex_->unlock(); } char c; - simgrid::smpi::Colls::bcast(&c, 1, MPI_BYTE, fh->comm_->size()-1, fh->comm_); + simgrid::smpi::colls::bcast(&c, 1, MPI_BYTE, fh->comm_->size() - 1, fh->comm_); return ret; } @@ -205,7 +205,7 @@ namespace smpi{ val=count*datatype->size(); } MPI_Offset result; - simgrid::smpi::Colls::scan(&val, &result, 1, MPI_OFFSET, MPI_SUM, fh->comm_); + simgrid::smpi::colls::scan(&val, &result, 1, MPI_OFFSET, MPI_SUM, fh->comm_); fh->seek(result, MPI_SEEK_SET); int ret = fh->op_all(const_cast(buf), count, datatype, status); if(fh->comm_->rank()==fh->comm_->size()-1){ @@ -214,7 +214,7 @@ namespace smpi{ fh->shared_mutex_->unlock(); } char c; - simgrid::smpi::Colls::bcast(&c, 1, MPI_BYTE, fh->comm_->size()-1, fh->comm_); + simgrid::smpi::colls::bcast(&c, 1, MPI_BYTE, fh->comm_->size() - 1, fh->comm_); return ret; } @@ -228,7 +228,7 @@ namespace smpi{ int File::sync(){ //no idea - return simgrid::smpi::Colls::barrier(comm_); + return simgrid::smpi::colls::barrier(comm_); } MPI_Info File::info(){ diff --git a/src/smpi/mpi/smpi_win.cpp b/src/smpi/mpi/smpi_win.cpp index f2adc8c99d..3685c7a6c9 100644 --- a/src/smpi/mpi/smpi_win.cpp +++ b/src/smpi/mpi/smpi_win.cpp @@ -46,12 +46,12 @@ Win::Win(void *base, MPI_Aint size, int disp_unit, MPI_Info info, MPI_Comm comm, comm->add_rma_win(this); comm->ref(); - Colls::allgather(&(connected_wins_[rank_]), sizeof(MPI_Win), MPI_BYTE, connected_wins_, sizeof(MPI_Win), - MPI_BYTE, comm); + colls::allgather(&(connected_wins_[rank_]), sizeof(MPI_Win), MPI_BYTE, connected_wins_, sizeof(MPI_Win), MPI_BYTE, + comm); - Colls::bcast(&(bar_), sizeof(s4u::Barrier*), MPI_BYTE, 0, comm); + colls::bcast(&(bar_), sizeof(s4u::Barrier*), MPI_BYTE, 0, comm); - Colls::barrier(comm); + colls::barrier(comm); } Win::~Win(){ @@ -72,7 +72,7 @@ Win::~Win(){ comm_->remove_rma_win(this); - Colls::barrier(comm_); + colls::barrier(comm_); Comm::unref(comm_); if (rank_ == 0)