From: Augustin Degomme Date: Thu, 28 May 2020 15:30:51 +0000 (+0200) Subject: smpi topo : a topo can be shared between some comm (duplicates), but there was no... X-Git-Tag: v3.26~573 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/e1552e8f04faf687626d1849f52e67981f1169b7 smpi topo : a topo can be shared between some comm (duplicates), but there was no refcounting. Try to switch to a shared_ptr to avoid segfaults or leaks... --- diff --git a/src/smpi/bindings/smpi_pmpi_topo.cpp b/src/smpi/bindings/smpi_pmpi_topo.cpp index a82845c326..c45f88bd33 100644 --- a/src/smpi/bindings/smpi_pmpi_topo.cpp +++ b/src/smpi/bindings/smpi_pmpi_topo.cpp @@ -29,7 +29,7 @@ int PMPI_Cart_create(MPI_Comm comm_old, int ndims, const int* dims, const int* p if (*comm_cart == MPI_COMM_NULL) { delete topo; } else { - xbt_assert((*comm_cart)->topo() == topo); + xbt_assert((*comm_cart)->topo().get() == topo); } return MPI_SUCCESS; } @@ -38,7 +38,7 @@ int PMPI_Cart_rank(MPI_Comm comm, const int* coords, int* rank) { CHECK_COMM(1) CHECK_NULL(1, MPI_ERR_TOPOLOGY, comm->topo()) CHECK_NULL(2, MPI_SUCCESS, coords) - MPIR_Cart_Topology topo = static_cast(comm->topo()); + MPIR_Cart_Topology topo = static_cast(comm->topo().get()); if (topo==nullptr) { return MPI_ERR_ARG; } @@ -51,7 +51,7 @@ int PMPI_Cart_shift(MPI_Comm comm, int direction, int displ, int* source, int* d CHECK_NEGATIVE(3, MPI_ERR_ARG, direction) CHECK_NULL(4, MPI_ERR_ARG, source) CHECK_NULL(5, MPI_ERR_ARG, dest) - MPIR_Cart_Topology topo = static_cast(comm->topo()); + MPIR_Cart_Topology topo = static_cast(comm->topo().get()); if (topo==nullptr) { return MPI_ERR_ARG; } @@ -66,7 +66,7 @@ int PMPI_Cart_coords(MPI_Comm comm, int rank, int maxdims, int* coords) { if(maxdims==0 || coords == nullptr) { return MPI_SUCCESS; } - MPIR_Cart_Topology topo = static_cast(comm->topo()); + MPIR_Cart_Topology topo = static_cast(comm->topo().get()); if (topo==nullptr) { return MPI_ERR_ARG; } @@ -80,7 +80,7 @@ int PMPI_Cart_get(MPI_Comm comm, int maxdims, int* dims, int* periods, int* coor CHECK_COMM(1) CHECK_NULL(1, MPI_ERR_TOPOLOGY, comm->topo()) CHECK_NEGATIVE(3, MPI_ERR_ARG, maxdims) - MPIR_Cart_Topology topo = static_cast(comm->topo()); + MPIR_Cart_Topology topo = static_cast(comm->topo().get()); if (topo==nullptr) { return MPI_ERR_ARG; } @@ -91,7 +91,7 @@ int PMPI_Cartdim_get(MPI_Comm comm, int* ndims) { CHECK_COMM(1) CHECK_NULL(1, MPI_ERR_TOPOLOGY, comm->topo()) CHECK_NULL(2, MPI_ERR_ARG, ndims) - MPIR_Cart_Topology topo = static_cast(comm->topo()); + MPIR_Cart_Topology topo = static_cast(comm->topo().get()); if (topo==nullptr) { return MPI_ERR_ARG; } @@ -110,7 +110,7 @@ int PMPI_Cart_sub(MPI_Comm comm, const int* remain_dims, MPI_Comm* comm_new) { CHECK_COMM(1) CHECK_NULL(1, MPI_ERR_TOPOLOGY, comm->topo()) CHECK_NULL(3, MPI_ERR_ARG, comm_new) - MPIR_Cart_Topology topo = static_cast(comm->topo()); + MPIR_Cart_Topology topo = static_cast(comm->topo().get()); if (topo==nullptr) { return MPI_ERR_ARG; } diff --git a/src/smpi/include/smpi_comm.hpp b/src/smpi/include/smpi_comm.hpp index a9fa971047..61a494002c 100644 --- a/src/smpi/include/smpi_comm.hpp +++ b/src/smpi/include/smpi_comm.hpp @@ -8,6 +8,7 @@ #include #include +#include #include "smpi_errhandler.hpp" #include "smpi_keyvals.hpp" #include "smpi_group.hpp" @@ -20,7 +21,7 @@ class Comm : public F2C, public Keyval{ friend Topo; MPI_Group group_; SMPI_Topo_type topoType_ = MPI_INVALID_TOPO; - MPI_Topology topo_; // to be replaced by an union + std::shared_ptr topo_; // to be replaced by an union int refcount_ = 1; MPI_Comm leaders_comm_ = MPI_COMM_NULL; // inter-node communicator MPI_Comm intra_comm_ = MPI_COMM_NULL; // intra-node communicator. For MPI_COMM_WORLD this can't be used, as var is @@ -47,6 +48,7 @@ public: int dup_with_info(MPI_Info info, MPI_Comm* newcomm); MPI_Group group(); MPI_Topology topo() { return topo_; } + void set_topo(MPI_Topology topo){topo_=topo;} int size(); int rank(); int id(); diff --git a/src/smpi/include/smpi_topo.hpp b/src/smpi/include/smpi_topo.hpp index 507606fb8c..58c7576d4f 100644 --- a/src/smpi/include/smpi_topo.hpp +++ b/src/smpi/include/smpi_topo.hpp @@ -8,8 +8,9 @@ #include "smpi_comm.hpp" #include "smpi_status.hpp" +#include -typedef SMPI_Topology *MPI_Topology; +typedef std::shared_ptr MPI_Topology; namespace simgrid{ namespace smpi{ diff --git a/src/smpi/mpi/smpi_comm.cpp b/src/smpi/mpi/smpi_comm.cpp index 7562d231e2..64fd5921df 100644 --- a/src/smpi/mpi/smpi_comm.cpp +++ b/src/smpi/mpi/smpi_comm.cpp @@ -297,7 +297,7 @@ MPI_Comm Comm::split(int color, int key) Request::recv(&group_out, 1, MPI_PTR, 0, system_tag, this, MPI_STATUS_IGNORE); } /* otherwise, exit with group_out == nullptr */ } - return group_out!=nullptr ? new Comm(group_out, nullptr) : MPI_COMM_NULL; + return group_out!=nullptr ? new Comm(group_out, topo_) : MPI_COMM_NULL; } void Comm::ref(){ @@ -333,7 +333,6 @@ void Comm::unref(Comm* comm){ simgrid::smpi::Info::unref(comm->info_); if (comm->errhandler_ != MPI_ERRHANDLER_NULL) simgrid::smpi::Errhandler::unref(comm->errhandler_); - delete comm->topo_; // there's no use count on topos delete comm; } } diff --git a/src/smpi/mpi/smpi_topo.cpp b/src/smpi/mpi/smpi_topo.cpp index 1184c6fc4b..592850187e 100644 --- a/src/smpi/mpi/smpi_topo.cpp +++ b/src/smpi/mpi/smpi_topo.cpp @@ -23,8 +23,6 @@ void Topo::setComm(MPI_Comm comm) { xbt_assert(not comm_); comm_ = comm; - if (comm_) - comm_->topo_ = this; } /******************************************************************************* @@ -75,20 +73,21 @@ Topo_Cart::Topo_Cart(MPI_Comm comm_old, int ndims, const int dims[], const int p for (int i = 0 ; i < newSize ; i++) { newGroup->set_mapping(oldGroup->actor(i), i); } - *comm_cart = new Comm(newGroup, this); + *comm_cart = new Comm(newGroup, std::shared_ptr(this)); } } else { if(comm_cart != nullptr){ if (rank == 0) { MPI_Group group = new Group(MPI_COMM_SELF->group()); - *comm_cart = new Comm(group, this); + *comm_cart = new Comm(group, std::shared_ptr(this)); } else { *comm_cart = MPI_COMM_NULL; } } } - if(comm_cart != nullptr) + if(comm_cart != nullptr){ setComm(*comm_cart); + } } Topo_Cart* Topo_Cart::sub(const int remain_dims[], MPI_Comm *newcomm) { @@ -133,7 +132,9 @@ Topo_Cart* Topo_Cart::sub(const int remain_dims[], MPI_Comm *newcomm) { } else { *newcomm = getComm()->split(color, getComm()->rank()); res = new Topo_Cart(getComm(), newNDims, newDims, newPeriodic, 0, nullptr); + std::shared_ptr topo=std::shared_ptr(res); res->setComm(*newcomm); + (*newcomm)->set_topo(topo); } delete[] newDims; delete[] newPeriodic;