From a4c8f1898670317d0fa33bf1b1a904ea922b78cc Mon Sep 17 00:00:00 2001 From: Martin Quinson Date: Thu, 1 Aug 2019 22:49:34 +0200 Subject: [PATCH 1/1] Actor: make the refcount observable, and improve debug messages --- include/simgrid/s4u/Actor.hpp | 1 + src/kernel/actor/ActorImpl.hpp | 1 + src/s4u/s4u_Actor.cpp | 4 ++++ src/simix/smx_global.cpp | 2 +- src/smpi/internals/smpi_deployment.cpp | 10 +++++----- src/smpi/internals/smpi_global.cpp | 3 +++ src/smpi/mpi/smpi_request.cpp | 1 + 7 files changed, 16 insertions(+), 6 deletions(-) diff --git a/include/simgrid/s4u/Actor.hpp b/include/simgrid/s4u/Actor.hpp index 3c12abccc6..3b847f249f 100644 --- a/include/simgrid/s4u/Actor.hpp +++ b/include/simgrid/s4u/Actor.hpp @@ -142,6 +142,7 @@ public: // ***** Reference count ***** friend XBT_PUBLIC void intrusive_ptr_add_ref(Actor * actor); friend XBT_PUBLIC void intrusive_ptr_release(Actor * actor); + int get_refcount(); // ***** Actor creation ***** /** Retrieve a reference to myself */ diff --git a/src/kernel/actor/ActorImpl.hpp b/src/kernel/actor/ActorImpl.hpp index 6e4dd5a80c..cc97cb3184 100644 --- a/src/kernel/actor/ActorImpl.hpp +++ b/src/kernel/actor/ActorImpl.hpp @@ -76,6 +76,7 @@ private: std::atomic_int_fast32_t refcount_{0}; public: + int get_refcount() { return refcount_; } friend void intrusive_ptr_add_ref(ActorImpl* actor) { // std::memory_order_relaxed ought to be enough here instead of std::memory_order_seq_cst diff --git a/src/s4u/s4u_Actor.cpp b/src/s4u/s4u_Actor.cpp index 438c476fe4..4a4589098e 100644 --- a/src/s4u/s4u_Actor.cpp +++ b/src/s4u/s4u_Actor.cpp @@ -76,6 +76,10 @@ void intrusive_ptr_release(Actor* actor) { intrusive_ptr_release(actor->pimpl_); } +int Actor::get_refcount() +{ + return pimpl_->get_refcount(); +} // ***** Actor methods ***** diff --git a/src/simix/smx_global.cpp b/src/simix/smx_global.cpp index 9ad348ed7c..ca3e4b9edb 100644 --- a/src/simix/smx_global.cpp +++ b/src/simix/smx_global.cpp @@ -171,7 +171,7 @@ void Global::empty_trash() while (not actors_to_destroy.empty()) { smx_actor_t actor = &actors_to_destroy.front(); actors_to_destroy.pop_front(); - XBT_DEBUG("Getting rid of %p", actor); + XBT_DEBUG("Getting rid of %s (refcount: %d)", actor->get_cname(), actor->get_refcount()); intrusive_ptr_release(actor); } #if SIMGRID_HAVE_MC diff --git a/src/smpi/internals/smpi_deployment.cpp b/src/smpi/internals/smpi_deployment.cpp index d2dd6b4917..54f42e1fc0 100644 --- a/src/smpi/internals/smpi_deployment.cpp +++ b/src/smpi/internals/smpi_deployment.cpp @@ -33,7 +33,7 @@ public: } const std::string name_; - int size_; + unsigned int size_; std::vector present_processes_; unsigned int finalized_ranks_ = 0; MPI_Comm comm_world_; @@ -76,7 +76,6 @@ void SMPI_app_instance_register(const char *name, xbt_main_func_t code, int num_ void smpi_deployment_register_process(const std::string& instance_id, int rank, simgrid::s4u::ActorPtr actor) { Instance& instance = smpi_instances.at(instance_id); - instance.present_processes_.push_back(actor); instance.comm_world_->group()->set_mapping(actor, rank); } @@ -84,9 +83,9 @@ void smpi_deployment_register_process(const std::string& instance_id, int rank, void smpi_deployment_unregister_process(const std::string& instance_id) { Instance& instance = smpi_instances.at(instance_id); - instance.finalized_ranks_++; - if (instance.finalized_ranks_ == instance.present_processes_.size()) { + + if (instance.finalized_ranks_ == instance.size_) { instance.present_processes_.clear(); simgrid::smpi::Comm::destroy(instance.comm_world_); smpi_instances.erase(instance_id); @@ -95,7 +94,8 @@ void smpi_deployment_unregister_process(const std::string& instance_id) MPI_Comm* smpi_deployment_comm_world(const std::string& instance_id) { - if (smpi_instances.empty()) { // no instance registered, we probably used smpirun. + if (smpi_instances + .empty()) { // no instance registered, we probably used smpirun. (FIXME: I guess this never happens for real) return nullptr; } Instance& instance = smpi_instances.at(instance_id); diff --git a/src/smpi/internals/smpi_global.cpp b/src/smpi/internals/smpi_global.cpp index 522106218e..739ed60c53 100644 --- a/src/smpi/internals/smpi_global.cpp +++ b/src/smpi/internals/smpi_global.cpp @@ -118,6 +118,8 @@ simgrid::smpi::ActorExt* smpi_process() simgrid::smpi::ActorExt* smpi_process_remote(simgrid::s4u::ActorPtr actor) { + if (actor.get() == nullptr) + return nullptr; return process_data.at(actor.get()); } @@ -659,6 +661,7 @@ void SMPI_init(){ } }); simgrid::s4u::Actor::on_destruction.connect([](simgrid::s4u::Actor const& actor) { + XBT_DEBUG("Delete the extension of actor %s", actor.get_cname()); auto it = process_data.find(&actor); if (it != process_data.end()) { delete it->second; diff --git a/src/smpi/mpi/smpi_request.cpp b/src/smpi/mpi/smpi_request.cpp index e9a2a49b49..e0ae66c0cb 100644 --- a/src/smpi/mpi/smpi_request.cpp +++ b/src/smpi/mpi/smpi_request.cpp @@ -398,6 +398,7 @@ void Request::start() mut->unlock(); } else { /* the RECV flag was not set, so this is a send */ simgrid::smpi::ActorExt* process = smpi_process_remote(simgrid::s4u::Actor::by_pid(dst_)); + xbt_assert(process, "Actor pid=%d is gone??", dst_); int rank = src_; if (TRACE_smpi_view_internals()) { TRACE_smpi_send(rank, rank, dst_, tag_, size_); -- 2.20.1