Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Actor: make the refcount observable, and improve debug messages
author: Martin Quinson <martin.quinson@ens-rennes.fr>
Thu, 1 Aug 2019 20:49:34 +0000 (22:49 +0200)
committer: Martin Quinson <martin.quinson@ens-rennes.fr>
Thu, 1 Aug 2019 20:49:34 +0000 (22:49 +0200)
include/simgrid/s4u/Actor.hpp
src/kernel/actor/ActorImpl.hpp
src/s4u/s4u_Actor.cpp
src/simix/smx_global.cpp
src/smpi/internals/smpi_deployment.cpp
src/smpi/internals/smpi_global.cpp
src/smpi/mpi/smpi_request.cpp

index 3c12abc..3b847f2 100644 (file)
@@ -142,6 +142,7 @@ public:
   // ***** Reference count *****
   friend XBT_PUBLIC void intrusive_ptr_add_ref(Actor * actor);
   friend XBT_PUBLIC void intrusive_ptr_release(Actor * actor);
+  int get_refcount();
 
   // ***** Actor creation *****
   /** Retrieve a reference to myself */
index 6e4dd5a..cc97cb3 100644 (file)
@@ -76,6 +76,7 @@ private:
   std::atomic_int_fast32_t refcount_{0};
 
 public:
+  int get_refcount() { return refcount_; }
   friend void intrusive_ptr_add_ref(ActorImpl* actor)
   {
     // std::memory_order_relaxed ought to be enough here instead of std::memory_order_seq_cst
index 438c476..4a45890 100644 (file)
@@ -76,6 +76,10 @@ void intrusive_ptr_release(Actor* actor)
 {
   intrusive_ptr_release(actor->pimpl_);
 }
+int Actor::get_refcount()
+{
+  return pimpl_->get_refcount();
+}
 
 // ***** Actor methods *****
 
index 9ad348e..ca3e4b9 100644 (file)
@@ -171,7 +171,7 @@ void Global::empty_trash()
   while (not actors_to_destroy.empty()) {
     smx_actor_t actor = &actors_to_destroy.front();
     actors_to_destroy.pop_front();
-    XBT_DEBUG("Getting rid of %p", actor);
+    XBT_DEBUG("Getting rid of %s (refcount: %d)", actor->get_cname(), actor->get_refcount());
     intrusive_ptr_release(actor);
   }
 #if SIMGRID_HAVE_MC
index d2dd6b4..54f42e1 100644 (file)
@@ -33,7 +33,7 @@ public:
   }
 
   const std::string name_;
-  int size_;
+  unsigned int size_;
   std::vector<simgrid::s4u::ActorPtr> present_processes_;
   unsigned int finalized_ranks_ = 0;
   MPI_Comm comm_world_;
@@ -76,7 +76,6 @@ void SMPI_app_instance_register(const char *name, xbt_main_func_t code, int num_
 void smpi_deployment_register_process(const std::string& instance_id, int rank, simgrid::s4u::ActorPtr actor)
 {
   Instance& instance = smpi_instances.at(instance_id);
-
   instance.present_processes_.push_back(actor);
   instance.comm_world_->group()->set_mapping(actor, rank);
 }
@@ -84,9 +83,9 @@ void smpi_deployment_register_process(const std::string& instance_id, int rank,
 void smpi_deployment_unregister_process(const std::string& instance_id)
 {
   Instance& instance = smpi_instances.at(instance_id);
-
   instance.finalized_ranks_++;
-  if (instance.finalized_ranks_ == instance.present_processes_.size()) {
+
+  if (instance.finalized_ranks_ == instance.size_) {
     instance.present_processes_.clear();
     simgrid::smpi::Comm::destroy(instance.comm_world_);
     smpi_instances.erase(instance_id);
@@ -95,7 +94,8 @@ void smpi_deployment_unregister_process(const std::string& instance_id)
 
 MPI_Comm* smpi_deployment_comm_world(const std::string& instance_id)
 {
-  if (smpi_instances.empty()) { // no instance registered, we probably used smpirun.
+  if (smpi_instances
+          .empty()) { // no instance registered, we probably used smpirun. (FIXME: I guess this never happens for real)
     return nullptr;
   }
   Instance& instance = smpi_instances.at(instance_id);
index 5221062..739ed60 100644 (file)
@@ -118,6 +118,8 @@ simgrid::smpi::ActorExt* smpi_process()
 
 simgrid::smpi::ActorExt* smpi_process_remote(simgrid::s4u::ActorPtr actor)
 {
+  if (actor.get() == nullptr)
+    return nullptr;
   return process_data.at(actor.get());
 }
 
@@ -659,6 +661,7 @@ void SMPI_init(){
     }
   });
   simgrid::s4u::Actor::on_destruction.connect([](simgrid::s4u::Actor const& actor) {
+    XBT_DEBUG("Delete the extension of actor %s", actor.get_cname());
     auto it = process_data.find(&actor);
     if (it != process_data.end()) {
       delete it->second;
index e9a2a49..e0ae66c 100644 (file)
@@ -398,6 +398,7 @@ void Request::start()
       mut->unlock();
   } else { /* the RECV flag was not set, so this is a send */
     simgrid::smpi::ActorExt* process = smpi_process_remote(simgrid::s4u::Actor::by_pid(dst_));
+    xbt_assert(process, "Actor pid=%d is gone??", dst_);
     int rank = src_;
     if (TRACE_smpi_view_internals()) {
       TRACE_smpi_send(rank, rank, dst_, tag_, size_);