From 59378580f8bb437fd2a9ed435f7cad27e76c338f Mon Sep 17 00:00:00 2001 From: Arnaud Giersch Date: Wed, 6 Dec 2017 14:53:46 +0100 Subject: [PATCH] Attempt to fix leaks in msg-host_on_off_processes. --- src/msg/msg_task.cpp | 12 ++++++------ src/simix/ActorImpl.cpp | 6 +++++- src/simix/libsmx.cpp | 11 ++++++----- .../host_on_off_processes/host_on_off_processes.c | 1 - .../host_on_off_processes/host_on_off_processes.tesh | 9 --------- 5 files changed, 17 insertions(+), 22 deletions(-) diff --git a/src/msg/msg_task.cpp b/src/msg/msg_task.cpp index 86ece99590..c971394137 100644 --- a/src/msg/msg_task.cpp +++ b/src/msg/msg_task.cpp @@ -216,14 +216,14 @@ msg_error_t MSG_task_cancel(msg_task_t task) { xbt_assert((task != nullptr), "Cannot cancel a nullptr task"); - if (task->simdata->compute) { - simcall_execution_cancel(task->simdata->compute); - } - else if (task->simdata->comm) { - simdata_task_t simdata = task->simdata; + simdata_task_t simdata = task->simdata; + if (simdata->compute) { + simcall_execution_cancel(simdata->compute); + MSG_host_del_task(MSG_process_get_host(MSG_process_self()), task); + } else if (simdata->comm) { simcall_comm_cancel(simdata->comm); - simdata->setNotUsed(); } + simdata->setNotUsed(); return MSG_OK; } diff --git a/src/simix/ActorImpl.cpp b/src/simix/ActorImpl.cpp index f727f484aa..4e82f6b570 100644 --- a/src/simix/ActorImpl.cpp +++ b/src/simix/ActorImpl.cpp @@ -500,7 +500,11 @@ void SIMIX_process_kill(smx_actor_t process, smx_actor_t issuer) { boost::dynamic_pointer_cast(process->waiting_synchro); if (exec != nullptr) { - /* Nothing to do */ + if (exec->surf_exec) { + exec->surf_exec->cancel(); + exec->surf_exec->unref(); + exec->surf_exec = nullptr; + } } else if (comm != nullptr) { process->comms.remove(process->waiting_synchro); comm->cancel(); diff --git a/src/simix/libsmx.cpp b/src/simix/libsmx.cpp index 6af48805e9..d774d3cf45 100644 --- a/src/simix/libsmx.cpp +++ b/src/simix/libsmx.cpp @@ -110,11 +110,12 @@ smx_activity_t simcall_execution_parallel_start(const char* name, int host_nb, s */ void simcall_execution_cancel(smx_activity_t execution) { - simgrid::simix::kernelImmediate([execution] { - XBT_DEBUG("Cancel synchro %p", execution.get()); - simgrid::kernel::activity::ExecImplPtr exec = - boost::static_pointer_cast(execution); - + simgrid::kernel::activity::ExecImplPtr exec = + boost::static_pointer_cast(execution); + if (not exec->surf_exec) + return; + simgrid::simix::kernelImmediate([exec] { + XBT_DEBUG("Cancel synchro %p", exec.get()); if (exec->surf_exec) exec->surf_exec->cancel(); }); diff --git a/teshsuite/msg/host_on_off_processes/host_on_off_processes.c b/teshsuite/msg/host_on_off_processes/host_on_off_processes.c index 680b014b1b..cc9d85e000 100644 --- a/teshsuite/msg/host_on_off_processes/host_on_off_processes.c +++ b/teshsuite/msg/host_on_off_processes/host_on_off_processes.c @@ -177,7 +177,6 @@ static int test_launcher(int argc, char *argv[]) test =6; if (xbt_dynar_search_or_negative(tests, &test)!=-1){ - MSG_process_set_data_cleanup(NULL); /* FIXME: we are leaking here, but removing this line changes the test output */ XBT_INFO("Test 6: Turn on Jupiter, assign a VM on Jupiter, launch a process inside the VM, and turn off the node"); // Create VM0 diff --git a/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh b/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh index e3e9bc5d0c..3f98361023 100644 --- a/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh +++ b/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh @@ -84,15 +84,6 @@ $ ${bindir}/host_on_off_processes ${platfdir}/small_platform.xml 6 --log=no_loc > [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Turn Jupiter off > [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Shutdown vm0 > [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Destroy vm0 -> [10.000000] [surf_vm/WARNING] Dirty page tracking: 1 pending task(s) on a destroyed VM (first one is (name hidden)). -> If you don't understand why your task was not properly removed, please report that bug. -> This is a known bug if you turned the host off during the VM execution. -> Please remind us of that problem at some point: our code base is not ready to fix this harmless issue in 2016, sorry. > [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Test 6 is also weird: when the node Jupiter is turned off once again, the VM and its daemon are not killed. However, the issue regarding the shutdown of hosted VMs can be seen a feature not a bug ;) > [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Test done. See you! > [10.000000] [msg_test/INFO] Simulation time 10 -> [10.000000] [surf_maxmin/WARNING] Probable bug: a simgrid::surf::CpuCas01Action variable (#13) not removed before the LMM system destruction. -> [10.000000] [surf_maxmin/WARNING] Probable bug: a simgrid::surf::CpuCas01Action variable (#2) not removed before the LMM system destruction. - -p The previous test suffers of bug https://github.com/simgrid/simgrid/issues/120 -p but the code is still not clean enough to really solve it. -- 2.20.1