- #314: SMPI args internal cleanup
- #316: Fix a bug related to the CPU utilization of multi-core VM
- #318: Invalid trace file when using option --cfg=tracing/smpi/display-sizes:yes
+ - #325: Turning off a host has different behavior on sleeping actors and computing actors
----------------------------------------------------------------------------
> [ 40.278351] (1:master@Tremblay) Send to worker-3 completed
> [ 40.278351] (1:master@Tremblay) Send a message to worker-4
> [ 40.278351] (4:worker@Ginette) Start execution...
-> [ 41.000000] (4:worker@Ginette) Gloups. The cpu on which I'm running just turned off!. See you!
> [ 41.309278] (1:master@Tremblay) Send to worker-4 completed
> [ 41.309278] (1:master@Tremblay) All tasks have been dispatched. Let's tell everybody the computation is over.
> [ 41.309278] (2:worker@Tremblay) I'm done. See you!
> [ 40.692268] (4:worker@Ginette) Start execution...
> [ 40.692268] (1:master@Tremblay) Send to worker-3 completed
> [ 40.692268] (1:master@Tremblay) Send a message to worker-4
-> [ 41.000000] (4:worker@Ginette) Gloups. The cpu on which I'm running just turned off!. See you!
> [ 41.774742] (5:worker@Bourassa) Start execution...
> [ 41.774742] (1:master@Tremblay) Send to worker-4 completed
> [ 41.774742] (1:master@Tremblay) All tasks have been dispatched. Let's tell everybody the computation is over.
payload = static_cast<double*>(mailbox->get());
xbt_assert(payload != nullptr, "mailbox->get() failed");
comp_size = *payload;
+ delete payload;
if (comp_size < 0) { /* - Exit when -1.0 is received */
XBT_INFO("I'm done. See you!");
- delete payload;
break;
}
/* - Otherwise, process the task */
XBT_INFO("Start execution...");
simgrid::s4u::this_actor::execute(comp_size);
XBT_INFO("Execution complete.");
- delete payload;
} catch (simgrid::HostFailureException& e) {
XBT_INFO("Gloups. The cpu on which I'm running just turned off!. See you!");
delete payload;
> [ 40.278351] (1:master@Tremblay) Send to worker-3 completed
> [ 40.278351] (1:master@Tremblay) Send a message to worker-4
> [ 40.278351] (4:worker@Ginette) Start execution...
-> [ 41.000000] (4:worker@Ginette) Gloups. The cpu on which I'm running just turned off!. See you!
> [ 41.309278] (1:master@Tremblay) Send to worker-4 completed
> [ 41.309278] (1:master@Tremblay) All tasks have been dispatched. Let's tell everybody the computation is over.
> [ 41.309278] (2:worker@Tremblay) I'm done. See you!
> [ 40.692268] (4:worker@Ginette) Start execution...
> [ 40.692268] (1:master@Tremblay) Send to worker-3 completed
> [ 40.692268] (1:master@Tremblay) Send a message to worker-4
-> [ 41.000000] (4:worker@Ginette) Gloups. The cpu on which I'm running just turned off!. See you!
> [ 41.774742] (5:worker@Bourassa) Start execution...
> [ 41.774742] (1:master@Tremblay) Send to worker-4 completed
> [ 41.774742] (1:master@Tremblay) All tasks have been dispatched. Let's tell everybody the computation is over.
case SIMIX_FAILED:
XBT_DEBUG("ExecImpl::finish(): host '%s' failed", simcall->issuer->get_host()->get_cname());
simcall->issuer->context_->iwannadie = true;
- simcall->issuer->exception_ =
- std::make_exception_ptr(simgrid::HostFailureException(XBT_THROW_POINT, "Host failed"));
+ if (simcall->issuer->get_host()->is_on())
+ simcall->issuer->exception_ =
+ std::make_exception_ptr(simgrid::HostFailureException(XBT_THROW_POINT, "Host failed"));
+ /* else, the actor will be killed with no possibility to survive */
break;
case SIMIX_CANCELED:
msg_task_t task = MSG_task_create("daemon", MSG_host_get_speed(MSG_host_self()), 0, NULL);
MSG_process_set_data(self, task);
XBT_INFO(" Execute daemon");
- msg_error_t res = MSG_task_execute(task);
+ MSG_task_execute(task);
MSG_task_destroy(task);
tasks_done++;
- if (res == MSG_HOST_FAILURE) {
- XBT_INFO("Host has died as expected, do nothing else");
- return 0;
- }
}
XBT_INFO(" daemon done. See you!");
return 0;
> [Jupiter:process_daemonJUPI:(3) 9.000011] [msg_test/INFO] Execute daemon
> [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Turn Jupiter off
> [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Shutdown vm0
-> [Jupiter:process_daemonJUPI:(3) 10.000000] [msg_test/INFO] Host has died as expected, do nothing else
> [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Destroy vm0
> [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Test 6 is also weird: when the node Jupiter is turned off once again, the VM and its daemon are not killed. However, the issue regarding the shutdown of hosted VMs can be seen a feature not a bug ;)
> [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Test done. See you!