From cb6c404ed6a176ee440c09957a8fd9f34918a29b Mon Sep 17 00:00:00 2001 From: Samuel Lepetit Date: Wed, 20 Jun 2012 14:01:52 +0200 Subject: [PATCH] Fix issue when a host failed while executing a task (the glass is only half full however, we should do the process kill in simix) --- src/msg/msg_gos.c | 52 +++++++++++++++++++++++++------------------ src/msg/msg_mailbox.c | 3 --- src/simix/smx_host.c | 8 +------ 3 files changed, 31 insertions(+), 32 deletions(-) diff --git a/src/msg/msg_gos.c b/src/msg/msg_gos.c index 0ce5b3ef84..957fcb8c45 100644 --- a/src/msg/msg_gos.c +++ b/src/msg/msg_gos.c @@ -23,6 +23,7 @@ XBT_LOG_NEW_DEFAULT_SUBCATEGORY(msg_gos, msg, */ MSG_error_t MSG_task_execute(m_task_t task) { + xbt_ex_t e; simdata_task_t simdata = NULL; simdata_process_t p_simdata; e_smx_state_t comp_state; @@ -59,15 +60,16 @@ MSG_error_t MSG_task_execute(m_task_t task) #ifdef HAVE_TRACING simcall_set_category(simdata->compute, task->category); #endif - + p_simdata->waiting_action = simdata->compute; - comp_state = simcall_host_execution_wait(simdata->compute); - p_simdata->waiting_action = NULL; + TRY { + comp_state = simcall_host_execution_wait(simdata->compute); + p_simdata->waiting_action = NULL; - simdata->isused=0; + simdata->isused=0; + + XBT_DEBUG("Execution task '%s' finished in state %d", task->name, (int)comp_state); - XBT_DEBUG("Execution task '%s' finished in state %d", task->name, (int)comp_state); - if (comp_state == SIMIX_DONE) { /* action ended, set comm and compute = NULL, the actions is already destroyed in the main function */ simdata->computation_amount = 0.0; simdata->comm = NULL; @@ -76,22 +78,28 @@ MSG_error_t MSG_task_execute(m_task_t task) TRACE_msg_task_execute_end(task); #endif MSG_RETURN(MSG_OK); - } else if (simcall_host_get_state(SIMIX_host_self()) == 0) { - /* action ended, set comm and compute = NULL, the actions is already destroyed in the main function */ - simdata->comm = NULL; - simdata->compute = NULL; -#ifdef HAVE_TRACING - TRACE_msg_task_execute_end(task); -#endif - MSG_RETURN(MSG_HOST_FAILURE); - } else { - /* action ended, set comm and compute = NULL, the actions is already destroyed in the main function */ - simdata->comm = NULL; - simdata->compute = NULL; -#ifdef HAVE_TRACING - TRACE_msg_task_execute_end(task); -#endif - MSG_RETURN(MSG_TASK_CANCELED); + } + CATCH(e) { + switch (e.category) { + case host_error: + /* action ended, set comm and compute = NULL, the actions is already destroyed in the main function */ + simdata->comm = NULL; + simdata->compute = NULL; + #ifdef HAVE_TRACING + TRACE_msg_task_execute_end(task); + #endif + MSG_RETURN(MSG_HOST_FAILURE); + break; + case cancel_error: + /* action ended, set comm and compute = NULL, the actions is already destroyed in the main function */ + simdata->comm = NULL; + simdata->compute = NULL; + #ifdef HAVE_TRACING + TRACE_msg_task_execute_end(task); + #endif + MSG_RETURN(MSG_TASK_CANCELED); + break; + } } } diff --git a/src/msg/msg_mailbox.c b/src/msg/msg_mailbox.c index 5fd54c3b43..997b138659 100644 --- a/src/msg/msg_mailbox.c +++ b/src/msg/msg_mailbox.c @@ -156,9 +156,6 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task, CATCH(e) { switch (e.category) { - case host_error: - ret = MSG_HOST_FAILURE; - break; case network_error: ret = MSG_TRANSFER_FAILURE; break; diff --git a/src/simix/smx_host.c b/src/simix/smx_host.c index 299864a5f9..815d947a5d 100644 --- a/src/simix/smx_host.c +++ b/src/simix/smx_host.c @@ -348,13 +348,7 @@ void SIMIX_execution_finish(smx_action_t action) case SIMIX_FAILED: XBT_DEBUG("SIMIX_execution_finished: host '%s' failed", simcall->issuer->smx_host->name); - if (simcall->issuer->smx_host == action->execution.host) { - // add a reference to the action that will be destroyed when the killed process is cleaned up, and by the end of the current function - surf_action_ref(action->execution.surf_exec); - SIMIX_process_kill(simcall->issuer); - } else { - SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed"); - } + SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed"); break; case SIMIX_CANCELED: -- 2.20.1