From: Martin Quinson Date: Tue, 5 Jun 2012 09:47:45 +0000 (+0200) Subject: Bug fixes around the resource failures in Simix X-Git-Tag: v3_8~646^2~21 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/99542e8a8213501e7639ec432a47222f0672f9c2 Bug fixes around the resource failures in Simix: don't let processes survive the failure of the host they are running on while they were executing something. The same kind of fix is probably needed for processes that are communicating or sleeping. --- diff --git a/ChangeLog b/ChangeLog index b40ea021fc..3099b55a19 100644 --- a/ChangeLog +++ b/ChangeLog @@ -11,6 +11,10 @@ SimGrid (3.8) NOT RELEASED; urgency=low process groups with very few intrinsic semantic, but they should allow you to build the semantic you want easily. + Simix: + * Bug fixes around the resource failures: don't let the processes + survive the host they are running onto + SimDag: * New type of typed tasks SD_TASK_COMP_PAR_AMDAHL that represents a parallel task whose initial work is distributed among host according diff --git a/src/simix/smx_host.c b/src/simix/smx_host.c index 3fdd5fd68f..9fea4909dd 100644 --- a/src/simix/smx_host.c +++ b/src/simix/smx_host.c @@ -345,7 +345,11 @@ void SIMIX_execution_finish(smx_action_t action) case SIMIX_FAILED: XBT_DEBUG("SIMIX_execution_finished: host '%s' failed", simcall->issuer->smx_host->name); - SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed"); + if (simcall->issuer->smx_host == action->execution.host) + SIMIX_process_kill(simcall->issuer); +// simcall->issuer->context->iwannadie = 1; // Bye bye, little process. 
+ else + SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed"); break; case SIMIX_CANCELED: @@ -368,14 +372,15 @@ void SIMIX_execution_finish(smx_action_t action) void SIMIX_post_host_execute(smx_action_t action) { - /* FIXME: check if the host running the action failed or not*/ - /*if(surf_workstation_model->extension.workstation.get_state(action->host->host))*/ - - /* If the host running the action didn't fail, then the action was canceled */ - if (surf_workstation_model->action_state_get(action->execution.surf_exec) == SURF_ACTION_FAILED) + if (surf_workstation_model->extension.workstation.get_state(action->execution.host->host)==SURF_RESOURCE_OFF) { + /* if the host running the action failed, notice it so that the asking process can be killed if it runs on that host itself */ + action->state = SIMIX_FAILED; + } else if (surf_workstation_model->action_state_get(action->execution.surf_exec) == SURF_ACTION_FAILED) { + /* If the host running the action didn't fail, then the action was canceled */ action->state = SIMIX_CANCELED; - else + } else { action->state = SIMIX_DONE; + } if (action->execution.surf_exec) { surf_workstation_model->action_unref(action->execution.surf_exec);