From c5a8ab0fb544a7cc1dbdac35185ae1f336513980 Mon Sep 17 00:00:00 2001 From: alegrand Date: Tue, 2 Feb 2010 23:18:00 +0000 Subject: [PATCH] Handle correctly (hopefully) timeouts in communications. git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/simgrid/simgrid/trunk@7050 48e7efb5-ca39-0410-a469-dd3cf9ba447f --- src/simix/private.h | 2 ++ src/simix/smx_network.c | 68 +++++++++++++++++++++++++++-------------- 2 files changed, 47 insertions(+), 23 deletions(-) diff --git a/src/simix/private.h b/src/simix/private.h index 08a7fe0d50..ca905b9a3e 100644 --- a/src/simix/private.h +++ b/src/simix/private.h @@ -148,6 +148,8 @@ typedef struct s_smx_comm { /* Surf action data */ smx_process_t src_proc; smx_process_t dst_proc; + smx_action_t src_timeout; + smx_action_t dst_timeout; smx_action_t act; double rate; double task_size; diff --git a/src/simix/smx_network.c b/src/simix/smx_network.c index 2aa4773448..12f2be8a82 100644 --- a/src/simix/smx_network.c +++ b/src/simix/smx_network.c @@ -171,7 +171,17 @@ void SIMIX_communication_destroy(smx_comm_t comm) SIMIX_action_destroy(comm->act); comm->act = NULL; } - + + if(comm->src_timeout){ + SIMIX_action_destroy(comm->src_timeout); + comm->src_timeout = NULL; + } + + if(comm->dst_timeout){ + SIMIX_action_destroy(comm->dst_timeout); + comm->dst_timeout = NULL; + } + xbt_free(comm); } @@ -230,35 +240,47 @@ static inline void SIMIX_communication_start(smx_comm_t comm) */ static inline void SIMIX_communication_wait_for_completion(smx_comm_t comm, double timeout) { - xbt_ex_t e; + smx_action_t act_sleep = NULL; + int src_timeout = 0; + int dst_timeout = 0; DEBUG1("Waiting for the completion of communication %p", comm); - if(timeout > 0){ - TRY{ - SIMIX_sem_acquire_timeout(comm->sem, timeout); - } - CATCH(e){ - /* If there is a timeout then cancel the communication if it is running or - remove it from the rendez-vous otherwise. Then signal the other peer, - destroy the communication and retrow the exception. */ - if(e.category == timeout_error){ - DEBUG1("Communication timeout! %p", comm); - if(comm->act && SIMIX_action_get_state(comm->act) == SURF_ACTION_RUNNING) - SIMIX_communication_cancel(comm); - else if (comm->rdv) - SIMIX_rdv_remove(comm->rdv, comm); - - /* Make sure that everyone sleeping on that semaphore is awake, and that nobody will ever block on it */ - SIMIX_sem_release_forever(comm->sem); - SIMIX_communication_destroy(comm); - } - RETHROW; - } + if (timeout >= 0) { + act_sleep = SIMIX_action_sleep(SIMIX_host_self(), timeout); + if(SIMIX_process_self()==comm->src_proc) + comm->src_timeout = act_sleep; + else + comm->dst_timeout = act_sleep; + SIMIX_action_set_name(act_sleep,bprintf("Timeout for comm %p and wait on semaphore %p (max_duration:%f)", comm, comm->sem,timeout)); + SIMIX_register_action_to_semaphore(act_sleep, comm->sem); + SIMIX_process_self()->waiting_action = act_sleep; + SIMIX_sem_block_onto(comm->sem); + SIMIX_process_self()->waiting_action = NULL; + SIMIX_unregister_action_to_semaphore(act_sleep, comm->sem); } else { SIMIX_sem_acquire(comm->sem); } + /* Check for timeouts */ + if ((src_timeout = ((comm->src_timeout) && (SIMIX_action_get_state(comm->src_timeout) == SURF_ACTION_DONE))) || + (dst_timeout = ((comm->dst_timeout) && (SIMIX_action_get_state(comm->dst_timeout) == SURF_ACTION_DONE))) ) { + /* Somebody did a timeout! */ + if (src_timeout) DEBUG1("Communication timeout from the src! %p", comm); + if (dst_timeout) DEBUG1("Communication timeout from the dst! %p", comm); + + if(comm->act && SIMIX_action_get_state(comm->act) == SURF_ACTION_RUNNING) + SIMIX_communication_cancel(comm); + else if (comm->rdv) + SIMIX_rdv_remove(comm->rdv, comm); + + /* Make sure that everyone sleeping on that semaphore is awake, and that nobody will ever block on it */ + SIMIX_sem_release_forever(comm->sem); + SIMIX_communication_destroy(comm); + + THROW1(timeout_error, 0, "Communication timeouted because of %s",src_timeout?"the source":"the destination"); + } + DEBUG1("Communication %p complete! Let's check for errors", comm); /* Make sure that everyone sleeping on that semaphore is awake, and that nobody will ever block on it */ -- 2.20.1