From: Christophe Thiéry Date: Wed, 9 Nov 2011 10:46:02 +0000 (+0100) Subject: Fix possible crashes and leaks with dsends during processes cleanup X-Git-Tag: exp_20120216~396 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/0398e492623cdd79abef6beb6819fda9d01b39c9 Fix possible crashes and leaks with dsends during processes cleanup --- diff --git a/src/simix/smx_global.c b/src/simix/smx_global.c index 3485c6b1e9..24f9df4311 100644 --- a/src/simix/smx_global.c +++ b/src/simix/smx_global.c @@ -360,8 +360,10 @@ void SIMIX_display_process_status(void) action_description = "I/O"; break; } - XBT_INFO("Process %ld (%s@%s): waiting for %s action %p (%s) to finish", process->pid, process->name, process->smx_host->name, - action_description, process->waiting_action, process->waiting_action->name); + XBT_INFO("Process %ld (%s@%s): waiting for %s action %p (%s) in state %d to finish", + process->pid, process->name, process->smx_host->name, + action_description, process->waiting_action, + process->waiting_action->name, process->waiting_action->state); } else { XBT_INFO("Process %ld (%s@%s)", process->pid, process->name, process->smx_host->name); diff --git a/src/simix/smx_network.c b/src/simix/smx_network.c index d5d4631509..56f4086130 100644 --- a/src/simix/smx_network.c +++ b/src/simix/smx_network.c @@ -18,7 +18,6 @@ unsigned long int smx_total_comms = 0; static void SIMIX_waitany_req_remove_from_actions(smx_req_t req); static void SIMIX_comm_copy_data(smx_action_t comm); static smx_action_t SIMIX_comm_new(e_smx_comm_type_t type); -static void SIMIX_comm_remove_from_processes(smx_action_t action); static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm); static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm); static smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type, @@ -135,7 +134,7 @@ smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type, xbt_fifo_item_t item; void* 
req_data = NULL; - xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t){ + xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t) { if (action->comm.type == SIMIX_COMM_SEND) { req_data = action->comm.src_data; } else if (action->comm.type == SIMIX_COMM_RECEIVE) { @@ -188,7 +187,7 @@ int SIMIX_comm_has_recv_match(smx_rdv_t rdv, int (*match_fun)(void*, void*), voi smx_action_t action; xbt_fifo_item_t item; - xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t){ + xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t) { if (action->comm.type == SIMIX_COMM_RECEIVE && (!match_fun || match_fun(data, action->comm.dst_data))) { XBT_DEBUG("Found a matching communication action %p", action); @@ -214,6 +213,7 @@ smx_action_t SIMIX_comm_new(e_smx_comm_type_t type) /* alloc structures */ act = xbt_mallocator_get(simix_global->action_mallocator); + act->type = SIMIX_ACTION_COMMUNICATE; act->state = SIMIX_WAITING; @@ -242,11 +242,12 @@ smx_action_t SIMIX_comm_new(e_smx_comm_type_t type) */ void SIMIX_comm_destroy(smx_action_t action) { - XBT_DEBUG("Destroy action %p (refcount:%d)", action, action->comm.refcount); + XBT_DEBUG("Destroy action %p (refcount: %d), state: %d", + action, action->comm.refcount, action->state); - if (action->comm.refcount <= 0) - xbt_die("the refcount of comm %p is already 0 before decreasing it. " - "That's a bug!", action); + xbt_assert(action->comm.refcount > 0, + "The refcount of comm %p is already 0 before decreasing it. " + "That's a bug!", action); action->comm.refcount--; if (action->comm.refcount > 0) @@ -547,13 +548,15 @@ XBT_INLINE void SIMIX_comm_start(smx_action_t action) } } +/** + * \brief Answers the SIMIX requests associated to a communication action. 
+ * \param action a finished communication action + */ void SIMIX_comm_finish(smx_action_t action) { unsigned int destroy_count = 0; smx_req_t req; - SIMIX_comm_remove_from_processes(action); - while ((req = xbt_fifo_shift(action->request_list))) { /* If a waitany request is waiting for this action to finish, then remove @@ -636,8 +639,22 @@ void SIMIX_comm_finish(smx_action_t action) } break; + case SIMIX_CANCELED: + TRY { + if (req->issuer == action->comm.dst_proc) { + THROWF(cancel_error, 0, "Communication canceled by the sender"); + } + else { + THROWF(cancel_error, 0, "Communication canceled by the receiver"); + } + } + CATCH(req->issuer->running_ctx->exception) { + req->issuer->doexception = 1; + } + break; + default: - THROW_IMPOSSIBLE; + xbt_die("Unexpected action state in SIMIX_comm_finish: %d", action->state); } /* if there is an exception during a waitany or a testany, indicate the position of the failed communication */ @@ -659,6 +676,10 @@ void SIMIX_comm_finish(smx_action_t action) SIMIX_comm_destroy(action); } +/** + * \brief This function is called when a Surf communication action is finished. 
+ * \param action the corresponding Simix communication + */ void SIMIX_post_comm(smx_action_t action) { /* Update action state */ @@ -686,28 +707,19 @@ void SIMIX_post_comm(smx_action_t action) /* destroy the surf actions associated with the Simix communication */ SIMIX_comm_destroy_internal_actions(action); - /* if there are requests associated with the action, then answer them */ - if (xbt_fifo_size(action->request_list)) { - SIMIX_comm_finish(action); - } - else { - SIMIX_comm_remove_from_processes(action); - } -} - -/** - * \brief Removes a communication action from the list of pending communications - * of both processes (if they still exist) - * \param action a communication action - */ -static void SIMIX_comm_remove_from_processes(smx_action_t action) { - + /* remove the communication action from the list of pending communications + * of both processes (if they still exist) */ if (action->comm.src_proc) { xbt_fifo_remove(action->comm.src_proc->comms, action); } if (action->comm.dst_proc) { xbt_fifo_remove(action->comm.dst_proc->comms, action); } + + /* if there are requests associated with the action, then answer them */ + if (xbt_fifo_size(action->request_list)) { + SIMIX_comm_finish(action); + } } void SIMIX_comm_cancel(smx_action_t action) @@ -718,10 +730,9 @@ void SIMIX_comm_cancel(smx_action_t action) SIMIX_rdv_remove(action->comm.rdv, action); action->state = SIMIX_CANCELED; } - else if (!MC_IS_ENABLED + else if (!MC_IS_ENABLED /* when running the MC there are no surf actions */ && (action->state == SIMIX_READY || action->state == SIMIX_RUNNING)) { - /* when running the MC there are no surf actions */ surf_workstation_model->action_cancel(action->comm.surf_comm); } } diff --git a/src/simix/smx_process.c b/src/simix/smx_process.c index 0e8d20ad83..f2f550cebf 100644 --- a/src/simix/smx_process.c +++ b/src/simix/smx_process.c @@ -46,28 +46,38 @@ void SIMIX_process_cleanup(smx_process_t process) SIMIX_comm_cancel(action); if (action->comm.src_proc == 
process) { - XBT_DEBUG("Found an unfinished send comm %p (detached = %d), state %d", - action, action->comm.detached, action->state); + XBT_DEBUG("Found an unfinished send comm %p (detached = %d), state %d, src = %p, dst = %p", + action, action->comm.detached, action->state, action->comm.src_proc, action->comm.dst_proc); action->comm.src_proc = NULL; if (action->comm.detached) { - /* the receiver was supposed to destroy the comm after completion, - * but the comm will actually never finish */ - action->comm.refcount++; + if (action->comm.refcount == 0) { + /* I'm not supposed to destroy a detached comm from the sender side, + * unless there is no receiver matching the rdv */ + action->comm.refcount++; + SIMIX_comm_destroy(action); + } + } + else { + SIMIX_comm_destroy(action); } } else if (action->comm.dst_proc == process){ - XBT_DEBUG("Found an unfinished recv comm %p, state %d", action, action->state); + XBT_DEBUG("Found an unfinished recv comm %p, state %d, src = %p, dst = %p", + action, action->state, action->comm.src_proc, action->comm.dst_proc); action->comm.dst_proc = NULL; + + if (action->comm.detached && action->comm.refcount == 1 + && action->comm.src_proc != NULL) { + /* the comm will be freed right now, remove it from the sender */ + xbt_fifo_remove(action->comm.src_proc->comms, action); + } + SIMIX_comm_destroy(action); } else { - XBT_DEBUG("Strange, I'm not in comm %p, state = %d, src = %p, dst = %p", action, - action->state, action->comm.src_proc, action->comm.dst_proc); - THROW_IMPOSSIBLE; + xbt_die("Communication action %p is in my list but I'm not the sender " + "or the receiver", action); } - - /* FIXME uncommenting this instruction crashes complex simulations - SIMIX_comm_destroy(action); */ } /*xbt_swag_remove(process, simix_global->process_to_run);*/