X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/7a25b8e6a7aae6d40849189e58495aee0d8f726f..28fd49f9900e7a8424f38cafd21682f8c732ed42:/src/simix/smx_network.c diff --git a/src/simix/smx_network.c b/src/simix/smx_network.c index 41192b6e23..74cefadfa5 100644 --- a/src/simix/smx_network.c +++ b/src/simix/smx_network.c @@ -4,7 +4,7 @@ /* This program is free software; you can redistribute it and/or modify it * under the terms of the license (GNU LGPL) which comes with this package. */ -#include "private.h" +#include "smx_private.h" #include "xbt/log.h" #include "mc/mc.h" #include "xbt/dict.h" @@ -13,23 +13,19 @@ XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_network, simix, "Logging specific to SIMIX (network)"); static xbt_dict_t rdv_points = NULL; +unsigned long int smx_total_comms = 0; -static XBT_INLINE void SIMIX_comm_start(smx_action_t action); -static void SIMIX_comm_finish(smx_action_t action); -static void SIMIX_waitany_req_remove_from_actions(smx_req_t req); +static void SIMIX_waitany_remove_simcall_from_actions(smx_simcall_t simcall); static void SIMIX_comm_copy_data(smx_action_t comm); static smx_action_t SIMIX_comm_new(e_smx_comm_type_t type); -static XBT_INLINE void SIMIX_comm_wait_for_completion(smx_action_t comm, - double timeout); static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm); -static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm); -static smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type, - int (*match_fun)(void *, void *), void *); +static smx_action_t SIMIX_rdv_get_comm(smx_rdv_t rdv, e_smx_comm_type_t type, + int (*match_fun)(void *, void *), void *); static void SIMIX_rdv_free(void *data); void SIMIX_network_init(void) { - rdv_points = xbt_dict_new(); + rdv_points = xbt_dict_new_homogeneous(SIMIX_rdv_free); } void SIMIX_network_exit(void) @@ -43,7 +39,7 @@ void SIMIX_network_exit(void) smx_rdv_t SIMIX_rdv_create(const char *name) { - /* two processes may have pushed the same rdv_create request at the same time */ + /* two processes may have pushed the same rdv_create simcall at the same time */ smx_rdv_t rdv = name ? xbt_dict_get_or_null(rdv_points, name) : NULL; if (!rdv) { @@ -51,8 +47,8 @@ smx_rdv_t SIMIX_rdv_create(const char *name) rdv->name = name ? xbt_strdup(name) : NULL; rdv->comm_fifo = xbt_fifo_new(); - if (name) - xbt_dict_set(rdv_points, name, rdv, SIMIX_rdv_free); + if (rdv->name) + xbt_dict_set(rdv_points, rdv->name, rdv, NULL); } return rdv; } @@ -60,18 +56,22 @@ smx_rdv_t SIMIX_rdv_create(const char *name) void SIMIX_rdv_destroy(smx_rdv_t rdv) { if (rdv->name) - xbt_dict_remove(rdv_points, rdv->name); + xbt_dict_remove(rdv_points, rdv->name); } void SIMIX_rdv_free(void *data) { smx_rdv_t rdv = (smx_rdv_t) data; - if (rdv->name) - xbt_free(rdv->name); + xbt_free(rdv->name); xbt_fifo_free(rdv->comm_fifo); xbt_free(rdv); } +xbt_dict_t SIMIX_get_rdv_points() +{ + return rdv_points; +} + smx_rdv_t SIMIX_rdv_get_by_name(const char *name) { return xbt_dict_get_or_null(rdv_points, name); @@ -97,9 +97,9 @@ smx_action_t SIMIX_rdv_get_head(smx_rdv_t rdv) } /** - * \brief Push a communication request into a rendez-vous point + * \brief Pushes a communication action into a rendez-vous point * \param rdv The rendez-vous point - * \param comm The communication request + * \param comm The communication action */ static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm) { @@ -108,57 +108,107 @@ static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm) } /** - * \brief Remove a communication request from a rendez-vous point + * \brief Removes a communication action from a rendez-vous point * \param rdv The rendez-vous point - * \param comm The communication request + * \param comm The communication action */ -static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm) +XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm) { xbt_fifo_remove(rdv->comm_fifo, comm); comm->comm.rdv = NULL; } +/** + * \brief Wrapper to SIMIX_rdv_get_comm + */ +smx_action_t SIMIX_comm_get_send_match(smx_rdv_t rdv, int (*match_fun)(void*, void*), void* data) { + return SIMIX_rdv_get_comm(rdv, SIMIX_COMM_SEND, match_fun, data); +} + /** * \brief Checks if there is a communication action queued in a rendez-vous matching our needs * \param type The type of communication we are looking for (comm_send, comm_recv) * \return The communication action if found, NULL otherwise */ -smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type, +smx_action_t SIMIX_rdv_get_comm(smx_rdv_t rdv, e_smx_comm_type_t type, int (*match_fun)(void *, void *), void *data) { + // FIXME rewrite this function by using SIMIX_rdv_has_send/recv_match smx_action_t action; xbt_fifo_item_t item; - void* req_data = NULL; + void* comm_data = NULL; - xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t){ + xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t) { if (action->comm.type == SIMIX_COMM_SEND) { - req_data = action->comm.src_data; + comm_data = action->comm.src_data; } else if (action->comm.type == SIMIX_COMM_RECEIVE) { - req_data = action->comm.dst_data; + comm_data = action->comm.dst_data; } - if (action->comm.type == type && (!match_fun || match_fun(data, req_data))) { - DEBUG1("Found a matching communication action %p", action); + if (action->comm.type == type && (!match_fun || match_fun(data, comm_data))) { + XBT_DEBUG("Found a matching communication action %p", action); xbt_fifo_remove_item(rdv->comm_fifo, item); xbt_fifo_free_item(item); action->comm.refcount++; action->comm.rdv = NULL; return action; } - DEBUG3("Sorry, communication action %p does not match our needs:" + XBT_DEBUG("Sorry, communication action %p does not match our needs:" " its type is %d but we are looking for a comm of type %d", action, action->comm.type, type); } - DEBUG0("No matching communication action found"); + XBT_DEBUG("No matching communication action found"); return NULL; } +/** + * \brief Checks if there is a send communication action + * queued in a rendez-vous matching our needs. + * \return 1 if found, 0 otherwise + */ +int SIMIX_comm_has_send_match(smx_rdv_t rdv, int (*match_fun)(void*, void*), void* data) { + + smx_action_t action; + xbt_fifo_item_t item; + + xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t){ + if (action->comm.type == SIMIX_COMM_SEND + && (!match_fun || match_fun(data, action->comm.src_data))) { + XBT_DEBUG("Found a matching communication action %p", action); + return 1; + } + } + XBT_DEBUG("No matching communication action found"); + return 0; +} + +/** + * \brief Checks if there is a recv communication action + * queued in a rendez-vous matching our needs. + * \return 1 if found, 0 otherwise + */ +int SIMIX_comm_has_recv_match(smx_rdv_t rdv, int (*match_fun)(void*, void*), void* data) { + + smx_action_t action; + xbt_fifo_item_t item; + + xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t) { + if (action->comm.type == SIMIX_COMM_RECEIVE + && (!match_fun || match_fun(data, action->comm.dst_data))) { + XBT_DEBUG("Found a matching communication action %p", action); + return 1; + } + } + XBT_DEBUG("No matching communication action found"); + return 0; +} + /******************************************************************************/ /* Comunication Actions */ /******************************************************************************/ /** * \brief Creates a new comunicate action - * \param type The type of request (comm_send, comm_recv) + * \param type The direction of communication (comm_send, comm_recv) * \return The new comunicate action */ smx_action_t SIMIX_comm_new(e_smx_comm_type_t type) @@ -166,10 +216,10 @@ smx_action_t SIMIX_comm_new(e_smx_comm_type_t type) smx_action_t act; /* alloc structures */ - act = xbt_new0(s_smx_action_t, 1); + act = xbt_mallocator_get(simix_global->action_mallocator); + act->type = SIMIX_ACTION_COMMUNICATE; act->state = SIMIX_WAITING; - act->request_list = xbt_fifo_new(); /* set communication */ act->comm.type = type; @@ -184,7 +234,8 @@ smx_action_t SIMIX_comm_new(e_smx_comm_type_t type) act->category = NULL; #endif - DEBUG1("Create communicate action %p", act); + XBT_DEBUG("Create communicate action %p", act); + ++smx_total_comms; return act; } @@ -195,33 +246,37 @@ smx_action_t SIMIX_comm_new(e_smx_comm_type_t type) */ void SIMIX_comm_destroy(smx_action_t action) { - DEBUG2("Destroy action %p (refcount:%d)", action, action->comm.refcount); - - if (action->comm.refcount <= 0) - xbt_die(bprintf("the refcount of comm %p is already 0 before decreasing it. That's a bug!",action)); + XBT_DEBUG("Destroy action %p (refcount: %d), state: %d", + action, action->comm.refcount, action->state); + if (action->comm.refcount <= 0) { + xbt_backtrace_display_current(); + xbt_die("the refcount of comm %p is already 0 before decreasing it. " + "That's a bug!", action); + } action->comm.refcount--; if (action->comm.refcount > 0) return; - DEBUG2("Really free communication %p; refcount is now %d", action, + XBT_DEBUG("Really free communication %p; refcount is now %d", action, action->comm.refcount); #ifdef HAVE_LATENCY_BOUND_TRACKING action->latency_limited = SIMIX_comm_is_latency_bounded( action ) ; #endif -#ifdef HAVE_TRACING - TRACE_smx_action_destroy(action); -#endif - - if (action->name) - xbt_free(action->name); - - xbt_fifo_free(action->request_list); - + xbt_free(action->name); SIMIX_comm_destroy_internal_actions(action); - xbt_free(action); + if (action->comm.detached && action->state != SIMIX_DONE) { + /* the communication has failed and was detached: + * we have to free the buffer */ + if (action->comm.clean_fun) { + action->comm.clean_fun(action->comm.src_buff); + } + action->comm.src_buff = NULL; + } + + xbt_mallocator_release(simix_global->action_mallocator, action); } void SIMIX_comm_destroy_internal_actions(smx_action_t action) @@ -248,13 +303,16 @@ void SIMIX_comm_destroy_internal_actions(smx_action_t action) smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv, double task_size, double rate, void *src_buff, size_t src_buff_size, - int (*match_fun)(void *, void *), void *data) + int (*match_fun)(void *, void *), + void (*clean_fun)(void *), // used to free the action in case of problem after a detached send + void *data, + int detached) { smx_action_t action; - /* Look for communication request matching our needs. + /* Look for communication action matching our needs. If it is not found then create it and push it into the rendez-vous point */ - action = SIMIX_rdv_get_request(rdv, SIMIX_COMM_RECEIVE, match_fun, data); + action = SIMIX_rdv_get_comm(rdv, SIMIX_COMM_RECEIVE, match_fun, data); if (!action) { action = SIMIX_comm_new(SIMIX_COMM_SEND); @@ -263,8 +321,19 @@ smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv, action->state = SIMIX_READY; action->comm.type = SIMIX_COMM_READY; } + xbt_fifo_push(src_proc->comms, action); + + /* if the communication action is detached then decrease the refcount + * by one, so it will be eliminated by the receiver's destroy call */ + if (detached) { + action->comm.detached = 1; + action->comm.refcount--; + action->comm.clean_fun = clean_fun; + } else { + action->comm.clean_fun = NULL; + } - /* Setup the communication request */ + /* Setup the communication action */ action->comm.src_proc = src_proc; action->comm.task_size = task_size; action->comm.rate = rate; @@ -278,7 +347,7 @@ smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv, } SIMIX_comm_start(action); - return action; + return (detached ? NULL : action); } smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv, @@ -287,10 +356,10 @@ smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv, { smx_action_t action; - /* Look for communication request matching our needs. + /* Look for communication action matching our needs. * If it is not found then create it and push it into the rendez-vous point */ - action = SIMIX_rdv_get_request(rdv, SIMIX_COMM_SEND, match_fun, data); + action = SIMIX_rdv_get_comm(rdv, SIMIX_COMM_SEND, match_fun, data); if (!action) { action = SIMIX_comm_new(SIMIX_COMM_RECEIVE); @@ -299,8 +368,9 @@ smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv, action->state = SIMIX_READY; action->comm.type = SIMIX_COMM_READY; } + xbt_fifo_push(dst_proc->comms, action); - /* Setup communication request */ + /* Setup communication action */ action->comm.dst_proc = dst_proc; action->comm.dst_buff = dst_buff; action->comm.dst_buff_size = dst_buff_size; @@ -315,18 +385,31 @@ smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv, return action; } -void SIMIX_pre_comm_wait(smx_req_t req) +void SIMIX_pre_comm_wait(smx_simcall_t simcall, smx_action_t action, double timeout, int idx) { - smx_action_t action = req->comm_wait.comm; - double timeout = req->comm_wait.timeout; + + /* the simcall may be a wait, a send or a recv */ surf_action_t sleep; - /* Associate this request to the action */ - xbt_fifo_push(action->request_list, req); - req->issuer->waiting_action = action; + /* Associate this simcall to the wait action */ + xbt_fifo_push(action->simcalls, simcall); + simcall->issuer->waiting_action = action; + + if (MC_IS_ENABLED) { + if (idx == 0) { + action->state = SIMIX_DONE; + } else { + /* If we reached this point, the wait simcall must have a timeout */ + /* Otherwise it shouldn't be enabled and executed by the MC */ + if (timeout == -1) + THROW_IMPOSSIBLE; + + if (action->comm.src_proc == simcall->issuer) + action->state = SIMIX_SRC_TIMEOUT; + else + action->state = SIMIX_DST_TIMEOUT; + } - if (MC_IS_ENABLED){ - action->state = SIMIX_DONE; SIMIX_comm_finish(action); return; } @@ -336,108 +419,123 @@ void SIMIX_pre_comm_wait(smx_req_t req) if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING) { SIMIX_comm_finish(action); } else { /* if (timeout >= 0) { we need a surf sleep action even when there is no timeout, otherwise surf won't tell us when the host fails */ - sleep = surf_workstation_model->extension.workstation.sleep(req->issuer->smx_host->host, timeout); + sleep = surf_workstation_model->extension.workstation.sleep(simcall->issuer->smx_host->host, timeout); surf_workstation_model->action_data_set(sleep, action); - if (req->issuer == action->comm.src_proc) + if (simcall->issuer == action->comm.src_proc) action->comm.src_timeout = sleep; else action->comm.dst_timeout = sleep; } } -void SIMIX_pre_comm_test(smx_req_t req) +void SIMIX_pre_comm_test(smx_simcall_t simcall) { - smx_action_t action = req->comm_test.comm; - req->comm_test.result = (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING); + smx_action_t action = simcall->comm_test.comm; - if (req->comm_test.result) { - xbt_fifo_push(action->request_list, req); + if(MC_IS_ENABLED){ + simcall->comm_test.result = action->comm.src_proc && action->comm.dst_proc; + if(simcall->comm_test.result){ + action->state = SIMIX_DONE; + xbt_fifo_push(action->simcalls, simcall); + SIMIX_comm_finish(action); + }else{ + SIMIX_simcall_answer(simcall); + } + return; + } + + simcall->comm_test.result = (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING); + if (simcall->comm_test.result) { + xbt_fifo_push(action->simcalls, simcall); SIMIX_comm_finish(action); } else { - SIMIX_request_answer(req); + SIMIX_simcall_answer(simcall); } } -void SIMIX_pre_comm_testany(smx_req_t req, unsigned int idx) +void SIMIX_pre_comm_testany(smx_simcall_t simcall, int idx) { unsigned int cursor; smx_action_t action; - xbt_dynar_t actions = req->comm_testany.comms; - req->comm_testany.result = -1; + xbt_dynar_t actions = simcall->comm_testany.comms; + simcall->comm_testany.result = -1; if (MC_IS_ENABLED){ - if((int)idx == -1){ - SIMIX_request_answer(req); + if(idx == -1){ + SIMIX_simcall_answer(simcall); }else{ action = xbt_dynar_get_as(actions, idx, smx_action_t); - xbt_fifo_push(action->request_list, req); + simcall->comm_testany.result = idx; + xbt_fifo_push(action->simcalls, simcall); action->state = SIMIX_DONE; SIMIX_comm_finish(action); } return; } - xbt_dynar_foreach(req->comm_testany.comms,cursor,action) { + xbt_dynar_foreach(simcall->comm_testany.comms,cursor,action) { if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING) { - req->comm_testany.result = cursor; - xbt_fifo_push(action->request_list, req); + simcall->comm_testany.result = cursor; + xbt_fifo_push(action->simcalls, simcall); SIMIX_comm_finish(action); return; } } - SIMIX_request_answer(req); + SIMIX_simcall_answer(simcall); } -void SIMIX_pre_comm_waitany(smx_req_t req, unsigned int idx) +void SIMIX_pre_comm_waitany(smx_simcall_t simcall, int idx) { smx_action_t action; unsigned int cursor = 0; - xbt_dynar_t actions = req->comm_waitany.comms; + xbt_dynar_t actions = simcall->comm_waitany.comms; if (MC_IS_ENABLED){ action = xbt_dynar_get_as(actions, idx, smx_action_t); - xbt_fifo_push(action->request_list, req); - req->comm_waitany.result = idx; + xbt_fifo_push(action->simcalls, simcall); + simcall->comm_waitany.result = idx; action->state = SIMIX_DONE; SIMIX_comm_finish(action); return; } xbt_dynar_foreach(actions, cursor, action){ - /* Associate this request to the action */ - xbt_fifo_push(action->request_list, req); + /* associate this simcall to the the action */ + xbt_fifo_push(action->simcalls, simcall); + + /* see if the action is already finished */ if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING){ - req->comm_waitany.result = cursor; SIMIX_comm_finish(action); break; } } } -void SIMIX_waitany_req_remove_from_actions(smx_req_t req) +void SIMIX_waitany_remove_simcall_from_actions(smx_simcall_t simcall) { smx_action_t action; unsigned int cursor = 0; - xbt_dynar_t actions = req->comm_waitany.comms; + xbt_dynar_t actions = simcall->comm_waitany.comms; - xbt_dynar_foreach(actions, cursor, action){ - xbt_fifo_remove(action->request_list, req); + xbt_dynar_foreach(actions, cursor, action) { + xbt_fifo_remove(action->simcalls, simcall); } } /** - * \brief Start the simulation of a communication request - * \param action The communication action + * \brief Starts the simulation of a communication action. + * \param action the communication action */ -static XBT_INLINE void SIMIX_comm_start(smx_action_t action) +XBT_INLINE void SIMIX_comm_start(smx_action_t action) { /* If both the sender and the receiver are already there, start the communication */ if (action->state == SIMIX_READY) { + smx_host_t sender = action->comm.src_proc->smx_host; smx_host_t receiver = action->comm.dst_proc->smx_host; - DEBUG3("Starting communication %p from '%s' to '%s'", action, + XBT_DEBUG("Starting communication %p from '%s' to '%s'", action, SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver)); action->comm.surf_comm = surf_workstation_model->extension.workstation. @@ -447,13 +545,9 @@ static XBT_INLINE void SIMIX_comm_start(smx_action_t action) action->state = SIMIX_RUNNING; -#ifdef HAVE_TRACING - TRACE_smx_action_communicate(action, action->comm.src_proc); -#endif - /* If a link is failed, detect it immediately */ if (surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED) { - DEBUG2("Communication from '%s' to '%s' failed to start because of a link failure", + XBT_DEBUG("Communication from '%s' to '%s' failed to start because of a link failure", SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver)); action->state = SIMIX_LINK_FAILURE; SIMIX_comm_destroy_internal_actions(action); @@ -469,107 +563,118 @@ static XBT_INLINE void SIMIX_comm_start(smx_action_t action) } } +/** + * \brief Answers the SIMIX simcalls associated to a communication action. + * \param action a finished communication action + */ void SIMIX_comm_finish(smx_action_t action) { - smx_req_t req; + unsigned int destroy_count = 0; + smx_simcall_t simcall; - while ((req = xbt_fifo_shift(action->request_list))) { + while ((simcall = xbt_fifo_shift(action->simcalls))) { - /* If a waitany request is waiting for this action to finish, then remove + /* If a waitany simcall is waiting for this action to finish, then remove it from the other actions in the waitany list. Afterwards, get the - position of the actual action in the waitany request's actions dynar and - return it as the result of the call */ - if (req->call == REQ_COMM_WAITANY) { - SIMIX_waitany_req_remove_from_actions(req); + position of the actual action in the waitany dynar and + return it as the result of the simcall */ + if (simcall->call == SIMCALL_COMM_WAITANY) { + SIMIX_waitany_remove_simcall_from_actions(simcall); + if (!MC_IS_ENABLED) + simcall->comm_waitany.result = xbt_dynar_search(simcall->comm_waitany.comms, &action); } /* If the action is still in a rendez-vous point then remove from it */ if (action->comm.rdv) SIMIX_rdv_remove(action->comm.rdv, action); - DEBUG1("SIMIX_comm_finish: action state = %d", action->state); + XBT_DEBUG("SIMIX_comm_finish: action state = %d", action->state); /* Check out for errors */ switch (action->state) { case SIMIX_DONE: - DEBUG1("Communication %p complete!", action); + XBT_DEBUG("Communication %p complete!", action); SIMIX_comm_copy_data(action); break; case SIMIX_SRC_TIMEOUT: - TRY { - THROW0(timeout_error, 0, "Communication timeouted because of sender"); - } - CATCH(req->issuer->running_ctx->exception) { - req->issuer->doexception = 1; - } + SMX_EXCEPTION(simcall->issuer, timeout_error, 0, + "Communication timeouted because of sender"); break; case SIMIX_DST_TIMEOUT: - TRY { - THROW0(timeout_error, 0, "Communication timeouted because of receiver"); - } - CATCH(req->issuer->running_ctx->exception) { - req->issuer->doexception = 1; - } + SMX_EXCEPTION(simcall->issuer, timeout_error, 0, + "Communication timeouted because of receiver"); break; case SIMIX_SRC_HOST_FAILURE: - TRY { - if (req->issuer == action->comm.src_proc) - THROW0(host_error, 0, "Host failed"); - else - THROW0(network_error, 0, "Remote peer failed"); - } - CATCH(req->issuer->running_ctx->exception) { - req->issuer->doexception = 1; - } + if (simcall->issuer == action->comm.src_proc) + SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed"); + else + SMX_EXCEPTION(simcall->issuer, network_error, 0, "Remote peer failed"); break; case SIMIX_DST_HOST_FAILURE: - TRY { - if (req->issuer == action->comm.dst_proc) - THROW0(host_error, 0, "Host failed"); - else - THROW0(network_error, 0, "Remote peer failed"); - } - CATCH(req->issuer->running_ctx->exception) { - req->issuer->doexception = 1; - } + if (simcall->issuer == action->comm.dst_proc) + SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed"); + else + SMX_EXCEPTION(simcall->issuer, network_error, 0, "Remote peer failed"); break; case SIMIX_LINK_FAILURE: - TRY { - DEBUG5("Link failure in action %p between '%s' and '%s': posting an exception to the issuer: %s (%p)", - action, action->comm.src_proc->smx_host->name, action->comm.dst_proc->smx_host->name, - req->issuer->name, req->issuer); - THROW0(network_error, 0, "Link failure"); - } - CATCH(req->issuer->running_ctx->exception) { - req->issuer->doexception = 1; + XBT_DEBUG("Link failure in action %p between '%s' and '%s': posting an exception to the issuer: %s (%p) detached:%d", + action, + action->comm.src_proc ? action->comm.src_proc->smx_host->name : NULL, + action->comm.dst_proc ? action->comm.dst_proc->smx_host->name : NULL, + simcall->issuer->name, simcall->issuer, action->comm.detached); + if (action->comm.src_proc == simcall->issuer) { + XBT_DEBUG("I'm source"); + } else if (action->comm.dst_proc == simcall->issuer) { + XBT_DEBUG("I'm dest"); + } else { + XBT_DEBUG("I'm neither source nor dest"); } + SMX_EXCEPTION(simcall->issuer, network_error, 0, "Link failure"); + break; + + case SIMIX_CANCELED: + if (simcall->issuer == action->comm.dst_proc) + SMX_EXCEPTION(simcall->issuer, cancel_error, 0, + "Communication canceled by the sender"); + else + SMX_EXCEPTION(simcall->issuer, cancel_error, 0, + "Communication canceled by the receiver"); break; default: - THROW_IMPOSSIBLE; + xbt_die("Unexpected action state in SIMIX_comm_finish: %d", action->state); } /* if there is an exception during a waitany or a testany, indicate the position of the failed communication */ - if (req->issuer->doexception) { - if (req->call == REQ_COMM_WAITANY) { - req->issuer->running_ctx->exception.value = xbt_dynar_search(req->comm_waitany.comms, &action); + if (simcall->issuer->doexception) { + if (simcall->call == SIMCALL_COMM_WAITANY) { + simcall->issuer->running_ctx->exception.value = xbt_dynar_search(simcall->comm_waitany.comms, &action); } - else if (req->call == REQ_COMM_TESTANY) { - req->issuer->running_ctx->exception.value = xbt_dynar_search(req->comm_testany.comms, &action); + else if (simcall->call == SIMCALL_COMM_TESTANY) { + simcall->issuer->running_ctx->exception.value = xbt_dynar_search(simcall->comm_testany.comms, &action); } } - req->issuer->waiting_action = NULL; - SIMIX_request_answer(req); + simcall->issuer->waiting_action = NULL; + xbt_fifo_remove(simcall->issuer->comms, action); + SIMIX_simcall_answer(simcall); + destroy_count++; } + + while (destroy_count-- > 0) + SIMIX_comm_destroy(action); } +/** + * \brief This function is called when a Surf communication action is finished. + * \param action the corresponding Simix communication + */ void SIMIX_post_comm(smx_action_t action) { /* Update action state */ @@ -586,30 +691,44 @@ void SIMIX_post_comm(smx_action_t action) surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_FAILED) action->state = SIMIX_DST_HOST_FAILURE; else if (action->comm.surf_comm && - surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED) + surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED) { + XBT_DEBUG("Puta madre. Surf says that the link broke"); action->state = SIMIX_LINK_FAILURE; - else + } else action->state = SIMIX_DONE; - DEBUG1("SIMIX_post_comm: action state = %d", action->state); + XBT_DEBUG("SIMIX_post_comm: comm %p, state %d, src_proc %p, dst_proc %p, detached: %d", + action, action->state, action->comm.src_proc, action->comm.dst_proc, action->comm.detached); - /* After this point the surf actions associated with the simix communicate - action are no longer needed, thus we delete them. */ + /* destroy the surf actions associated with the Simix communication */ SIMIX_comm_destroy_internal_actions(action); - /* If there are requests associated with the action, then answer them */ - if (xbt_fifo_size(action->request_list)) + /* remove the communication action from the list of pending communications + * of both processes (if they still exist) */ + if (action->comm.src_proc) { + xbt_fifo_remove(action->comm.src_proc->comms, action); + } + if (action->comm.dst_proc) { + xbt_fifo_remove(action->comm.dst_proc->comms, action); + } + + /* if there are simcalls associated with the action, then answer them */ + if (xbt_fifo_size(action->simcalls)) { SIMIX_comm_finish(action); + } } void SIMIX_comm_cancel(smx_action_t action) { - /* If the action is a waiting state means that it is still in a rdv */ + /* if the action is a waiting state means that it is still in a rdv */ /* so remove from it and delete it */ if (action->state == SIMIX_WAITING) { SIMIX_rdv_remove(action->comm.rdv, action); - action->state = SIMIX_FAILED; - } else { + action->state = SIMIX_CANCELED; + } + else if (!MC_IS_ENABLED /* when running the MC there are no surf actions */ + && (action->state == SIMIX_READY || action->state == SIMIX_RUNNING)) { + surf_workstation_model->action_cancel(action->comm.surf_comm); } } @@ -637,6 +756,10 @@ double SIMIX_comm_get_remains(smx_action_t action) { double remains; + if(!action){ + return 0; + } + switch (action->state) { case SIMIX_RUNNING: @@ -680,33 +803,6 @@ void* SIMIX_comm_get_dst_data(smx_action_t action) return action->comm.dst_data; } -void* SIMIX_comm_get_src_buff(smx_action_t action) -{ - return action->comm.src_buff; -} - -void* SIMIX_comm_get_dst_buff(smx_action_t action) -{ - return action->comm.dst_buff; -} - -size_t SIMIX_comm_get_src_buff_size(smx_action_t action) -{ - return action->comm.src_buff_size; -} - -size_t SIMIX_comm_get_dst_buff_size(smx_action_t action) -{ - size_t buff_size; - - if (action->comm.dst_buff_size) - buff_size = *(action->comm.dst_buff_size); - else - buff_size = 0; - - return buff_size; -} - smx_process_t SIMIX_comm_get_src_proc(smx_action_t action) { return action->comm.src_proc; @@ -724,10 +820,13 @@ smx_process_t SIMIX_comm_get_dst_proc(smx_action_t action) */ XBT_INLINE int SIMIX_comm_is_latency_bounded(smx_action_t action) { + if(!action){ + return 0; + } if (action->comm.surf_comm){ - DEBUG1("Getting latency limited for surf_action (%p)", action->comm.surf_comm); + XBT_DEBUG("Getting latency limited for surf_action (%p)", action->comm.surf_comm); action->latency_limited = surf_workstation_model->get_latency_limited(action->comm.surf_comm); - DEBUG1("Action limited is %d", action->latency_limited); + XBT_DEBUG("Action limited is %d", action->latency_limited); } return action->latency_limited; } @@ -736,25 +835,36 @@ XBT_INLINE int SIMIX_comm_is_latency_bounded(smx_action_t action) /******************************************************************************/ /* SIMIX_comm_copy_data callbacks */ /******************************************************************************/ -static void (*SIMIX_comm_copy_data_callback) (smx_action_t, size_t) = +static void (*SIMIX_comm_copy_data_callback) (smx_action_t, void*, size_t) = &SIMIX_comm_copy_pointer_callback; void -SIMIX_comm_set_copy_data_callback(void (*callback) (smx_action_t, size_t)) +SIMIX_comm_set_copy_data_callback(void (*callback) (smx_action_t, void*, size_t)) { SIMIX_comm_copy_data_callback = callback; } -void SIMIX_comm_copy_pointer_callback(smx_action_t comm, size_t buff_size) +void SIMIX_comm_copy_pointer_callback(smx_action_t comm, void* buff, size_t buff_size) { - xbt_assert1((buff_size == sizeof(void *)), + xbt_assert((buff_size == sizeof(void *)), "Cannot copy %zu bytes: must be sizeof(void*)", buff_size); - *(void **) (comm->comm.dst_buff) = comm->comm.src_buff; + *(void **) (comm->comm.dst_buff) = buff; } -void SIMIX_comm_copy_buffer_callback(smx_action_t comm, size_t buff_size) +void SIMIX_comm_copy_buffer_callback(smx_action_t comm, void* buff, size_t buff_size) { - memcpy(comm->comm.dst_buff, comm->comm.src_buff, buff_size); + XBT_DEBUG("Copy the data over"); + memcpy(comm->comm.dst_buff, buff, buff_size); +} + +void smpi_comm_copy_data_callback(smx_action_t comm, void* buff, size_t buff_size) +{ + XBT_DEBUG("Copy the data over"); + memcpy(comm->comm.dst_buff, buff, buff_size); + if (comm->comm.detached) { // if this is a detached send, the source buffer was duplicated by SMPI sender to make the original buffer available to the application ASAP + xbt_free(buff); + comm->comm.src_buff = NULL; + } } /** @@ -768,10 +878,12 @@ void SIMIX_comm_copy_data(smx_action_t comm) if (!comm->comm.src_buff || !comm->comm.dst_buff || comm->comm.copied == 1) return; - DEBUG6("Copying comm %p data from %s (%p) -> %s (%p) (%zu bytes)", + XBT_DEBUG("Copying comm %p data from %s (%p) -> %s (%p) (%zu bytes)", comm, - comm->comm.src_proc->smx_host->name, comm->comm.src_buff, - comm->comm.dst_proc->smx_host->name, comm->comm.dst_buff, buff_size); + comm->comm.src_proc ? comm->comm.src_proc->smx_host->name : "a finished process", + comm->comm.src_buff, + comm->comm.dst_proc ? comm->comm.dst_proc->smx_host->name : "a finished process", + comm->comm.dst_buff, buff_size); /* Copy at most dst_buff_size bytes of the message to receiver's buffer */ if (comm->comm.dst_buff_size) @@ -781,10 +893,8 @@ void SIMIX_comm_copy_data(smx_action_t comm) if (comm->comm.dst_buff_size) *comm->comm.dst_buff_size = buff_size; - if (buff_size == 0) - return; - - (*SIMIX_comm_copy_data_callback) (comm, buff_size); + if (buff_size > 0) + SIMIX_comm_copy_data_callback (comm, comm->comm.src_buff, buff_size); /* Set the copied flag so we copy data only once */ /* (this function might be called from both communication ends) */