X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/dff9e15c44ab6340d27215957c56fa72fad246a2..e487ef4538248f261ddb6e814357593df7646e33:/src/msg/msg_mailbox.c diff --git a/src/msg/msg_mailbox.c b/src/msg/msg_mailbox.c index 1f1954d1cf..c285b60005 100644 --- a/src/msg/msg_mailbox.c +++ b/src/msg/msg_mailbox.c @@ -180,17 +180,18 @@ MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task, h = MSG_host_self(); h_simdata = h->simdata; - SIMIX_mutex_lock(h->simdata->mutex); + SIMIX_mutex_lock(h_simdata->mutex); //FIXME: lock the mailbox instead if (MSG_mailbox_get_cond(mailbox)) { - CRITICAL1("A process is already blocked on the channel %s", - MSG_mailbox_get_alias(mailbox)); + CRITICAL1 + ("A process is already blocked on the channel %s (meaning that someone is already doing a get on this)", + MSG_mailbox_get_alias(mailbox)); SIMIX_cond_display_info(MSG_mailbox_get_cond(mailbox)); xbt_die("Go fix your code!"); } while (1) { - /* if the mailbox is empty (has no task */ + /* if the mailbox is not empty (has a task) */ if (!MSG_mailbox_is_empty(mailbox)) { if (!host) { /* pop the head of the mailbox */ @@ -203,8 +204,8 @@ MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task, } } - if ((timeout > 0) && (SIMIX_get_clock() - start_time >= timeout)) { - SIMIX_mutex_unlock(h->simdata->mutex); + if ((timeout > 0) && (SIMIX_get_clock() - start_time >= timeout)) { // Timeout already elapsed + SIMIX_mutex_unlock(h_simdata->mutex); MSG_mailbox_set_cond(mailbox, NULL); SIMIX_cond_destroy(cond); MSG_RETURN(MSG_TRANSFER_FAILURE); @@ -216,12 +217,13 @@ MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task, } if (timeout > 0) - SIMIX_cond_wait_timeout(cond, h->simdata->mutex, timeout - start_time); + SIMIX_cond_wait_timeout(cond, h_simdata->mutex, + timeout - start_time + SIMIX_get_clock()); else - SIMIX_cond_wait(MSG_mailbox_get_cond(mailbox), h->simdata->mutex); + SIMIX_cond_wait(cond, h_simdata->mutex); if (SIMIX_host_get_state(h_simdata->smx_host) == 0) { - SIMIX_mutex_unlock(h->simdata->mutex); + SIMIX_mutex_unlock(h_simdata->mutex); MSG_mailbox_set_cond(mailbox, NULL); SIMIX_cond_destroy(cond); MSG_RETURN(MSG_HOST_FAILURE); @@ -236,7 +238,7 @@ MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task, SIMIX_cond_destroy(cond); } - SIMIX_mutex_unlock(h->simdata->mutex); + SIMIX_mutex_unlock(h_simdata->mutex); t_simdata = t->simdata; t_simdata->receiver = process; @@ -247,76 +249,51 @@ MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task, /* Transfer */ /* create SIMIX action to the communication */ t_simdata->comm = - SIMIX_action_communicate(t_simdata->sender->simdata->m_host->simdata-> - smx_host, + SIMIX_action_communicate(t_simdata->sender->simdata->m_host-> + simdata->smx_host, process->simdata->m_host->simdata->smx_host, t->name, t_simdata->message_size, t_simdata->rate); - /* This is a hack. We know that both the receiver and the sender will - need to look at the content of t_simdata->comm. And it needs to be - destroyed. However, we don't known whether the receiver or the sender - will get to it first. So by setting with refcount to 2 we can enforce - that things happen correctly. An alternative would be to only do ++ and - -- on this refcount and to sprinkle them judiciously throughout the code, - which appears perhaps worse? Or perhaps the refcount field of - task->simdata can be used for this? At any rate, this will do for now */ - t_simdata->comm->refcount = 2; + SIMIX_action_use(t_simdata->comm); /* if the process is suspend, create the action but stop its execution, it will be restart when the sender process resume */ if (MSG_process_is_suspended(t_simdata->sender)) { DEBUG1("Process sender (%s) suspended", t_simdata->sender->name); SIMIX_action_set_priority(t_simdata->comm, 0); } - - process->simdata->waiting_task = t; SIMIX_register_action_to_condition(t_simdata->comm, t_simdata->cond); + // breaking point if asynchrounous + process->simdata->waiting_action = t_simdata->comm; while (1) { SIMIX_cond_wait(t_simdata->cond, t_simdata->mutex); if (SIMIX_action_get_state(t_simdata->comm) != SURF_ACTION_RUNNING) break; + if (!SIMIX_host_get_state(h_simdata->smx_host)) + break; + if (!SIMIX_host_get_state(process->simdata->m_host->simdata->smx_host)) + break; } SIMIX_unregister_action_to_condition(t_simdata->comm, t_simdata->cond); - process->simdata->waiting_task = NULL; - - /* If sender still around (it didn't free the comm yet), note that it's not waiting anymore */ - if (t_simdata->comm->refcount == 2) { - t->simdata->sender->simdata->waiting_task = NULL; - } + process->simdata->waiting_action = NULL; /* for this process, don't need to change in get function */ SIMIX_mutex_unlock(t_simdata->mutex); - if (SIMIX_action_get_state(t_simdata->comm) == SURF_ACTION_DONE) { - if (t_simdata->comm->refcount == 1) { - SIMIX_action_destroy(t_simdata->comm); + if (SIMIX_action_destroy(t_simdata->comm)) t_simdata->comm = NULL; - } else { - t_simdata->comm->refcount--; - } - t_simdata->refcount--; MSG_RETURN(MSG_OK); } else if (SIMIX_host_get_state(h_simdata->smx_host) == 0) { - if (t_simdata->comm->refcount == 1) { - SIMIX_action_destroy(t_simdata->comm); + if (SIMIX_action_destroy(t_simdata->comm)) t_simdata->comm = NULL; - } else { - t_simdata->comm->refcount--; - } - t_simdata->refcount--; MSG_RETURN(MSG_HOST_FAILURE); } else { - if (t_simdata->comm->refcount == 1) { - SIMIX_action_destroy(t_simdata->comm); + if (SIMIX_action_destroy(t_simdata->comm)) t_simdata->comm = NULL; - } else { - t_simdata->comm->refcount--; - } - t_simdata->refcount--; MSG_RETURN(MSG_TRANSFER_FAILURE); } } @@ -360,7 +337,7 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task, t_simdata->message_size / 1000, local_host->name, remote_host->name, MSG_mailbox_get_alias(mailbox)); - SIMIX_mutex_lock(remote_host->simdata->mutex); + SIMIX_mutex_lock(remote_host->simdata->mutex); /* FIXME: lock the mailbox instead */ /* put the task in the mailbox */ xbt_fifo_push(mailbox->tasks, task); @@ -374,7 +351,7 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task, SIMIX_mutex_lock(t_simdata->mutex); - process->simdata->waiting_task = task; + process->simdata->waiting_action = t_simdata->comm; // for debugging and status displaying purpose if (timeout > 0) { xbt_ex_t e; @@ -389,9 +366,14 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task, SIMIX_cond_wait_timeout(t_simdata->cond, t_simdata->mutex, timeout - time_elapsed); - if ((t_simdata->comm != NULL) - && (SIMIX_action_get_state(t_simdata->comm) != - SURF_ACTION_RUNNING)) + if (t_simdata->comm) + SIMIX_action_use(t_simdata->comm); + if (t_simdata->comm && (SIMIX_action_get_state(t_simdata->comm) != + SURF_ACTION_RUNNING)) + break; + if (!SIMIX_host_get_state(local_host->simdata->smx_host)) + break; + if (!SIMIX_host_get_state(remote_host->simdata->smx_host)) break; } } @@ -400,14 +382,15 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task, xbt_ex_free(e); /* verify if the timeout happened and the communication didn't started yet */ if (t_simdata->comm == NULL) { - process->simdata->waiting_task = NULL; + DEBUG1("Action terminated %s (there was a timeout)", task->name); + process->simdata->waiting_action = NULL; /* remove the task from the mailbox */ MSG_mailbox_remove(mailbox, task); - if (t_simdata->receiver && t_simdata->receiver->simdata) { /* receiver still around */ - t_simdata->receiver->simdata->waiting_task = NULL; - } +/* if (t_simdata->receiver && t_simdata->receiver->simdata) { /\* receiver still around *\/ */ +/* t_simdata->receiver->simdata->waiting_task = NULL; */ +/* } */ SIMIX_mutex_unlock(t_simdata->mutex); MSG_RETURN(MSG_TRANSFER_FAILURE); @@ -417,46 +400,42 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task, } } } else { - while (1) { + while (1) { //FIXME: factorize with the code right above SIMIX_cond_wait(t_simdata->cond, t_simdata->mutex); - if (SIMIX_action_get_state(t_simdata->comm) != SURF_ACTION_RUNNING) + if (t_simdata->comm) + SIMIX_action_use(t_simdata->comm); + if (t_simdata->comm + && SIMIX_action_get_state(t_simdata->comm) != SURF_ACTION_RUNNING) + break; + if (!SIMIX_host_get_state(local_host->simdata->smx_host)) + break; + if (!SIMIX_host_get_state(remote_host->simdata->smx_host)) break; } } DEBUG1("Action terminated %s", task->name); - process->simdata->waiting_task = NULL; - - if (t_simdata->comm->refcount == 2) { //receiver didn't free it yet: he's still around - t_simdata->receiver->simdata->waiting_task = NULL; - } + process->simdata->waiting_action = NULL; +/* if (t_simdata->receiver && t_simdata->receiver->simdata) { /\* receiver still around *\/ */ +/* t_simdata->receiver->simdata->waiting_task = NULL; */ +/* } */ - SIMIX_mutex_unlock(task->simdata->mutex); + SIMIX_mutex_unlock(t_simdata->mutex); - if (SIMIX_action_get_state(t_simdata->comm) == SURF_ACTION_DONE) { - if (t_simdata->comm->refcount == 1) { - SIMIX_action_destroy(t_simdata->comm); + if (t_simdata->comm + && SIMIX_action_get_state(t_simdata->comm) == SURF_ACTION_DONE) { + if (SIMIX_action_destroy(t_simdata->comm)) t_simdata->comm = NULL; - } else { - t_simdata->comm->refcount--; - } + t_simdata->refcount--; MSG_RETURN(MSG_OK); } else if (SIMIX_host_get_state(local_host->simdata->smx_host) == 0) { - if (t_simdata->comm->refcount == 1) { - SIMIX_action_destroy(t_simdata->comm); + if (t_simdata->comm && SIMIX_action_destroy(t_simdata->comm)) t_simdata->comm = NULL; - } else { - t_simdata->comm->refcount--; - } MSG_RETURN(MSG_HOST_FAILURE); } else { - if (t_simdata->comm->refcount == 1) { - SIMIX_action_destroy(t_simdata->comm); + if (t_simdata->comm && SIMIX_action_destroy(t_simdata->comm)) t_simdata->comm = NULL; - } else { - t_simdata->comm->refcount--; - } MSG_RETURN(MSG_TRANSFER_FAILURE); } }