1 /* Copyright (c) 2009, 2010. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
/* Declares the default logging sub-category used by this file: simix_network,
 * handed to the macro together with the simix category. */
12 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_network, simix,
13 "Logging specific to SIMIX (network)");
/* Dictionary holding the rendez-vous points: allocated by SIMIX_network_init(),
 * released by SIMIX_network_exit(), filled by SIMIX_rdv_create() using
 * rdv->name as the key. */
15 static xbt_dict_t rdv_points = NULL;
/* NOTE(review): presumably the number of communications created so far; no
 * update of this counter is visible nearby -- confirm where it is maintained. */
16 unsigned long int smx_total_comms = 0;
/* Prototypes of the helpers that are used before their definition below. */
18 static XBT_INLINE void SIMIX_comm_start(smx_action_t action);
19 static void SIMIX_comm_finish(smx_action_t action);
20 static void SIMIX_waitany_req_remove_from_actions(smx_req_t req);
21 static void SIMIX_comm_copy_data(smx_action_t comm);
22 static smx_action_t SIMIX_comm_new(e_smx_comm_type_t type);
23 static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm);
24 static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm);
25 static smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
26 int (*match_fun)(void *, void *), void *);
27 static void SIMIX_rdv_free(void *data);
/* Initialises the network module by allocating the rdv_points dictionary. */
29 void SIMIX_network_init(void)
31 rdv_points = xbt_dict_new();
/* Shuts the network module down by freeing the rdv_points dictionary.
 * The address of the variable is passed to xbt_dict_free(). */
34 void SIMIX_network_exit(void)
36 xbt_dict_free(&rdv_points);
39 /******************************************************************************/
40 /* Rendez-Vous Points */
41 /******************************************************************************/
/* Looks up, or builds, a rendez-vous point.
 * name: name of the rendez-vous point. May be NULL: then no dictionary lookup
 *       is done and rdv->name is left NULL.
 * A freshly built point is a zero-initialised s_smx_rvpoint_t with its own
 * copy of the name and an empty comm_fifo. It is stored in rdv_points under
 * rdv->name, with SIMIX_rdv_free given as the last argument of xbt_dict_set()
 * (presumably the function used to free the entry -- confirm). */
43 smx_rdv_t SIMIX_rdv_create(const char *name)
45 /* two processes may have pushed the same rdv_create request at the same time */
46 smx_rdv_t rdv = name ? xbt_dict_get_or_null(rdv_points, name) : NULL;
49 rdv = xbt_new0(s_smx_rvpoint_t, 1);
50 rdv->name = name ? xbt_strdup(name) : NULL;
51 rdv->comm_fifo = xbt_fifo_new();
54 xbt_dict_set(rdv_points, rdv->name, rdv, SIMIX_rdv_free);
/* Destroys a rendez-vous point by removing the entry stored under rdv->name
 * from rdv_points.
 * NOTE(review): SIMIX_rdv_create() can leave rdv->name NULL; confirm that this
 * removal is guarded for unnamed rendez-vous points. */
59 void SIMIX_rdv_destroy(smx_rdv_t rdv)
62 xbt_dict_remove(rdv_points, rdv->name);
/* Release function for a rendez-vous point; this is the function handed to
 * xbt_dict_set() in SIMIX_rdv_create().
 * data: the rendez-vous point, received as an untyped pointer. */
65 void SIMIX_rdv_free(void *data)
67 smx_rdv_t rdv = (smx_rdv_t) data;
/* Frees the queue of pending communication requests of this point. */
70 xbt_fifo_free(rdv->comm_fifo);
/* Returns the rendez-vous point stored in rdv_points under `name`, or NULL
 * when the dictionary has no such entry. */
74 smx_rdv_t SIMIX_rdv_get_by_name(const char *name)
76 return xbt_dict_get_or_null(rdv_points, name);
/* Walks the comm_fifo of `rdv` and compares the host of each queued
 * communication's source process with `host`.
 * NOTE(review): presumably returns how many queued communications have their
 * source process on `host`; the counter is not visible here -- confirm.
 * NOTE(review): src_proc is dereferenced for every queued entry; confirm it is
 * always set, since SIMIX_comm_irecv() queues actions before a sender exists. */
79 int SIMIX_rdv_comm_count_by_host(smx_rdv_t rdv, smx_host_t host)
81 smx_action_t comm = NULL;
82 xbt_fifo_item_t item = NULL;
85 xbt_fifo_foreach(rdv->comm_fifo, item, comm, smx_action_t) {
86 if (comm->comm.src_proc->smx_host == host)
/* Returns the content of the first item of the rendez-vous point's comm_fifo,
 * i.e. the communication at the head of the queue. The queue is not modified.
 * NOTE(review): behaviour on an empty fifo depends on
 * xbt_fifo_get_item_content() accepting a NULL item -- confirm. */
93 smx_action_t SIMIX_rdv_get_head(smx_rdv_t rdv)
95 return xbt_fifo_get_item_content(xbt_fifo_get_first_item(rdv->comm_fifo));
99 * \brief Push a communication request into a rendez-vous point
100 * \param rdv The rendez-vous point
101 * \param comm The communication request
103 static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm)
105 xbt_fifo_push(rdv->comm_fifo, comm);
/* Back-pointer to the owning rendez-vous point; SIMIX_comm_finish() tests it
 * to know whether the communication is still queued somewhere. */
106 comm->comm.rdv = rdv;
110 * \brief Remove a communication request from a rendez-vous point
111 * \param rdv The rendez-vous point
112 * \param comm The communication request
114 static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm)
116 xbt_fifo_remove(rdv->comm_fifo, comm);
/* Clear the back-pointer so the communication no longer claims to be queued. */
117 comm->comm.rdv = NULL;
121 * \brief Checks if there is a communication action queued in a rendez-vous matching our needs
122 * \param type The type of communication we are looking for (comm_send, comm_recv)
123 * \return The communication action if found, NULL otherwise
125 smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
126 int (*match_fun)(void *, void *), void *data)
/* rdv:       rendez-vous point whose comm_fifo is scanned.
 * match_fun: optional filter; when NULL, every action of the requested type
 *            matches.
 * data:      caller-side value passed to match_fun as its first argument. */
129 xbt_fifo_item_t item;
130 void* req_data = NULL;
132 xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t){
/* Pick the user data of the side that queued this action: src_data for a
 * send, dst_data for a receive. It becomes match_fun's second argument. */
133 if (action->comm.type == SIMIX_COMM_SEND) {
134 req_data = action->comm.src_data;
135 } else if (action->comm.type == SIMIX_COMM_RECEIVE) {
136 req_data = action->comm.dst_data;
138 if (action->comm.type == type && (!match_fun || match_fun(data, req_data))) {
139 XBT_DEBUG("Found a matching communication action %p", action);
/* A match leaves the queue: its fifo item is unlinked and freed, the action
 * gains one reference, and its rendez-vous back-pointer is cleared. */
140 xbt_fifo_remove_item(rdv->comm_fifo, item);
141 xbt_fifo_free_item(item);
142 action->comm.refcount++;
143 action->comm.rdv = NULL;
146 XBT_DEBUG("Sorry, communication action %p does not match our needs:"
147 " its type is %d but we are looking for a comm of type %d",
148 action, action->comm.type, type);
150 XBT_DEBUG("No matching communication action found");
154 /******************************************************************************/
155 /* Communication Actions */
156 /******************************************************************************/
159 * \brief Creates a new communicate action
160 * \param type The type of request (comm_send, comm_recv)
161 * \return The new communicate action
163 smx_action_t SIMIX_comm_new(e_smx_comm_type_t type)
/* The action is taken from simix_global->action_mallocator rather than
 * allocated directly; SIMIX_comm_destroy() releases it to the same mallocator. */
167 /* alloc structures */
168 act = xbt_mallocator_get(simix_global->action_mallocator);
169 act->type = SIMIX_ACTION_COMMUNICATE;
170 act->state = SIMIX_WAITING;
/* The creator holds the first reference. */
172 /* set communication */
173 act->comm.type = type;
174 act->comm.refcount = 1;
176 #ifdef HAVE_LATENCY_BOUND_TRACKING
177 //initialize with unknown value
178 act->latency_limited = -1;
182 act->category = NULL;
185 XBT_DEBUG("Create communicate action %p", act);
192 * \brief Destroy a communicate action
193 * \param action The communicate action to be destroyed
195 void SIMIX_comm_destroy(smx_action_t action)
197 XBT_DEBUG("Destroy action %p (refcount:%d)", action, action->comm.refcount);
/* Dropping a reference that does not exist is treated as a fatal error. */
199 if (action->comm.refcount <= 0)
200 xbt_die("the refcount of comm %p is already 0 before decreasing it. "
201 "That's a bug!", action);
/* Release one reference; the teardown below is meant for the last one only. */
203 action->comm.refcount--;
204 if (action->comm.refcount > 0)
206 XBT_DEBUG("Really free communication %p; refcount is now %d", action,
207 action->comm.refcount);
209 #ifdef HAVE_LATENCY_BOUND_TRACKING
210 action->latency_limited = SIMIX_comm_is_latency_bounded( action ) ;
214 TRACE_smx_action_destroy(action);
217 xbt_free(action->name);
218 SIMIX_comm_destroy_internal_actions(action);
220 if (action->comm.detached && action->state != SIMIX_DONE) {
221 /* the communication has failed and was detached:
222 * we have to free the buffer */
/* src_data is cast to a void_f_pvoid_t and called on src_buff.
 * NOTE(review): this assumes detached senders store a cleanup function in
 * src_data -- confirm against the callers of SIMIX_comm_isend(). */
223 ((void_f_pvoid_t) action->comm.src_data)(action->comm.src_buff);
/* Hand the structure back to the mallocator it came from. */
226 xbt_mallocator_release(simix_global->action_mallocator, action);
/* Drops the surf actions attached to a communication: the transfer itself
 * (surf_comm) and the two per-side timeout actions. Each one that is set is
 * unreferenced through its model and the field is reset to NULL, so calling
 * this again on the same action finds nothing left to release. */
229 void SIMIX_comm_destroy_internal_actions(smx_action_t action)
231 if (action->comm.surf_comm){
232 #ifdef HAVE_LATENCY_BOUND_TRACKING
/* Sample the latency-bound flag while the surf action still exists. */
233 action->latency_limited = SIMIX_comm_is_latency_bounded(action);
235 action->comm.surf_comm->model_type->action_unref(action->comm.surf_comm);
236 action->comm.surf_comm = NULL;
239 if (action->comm.src_timeout){
240 action->comm.src_timeout->model_type->action_unref(action->comm.src_timeout);
241 action->comm.src_timeout = NULL;
244 if (action->comm.dst_timeout){
245 action->comm.dst_timeout->model_type->action_unref(action->comm.dst_timeout);
246 action->comm.dst_timeout = NULL;
/* Posts the sender side of a communication on a rendez-vous point.
 * src_proc:      sending process, recorded in comm.src_proc.
 * rdv:           rendez-vous point searched for a pending receive.
 * task_size:     stored in comm.task_size; given to surf by SIMIX_comm_start().
 * rate:          stored in comm.rate; given to surf by SIMIX_comm_start().
 * src_buff:      sender's buffer, with its size in src_buff_size.
 * match_fun:     optional filter forwarded to SIMIX_rdv_get_request().
 * data:          passed to match_fun, then stored in comm.src_data.
 * The function searches rdv for a queued receive, creates and queues a send
 * action, or marks an action READY, then fills the sender-side fields and
 * calls SIMIX_comm_start(). */
250 smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
251 double task_size, double rate,
252 void *src_buff, size_t src_buff_size,
253 int (*match_fun)(void *, void *), void *data,
258 /* Look for communication request matching our needs.
259 If it is not found then create it and push it into the rendez-vous point */
260 action = SIMIX_rdv_get_request(rdv, SIMIX_COMM_RECEIVE, match_fun, data);
263 action = SIMIX_comm_new(SIMIX_COMM_SEND);
264 SIMIX_rdv_push(rdv, action);
/* READY is the state SIMIX_comm_start() requires before it launches the
 * surf transfer. */
266 action->state = SIMIX_READY;
267 action->comm.type = SIMIX_COMM_READY;
270 /* If the communication action is detached then decrease the refcount
271 * by one, so it will be eliminated by the receivers destroy call */
273 action->comm.detached = 1;
274 action->comm.refcount--;
277 /* Setup the communication request */
278 action->comm.src_proc = src_proc;
279 action->comm.task_size = task_size;
280 action->comm.rate = rate;
281 action->comm.src_buff = src_buff;
282 action->comm.src_buff_size = src_buff_size;
283 action->comm.src_data = data;
/* NOTE(review): this forces RUNNING without going through SIMIX_comm_start();
 * confirm which condition guards it (SIMIX_comm_cancel() mentions that no
 * surf actions exist when running the MC). */
286 action->state = SIMIX_RUNNING;
290 SIMIX_comm_start(action);
/* Posts the receiver side of a communication on a rendez-vous point.
 * dst_proc:      receiving process, recorded in comm.dst_proc.
 * rdv:           rendez-vous point searched for a pending send.
 * dst_buff:      receiver's buffer.
 * dst_buff_size: pointer to the buffer capacity; SIMIX_comm_copy_data() reads
 *                it and writes the copied byte count back through it.
 * match_fun:     optional filter forwarded to SIMIX_rdv_get_request().
 * data:          passed to match_fun, then stored in comm.dst_data.
 * Mirrors SIMIX_comm_isend() with the roles swapped: it looks for a queued
 * send and fills the receiver-side fields before calling SIMIX_comm_start(). */
294 smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv,
295 void *dst_buff, size_t *dst_buff_size,
296 int (*match_fun)(void *, void *), void *data)
300 /* Look for communication request matching our needs.
301 * If it is not found then create it and push it into the rendez-vous point
303 action = SIMIX_rdv_get_request(rdv, SIMIX_COMM_SEND, match_fun, data);
306 action = SIMIX_comm_new(SIMIX_COMM_RECEIVE);
307 SIMIX_rdv_push(rdv, action);
309 action->state = SIMIX_READY;
310 action->comm.type = SIMIX_COMM_READY;
313 /* Setup communication request */
314 action->comm.dst_proc = dst_proc;
315 action->comm.dst_buff = dst_buff;
316 action->comm.dst_buff_size = dst_buff_size;
317 action->comm.dst_data = data;
/* NOTE(review): same forced RUNNING state as in SIMIX_comm_isend(); confirm
 * the condition that guards it. */
320 action->state = SIMIX_RUNNING;
324 SIMIX_comm_start(action);
/* Handles a request that waits on a communication.
 * req:     the request; its issuer is the waiting process.
 * action:  the communication waited for.
 * timeout: duration passed to the surf sleep action armed on the issuer's host.
 * idx:     NOTE(review): not used by the statements below; presumably selects
 *          the outcome when the model checker drives the wait -- confirm.
 * The request is registered on the action. If the action is already past
 * WAITING/RUNNING it is finished at once; otherwise a sleep action is attached
 * as the timeout of whichever side the issuer is on. */
328 void SIMIX_pre_comm_wait(smx_req_t req, smx_action_t action, double timeout, int idx)
330 /* the request may be a wait, a send or a recv */
333 /* Associate this request to the action */
334 xbt_fifo_push(action->request_list, req);
335 req->issuer->waiting_action = action;
339 action->state = SIMIX_DONE;
341 /* If we reached this point, the wait request must have a timeout */
342 /* Otherwise it shouldn't be enabled and executed by the MC */
/* The timeout is charged to the side the issuer is on. */
346 if (action->comm.src_proc == req->issuer)
347 action->state = SIMIX_SRC_TIMEOUT;
349 action->state = SIMIX_DST_TIMEOUT;
352 SIMIX_comm_finish(action);
356 /* If the action has already finished, perform the error handling; */
357 /* otherwise set up a waiting timeout on the right side */
358 if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING) {
359 SIMIX_comm_finish(action);
360 } else { /* if (timeout >= 0) { we need a surf sleep action even when there is no timeout, otherwise surf won't tell us when the host fails */
361 sleep = surf_workstation_model->extension.workstation.sleep(req->issuer->smx_host->host, timeout);
/* Link the sleep action back to the communication it belongs to. */
362 surf_workstation_model->action_data_set(sleep, action);
364 if (req->issuer == action->comm.src_proc)
365 action->comm.src_timeout = sleep;
367 action->comm.dst_timeout = sleep;
/* Handles a request that tests whether a communication is over, storing the
 * answer in req->comm_test.result. Two code paths are visible:
 *  - one treats the communication as over as soon as both src_proc and
 *    dst_proc are set, and forces the state to DONE;
 *  - the other treats it as over when the state is neither WAITING nor
 *    RUNNING.
 * When the result is true the request is queued on the action and answered
 * through SIMIX_comm_finish(); SIMIX_request_answer() is called directly in
 * the remaining case.
 * NOTE(review): confirm which condition selects the first path (presumably
 * the model checker being enabled). */
371 void SIMIX_pre_comm_test(smx_req_t req)
373 smx_action_t action = req->comm_test.comm;
376 req->comm_test.result = action->comm.src_proc && action->comm.dst_proc;
377 if(req->comm_test.result){
378 action->state = SIMIX_DONE;
379 xbt_fifo_push(action->request_list, req);
380 SIMIX_comm_finish(action);
382 SIMIX_request_answer(req);
387 req->comm_test.result = (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING);
388 if (req->comm_test.result) {
389 xbt_fifo_push(action->request_list, req);
390 SIMIX_comm_finish(action);
392 SIMIX_request_answer(req);
/* Handles a request that tests a set of communications (req->comm_testany.comms).
 * req->comm_testany.result starts at -1 and receives the position, within the
 * dynar, of a communication that is over.
 * idx: position used directly by the first path below.
 * NOTE(review): the first path takes the communication at position idx and
 * forces it to DONE; confirm the condition that selects it (presumably the
 * model checker) and the meaning of idx in the case that answers directly. */
396 void SIMIX_pre_comm_testany(smx_req_t req, int idx)
400 xbt_dynar_t actions = req->comm_testany.comms;
401 req->comm_testany.result = -1;
405 SIMIX_request_answer(req);
407 action = xbt_dynar_get_as(actions, idx, smx_action_t);
408 req->comm_testany.result = idx;
409 xbt_fifo_push(action->request_list, req);
410 action->state = SIMIX_DONE;
411 SIMIX_comm_finish(action);
/* Regular path: scan the set for a communication that is neither WAITING nor
 * RUNNING and answer the request through it. */
416 xbt_dynar_foreach(req->comm_testany.comms,cursor,action) {
417 if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING) {
418 req->comm_testany.result = cursor;
419 xbt_fifo_push(action->request_list, req);
420 SIMIX_comm_finish(action);
424 SIMIX_request_answer(req);
/* Handles a request that waits for any communication of a set
 * (req->comm_waitany.comms).
 * idx: position used directly by the first path below.
 * NOTE(review): the first path picks the communication at position idx, forces
 * it to DONE and finishes it; confirm the condition that selects it
 * (presumably the model checker). */
427 void SIMIX_pre_comm_waitany(smx_req_t req, int idx)
430 unsigned int cursor = 0;
431 xbt_dynar_t actions = req->comm_waitany.comms;
434 action = xbt_dynar_get_as(actions, idx, smx_action_t);
435 xbt_fifo_push(action->request_list, req);
436 req->comm_waitany.result = idx;
437 action->state = SIMIX_DONE;
438 SIMIX_comm_finish(action);
/* Regular path: the request is queued on the communications of the set, and a
 * communication that is already past WAITING/RUNNING is finished on the spot.
 * SIMIX_comm_finish() takes the request off the other communications through
 * SIMIX_waitany_req_remove_from_actions(). */
442 xbt_dynar_foreach(actions, cursor, action){
443 /* Associate this request to the action */
444 xbt_fifo_push(action->request_list, req);
445 if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING){
446 SIMIX_comm_finish(action);
/* Takes a waitany request off the request_list of every communication listed
 * in req->comm_waitany.comms, undoing the registrations made by
 * SIMIX_pre_comm_waitany(). */
452 void SIMIX_waitany_req_remove_from_actions(smx_req_t req)
455 unsigned int cursor = 0;
456 xbt_dynar_t actions = req->comm_waitany.comms;
458 xbt_dynar_foreach(actions, cursor, action){
459 xbt_fifo_remove(action->request_list, req);
464 * \brief Start the simulation of a communication request
465 * \param action The communication action
467 static XBT_INLINE void SIMIX_comm_start(smx_action_t action)
/* Nothing visible here happens unless the action is READY, the state that
 * SIMIX_comm_isend() and SIMIX_comm_irecv() assign. */
469 /* If both the sender and the receiver are already there, start the communication */
470 if (action->state == SIMIX_READY) {
471 smx_host_t sender = action->comm.src_proc->smx_host;
472 smx_host_t receiver = action->comm.dst_proc->smx_host;
474 XBT_DEBUG("Starting communication %p from '%s' to '%s'", action,
475 SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver));
/* Create the surf transfer between the two hosts with the size and rate
 * recorded by the sender. */
477 action->comm.surf_comm = surf_workstation_model->extension.workstation.
478 communicate(sender->host, receiver->host, action->comm.task_size, action->comm.rate);
/* Link the surf action back to this communication. */
480 surf_workstation_model->action_data_set(action->comm.surf_comm, action);
482 action->state = SIMIX_RUNNING;
485 TRACE_smx_action_communicate(action, action->comm.src_proc);
488 /* If a link is failed, detect it immediately */
489 if (surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED) {
490 XBT_DEBUG("Communication from '%s' to '%s' failed to start because of a link failure",
491 SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver));
492 action->state = SIMIX_LINK_FAILURE;
493 SIMIX_comm_destroy_internal_actions(action);
496 /* If either process is suspended, create the action but stop its execution;
497 it will be restarted when the sender process resumes */
498 if (SIMIX_process_is_suspended(action->comm.src_proc) ||
499 SIMIX_process_is_suspended(action->comm.dst_proc)) {
500 /* FIXME: check what should happen with the action state */
501 surf_workstation_model->suspend(action->comm.surf_comm);
/* Answers every request queued on `action` once the communication is over.
 * For each request taken from action->request_list:
 *  - a waitany request is removed from the other communications of its set
 *    and receives the position of `action` in that set as its result;
 *  - the action is taken out of its rendez-vous point if still queued there;
 *  - depending on action->state, the data is copied or an exception is raised
 *    and captured into the issuer's running context, with doexception set;
 *  - the issuer's waiting_action is cleared and the request is answered.
 * The function ends with a loop of SIMIX_comm_destroy() calls. */
506 void SIMIX_comm_finish(smx_action_t action)
/* NOTE(review): destroy_count starts at 0 and drives the final destroy loop;
 * the statement that increments it is not among the lines shown here --
 * confirm it counts one reference per answered request. */
508 unsigned int destroy_count = 0;
511 while ((req = xbt_fifo_shift(action->request_list))) {
513 /* If a waitany request is waiting for this action to finish, then remove
514 it from the other actions in the waitany list. Afterwards, get the
515 position of the actual action in the waitany request's actions dynar and
516 return it as the result of the call */
517 if (req->call == REQ_COMM_WAITANY) {
518 SIMIX_waitany_req_remove_from_actions(req);
520 req->comm_waitany.result = xbt_dynar_search(req->comm_waitany.comms, &action);
523 /* If the action is still in a rendez-vous point then remove from it */
524 if (action->comm.rdv)
525 SIMIX_rdv_remove(action->comm.rdv, action);
527 XBT_DEBUG("SIMIX_comm_finish: action state = %d", action->state);
529 /* Check out for errors */
530 switch (action->state) {
533 XBT_DEBUG("Communication %p complete!", action);
534 SIMIX_comm_copy_data(action);
/* Error states: each exception is thrown and immediately caught into the
 * issuer's context, and doexception flags it for the issuer. */
537 case SIMIX_SRC_TIMEOUT:
539 THROW0(timeout_error, 0, "Communication timeouted because of sender");
541 CATCH(req->issuer->running_ctx->exception) {
542 req->issuer->doexception = 1;
546 case SIMIX_DST_TIMEOUT:
548 THROW0(timeout_error, 0, "Communication timeouted because of receiver");
550 CATCH(req->issuer->running_ctx->exception) {
551 req->issuer->doexception = 1;
/* Host failures: the process on the failed host gets host_error, its peer
 * gets network_error. */
555 case SIMIX_SRC_HOST_FAILURE:
557 if (req->issuer == action->comm.src_proc)
558 THROW0(host_error, 0, "Host failed");
560 THROW0(network_error, 0, "Remote peer failed");
562 CATCH(req->issuer->running_ctx->exception) {
563 req->issuer->doexception = 1;
567 case SIMIX_DST_HOST_FAILURE:
569 if (req->issuer == action->comm.dst_proc)
570 THROW0(host_error, 0, "Host failed");
572 THROW0(network_error, 0, "Remote peer failed");
574 CATCH(req->issuer->running_ctx->exception) {
575 req->issuer->doexception = 1;
579 case SIMIX_LINK_FAILURE:
581 XBT_DEBUG("Link failure in action %p between '%s' and '%s': posting an exception to the issuer: %s (%p)",
582 action, action->comm.src_proc->smx_host->name, action->comm.dst_proc->smx_host->name,
583 req->issuer->name, req->issuer);
584 THROW0(network_error, 0, "Link failure");
586 CATCH(req->issuer->running_ctx->exception) {
587 req->issuer->doexception = 1;
595 /* if there is an exception during a waitany or a testany, indicate the position of the failed communication */
596 if (req->issuer->doexception) {
597 if (req->call == REQ_COMM_WAITANY) {
598 req->issuer->running_ctx->exception.value = xbt_dynar_search(req->comm_waitany.comms, &action);
600 else if (req->call == REQ_COMM_TESTANY) {
601 req->issuer->running_ctx->exception.value = xbt_dynar_search(req->comm_testany.comms, &action);
/* The issuer no longer waits on anything; hand the request back. */
605 req->issuer->waiting_action = NULL;
606 SIMIX_request_answer(req);
610 while (destroy_count-- > 0)
611 SIMIX_comm_destroy(action);
/* Derives the final state of a communication from the states of its surf
 * actions, releases those surf actions, and answers pending requests.
 * The checks are ordered: a DONE timeout action (source first, then
 * destination) gives a timeout state; a FAILED timeout action gives a host
 * failure state; a FAILED transfer gives SIMIX_LINK_FAILURE; the remaining
 * assignment sets SIMIX_DONE. */
614 void SIMIX_post_comm(smx_action_t action)
616 /* Update action state */
617 if (action->comm.src_timeout &&
618 surf_workstation_model->action_state_get(action->comm.src_timeout) == SURF_ACTION_DONE)
619 action->state = SIMIX_SRC_TIMEOUT;
620 else if (action->comm.dst_timeout &&
621 surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_DONE)
622 action->state = SIMIX_DST_TIMEOUT;
623 else if (action->comm.src_timeout &&
624 surf_workstation_model->action_state_get(action->comm.src_timeout) == SURF_ACTION_FAILED)
625 action->state = SIMIX_SRC_HOST_FAILURE;
626 else if (action->comm.dst_timeout &&
627 surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_FAILED)
628 action->state = SIMIX_DST_HOST_FAILURE;
629 else if (action->comm.surf_comm &&
630 surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED)
631 action->state = SIMIX_LINK_FAILURE;
633 action->state = SIMIX_DONE;
635 XBT_DEBUG("SIMIX_post_comm: action state = %d", action->state);
637 /* After this point the surf actions associated with the simix communicate
638 action are no longer needed, thus we delete them. */
639 SIMIX_comm_destroy_internal_actions(action);
/* With no request queued, the state is simply kept for a later wait or test. */
641 /* If there are requests associated with the action, then answer them */
642 if (xbt_fifo_size(action->request_list))
643 SIMIX_comm_finish(action);
/* Cancels a communication. A WAITING action is taken out of its rendez-vous
 * point and marked SIMIX_FAILED; the other visible statement cancels the surf
 * transfer.
 * NOTE(review): confirm the guard on the surf cancellation, since surf_comm is
 * only set once SIMIX_comm_start() has launched the transfer. */
646 void SIMIX_comm_cancel(smx_action_t action)
648 /* If the action is in the waiting state, it is still queued in a rdv, */
649 /* so remove it from there and mark it as failed */
650 if (action->state == SIMIX_WAITING) {
651 SIMIX_rdv_remove(action->comm.rdv, action);
652 action->state = SIMIX_FAILED;
654 /* When running the MC there are no surf actions */
656 surf_workstation_model->action_cancel(action->comm.surf_comm);
/* Suspends the surf transfer of a communication. Only surf_comm is touched;
 * the timeout actions are left running (see the FIXME below). */
660 void SIMIX_comm_suspend(smx_action_t action)
662 /*FIXME: shall we suspend also the timeout actions? */
663 surf_workstation_model->suspend(action->comm.surf_comm);
/* Resumes the surf transfer of a communication; counterpart of
 * SIMIX_comm_suspend(). Only surf_comm is touched. */
666 void SIMIX_comm_resume(smx_action_t action)
668 /*FIXME: check what happen with the timeouts */
669 surf_workstation_model->resume(action->comm.surf_comm);
673 /************* Action Getters **************/
676 * \brief get the amount remaining from the communication
677 * \param action The communication
679 double SIMIX_comm_get_remains(smx_action_t action)
/* The answer depends on action->state: one branch asks surf for the remaining
 * amount of the transfer, the other two report 0.
 * NOTE(review): confirm which states map to which branch; both zero branches
 * carry a FIXME. */
683 switch (action->state) {
686 remains = surf_workstation_model->get_remains(action->comm.surf_comm);
691 remains = 0; /*FIXME: check what should be returned */
695 remains = 0; /*FIXME: is this correct? */
/* Returns the current state field of the communication action, unchanged. */
701 e_smx_state_t SIMIX_comm_get_state(smx_action_t action)
703 return action->state;
707 * \brief Return the user data associated to the sender of the communication
708 * \param action The communication
709 * \return the user data
711 void* SIMIX_comm_get_src_data(smx_action_t action)
/* This is the `data` value recorded by SIMIX_comm_isend(); NULL-ness is not
 * checked here. */
713 return action->comm.src_data;
717 * \brief Return the user data associated to the receiver of the communication
718 * \param action The communication
719 * \return the user data
721 void* SIMIX_comm_get_dst_data(smx_action_t action)
/* This is the `data` value recorded by SIMIX_comm_irecv(); NULL-ness is not
 * checked here. */
723 return action->comm.dst_data;
/* Returns the sending process of the communication, as recorded by
 * SIMIX_comm_isend() in comm.src_proc. */
726 smx_process_t SIMIX_comm_get_src_proc(smx_action_t action)
728 return action->comm.src_proc;
/* Returns the receiving process of the communication, as recorded by
 * SIMIX_comm_irecv() in comm.dst_proc. */
731 smx_process_t SIMIX_comm_get_dst_proc(smx_action_t action)
733 return action->comm.dst_proc;
736 #ifdef HAVE_LATENCY_BOUND_TRACKING
738 * \brief verify if communication is latency bounded
739 * \param comm The communication
741 XBT_INLINE int SIMIX_comm_is_latency_bounded(smx_action_t action)
/* While the surf transfer exists, the value is refreshed from surf and cached
 * in action->latency_limited. Once surf_comm is gone the cached value is
 * returned, which is why the destroy paths sample it before unreferencing the
 * surf action. SIMIX_comm_new() initialises the cache to -1. */
743 if (action->comm.surf_comm){
744 XBT_DEBUG("Getting latency limited for surf_action (%p)", action->comm.surf_comm);
745 action->latency_limited = surf_workstation_model->get_latency_limited(action->comm.surf_comm);
746 XBT_DEBUG("Action limited is %d", action->latency_limited);
748 return action->latency_limited;
752 /******************************************************************************/
753 /* SIMIX_comm_copy_data callbacks */
754 /******************************************************************************/
/* Function used by SIMIX_comm_copy_data() to move the payload. It receives the
 * communication and the number of bytes to copy. It defaults to the
 * pointer-copy callback and is replaced through
 * SIMIX_comm_set_copy_data_callback(). */
755 static void (*SIMIX_comm_copy_data_callback) (smx_action_t, size_t) =
756 &SIMIX_comm_copy_pointer_callback;
/* Installs `callback` as the function that copies payloads for every later
 * communication. The pointer is stored as given, without a NULL check. */
759 SIMIX_comm_set_copy_data_callback(void (*callback) (smx_action_t, size_t))
761 SIMIX_comm_copy_data_callback = callback;
/* Copy callback that transfers the payload by reference: dst_buff is treated
 * as the address of a pointer, and the sender's src_buff pointer is stored
 * there. No payload bytes are copied.
 * buff_size: must equal sizeof(void *); anything else trips the assertion. */
764 void SIMIX_comm_copy_pointer_callback(smx_action_t comm, size_t buff_size)
766 xbt_assert1((buff_size == sizeof(void *)),
767 "Cannot copy %zu bytes: must be sizeof(void*)", buff_size);
768 *(void **) (comm->comm.dst_buff) = comm->comm.src_buff;
/* Copy callback that transfers the payload by value: buff_size bytes are
 * copied from the sender's src_buff into the receiver's dst_buff. */
771 void SIMIX_comm_copy_buffer_callback(smx_action_t comm, size_t buff_size)
773 memcpy(comm->comm.dst_buff, comm->comm.src_buff, buff_size);
777 * \brief Copy the communication data from the sender's buffer to the receiver's one
778 * \param comm The communication
780 void SIMIX_comm_copy_data(smx_action_t comm)
/* Start from the size announced by the sender; it may be reduced below. */
782 size_t buff_size = comm->comm.src_buff_size;
783 /* If there is no data to be copied then return */
784 if (!comm->comm.src_buff || !comm->comm.dst_buff || comm->comm.copied == 1)
787 XBT_DEBUG("Copying comm %p data from %s (%p) -> %s (%p) (%zu bytes)",
789 comm->comm.src_proc->smx_host->name, comm->comm.src_buff,
790 comm->comm.dst_proc->smx_host->name, comm->comm.dst_buff, buff_size);
/* dst_buff_size is optional: when the receiver gave none, the sender's size
 * is used as is. */
792 /* Copy at most dst_buff_size bytes of the message to receiver's buffer */
793 if (comm->comm.dst_buff_size)
794 buff_size = MIN(buff_size, *(comm->comm.dst_buff_size));
796 /* Update the receiver's buffer size to the copied amount */
797 if (comm->comm.dst_buff_size)
798 *comm->comm.dst_buff_size = buff_size;
/* Delegate the copy to the currently installed callback. */
803 (*SIMIX_comm_copy_data_callback) (comm, buff_size);
805 /* Set the copied flag so we copy data only once */
806 /* (this function might be called from both communication ends) */
807 comm->comm.copied = 1;