1 /* Copyright (c) 2009, 2010. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
/* Logging category of this file, declared as a sub-category of "simix";
 * it is the default category of the XBT_DEBUG calls below. */
12 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_network, simix,
13 "Logging specific to SIMIX (network)");
/* Named rendez-vous points: created in SIMIX_network_init(), filled by
 * SIMIX_rdv_create() and freed in SIMIX_network_exit(). */
15 static xbt_dict_t rdv_points = NULL;
/* NOTE(review): presumably the number of communications performed so far;
 * nothing in this part of the file updates it -- confirm where it is maintained. */
16 unsigned long int smx_total_comms = 0;
/* Prototypes of the file-local helpers defined further down. */
18 static void SIMIX_waitany_req_remove_from_actions(smx_req_t req);
19 static void SIMIX_comm_copy_data(smx_action_t comm);
20 static smx_action_t SIMIX_comm_new(e_smx_comm_type_t type);
21 static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm);
22 static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm);
23 static smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
24 int (*match_fun)(void *, void *), void *);
25 static void SIMIX_rdv_free(void *data);
/**
 * \brief Initializes the network module
 *
 * Creates the dictionary that holds the named rendez-vous points.
 */
27 void SIMIX_network_init(void)
29 rdv_points = xbt_dict_new();
/**
 * \brief Shuts the network module down
 *
 * Frees the dictionary of rendez-vous points. The entries were registered
 * with SIMIX_rdv_free() as their free function (see SIMIX_rdv_create()), so
 * the remaining rendez-vous points are presumably released here as well.
 */
32 void SIMIX_network_exit(void)
34 xbt_dict_free(&rdv_points);
37 /******************************************************************************/
38 /* Rendez-Vous Points */
39 /******************************************************************************/
/**
 * \brief Creates a rendez-vous point, reusing the registered one when the name is already known
 * \param name The name of the rendez-vous point; NULL skips the lookup and
 *             leaves the name of the new rendez-vous point NULL
 * \return NOTE(review): presumably the rendez-vous point found or created -- confirm
 */
41 smx_rdv_t SIMIX_rdv_create(const char *name)
43 /* two processes may have pushed the same rdv_create request at the same time */
44 smx_rdv_t rdv = name ? xbt_dict_get_or_null(rdv_points, name) : NULL;
/* Build a zero-initialized rendez-vous point owning a private copy of the
 * name and an empty FIFO of communications */
47 rdv = xbt_new0(s_smx_rvpoint_t, 1);
48 rdv->name = name ? xbt_strdup(name) : NULL;
49 rdv->comm_fifo = xbt_fifo_new();
/* Register it under its name; SIMIX_rdv_free is the free function of the entry */
52 xbt_dict_set(rdv_points, rdv->name, rdv, SIMIX_rdv_free);
/**
 * \brief Destroys a rendez-vous point
 *
 * Removes the entry registered under the name of the rendez-vous point from
 * the dictionary of rendez-vous points.
 * NOTE(review): the entry was registered with SIMIX_rdv_free() as free
 * function, so the removal presumably releases the rendez-vous point -- confirm.
 *
 * \param rdv The rendez-vous point
 */
57 void SIMIX_rdv_destroy(smx_rdv_t rdv)
60 xbt_dict_remove(rdv_points, rdv->name);
/**
 * \brief Free function of the entries of the rendez-vous point dictionary
 * \param data The rendez-vous point, passed as a generic pointer
 */
63 void SIMIX_rdv_free(void *data)
65 smx_rdv_t rdv = (smx_rdv_t) data;
/* Release the FIFO of communications created in SIMIX_rdv_create() */
68 xbt_fifo_free(rdv->comm_fifo);
/**
 * \brief Looks a rendez-vous point up by name
 * \param name The name to search for in the dictionary of rendez-vous points
 * \return The result of xbt_dict_get_or_null(): the registered rendez-vous
 *         point, presumably NULL when the name is unknown
 */
72 smx_rdv_t SIMIX_rdv_get_by_name(const char *name)
74 return xbt_dict_get_or_null(rdv_points, name);
/**
 * \brief Inspects the communications queued in a rendez-vous point whose sender runs on a given host
 * \param rdv The rendez-vous point
 * \param host The host compared with the host of each sender process
 * \return NOTE(review): presumably the number of matching communications -- confirm
 */
77 int SIMIX_rdv_comm_count_by_host(smx_rdv_t rdv, smx_host_t host)
79 smx_action_t comm = NULL;
80 xbt_fifo_item_t item = NULL;
/* Walk the queued communications and test the host of their sender;
 * src_proc is dereferenced without a NULL check */
83 xbt_fifo_foreach(rdv->comm_fifo, item, comm, smx_action_t) {
84 if (comm->comm.src_proc->smx_host == host)
/**
 * \brief Returns the communication stored in the first item of a rendez-vous point's FIFO
 * \param rdv The rendez-vous point
 * \return The content of the first FIFO item; the communication is left in the FIFO
 */
91 smx_action_t SIMIX_rdv_get_head(smx_rdv_t rdv)
93 return xbt_fifo_get_item_content(xbt_fifo_get_first_item(rdv->comm_fifo));
97 * \brief Push a communication request into a rendez-vous point
98 * \param rdv The rendez-vous point
99 * \param comm The communication request
101 static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm)
/* Queue the request in the FIFO of the rendez-vous point */
103 xbt_fifo_push(rdv->comm_fifo, comm);
/* Keep a back-pointer so that SIMIX_comm_finish() can tell whether the
 * communication is still queued and remove it */
104 comm->comm.rdv = rdv;
108 * \brief Remove a communication request from a rendez-vous point
109 * \param rdv The rendez-vous point
110 * \param comm The communication request
112 static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm)
/* Take the request out of the FIFO of the rendez-vous point */
114 xbt_fifo_remove(rdv->comm_fifo, comm);
/* Clear the back-pointer set by SIMIX_rdv_push(): the communication is no longer queued */
115 comm->comm.rdv = NULL;
/**
 * \brief Searches a rendez-vous point for a queued send request
 *
 * Thin wrapper around SIMIX_rdv_get_request() with the SIMIX_COMM_SEND type.
 * A request that is found is removed from the rendez-vous point and its
 * reference count is incremented by that function.
 *
 * \param rdv The rendez-vous point
 * \param match_fun Optional matching function (NULL accepts any send request)
 * \param data First argument given to match_fun
 * \return The result of SIMIX_rdv_get_request()
 */
118 smx_action_t SIMIX_comm_get_send_match(smx_rdv_t rdv, int (*match_fun)(void*, void*), void* data) {
119 return SIMIX_rdv_get_request(rdv, SIMIX_COMM_SEND, match_fun, data);
123 * \brief Checks if there is a communication action queued in a rendez-vous matching our needs
124 * \param type The type of communication we are looking for (comm_send, comm_recv)
125 * \return The communication action if found, NULL otherwise
127 smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
128 int (*match_fun)(void *, void *), void *data)
131 xbt_fifo_item_t item;
/* User data of the queued request under examination; second argument of match_fun */
132 void* req_data = NULL;
/* Examine the requests queued in the rendez-vous point */
134 xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t){
/* A send request carries its user data in src_data, a receive request in dst_data */
135 if (action->comm.type == SIMIX_COMM_SEND) {
136 req_data = action->comm.src_data;
137 } else if (action->comm.type == SIMIX_COMM_RECEIVE) {
138 req_data = action->comm.dst_data;
/* A request is selected when it has the wanted type and match_fun is
 * either NULL or returns non-zero for (data, req_data) */
140 if (action->comm.type == type && (!match_fun || match_fun(data, req_data))) {
141 XBT_DEBUG("Found a matching communication action %p", action);
/* Unlink it from the rendez-vous point, take an extra reference on it and
 * clear its back-pointer to the rendez-vous point */
142 xbt_fifo_remove_item(rdv->comm_fifo, item);
143 xbt_fifo_free_item(item);
144 action->comm.refcount++;
145 action->comm.rdv = NULL;
148 XBT_DEBUG("Sorry, communication action %p does not match our needs:"
149 " its type is %d but we are looking for a comm of type %d",
150 action, action->comm.type, type);
152 XBT_DEBUG("No matching communication action found");
156 /******************************************************************************/
157 /* Communication Actions */
158 /******************************************************************************/
161 * \brief Creates a new communicate action
162 * \param type The type of request (comm_send, comm_recv)
163 * \return The new communicate action
165 smx_action_t SIMIX_comm_new(e_smx_comm_type_t type)
169 /* alloc structures */
/* The action comes from the global action mallocator and is given back to
 * it in SIMIX_comm_destroy() */
170 act = xbt_mallocator_get(simix_global->action_mallocator);
171 act->type = SIMIX_ACTION_COMMUNICATE;
/* A new communication waits for its peer until isend/irecv match it */
172 act->state = SIMIX_WAITING;
174 /* set communication */
175 act->comm.type = type;
/* The new action starts with a single reference */
176 act->comm.refcount = 1;
178 #ifdef HAVE_LATENCY_BOUND_TRACKING
179 //initialize with unknown value
180 act->latency_limited = -1;
184 act->category = NULL;
187 XBT_DEBUG("Create communicate action %p", act);
194 * \brief Destroy a communicate action
195 * \param action The communicate action to be destroyed
197 void SIMIX_comm_destroy(smx_action_t action)
199 XBT_DEBUG("Destroy action %p (refcount:%d)", action, action->comm.refcount);
/* Releasing a reference that does not exist is a bug: abort */
201 if (action->comm.refcount <= 0)
202 xbt_die("the refcount of comm %p is already 0 before decreasing it. "
203 "That's a bug!", action);
/* Drop one reference; the action is only freed when none remains
 * (NOTE(review): presumably returns early while references remain -- confirm) */
205 action->comm.refcount--;
206 if (action->comm.refcount > 0)
208 XBT_DEBUG("Really free communication %p; refcount is now %d", action,
209 action->comm.refcount);
211 #ifdef HAVE_LATENCY_BOUND_TRACKING
/* Refresh the cached latency flag before the surf action goes away */
212 action->latency_limited = SIMIX_comm_is_latency_bounded( action ) ;
216 TRACE_smx_action_destroy(action);
219 xbt_free(action->name);
/* Release the surf communication and timeout actions, if any */
220 SIMIX_comm_destroy_internal_actions(action);
222 if (action->comm.detached && action->state != SIMIX_DONE) {
223 /* the communication has failed and was detached:
224 * we have to free the buffer */
/* NOTE(review): src_data is called as a void_f_pvoid_t on src_buff, i.e. a
 * detached send is expected to store its cleanup function in src_data --
 * confirm against the callers of SIMIX_comm_isend() */
225 ((void_f_pvoid_t) action->comm.src_data)(action->comm.src_buff);
/* Give the action back to the mallocator it was taken from in SIMIX_comm_new() */
228 xbt_mallocator_release(simix_global->action_mallocator, action);
/**
 * \brief Releases the surf actions attached to a communicate action
 *
 * Unreferences the surf communication and the sender and receiver timeout
 * actions that exist, and resets the corresponding pointers to NULL so that
 * a second call has nothing left to release.
 *
 * \param action The communicate action
 */
231 void SIMIX_comm_destroy_internal_actions(smx_action_t action)
233 if (action->comm.surf_comm){
234 #ifdef HAVE_LATENCY_BOUND_TRACKING
/* Cache the latency flag while the surf action can still be queried */
235 action->latency_limited = SIMIX_comm_is_latency_bounded(action);
237 action->comm.surf_comm->model_type->action_unref(action->comm.surf_comm);
238 action->comm.surf_comm = NULL;
/* Timeout (sleep) action installed for the sender by SIMIX_pre_comm_wait() */
241 if (action->comm.src_timeout){
242 action->comm.src_timeout->model_type->action_unref(action->comm.src_timeout);
243 action->comm.src_timeout = NULL;
/* Timeout (sleep) action installed for the receiver by SIMIX_pre_comm_wait() */
246 if (action->comm.dst_timeout){
247 action->comm.dst_timeout->model_type->action_unref(action->comm.dst_timeout);
248 action->comm.dst_timeout = NULL;
/**
 * \brief Posts an asynchronous send on a rendez-vous point
 *
 * Reuses a matching receive request queued in the rendez-vous point when
 * there is one, otherwise queues a new send request; then records the
 * sender's side of the communication and tries to start it.
 *
 * \param src_proc The sending process
 * \param rdv The rendez-vous point
 * \param task_size Amount of data, later handed to the surf communication
 * \param rate Rate, later handed to the surf communication
 * \param src_buff The sender's buffer
 * \param src_buff_size Size of the sender's buffer
 * \param match_fun Optional function used to select the pending receive
 * \param data User data given to match_fun and stored as src_data
 *
 * NOTE(review): the detached handling below is presumably driven by a
 * trailing parameter of this function -- confirm against the full signature.
 */
252 smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
253 double task_size, double rate,
254 void *src_buff, size_t src_buff_size,
255 int (*match_fun)(void *, void *), void *data,
260 /* Look for communication request matching our needs.
261 If it is not found then create it and push it into the rendez-vous point */
262 action = SIMIX_rdv_get_request(rdv, SIMIX_COMM_RECEIVE, match_fun, data);
/* No receiver is waiting: queue a new send request */
265 action = SIMIX_comm_new(SIMIX_COMM_SEND);
266 SIMIX_rdv_push(rdv, action);
/* A receiver was found: both ends are known, the communication can start */
268 action->state = SIMIX_READY;
269 action->comm.type = SIMIX_COMM_READY;
272 /* If the communication action is detached then decrease the refcount
273 * by one, so it will be eliminated by the receivers destroy call */
275 action->comm.detached = 1;
276 action->comm.refcount--;
279 /* Setup the communication request */
280 action->comm.src_proc = src_proc;
281 action->comm.task_size = task_size;
282 action->comm.rate = rate;
283 action->comm.src_buff = src_buff;
284 action->comm.src_buff_size = src_buff_size;
285 action->comm.src_data = data;
/* NOTE(review): forcing the RUNNING state without calling SIMIX_comm_start()
 * looks like the model-checker path, where no surf action exists -- confirm */
288 action->state = SIMIX_RUNNING;
/* Creates the surf communication if the action is READY */
292 SIMIX_comm_start(action);
/**
 * \brief Posts an asynchronous receive on a rendez-vous point
 *
 * Reuses a matching send request queued in the rendez-vous point when there
 * is one, otherwise queues a new receive request; then records the
 * receiver's side of the communication and tries to start it.
 *
 * \param dst_proc The receiving process
 * \param rdv The rendez-vous point
 * \param dst_buff The receiver's buffer
 * \param dst_buff_size Pointer to the size of the receiver's buffer; when not
 *        NULL, SIMIX_comm_copy_data() overwrites it with the amount copied
 * \param match_fun Optional function used to select the pending send
 * \param data User data given to match_fun and stored as dst_data
 */
296 smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv,
297 void *dst_buff, size_t *dst_buff_size,
298 int (*match_fun)(void *, void *), void *data)
302 /* Look for communication request matching our needs.
303 * If it is not found then create it and push it into the rendez-vous point
305 action = SIMIX_rdv_get_request(rdv, SIMIX_COMM_SEND, match_fun, data);
/* No sender is waiting: queue a new receive request */
308 action = SIMIX_comm_new(SIMIX_COMM_RECEIVE);
309 SIMIX_rdv_push(rdv, action);
/* A sender was found: both ends are known, the communication can start */
311 action->state = SIMIX_READY;
312 action->comm.type = SIMIX_COMM_READY;
315 /* Setup communication request */
316 action->comm.dst_proc = dst_proc;
317 action->comm.dst_buff = dst_buff;
318 action->comm.dst_buff_size = dst_buff_size;
319 action->comm.dst_data = data;
/* NOTE(review): forcing the RUNNING state without calling SIMIX_comm_start()
 * looks like the model-checker path, where no surf action exists -- confirm */
322 action->state = SIMIX_RUNNING;
/* Creates the surf communication if the action is READY */
326 SIMIX_comm_start(action);
/**
 * \brief Handles a request that waits for a communication
 *
 * The request is queued on the action so that SIMIX_comm_finish() can answer
 * it. An action that is neither WAITING nor RUNNING is finished right away;
 * otherwise a surf sleep action is created on the issuer's host and stored
 * as the timeout of the issuer's side of the communication.
 *
 * \param req The request being handled
 * \param action The communication to wait for
 * \param timeout Duration given to the surf sleep action
 * \param idx NOTE(review): presumably a model-checker choice selecting between
 *        completion and timeout -- confirm
 */
330 void SIMIX_pre_comm_wait(smx_req_t req, smx_action_t action, double timeout, int idx)
332 /* the request may be a wait, a send or a recv */
335 /* Associate this request to the action */
336 xbt_fifo_push(action->request_list, req);
337 req->issuer->waiting_action = action;
/* NOTE(review): the statements up to the first SIMIX_comm_finish() force the
 * outcome (DONE, or a timeout on the issuer's side) instead of simulating it;
 * this looks like the model-checker path -- confirm its guarding conditions */
341 action->state = SIMIX_DONE;
343 /* If we reached this point, the wait request must have a timeout */
344 /* Otherwise it shouldn't be enabled and executed by the MC */
348 if (action->comm.src_proc == req->issuer)
349 action->state = SIMIX_SRC_TIMEOUT;
351 action->state = SIMIX_DST_TIMEOUT;
354 SIMIX_comm_finish(action);
358 /* If the action has already finished, perform the error handling, */
359 /* otherwise set up a waiting timeout on the right side */
360 if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING) {
361 SIMIX_comm_finish(action);
362 } else { /* if (timeout >= 0) { we need a surf sleep action even when there is no timeout, otherwise surf won't tell us when the host fails */
363 sleep = surf_workstation_model->extension.workstation.sleep(req->issuer->smx_host->host, timeout);
/* Attach the communication to the sleep action */
364 surf_workstation_model->action_data_set(sleep, action);
/* Remember the sleep action on the side of the process that is waiting */
366 if (req->issuer == action->comm.src_proc)
367 action->comm.src_timeout = sleep;
369 action->comm.dst_timeout = sleep;
/**
 * \brief Handles a request that tests whether a communication is over
 *
 * Stores the outcome in req->comm_test.result. When the test succeeds, the
 * request is queued on the action and SIMIX_comm_finish() answers it;
 * otherwise the request is answered directly.
 *
 * \param req The test request; the communication is req->comm_test.comm
 */
373 void SIMIX_pre_comm_test(smx_req_t req)
375 smx_action_t action = req->comm_test.comm;
/* NOTE(review): first variant, presumably the model-checker path: the
 * communication is considered over as soon as both peers are known, and is
 * then forced to the DONE state -- confirm its guarding condition */
378 req->comm_test.result = action->comm.src_proc && action->comm.dst_proc;
379 if(req->comm_test.result){
380 action->state = SIMIX_DONE;
381 xbt_fifo_push(action->request_list, req);
382 SIMIX_comm_finish(action);
384 SIMIX_request_answer(req);
/* Second variant: the communication is over when it is neither WAITING nor RUNNING */
389 req->comm_test.result = (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING);
390 if (req->comm_test.result) {
391 xbt_fifo_push(action->request_list, req);
392 SIMIX_comm_finish(action);
394 SIMIX_request_answer(req);
/**
 * \brief Handles a request that tests whether any communication of a set is over
 *
 * req->comm_testany.result is set to the index of a finished communication
 * in req->comm_testany.comms, and starts at -1.
 *
 * \param req The testany request
 * \param idx NOTE(review): presumably the index chosen by the model checker,
 *        used by the first variant below -- confirm
 */
398 void SIMIX_pre_comm_testany(smx_req_t req, int idx)
402 xbt_dynar_t actions = req->comm_testany.comms;
403 req->comm_testany.result = -1;
/* NOTE(review): first variant, presumably the model-checker path: either
 * answer directly or force the communication at position idx to DONE --
 * confirm its guarding conditions */
407 SIMIX_request_answer(req);
409 action = xbt_dynar_get_as(actions, idx, smx_action_t);
410 req->comm_testany.result = idx;
411 xbt_fifo_push(action->request_list, req);
412 action->state = SIMIX_DONE;
413 SIMIX_comm_finish(action);
/* Second variant: look for a communication that is neither WAITING nor
 * RUNNING and let SIMIX_comm_finish() answer the request for it */
418 xbt_dynar_foreach(req->comm_testany.comms,cursor,action) {
419 if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING) {
420 req->comm_testany.result = cursor;
421 xbt_fifo_push(action->request_list, req);
422 SIMIX_comm_finish(action);
426 SIMIX_request_answer(req);
/**
 * \brief Handles a request that waits for any communication of a set
 *
 * \param req The waitany request; the set is req->comm_waitany.comms
 * \param idx NOTE(review): presumably the index chosen by the model checker,
 *        used by the first variant below -- confirm
 */
429 void SIMIX_pre_comm_waitany(smx_req_t req, int idx)
432 unsigned int cursor = 0;
433 xbt_dynar_t actions = req->comm_waitany.comms;
/* NOTE(review): first variant, presumably the model-checker path: the
 * communication at position idx is forced to DONE and finished -- confirm */
436 action = xbt_dynar_get_as(actions, idx, smx_action_t);
437 xbt_fifo_push(action->request_list, req);
438 req->comm_waitany.result = idx;
439 action->state = SIMIX_DONE;
440 SIMIX_comm_finish(action);
/* Second variant: register the request on every communication of the set;
 * a communication that is neither WAITING nor RUNNING is finished at once.
 * SIMIX_comm_finish() unregisters the request from the other communications
 * through SIMIX_waitany_req_remove_from_actions(). */
444 xbt_dynar_foreach(actions, cursor, action){
445 /* Associate this request to the action */
446 xbt_fifo_push(action->request_list, req);
447 if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING){
448 SIMIX_comm_finish(action);
/**
 * \brief Unregisters a waitany request from all the communications it waits for
 * \param req The waitany request; it is removed from the request list of
 *        every communication of req->comm_waitany.comms
 */
454 void SIMIX_waitany_req_remove_from_actions(smx_req_t req)
457 unsigned int cursor = 0;
458 xbt_dynar_t actions = req->comm_waitany.comms;
460 xbt_dynar_foreach(actions, cursor, action){
461 xbt_fifo_remove(action->request_list, req);
466 * \brief Start the simulation of a communication request
467 * \param action The communication action
470 XBT_INLINE void SIMIX_comm_start(smx_action_t action)
472 /* If both the sender and the receiver are already there, start the communication */
473 if (action->state == SIMIX_READY) {
/* The READY state is set by isend/irecv once a peer was matched, so both
 * src_proc and dst_proc are expected to be set here */
474 smx_host_t sender = action->comm.src_proc->smx_host;
475 smx_host_t receiver = action->comm.dst_proc->smx_host;
477 XBT_DEBUG("Starting communication %p from '%s' to '%s'", action,
478 SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver));
/* Create the surf communication between the two hosts */
480 action->comm.surf_comm = surf_workstation_model->extension.workstation.
481 communicate(sender->host, receiver->host, action->comm.task_size, action->comm.rate);
/* Attach the simix action to the surf action */
483 surf_workstation_model->action_data_set(action->comm.surf_comm, action);
485 action->state = SIMIX_RUNNING;
488 TRACE_smx_action_communicate(action, action->comm.src_proc);
491 /* If a link is failed, detect it immediately */
492 if (surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED) {
493 XBT_DEBUG("Communication from '%s' to '%s' failed to start because of a link failure",
494 SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver));
495 action->state = SIMIX_LINK_FAILURE;
/* The surf actions are useless now: release them (surf_comm becomes NULL) */
496 SIMIX_comm_destroy_internal_actions(action);
499 /* If any of the processes is suspended, create the action but stop its execution,
500 it will be restarted when the sender process resumes */
501 if (SIMIX_process_is_suspended(action->comm.src_proc) ||
502 SIMIX_process_is_suspended(action->comm.dst_proc)) {
503 /* FIXME: check what should happen with the action state */
504 surf_workstation_model->suspend(action->comm.surf_comm);
/**
 * \brief Answers all the requests waiting for a communication
 *
 * For each request queued on the action: cleans up the waitany bookkeeping,
 * removes the action from its rendez-vous point if it is still queued there,
 * then, depending on the action state, copies the data of a completed
 * communication or records an exception for the issuer, and finally answers
 * the request.
 *
 * \param action The communication
 */
509 void SIMIX_comm_finish(smx_action_t action)
511 unsigned int destroy_count = 0;
/* Take the pending requests one by one until the list is empty */
514 while ((req = xbt_fifo_shift(action->request_list))) {
516 /* If a waitany request is waiting for this action to finish, then remove
517 it from the other actions in the waitany list. Afterwards, get the
518 position of the actual action in the waitany request's actions dynar and
519 return it as the result of the call */
520 if (req->call == REQ_COMM_WAITANY) {
521 SIMIX_waitany_req_remove_from_actions(req);
523 req->comm_waitany.result = xbt_dynar_search(req->comm_waitany.comms, &action);
526 /* If the action is still in a rendez-vous point then remove from it */
527 if (action->comm.rdv)
528 SIMIX_rdv_remove(action->comm.rdv, action);
530 XBT_DEBUG("SIMIX_comm_finish: action state = %d", action->state);
532 /* Check out for errors */
533 switch (action->state) {
/* Successful completion: transfer the data (done only once, see SIMIX_comm_copy_data()) */
536 XBT_DEBUG("Communication %p complete!", action);
537 SIMIX_comm_copy_data(action);
/* In the error cases below the exception is stored in the issuer's running
 * context and doexception is raised for the issuer */
540 case SIMIX_SRC_TIMEOUT:
542 THROW0(timeout_error, 0, "Communication timeouted because of sender");
544 CATCH(req->issuer->running_ctx->exception) {
545 req->issuer->doexception = 1;
549 case SIMIX_DST_TIMEOUT:
551 THROW0(timeout_error, 0, "Communication timeouted because of receiver");
553 CATCH(req->issuer->running_ctx->exception) {
554 req->issuer->doexception = 1;
/* Host failures: the process on the failed host gets a host_error, its peer a network_error */
558 case SIMIX_SRC_HOST_FAILURE:
560 if (req->issuer == action->comm.src_proc)
561 THROW0(host_error, 0, "Host failed");
563 THROW0(network_error, 0, "Remote peer failed");
565 CATCH(req->issuer->running_ctx->exception) {
566 req->issuer->doexception = 1;
570 case SIMIX_DST_HOST_FAILURE:
572 if (req->issuer == action->comm.dst_proc)
573 THROW0(host_error, 0, "Host failed");
575 THROW0(network_error, 0, "Remote peer failed");
577 CATCH(req->issuer->running_ctx->exception) {
578 req->issuer->doexception = 1;
582 case SIMIX_LINK_FAILURE:
584 XBT_DEBUG("Link failure in action %p between '%s' and '%s': posting an exception to the issuer: %s (%p)",
585 action, action->comm.src_proc->smx_host->name, action->comm.dst_proc->smx_host->name,
586 req->issuer->name, req->issuer);
587 THROW0(network_error, 0, "Link failure");
589 CATCH(req->issuer->running_ctx->exception) {
590 req->issuer->doexception = 1;
598 /* if there is an exception during a waitany or a testany, indicate the position of the failed communication */
599 if (req->issuer->doexception) {
600 if (req->call == REQ_COMM_WAITANY) {
601 req->issuer->running_ctx->exception.value = xbt_dynar_search(req->comm_waitany.comms, &action);
603 else if (req->call == REQ_COMM_TESTANY) {
604 req->issuer->running_ctx->exception.value = xbt_dynar_search(req->comm_testany.comms, &action);
/* The issuer no longer waits for this action: answer its request */
608 req->issuer->waiting_action = NULL;
609 SIMIX_request_answer(req);
/* Release one reference per unit of destroy_count
 * (NOTE(review): confirm where destroy_count is incremented) */
613 while (destroy_count-- > 0)
614 SIMIX_comm_destroy(action);
/**
 * \brief Updates a communication from the state of its surf actions
 *
 * The state is derived in priority order: a finished timeout (sender, then
 * receiver), a failed timeout action, reported as a host failure (sender,
 * then receiver), a failed surf communication, reported as a link failure,
 * and DONE otherwise. The surf actions are then released and the pending
 * requests, if any, are answered.
 *
 * \param action The communication
 */
617 void SIMIX_post_comm(smx_action_t action)
619 /* Update action state */
620 if (action->comm.src_timeout &&
621 surf_workstation_model->action_state_get(action->comm.src_timeout) == SURF_ACTION_DONE)
622 action->state = SIMIX_SRC_TIMEOUT;
623 else if (action->comm.dst_timeout &&
624 surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_DONE)
625 action->state = SIMIX_DST_TIMEOUT;
/* A failed sleep action reveals the failure of the host it runs on (see the
 * comment about surf sleep actions in SIMIX_pre_comm_wait()) */
626 else if (action->comm.src_timeout &&
627 surf_workstation_model->action_state_get(action->comm.src_timeout) == SURF_ACTION_FAILED)
628 action->state = SIMIX_SRC_HOST_FAILURE;
629 else if (action->comm.dst_timeout &&
630 surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_FAILED)
631 action->state = SIMIX_DST_HOST_FAILURE;
632 else if (action->comm.surf_comm &&
633 surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED)
634 action->state = SIMIX_LINK_FAILURE;
636 action->state = SIMIX_DONE;
638 XBT_DEBUG("SIMIX_post_comm: action state = %d", action->state);
640 /* After this point the surf actions associated with the simix communicate
641 action are no longer needed, thus we delete them. */
642 SIMIX_comm_destroy_internal_actions(action);
644 /* If there are requests associated with the action, then answer them */
645 if (xbt_fifo_size(action->request_list))
646 SIMIX_comm_finish(action);
/**
 * \brief Cancels a communication
 *
 * A communication still WAITING for its peer is removed from its rendez-vous
 * point and marked FAILED; otherwise the surf communication is cancelled.
 *
 * \param action The communication
 */
649 void SIMIX_comm_cancel(smx_action_t action)
651 /* If the action is a waiting state means that it is still in a rdv */
652 /* so remove from it and delete it */
653 if (action->state == SIMIX_WAITING) {
654 SIMIX_rdv_remove(action->comm.rdv, action);
655 action->state = SIMIX_FAILED;
657 /* When running the MC there are no surf actions */
/* NOTE(review): surf_comm is NULL once SIMIX_comm_destroy_internal_actions()
 * has run -- confirm that the guard of this call excludes that case */
659 surf_workstation_model->action_cancel(action->comm.surf_comm);
663 void SIMIX_comm_suspend(smx_action_t action)
665 /*FIXME: shall we suspend also the timeout actions? */
666 surf_workstation_model->suspend(action->comm.surf_comm);
669 void SIMIX_comm_resume(smx_action_t action)
671 /*FIXME: check what happen with the timeouts */
672 surf_workstation_model->resume(action->comm.surf_comm);
676 /************* Action Getters **************/
679 * \brief get the amount remaining from the communication
680 * \param action The communication
682 double SIMIX_comm_get_remains(smx_action_t action)
/* The result depends on the state of the action: one branch asks surf for
 * the amount remaining on the surf communication, the two others yield 0.
 * NOTE(review): confirm which states select each branch. */
686 switch (action->state) {
689 remains = surf_workstation_model->get_remains(action->comm.surf_comm);
694 remains = 0; /*FIXME: check what should be returned */
698 remains = 0; /*FIXME: is this correct? */
/**
 * \brief Returns the current state of a communication
 * \param action The communication
 * \return The state field of the action
 */
704 e_smx_state_t SIMIX_comm_get_state(smx_action_t action)
706 return action->state;
710 * \brief Return the user data associated to the sender of the communication
711 * \param action The communication
712 * \return the user data
714 void* SIMIX_comm_get_src_data(smx_action_t action)
/* src_data is the `data` argument recorded by SIMIX_comm_isend() */
716 return action->comm.src_data;
720 * \brief Return the user data associated to the receiver of the communication
721 * \param action The communication
722 * \return the user data
724 void* SIMIX_comm_get_dst_data(smx_action_t action)
/* dst_data is the `data` argument recorded by SIMIX_comm_irecv() */
726 return action->comm.dst_data;
/**
 * \brief Returns the sender process of a communication
 * \param action The communication
 * \return The process recorded by SIMIX_comm_isend() as src_proc
 */
729 smx_process_t SIMIX_comm_get_src_proc(smx_action_t action)
731 return action->comm.src_proc;
/**
 * \brief Returns the receiver process of a communication
 * \param action The communication
 * \return The process recorded by SIMIX_comm_irecv() as dst_proc
 */
734 smx_process_t SIMIX_comm_get_dst_proc(smx_action_t action)
736 return action->comm.dst_proc;
739 #ifdef HAVE_LATENCY_BOUND_TRACKING
741 * \brief verify if communication is latency bounded
742 * \param comm The communication
744 XBT_INLINE int SIMIX_comm_is_latency_bounded(smx_action_t action)
/* While the surf communication exists, refresh the value cached in the action */
746 if (action->comm.surf_comm){
747 XBT_DEBUG("Getting latency limited for surf_action (%p)", action->comm.surf_comm);
748 action->latency_limited = surf_workstation_model->get_latency_limited(action->comm.surf_comm);
749 XBT_DEBUG("Action limited is %d", action->latency_limited);
/* Without surf communication this is the last cached value; SIMIX_comm_new()
 * initializes it to -1, meaning unknown */
751 return action->latency_limited;
755 /******************************************************************************/
756 /* SIMIX_comm_copy_data callbacks */
757 /******************************************************************************/
/* Function called by SIMIX_comm_copy_data() to perform the actual copy, with
 * the communication and the number of bytes to copy; defaults to the
 * pointer-copying callback and can be replaced with
 * SIMIX_comm_set_copy_data_callback(). */
758 static void (*SIMIX_comm_copy_data_callback) (smx_action_t, size_t) =
759 &SIMIX_comm_copy_pointer_callback;
/**
 * \brief Selects the function used to copy the data of the communications
 * \param callback The function that SIMIX_comm_copy_data() will call with
 *        the communication and the number of bytes to copy
 */
762 SIMIX_comm_set_copy_data_callback(void (*callback) (smx_action_t, size_t))
764 SIMIX_comm_copy_data_callback = callback;
/**
 * \brief Copy callback that transfers the address of the sender's buffer
 *
 * The receiver's buffer is treated as a pointer slot in which the address
 * src_buff is stored; no payload is copied.
 *
 * \param comm The communication
 * \param buff_size The size to copy, which must be sizeof(void *)
 */
767 void SIMIX_comm_copy_pointer_callback(smx_action_t comm, size_t buff_size)
769 xbt_assert1((buff_size == sizeof(void *)),
770 "Cannot copy %zu bytes: must be sizeof(void*)", buff_size);
771 *(void **) (comm->comm.dst_buff) = comm->comm.src_buff;
/**
 * \brief Copy callback that transfers the content of the sender's buffer
 * \param comm The communication
 * \param buff_size The number of bytes copied from src_buff to dst_buff
 */
774 void SIMIX_comm_copy_buffer_callback(smx_action_t comm, size_t buff_size)
776 memcpy(comm->comm.dst_buff, comm->comm.src_buff, buff_size);
780 * \brief Copy the communication data from the sender's buffer to the receiver's one
781 * \param comm The communication
783 void SIMIX_comm_copy_data(smx_action_t comm)
/* Start from the size announced by the sender */
785 size_t buff_size = comm->comm.src_buff_size;
786 /* If there is no data to be copy then return */
/* (a missing buffer on either side, or data that was already copied) */
787 if (!comm->comm.src_buff || !comm->comm.dst_buff || comm->comm.copied == 1)
790 XBT_DEBUG("Copying comm %p data from %s (%p) -> %s (%p) (%zu bytes)",
792 comm->comm.src_proc->smx_host->name, comm->comm.src_buff,
793 comm->comm.dst_proc->smx_host->name, comm->comm.dst_buff, buff_size);
795 /* Copy at most dst_buff_size bytes of the message to receiver's buffer */
796 if (comm->comm.dst_buff_size)
797 buff_size = MIN(buff_size, *(comm->comm.dst_buff_size));
799 /* Update the receiver's buffer size to the copied amount */
800 if (comm->comm.dst_buff_size)
801 *comm->comm.dst_buff_size = buff_size;
/* Delegate the copy to the callback currently installed */
806 (*SIMIX_comm_copy_data_callback) (comm, buff_size);
808 /* Set the copied flag so we copy data only once */
809 /* (this function might be called from both communication ends) */
810 comm->comm.copied = 1;