1 /* Copyright (c) 2009, 2010. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
/* Declare simix_network as the default log category of this file, as a
 * subcategory of simix. */
12 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_network, simix,
13 "Logging specific to SIMIX (network)");
/* Registry of the rendez-vous points, keyed by name. It is allocated in
 * SIMIX_network_init and filled by SIMIX_rdv_create. */
15 static xbt_dict_t rdv_points = NULL;
/* Forward declarations of the helpers that have file scope. */
17 static XBT_INLINE void SIMIX_comm_start(smx_action_t action);
18 static void SIMIX_comm_finish(smx_action_t action);
19 static void SIMIX_waitany_req_remove_from_actions(smx_req_t req);
20 static void SIMIX_comm_copy_data(smx_action_t comm);
21 static smx_action_t SIMIX_comm_new(e_smx_comm_type_t type);
22 static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm);
23 static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm);
/* The last, unnamed parameter is the user data handed to match_fun as its
 * first argument (see the definition of SIMIX_rdv_get_request). */
24 static smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
25 int (*match_fun)(void *, void *), void *);
/* Free function registered with every entry stored in rdv_points. */
26 static void SIMIX_rdv_free(void *data);
/**
 * \brief Initialize the network layer by creating the rendez-vous point
 * dictionary (rdv_points).
 */
28 void SIMIX_network_init(void)
30 rdv_points = xbt_dict_new();
/**
 * \brief Release the rendez-vous point dictionary created by
 * SIMIX_network_init.
 *
 * NOTE(review): entries are stored with SIMIX_rdv_free as free function, so
 * they are presumably released here too - confirm against xbt_dict_free.
 */
33 void SIMIX_network_exit(void)
35 xbt_dict_free(&rdv_points);
38 /******************************************************************************/
39 /* Rendez-Vous Points */
40 /******************************************************************************/
/**
 * \brief Look up or create a rendez-vous point
 * \param name Name of the rendez-vous point; NULL is accepted and skips the
 *             lookup in rdv_points
 *
 * The visible code allocates a rendez-vous point with xbt_new0, gives it its
 * own copy of the name and a new communication fifo, and stores it in
 * rdv_points with SIMIX_rdv_free as its free function.
 * NOTE(review): the guards around the allocation and around the dictionary
 * insertion, and the return statement, are not visible in this excerpt -
 * confirm them before relying on this description.
 */
42 smx_rdv_t SIMIX_rdv_create(const char *name)
44 /* two processes may have pushed the same rdv_create request at the same time */
45 smx_rdv_t rdv = name ? xbt_dict_get_or_null(rdv_points, name) : NULL;
48 rdv = xbt_new0(s_smx_rvpoint_t, 1);
/* The rendez-vous point owns a private copy of its name. */
49 rdv->name = name ? xbt_strdup(name) : NULL;
50 rdv->comm_fifo = xbt_fifo_new();
53 xbt_dict_set(rdv_points, rdv->name, rdv, SIMIX_rdv_free);
/**
 * \brief Remove a rendez-vous point from rdv_points, using its name as key
 * \param rdv The rendez-vous point to destroy
 *
 * NOTE(review): SIMIX_rdv_create may leave rdv->name NULL; whether that case
 * is handled here is not visible in this excerpt. The removal presumably
 * triggers SIMIX_rdv_free, which was registered at insertion - confirm.
 */
58 void SIMIX_rdv_destroy(smx_rdv_t rdv)
61 xbt_dict_remove(rdv_points, rdv->name);
/**
 * \brief Free function of the entries of rdv_points
 * \param data The rendez-vous point, passed as an untyped pointer
 *
 * The visible code releases the communication fifo of the rendez-vous point.
 * NOTE(review): any further cleanup (name, structure itself) is not visible
 * in this excerpt - confirm.
 */
64 void SIMIX_rdv_free(void *data)
66 smx_rdv_t rdv = (smx_rdv_t) data;
69 xbt_fifo_free(rdv->comm_fifo);
/**
 * \brief Return the rendez-vous point registered in rdv_points under a name
 * \param name The name to look up
 * \return The result of xbt_dict_get_or_null for that name
 */
73 smx_rdv_t SIMIX_rdv_get_by_name(const char *name)
75 return xbt_dict_get_or_null(rdv_points, name);
/**
 * \brief Walk the communications queued in a rendez-vous point and test
 * whether their sender process runs on a given host
 * \param rdv The rendez-vous point
 * \param host The host compared with the host of each sender
 *
 * NOTE(review): the counter and the return statement are not visible in this
 * excerpt; the function name suggests it returns the number of matches.
 */
78 int SIMIX_rdv_comm_count_by_host(smx_rdv_t rdv, smx_host_t host)
80 smx_action_t comm = NULL;
81 xbt_fifo_item_t item = NULL;
84 xbt_fifo_foreach(rdv->comm_fifo, item, comm, smx_action_t) {
/* Only the sender side (src_proc) is examined. */
85 if (comm->comm.src_proc->smx_host == host)
/**
 * \brief Return the content of the first item of the communication fifo of a
 * rendez-vous point, without removing it
 * \param rdv The rendez-vous point
 */
92 smx_action_t SIMIX_rdv_get_head(smx_rdv_t rdv)
94 return xbt_fifo_get_item_content(xbt_fifo_get_first_item(rdv->comm_fifo));
98 * \brief Push a communication request into a rendez-vous point
99 * \param rdv The rendez-vous point
100 * \param comm The communication request
/* Queue comm in the fifo of rdv and record rdv in the action, so that the
 * action can later be removed from it (see SIMIX_comm_finish). */
102 static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm)
104 xbt_fifo_push(rdv->comm_fifo, comm);
105 comm->comm.rdv = rdv;
109 * \brief Remove a communication request from a rendez-vous point
110 * \param rdv The rendez-vous point
111 * \param comm The communication request
/* Take comm out of the fifo of rdv and clear the back-pointer that
 * SIMIX_rdv_push stored in the action. */
113 static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm)
115 xbt_fifo_remove(rdv->comm_fifo, comm);
116 comm->comm.rdv = NULL;
120 * \brief Checks if there is a communication action queued in a rendez-vous matching our needs
121 * \param type The type of communication we are looking for (comm_send, comm_recv)
122 * \return The communication action if found, NULL otherwise
/**
 * \brief Search the fifo of a rendez-vous point for a queued communication
 * \param rdv The rendez-vous point to search
 * \param type The communication type wanted
 * \param match_fun Optional filter; when NULL every action of the right type
 *                  matches
 * \param data User data of the caller, passed as first argument of match_fun
 *
 * A matching action is unlinked from the fifo, gets one more reference and
 * has its rdv back-pointer cleared.
 * NOTE(review): the declaration of action and the return statements are not
 * visible in this excerpt - confirm them.
 */
124 smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
125 int (*match_fun)(void *, void *), void *data)
128 xbt_fifo_item_t item;
129 void* req_data = NULL;
131 xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t){
/* Pick the user data of the side that queued the action. In the visible
 * lines req_data is not reset per iteration, so an action of another type
 * would keep the value of the previous iteration. */
132 if (action->comm.type == SIMIX_COMM_SEND) {
133 req_data = action->comm.src_data;
134 } else if (action->comm.type == SIMIX_COMM_RECEIVE) {
135 req_data = action->comm.dst_data;
137 if (action->comm.type == type && (!match_fun || match_fun(data, req_data))) {
138 DEBUG1("Found a matching communication action %p", action);
/* Unlink and free the fifo item only; the action itself stays alive and the
 * new reference belongs to the caller. */
139 xbt_fifo_remove_item(rdv->comm_fifo, item);
140 xbt_fifo_free_item(item);
141 action->comm.refcount++;
142 action->comm.rdv = NULL;
145 DEBUG3("Sorry, communication action %p does not match our needs:"
146 " its type is %d but we are looking for a comm of type %d",
147 action, action->comm.type, type);
149 DEBUG0("No matching communication action found");
153 /******************************************************************************/
154 /* Communication Actions */
155 /******************************************************************************/
158 * \brief Creates a new communicate action
159 * \param type The type of request (comm_send, comm_recv)
160 * \return The new communicate action
/* Take an action from the action mallocator of simix_global and set it up as
 * a communicate action in the waiting state, holding one reference.
 * NOTE(review): the declaration of act, the guard around the category
 * assignment and the return statement are not visible in this excerpt. */
162 smx_action_t SIMIX_comm_new(e_smx_comm_type_t type)
166 /* alloc structures */
167 act = xbt_mallocator_get(simix_global->action_mallocator);
168 act->type = SIMIX_ACTION_COMMUNICATE;
169 act->state = SIMIX_WAITING;
171 /* set communication */
172 act->comm.type = type;
/* The single initial reference belongs to the creator of the action. */
173 act->comm.refcount = 1;
175 #ifdef HAVE_LATENCY_BOUND_TRACKING
176 //initialize with unknown value
177 act->latency_limited = -1;
181 act->category = NULL;
184 DEBUG1("Create communicate action %p", act);
190 * \brief Destroy a communicate action
191 * \param action The communicate action to be destroyed
/* Drop one reference on the action. The visible cleanup code frees the name,
 * releases the internal surf actions and gives the action back to the action
 * mallocator of simix_global.
 * NOTE(review): the statement executed when references remain (presumably an
 * early return) is not visible in this excerpt - confirm. */
193 void SIMIX_comm_destroy(smx_action_t action)
195 DEBUG2("Destroy action %p (refcount:%d)", action, action->comm.refcount);
/* Dropping a reference that does not exist is treated as a fatal bug. */
197 if (action->comm.refcount <= 0)
198 xbt_die(bprintf("the refcount of comm %p is already 0 before decreasing it. That's a bug!",action));
200 action->comm.refcount--;
201 if (action->comm.refcount > 0)
203 DEBUG2("Really free communication %p; refcount is now %d", action,
204 action->comm.refcount);
206 #ifdef HAVE_LATENCY_BOUND_TRACKING
/* Record the latency-bound status before the surf action goes away. */
207 action->latency_limited = SIMIX_comm_is_latency_bounded( action ) ;
211 TRACE_smx_action_destroy(action);
214 xbt_free(action->name);
215 SIMIX_comm_destroy_internal_actions(action);
217 if (action->comm.detached && action->state != SIMIX_DONE) {
218 /* the communication has failed and was detached:
219 * we have to free the buffer */
/* For a detached communication src_data is used as a cleanup function and is
 * called on the source buffer. */
220 ((void_f_pvoid_t) action->comm.src_data)(action->comm.src_buff);
223 xbt_mallocator_release(simix_global->action_mallocator, action);
/**
 * \brief Release the surf actions attached to a communication
 * \param action The communication
 *
 * The surf communication and both timeout actions are unreferenced through
 * their model when they are set, and each pointer is then reset to NULL, so
 * calling this function again is harmless.
 */
226 void SIMIX_comm_destroy_internal_actions(smx_action_t action)
228 if (action->comm.surf_comm){
229 #ifdef HAVE_LATENCY_BOUND_TRACKING
/* Query the latency-bound status while the surf action still exists. */
230 action->latency_limited = SIMIX_comm_is_latency_bounded(action);
232 action->comm.surf_comm->model_type->action_unref(action->comm.surf_comm);
233 action->comm.surf_comm = NULL;
236 if (action->comm.src_timeout){
237 action->comm.src_timeout->model_type->action_unref(action->comm.src_timeout);
238 action->comm.src_timeout = NULL;
241 if (action->comm.dst_timeout){
242 action->comm.dst_timeout->model_type->action_unref(action->comm.dst_timeout);
243 action->comm.dst_timeout = NULL;
/**
 * \brief Post the sender side of a communication on a rendez-vous point
 * \param src_proc The sending process
 * \param rdv The rendez-vous point
 * \param task_size Stored in the action and later given to surf as the amount to transfer
 * \param rate Stored in the action and later given to surf as the rate
 * \param src_buff Source buffer
 * \param src_buff_size Size of the source buffer
 * \param match_fun Optional filter used to pick a queued receive
 * \param data User data of the sender; also given to match_fun
 *
 * A queued receive matching the request is reused; otherwise a new send
 * action is created and queued. The sender fields are then filled in and
 * SIMIX_comm_start is called.
 * NOTE(review): the end of the parameter list, the branch conditions and the
 * return statement are not visible in this excerpt - confirm them.
 */
247 smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
248 double task_size, double rate,
249 void *src_buff, size_t src_buff_size,
250 int (*match_fun)(void *, void *), void *data,
255 /* Look for communication request matching our needs.
256 If it is not found then create it and push it into the rendez-vous point */
257 action = SIMIX_rdv_get_request(rdv, SIMIX_COMM_RECEIVE, match_fun, data);
260 action = SIMIX_comm_new(SIMIX_COMM_SEND);
261 SIMIX_rdv_push(rdv, action);
/* Both sides are known: SIMIX_comm_start only starts actions in the READY
 * state. */
263 action->state = SIMIX_READY;
264 action->comm.type = SIMIX_COMM_READY;
267 /* If the communication action is detached then decrease the refcount
268 * by one, so it will be eliminated by the receivers destroy call */
270 action->comm.detached = 1;
271 action->comm.refcount--;
274 /* Setup the communication request */
275 action->comm.src_proc = src_proc;
276 action->comm.task_size = task_size;
277 action->comm.rate = rate;
278 action->comm.src_buff = src_buff;
279 action->comm.src_buff_size = src_buff_size;
280 action->comm.src_data = data;
/* NOTE(review): the guard of this assignment is not visible here. */
283 action->state = SIMIX_RUNNING;
287 SIMIX_comm_start(action);
/**
 * \brief Post the receiver side of a communication on a rendez-vous point
 * \param dst_proc The receiving process
 * \param rdv The rendez-vous point
 * \param dst_buff Destination buffer
 * \param dst_buff_size Pointer to the size of the destination buffer; it is
 *                      stored in the action and updated by
 *                      SIMIX_comm_copy_data
 * \param match_fun Optional filter used to pick a queued send
 * \param data User data of the receiver; also given to match_fun
 *
 * A queued send matching the request is reused; otherwise a new receive
 * action is created and queued. The receiver fields are then filled in and
 * SIMIX_comm_start is called.
 * NOTE(review): the branch conditions, the declaration of action and the
 * return statement are not visible in this excerpt - confirm them.
 */
291 smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv,
292 void *dst_buff, size_t *dst_buff_size,
293 int (*match_fun)(void *, void *), void *data)
297 /* Look for communication request matching our needs.
298 * If it is not found then create it and push it into the rendez-vous point
300 action = SIMIX_rdv_get_request(rdv, SIMIX_COMM_SEND, match_fun, data);
303 action = SIMIX_comm_new(SIMIX_COMM_RECEIVE);
304 SIMIX_rdv_push(rdv, action);
306 action->state = SIMIX_READY;
307 action->comm.type = SIMIX_COMM_READY;
310 /* Setup communication request */
311 action->comm.dst_proc = dst_proc;
312 action->comm.dst_buff = dst_buff;
313 action->comm.dst_buff_size = dst_buff_size;
314 action->comm.dst_data = data;
/* NOTE(review): the guard of this assignment is not visible here. */
317 action->state = SIMIX_RUNNING;
321 SIMIX_comm_start(action);
/**
 * \brief Handle a comm_wait request
 * \param req The request; the communication and the timeout are read from
 *            req->comm_wait
 * \param idx NOTE(review): not used in the lines visible here; presumably a
 *            choice made by the model checker - confirm
 *
 * The request is attached to the action and the action becomes the
 * waiting_action of the issuer. Two paths are visible: one that forces the
 * final state of the action and finishes it at once, and one that either
 * finishes an already terminated action or arms a surf sleep action.
 * NOTE(review): the conditions selecting these paths and the declaration of
 * sleep are not visible in this excerpt - confirm them.
 */
325 void SIMIX_pre_comm_wait(smx_req_t req, int idx)
327 smx_action_t action = req->comm_wait.comm;
328 double timeout = req->comm_wait.timeout;
331 /* Associate this request to the action */
332 xbt_fifo_push(action->request_list, req);
333 req->issuer->waiting_action = action;
337 action->state = SIMIX_DONE;
339 /* If we reached this point, the wait request must have a timeout */
340 /* Otherwise it shouldn't be enabled and executed by the MC */
/* The timeout is blamed on the side that issued the wait. */
344 if(action->comm.src_proc == req->issuer)
345 action->state = SIMIX_SRC_TIMEOUT;
347 action->state = SIMIX_DST_TIMEOUT;
350 SIMIX_comm_finish(action);
354 /* If the action has already finished, perform the error handling, */
355 /* otherwise set up a waiting timeout on the right side */
356 if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING) {
357 SIMIX_comm_finish(action);
358 } else { /* if (timeout >= 0) { we need a surf sleep action even when there is no timeout, otherwise surf won't tell us when the host fails */
/* The sleep runs on the host of the issuer and carries the action as its
 * data. */
359 sleep = surf_workstation_model->extension.workstation.sleep(req->issuer->smx_host->host, timeout);
360 surf_workstation_model->action_data_set(sleep, action);
/* Store the sleep on the side of the issuer; SIMIX_post_comm reads these
 * fields to tell which side timed out or failed. */
362 if (req->issuer == action->comm.src_proc)
363 action->comm.src_timeout = sleep;
365 action->comm.dst_timeout = sleep;
/**
 * \brief Handle a comm_test request
 * \param req The request; the communication is read from req->comm_test and
 *            the outcome is written to req->comm_test.result
 *
 * Two variants are visible. The first considers the communication testable
 * as soon as both processes are known and then forces it to the done state.
 * The second reports whether the action has left the waiting and running
 * states. In both, a positive result finishes the action through
 * SIMIX_comm_finish; otherwise the request is answered directly.
 * NOTE(review): the condition selecting the variant and the else keywords
 * are not visible in this excerpt - confirm them.
 */
369 void SIMIX_pre_comm_test(smx_req_t req)
371 smx_action_t action = req->comm_test.comm;
374 req->comm_test.result = action->comm.src_proc && action->comm.dst_proc;
375 if(req->comm_test.result){
376 action->state = SIMIX_DONE;
/* The request must be in the request list: SIMIX_comm_finish answers the
 * requests it finds there. */
377 xbt_fifo_push(action->request_list, req);
378 SIMIX_comm_finish(action);
380 SIMIX_request_answer(req);
385 req->comm_test.result = (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING);
386 if (req->comm_test.result) {
387 xbt_fifo_push(action->request_list, req);
388 SIMIX_comm_finish(action);
390 SIMIX_request_answer(req);
/**
 * \brief Handle a comm_testany request
 * \param req The request; the communications are read from
 *            req->comm_testany.comms and the index of a terminated one is
 *            written to req->comm_testany.result (-1 initially)
 * \param idx Index of the communication to finish in the first variant below
 *
 * Two variants are visible. The first either answers the request directly or
 * forces the communication at position idx to the done state. The second
 * scans the communications for one that has left the waiting and running
 * states.
 * NOTE(review): the conditions selecting the variants, the declarations of
 * cursor and action, and any early exit from the loop are not visible in
 * this excerpt - confirm them.
 */
394 void SIMIX_pre_comm_testany(smx_req_t req, int idx)
398 xbt_dynar_t actions = req->comm_testany.comms;
399 req->comm_testany.result = -1;
403 SIMIX_request_answer(req);
405 action = xbt_dynar_get_as(actions, idx, smx_action_t);
406 req->comm_testany.result = idx;
407 xbt_fifo_push(action->request_list, req);
408 action->state = SIMIX_DONE;
409 SIMIX_comm_finish(action);
414 xbt_dynar_foreach(req->comm_testany.comms,cursor,action) {
415 if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING) {
/* Report the position of the terminated communication. */
416 req->comm_testany.result = cursor;
417 xbt_fifo_push(action->request_list, req);
418 SIMIX_comm_finish(action);
422 SIMIX_request_answer(req);
/**
 * \brief Handle a comm_waitany request
 * \param req The request; the communications are read from
 *            req->comm_waitany.comms
 * \param idx Index of the communication to finish in the first variant below
 *
 * Two variants are visible. The first forces the communication at position
 * idx to the done state and finishes it. The second attaches the request to
 * every communication and finishes one that has already left the waiting and
 * running states.
 * NOTE(review): the condition selecting the variant, the declaration of
 * action and any early exit from the loop are not visible in this excerpt -
 * confirm them.
 */
425 void SIMIX_pre_comm_waitany(smx_req_t req, int idx)
428 unsigned int cursor = 0;
429 xbt_dynar_t actions = req->comm_waitany.comms;
432 action = xbt_dynar_get_as(actions, idx, smx_action_t);
433 xbt_fifo_push(action->request_list, req);
434 req->comm_waitany.result = idx;
435 action->state = SIMIX_DONE;
436 SIMIX_comm_finish(action);
440 xbt_dynar_foreach(actions, cursor, action){
441 /* Associate this request to the action */
442 xbt_fifo_push(action->request_list, req);
443 if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING){
/* SIMIX_comm_finish detaches a waitany request from the other actions (see
 * SIMIX_waitany_req_remove_from_actions). */
444 SIMIX_comm_finish(action);
/**
 * \brief Detach a waitany request from all the communications it waits for
 * \param req The waitany request
 *
 * The request is removed from the request list of every communication in
 * req->comm_waitany.comms.
 * NOTE(review): the declaration of action is not visible in this excerpt.
 */
450 void SIMIX_waitany_req_remove_from_actions(smx_req_t req)
453 unsigned int cursor = 0;
454 xbt_dynar_t actions = req->comm_waitany.comms;
456 xbt_dynar_foreach(actions, cursor, action){
457 xbt_fifo_remove(action->request_list, req);
462 * \brief Start the simulation of a communication request
463 * \param action The communication action
/* Nothing is started unless the action is in the READY state. For a ready
 * action a surf communication is created between the hosts of the two
 * processes and the action moves to the RUNNING state. A surf action that is
 * already failed turns the state into a link failure, and a suspended peer
 * suspends the surf action. */
465 static XBT_INLINE void SIMIX_comm_start(smx_action_t action)
467 /* If both the sender and the receiver are already there, start the communication */
468 if (action->state == SIMIX_READY) {
469 smx_host_t sender = action->comm.src_proc->smx_host;
470 smx_host_t receiver = action->comm.dst_proc->smx_host;
472 DEBUG3("Starting communication %p from '%s' to '%s'", action,
473 SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver));
475 action->comm.surf_comm = surf_workstation_model->extension.workstation.
476 communicate(sender->host, receiver->host, action->comm.task_size, action->comm.rate);
/* Attach the simix action to the surf action as its data. */
478 surf_workstation_model->action_data_set(action->comm.surf_comm, action);
480 action->state = SIMIX_RUNNING;
483 TRACE_smx_action_communicate(action, action->comm.src_proc);
486 /* If a link is failed, detect it immediately */
487 if (surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED) {
488 DEBUG2("Communication from '%s' to '%s' failed to start because of a link failure",
489 SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver));
490 action->state = SIMIX_LINK_FAILURE;
/* This also resets surf_comm to NULL. NOTE(review): surf_comm is used again
 * below; whether an else or a return separates the two uses is not visible
 * in this excerpt - confirm. */
491 SIMIX_comm_destroy_internal_actions(action);
494 /* If any of the process is suspend, create the action but stop its execution,
495 it will be restarted when the sender process resume */
496 if (SIMIX_process_is_suspended(action->comm.src_proc) ||
497 SIMIX_process_is_suspended(action->comm.dst_proc)) {
498 /* FIXME: check what should happen with the action state */
499 surf_workstation_model->suspend(action->comm.surf_comm);
/**
 * \brief Answer every request waiting on a terminated communication
 * \param action The communication
 *
 * Each request is taken out of the request list of the action in turn. On
 * success the data is copied; on a timeout or failure state an exception is
 * posted to the issuer of the request. The issuer stops waiting on the
 * action and the request is answered.
 * NOTE(review): the declaration of req, the case labels of the success
 * state, the TRY and break statements and the place where destroy_count is
 * incremented are not visible in this excerpt - confirm them.
 */
504 void SIMIX_comm_finish(smx_action_t action)
/* Number of SIMIX_comm_destroy calls performed once all requests are
 * answered. */
506 unsigned int destroy_count = 0;
509 while ((req = xbt_fifo_shift(action->request_list))) {
511 /* If a waitany request is waiting for this action to finish, then remove
512 it from the other actions in the waitany list. Afterwards, get the
513 position of the actual action in the waitany request's actions dynar and
514 return it as the result of the call */
515 if (req->call == REQ_COMM_WAITANY) {
516 SIMIX_waitany_req_remove_from_actions(req);
518 req->comm_waitany.result = xbt_dynar_search(req->comm_waitany.comms, &action);
521 /* If the action is still in a rendez-vous point then remove from it */
522 if (action->comm.rdv)
523 SIMIX_rdv_remove(action->comm.rdv, action);
525 DEBUG1("SIMIX_comm_finish: action state = %d", action->state);
527 /* Check out for errors */
528 switch (action->state) {
531 DEBUG1("Communication %p complete!", action);
532 SIMIX_comm_copy_data(action);
/* In every error case below the exception is caught into the running
 * context of the issuer and doexception is raised on that process. */
535 case SIMIX_SRC_TIMEOUT:
537 THROW0(timeout_error, 0, "Communication timeouted because of sender");
539 CATCH(req->issuer->running_ctx->exception) {
540 req->issuer->doexception = 1;
544 case SIMIX_DST_TIMEOUT:
546 THROW0(timeout_error, 0, "Communication timeouted because of receiver");
548 CATCH(req->issuer->running_ctx->exception) {
549 req->issuer->doexception = 1;
/* A host failure is a host_error for the process on the failed side and a
 * network_error for its peer. */
553 case SIMIX_SRC_HOST_FAILURE:
555 if (req->issuer == action->comm.src_proc)
556 THROW0(host_error, 0, "Host failed");
558 THROW0(network_error, 0, "Remote peer failed");
560 CATCH(req->issuer->running_ctx->exception) {
561 req->issuer->doexception = 1;
565 case SIMIX_DST_HOST_FAILURE:
567 if (req->issuer == action->comm.dst_proc)
568 THROW0(host_error, 0, "Host failed");
570 THROW0(network_error, 0, "Remote peer failed");
572 CATCH(req->issuer->running_ctx->exception) {
573 req->issuer->doexception = 1;
577 case SIMIX_LINK_FAILURE:
579 DEBUG5("Link failure in action %p between '%s' and '%s': posting an exception to the issuer: %s (%p)",
580 action, action->comm.src_proc->smx_host->name, action->comm.dst_proc->smx_host->name,
581 req->issuer->name, req->issuer);
582 THROW0(network_error, 0, "Link failure");
584 CATCH(req->issuer->running_ctx->exception) {
585 req->issuer->doexception = 1;
593 /* if there is an exception during a waitany or a testany, indicate the position of the failed communication */
594 if (req->issuer->doexception) {
595 if (req->call == REQ_COMM_WAITANY) {
596 req->issuer->running_ctx->exception.value = xbt_dynar_search(req->comm_waitany.comms, &action);
598 else if (req->call == REQ_COMM_TESTANY) {
599 req->issuer->running_ctx->exception.value = xbt_dynar_search(req->comm_testany.comms, &action);
603 req->issuer->waiting_action = NULL;
604 SIMIX_request_answer(req);
/* The destroy calls are deferred until the loop has finished using the
 * action. */
608 while(destroy_count-- > 0)
609 SIMIX_comm_destroy(action);
/**
 * \brief Update a communication from the state of its surf actions
 * \param action The communication
 *
 * The first matching condition wins, in this order: sender timeout, receiver
 * timeout, sender host failure, receiver host failure, link failure. The
 * remaining assignment sets the done state (its else keyword is not visible
 * in this excerpt). The surf actions are then released and pending requests,
 * if any, are answered through SIMIX_comm_finish.
 */
612 void SIMIX_post_comm(smx_action_t action)
614 /* Update action state */
/* A timeout action that is done means the timeout expired. */
615 if (action->comm.src_timeout &&
616 surf_workstation_model->action_state_get(action->comm.src_timeout) == SURF_ACTION_DONE)
617 action->state = SIMIX_SRC_TIMEOUT;
618 else if (action->comm.dst_timeout &&
619 surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_DONE)
620 action->state = SIMIX_DST_TIMEOUT;
/* A timeout action that failed is reported as a failure of the host on that
 * side. */
621 else if (action->comm.src_timeout &&
622 surf_workstation_model->action_state_get(action->comm.src_timeout) == SURF_ACTION_FAILED)
623 action->state = SIMIX_SRC_HOST_FAILURE;
624 else if (action->comm.dst_timeout &&
625 surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_FAILED)
626 action->state = SIMIX_DST_HOST_FAILURE;
627 else if (action->comm.surf_comm &&
628 surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED)
629 action->state = SIMIX_LINK_FAILURE;
631 action->state = SIMIX_DONE;
633 DEBUG1("SIMIX_post_comm: action state = %d", action->state);
635 /* After this point the surf actions associated with the simix communicate
636 action are no longer needed, thus we delete them. */
637 SIMIX_comm_destroy_internal_actions(action);
639 /* If there are requests associated with the action, then answer them */
640 if (xbt_fifo_size(action->request_list))
641 SIMIX_comm_finish(action);
/**
 * \brief Cancel a communication
 * \param action The communication
 *
 * A waiting action is removed from its rendez-vous point and marked failed;
 * the other visible path cancels the surf communication.
 * NOTE(review): the else branch and the guard protecting the surf call are
 * not visible in this excerpt - confirm them.
 */
644 void SIMIX_comm_cancel(smx_action_t action)
646 /* If the action is in the waiting state, it is still queued in a rdv */
647 /* so remove from it and delete it */
648 if (action->state == SIMIX_WAITING) {
649 SIMIX_rdv_remove(action->comm.rdv, action);
650 action->state = SIMIX_FAILED;
652 /* When running the MC there are no surf actions */
654 surf_workstation_model->action_cancel(action->comm.surf_comm);
/**
 * \brief Suspend the surf communication of an action
 * \param action The communication
 *
 * Only surf_comm is suspended; the timeout actions are left untouched.
 */
658 void SIMIX_comm_suspend(smx_action_t action)
660 /*FIXME: shall we suspend also the timeout actions? */
661 surf_workstation_model->suspend(action->comm.surf_comm);
/**
 * \brief Resume the surf communication of an action
 * \param action The communication
 *
 * Only surf_comm is resumed; the timeout actions are left untouched.
 */
664 void SIMIX_comm_resume(smx_action_t action)
666 /*FIXME: check what happen with the timeouts */
667 surf_workstation_model->resume(action->comm.surf_comm);
671 /************* Action Getters **************/
674 * \brief get the amount remaining from the communication
675 * \param action The communication
/* The result depends on the state of the action: one branch asks surf for
 * the remaining amount of the surf communication, the two others yield 0.
 * NOTE(review): the case labels, the declaration of remains and the return
 * statement are not visible in this excerpt - confirm which states map to
 * which branch. */
677 double SIMIX_comm_get_remains(smx_action_t action)
681 switch (action->state) {
684 remains = surf_workstation_model->get_remains(action->comm.surf_comm);
689 remains = 0; /*FIXME: check what should be returned */
693 remains = 0; /*FIXME: is this correct? */
/**
 * \brief Return the current state of a communication
 * \param action The communication
 * \return The state field of the action
 */
699 e_smx_state_t SIMIX_comm_get_state(smx_action_t action)
701 return action->state;
705 * \brief Return the user data associated to the sender of the communication
706 * \param action The communication
707 * \return the user data
/* Plain accessor of comm.src_data, the value stored by SIMIX_comm_isend. */
709 void* SIMIX_comm_get_src_data(smx_action_t action)
711 return action->comm.src_data;
715 * \brief Return the user data associated to the receiver of the communication
716 * \param action The communication
717 * \return the user data
/* Plain accessor of comm.dst_data, the value stored by SIMIX_comm_irecv. */
719 void* SIMIX_comm_get_dst_data(smx_action_t action)
721 return action->comm.dst_data;
/**
 * \brief Return the sender process of a communication
 * \param action The communication
 * \return The src_proc field, as stored by SIMIX_comm_isend
 */
724 smx_process_t SIMIX_comm_get_src_proc(smx_action_t action)
726 return action->comm.src_proc;
/**
 * \brief Return the receiver process of a communication
 * \param action The communication
 * \return The dst_proc field, as stored by SIMIX_comm_irecv
 */
729 smx_process_t SIMIX_comm_get_dst_proc(smx_action_t action)
731 return action->comm.dst_proc;
734 #ifdef HAVE_LATENCY_BOUND_TRACKING
736 * \brief verify if communication is latency bounded
737 * \param action The communication
/* While the surf communication exists, the value is refreshed from surf and
 * cached in the action; afterwards the cached value is returned (it starts
 * at -1, see SIMIX_comm_new). */
739 XBT_INLINE int SIMIX_comm_is_latency_bounded(smx_action_t action)
741 if (action->comm.surf_comm){
742 DEBUG1("Getting latency limited for surf_action (%p)", action->comm.surf_comm);
743 action->latency_limited = surf_workstation_model->get_latency_limited(action->comm.surf_comm);
744 DEBUG1("Action limited is %d", action->latency_limited);
746 return action->latency_limited;
750 /******************************************************************************/
751 /* SIMIX_comm_copy_data callbacks */
752 /******************************************************************************/
/* Callback invoked by SIMIX_comm_copy_data to move the payload. The default
 * copies the pointer value only; SIMIX_comm_set_copy_data_callback replaces
 * it. */
753 static void (*SIMIX_comm_copy_data_callback) (smx_action_t, size_t) =
754 &SIMIX_comm_copy_pointer_callback;
/**
 * \brief Select the callback used by SIMIX_comm_copy_data to copy the data
 * \param callback The new callback; it receives the communication and the
 *                 number of bytes to copy
 *
 * NOTE(review): the line holding the return type is not visible in this
 * excerpt.
 */
757 SIMIX_comm_set_copy_data_callback(void (*callback) (smx_action_t, size_t))
759 SIMIX_comm_copy_data_callback = callback;
/**
 * \brief Copy callback that transfers the address of the source buffer
 * \param comm The communication
 * \param buff_size Number of bytes to copy; must be sizeof(void *)
 *
 * The destination buffer is treated as a pointer slot and receives the
 * address of the source buffer; no payload bytes are copied.
 */
762 void SIMIX_comm_copy_pointer_callback(smx_action_t comm, size_t buff_size)
764 xbt_assert1((buff_size == sizeof(void *)),
765 "Cannot copy %zu bytes: must be sizeof(void*)", buff_size);
766 *(void **) (comm->comm.dst_buff) = comm->comm.src_buff;
/**
 * \brief Copy callback that copies the payload bytes with memcpy
 * \param comm The communication
 * \param buff_size Number of bytes copied from the source buffer to the
 *                  destination buffer
 */
769 void SIMIX_comm_copy_buffer_callback(smx_action_t comm, size_t buff_size)
771 memcpy(comm->comm.dst_buff, comm->comm.src_buff, buff_size);
775 * \brief Copy the communication data from the sender's buffer to the receiver's one
776 * \param comm The communication
/* The copy is skipped when a buffer is missing or when the copied flag is
 * already set. The amount is the source size, capped by the destination
 * size when one is provided, and the actual transfer is delegated to
 * SIMIX_comm_copy_data_callback.
 * NOTE(review): the return statement of the guard and the lines between the
 * size update and the callback call are not visible in this excerpt. */
778 void SIMIX_comm_copy_data(smx_action_t comm)
780 size_t buff_size = comm->comm.src_buff_size;
781 /* If there is no data to copy (or it was already copied) then return */
782 if (!comm->comm.src_buff || !comm->comm.dst_buff || comm->comm.copied == 1)
785 DEBUG6("Copying comm %p data from %s (%p) -> %s (%p) (%zu bytes)",
787 comm->comm.src_proc->smx_host->name, comm->comm.src_buff,
788 comm->comm.dst_proc->smx_host->name, comm->comm.dst_buff, buff_size);
790 /* Copy at most dst_buff_size bytes of the message to receiver's buffer */
791 if (comm->comm.dst_buff_size)
792 buff_size = MIN(buff_size, *(comm->comm.dst_buff_size));
794 /* Update the receiver's buffer size to the copied amount */
795 if (comm->comm.dst_buff_size)
796 *comm->comm.dst_buff_size = buff_size;
801 (*SIMIX_comm_copy_data_callback) (comm, buff_size);
803 /* Set the copied flag so we copy data only once */
804 /* (this function might be called from both communication ends) */
805 comm->comm.copied = 1;