1 /* Copyright (c) 2009, 2010. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
/* Declares the simix_network log category as a child of the simix category;
 * presumably it is the default category for the DEBUGn calls of this file. */
12 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_network, simix,
13 "Logging specific to SIMIX (network)");
/* Registry of the rendez-vous points, keyed by name: created in
 * SIMIX_network_init, filled by SIMIX_rdv_create, freed in SIMIX_network_exit. */
15 static xbt_dict_t rdv_points = NULL;
/* Forward declarations of the helpers that are private to this file. */
17 static XBT_INLINE void SIMIX_comm_start(smx_action_t action);
18 static void SIMIX_comm_finish(smx_action_t action);
19 static void SIMIX_waitany_req_remove_from_actions(smx_req_t req);
20 static void SIMIX_comm_copy_data(smx_action_t comm);
21 static smx_action_t SIMIX_comm_new(e_smx_comm_type_t type);
22 static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm);
23 static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm);
24 static smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
25 int (*match_fun)(void *, void *), void *);
26 static void SIMIX_rdv_free(void *data);
/** \brief Initializes the network module by creating the rdv_points dictionary */
28 void SIMIX_network_init(void)
30 rdv_points = xbt_dict_new();
/** \brief Finalizes the network module by freeing the rdv_points dictionary */
33 void SIMIX_network_exit(void)
35 xbt_dict_free(&rdv_points);
38 /******************************************************************************/
39 /* Rendez-Vous Points */
40 /******************************************************************************/
/**
 * \brief Creates a rendez-vous point, after looking up rdv_points for one
 *        already registered under the same name
 * \param name Name of the rendez-vous point (it is duplicated); when NULL the
 *             point has no name and no lookup is attempted
 *
 * The new point gets an empty comm_fifo and is stored in rdv_points with
 * SIMIX_rdv_free as its free function.
 * NOTE(review): presumably an already registered point is returned as is and
 * only named points are stored in the dictionary; confirm against the guards.
 */
42 smx_rdv_t SIMIX_rdv_create(const char *name)
44 /* two processes may have pushed the same rdv_create request at the same time */
45 smx_rdv_t rdv = name ? xbt_dict_get_or_null(rdv_points, name) : NULL;
48 rdv = xbt_new0(s_smx_rvpoint_t, 1);
49 rdv->name = name ? xbt_strdup(name) : NULL;
50 rdv->comm_fifo = xbt_fifo_new();
53 xbt_dict_set(rdv_points, rdv->name, rdv, SIMIX_rdv_free);
/**
 * \brief Destroys a rendez-vous point by removing its entry from rdv_points
 * \param rdv The rendez-vous point; its name is the key that gets removed
 *
 * NOTE(review): the entry was stored with SIMIX_rdv_free as free function, so
 * presumably xbt_dict_remove is what releases the structure; confirm. A point
 * created with a NULL name has no key to remove.
 */
58 void SIMIX_rdv_destroy(smx_rdv_t rdv)
61 xbt_dict_remove(rdv_points, rdv->name);
/**
 * \brief Free function given to xbt_dict_set for the entries of rdv_points
 * \param data The rendez-vous point, passed as an untyped pointer
 *
 * Releases the fifo of pending communications of the point.
 */
64 void SIMIX_rdv_free(void *data)
66 smx_rdv_t rdv = (smx_rdv_t) data;
69 xbt_fifo_free(rdv->comm_fifo);
/**
 * \brief Looks up a rendez-vous point in rdv_points
 * \param name The name used as dictionary key
 * \return The result of xbt_dict_get_or_null, hence NULL when nothing is
 *         registered under name
 */
73 smx_rdv_t SIMIX_rdv_get_by_name(const char *name)
75 return xbt_dict_get_or_null(rdv_points, name);
/**
 * \brief Inspects the communications queued on a rendez-vous point and tests
 *        which of them have their source process located on a given host
 * \param rdv The rendez-vous point whose comm_fifo is walked
 * \param host The host compared with comm.src_proc->smx_host
 * \return NOTE(review): presumably the number of queued communications whose
 *         sender runs on host; confirm against the counter handling
 *
 * NOTE(review): comm.src_proc is dereferenced without a visible NULL check,
 * while SIMIX_comm_irecv queues actions for which only dst_proc is set; check
 * that a pending receive cannot be met here.
 */
78 int SIMIX_rdv_comm_count_by_host(smx_rdv_t rdv, smx_host_t host)
80 smx_action_t comm = NULL;
81 xbt_fifo_item_t item = NULL;
84 xbt_fifo_foreach(rdv->comm_fifo, item, comm, smx_action_t) {
85 if (comm->comm.src_proc->smx_host == host)
/**
 * \brief Returns the content of the first item of the comm_fifo of a
 *        rendez-vous point, without removing it
 * \param rdv The rendez-vous point
 * NOTE(review): the behavior on an empty fifo depends on what
 * xbt_fifo_get_item_content does with the result of xbt_fifo_get_first_item;
 * confirm against the xbt_fifo documentation.
 */
92 smx_action_t SIMIX_rdv_get_head(smx_rdv_t rdv)
94 return xbt_fifo_get_item_content(xbt_fifo_get_first_item(rdv->comm_fifo));
98 * \brief Push a communication request into a rendez-vous point
99 * \param rdv The rendez-vous point
100 * \param comm The communication request
/* Queues comm on rdv->comm_fifo and records rdv in comm->comm.rdv, so that the
 * communication knows which rendez-vous point it is waiting on. */
102 static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm)
104 xbt_fifo_push(rdv->comm_fifo, comm);
105 comm->comm.rdv = rdv;
109 * \brief Remove a communication request from a rendez-vous point
110 * \param rdv The rendez-vous point
111 * \param comm The communication request
/* Takes comm out of rdv->comm_fifo and resets comm->comm.rdv to NULL, which is
 * the marker SIMIX_comm_finish tests to know whether the action is still queued. */
113 static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm)
115 xbt_fifo_remove(rdv->comm_fifo, comm);
116 comm->comm.rdv = NULL;
120 * \brief Checks if there is a communication action queued in a rendez-vous matching our needs
121 * \param type The type of communication we are looking for (comm_send, comm_recv)
122 * \return The communication action if found, NULL otherwise
/*
 * Iterates over the requests queued in rdv->comm_fifo, looking for one whose
 * comm.type equals type and which is accepted by match_fun.
 *  - rdv:       rendez-vous point whose queue is scanned
 *  - match_fun: optional filter; when NULL every request of the right type is
 *               accepted. It is called as match_fun(data, req_data), req_data
 *               being the src_data of a queued send or the dst_data of a
 *               queued receive
 *  - data:      caller data handed to match_fun as first argument
 * An accepted action is unlinked from the fifo, gets one more reference
 * (refcount++) and has its comm.rdv reset to NULL.
 */
124 smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
125 int (*match_fun)(void *, void *), void *data)
128 xbt_fifo_item_t item;
129 void* req_data = NULL;
131 xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t){
/* Select the user data of the side that posted this queued request */
132 if (action->comm.type == SIMIX_COMM_SEND) {
133 req_data = action->comm.src_data;
134 } else if (action->comm.type == SIMIX_COMM_RECEIVE) {
135 req_data = action->comm.dst_data;
137 if (action->comm.type == type && (!match_fun || match_fun(data, req_data))) {
138 DEBUG1("Found a matching communication action %p", action);
/* Unlink through the item itself, then release the fifo item */
139 xbt_fifo_remove_item(rdv->comm_fifo, item);
140 xbt_fifo_free_item(item);
141 action->comm.refcount++;
142 action->comm.rdv = NULL;
145 DEBUG3("Sorry, communication action %p does not match our needs:"
146 " its type is %d but we are looking for a comm of type %d",
147 action, action->comm.type, type);
149 DEBUG0("No matching communication action found");
153 /******************************************************************************/
154 /* Communication Actions */
155 /******************************************************************************/
158 * \brief Creates a new communicate action
159 * \param type The type of request (comm_send, comm_recv)
160 * \return The new communicate action
/* The action is allocated zeroed, so every field not set below starts at
 * 0/NULL. It begins in state SIMIX_WAITING with an empty request_list and a
 * refcount of 1. */
162 smx_action_t SIMIX_comm_new(e_smx_comm_type_t type)
166 /* alloc structures */
167 act = xbt_new0(s_smx_action_t, 1);
168 act->type = SIMIX_ACTION_COMMUNICATE;
169 act->state = SIMIX_WAITING;
170 act->request_list = xbt_fifo_new();
172 /* set communication */
173 act->comm.type = type;
174 act->comm.refcount = 1;
176 #ifdef HAVE_LATENCY_BOUND_TRACKING
177 //initialize with unknown value
178 act->latency_limited = -1;
182 act->category = NULL;
185 DEBUG1("Create communicate action %p", act);
191 * \brief Destroy a communicate action
192 * \param action The communicate action to be destroyed
/* Reference-counted release: the refcount is decremented and the resources of
 * the action (name, request_list, internal surf actions) are released by the
 * statements that follow the refcount test. Calling this with a refcount that
 * is already 0 or negative is treated as a bug and aborts through xbt_die.
 * NOTE(review): presumably the function returns early while the refcount is
 * still positive; confirm against the statement guarded by that test. */
194 void SIMIX_comm_destroy(smx_action_t action)
196 DEBUG2("Destroy action %p (refcount:%d)", action, action->comm.refcount);
198 if (action->comm.refcount <= 0)
199 xbt_die(bprintf("the refcount of comm %p is already 0 before decreasing it. That's a bug!",action));
201 action->comm.refcount--;
202 if (action->comm.refcount > 0)
204 DEBUG2("Really free communication %p; refcount is now %d", action,
205 action->comm.refcount);
207 #ifdef HAVE_LATENCY_BOUND_TRACKING
/* Cache the latency-bound flag while the surf action can still be queried */
208 action->latency_limited = SIMIX_comm_is_latency_bounded( action ) ;
212 TRACE_smx_action_destroy(action);
216 xbt_free(action->name);
218 xbt_fifo_free(action->request_list);
220 SIMIX_comm_destroy_internal_actions(action);
/**
 * \brief Releases the surf actions attached to a communication
 * \param action The communication action
 *
 * Each of comm.surf_comm, comm.src_timeout and comm.dst_timeout that is set is
 * unreferenced through its own model and reset to NULL, which makes a second
 * call on the same action harmless for these fields.
 */
225 void SIMIX_comm_destroy_internal_actions(smx_action_t action)
227 if (action->comm.surf_comm){
228 #ifdef HAVE_LATENCY_BOUND_TRACKING
/* Must be read before the surf action is unreferenced */
229 action->latency_limited = SIMIX_comm_is_latency_bounded(action);
231 action->comm.surf_comm->model_type->action_unref(action->comm.surf_comm);
232 action->comm.surf_comm = NULL;
235 if (action->comm.src_timeout){
236 action->comm.src_timeout->model_type->action_unref(action->comm.src_timeout);
237 action->comm.src_timeout = NULL;
240 if (action->comm.dst_timeout){
241 action->comm.dst_timeout->model_type->action_unref(action->comm.dst_timeout);
242 action->comm.dst_timeout = NULL;
/**
 * \brief Posts the sender side of a communication on a rendez-vous point
 * \param src_proc The sending process
 * \param rdv The rendez-vous point
 * \param task_size Stored in comm.task_size, later given to the surf communicate call
 * \param rate Stored in comm.rate, later given to the surf communicate call
 * \param src_buff The buffer of the sender
 * \param src_buff_size The size of src_buff
 * \param match_fun Filter handed to SIMIX_rdv_get_request (may be NULL)
 * \param data Stored as comm.src_data and passed as first argument of match_fun
 *
 * A queued SIMIX_COMM_RECEIVE request is searched first. The visible statements
 * either create a SIMIX_COMM_SEND action and push it on rdv, or mark the action
 * SIMIX_READY / SIMIX_COMM_READY.
 * NOTE(review): presumably the former happens when nothing matched and the
 * latter when a receive was found; confirm against the branch conditions.
 */
246 smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
247 double task_size, double rate,
248 void *src_buff, size_t src_buff_size,
249 int (*match_fun)(void *, void *), void *data,
254 /* Look for communication request matching our needs.
255 If it is not found then create it and push it into the rendez-vous point */
256 action = SIMIX_rdv_get_request(rdv, SIMIX_COMM_RECEIVE, match_fun, data);
259 action = SIMIX_comm_new(SIMIX_COMM_SEND);
260 SIMIX_rdv_push(rdv, action);
262 action->state = SIMIX_READY;
263 action->comm.type = SIMIX_COMM_READY;
266 /* If the communication action is detached then decrease the refcount
267 * by one, so it will be eliminated by the receivers destroy call */
269 action->comm.refcount--;
271 /* Setup the communication request */
272 action->comm.src_proc = src_proc;
273 action->comm.task_size = task_size;
274 action->comm.rate = rate;
275 action->comm.src_buff = src_buff;
276 action->comm.src_buff_size = src_buff_size;
277 action->comm.src_data = data;
/* NOTE(review): the state is forced to SIMIX_RUNNING here, which makes the
 * SIMIX_READY test of SIMIX_comm_start fail; presumably this is a
 * model-checker path without surf actions. Confirm against its guard. */
280 action->state = SIMIX_RUNNING;
284 SIMIX_comm_start(action);
/**
 * \brief Posts the receiver side of a communication on a rendez-vous point
 * \param dst_proc The receiving process
 * \param rdv The rendez-vous point
 * \param dst_buff The buffer of the receiver
 * \param dst_buff_size Pointer to the capacity of dst_buff; SIMIX_comm_copy_data
 *        overwrites the pointed value with the number of bytes copied
 * \param match_fun Filter handed to SIMIX_rdv_get_request (may be NULL)
 * \param data Stored as comm.dst_data and passed as first argument of match_fun
 *
 * Mirror of SIMIX_comm_isend: a queued SIMIX_COMM_SEND request is searched
 * first. The statements below either create a SIMIX_COMM_RECEIVE action and
 * push it on rdv, or mark the action SIMIX_READY / SIMIX_COMM_READY, then fill
 * the receiver fields and call SIMIX_comm_start.
 * NOTE(review): presumably the creation happens when nothing matched and the
 * READY marking when a send was found; confirm against the branch conditions.
 */
288 smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv,
289 void *dst_buff, size_t *dst_buff_size,
290 int (*match_fun)(void *, void *), void *data)
294 /* Look for communication request matching our needs.
295 * If it is not found then create it and push it into the rendez-vous point
297 action = SIMIX_rdv_get_request(rdv, SIMIX_COMM_SEND, match_fun, data);
300 action = SIMIX_comm_new(SIMIX_COMM_RECEIVE);
301 SIMIX_rdv_push(rdv, action);
303 action->state = SIMIX_READY;
304 action->comm.type = SIMIX_COMM_READY;
307 /* Setup communication request */
308 action->comm.dst_proc = dst_proc;
309 action->comm.dst_buff = dst_buff;
310 action->comm.dst_buff_size = dst_buff_size;
311 action->comm.dst_data = data;
314 action->state = SIMIX_RUNNING;
318 SIMIX_comm_start(action);
/**
 * \brief Handles a comm_wait request: attaches it to the communication, then
 *        either finishes the communication at once or arms a sleep action on
 *        the host of the issuer
 * \param req The wait request (comm_wait.comm and comm_wait.timeout are read)
 * \param idx NOTE(review): not used by the visible statements; presumably a
 *            model-checker choice between completion and timeout. Confirm.
 */
322 void SIMIX_pre_comm_wait(smx_req_t req, int idx)
324 smx_action_t action = req->comm_wait.comm;
325 double timeout = req->comm_wait.timeout;
328 /* Associate this request to the action */
329 xbt_fifo_push(action->request_list, req);
330 req->issuer->waiting_action = action;
334 action->state = SIMIX_DONE;
336 /* If we reached this point, the wait request must have a timeout */
337 /* Otherwise it shouldn't be enabled and executed by the MC */
/* The timeout is attributed to the side the issuer is on */
341 if(action->comm.src_proc == req->issuer)
342 action->state = SIMIX_SRC_TIMEOUT;
344 action->state = SIMIX_DST_TIMEOUT;
347 SIMIX_comm_finish(action);
351 /* If the action has already finish perform the error handling, */
352 /* otherwise set up a waiting timeout on the right side */
353 if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING) {
354 SIMIX_comm_finish(action);
355 } else { /* if (timeout >= 0) { we need a surf sleep action even when there is no timeout, otherwise surf won't tell us when the host fails */
356 sleep = surf_workstation_model->extension.workstation.sleep(req->issuer->smx_host->host, timeout);
357 surf_workstation_model->action_data_set(sleep, action);
/* Store the sleep on the side of the issuer: SIMIX_post_comm reads these two
 * fields to tell which end timed out or failed */
359 if (req->issuer == action->comm.src_proc)
360 action->comm.src_timeout = sleep;
362 action->comm.dst_timeout = sleep;
/**
 * \brief Handles a comm_test request and stores the outcome in comm_test.result
 * \param req The test request (comm_test.comm is the tested communication)
 *
 * Two computations of the result are present:
 *  - the communication counts as done as soon as both src_proc and dst_proc
 *    are set, and its state is then forced to SIMIX_DONE;
 *  - the communication counts as done when its state is neither SIMIX_WAITING
 *    nor SIMIX_RUNNING.
 * NOTE(review): presumably the first one is the model-checker path; confirm.
 * When the result is true the request is queued on the action and answered by
 * SIMIX_comm_finish; a direct SIMIX_request_answer call is also present,
 * presumably for a negative result.
 */
366 void SIMIX_pre_comm_test(smx_req_t req)
368 smx_action_t action = req->comm_test.comm;
371 req->comm_test.result = action->comm.src_proc && action->comm.dst_proc;
372 if(req->comm_test.result){
373 action->state = SIMIX_DONE;
374 xbt_fifo_push(action->request_list, req);
375 SIMIX_comm_finish(action);
377 SIMIX_request_answer(req);
382 req->comm_test.result = (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING);
383 if (req->comm_test.result) {
384 xbt_fifo_push(action->request_list, req);
385 SIMIX_comm_finish(action);
387 SIMIX_request_answer(req);
/**
 * \brief Handles a comm_testany request over the dynar comm_testany.comms
 * \param req The testany request; comm_testany.result starts at -1 and
 *            receives the position of a finished communication
 * \param idx Position, in the dynar, of the communication that one branch
 *            declares finished (its state is forced to SIMIX_DONE).
 *            NOTE(review): presumably a model-checker choice; confirm.
 *
 * The other branch walks the dynar and selects a communication whose state is
 * neither SIMIX_WAITING nor SIMIX_RUNNING. A selected communication gets the
 * request queued on it and is completed through SIMIX_comm_finish; direct
 * SIMIX_request_answer calls are present as well, presumably for the case
 * where nothing is finished.
 */
391 void SIMIX_pre_comm_testany(smx_req_t req, int idx)
395 xbt_dynar_t actions = req->comm_testany.comms;
396 req->comm_testany.result = -1;
400 SIMIX_request_answer(req);
402 action = xbt_dynar_get_as(actions, idx, smx_action_t);
403 req->comm_testany.result = idx;
404 xbt_fifo_push(action->request_list, req);
405 action->state = SIMIX_DONE;
406 SIMIX_comm_finish(action);
411 xbt_dynar_foreach(req->comm_testany.comms,cursor,action) {
412 if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING) {
413 req->comm_testany.result = cursor;
414 xbt_fifo_push(action->request_list, req);
415 SIMIX_comm_finish(action);
419 SIMIX_request_answer(req);
/**
 * \brief Handles a comm_waitany request over the dynar comm_waitany.comms
 * \param req The waitany request
 * \param idx Position, in the dynar, of the communication that one branch
 *            declares finished (result set to idx, state forced to SIMIX_DONE).
 *            NOTE(review): presumably a model-checker choice; confirm.
 *
 * The other branch queues the request on every communication of the dynar and
 * finishes one whose state is neither SIMIX_WAITING nor SIMIX_RUNNING.
 * SIMIX_comm_finish then detaches the request from the remaining
 * communications and computes comm_waitany.result.
 */
422 void SIMIX_pre_comm_waitany(smx_req_t req, int idx)
425 unsigned int cursor = 0;
426 xbt_dynar_t actions = req->comm_waitany.comms;
429 action = xbt_dynar_get_as(actions, idx, smx_action_t);
430 xbt_fifo_push(action->request_list, req);
431 req->comm_waitany.result = idx;
432 action->state = SIMIX_DONE;
433 SIMIX_comm_finish(action);
437 xbt_dynar_foreach(actions, cursor, action){
438 /* Associate this request to the action */
439 xbt_fifo_push(action->request_list, req);
440 if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING){
441 SIMIX_comm_finish(action);
/* Detaches a waitany request from the request_list of every communication of
 * its comm_waitany.comms dynar. Called by SIMIX_comm_finish once one of these
 * communications answers the request. */
447 void SIMIX_waitany_req_remove_from_actions(smx_req_t req)
450 unsigned int cursor = 0;
451 xbt_dynar_t actions = req->comm_waitany.comms;
453 xbt_dynar_foreach(actions, cursor, action){
454 xbt_fifo_remove(action->request_list, req);
459 * \brief Start the simulation of a communication request
460 * \param action The communication action
/* The surf communication is created only when the action is in state
 * SIMIX_READY; the action is then moved to SIMIX_RUNNING and, when surf
 * reports the new action as failed, to SIMIX_LINK_FAILURE. */
462 static XBT_INLINE void SIMIX_comm_start(smx_action_t action)
464 /* If both the sender and the receiver are already there, start the communication */
465 if (action->state == SIMIX_READY) {
466 smx_host_t sender = action->comm.src_proc->smx_host;
467 smx_host_t receiver = action->comm.dst_proc->smx_host;
469 DEBUG3("Starting communication %p from '%s' to '%s'", action,
470 SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver));
472 action->comm.surf_comm = surf_workstation_model->extension.workstation.
473 communicate(sender->host, receiver->host, action->comm.task_size, action->comm.rate);
/* Back-pointer from the surf action to the SIMIX action */
475 surf_workstation_model->action_data_set(action->comm.surf_comm, action);
477 action->state = SIMIX_RUNNING;
480 TRACE_smx_action_communicate(action, action->comm.src_proc);
483 /* If a link is failed, detect it immediately */
484 if (surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED) {
485 DEBUG2("Communication from '%s' to '%s' failed to start because of a link failure",
486 SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver));
487 action->state = SIMIX_LINK_FAILURE;
488 SIMIX_comm_destroy_internal_actions(action);
/* NOTE(review): after a link failure SIMIX_comm_destroy_internal_actions has
 * reset comm.surf_comm to NULL; check that the suspend call below cannot be
 * reached with that NULL action when one of the processes is suspended. */
491 /* If any of the process is suspend, create the action but stop its execution,
492 it will be restarted when the sender process resume */
493 if (SIMIX_process_is_suspended(action->comm.src_proc) ||
494 SIMIX_process_is_suspended(action->comm.dst_proc)) {
495 /* FIXME: check what should happen with the action state */
496 surf_workstation_model->suspend(action->comm.surf_comm);
/**
 * \brief Answers every request waiting on a communication
 * \param action The communication action
 *
 * For each request shifted out of action->request_list:
 *  - a waitany request is detached from the other communications it was
 *    waiting on and gets, as result, the position of action in its comms dynar;
 *  - the action is removed from its rendez-vous point if comm.rdv is still set;
 *  - depending on action->state, the data is copied or an exception
 *    (timeout_error, host_error or network_error) is raised with THROW0 and
 *    caught into req->issuer->running_ctx->exception, doexception being set;
 *  - for waitany and testany requests whose issuer has doexception set,
 *    exception.value receives the position of action in the comms dynar;
 *  - the waiting_action of the issuer is cleared and the request is answered.
 */
501 void SIMIX_comm_finish(smx_action_t action)
505 while ((req = xbt_fifo_shift(action->request_list))) {
507 /* If a waitany request is waiting for this action to finish, then remove
508 it from the other actions in the waitany list. Afterwards, get the
509 position of the actual action in the waitany request's actions dynar and
510 return it as the result of the call */
511 if (req->call == REQ_COMM_WAITANY) {
512 SIMIX_waitany_req_remove_from_actions(req);
514 req->comm_waitany.result = xbt_dynar_search(req->comm_waitany.comms, &action);
517 /* If the action is still in a rendez-vous point then remove from it */
518 if (action->comm.rdv)
519 SIMIX_rdv_remove(action->comm.rdv, action);
521 DEBUG1("SIMIX_comm_finish: action state = %d", action->state);
523 /* Check out for errors */
524 switch (action->state) {
527 DEBUG1("Communication %p complete!", action);
528 SIMIX_comm_copy_data(action);
531 case SIMIX_SRC_TIMEOUT:
533 THROW0(timeout_error, 0, "Communication timeouted because of sender");
535 CATCH(req->issuer->running_ctx->exception) {
536 req->issuer->doexception = 1;
540 case SIMIX_DST_TIMEOUT:
542 THROW0(timeout_error, 0, "Communication timeouted because of receiver");
544 CATCH(req->issuer->running_ctx->exception) {
545 req->issuer->doexception = 1;
/* Host failures: the issuer located on the failed side gets host_error, the
 * peer gets network_error */
549 case SIMIX_SRC_HOST_FAILURE:
551 if (req->issuer == action->comm.src_proc)
552 THROW0(host_error, 0, "Host failed");
554 THROW0(network_error, 0, "Remote peer failed");
556 CATCH(req->issuer->running_ctx->exception) {
557 req->issuer->doexception = 1;
561 case SIMIX_DST_HOST_FAILURE:
563 if (req->issuer == action->comm.dst_proc)
564 THROW0(host_error, 0, "Host failed");
566 THROW0(network_error, 0, "Remote peer failed");
568 CATCH(req->issuer->running_ctx->exception) {
569 req->issuer->doexception = 1;
573 case SIMIX_LINK_FAILURE:
575 DEBUG5("Link failure in action %p between '%s' and '%s': posting an exception to the issuer: %s (%p)",
576 action, action->comm.src_proc->smx_host->name, action->comm.dst_proc->smx_host->name,
577 req->issuer->name, req->issuer);
578 THROW0(network_error, 0, "Link failure");
580 CATCH(req->issuer->running_ctx->exception) {
581 req->issuer->doexception = 1;
589 /* if there is an exception during a waitany or a testany, indicate the position of the failed communication */
590 if (req->issuer->doexception) {
591 if (req->call == REQ_COMM_WAITANY) {
592 req->issuer->running_ctx->exception.value = xbt_dynar_search(req->comm_waitany.comms, &action);
594 else if (req->call == REQ_COMM_TESTANY) {
595 req->issuer->running_ctx->exception.value = xbt_dynar_search(req->comm_testany.comms, &action);
599 req->issuer->waiting_action = NULL;
600 SIMIX_request_answer(req);
/**
 * \brief Derives the final state of a communication from the states of its
 *        surf actions, releases them and answers the pending requests
 * \param action The communication action
 *
 * The tests are tried in this order, the first that holds wins: sender timeout
 * done, receiver timeout done, sender timeout failed, receiver timeout failed,
 * surf communication failed. A finished sleep action means a timeout, a failed
 * one is mapped to a host failure, a failed communication to a link failure.
 * NOTE(review): the SIMIX_DONE assignment is presumably the fallback when no
 * test holds; confirm.
 */
604 void SIMIX_post_comm(smx_action_t action)
606 /* Update action state */
607 if (action->comm.src_timeout &&
608 surf_workstation_model->action_state_get(action->comm.src_timeout) == SURF_ACTION_DONE)
609 action->state = SIMIX_SRC_TIMEOUT;
610 else if (action->comm.dst_timeout &&
611 surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_DONE)
612 action->state = SIMIX_DST_TIMEOUT;
613 else if (action->comm.src_timeout &&
614 surf_workstation_model->action_state_get(action->comm.src_timeout) == SURF_ACTION_FAILED)
615 action->state = SIMIX_SRC_HOST_FAILURE;
616 else if (action->comm.dst_timeout &&
617 surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_FAILED)
618 action->state = SIMIX_DST_HOST_FAILURE;
619 else if (action->comm.surf_comm &&
620 surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED)
621 action->state = SIMIX_LINK_FAILURE;
623 action->state = SIMIX_DONE;
625 DEBUG1("SIMIX_post_comm: action state = %d", action->state);
627 /* After this point the surf actions associated with the simix communicate
628 action are no longer needed, thus we delete them. */
629 SIMIX_comm_destroy_internal_actions(action);
631 /* If there are requests associated with the action, then answer them */
632 if (xbt_fifo_size(action->request_list))
633 SIMIX_comm_finish(action);
/**
 * \brief Cancels a communication
 * \param action The communication action
 *
 * An action still in state SIMIX_WAITING is taken out of its rendez-vous point
 * and marked SIMIX_FAILED. The surf communication is cancelled through
 * action_cancel by the other visible statement.
 * NOTE(review): presumably that call is the alternative branch and is skipped
 * under the model checker; confirm against its guard.
 */
636 void SIMIX_comm_cancel(smx_action_t action)
638 /* If the action is a waiting state means that it is still in a rdv */
639 /* so remove from it and delete it */
640 if (action->state == SIMIX_WAITING) {
641 SIMIX_rdv_remove(action->comm.rdv, action);
642 action->state = SIMIX_FAILED;
644 /* When running the MC there are no surf actions */
646 surf_workstation_model->action_cancel(action->comm.surf_comm);
/**
 * \brief Suspends the surf communication attached to a communication action
 * \param action The communication action
 * NOTE(review): comm.surf_comm is passed without a NULL check although it is
 * NULL before SIMIX_comm_start and after SIMIX_comm_destroy_internal_actions;
 * confirm that callers only use this on a started communication.
 */
650 void SIMIX_comm_suspend(smx_action_t action)
652 /*FIXME: shall we suspend also the timeout actions? */
653 surf_workstation_model->suspend(action->comm.surf_comm);
/**
 * \brief Resumes the surf communication attached to a communication action
 * \param action The communication action
 * NOTE(review): as in SIMIX_comm_suspend, comm.surf_comm is passed without a
 * NULL check; confirm that callers only use this on a started communication.
 */
656 void SIMIX_comm_resume(smx_action_t action)
658 /*FIXME: check what happen with the timeouts */
659 surf_workstation_model->resume(action->comm.surf_comm);
663 /************* Action Getters **************/
666 * \brief get the amount remaining from the communication
667 * \param action The communication
/* The value depends on action->state: it is either read from surf through
 * get_remains on comm.surf_comm, or 0.
 * NOTE(review): presumably surf is queried only while the communication is
 * running; confirm against the case labels of the switch. */
669 double SIMIX_comm_get_remains(smx_action_t action)
673 switch (action->state) {
676 remains = surf_workstation_model->get_remains(action->comm.surf_comm);
681 remains = 0; /*FIXME: check what should be returned */
685 remains = 0; /*FIXME: is this correct? */
/**
 * \brief Returns the current state of a communication action
 * \param action The communication
 */
691 e_smx_state_t SIMIX_comm_get_state(smx_action_t action)
693 return action->state;
697 * \brief Return the user data associated to the sender of the communication
698 * \param action The communication
699 * \return the user data
/* The returned pointer is the data argument recorded by SIMIX_comm_isend. */
701 void* SIMIX_comm_get_src_data(smx_action_t action)
703 return action->comm.src_data;
707 * \brief Return the user data associated to the receiver of the communication
708 * \param action The communication
709 * \return the user data
/* The returned pointer is the data argument recorded by SIMIX_comm_irecv. */
711 void* SIMIX_comm_get_dst_data(smx_action_t action)
713 return action->comm.dst_data;
/**
 * \brief Returns the buffer of the sender, as recorded by SIMIX_comm_isend
 * \param action The communication
 */
716 void* SIMIX_comm_get_src_buff(smx_action_t action)
718 return action->comm.src_buff;
/**
 * \brief Returns the buffer of the receiver, as recorded by SIMIX_comm_irecv
 * \param action The communication
 */
721 void* SIMIX_comm_get_dst_buff(smx_action_t action)
723 return action->comm.dst_buff;
/**
 * \brief Returns the size of the sender buffer, as recorded by SIMIX_comm_isend
 * \param action The communication
 */
726 size_t SIMIX_comm_get_src_buff_size(smx_action_t action)
728 return action->comm.src_buff_size;
/**
 * \brief Returns the size of the receiver buffer
 * \param action The communication
 *
 * comm.dst_buff_size is a pointer supplied by the receiver (see
 * SIMIX_comm_irecv); it is dereferenced only when it is not NULL.
 * NOTE(review): the value returned when the pointer is NULL is presumably 0;
 * confirm against the initialization of buff_size.
 */
731 size_t SIMIX_comm_get_dst_buff_size(smx_action_t action)
735 if (action->comm.dst_buff_size)
736 buff_size = *(action->comm.dst_buff_size);
/**
 * \brief Returns the sending process of a communication; the field is set by
 *        SIMIX_comm_isend, so it is NULL while only a receive was posted
 * \param action The communication
 */
743 smx_process_t SIMIX_comm_get_src_proc(smx_action_t action)
745 return action->comm.src_proc;
/**
 * \brief Returns the receiving process of a communication; the field is set by
 *        SIMIX_comm_irecv, so it is NULL while only a send was posted
 * \param action The communication
 */
748 smx_process_t SIMIX_comm_get_dst_proc(smx_action_t action)
750 return action->comm.dst_proc;
753 #ifdef HAVE_LATENCY_BOUND_TRACKING
755 * \brief verify if communication is latency bounded
756 * \param action The communication
/* While comm.surf_comm exists, action->latency_limited is refreshed from surf;
 * the cached field is returned in every case, so the value stays available
 * after the surf action has been released (SIMIX_comm_new initializes it to -1). */
758 XBT_INLINE int SIMIX_comm_is_latency_bounded(smx_action_t action)
760 if (action->comm.surf_comm){
761 DEBUG1("Getting latency limited for surf_action (%p)", action->comm.surf_comm);
762 action->latency_limited = surf_workstation_model->get_latency_limited(action->comm.surf_comm);
763 DEBUG1("Action limited is %d", action->latency_limited);
765 return action->latency_limited;
769 /******************************************************************************/
770 /* SIMIX_comm_copy_data callbacks */
771 /******************************************************************************/
/* Function used by SIMIX_comm_copy_data to transfer the payload; it receives
 * the communication and the number of bytes to copy. Defaults to the
 * pointer-passing callback. */
772 static void (*SIMIX_comm_copy_data_callback) (smx_action_t, size_t) =
773 &SIMIX_comm_copy_pointer_callback;
/**
 * \brief Replaces the function used to copy the data of every communication
 * \param callback The new copy function; it is stored without any check
 */
776 SIMIX_comm_set_copy_data_callback(void (*callback) (smx_action_t, size_t))
778 SIMIX_comm_copy_data_callback = callback;
/**
 * \brief Copy callback that transfers the address of the sender buffer
 * \param comm The communication
 * \param buff_size Number of bytes to copy; asserted to equal sizeof(void *)
 *
 * dst_buff is treated as the address of a pointer, and that pointer is set to
 * src_buff: no payload byte is duplicated.
 */
781 void SIMIX_comm_copy_pointer_callback(smx_action_t comm, size_t buff_size)
783 xbt_assert1((buff_size == sizeof(void *)),
784 "Cannot copy %zu bytes: must be sizeof(void*)", buff_size);
785 *(void **) (comm->comm.dst_buff) = comm->comm.src_buff;
/**
 * \brief Copy callback that duplicates the payload with memcpy
 * \param comm The communication
 * \param buff_size Number of bytes copied from src_buff to dst_buff
 */
788 void SIMIX_comm_copy_buffer_callback(smx_action_t comm, size_t buff_size)
790 memcpy(comm->comm.dst_buff, comm->comm.src_buff, buff_size);
794 * \brief Copy the communication data from the sender's buffer to the receiver's one
795 * \param comm The communication
/* The amount copied starts at src_buff_size and, when the receiver supplied a
 * dst_buff_size pointer, is capped by the pointed capacity. The actual copy is
 * delegated to SIMIX_comm_copy_data_callback. */
797 void SIMIX_comm_copy_data(smx_action_t comm)
799 size_t buff_size = comm->comm.src_buff_size;
800 /* If there is no data to be copy then return */
801 if (!comm->comm.src_buff || !comm->comm.dst_buff || comm->comm.copied == 1)
804 DEBUG6("Copying comm %p data from %s (%p) -> %s (%p) (%zu bytes)",
806 comm->comm.src_proc->smx_host->name, comm->comm.src_buff,
807 comm->comm.dst_proc->smx_host->name, comm->comm.dst_buff, buff_size);
809 /* Copy at most dst_buff_size bytes of the message to receiver's buffer */
810 if (comm->comm.dst_buff_size)
811 buff_size = MIN(buff_size, *(comm->comm.dst_buff_size));
813 /* Update the receiver's buffer size to the copied amount */
814 if (comm->comm.dst_buff_size)
815 *comm->comm.dst_buff_size = buff_size;
820 (*SIMIX_comm_copy_data_callback) (comm, buff_size);
822 /* Set the copied flag so we copy data only once */
823 /* (this function might be called from both communication ends) */
824 comm->comm.copied = 1;