1 /* Copyright (c) 2009, 2010. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
/* Log category used by the DEBUGn() calls of this file: simix_network, declared with simix as second argument. */
12 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_network, simix,
13 "Logging specific to SIMIX (network)");
/* Dictionary of the named rendez-vous points, keyed by name. */
/* It is allocated by SIMIX_network_init() and released by SIMIX_network_exit(). */
15 static xbt_dict_t rdv_points = NULL;
/* Forward declarations of helpers used in this file. */
/* NOTE(review): the end of the SIMIX_comm_wait_for_completion() prototype is not visible in this listing. */
17 static XBT_INLINE void SIMIX_comm_start(smx_action_t action);
18 static void SIMIX_comm_finish(smx_action_t action);
19 static void SIMIX_waitany_req_remove_from_actions(smx_req_t req);
20 static void SIMIX_comm_copy_data(smx_action_t comm);
21 static smx_action_t SIMIX_comm_new(e_smx_comm_type_t type);
22 static XBT_INLINE void SIMIX_comm_wait_for_completion(smx_action_t comm,
24 static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm);
25 static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm);
26 static smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
27 int (*match_fun)(void *, void *), void *);
28 static void SIMIX_rdv_free(void *data);
/* Initializes the network module by allocating the rdv_points dictionary. */
30 void SIMIX_network_init(void)
32 rdv_points = xbt_dict_new();
/* Shuts the network module down by freeing the rdv_points dictionary. */
/* NOTE(review): entries are registered with SIMIX_rdv_free as free function; presumably xbt_dict_free() invokes it on each remaining entry -- confirm. */
35 void SIMIX_network_exit(void)
37 xbt_dict_free(&rdv_points);
40 /******************************************************************************/
41 /* Rendez-Vous Points */
42 /******************************************************************************/
/* Creates a rendez-vous point, or finds the existing one registered under `name`. */
/* \param name Name of the rendez-vous point; may be NULL, in which case no lookup is done */
/* The visible code allocates a zeroed s_smx_rvpoint_t, gives it a private copy of */
/* the name (NULL when no name is given) and a new comm FIFO, and registers it in */
/* rdv_points with SIMIX_rdv_free as free function. */
/* NOTE(review): the conditions guarding the allocation and the registration, and the */
/* return statement, are not visible in this listing -- confirm that an unnamed */
/* rendez-vous point is never inserted in the dictionary with a NULL key. */
44 smx_rdv_t SIMIX_rdv_create(const char *name)
46 /* two processes may have pushed the same rdv_create request at the same time */
47 smx_rdv_t rdv = name ? xbt_dict_get_or_null(rdv_points, name) : NULL;
50 rdv = xbt_new0(s_smx_rvpoint_t, 1);
51 rdv->name = name ? xbt_strdup(name) : NULL;
52 rdv->comm_fifo = xbt_fifo_new();
55 xbt_dict_set(rdv_points, rdv->name, rdv, SIMIX_rdv_free);
/* Destroys a rendez-vous point by removing its entry, looked up by rdv->name, from rdv_points. */
/* NOTE(review): rdv->name can be NULL for unnamed rendez-vous points (see SIMIX_rdv_create); */
/* any guard for that case is not visible in this listing -- confirm. */
60 void SIMIX_rdv_destroy(smx_rdv_t rdv)
63 xbt_dict_remove(rdv_points, rdv->name);
/* Free function registered with each rdv_points entry (see SIMIX_rdv_create). */
/* \param data The rendez-vous point, passed as an untyped pointer */
/* The visible code frees the comm FIFO of the rendez-vous point. */
/* NOTE(review): the release of the name and of the structure itself is not visible in this listing. */
66 void SIMIX_rdv_free(void *data)
68 smx_rdv_t rdv = (smx_rdv_t) data;
71 xbt_fifo_free(rdv->comm_fifo);
/* Looks a rendez-vous point up by name in rdv_points. */
/* \param name The name to search for */
/* \return The result of xbt_dict_get_or_null(): the registered rendez-vous point, or NULL when there is none */
75 smx_rdv_t SIMIX_rdv_get_by_name(const char *name)
77 return xbt_dict_get_or_null(rdv_points, name);
/* Walks the comm FIFO of a rendez-vous point and tests, for each queued comm, */
/* whether the host of its source process is `host`. */
/* \param rdv The rendez-vous point to inspect */
/* \param host The host to compare the sender host against */
/* NOTE(review): the counter and the return statement are not visible in this listing; */
/* presumably the function returns the number of matching comms -- confirm. */
/* NOTE(review): comm->comm.src_proc is dereferenced unconditionally; comms created by */
/* SIMIX_comm_irecv have no src_proc assigned in the code visible here -- confirm this is safe. */
80 int SIMIX_rdv_comm_count_by_host(smx_rdv_t rdv, smx_host_t host)
82 smx_action_t comm = NULL;
83 xbt_fifo_item_t item = NULL;
86 xbt_fifo_foreach(rdv->comm_fifo, item, comm, smx_action_t) {
87 if (comm->comm.src_proc->smx_host == host)
/* Returns the content of the first item of the comm FIFO of a rendez-vous point. */
/* \param rdv The rendez-vous point */
/* The call only reads the item content; nothing here removes it from the FIFO. */
94 smx_action_t SIMIX_rdv_get_head(smx_rdv_t rdv)
96 return xbt_fifo_get_item_content(xbt_fifo_get_first_item(rdv->comm_fifo));
100 * \brief Push a communication request into a rendez-vous point
101 * \param rdv The rendez-vous point
102 * \param comm The communication request
/* Queues comm in the FIFO of rdv and records rdv in comm->comm.rdv, */
/* so that the comm knows which rendez-vous point currently holds it. */
104 static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm)
106 xbt_fifo_push(rdv->comm_fifo, comm);
107 comm->comm.rdv = rdv;
111 * \brief Remove a communication request from a rendez-vous point
112 * \param rdv The rendez-vous point
113 * \param comm The communication request
/* Takes comm out of the FIFO of rdv and resets comm->comm.rdv to NULL, */
/* which marks the comm as no longer queued in any rendez-vous point. */
115 static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm)
117 xbt_fifo_remove(rdv->comm_fifo, comm);
118 comm->comm.rdv = NULL;
122 * \brief Checks if there is a communication action queued in a rendez-vous matching our needs
123 * \param type The type of communication we are looking for (comm_send, comm_recv)
124 * \return The communication action if found, NULL otherwise
/* Searches the FIFO of rdv for a queued comm of the requested type. */
/* \param rdv The rendez-vous point to search */
/* \param match_fun Optional filter; when NULL every comm of the right type matches. */
/*        Otherwise it is called as match_fun(data, req_data), where req_data is the */
/*        user data of the queued comm (src_data for a send, dst_data for a receive), */
/*        and a non-zero result means a match. */
/* \param data User data of the caller, passed as first argument of match_fun */
/* On a match the FIFO item is unlinked and freed, the refcount of the comm is */
/* incremented and its rdv back-pointer is cleared. */
/* NOTE(review): the declaration of `action` and the return statements are not visible in this listing. */
126 smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
127 int (*match_fun)(void *, void *), void *data)
130 xbt_fifo_item_t item;
131 void* req_data = NULL;
133 xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t){
/* Pick the user data attached by the side that queued this comm. */
134 if (action->comm.type == SIMIX_COMM_SEND) {
135 req_data = action->comm.src_data;
136 } else if (action->comm.type == SIMIX_COMM_RECEIVE) {
137 req_data = action->comm.dst_data;
139 if (action->comm.type == type && (!match_fun || match_fun(data, req_data))) {
140 DEBUG1("Found a matching communication action %p", action);
141 xbt_fifo_remove_item(rdv->comm_fifo, item);
142 xbt_fifo_free_item(item);
143 action->comm.refcount++;
144 action->comm.rdv = NULL;
147 DEBUG3("Sorry, communication action %p does not match our needs:"
148 " its type is %d but we are looking for a comm of type %d",
149 action, action->comm.type, type);
151 DEBUG0("No matching communication action found");
155 /******************************************************************************/
156 /* Communication Actions */
157 /******************************************************************************/
160 * \brief Creates a new communicate action
161 * \param type The type of request (comm_send, comm_recv)
162 * \return The new communicate action
/* Allocates a zero-initialized communicate action. */
/* The new action has type SIMIX_ACTION_COMMUNICATE, state SIMIX_WAITING, an empty */
/* request list, the requested comm type and a reference count of 1. */
/* NOTE(review): the declaration of `act`, the preprocessor lines closing or opening the */
/* conditional sections, and the return statement are not visible in this listing. */
164 smx_action_t SIMIX_comm_new(e_smx_comm_type_t type)
168 /* alloc structures */
169 act = xbt_new0(s_smx_action_t, 1);
170 act->type = SIMIX_ACTION_COMMUNICATE;
171 act->state = SIMIX_WAITING;
172 act->request_list = xbt_fifo_new();
174 /* set communication */
175 act->comm.type = type;
176 act->comm.refcount = 1;
178 #ifdef HAVE_LATENCY_BOUND_TRACKING
179 //initialize with unknown value
180 act->latency_limited = -1;
184 act->category = NULL;
187 DEBUG1("Create communicate action %p", act);
193 * \brief Destroy a communicate action
194 * \param action The communicate action to be destroyed
/* Drops one reference on a communicate action. */
/* Aborts through xbt_die() when the reference count is already zero or negative, */
/* then decrements it. The cleanup that follows in the visible code frees the action */
/* name and its request list and releases the internal surf actions. */
/* NOTE(review): the statement executed when the count is still positive after the */
/* decrement (presumably an early return) and the final release of the action structure */
/* are not visible in this listing -- confirm. */
196 void SIMIX_comm_destroy(smx_action_t action)
198 DEBUG2("Destroy action %p (refcount:%d)", action, action->comm.refcount);
200 if (action->comm.refcount <= 0)
201 xbt_die(bprintf("the refcount of comm %p is already 0 before decreasing it. That's a bug!",action));
203 action->comm.refcount--;
204 if (action->comm.refcount > 0)
206 DEBUG2("Really free communication %p; refcount is now %d", action,
207 action->comm.refcount);
209 #ifdef HAVE_LATENCY_BOUND_TRACKING
/* Record the latency-bound flag before the surf action is released below. */
210 action->latency_limited = SIMIX_comm_is_latency_bounded( action ) ;
214 TRACE_smx_action_destroy(action);
218 xbt_free(action->name);
220 xbt_fifo_free(action->request_list);
222 SIMIX_comm_destroy_internal_actions(action);
/* Releases the surf actions attached to a communicate action. */
/* Each of surf_comm, src_timeout and dst_timeout that is set is unreferenced through */
/* the action_unref of its own model and then reset to NULL, so calling this function */
/* again on the same action finds nothing left to release. */
227 void SIMIX_comm_destroy_internal_actions(smx_action_t action)
229 if (action->comm.surf_comm){
230 #ifdef HAVE_LATENCY_BOUND_TRACKING
/* The flag is read from surf_comm, so it is saved before the unref. */
231 action->latency_limited = SIMIX_comm_is_latency_bounded(action);
233 action->comm.surf_comm->model_type->action_unref(action->comm.surf_comm);
234 action->comm.surf_comm = NULL;
237 if (action->comm.src_timeout){
238 action->comm.src_timeout->model_type->action_unref(action->comm.src_timeout);
239 action->comm.src_timeout = NULL;
242 if (action->comm.dst_timeout){
243 action->comm.dst_timeout->model_type->action_unref(action->comm.dst_timeout);
244 action->comm.dst_timeout = NULL;
/* Posts an asynchronous send on a rendez-vous point. */
/* \param src_proc The sending process */
/* \param rdv The rendez-vous point on which the send is posted */
/* \param task_size, rate Values stored in the comm and later passed to the surf communicate call by SIMIX_comm_start */
/* \param src_buff, src_buff_size The sender buffer and its size, stored in the comm */
/* \param match_fun, data Forwarded to SIMIX_rdv_get_request; data is also stored as src_data */
/* The function first looks for a queued receive. The visible code then either creates */
/* a new send comm and pushes it on rdv, or marks the found comm READY (state and comm type). */
/* After filling the sender side of the comm it calls SIMIX_comm_start. */
/* NOTE(review): the branch conditions, the guard around the direct switch to */
/* SIMIX_RUNNING, the declaration of `action` and the return statement are not visible in this listing. */
248 smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
249 double task_size, double rate,
250 void *src_buff, size_t src_buff_size,
251 int (*match_fun)(void *, void *), void *data)
255 /* Look for communication request matching our needs.
256 If it is not found then create it and push it into the rendez-vous point */
257 action = SIMIX_rdv_get_request(rdv, SIMIX_COMM_RECEIVE, match_fun, data);
260 action = SIMIX_comm_new(SIMIX_COMM_SEND);
261 SIMIX_rdv_push(rdv, action);
263 action->state = SIMIX_READY;
264 action->comm.type = SIMIX_COMM_READY;
267 /* Setup the communication request */
268 action->comm.src_proc = src_proc;
269 action->comm.task_size = task_size;
270 action->comm.rate = rate;
271 action->comm.src_buff = src_buff;
272 action->comm.src_buff_size = src_buff_size;
273 action->comm.src_data = data;
276 action->state = SIMIX_RUNNING;
280 SIMIX_comm_start(action);
/* Posts an asynchronous receive on a rendez-vous point; mirror of SIMIX_comm_isend. */
/* \param dst_proc The receiving process */
/* \param rdv The rendez-vous point on which the receive is posted */
/* \param dst_buff, dst_buff_size The receiver buffer and a pointer to its size, stored in the comm */
/* \param match_fun, data Forwarded to SIMIX_rdv_get_request; data is also stored as dst_data */
/* The function first looks for a queued send. The visible code then either creates */
/* a new receive comm and pushes it on rdv, or marks the found comm READY (state and comm type). */
/* After filling the receiver side of the comm it calls SIMIX_comm_start. */
/* NOTE(review): the branch conditions, the guard around the direct switch to */
/* SIMIX_RUNNING, the declaration of `action` and the return statement are not visible in this listing. */
284 smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv,
285 void *dst_buff, size_t *dst_buff_size,
286 int (*match_fun)(void *, void *), void *data)
290 /* Look for communication request matching our needs.
291 * If it is not found then create it and push it into the rendez-vous point
293 action = SIMIX_rdv_get_request(rdv, SIMIX_COMM_SEND, match_fun, data);
296 action = SIMIX_comm_new(SIMIX_COMM_RECEIVE);
297 SIMIX_rdv_push(rdv, action);
299 action->state = SIMIX_READY;
300 action->comm.type = SIMIX_COMM_READY;
303 /* Setup communication request */
304 action->comm.dst_proc = dst_proc;
305 action->comm.dst_buff = dst_buff;
306 action->comm.dst_buff_size = dst_buff_size;
307 action->comm.dst_data = data;
310 action->state = SIMIX_RUNNING;
314 SIMIX_comm_start(action);
/* Handles a comm_wait request. */
/* \param req The request; its comm_wait fields give the comm and the timeout */
/* \param idx Not used by the code visible in this listing */
/* The request is queued on the request list of the action and the action becomes the */
/* waiting_action of the issuer. Two paths are then visible: */
/*  - one sets the state directly (SIMIX_DONE, or a source/destination timeout chosen by */
/*    whether the issuer is the sender) and finishes the comm; its comments mention the MC; */
/*  - the other finishes the comm at once when it is neither WAITING nor RUNNING, and */
/*    otherwise creates a surf sleep action of `timeout` on the host of the issuer, */
/*    attaches the comm as its data and stores it as src_timeout or dst_timeout. */
/* NOTE(review): the conditions selecting between these paths, the else lines and the */
/* declaration of `sleep` are not visible in this listing. */
318 void SIMIX_pre_comm_wait(smx_req_t req, int idx)
320 smx_action_t action = req->comm_wait.comm;
321 double timeout = req->comm_wait.timeout;
324 /* Associate this request to the action */
325 xbt_fifo_push(action->request_list, req);
326 req->issuer->waiting_action = action;
330 action->state = SIMIX_DONE;
332 /* If we reached this point, the wait request must have a timeout */
333 /* Otherwise it shouldn't be enabled and executed by the MC */
337 if(action->comm.src_proc == req->issuer)
338 action->state = SIMIX_SRC_TIMEOUT;
340 action->state = SIMIX_DST_TIMEOUT;
343 SIMIX_comm_finish(action);
347 /* If the action has already finished, perform the error handling, */
348 /* otherwise set up a waiting timeout on the right side */
349 if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING) {
350 SIMIX_comm_finish(action);
351 } else { /* if (timeout >= 0) { we need a surf sleep action even when there is no timeout, otherwise surf won't tell us when the host fails */
352 sleep = surf_workstation_model->extension.workstation.sleep(req->issuer->smx_host->host, timeout);
353 surf_workstation_model->action_data_set(sleep, action);
355 if (req->issuer == action->comm.src_proc)
356 action->comm.src_timeout = sleep;
358 action->comm.dst_timeout = sleep;
/* Handles a comm_test request and stores the outcome in req->comm_test.result. */
/* Two variants are visible: */
/*  - the first reports success when both src_proc and dst_proc of the comm are set, */
/*    and in that case forces the state to SIMIX_DONE before finishing the comm; */
/*  - the second reports success when the state is neither WAITING nor RUNNING. */
/* On success the request is queued on the action and SIMIX_comm_finish is called. */
/* The SIMIX_request_answer calls are visible, but the lines that decide when they run are not. */
/* NOTE(review): the condition selecting the variant and the else lines are not visible in this listing. */
362 void SIMIX_pre_comm_test(smx_req_t req)
364 smx_action_t action = req->comm_test.comm;
367 req->comm_test.result = action->comm.src_proc && action->comm.dst_proc;
368 if(req->comm_test.result){
369 action->state = SIMIX_DONE;
370 xbt_fifo_push(action->request_list, req);
371 SIMIX_comm_finish(action);
373 SIMIX_request_answer(req);
378 req->comm_test.result = (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING);
379 if (req->comm_test.result) {
380 xbt_fifo_push(action->request_list, req);
381 SIMIX_comm_finish(action);
383 SIMIX_request_answer(req);
/* Handles a comm_testany request over the comms of req->comm_testany.comms. */
/* The result starts at -1. Two variants are visible: */
/*  - one driven by `idx`: it either answers the request at once, or takes the comm at */
/*    position idx, stores idx as result, forces the state to SIMIX_DONE and finishes it; */
/*  - one scanning all comms: for a comm that is neither WAITING nor RUNNING it stores */
/*    the position as result, queues the request on it and finishes it. */
/* NOTE(review): the conditions selecting the variant, the local declarations, any early */
/* exit from the scan and the guard on the last SIMIX_request_answer are not visible in this listing. */
387 void SIMIX_pre_comm_testany(smx_req_t req, int idx)
391 xbt_dynar_t actions = req->comm_testany.comms;
392 req->comm_testany.result = -1;
396 SIMIX_request_answer(req);
398 action = xbt_dynar_get_as(actions, idx, smx_action_t);
399 req->comm_testany.result = idx;
400 xbt_fifo_push(action->request_list, req);
401 action->state = SIMIX_DONE;
402 SIMIX_comm_finish(action);
407 xbt_dynar_foreach(req->comm_testany.comms,cursor,action) {
408 if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING) {
409 req->comm_testany.result = cursor;
410 xbt_fifo_push(action->request_list, req);
411 SIMIX_comm_finish(action);
415 SIMIX_request_answer(req);
/* Handles a comm_waitany request over the comms of req->comm_waitany.comms. */
/* Two variants are visible: */
/*  - one driven by `idx`: takes the comm at position idx, queues the request on it, */
/*    stores idx as result, forces the state to SIMIX_DONE and finishes it; */
/*  - one that queues the request on every comm of the list and finishes a comm that is */
/*    already neither WAITING nor RUNNING. SIMIX_comm_finish then removes the request */
/*    from the other comms (see SIMIX_waitany_req_remove_from_actions). */
/* NOTE(review): the condition selecting the variant, the declaration of `action` and */
/* any early exit from the loop are not visible in this listing. */
418 void SIMIX_pre_comm_waitany(smx_req_t req, int idx)
421 unsigned int cursor = 0;
422 xbt_dynar_t actions = req->comm_waitany.comms;
425 action = xbt_dynar_get_as(actions, idx, smx_action_t);
426 xbt_fifo_push(action->request_list, req);
427 req->comm_waitany.result = idx;
428 action->state = SIMIX_DONE;
429 SIMIX_comm_finish(action);
433 xbt_dynar_foreach(actions, cursor, action){
434 /* Associate this request to the action */
435 xbt_fifo_push(action->request_list, req);
436 if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING){
437 SIMIX_comm_finish(action);
/* Removes a waitany request from the request list of every comm it was waiting on. */
/* \param req The waitany request; the comms are read from req->comm_waitany.comms */
/* NOTE(review): the declaration of `action` is not visible in this listing. */
443 void SIMIX_waitany_req_remove_from_actions(smx_req_t req)
446 unsigned int cursor = 0;
447 xbt_dynar_t actions = req->comm_waitany.comms;
449 xbt_dynar_foreach(actions, cursor, action){
450 xbt_fifo_remove(action->request_list, req);
455 * \brief Start the simulation of a communication request
456 * \param action The communication action
/* Starts the surf simulation of a comm whose state is SIMIX_READY. */
/* It creates the surf communicate action between the hosts of the source and */
/* destination processes, using the task_size and rate stored in the comm, attaches the */
/* SIMIX action as data of the surf action and moves the state to SIMIX_RUNNING. */
/* If surf reports the new action as failed right away, the state becomes */
/* SIMIX_LINK_FAILURE and the internal surf actions are released. */
/* If either process is suspended, the surf action is suspended. */
/* NOTE(review): several structural lines (closing braces, else lines, preprocessor guards */
/* around the TRACE call) are not visible in this listing; in particular confirm that the */
/* suspension check cannot run after surf_comm was reset to NULL by the failure path. */
458 static XBT_INLINE void SIMIX_comm_start(smx_action_t action)
460 /* If both the sender and the receiver are already there, start the communication */
461 if (action->state == SIMIX_READY) {
462 smx_host_t sender = action->comm.src_proc->smx_host;
463 smx_host_t receiver = action->comm.dst_proc->smx_host;
465 DEBUG3("Starting communication %p from '%s' to '%s'", action,
466 SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver));
468 action->comm.surf_comm = surf_workstation_model->extension.workstation.
469 communicate(sender->host, receiver->host, action->comm.task_size, action->comm.rate);
471 surf_workstation_model->action_data_set(action->comm.surf_comm, action);
473 action->state = SIMIX_RUNNING;
476 TRACE_smx_action_communicate(action, action->comm.src_proc);
479 /* If a link is failed, detect it immediately */
480 if (surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED) {
481 DEBUG2("Communication from '%s' to '%s' failed to start because of a link failure",
482 SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver));
483 action->state = SIMIX_LINK_FAILURE;
484 SIMIX_comm_destroy_internal_actions(action);
487 /* If any of the processes is suspended, create the action but stop its execution;
488 it will be restarted when the sender process resumes */
489 if (SIMIX_process_is_suspended(action->comm.src_proc) ||
490 SIMIX_process_is_suspended(action->comm.dst_proc)) {
491 /* FIXME: check what should happen with the action state */
492 surf_workstation_model->suspend(action->comm.surf_comm);
/* Answers every request queued on a comm, according to the final state of the comm. */
/* Requests are shifted off the request list one at a time until it is empty. For each: */
/*  - a waitany request is first removed from the other comms it was queued on, and its */
/*    result is set to the position of this comm in its list; */
/*  - the comm is removed from its rendez-vous point if it is still queued in one; */
/*  - the state is dispatched: the completion path copies the data, while the timeout, */
/*    host-failure and link-failure paths throw the matching exception, which is caught */
/*    into the running context of the issuer, with doexception set to 1; */
/*  - when an exception was raised for a waitany or testany request, exception.value */
/*    receives the position of this comm in the list of the request; */
/*  - waiting_action of the issuer is cleared and the request is answered. */
/* For host failures the issuer located on the failed side gets host_error and the */
/* other side gets network_error. */
/* NOTE(review): the declaration of `req`, the TRY lines, the case label of the completion */
/* path, the break statements and the default label are not visible in this listing. */
497 void SIMIX_comm_finish(smx_action_t action)
501 while ((req = xbt_fifo_shift(action->request_list))) {
503 /* If a waitany request is waiting for this action to finish, then remove
504 it from the other actions in the waitany list. Afterwards, get the
505 position of the actual action in the waitany request's actions dynar and
506 return it as the result of the call */
507 if (req->call == REQ_COMM_WAITANY) {
508 SIMIX_waitany_req_remove_from_actions(req);
510 req->comm_waitany.result = xbt_dynar_search(req->comm_waitany.comms, &action);
513 /* If the action is still in a rendez-vous point then remove from it */
514 if (action->comm.rdv)
515 SIMIX_rdv_remove(action->comm.rdv, action);
517 DEBUG1("SIMIX_comm_finish: action state = %d", action->state);
519 /* Check out for errors */
520 switch (action->state) {
523 DEBUG1("Communication %p complete!", action);
524 SIMIX_comm_copy_data(action);
527 case SIMIX_SRC_TIMEOUT:
529 THROW0(timeout_error, 0, "Communication timeouted because of sender");
531 CATCH(req->issuer->running_ctx->exception) {
532 req->issuer->doexception = 1;
536 case SIMIX_DST_TIMEOUT:
538 THROW0(timeout_error, 0, "Communication timeouted because of receiver");
540 CATCH(req->issuer->running_ctx->exception) {
541 req->issuer->doexception = 1;
545 case SIMIX_SRC_HOST_FAILURE:
547 if (req->issuer == action->comm.src_proc)
548 THROW0(host_error, 0, "Host failed");
550 THROW0(network_error, 0, "Remote peer failed");
552 CATCH(req->issuer->running_ctx->exception) {
553 req->issuer->doexception = 1;
557 case SIMIX_DST_HOST_FAILURE:
559 if (req->issuer == action->comm.dst_proc)
560 THROW0(host_error, 0, "Host failed");
562 THROW0(network_error, 0, "Remote peer failed");
564 CATCH(req->issuer->running_ctx->exception) {
565 req->issuer->doexception = 1;
569 case SIMIX_LINK_FAILURE:
571 DEBUG5("Link failure in action %p between '%s' and '%s': posting an exception to the issuer: %s (%p)",
572 action, action->comm.src_proc->smx_host->name, action->comm.dst_proc->smx_host->name,
573 req->issuer->name, req->issuer);
574 THROW0(network_error, 0, "Link failure");
576 CATCH(req->issuer->running_ctx->exception) {
577 req->issuer->doexception = 1;
585 /* if there is an exception during a waitany or a testany, indicate the position of the failed communication */
586 if (req->issuer->doexception) {
587 if (req->call == REQ_COMM_WAITANY) {
588 req->issuer->running_ctx->exception.value = xbt_dynar_search(req->comm_waitany.comms, &action);
590 else if (req->call == REQ_COMM_TESTANY) {
591 req->issuer->running_ctx->exception.value = xbt_dynar_search(req->comm_testany.comms, &action);
595 req->issuer->waiting_action = NULL;
596 SIMIX_request_answer(req);
/* Derives the final state of a comm from the state of its surf actions. */
/* The checks run in this order and the first one that holds wins: */
/*   src_timeout done   -> SIMIX_SRC_TIMEOUT */
/*   dst_timeout done   -> SIMIX_DST_TIMEOUT */
/*   src_timeout failed -> SIMIX_SRC_HOST_FAILURE */
/*   dst_timeout failed -> SIMIX_DST_HOST_FAILURE */
/*   surf_comm failed   -> SIMIX_LINK_FAILURE */
/* The assignment of SIMIX_DONE follows this chain. */
/* NOTE(review): the else line before that assignment is not visible in this listing. */
/* Afterwards the surf actions are released and, if requests are queued on the comm, */
/* they are answered through SIMIX_comm_finish. */
600 void SIMIX_post_comm(smx_action_t action)
602 /* Update action state */
603 if (action->comm.src_timeout &&
604 surf_workstation_model->action_state_get(action->comm.src_timeout) == SURF_ACTION_DONE)
605 action->state = SIMIX_SRC_TIMEOUT;
606 else if (action->comm.dst_timeout &&
607 surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_DONE)
608 action->state = SIMIX_DST_TIMEOUT;
609 else if (action->comm.src_timeout &&
610 surf_workstation_model->action_state_get(action->comm.src_timeout) == SURF_ACTION_FAILED)
611 action->state = SIMIX_SRC_HOST_FAILURE;
612 else if (action->comm.dst_timeout &&
613 surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_FAILED)
614 action->state = SIMIX_DST_HOST_FAILURE;
615 else if (action->comm.surf_comm &&
616 surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED)
617 action->state = SIMIX_LINK_FAILURE;
619 action->state = SIMIX_DONE;
621 DEBUG1("SIMIX_post_comm: action state = %d", action->state);
623 /* After this point the surf actions associated with the simix communicate
624 action are no longer needed, thus we delete them. */
625 SIMIX_comm_destroy_internal_actions(action);
627 /* If there are requests associated with the action, then answer them */
628 if (xbt_fifo_size(action->request_list))
629 SIMIX_comm_finish(action);
/* Cancels a comm. */
/* A comm still in state SIMIX_WAITING is removed from its rendez-vous point and marked */
/* SIMIX_FAILED. The other visible path cancels the surf communicate action. */
/* NOTE(review): the else line and the guard announced by the MC comment are not visible */
/* in this listing -- confirm that action_cancel is never called with a NULL surf_comm. */
632 void SIMIX_comm_cancel(smx_action_t action)
634 /* If the action is in a waiting state, it is still in a rdv, */
635 /* so remove it from there and mark it as failed */
636 if (action->state == SIMIX_WAITING) {
637 SIMIX_rdv_remove(action->comm.rdv, action);
638 action->state = SIMIX_FAILED;
640 /* When running the MC there are no surf actions */
642 surf_workstation_model->action_cancel(action->comm.surf_comm);
/* Suspends the surf communicate action of a comm. */
/* The timeout actions are left untouched, and surf_comm is passed to surf without a NULL check in the visible code. */
646 void SIMIX_comm_suspend(smx_action_t action)
648 /*FIXME: shall we suspend also the timeout actions? */
649 surf_workstation_model->suspend(action->comm.surf_comm);
/* Resumes the surf communicate action of a comm. */
/* The timeout actions are left untouched, and surf_comm is passed to surf without a NULL check in the visible code. */
652 void SIMIX_comm_resume(smx_action_t action)
654 /*FIXME: check what happen with the timeouts */
655 surf_workstation_model->resume(action->comm.surf_comm);
659 /************* Action Getters **************/
662 * \brief get the amount remaining from the communication
663 * \param action The communication
/* Computes the remaining amount of a comm, depending on its state. */
/* One branch asks surf for the remaining amount of the surf communicate action; */
/* two other branches use 0 and are flagged FIXME by the original authors. */
/* NOTE(review): the case labels, the declaration of `remains` and the return statement */
/* are not visible in this listing, so the mapping from states to branches cannot be stated here. */
665 double SIMIX_comm_get_remains(smx_action_t action)
669 switch (action->state) {
672 remains = surf_workstation_model->get_remains(action->comm.surf_comm);
677 remains = 0; /*FIXME: check what should be returned */
681 remains = 0; /*FIXME: is this correct? */
/* Returns the current state of the action (the `state` field, not the comm type). */
687 e_smx_state_t SIMIX_comm_get_state(smx_action_t action)
689 return action->state;
693 * \brief Return the user data associated to the sender of the communication
694 * \param action The communication
695 * \return the user data
/* Plain accessor for comm.src_data, the value SIMIX_comm_isend stores from its `data` argument. */
697 void* SIMIX_comm_get_src_data(smx_action_t action)
699 return action->comm.src_data;
703 * \brief Return the user data associated to the receiver of the communication
704 * \param action The communication
705 * \return the user data
/* Plain accessor for comm.dst_data, the value SIMIX_comm_irecv stores from its `data` argument. */
707 void* SIMIX_comm_get_dst_data(smx_action_t action)
709 return action->comm.dst_data;
/* Returns the sender buffer of the comm, as stored by SIMIX_comm_isend. */
712 void* SIMIX_comm_get_src_buff(smx_action_t action)
714 return action->comm.src_buff;
/* Returns the receiver buffer of the comm, as stored by SIMIX_comm_irecv. */
717 void* SIMIX_comm_get_dst_buff(smx_action_t action)
719 return action->comm.dst_buff;
/* Returns the size of the sender buffer, as stored by SIMIX_comm_isend. */
722 size_t SIMIX_comm_get_src_buff_size(smx_action_t action)
724 return action->comm.src_buff_size;
/* Returns the size of the receiver buffer. */
/* dst_buff_size is a pointer (see SIMIX_comm_irecv), so it is dereferenced only when set. */
/* NOTE(review): the declaration of `buff_size`, the value used when the pointer is NULL */
/* and the return statement are not visible in this listing. */
727 size_t SIMIX_comm_get_dst_buff_size(smx_action_t action)
731 if (action->comm.dst_buff_size)
732 buff_size = *(action->comm.dst_buff_size);
/* Returns the sending process of the comm, as stored by SIMIX_comm_isend. */
739 smx_process_t SIMIX_comm_get_src_proc(smx_action_t action)
741 return action->comm.src_proc;
/* Returns the receiving process of the comm, as stored by SIMIX_comm_irecv. */
744 smx_process_t SIMIX_comm_get_dst_proc(smx_action_t action)
746 return action->comm.dst_proc;
749 #ifdef HAVE_LATENCY_BOUND_TRACKING
751 * \brief verify if communication is latency bounded
752 * \param comm The communication
/* Returns the latency_limited flag of the action. */
/* While the surf communicate action exists, the flag is first refreshed from surf; */
/* once surf_comm is gone, the value cached in the action is returned as is. */
754 XBT_INLINE int SIMIX_comm_is_latency_bounded(smx_action_t action)
756 if (action->comm.surf_comm){
757 DEBUG1("Getting latency limited for surf_action (%p)", action->comm.surf_comm);
758 action->latency_limited = surf_workstation_model->get_latency_limited(action->comm.surf_comm);
759 DEBUG1("Action limited is %d", action->latency_limited);
761 return action->latency_limited;
765 /******************************************************************************/
766 /* SIMIX_comm_copy_data callbacks */
767 /******************************************************************************/
/* Callback invoked by SIMIX_comm_copy_data to transfer the payload. */
/* It defaults to SIMIX_comm_copy_pointer_callback and can be replaced through SIMIX_comm_set_copy_data_callback. */
768 static void (*SIMIX_comm_copy_data_callback) (smx_action_t, size_t) =
769 &SIMIX_comm_copy_pointer_callback;
/* Installs the callback that SIMIX_comm_copy_data will use to transfer the payload. */
/* \param callback Function taking the comm and the number of bytes to copy; stored without any check */
/* NOTE(review): the line carrying the return type of this definition is not visible in this listing. */
772 SIMIX_comm_set_copy_data_callback(void (*callback) (smx_action_t, size_t))
774 SIMIX_comm_copy_data_callback = callback;
/* Copy callback that transfers a pointer rather than the pointed data. */
/* It asserts that buff_size equals sizeof(void *), then treats dst_buff as the address */
/* of a pointer and stores the src_buff pointer value there. */
777 void SIMIX_comm_copy_pointer_callback(smx_action_t comm, size_t buff_size)
779 xbt_assert1((buff_size == sizeof(void *)),
780 "Cannot copy %zu bytes: must be sizeof(void*)", buff_size);
781 *(void **) (comm->comm.dst_buff) = comm->comm.src_buff;
/* Copy callback that transfers the payload itself: copies buff_size bytes from src_buff to dst_buff with memcpy. */
784 void SIMIX_comm_copy_buffer_callback(smx_action_t comm, size_t buff_size)
786 memcpy(comm->comm.dst_buff, comm->comm.src_buff, buff_size);
790 * \brief Copy the communication data from the sender's buffer to the receiver's one
791 * \param comm The communication
/* Transfers the payload of a finished comm through SIMIX_comm_copy_data_callback. */
/* The condition that skips the copy holds when either buffer is missing or when the */
/* `copied` flag is already 1. */
/* The number of bytes starts at src_buff_size; when the receiver supplied a size */
/* pointer, it is capped to the receiver size and the capped value is written back */
/* through that pointer. The `copied` flag is set to 1 afterwards. */
/* NOTE(review): the return statement of the skip condition, one argument line of the */
/* DEBUG6 call and a few lines before the callback invocation are not visible in this listing. */
793 void SIMIX_comm_copy_data(smx_action_t comm)
795 size_t buff_size = comm->comm.src_buff_size;
796 /* If there is no data to be copied then return */
797 if (!comm->comm.src_buff || !comm->comm.dst_buff || comm->comm.copied == 1)
800 DEBUG6("Copying comm %p data from %s (%p) -> %s (%p) (%zu bytes)",
802 comm->comm.src_proc->smx_host->name, comm->comm.src_buff,
803 comm->comm.dst_proc->smx_host->name, comm->comm.dst_buff, buff_size);
805 /* Copy at most dst_buff_size bytes of the message to receiver's buffer */
806 if (comm->comm.dst_buff_size)
807 buff_size = MIN(buff_size, *(comm->comm.dst_buff_size));
809 /* Update the receiver's buffer size to the copied amount */
810 if (comm->comm.dst_buff_size)
811 *comm->comm.dst_buff_size = buff_size;
816 (*SIMIX_comm_copy_data_callback) (comm, buff_size);
818 /* Set the copied flag so we copy data only once */
819 /* (this function might be called from both communication ends) */
820 comm->comm.copied = 1;