1 /* Copyright (c) 2009, 2010. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
/* Declares the default logging subcategory of this file (simix_network, child of simix). */
12 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_network, simix,
13 "Logging specific to SIMIX (network)");
/* Registry of the named rendez-vous points: entries are stored under rdv->name
 * with SIMIX_rdv_free as their destructor (see SIMIX_rdv_create). */
15 static xbt_dict_t rdv_points = NULL;
/* Global communication counter.
 * NOTE(review): no update of this counter is visible in this listing -- confirm where it is maintained. */
16 unsigned long int smx_total_comms = 0;
/* Forward declarations of the helpers that are private to this file. */
18 static XBT_INLINE void SIMIX_comm_start(smx_action_t action);
19 static void SIMIX_comm_finish(smx_action_t action);
20 static void SIMIX_waitany_req_remove_from_actions(smx_req_t req);
21 static void SIMIX_comm_copy_data(smx_action_t comm);
22 static smx_action_t SIMIX_comm_new(e_smx_comm_type_t type);
23 static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm);
24 static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm);
25 static smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
26 int (*match_fun)(void *, void *), void *);
27 static void SIMIX_rdv_free(void *data);
/* Initializes the network module by allocating the dictionary (rdv_points)
 * that stores the named rendez-vous points. */
29 void SIMIX_network_init(void)
31 rdv_points = xbt_dict_new();
/* Shuts the network module down by freeing the rendez-vous dictionary.
 * NOTE(review): entries are registered with SIMIX_rdv_free as destructor, so
 * presumably they are released here as well -- confirm against xbt_dict_free. */
34 void SIMIX_network_exit(void)
36 xbt_dict_free(&rdv_points);
39 /******************************************************************************/
40 /* Rendez-Vous Points */
41 /******************************************************************************/
/* Creates (or retrieves) the rendez-vous point called `name`.
 * When a name is given, rdv_points is searched first. A new s_smx_rvpoint_t is
 * zero-allocated with a private copy of the name (NULL for an anonymous rdv)
 * and an empty communication FIFO, then registered in rdv_points with
 * SIMIX_rdv_free as destructor.
 * NOTE(review): short lines (braces, guards, the return) are missing from this
 * listing; presumably the allocation only happens when the lookup found nothing
 * and the registration only when the rdv has a name -- confirm. */
43 smx_rdv_t SIMIX_rdv_create(const char *name)
45 /* two processes may have pushed the same rdv_create request at the same time */
46 smx_rdv_t rdv = name ? xbt_dict_get_or_null(rdv_points, name) : NULL;
49 rdv = xbt_new0(s_smx_rvpoint_t, 1);
50 rdv->name = name ? xbt_strdup(name) : NULL;
51 rdv->comm_fifo = xbt_fifo_new();
54 xbt_dict_set(rdv_points, rdv->name, rdv, SIMIX_rdv_free);
/* Destroys a rendez-vous point by removing its entry from rdv_points.
 * NOTE(review): how anonymous rdvs (NULL name, never registered) are handled
 * is not visible in this listing -- confirm against the full source. */
59 void SIMIX_rdv_destroy(smx_rdv_t rdv)
62 xbt_dict_remove(rdv_points, rdv->name);
/* Destructor registered with rdv_points: `data` is the smx_rdv_t to release.
 * The communication FIFO of the rdv is freed here.
 * NOTE(review): the release of the name and of the structure itself is not
 * visible in this listing -- confirm. */
65 void SIMIX_rdv_free(void *data)
67 smx_rdv_t rdv = (smx_rdv_t) data;
70 xbt_fifo_free(rdv->comm_fifo);
/* Looks `name` up in rdv_points and returns the result of
 * xbt_dict_get_or_null (so NULL is expected when no such rdv is registered). */
74 smx_rdv_t SIMIX_rdv_get_by_name(const char *name)
76 return xbt_dict_get_or_null(rdv_points, name);
/* Walks the communication FIFO of `rdv` and compares the host of the sender
 * process of every queued communication with `host`.
 * NOTE(review): the counter declaration, increment and return are missing
 * from this listing; presumably the number of matching comms is returned.
 * The loop dereferences comm.src_proc without a visible NULL check -- confirm
 * that queued comms always have a sender. */
79 int SIMIX_rdv_comm_count_by_host(smx_rdv_t rdv, smx_host_t host)
81 smx_action_t comm = NULL;
82 xbt_fifo_item_t item = NULL;
85 xbt_fifo_foreach(rdv->comm_fifo, item, comm, smx_action_t) {
86 if (comm->comm.src_proc->smx_host == host)
/* Returns the content of the first item of the rdv FIFO without removing it.
 * NOTE(review): the result for an empty FIFO depends on how
 * xbt_fifo_get_item_content treats a NULL item -- confirm. */
93 smx_action_t SIMIX_rdv_get_head(smx_rdv_t rdv)
95 return xbt_fifo_get_item_content(xbt_fifo_get_first_item(rdv->comm_fifo));
99 * \brief Push a communication request into a rendez-vous point
100 * \param rdv The rendez-vous point
101 * \param comm The communication request
/* Queues `comm` in the FIFO of `rdv` and stores the rdv in the comm as a
 * back-reference (comm.rdv), which later code uses to dequeue it. */
103 static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm)
105 xbt_fifo_push(rdv->comm_fifo, comm);
106 comm->comm.rdv = rdv;
110 * \brief Remove a communication request from a rendez-vous point
111 * \param rdv The rendez-vous point
112 * \param comm The communication request
/* Takes `comm` out of the FIFO of `rdv` and clears its back-reference so the
 * comm is no longer considered queued. */
114 static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm)
116 xbt_fifo_remove(rdv->comm_fifo, comm);
117 comm->comm.rdv = NULL;
121 * \brief Checks if there is a communication action queued in a rendez-vous matching our needs
122 * \param type The type of communication we are looking for (comm_send, comm_recv)
123 * \return The communication action if found, NULL otherwise
/* Scans the FIFO of `rdv` for a queued communication of the requested `type`.
 * For each queued action the user data of its own side is selected (src_data
 * for a send, dst_data for a receive) and passed to `match_fun` together with
 * `data`; a NULL `match_fun` accepts any action of the right type.
 * NOTE(review): short lines (braces, returns, the declaration of `action`)
 * are missing from this listing; the doc fragment above states that the
 * matching action is returned and NULL otherwise -- confirm. */
125 smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
126 int (*match_fun)(void *, void *), void *data)
129 xbt_fifo_item_t item;
130 void* req_data = NULL;
132 xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t){
133 if (action->comm.type == SIMIX_COMM_SEND) {
134 req_data = action->comm.src_data;
135 } else if (action->comm.type == SIMIX_COMM_RECEIVE) {
136 req_data = action->comm.dst_data;
138 if (action->comm.type == type && (!match_fun || match_fun(data, req_data))) {
139 XBT_DEBUG("Found a matching communication action %p", action);
/* Match: unlink the action from the rdv and take one more reference on it. */
140 xbt_fifo_remove_item(rdv->comm_fifo, item);
141 xbt_fifo_free_item(item);
142 action->comm.refcount++;
143 action->comm.rdv = NULL;
146 XBT_DEBUG("Sorry, communication action %p does not match our needs:"
147 " its type is %d but we are looking for a comm of type %d",
148 action, action->comm.type, type);
150 XBT_DEBUG("No matching communication action found");
154 /******************************************************************************/
155 /* Communication Actions */
156 /******************************************************************************/
159 * \brief Creates a new communicate action
160 * \param type The type of request (comm_send, comm_recv)
161 * \return The new communicate action
/* Obtains an action from the action mallocator of simix_global and sets it up
 * as a communication: action type COMMUNICATE, state WAITING, the requested
 * comm `type` and an initial refcount of 1.
 * NOTE(review): the declaration of `act`, the return and the preprocessor
 * lines closing the conditional section below are missing from this listing;
 * which assignments are conditional cannot be told from here -- confirm. */
163 smx_action_t SIMIX_comm_new(e_smx_comm_type_t type)
167 /* alloc structures */
168 act = xbt_mallocator_get(simix_global->action_mallocator);
169 act->type = SIMIX_ACTION_COMMUNICATE;
170 act->state = SIMIX_WAITING;
172 /* set communication */
173 act->comm.type = type;
174 act->comm.refcount = 1;
176 #ifdef HAVE_LATENCY_BOUND_TRACKING
177 //initialize with unknown value
178 act->latency_limited = -1;
182 act->category = NULL;
185 XBT_DEBUG("Create communicate action %p", act);
192 * \brief Destroy a communicate action
193 * \param action The communicate action to be destroyed
/* Drops one reference on a communication and releases it when none is left.
 * A refcount that is already <= 0 is treated as a bug and aborts via xbt_die.
 * On the final release the action name is freed, the surf actions are
 * destroyed, and the action goes back to the action mallocator.
 * NOTE(review): the early return taken while references remain is missing
 * from this listing (short lines are elided) -- confirm. */
195 void SIMIX_comm_destroy(smx_action_t action)
197 XBT_DEBUG("Destroy action %p (refcount:%d)", action, action->comm.refcount);
199 if (action->comm.refcount <= 0)
200 xbt_die(bprintf("the refcount of comm %p is already 0 before decreasing it. That's a bug!",action));
202 action->comm.refcount--;
203 if (action->comm.refcount > 0)
205 XBT_DEBUG("Really free communication %p; refcount is now %d", action,
206 action->comm.refcount);
208 #ifdef HAVE_LATENCY_BOUND_TRACKING
209 action->latency_limited = SIMIX_comm_is_latency_bounded( action ) ;
213 TRACE_smx_action_destroy(action);
216 xbt_free(action->name);
217 SIMIX_comm_destroy_internal_actions(action);
219 if (action->comm.detached && action->state != SIMIX_DONE) {
220 /* the communication has failed and was detached:
221 * we have to free the buffer */
/* For detached comms src_data is used as a void_f_pvoid_t cleanup function
 * and is applied to the sender buffer. */
222 ((void_f_pvoid_t) action->comm.src_data)(action->comm.src_buff);
225 xbt_mallocator_release(simix_global->action_mallocator, action);
/* Releases the surf actions attached to a communication: the network transfer
 * (surf_comm) and the two timeout actions (src_timeout, dst_timeout).
 * Each one that is set is unreferenced through its model and the pointer is
 * reset to NULL, so calling this function again is harmless for that field.
 * With latency-bound tracking compiled in, the latency flag is sampled before
 * surf_comm is dropped. */
228 void SIMIX_comm_destroy_internal_actions(smx_action_t action)
230 if (action->comm.surf_comm){
231 #ifdef HAVE_LATENCY_BOUND_TRACKING
232 action->latency_limited = SIMIX_comm_is_latency_bounded(action);
234 action->comm.surf_comm->model_type->action_unref(action->comm.surf_comm);
235 action->comm.surf_comm = NULL;
238 if (action->comm.src_timeout){
239 action->comm.src_timeout->model_type->action_unref(action->comm.src_timeout);
240 action->comm.src_timeout = NULL;
243 if (action->comm.dst_timeout){
244 action->comm.dst_timeout->model_type->action_unref(action->comm.dst_timeout);
245 action->comm.dst_timeout = NULL;
/* Posts an asynchronous send on `rdv` on behalf of `src_proc`.
 * A queued receive matching `match_fun`/`data` is searched first. The visible
 * code either creates a new SEND action and queues it on the rdv, or marks the
 * action READY (state and comm type). The sender side of the action is then
 * filled in (process, size, rate, buffer, user data) and SIMIX_comm_start is
 * called.
 * NOTE(review): the end of the signature, the declaration of `action`, the
 * guards (no match found / detached / the one around the RUNNING assignment)
 * and the return are missing from this listing -- confirm. */
249 smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
250 double task_size, double rate,
251 void *src_buff, size_t src_buff_size,
252 int (*match_fun)(void *, void *), void *data,
257 /* Look for communication request matching our needs.
258 If it is not found then create it and push it into the rendez-vous point */
259 action = SIMIX_rdv_get_request(rdv, SIMIX_COMM_RECEIVE, match_fun, data);
262 action = SIMIX_comm_new(SIMIX_COMM_SEND);
263 SIMIX_rdv_push(rdv, action);
265 action->state = SIMIX_READY;
266 action->comm.type = SIMIX_COMM_READY;
269 /* If the communication action is detached then decrease the refcount
270 * by one, so it will be eliminated by the receivers destroy call */
272 action->comm.detached = 1;
273 action->comm.refcount--;
276 /* Setup the communication request */
277 action->comm.src_proc = src_proc;
278 action->comm.task_size = task_size;
279 action->comm.rate = rate;
280 action->comm.src_buff = src_buff;
281 action->comm.src_buff_size = src_buff_size;
282 action->comm.src_data = data;
285 action->state = SIMIX_RUNNING;
289 SIMIX_comm_start(action);
/* Posts an asynchronous receive on `rdv` on behalf of `dst_proc`.
 * Mirror of SIMIX_comm_isend: a queued send matching `match_fun`/`data` is
 * searched; the visible code either creates and queues a new RECEIVE action or
 * marks the action READY, then fills in the receiver side (process, buffer,
 * pointer to the buffer size, user data) and calls SIMIX_comm_start.
 * NOTE(review): the declaration of `action`, the guards (no match found, the
 * one around the RUNNING assignment), the closing line of the first inner
 * comment and the return are missing from this listing -- confirm. */
293 smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv,
294 void *dst_buff, size_t *dst_buff_size,
295 int (*match_fun)(void *, void *), void *data)
299 /* Look for communication request matching our needs.
300 * If it is not found then create it and push it into the rendez-vous point
302 action = SIMIX_rdv_get_request(rdv, SIMIX_COMM_SEND, match_fun, data);
305 action = SIMIX_comm_new(SIMIX_COMM_RECEIVE);
306 SIMIX_rdv_push(rdv, action);
308 action->state = SIMIX_READY;
309 action->comm.type = SIMIX_COMM_READY;
312 /* Setup communication request */
313 action->comm.dst_proc = dst_proc;
314 action->comm.dst_buff = dst_buff;
315 action->comm.dst_buff_size = dst_buff_size;
316 action->comm.dst_data = data;
319 action->state = SIMIX_RUNNING;
323 SIMIX_comm_start(action);
/* Handles a wait request `req` on the communication `action`.
 * The request is appended to the request list of the action and the issuer is
 * marked as waiting on it. Two paths are visible:
 *  - a path that forces the final state (DONE, or SRC/DST_TIMEOUT depending on
 *    whether the issuer is the sender) and finishes the comm at once.
 *    NOTE(review): its guards are missing from this listing; the inline
 *    comments mention the MC, so presumably this is the model-checking path
 *    selected through `idx` -- confirm;
 *  - the regular path: a comm that already left WAITING/RUNNING is finished
 *    immediately, otherwise a surf sleep of `timeout` is created on the host of
 *    the issuer, tagged with the action, and stored as src_timeout or
 *    dst_timeout according to the side of the issuer. */
327 void SIMIX_pre_comm_wait(smx_req_t req, smx_action_t action, double timeout, int idx)
329 /* the request may be a wait, a send or a recv */
332 /* Associate this request to the action */
333 xbt_fifo_push(action->request_list, req);
334 req->issuer->waiting_action = action;
338 action->state = SIMIX_DONE;
340 /* If we reached this point, the wait request must have a timeout */
341 /* Otherwise it shouldn't be enabled and executed by the MC */
345 if (action->comm.src_proc == req->issuer)
346 action->state = SIMIX_SRC_TIMEOUT;
348 action->state = SIMIX_DST_TIMEOUT;
351 SIMIX_comm_finish(action);
355 /* If the action has already finish perform the error handling, */
356 /* otherwise set up a waiting timeout on the right side */
357 if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING) {
358 SIMIX_comm_finish(action);
359 } else { /* if (timeout >= 0) { we need a surf sleep action even when there is no timeout, otherwise surf won't tell us when the host fails */
360 sleep = surf_workstation_model->extension.workstation.sleep(req->issuer->smx_host->host, timeout);
361 surf_workstation_model->action_data_set(sleep, action);
363 if (req->issuer == action->comm.src_proc)
364 action->comm.src_timeout = sleep;
366 action->comm.dst_timeout = sleep;
/* Handles a test request: stores in req->comm_test.result whether the
 * communication is over and, when it is, attaches the request to the action
 * and lets SIMIX_comm_finish answer it; otherwise the request is answered
 * directly.
 * Two variants are visible. The first considers the comm testable as soon as
 * both peers are known and forces the state to DONE; the second checks that
 * the state left WAITING/RUNNING.
 * NOTE(review): the condition choosing between the variants and the `else`
 * lines are missing from this listing; presumably the first variant is the
 * model-checking path -- confirm. */
370 void SIMIX_pre_comm_test(smx_req_t req)
372 smx_action_t action = req->comm_test.comm;
375 req->comm_test.result = action->comm.src_proc && action->comm.dst_proc;
376 if(req->comm_test.result){
377 action->state = SIMIX_DONE;
378 xbt_fifo_push(action->request_list, req);
379 SIMIX_comm_finish(action);
381 SIMIX_request_answer(req);
386 req->comm_test.result = (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING);
387 if (req->comm_test.result) {
388 xbt_fifo_push(action->request_list, req);
389 SIMIX_comm_finish(action);
391 SIMIX_request_answer(req);
/* Handles a testany request over the dynar req->comm_testany.comms.
 * The result defaults to -1. Two variants are visible:
 *  - one driven by `idx`: the request is either answered directly or the comm
 *    at position `idx` is forced to DONE, reported as result and finished;
 *  - a scan of the dynar that reports the position of a comm whose state left
 *    WAITING/RUNNING and finishes it; the request is answered directly
 *    otherwise.
 * NOTE(review): the guards selecting the variants, the declarations and the
 * returns are missing from this listing; presumably the `idx` variant is the
 * model-checking path and the scan stops at the first hit -- confirm. */
395 void SIMIX_pre_comm_testany(smx_req_t req, int idx)
399 xbt_dynar_t actions = req->comm_testany.comms;
400 req->comm_testany.result = -1;
404 SIMIX_request_answer(req);
406 action = xbt_dynar_get_as(actions, idx, smx_action_t);
407 req->comm_testany.result = idx;
408 xbt_fifo_push(action->request_list, req);
409 action->state = SIMIX_DONE;
410 SIMIX_comm_finish(action);
415 xbt_dynar_foreach(req->comm_testany.comms,cursor,action) {
416 if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING) {
417 req->comm_testany.result = cursor;
418 xbt_fifo_push(action->request_list, req);
419 SIMIX_comm_finish(action);
423 SIMIX_request_answer(req);
/* Handles a waitany request over the dynar req->comm_waitany.comms.
 * Two variants are visible:
 *  - one driven by `idx`: the comm at that position receives the request, is
 *    reported as result, forced to DONE and finished;
 *  - the regular one: the request is attached to every comm of the dynar, and
 *    a comm whose state already left WAITING/RUNNING is finished.
 * NOTE(review): the guard selecting the variants, the declaration of `action`
 * and the statements leaving the loop are missing from this listing --
 * confirm. */
426 void SIMIX_pre_comm_waitany(smx_req_t req, int idx)
429 unsigned int cursor = 0;
430 xbt_dynar_t actions = req->comm_waitany.comms;
433 action = xbt_dynar_get_as(actions, idx, smx_action_t);
434 xbt_fifo_push(action->request_list, req);
435 req->comm_waitany.result = idx;
436 action->state = SIMIX_DONE;
437 SIMIX_comm_finish(action);
441 xbt_dynar_foreach(actions, cursor, action){
442 /* Associate this request to the action */
443 xbt_fifo_push(action->request_list, req);
444 if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING){
445 SIMIX_comm_finish(action);
/* Detaches a waitany request from every communication it was attached to:
 * `req` is removed from the request list of each action in
 * req->comm_waitany.comms. */
451 void SIMIX_waitany_req_remove_from_actions(smx_req_t req)
454 unsigned int cursor = 0;
455 xbt_dynar_t actions = req->comm_waitany.comms;
457 xbt_dynar_foreach(actions, cursor, action){
458 xbt_fifo_remove(action->request_list, req);
463 * \brief Start the simulation of a communication request
464 * \param action The communication action
/* Starts the simulated transfer of a communication that is READY.
 * A surf communicate action is created between the hosts of the two peers
 * with the task size and rate of the comm, tagged with the SIMIX action, and
 * the state becomes RUNNING. A surf action that is already FAILED turns the
 * state into LINK_FAILURE and the surf actions are destroyed. If either peer
 * is suspended, the surf action is suspended.
 * NOTE(review): braces and preprocessor lines are missing from this listing,
 * so the exact nesting of the last two checks cannot be told from here. The
 * suspend call uses comm.surf_comm, which the failure branch resets to NULL
 * through SIMIX_comm_destroy_internal_actions -- confirm the two branches are
 * mutually exclusive. */
466 static XBT_INLINE void SIMIX_comm_start(smx_action_t action)
468 /* If both the sender and the receiver are already there, start the communication */
469 if (action->state == SIMIX_READY) {
470 smx_host_t sender = action->comm.src_proc->smx_host;
471 smx_host_t receiver = action->comm.dst_proc->smx_host;
473 XBT_DEBUG("Starting communication %p from '%s' to '%s'", action,
474 SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver));
476 action->comm.surf_comm = surf_workstation_model->extension.workstation.
477 communicate(sender->host, receiver->host, action->comm.task_size, action->comm.rate);
479 surf_workstation_model->action_data_set(action->comm.surf_comm, action);
481 action->state = SIMIX_RUNNING;
484 TRACE_smx_action_communicate(action, action->comm.src_proc);
487 /* If a link is failed, detect it immediately */
488 if (surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED) {
489 XBT_DEBUG("Communication from '%s' to '%s' failed to start because of a link failure",
490 SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver));
491 action->state = SIMIX_LINK_FAILURE;
492 SIMIX_comm_destroy_internal_actions(action);
495 /* If any of the process is suspend, create the action but stop its execution,
496 it will be restarted when the sender process resume */
497 if (SIMIX_process_is_suspended(action->comm.src_proc) ||
498 SIMIX_process_is_suspended(action->comm.dst_proc)) {
499 /* FIXME: check what should happen with the action state */
500 surf_workstation_model->suspend(action->comm.surf_comm);
/* Answers every request pending on a terminated communication.
 * Requests are shifted one by one from the request list of the action. For
 * each of them:
 *  - a waitany request is detached from its other comms and receives the
 *    position of this action in its dynar as result;
 *  - the action is taken out of its rendez-vous point if still queued there;
 *  - the final state is translated: data is copied on completion, while
 *    timeouts, host failures and link failures are turned into an exception
 *    stored in the running context of the issuer (doexception = 1). On a host
 *    failure the issuer located on the failed side gets host_error, the other
 *    peer gets network_error;
 *  - for waitany/testany the exception value is set to the position of the
 *    failed comm in the dynar of the request;
 *  - the issuer stops waiting and the request is answered.
 * Finally SIMIX_comm_destroy is called destroy_count times.
 * NOTE(review): short lines (TRY, else, break, several case labels, the
 * declaration of `req`, the updates of destroy_count) are missing from this
 * listing -- confirm the details against the full source. */
505 void SIMIX_comm_finish(smx_action_t action)
507 unsigned int destroy_count = 0;
510 while ((req = xbt_fifo_shift(action->request_list))) {
512 /* If a waitany request is waiting for this action to finish, then remove
513 it from the other actions in the waitany list. Afterwards, get the
514 position of the actual action in the waitany request's actions dynar and
515 return it as the result of the call */
516 if (req->call == REQ_COMM_WAITANY) {
517 SIMIX_waitany_req_remove_from_actions(req);
519 req->comm_waitany.result = xbt_dynar_search(req->comm_waitany.comms, &action);
522 /* If the action is still in a rendez-vous point then remove from it */
523 if (action->comm.rdv)
524 SIMIX_rdv_remove(action->comm.rdv, action);
526 XBT_DEBUG("SIMIX_comm_finish: action state = %d", action->state);
528 /* Check out for errors */
529 switch (action->state) {
532 XBT_DEBUG("Communication %p complete!", action);
533 SIMIX_comm_copy_data(action);
536 case SIMIX_SRC_TIMEOUT:
538 THROW0(timeout_error, 0, "Communication timeouted because of sender");
540 CATCH(req->issuer->running_ctx->exception) {
541 req->issuer->doexception = 1;
545 case SIMIX_DST_TIMEOUT:
547 THROW0(timeout_error, 0, "Communication timeouted because of receiver");
549 CATCH(req->issuer->running_ctx->exception) {
550 req->issuer->doexception = 1;
554 case SIMIX_SRC_HOST_FAILURE:
556 if (req->issuer == action->comm.src_proc)
557 THROW0(host_error, 0, "Host failed");
559 THROW0(network_error, 0, "Remote peer failed");
561 CATCH(req->issuer->running_ctx->exception) {
562 req->issuer->doexception = 1;
566 case SIMIX_DST_HOST_FAILURE:
568 if (req->issuer == action->comm.dst_proc)
569 THROW0(host_error, 0, "Host failed");
571 THROW0(network_error, 0, "Remote peer failed");
573 CATCH(req->issuer->running_ctx->exception) {
574 req->issuer->doexception = 1;
578 case SIMIX_LINK_FAILURE:
580 XBT_DEBUG("Link failure in action %p between '%s' and '%s': posting an exception to the issuer: %s (%p)",
581 action, action->comm.src_proc->smx_host->name, action->comm.dst_proc->smx_host->name,
582 req->issuer->name, req->issuer);
583 THROW0(network_error, 0, "Link failure");
585 CATCH(req->issuer->running_ctx->exception) {
586 req->issuer->doexception = 1;
594 /* if there is an exception during a waitany or a testany, indicate the position of the failed communication */
595 if (req->issuer->doexception) {
596 if (req->call == REQ_COMM_WAITANY) {
597 req->issuer->running_ctx->exception.value = xbt_dynar_search(req->comm_waitany.comms, &action);
599 else if (req->call == REQ_COMM_TESTANY) {
600 req->issuer->running_ctx->exception.value = xbt_dynar_search(req->comm_testany.comms, &action);
604 req->issuer->waiting_action = NULL;
605 SIMIX_request_answer(req);
609 while (destroy_count-- > 0)
610 SIMIX_comm_destroy(action);
/* Derives the final SIMIX state of a communication from the state of its surf
 * actions. The checks are ordered, first hit wins: sender timeout done,
 * receiver timeout done, sender timeout failed (sender host failure),
 * receiver timeout failed (receiver host failure), transfer failed (link
 * failure). The surf actions are then released and, when requests are pending
 * on the action, they are answered through SIMIX_comm_finish.
 * NOTE(review): the `else` line before the DONE assignment is missing from
 * this listing; presumably DONE is the fallback of the chain -- confirm. */
613 void SIMIX_post_comm(smx_action_t action)
615 /* Update action state */
616 if (action->comm.src_timeout &&
617 surf_workstation_model->action_state_get(action->comm.src_timeout) == SURF_ACTION_DONE)
618 action->state = SIMIX_SRC_TIMEOUT;
619 else if (action->comm.dst_timeout &&
620 surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_DONE)
621 action->state = SIMIX_DST_TIMEOUT;
622 else if (action->comm.src_timeout &&
623 surf_workstation_model->action_state_get(action->comm.src_timeout) == SURF_ACTION_FAILED)
624 action->state = SIMIX_SRC_HOST_FAILURE;
625 else if (action->comm.dst_timeout &&
626 surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_FAILED)
627 action->state = SIMIX_DST_HOST_FAILURE;
628 else if (action->comm.surf_comm &&
629 surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED)
630 action->state = SIMIX_LINK_FAILURE;
632 action->state = SIMIX_DONE;
634 XBT_DEBUG("SIMIX_post_comm: action state = %d", action->state);
636 /* After this point the surf actions associated with the simix communicate
637 action are no longer needed, thus we delete them. */
638 SIMIX_comm_destroy_internal_actions(action);
640 /* If there are requests associated with the action, then answer them */
641 if (xbt_fifo_size(action->request_list))
642 SIMIX_comm_finish(action);
/* Cancels a communication.
 * A comm still WAITING is dequeued from its rendez-vous point and marked
 * FAILED; the other visible path cancels the surf transfer action.
 * NOTE(review): the `else` branch header and its guard are missing from this
 * listing; the inline comment says there are no surf actions under the MC, so
 * presumably the surf cancel is skipped in that mode -- confirm. */
645 void SIMIX_comm_cancel(smx_action_t action)
647 /* If the action is a waiting state means that it is still in a rdv */
648 /* so remove from it and delete it */
649 if (action->state == SIMIX_WAITING) {
650 SIMIX_rdv_remove(action->comm.rdv, action);
651 action->state = SIMIX_FAILED;
653 /* When running the MC there are no surf actions */
655 surf_workstation_model->action_cancel(action->comm.surf_comm);
/* Suspends the surf transfer action of a communication.
 * NOTE(review): comm.surf_comm is passed on without a visible NULL check,
 * although it is NULL until SIMIX_comm_start creates it and again after
 * SIMIX_comm_destroy_internal_actions -- confirm callers only use this on a
 * started comm, or add a guard. */
659 void SIMIX_comm_suspend(smx_action_t action)
661 /*FIXME: shall we suspend also the timeout actions? */
662 surf_workstation_model->suspend(action->comm.surf_comm);
/* Resumes the surf transfer action of a communication.
 * NOTE(review): as for the suspend counterpart, comm.surf_comm is used
 * without a visible NULL check -- confirm it is always set here. */
665 void SIMIX_comm_resume(smx_action_t action)
667 /*FIXME: check what happen with the timeouts */
668 surf_workstation_model->resume(action->comm.surf_comm);
672 /************* Action Getters **************/
675 * \brief get the amount remaining from the communication
676 * \param action The communication
/* Reports the amount that remains to be transferred, depending on the state
 * of the action: one branch asks the surf model about the transfer action,
 * the two other visible branches report 0.
 * NOTE(review): the case labels, the declaration of `remains` and the return
 * are missing from this listing, so which states map to which branch cannot
 * be told from here -- confirm. */
678 double SIMIX_comm_get_remains(smx_action_t action)
682 switch (action->state) {
685 remains = surf_workstation_model->get_remains(action->comm.surf_comm);
690 remains = 0; /*FIXME: check what should be returned */
694 remains = 0; /*FIXME: is this correct? */
/* Returns the current state stored in the action. */
700 e_smx_state_t SIMIX_comm_get_state(smx_action_t action)
702 return action->state;
706 * \brief Return the user data associated to the sender of the communication
707 * \param action The communication
708 * \return the user data
/* Accessor for comm.src_data, the pointer stored by SIMIX_comm_isend. */
710 void* SIMIX_comm_get_src_data(smx_action_t action)
712 return action->comm.src_data;
716 * \brief Return the user data associated to the receiver of the communication
717 * \param action The communication
718 * \return the user data
/* Accessor for comm.dst_data, the pointer stored by SIMIX_comm_irecv. */
720 void* SIMIX_comm_get_dst_data(smx_action_t action)
722 return action->comm.dst_data;
/* Returns the sender process of the communication (comm.src_proc), which
 * SIMIX_comm_isend fills in. */
725 smx_process_t SIMIX_comm_get_src_proc(smx_action_t action)
727 return action->comm.src_proc;
/* Returns the receiver process of the communication (comm.dst_proc), which
 * SIMIX_comm_irecv fills in. */
730 smx_process_t SIMIX_comm_get_dst_proc(smx_action_t action)
732 return action->comm.dst_proc;
735 #ifdef HAVE_LATENCY_BOUND_TRACKING
737 * \brief verify if communication is latency bounded
738 * \param comm The communication
/* Returns the latency-limited flag of the communication.
 * While the surf transfer action still exists the flag is refreshed from the
 * surf model and cached in action->latency_limited; once it is gone the last
 * cached value is returned (SIMIX_comm_new initializes it to -1). */
740 XBT_INLINE int SIMIX_comm_is_latency_bounded(smx_action_t action)
742 if (action->comm.surf_comm){
743 XBT_DEBUG("Getting latency limited for surf_action (%p)", action->comm.surf_comm);
744 action->latency_limited = surf_workstation_model->get_latency_limited(action->comm.surf_comm);
745 XBT_DEBUG("Action limited is %d", action->latency_limited);
747 return action->latency_limited;
751 /******************************************************************************/
752 /* SIMIX_comm_copy_data callbacks */
753 /******************************************************************************/
/* Function used by SIMIX_comm_copy_data to move the payload; it receives the
 * communication and the number of bytes to copy. Defaults to the pointer
 * callback. */
754 static void (*SIMIX_comm_copy_data_callback) (smx_action_t, size_t) =
755 &SIMIX_comm_copy_pointer_callback;
/* Installs `callback` as the function used to copy communication payloads.
 * No validation is visible: a NULL callback would be stored as is.
 * NOTE(review): the line carrying the return type is missing from this
 * listing. */
758 SIMIX_comm_set_copy_data_callback(void (*callback) (smx_action_t, size_t))
760 SIMIX_comm_copy_data_callback = callback;
/* Copy callback that transfers the payload by reference: the address held in
 * src_buff is written into the location dst_buff points to, so dst_buff is
 * used as a pointer to a pointer. Asserts that buff_size equals
 * sizeof(void *). */
763 void SIMIX_comm_copy_pointer_callback(smx_action_t comm, size_t buff_size)
765 xbt_assert1((buff_size == sizeof(void *)),
766 "Cannot copy %zu bytes: must be sizeof(void*)", buff_size);
767 *(void **) (comm->comm.dst_buff) = comm->comm.src_buff;
/* Copy callback that transfers the payload by value: buff_size bytes are
 * copied from src_buff to dst_buff with memcpy, so the two buffers must not
 * overlap. */
770 void SIMIX_comm_copy_buffer_callback(smx_action_t comm, size_t buff_size)
772 memcpy(comm->comm.dst_buff, comm->comm.src_buff, buff_size);
776 * \brief Copy the communication data from the sender's buffer to the receiver's one
777 * \param comm The communication
/* Transfers the payload of a completed communication through the installed
 * copy callback, at most once per communication.
 * The size starts from src_buff_size. When the receiver supplied
 * dst_buff_size it is clamped to that capacity and the capacity is then
 * overwritten with the size actually used.
 * NOTE(review): the early return after the first check and the lines between
 * the size update and the callback invocation are missing from this listing
 * -- confirm whether a zero size skips the callback. */
779 void SIMIX_comm_copy_data(smx_action_t comm)
781 size_t buff_size = comm->comm.src_buff_size;
782 /* If there is no data to be copy then return */
783 if (!comm->comm.src_buff || !comm->comm.dst_buff || comm->comm.copied == 1)
786 XBT_DEBUG("Copying comm %p data from %s (%p) -> %s (%p) (%zu bytes)",
788 comm->comm.src_proc->smx_host->name, comm->comm.src_buff,
789 comm->comm.dst_proc->smx_host->name, comm->comm.dst_buff, buff_size);
791 /* Copy at most dst_buff_size bytes of the message to receiver's buffer */
792 if (comm->comm.dst_buff_size)
793 buff_size = MIN(buff_size, *(comm->comm.dst_buff_size));
795 /* Update the receiver's buffer size to the copied amount */
796 if (comm->comm.dst_buff_size)
797 *comm->comm.dst_buff_size = buff_size;
802 (*SIMIX_comm_copy_data_callback) (comm, buff_size);
804 /* Set the copied flag so we copy data only once */
805 /* (this function might be called from both communication ends) */
806 comm->comm.copied = 1;