add_executable(scatter scatter.c)
add_executable(reduce reduce.c)
add_executable(split split.c)
+add_executable(dsend dsend.c)
add_executable(mvmul mvmul.c)
add_executable(smpi_sendrecv sendrecv.c)
add_executable(smpi_traced smpi_traced.c)
target_link_libraries(scatter m simgrid smpi )
target_link_libraries(reduce m simgrid smpi )
target_link_libraries(split m simgrid smpi )
+target_link_libraries(dsend m simgrid smpi )
target_link_libraries(mvmul m simgrid smpi )
target_link_libraries(smpi_sendrecv m simgrid smpi )
target_link_libraries(smpi_traced m simgrid smpi )
--- /dev/null
+/* Copyright (c) 2011. The SimGrid Team. All rights reserved. */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+/* This program simply does a very small exchange to test whether using SIMIX dsend to model the eager mode works */
+
+#include <stdio.h>
+#include <mpi.h>
+
+/* Minimal eager-mode exchange: rank 1 sends one int (22) to rank 0, which
+ * checks the value it received.
+ *
+ * Every rank other than 1 posts a receive, but only one message is ever sent,
+ * so this is presumably meant to be launched on exactly two ranks.
+ *
+ * Returns 0 in all cases; a mismatch is only reported on stdout. */
+int main(int argc, char *argv[]) {
+  int rank;
+  int data=11; /* sentinel: must be overwritten by the received value */
+
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+  if (rank==1) {
+    data=22;
+    MPI_Send(&data,1,MPI_INT,(rank+1)%2,666,MPI_COMM_WORLD);
+  } else {
+    /* Use the standard wildcard and "no status" constants instead of the raw
+     * -1 / NULL values, which are implementation details and not portable. */
+    MPI_Recv(&data,1,MPI_INT,MPI_ANY_SOURCE,666,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
+    if (data !=22) {
+      printf("rank %d: Damn, data does not match (got %d)\n",rank, data);
+    }
+  }
+
+  printf("rank %d: data exchanged\n", rank);
+  MPI_Finalize();
+  return 0;
+}
double rate, void *src_buff,
size_t src_buff_size,
int (*match_fun)(void *, void *),
+ void (*clean_fun)(void *),
void *data, int detached);
XBT_PUBLIC(void) SIMIX_req_comm_recv(smx_rdv_t rdv, void *dst_buff,
payload, msg->payl);
}
- comm = SIMIX_req_comm_isend(target_rdv, whole_payload_size, -1, msg, sizeof(void *), NULL, msg, 0);
+ comm = SIMIX_req_comm_isend(target_rdv, whole_payload_size, -1, msg, sizeof(void *), NULL,NULL, msg, 0);
SIMIX_req_comm_wait(comm, -1);
XBT_VERB("Message sent (and received)");
comm->status = MSG_OK;
comm->s_comm =
SIMIX_req_comm_isend(mailbox, t_simdata->message_size,
- t_simdata->rate, task, sizeof(void *), match_fun, match_data, 0);
+ t_simdata->rate, task, sizeof(void *), match_fun, NULL, match_data, 0);
t_simdata->comm = comm->s_comm; /* FIXME: is the field t_simdata->comm still useful? */
return comm;
/* Send it by calling SIMIX network layer */
smx_action_t comm = SIMIX_req_comm_isend(mailbox, t_simdata->message_size,
- t_simdata->rate, task, sizeof(void *), NULL, cleanup, 1);
+ t_simdata->rate, task, sizeof(void *), NULL,cleanup, NULL, 1);
t_simdata->comm = comm;
}
TRY {
smx_action_t comm = SIMIX_req_comm_isend(mailbox, t_simdata->message_size,
t_simdata->rate, task, sizeof(void *),
- NULL, NULL, 0);
+ NULL, NULL, NULL, 0);
#ifdef HAVE_TRACING
if (TRACE_is_enabled()) {
SIMIX_req_set_category(comm, task->category);
smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
double task_size, double rate,
void *src_buff, size_t src_buff_size,
- int (*)(void *, void *), void *data,
+ int (*)(void *, void *),
+ void (*clean_fun)(void *), // called on the source buffer of a detached send to release it (after the data copy, or when the communication fails)
+ void *data,
int detached);
void SIMIX_comm_recv(smx_process_t dst_proc, smx_rdv_t rdv,
void *dst_buff, size_t *dst_buff_size,
int refcount; /* Number of processes involved in the cond */
int detached; /* If detached or not */
+ void (*clean_fun)(void*); /* Function to clean the detached src_buf if something goes wrong */
+
/* Surf action data */
surf_action_t surf_comm; /* The Surf communication action encapsulated */
surf_action_t src_timeout; /* Surf's actions to instrument the timeouts */
void *src_buff;
size_t src_buff_size;
int (*match_fun)(void *, void *);
+ void (*clean_fun)(void *);
void *data;
int detached;
smx_action_t result;
if (action->comm.detached && action->state != SIMIX_DONE) {
/* the communication has failed and was detached:
* we have to free the buffer */
- ((void_f_pvoid_t) action->comm.src_data)(action->comm.src_buff);
+ action->comm.clean_fun(action->comm.src_buff);
+ action->comm.src_buff = NULL;
}
xbt_mallocator_release(simix_global->action_mallocator, action);
smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
double task_size, double rate,
void *src_buff, size_t src_buff_size,
- int (*match_fun)(void *, void *), void *data,
+ int (*match_fun)(void *, void *),
+ void (*clean_fun)(void *), // called on the source buffer of a detached send to release it (after the data copy, or when the communication fails)
+ void *data,
int detached)
{
smx_action_t action;
if (detached) {
action->comm.detached = 1;
action->comm.refcount--;
+ action->comm.clean_fun = clean_fun;
+ } else {
+ action->comm.clean_fun = NULL;
}
/* Setup the communication request */
void SIMIX_pre_comm_wait(smx_req_t req, smx_action_t action, double timeout, int idx)
{
+
/* the request may be a wait, a send or a recv */
surf_action_t sleep;
SIMIX_comm_finish(action);
return;
}
+ XBT_INFO("Comm_wait. state:%d; I'm %s",action->state,
+ req->issuer == action->comm.src_proc?"sender":"receiver");
/* If the action has already finish perform the error handling, */
/* otherwise set up a waiting timeout on the right side */
if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING) {
SIMIX_comm_finish(action);
} else { /* if (timeout >= 0) { we need a surf sleep action even when there is no timeout, otherwise surf won't tell us when the host fails */
+ XBT_INFO("Not done, we need a sleep action");
sleep = surf_workstation_model->extension.workstation.sleep(req->issuer->smx_host->host, timeout);
surf_workstation_model->action_data_set(sleep, action);
case SIMIX_LINK_FAILURE:
TRY {
- XBT_DEBUG("Link failure in action %p between '%s' and '%s': posting an exception to the issuer: %s (%p)",
+ XBT_INFO("Link failure in action %p between '%s' and '%s': posting an exception to the issuer: %s (%p) detached:%d",
action,
action->comm.src_proc ? action->comm.src_proc->smx_host->name : NULL,
action->comm.dst_proc ? action->comm.dst_proc->smx_host->name : NULL,
- req->issuer->name, req->issuer);
+ req->issuer->name, req->issuer,action->comm.detached);
+ if (action->comm.src_proc == req->issuer) {
+ XBT_INFO("I'm source");
+ } else if (action->comm.dst_proc == req->issuer) {
+ XBT_INFO("I'm dest");
+ } else {
+ XBT_INFO("I'm neither source nor dest");
+ }
THROWF(network_error, 0, "Link failure");
}
CATCH(req->issuer->running_ctx->exception) {
surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_FAILED)
action->state = SIMIX_DST_HOST_FAILURE;
else if (action->comm.surf_comm &&
- surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED)
+ surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED) {
+ XBT_INFO("Puta madre. Surf says that the link broke");
action->state = SIMIX_LINK_FAILURE;
- else
+ } else
action->state = SIMIX_DONE;
XBT_DEBUG("SIMIX_post_comm: comm %p, state %d, src_proc %p, dst_proc %p, detached: %d",
void smpi_comm_copy_data_callback(smx_action_t comm, size_t buff_size)
{
+ /* Copy buff_size bytes from the sender's buffer into the receiver's buffer.
+  * For a detached send, the source buffer is then handed to the comm's
+  * clean_fun and the pointer is cleared so it cannot be released twice. */
+ XBT_DEBUG("Copy the data over");
 memcpy(comm->comm.dst_buff, comm->comm.src_buff, buff_size);
- if (comm->comm.detached) // if this is a detached send, the source buffer was duplicated by SMPI sender to make the original buffer available to the application ASAP
- free(comm->comm.src_buff);
+ if (comm->comm.detached && comm->comm.clean_fun) { // if this is a detached send, the source buffer was duplicated by SMPI sender to make the original buffer available to the application ASAP
+ /* clean_fun is whatever the sender passed to isend; it may be NULL, in
+  * which case the buffer is left untouched rather than calling through NULL */
+ comm->comm.clean_fun(comm->comm.src_buff);
+ comm->comm.src_buff = NULL;
+ }
}
/**
if (comm->comm.dst_buff_size)
*comm->comm.dst_buff_size = buff_size;
-<<<<<<< HEAD
if (buff_size > 0)
- (*SIMIX_comm_copy_data_callback) (comm, buff_size);
-=======
- if (buff_size == 0)
- return;
-
- SIMIX_comm_copy_data_callback(comm, buff_size);
->>>>>>> master
+ SIMIX_comm_copy_data_callback (comm, buff_size);
/* Set the copied flag so we copy data only once */
/* (this function might be called from both communication ends) */
if (action->comm.detached) {
if (action->comm.refcount == 0) {
+ XBT_DEBUG("Increase the refcount before destroying it");
/* I'm not supposed to destroy a detached comm from the sender side,
* unless there is no receiver matching the rdv */
action->comm.refcount++;
SIMIX_comm_destroy(action);
}
+ XBT_DEBUG("Don't destroy it since its refcount is %d",action->comm.refcount);
}
else {
SIMIX_comm_destroy(action);
req->comm_send.src_buff,
req->comm_send.src_buff_size,
req->comm_send.match_fun,
+ NULL, /* no clean function since it's not detached */
req->comm_send.data,
0);
SIMIX_pre_comm_wait(req, comm, req->comm_send.timeout, 0);
req->comm_isend.src_buff,
req->comm_isend.src_buff_size,
req->comm_isend.match_fun,
+ req->comm_isend.clean_fun,
req->comm_isend.data,
req->comm_isend.detached);
SIMIX_request_answer(req);
if (MC_IS_ENABLED) {
/* the model-checker wants two separate requests */
smx_action_t comm = SIMIX_req_comm_isend(rdv, task_size, rate,
- src_buff, src_buff_size, match_fun, data, 0);
+ src_buff, src_buff_size, match_fun, NULL, data, 0);
SIMIX_req_comm_wait(comm, timeout);
}
else {
smx_action_t SIMIX_req_comm_isend(smx_rdv_t rdv, double task_size, double rate,
void *src_buff, size_t src_buff_size,
- int (*match_fun)(void *, void *), void *data,
+ int (*match_fun)(void *, void *),
+ void (*clean_fun)(void *),
+ void *data,
int detached)
{
/* checking for infinite values */
req->comm_isend.src_buff = src_buff;
req->comm_isend.src_buff_size = src_buff_size;
req->comm_isend.match_fun = match_fun;
+ req->comm_isend.clean_fun = clean_fun;
req->comm_isend.data = data;
req->comm_isend.detached = detached;
return request;
}
+/* Cleanup callback passed to SIMIX_req_comm_isend for SMPI sends: releases
+ * the buffer with free(). The call is only traced at debug level: this runs
+ * once per released buffer, so an INFO message plus a backtrace dump would
+ * flood the output of every simulation. */
+static void myfree(void *d) {
+  XBT_DEBUG("myfree called on %p",d);
+  free(d);
+}
+
void smpi_mpi_start(MPI_Request request)
{
smx_rdv_t mailbox;
detached = 1;
request->buf = malloc(request->size);
memcpy(request->buf,oldbuf,request->size);
- XBT_DEBUG("Send request %p is detached; buf %p copied into %p",request,oldbuf,request->buf);
+ XBT_INFO("Send request %p is detached; buf %p copied into %p",request,oldbuf,request->buf);
} else {
XBT_DEBUG("Send request %p is not detached (buf: %p)",request,request->buf);
}
request->action =
SIMIX_req_comm_isend(mailbox, request->size, -1.0,
- request->buf, request->size, &match_send, request,
+ request->buf, request->size,
+ &match_send,myfree, // cleanup using a simple free() FIXME: that may not be sufficient
+ request,
// detach if msg size < eager/rdv switch limit
detached);
/**********************/
/* SMPI callbacks */
/**********************/
-static double smpi_latency_factor(double size)
-{
- /* 1 B <= size <= 1 KiB */
- if (size <= 1024.0) {
- return 1.0056;
- }
-
static double smpi_bandwidth_factor(double size)
{
static void net_action_cancel(surf_action_t action)
{
+ XBT_DEBUG("cancel action %p",action);
surf_network_model->action_state_set(action, SURF_ACTION_FAILED);
if(network_update_mechanism == UM_LAZY){// remove action from the heap
xbt_swag_remove(action, net_modified_set);
}
}
- XBT_DEBUG("Min for resources (except NS3) : %f", min);
+ XBT_DEBUG("Min for resources (remember that NS3 dont update that value) : %f", min);
XBT_DEBUG("Looking for next trace event");
min = model_next_action_end;
}
- if (next_event_date == -1.0) break;
+ if (next_event_date == -1.0) {
+ XBT_DEBUG("no next TRACE event. Stop searching for it");
+ break;
+ }
if ((min != -1.0) && (next_event_date > NOW + min)) break;
}
} while (1);
- /* FIXME: Moved this test to here to avoid stoping simulation if there are actions running on cpus and all cpus are with availability = 0.
+ /* FIXME: Moved this test to here to avoid stopping simulation if there are actions running on cpus and all cpus are with availability = 0.
* This may cause an infinite loop if one cpu has a trace with periodicity = 0 and the other a trace with periodicity > 0.
* The options are: all traces with same periodicity(0 or >0) or we need to change the way how the events are managed */
- if (min < 0.0)
+ if (min < 0.0) {
+ XBT_DEBUG("No next event at all. Bail out now.");
return -1.0;
+ }
XBT_DEBUG("Duration set to %f", min);