#include <signal.h>
#include <sys/time.h>
-#include "xbt/xbt_portability.h"
+#include "xbt/xbt_os_time.h"
#include "simix/simix.h"
#include "simix/private.h"
#include "smpi.h"
-xbt_fifo_t *smpi_pending_send_requests = NULL;
-xbt_fifo_t *smpi_pending_recv_requests = NULL;
-xbt_fifo_t *smpi_received_messages = NULL;
+// FIXME: move globals into structure...
+// mallocators that hand out request / message objects (xbt_mallocator_get) and take them back (xbt_mallocator_release)
+xbt_mallocator_t smpi_request_mallocator = NULL;
+xbt_mallocator_t smpi_message_mallocator = NULL;
+// the arrays below are indexed by rank in smpi_mpi_comm_world; each queue is paired with the mutex that guards it
+xbt_fifo_t *smpi_pending_send_requests = NULL; // requests queued by smpi_isend, drained by smpi_sender
+smx_mutex_t *smpi_pending_send_requests_mutex = NULL;
+
+xbt_fifo_t *smpi_pending_recv_requests = NULL; // requests queued by smpi_irecv, matched by smpi_receiver
+smx_mutex_t *smpi_pending_recv_requests_mutex = NULL;
+
+xbt_fifo_t *smpi_received_messages = NULL; // messages pushed by smpi_sender for the destination rank
+smx_mutex_t *smpi_received_messages_mutex = NULL;
+// per-rank handles of the sender / receiver processes, filled in by those processes themselves
+smx_process_t *smpi_sender_processes = NULL;
+smx_process_t *smpi_receiver_processes = NULL;
int smpi_running_hosts = 0;
// mutexes
smx_mutex_t smpi_running_hosts_mutex = NULL;
+smx_mutex_t smpi_benchmarking_mutex = NULL; // created by node 0 in smpi_mpi_init; presumably guards smpi_benchmarking — TODO confirm
smx_mutex_t init_mutex = NULL;
smx_cond_t init_cond = NULL;
-int rootready = 0;
-int readycount = 0;
+int smpi_root_ready = 0; // set to 1 by node 0 once the globals above are allocated
+int smpi_ready_count = 0; // start-up barrier counter; waiters are released when it reaches 3 * size
XBT_LOG_NEW_DEFAULT_CATEGORY(smpi, "SMPI");
+/* Returns the size recorded in comm, i.e. the number of ranks it holds. */
+int inline smpi_mpi_comm_size(smpi_mpi_communicator_t *comm)
+{
+  const int nb_ranks = comm->size;
+
+  return nb_ranks;
+}
+
+// FIXME: smarter algorithm?
+/*
+ * Looks host up in comm->hosts, scanning from the last slot downwards, and
+ * returns the index at which the scan stops.
+ *
+ * Slot 0 is never compared, so 0 is returned both for the host stored in
+ * slot 0 and for a host that is not part of the communicator at all.
+ */
+int smpi_mpi_comm_rank(smpi_mpi_communicator_t *comm, smx_host_t host)
+{
+  int rank = comm->size - 1;
+
+  while (rank > 0 && comm->hosts[rank] != host) {
+    rank--;
+  }
+
+  return rank;
+}
+
+/* Rank, within comm, of the host the calling process runs on. */
+int inline smpi_mpi_comm_rank_self(smpi_mpi_communicator_t *comm)
+{
+  smx_host_t self_host = SIMIX_host_self();
+
+  return smpi_mpi_comm_rank(comm, self_host);
+}
+
+/* Rank, within smpi_mpi_comm_world, of the host the calling process runs on. */
+int inline smpi_mpi_comm_world_rank_self()
+{
+  /* The original return statement lacked its terminating semicolon. */
+  return smpi_mpi_comm_rank(&smpi_mpi_comm_world, SIMIX_host_self());
+}
+// SIMIX process body registered under the name "smpi_sender": drains this host's pending send queue, simulating one network transfer per request and handing the result to the destination's receiver.
int smpi_sender(int argc, char **argv)
{
+ smx_process_t self;
+ smx_host_t shost;
+ int rank;
+ xbt_fifo_t request_queue; // this rank's entry of smpi_pending_send_requests
+ smx_mutex_t request_queue_mutex; // guards request_queue
+ int size;
+ int running_hosts = 0; // local snapshot of smpi_running_hosts, refreshed under its mutex
+ smpi_mpi_request_t *request;
+ smx_host_t dhost; // destination host of the request being served
+ smx_action_t communicate_action;
+ smpi_received_message_t *scratch; // message record pushed onto the destination's received queue
+ int drank; // rank of dhost in smpi_mpi_comm_world
+ smx_process_t waitproc;
+
+ self = SIMIX_process_self();
+ shost = SIMIX_host_self();
+ rank = smpi_mpi_comm_rank(&smpi_mpi_comm_world, shost);
+
+ // block until node 0 has set smpi_root_ready, i.e. has allocated the global queues and mutexes used below
+ SIMIX_mutex_lock(init_mutex);
+ if (!smpi_root_ready) {
+ SIMIX_cond_wait(init_cond, init_mutex);
+ }
+ SIMIX_mutex_unlock(init_mutex);
+
+ request_queue = smpi_pending_send_requests[rank];
+ request_queue_mutex = smpi_pending_send_requests_mutex[rank];
+
+ size = smpi_mpi_comm_size(&smpi_mpi_comm_world);
+
+ smpi_sender_processes[rank] = self; // publish our handle so smpi_isend can resume us
+
+ // wait for all nodes to signal initialization complete: the arrival that brings the count to 3 * size wakes everyone (presumably main, sender and receiver process per host — TODO confirm)
+ SIMIX_mutex_lock(init_mutex);
+ smpi_ready_count++;
+ if (smpi_ready_count < 3 * size) {
+ SIMIX_cond_wait(init_cond, init_mutex);
+ } else {
+ SIMIX_cond_broadcast(init_cond);
+ }
+ SIMIX_mutex_unlock(init_mutex);
+
+ SIMIX_mutex_lock(smpi_running_hosts_mutex);
+ running_hosts = smpi_running_hosts;
+ SIMIX_mutex_unlock(smpi_running_hosts_mutex);
+
+ while (0 < running_hosts) { // serve requests for as long as the snapshot of smpi_running_hosts is positive
+
+ SIMIX_mutex_lock(request_queue_mutex);
+ request = xbt_fifo_shift(request_queue);
+ SIMIX_mutex_unlock(request_queue_mutex);
+
+ if (NULL == request) {
+ SIMIX_process_suspend(self); // nothing queued: sleep until another process resumes us (smpi_isend does so after queuing)
+ } else {
+ SIMIX_mutex_lock(request->mutex);
+
+ dhost = request->comm->hosts[request->dst];
+
+ // FIXME: not at all sure I can assume magic just happens here....
+ communicate_action = SIMIX_action_communicate(shost, dhost,
+ "communication", request->datatype->size * request->count * 1.0, -1.0); // amount = datatype size * count as a double; meaning of the trailing -1.0 is not visible here — presumably "no rate limit", TODO confirm
+
+ SIMIX_register_condition_to_action(communicate_action, request->cond);
+ SIMIX_register_action_to_condition(communicate_action, request->cond);
+
+ SIMIX_cond_wait(request->cond, request->mutex); // presumably returns once the communication action has finished and signalled request->cond — TODO confirm
+
+ // copy the request's envelope (and its buffer pointer, not the data) into a message record on the destination's received queue
+ scratch = xbt_mallocator_get(smpi_message_mallocator);
+ scratch->comm = request->comm;
+ scratch->src = request->src;
+ scratch->dst = request->dst;
+ scratch->tag = request->tag;
+ scratch->buf = request->buf;
+ drank = smpi_mpi_comm_rank(&smpi_mpi_comm_world, dhost);
+ SIMIX_mutex_lock(smpi_received_messages_mutex[drank]);
+ xbt_fifo_push(smpi_received_messages[drank], scratch);
+ SIMIX_mutex_unlock(smpi_received_messages_mutex[drank]);
+
+ request->completed = 1;
+
+ // wake the destination's receiver first, then every process parked on request->waitlist (smpi_wait adds itself there)
+ waitproc = smpi_receiver_processes[drank];
+
+ do {
+ if (SIMIX_process_is_suspended(waitproc)) {
+ SIMIX_process_resume(waitproc);
+ }
+ } while(waitproc = xbt_fifo_shift(request->waitlist)); // assignment is intentional: the loop ends when the shift yields NULL
+
+ SIMIX_mutex_unlock(request->mutex);
+ }
+
+ SIMIX_mutex_lock(smpi_running_hosts_mutex);
+ running_hosts = smpi_running_hosts;
+ SIMIX_mutex_unlock(smpi_running_hosts_mutex);
+ }
+
return 0;
}
int smpi_receiver(int argc, char **argv)
{
+ smx_process_t self;
+ int rank;
+ xbt_fifo_t request_queue;
+ smx_mutex_t request_queue_mutex;
+ xbt_fifo_t message_queue;
+ smx_mutex_t message_queue_mutex;
+ int size;
+ int running_hosts;
+ xbt_fifo_item_t request_item, message_item;
+ smpi_mpi_request_t *request;
+ smpi_received_message_t *message;
+ smx_process_t waitproc;
+
+ self = SIMIX_process_self();
+ rank = smpi_mpi_comm_world_rank_self();
+
+ // make sure root is done before own initialization
+ SIMIX_mutex_lock(init_mutex);
+ if (!smpi_root_ready) {
+ SIMIX_cond_wait(init_cond, init_mutex);
+ }
+ SIMIX_mutex_unlock(init_mutex);
+
+ request_queue = smpi_pending_recv_requests[rank];
+ request_queue_mutex = smpi_pending_recv_requests_mutex[rank];
+
+ message_queue = smpi_received_messages[rank];
+ message_queue_mutex = smpi_received_messages_mutex[rank];
+
+ size = smpi_mpi_comm_size(&smpi_mpi_comm_world);
+ smpi_receiver_processes[rank] = self;
+
+ // wait for all nodes to signal initializatin complete
+ SIMIX_mutex_lock(init_mutex);
+ smpi_ready_count++;
+ if (smpi_ready_count < 3 * size) {
+ SIMIX_cond_wait(init_cond, init_mutex);
+ } else {
+ SIMIX_cond_broadcast(init_cond);
+ }
+ SIMIX_mutex_unlock(init_mutex);
+
+ SIMIX_mutex_lock(smpi_running_hosts_mutex);
+ running_hosts = smpi_running_hosts;
+ SIMIX_mutex_unlock(smpi_running_hosts_mutex);
+
+ while (0 < running_hosts) {
+
+ // FIXME: better algorithm, maybe some kind of balanced tree? or a heap?
+
+ // FIXME: not the best way to request multiple locks...
+ SIMIX_mutex_lock(request_queue_mutex);
+ SIMIX_mutex_lock(message_queue_mutex);
+search: for (request_item = xbt_fifo_get_first_item(request_queue);
+ NULL != request_item;
+ request_item = xbt_fifo_get_next_item(request_item)) {
+ request = xbt_fifo_get_item_content(request_item);
+ for (message_item = xbt_fifo_get_first_item(message_queue);
+ NULL != message_item;
+ message_item = xbt_fifo_get_next_item(message_item)) {
+ message = xbt_fifo_get_item_content(message_item);
+ if (request->comm == message->comm &&
+ (MPI_ANY_SOURCE == request->src || request->src == message->src) &&
+ request->tag == message->tag) {
+ xbt_fifo_remove_item(request_queue, request_item);
+ xbt_fifo_remove_item(message_queue, message_item);
+ break search;
+ }
+ }
+ }
+ SIMIX_mutex_unlock(message_queue_mutex);
+ SIMIX_mutex_unlock(request_queue_mutex);
+
+ if (NULL == request || NULL == message) {
+ SIMIX_process_suspend(self);
+ } else {
+ SIMIX_mutex_lock(request->mutex);
+ memcpy(request->buf, message->buf, request->count * request->datatype->size);
+ request->src = message->src;
+ request->completed = 1;
+
+ while (waitproc = xbt_fifo_shift(request->waitlist)) {
+ if (SIMIX_process_is_suspended(waitproc)) {
+ SIMIX_process_resume(waitproc);
+ }
+ }
+ SIMIX_mutex_unlock(request->mutex);
+
+ xbt_mallocator_release(smpi_message_mallocator, message);
+ }
+
+ SIMIX_mutex_lock(smpi_running_hosts_mutex);
+ running_hosts = smpi_running_hosts;
+ SIMIX_mutex_unlock(smpi_running_hosts_mutex);
+ }
+
return 0;
}
init_cond = SIMIX_cond_init();
SIMIX_function_register("smpi_simulated_main", smpi_simulated_main);
+ SIMIX_function_register("smpi_sender", smpi_sender);
+ SIMIX_function_register("smpi_receiver", smpi_receiver);
SIMIX_create_environment(argv[1]);
SIMIX_launch_application(argv[2]);
*(int *)z = *(int *)x + *(int *)y;
}
-int smpi_mpi_rank(smpi_mpi_communicator_t *comm, smx_host_t host)
-{
- int i;
-
- for(i = comm->size - 1; i > 0 && host != comm->hosts[i]; i--);
-
- return i;
-}
-
-int inline smpi_mpi_rank_self(smpi_mpi_communicator_t *comm)
+smpi_mpi_request_t *smpi_new_request() // object-construction callback handed to xbt_mallocator_new for smpi_request_mallocator
{
- return smpi_mpi_rank(comm, SIMIX_host_self());
+ return xbt_new(smpi_mpi_request_t, 1); // one request object; presumably not zero-filled (xbt_new rather than xbt_new0) — fields are set by smpi_create_request
}
void smpi_mpi_init()
// node 0 sets the globals
if (host == hosts[0]) {
+ // processes
+ smpi_sender_processes = xbt_new(smx_process_t, size);
+ smpi_receiver_processes = xbt_new(smx_process_t, size);
+
// running hosts
smpi_running_hosts_mutex = SIMIX_mutex_init();
smpi_running_hosts = size;
smpi_mpi_comm_world.barrier_mutex = SIMIX_mutex_init();
smpi_mpi_comm_world.barrier_cond = SIMIX_cond_init();
smpi_mpi_comm_world.hosts = hosts;
- smpi_mpi_comm_world.processes = xbt_new0(smx_process_t, size);
+ smpi_mpi_comm_world.processes = xbt_new(smx_process_t, size);
smpi_mpi_comm_world.processes[0] = SIMIX_process_self();
// mpi datatypes
smpi_mpi_sum.func = &smpi_mpi_sum_func;
// smpi globals
- smpi_pending_send_requests = xbt_new0(xbt_fifo_t, size);
- smpi_pending_recv_requests = xbt_new0(xbt_fifo_t, size);
- smpi_received_messages = xbt_new0(xbt_fifo_t, size);
+ smpi_request_mallocator = xbt_mallocator_new(SMPI_REQUEST_MALLOCATOR_SIZE, smpi_new_request, xbt_free, NULL);
+ smpi_message_mallocator = xbt_mallocator_new(SMPI_MESSAGE_MALLOCATOR_SIZE, smpi_new_message, xbt_free, NULL);
+ smpi_pending_send_requests = xbt_new(xbt_fifo_t, size);
+ smpi_pending_send_requests_mutex = xbt_new(smx_mutex_t, size);
+ smpi_pending_recv_requests = xbt_new(xbt_fifo_t, size);
+ smpi_pending_recv_requests_mutex = xbt_new(smx_mutex_t, size);
+ smpi_received_messages = xbt_new(xbt_fifo_t, size);
+ smpi_received_messages_mutex = xbt_new(smx_mutex_t, size);
for(i = 0; i < size; i++) {
- smpi_pending_send_requests[i] = xbt_fifo_new();
- smpi_pending_recv_requests[i] = xbt_fifo_new();
- smpi_received_messages[i] = xbt_fifo_new();
+ smpi_pending_send_requests[i] = xbt_fifo_new();
+ smpi_pending_send_requests_mutex[i] = SIMIX_mutex_init();
+ smpi_pending_recv_requests[i] = xbt_fifo_new();
+ smpi_pending_recv_requests_mutex[i] = SIMIX_mutex_init();
+ smpi_received_messages[i] = xbt_fifo_new();
+ smpi_received_messages_mutex[i] = SIMIX_mutex_init();
}
smpi_timer = xbt_os_timer_new();
smpi_reference_speed = SMPI_DEFAULT_SPEED;
smpi_benchmarking = 0;
+ smpi_benchmarking_mutex = SIMIX_mutex_init();
// signal all nodes to perform initialization
SIMIX_mutex_lock(init_mutex);
- rootready = 1;
+ smpi_root_ready = 1;
SIMIX_cond_broadcast(init_cond);
SIMIX_mutex_unlock(init_mutex);
// make sure root is done before own initialization
SIMIX_mutex_lock(init_mutex);
- if (!rootready) {
+ if (!smpi_root_ready) {
SIMIX_cond_wait(init_cond, init_mutex);
}
SIMIX_mutex_unlock(init_mutex);
- smpi_mpi_comm_world.processes[smpi_mpi_rank_self(&smpi_mpi_comm_world)] = SIMIX_process_self();
+ smpi_mpi_comm_world.processes[smpi_mpi_comm_rank_self(&smpi_mpi_comm_world)] = SIMIX_process_self();
}
// wait for all nodes to signal initializatin complete
SIMIX_mutex_lock(init_mutex);
- readycount++;
- if (readycount < size) {
+ smpi_ready_count++;
+ if (smpi_ready_count < 3 * size) {
SIMIX_cond_wait(init_cond, init_mutex);
} else {
SIMIX_cond_broadcast(init_cond);
for (i = 0 ; i < smpi_mpi_comm_world.size; i++) {
xbt_fifo_free(smpi_pending_send_requests[i]);
+ SIMIX_mutex_destroy(smpi_pending_send_requests_mutex[i]);
xbt_fifo_free(smpi_pending_recv_requests[i]);
+ SIMIX_mutex_destroy(smpi_pending_recv_requests_mutex[i]);
xbt_fifo_free(smpi_received_messages[i]);
+ SIMIX_mutex_destroy(smpi_received_messages_mutex[i]);
}
+ xbt_mallocator_free(smpi_request_mallocator);
+ xbt_mallocator_free(smpi_message_mallocator);
xbt_free(smpi_pending_send_requests);
+ xbt_free(smpi_pending_send_requests_mutex);
xbt_free(smpi_pending_recv_requests);
+ xbt_free(smpi_pending_recv_requests_mutex);
xbt_free(smpi_received_messages);
+ xbt_free(smpi_received_messages_mutex);
SIMIX_mutex_destroy(smpi_mpi_comm_world.barrier_mutex);
SIMIX_cond_destroy(smpi_mpi_comm_world.barrier_cond);
void smpi_bench_end()
{
double duration;
+ smx_host_t host;
+ smx_action_t compute_action;
+ smx_mutex_t mutex; // private mutex/condition pair, created only to wait for compute_action
+ smx_cond_t cond;
+ // Ends a benchmarked section: stops smpi_timer, then replays the measured elapsed time on the simulated host as a "computation" action and waits for that action.
xbt_assert0(smpi_benchmarking, "Not benchmarking yet");
smpi_benchmarking = 0;
xbt_os_timer_stop(smpi_timer);
duration = xbt_os_timer_elapsed(smpi_timer);
- // FIXME: add simix call to perform computation
+ host = SIMIX_host_self();
+ compute_action = SIMIX_action_execute(host, "computation", duration * SMPI_DEFAULT_SPEED); // NOTE(review): scales by the SMPI_DEFAULT_SPEED constant, not the smpi_reference_speed global set in smpi_mpi_init — confirm intended; units presumably seconds * speed
+ mutex = SIMIX_mutex_init();
+ cond = SIMIX_cond_init();
+ SIMIX_mutex_lock(mutex);
+ SIMIX_register_condition_to_action(compute_action, cond);
+ SIMIX_register_action_to_condition(compute_action, cond);
+ SIMIX_cond_wait(cond, mutex); // presumably returns when compute_action completes and signals cond — TODO confirm
+ SIMIX_mutex_unlock(mutex);
+ SIMIX_mutex_destroy(mutex);
+ SIMIX_cond_destroy(cond);
+ // FIXME: check for success/failure? (compute_action itself is not released or inspected here)
return;
}
return i;
}
+/*
+ * Validates the arguments of a point-to-point operation and, when they are
+ * acceptable, takes a request object from smpi_request_mallocator and fills
+ * it in.
+ *
+ * On success *request points to the new request (completed == 0, with its
+ * own mutex, condition and an empty wait list) and MPI_SUCCESS is returned.
+ * On failure *request is NULL and the result is one of MPI_ERR_COUNT
+ * (negative count), MPI_ERR_INTERN (NULL buf), MPI_ERR_TYPE (NULL datatype),
+ * MPI_ERR_COMM (NULL comm), MPI_ERR_RANK (src or dst outside comm) or
+ * MPI_ERR_TAG (negative tag).
+ *
+ * src may be MPI_ANY_SOURCE; dst must always be a valid rank of comm.
+ */
+int smpi_create_request(void *buf, int count, smpi_mpi_datatype_t *datatype,
+  int src, int dst, int tag, smpi_mpi_communicator_t *comm, smpi_mpi_request_t **request)
+{
+  int retval = MPI_SUCCESS;
+
+  *request = NULL;
+
+  if (0 > count) {
+    retval = MPI_ERR_COUNT;
+  } else if (NULL == buf) {
+    retval = MPI_ERR_INTERN;
+  } else if (NULL == datatype) {
+    retval = MPI_ERR_TYPE;
+  } else if (NULL == comm) {
+    retval = MPI_ERR_COMM;
+  } else if (MPI_ANY_SOURCE != src && (0 > src || comm->size <= src)) {
+    retval = MPI_ERR_RANK;
+  } else if (0 > dst || comm->size <= dst) {
+    retval = MPI_ERR_RANK;
+  } else if (0 > tag) {
+    retval = MPI_ERR_TAG;
+  } else {
+    *request = xbt_mallocator_get(smpi_request_mallocator);
+    (*request)->comm = comm;
+    (*request)->src = src;
+    (*request)->dst = dst;
+    (*request)->tag = tag;
+    (*request)->buf = buf;
+    (*request)->count = count;
+    (*request)->datatype = datatype;
+    (*request)->completed = 0;
+    (*request)->mutex = SIMIX_mutex_init();
+    (*request)->cond = SIMIX_cond_init();
+    // smpi_wait pushes onto this list and the sender/receiver shift from
+    // it, so it has to be a real (empty) fifo rather than NULL.
+    (*request)->waitlist = xbt_fifo_new();
+  }
+  return retval;
+}
+
+/*
+ * Queues request on the calling rank's pending-send queue and resumes that
+ * rank's sender process if it is currently suspended.
+ *
+ * Returns MPI_SUCCESS.
+ */
+int smpi_isend(smpi_mpi_request_t *request)
+{
+  int rank = smpi_mpi_comm_rank_self(&smpi_mpi_comm_world);
+
+  SIMIX_mutex_lock(smpi_pending_send_requests_mutex[rank]);
+  xbt_fifo_push(smpi_pending_send_requests[rank], request);
+  SIMIX_mutex_unlock(smpi_pending_send_requests_mutex[rank]);
+
+  // The sender handles are SIMIX processes, so use the SIMIX calls that
+  // the rest of this file uses rather than the MSG ones.
+  if (SIMIX_process_is_suspended(smpi_sender_processes[rank])) {
+    SIMIX_process_resume(smpi_sender_processes[rank]);
+  }
+
+  return MPI_SUCCESS;
+}
+
+/*
+ * Queues request on the calling rank's pending-receive queue and resumes
+ * that rank's receiver process if it is currently suspended.
+ *
+ * Returns MPI_SUCCESS.
+ */
+int smpi_irecv(smpi_mpi_request_t *request)
+{
+  int rank = smpi_mpi_comm_rank_self(&smpi_mpi_comm_world);
+
+  SIMIX_mutex_lock(smpi_pending_recv_requests_mutex[rank]);
+  xbt_fifo_push(smpi_pending_recv_requests[rank], request);
+  SIMIX_mutex_unlock(smpi_pending_recv_requests_mutex[rank]);
+
+  // The receiver handles are SIMIX processes, so use the SIMIX calls that
+  // the rest of this file uses rather than the MSG ones.
+  if (SIMIX_process_is_suspended(smpi_receiver_processes[rank])) {
+    SIMIX_process_resume(smpi_receiver_processes[rank]);
+  }
+
+  return MPI_SUCCESS;
+}
+
+/*
+ * Blocks the calling process until request has been marked completed.
+ *
+ * If the request is not completed yet, the caller adds itself to the
+ * request's wait list and suspends; the process that completes the request
+ * resumes everything on that list. Afterwards, when status is neither NULL
+ * nor MPI_STATUS_IGNORE, status->MPI_SOURCE receives the request's source.
+ *
+ * A NULL request is ignored.
+ */
+void smpi_wait(smpi_mpi_request_t *request, smpi_mpi_status_t *status)
+{
+  smx_process_t self;
+  int suspend = 0;
+  self = SIMIX_process_self();
+
+  if (NULL != request) {
+    SIMIX_mutex_lock(request->mutex);
+    if (!request->completed) {
+      xbt_fifo_push(request->waitlist, self);
+      suspend = 1;
+    }
+    SIMIX_mutex_unlock(request->mutex);
+    if (suspend) {
+      // same suspend call as the sender and receiver processes use
+      SIMIX_process_suspend(self);
+    }
+    if (NULL != status && MPI_STATUS_IGNORE != status) {
+      SIMIX_mutex_lock(request->mutex);
+      status->MPI_SOURCE = request->src;
+      SIMIX_mutex_unlock(request->mutex);
+    }
+  }
+}
+
// FIXME: move into own file
int smpi_gettimeofday(struct timeval *tv, struct timezone *tz)
{