Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
SMPI: Use the amount of processes to malloc the process list, not the amount of hosts
[simgrid.git] / src / smpi / smpi_base.c
index 6ebefe4..1d619ba 100644 (file)
@@ -1,6 +1,8 @@
 #include "private.h"
+#include "xbt/time.h"
 
-XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_base, smpi, "Logging specific to SMPI (base)");
+XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_base, smpi,
+                                "Logging specific to SMPI (base)");
 XBT_LOG_EXTERNAL_CATEGORY(smpi_base);
 XBT_LOG_EXTERNAL_CATEGORY(smpi_bench);
 XBT_LOG_EXTERNAL_CATEGORY(smpi_kernel);
@@ -11,250 +13,167 @@ XBT_LOG_EXTERNAL_CATEGORY(smpi_util);
 
 smpi_mpi_global_t smpi_mpi_global = NULL;
 
-void smpi_mpi_land_func(void *a, void *b, int *length, MPI_Datatype *datatype);
+void smpi_mpi_land_func(void *a, void *b, int *length,
+                        MPI_Datatype * datatype);
 
-void smpi_mpi_land_func(void *a, void *b, int *length, MPI_Datatype *datatype)
+void smpi_mpi_land_func(void *a, void *b, int *length,
+                        MPI_Datatype * datatype)
 {
-       int i;
-       if (*datatype == smpi_mpi_global->mpi_int) {
-               int *x = a, *y = b;
-               for (i = 0; i < *length; i++) {
-                       y[i] = x[i] && y[i];
-               }
-       }
+  int i;
+  if (*datatype == smpi_mpi_global->mpi_int) {
+    int *x = a, *y = b;
+    for (i = 0; i < *length; i++) {
+      y[i] = x[i] && y[i];
+    }
+  }
 }
 
-void smpi_mpi_sum_func(void *a, void *b, int *length, MPI_Datatype *datatype);
+void smpi_mpi_sum_func(void *a, void *b, int *length,
+                       MPI_Datatype * datatype);
 
-void smpi_mpi_sum_func(void *a, void *b, int *length, MPI_Datatype *datatype)
+void smpi_mpi_sum_func(void *a, void *b, int *length, MPI_Datatype * datatype)
 {
-       int i;
-       if (*datatype == smpi_mpi_global->mpi_int) {
-               int *x = a, *y = b;
-               for (i = 0; i < *length; i++) {
-                       y[i] = x[i] + y[i];
-               }
-       }
+  int i;
+  if (*datatype == smpi_mpi_global->mpi_int) {
+    int *x = a, *y = b;
+    for (i = 0; i < *length; i++) {
+      y[i] = x[i] + y[i];
+    }
+  }
 }
 
 int smpi_mpi_comm_rank(smpi_mpi_communicator_t comm)
 {
-       return comm->index_to_rank_map[smpi_host_index()];
+  return comm->index_to_rank_map[smpi_process_index()];
 }
 
-void smpi_mpi_init()
+void smpi_process_init(int *argc, char***argv)
 {
-       smx_host_t host;
-       smx_host_t *hosts;
-       int host_count;
-       int i;
-       smpi_host_data_t hdata;
-
-       SIMIX_mutex_lock(smpi_global->running_hosts_count_mutex);
-       smpi_global->running_hosts_count++;
-       SIMIX_mutex_unlock(smpi_global->running_hosts_count_mutex);
-
-       // initialize some local variables
-       host       = SIMIX_host_self();
-       hosts      = SIMIX_host_get_table();
-       host_count = SIMIX_host_get_number();
-
-       hdata      = xbt_new(s_smpi_host_data_t, 1);
-
-       for (i = 0; i < host_count && host != hosts[i]; i ++);
-
-       hdata->index = i;
-       hdata->mutex = SIMIX_mutex_init();
-       hdata->cond  = SIMIX_cond_init();
-
-       SIMIX_host_set_data(host, hdata);
-
-       // node 0 sets the globals
-       if (0 == i) {
-
-               smpi_global->hosts                              = hosts;
-               smpi_global->host_count                         = host_count;
-
-               smpi_mpi_global                                 = xbt_new(s_smpi_mpi_global_t, 1);
-
-               // global communicator
-               smpi_mpi_global->mpi_comm_world                 = xbt_new(s_smpi_mpi_communicator_t, 1);
-               smpi_mpi_global->mpi_comm_world->size           = host_count;
-               smpi_mpi_global->mpi_comm_world->barrier_count  = 0;
-               smpi_mpi_global->mpi_comm_world->barrier_mutex  = SIMIX_mutex_init();
-               smpi_mpi_global->mpi_comm_world->barrier_cond   = SIMIX_cond_init();
-               smpi_mpi_global->mpi_comm_world->rank_to_index_map = xbt_new(int, host_count);
-               smpi_mpi_global->mpi_comm_world->index_to_rank_map = xbt_new(int, host_count);
-               for (i = 0; i < host_count; i++) {
-                       smpi_mpi_global->mpi_comm_world->rank_to_index_map[i] = i;
-                       smpi_mpi_global->mpi_comm_world->index_to_rank_map[i] = i;
-               }
-
-               // mpi datatypes
-               smpi_mpi_global->mpi_byte                       = xbt_new(s_smpi_mpi_datatype_t, 1);
-               smpi_mpi_global->mpi_byte->size                 = (size_t)1;
-               smpi_mpi_global->mpi_int                        = xbt_new(s_smpi_mpi_datatype_t, 1);
-               smpi_mpi_global->mpi_int->size                  = sizeof(int);
-               smpi_mpi_global->mpi_double                     = xbt_new(s_smpi_mpi_datatype_t, 1);
-               smpi_mpi_global->mpi_double->size               = sizeof(double);
-
-               // mpi operations
-               smpi_mpi_global->mpi_land                       = xbt_new(s_smpi_mpi_op_t, 1);
-               smpi_mpi_global->mpi_land->func                 = smpi_mpi_land_func;
-               smpi_mpi_global->mpi_sum                        = xbt_new(s_smpi_mpi_op_t, 1);
-               smpi_mpi_global->mpi_sum->func                  = smpi_mpi_sum_func;
-
-               // signal all nodes to perform initialization
-               SIMIX_mutex_lock(smpi_global->start_stop_mutex);
-               smpi_global->root_ready = 1;
-               SIMIX_cond_broadcast(smpi_global->start_stop_cond);
-               SIMIX_mutex_unlock(smpi_global->start_stop_mutex);
-
-       } else {
-
-               // make sure root is done before own initialization
-               SIMIX_mutex_lock(smpi_global->start_stop_mutex);
-               while (!smpi_global->root_ready) {
-                       SIMIX_cond_wait(smpi_global->start_stop_cond, smpi_global->start_stop_mutex);
-               }
-               SIMIX_mutex_unlock(smpi_global->start_stop_mutex);
-
-       }
-
-       // wait for all nodes to signal initializatin complete
-       SIMIX_mutex_lock(smpi_global->start_stop_mutex);
-       smpi_global->ready_process_count++;
-       if (smpi_global->ready_process_count >= 3 * host_count) {
-               SIMIX_cond_broadcast(smpi_global->start_stop_cond);
-       }
-       while (smpi_global->ready_process_count < 3 * host_count) {
-               SIMIX_cond_wait(smpi_global->start_stop_cond, smpi_global->start_stop_mutex);
-       }
-       SIMIX_mutex_unlock(smpi_global->start_stop_mutex);
-
-       return;
+  smpi_process_data_t pdata;
+
+  // initialize some local variables
+
+  pdata = xbt_new(s_smpi_process_data_t, 1);
+  SIMIX_process_set_data(SIMIX_process_self(),pdata);
+
+  /* get rank from command line, and remove it from argv */
+  pdata->index = atoi( (*argv)[1] );
+  DEBUG1("I'm rank %d",pdata->index);
+  if (*argc>2) {
+         memmove((*argv)[1],(*argv)[2], sizeof(char*)* (*argc-2));
+         (*argv)[ (*argc)-1] = NULL;
+  }
+  (*argc)--;
+
+  pdata->mutex = SIMIX_mutex_init();
+  pdata->cond = SIMIX_cond_init();
+  pdata->finalize = 0;
+
+  pdata->pending_recv_request_queue = xbt_fifo_new();
+  pdata->pending_send_request_queue = xbt_fifo_new();
+  pdata->received_message_queue = xbt_fifo_new();
+
+  pdata->main = SIMIX_process_self();
+  pdata->sender = SIMIX_process_create("smpi_sender",
+          smpi_sender, pdata,
+          SIMIX_host_get_name(SIMIX_host_self()), 0, NULL,
+          /*props */ NULL);
+  pdata->receiver = SIMIX_process_create("smpi_receiver",
+          smpi_receiver, pdata,
+          SIMIX_host_get_name(SIMIX_host_self()), 0, NULL,
+          /*props */ NULL);
+
+  smpi_global->main_processes[pdata->index] = SIMIX_process_self();
+  return;
 }
 
-void smpi_mpi_finalize()
+void smpi_process_finalize()
 {
-       int i;
-
-       SIMIX_mutex_lock(smpi_global->running_hosts_count_mutex);
-       i = --smpi_global->running_hosts_count;
-       SIMIX_mutex_unlock(smpi_global->running_hosts_count_mutex);
-
-       SIMIX_mutex_lock(smpi_global->start_stop_mutex);
-       smpi_global->ready_process_count--;
-       SIMIX_mutex_unlock(smpi_global->start_stop_mutex);
-
-       SIMIX_mutex_destroy(smpi_host_mutex());
-       SIMIX_cond_destroy(smpi_host_cond());
-
-       if (0 >= i) {
-
-               // wake up senders/receivers
-               for (i = 0; i < smpi_global->host_count; i++) {
-                       if (SIMIX_process_is_suspended(smpi_global->sender_processes[i])) {
-                               SIMIX_process_resume(smpi_global->sender_processes[i]);
-                       }
-                       if (SIMIX_process_is_suspended(smpi_global->receiver_processes[i])) {
-                               SIMIX_process_resume(smpi_global->receiver_processes[i]);
-                       }
-               }
-
-               SIMIX_mutex_destroy(smpi_mpi_global->mpi_comm_world->barrier_mutex);
-               SIMIX_cond_destroy(smpi_mpi_global->mpi_comm_world->barrier_cond);
-               xbt_free(smpi_mpi_global->mpi_comm_world);
-
-               xbt_free(smpi_mpi_global->mpi_byte);
-               xbt_free(smpi_mpi_global->mpi_int);
-               xbt_free(smpi_mpi_global->mpi_double);
-
-               xbt_free(smpi_mpi_global->mpi_land);
-               xbt_free(smpi_mpi_global->mpi_sum);
-
-               xbt_free(smpi_mpi_global);
-
-       }
-
+  smpi_process_data_t pdata =  SIMIX_process_get_data(SIMIX_process_self());
+
+  pdata->finalize = 2; /* Tell sender and receiver to quit */
+  SIMIX_process_resume(pdata->sender);
+  SIMIX_process_resume(pdata->receiver);
+  while (pdata->finalize>0) { /* wait until it's done */
+         SIMIX_cond_wait(pdata->cond,pdata->mutex);
+  }
+
+  SIMIX_mutex_destroy(pdata->mutex);
+  SIMIX_cond_destroy(pdata->cond);
+  xbt_fifo_free(pdata->pending_recv_request_queue);
+  xbt_fifo_free(pdata->pending_send_request_queue);
+  xbt_fifo_free(pdata->received_message_queue);
 }
 
 int smpi_mpi_barrier(smpi_mpi_communicator_t comm)
 {
 
-       SIMIX_mutex_lock(comm->barrier_mutex);
-       ++comm->barrier_count;
-       if (comm->barrier_count > comm->size) { // only happens on second barrier...
-               comm->barrier_count = 0;
-       } else if (comm->barrier_count == comm->size) {
-               SIMIX_cond_broadcast(comm->barrier_cond);
-       }
-       while (comm->barrier_count < comm->size) {
-               SIMIX_cond_wait(comm->barrier_cond, comm->barrier_mutex);
-       }
-       SIMIX_mutex_unlock(comm->barrier_mutex);
-
-       return MPI_SUCCESS;
+  SIMIX_mutex_lock(comm->barrier_mutex);
+  ++comm->barrier_count;
+  if (comm->barrier_count > comm->size) {       // only happens on second barrier...
+    comm->barrier_count = 0;
+  } else if (comm->barrier_count == comm->size) {
+    SIMIX_cond_broadcast(comm->barrier_cond);
+  }
+  while (comm->barrier_count < comm->size) {
+    SIMIX_cond_wait(comm->barrier_cond, comm->barrier_mutex);
+  }
+  SIMIX_mutex_unlock(comm->barrier_mutex);
+
+  return MPI_SUCCESS;
 }
 
 int smpi_mpi_isend(smpi_mpi_request_t request)
 {
-       int retval = MPI_SUCCESS;
-       int index  = smpi_host_index();
-
-       if (NULL == request) {
-               retval = MPI_ERR_INTERN;
-       } else {
-               SIMIX_mutex_lock(smpi_global->pending_send_request_queues_mutexes[index]);
-               xbt_fifo_push(smpi_global->pending_send_request_queues[index], request);
-               SIMIX_mutex_unlock(smpi_global->pending_send_request_queues_mutexes[index]);
+       smpi_process_data_t pdata =  SIMIX_process_get_data(SIMIX_process_self());
+  int retval = MPI_SUCCESS;
 
-               if (SIMIX_process_is_suspended(smpi_global->sender_processes[index])) {
-                       SIMIX_process_resume(smpi_global->sender_processes[index]);
-               }
-       }
+  if (NULL == request) {
+    retval = MPI_ERR_INTERN;
+  } else {
+    xbt_fifo_push(pdata->pending_send_request_queue, request);
+    SIMIX_process_resume(pdata->sender);
+  }
 
-       return retval;
+  return retval;
 }
 
 int smpi_mpi_irecv(smpi_mpi_request_t request)
 {
-       int retval = MPI_SUCCESS;
-       int index = smpi_host_index();
+  int retval = MPI_SUCCESS;
+  smpi_process_data_t pdata =  SIMIX_process_get_data(SIMIX_process_self());
 
-       if (NULL == request) {
-               retval = MPI_ERR_INTERN;
-       } else {
-               SIMIX_mutex_lock(smpi_global->pending_recv_request_queues_mutexes[index]);
-               xbt_fifo_push(smpi_global->pending_recv_request_queues[index], request);
-               SIMIX_mutex_unlock(smpi_global->pending_recv_request_queues_mutexes[index]);
+  if (NULL == request) {
+    retval = MPI_ERR_INTERN;
+  } else {
+    xbt_fifo_push(pdata->pending_recv_request_queue, request);
 
-               if (SIMIX_process_is_suspended(smpi_global->receiver_processes[index])) {
-                       SIMIX_process_resume(smpi_global->receiver_processes[index]);
-               }
-       }
+    if (SIMIX_process_is_suspended(pdata->receiver)) {
+      SIMIX_process_resume(pdata->receiver);
+    }
+  }
 
-       return retval;
+  return retval;
 }
 
-int smpi_mpi_wait(smpi_mpi_request_t request, smpi_mpi_status_t *status)
+int smpi_mpi_wait(smpi_mpi_request_t request, smpi_mpi_status_t * status)
 {
-       int retval = MPI_SUCCESS;
-
-       if (NULL == request) {
-               retval = MPI_ERR_INTERN;
-       } else {
-               SIMIX_mutex_lock(request->mutex);
-               while (!request->completed) {
-                       SIMIX_cond_wait(request->cond, request->mutex);
-               }
-               if (NULL != status) {
-                       status->MPI_SOURCE = request->src;
-                       status->MPI_TAG    = request->tag;
-                       status->MPI_ERROR  = MPI_SUCCESS;
-               }
-               SIMIX_mutex_unlock(request->mutex);
-       }
-
-       return retval;
+  int retval = MPI_SUCCESS;
+
+  if (NULL == request) {
+    retval = MPI_ERR_INTERN;
+  } else {
+    SIMIX_mutex_lock(request->mutex);
+    while (!request->completed) {
+      SIMIX_cond_wait(request->cond, request->mutex);
+    }
+    if (NULL != status) {
+      status->MPI_SOURCE = request->src;
+      status->MPI_TAG = request->tag;
+      status->MPI_ERROR = MPI_SUCCESS;
+    }
+    SIMIX_mutex_unlock(request->mutex);
+  }
+
+  return retval;
 }