-#include "colls.h"
+#include "colls_private.h"
#ifndef NUM_CORE
#define NUM_CORE 4
#endif
int smpi_coll_tuned_allgather_loosely_lr(void *sbuf, int scount,
                                         MPI_Datatype stype, void *rbuf,
                                         int rcount, MPI_Datatype rtype,
                                         MPI_Comm comm)
{
int comm_size, rank;
- int tag = 50;
+ int tag = COLL_TAG_ALLGATHER;
int i, j, send_offset, recv_offset;
int intra_rank, inter_rank, inter_comm_size, intra_comm_size;
int inter_dst, inter_src;
- MPI_Comm_size(comm, &comm_size);
- MPI_Comm_rank(comm, &rank);
+ comm_size = smpi_comm_size(comm);
+
+ int num_core = simcall_host_get_core(SIMIX_host_self());
+ // use the core count from the platform description, or fall back to the default:
+ // if the core count is 1, the platform was probably modeled as 1 node = 1 core
+ if (num_core == 1) num_core = NUM_CORE;
+
+ if (comm_size % num_core)
+   THROWF(arg_error, 0, "allgather loosely lr algorithm can't be used when the number of processes is not a multiple of NUM_CORE=%d!", num_core);
+
+ rank = smpi_comm_rank(comm);
MPI_Aint rextent, sextent;
- MPI_Type_extent(rtype, &rextent);
- MPI_Type_extent(stype, &sextent);
+ rextent = smpi_datatype_get_extent(rtype);
+ sextent = smpi_datatype_get_extent(stype);
MPI_Request inter_rrequest;
MPI_Request rrequest_array[128];
MPI_Request srequest_array[128];
MPI_Request inter_srequest_array[128];
int rrequest_count = 0;
int srequest_count = 0;
int inter_srequest_count = 0;
MPI_Status status;
- intra_rank = rank % NUM_CORE;
- inter_rank = rank / NUM_CORE;
- inter_comm_size = (comm_size + NUM_CORE - 1) / NUM_CORE;
- intra_comm_size = NUM_CORE;
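+ // view the ranks as a node/core grid: ranks on the same node share inter_rank,
+ // and intra_rank identifies a core within its node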
+ intra_rank = rank % num_core;
+ inter_rank = rank / num_core;
+ inter_comm_size = (comm_size + num_core - 1) / num_core;
+ intra_comm_size = num_core;
int src_seg, dst_seg;
// copy this rank's own block from sbuf into its slot of rbuf
recv_offset = rank * rextent * rcount;
- MPI_Sendrecv(sbuf, scount, stype, rank, tag,
+ smpi_mpi_sendrecv(sbuf, scount, stype, rank, tag,
(char *)rbuf + recv_offset, rcount, rtype, rank, tag, comm, &status);
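// a send/recv with self here acts as a datatype-aware local copy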
int dst, src;
if (intra_rank == j) {
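// only the core whose intra_rank equals j drives this round's inter-node exchange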
if (i != inter_comm_size - 1) {
- MPI_Irecv((char *)rbuf + inter_recv_offset, rcount, rtype, inter_src, tag,
- comm, &inter_rrequest);
- MPI_Isend((char *)rbuf + inter_send_offset, scount, stype, inter_dst, tag,
- comm, &inter_srequest_array[inter_srequest_count++]);
-
+ inter_rrequest = smpi_mpi_irecv((char *)rbuf + inter_recv_offset, rcount, rtype,
+ inter_src, tag, comm);
+ inter_srequest_array[inter_srequest_count++] = smpi_mpi_isend((char *)rbuf + inter_send_offset, scount, stype,
+ inter_dst, tag, comm);
}
}
// intra-node communication
if (j != intra_rank) {
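// each core exchanges this round's segment with the other cores of its node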
- MPI_Irecv((char *)rbuf + recv_offset, rcount, rtype, src, tag, comm,
- &rrequest_array[rrequest_count++]);
- MPI_Isend((char *)rbuf + send_offset, scount, stype, dst, tag, comm,
- &srequest_array[srequest_count++]);
+ rrequest_array[rrequest_count++] = smpi_mpi_irecv((char *)rbuf + recv_offset, rcount, rtype, src, tag, comm);
+ srequest_array[srequest_count++] = smpi_mpi_isend((char *)rbuf + send_offset, scount, stype, dst, tag, comm);
}
} // intra loop
- // wait for inter communication to finish for these rounds (# of round equals NUM_CORE)
+ // wait for inter communication to finish for these rounds (# of rounds equals num_core)
if (i != inter_comm_size - 1) {
- MPI_Wait(&inter_rrequest, &status);
+ smpi_mpi_wait(&inter_rrequest, &status);
}
} //inter loop
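// drain all remaining intra- and inter-node requests before returning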
- MPI_Waitall(rrequest_count, rrequest_array, MPI_STATUSES_IGNORE);
- MPI_Waitall(srequest_count, srequest_array, MPI_STATUSES_IGNORE);
- MPI_Waitall(inter_srequest_count, inter_srequest_array, MPI_STATUSES_IGNORE);
+ smpi_mpi_waitall(rrequest_count, rrequest_array, MPI_STATUSES_IGNORE);
+ smpi_mpi_waitall(srequest_count, srequest_array, MPI_STATUSES_IGNORE);
+ smpi_mpi_waitall(inter_srequest_count, inter_srequest_array, MPI_STATUSES_IGNORE);
return MPI_SUCCESS;
}