src/smpi/colls/allgather/allgather-rhv.cpp

   1 /* Copyright (c) 2013-2014. The SimGrid Team.
   2  * All rights reserved.                                                     */
   3
   4 /* This program is free software; you can redistribute it and/or modify it
   5  * under the terms of the license (GNU LGPL) which comes with this package. */
   6
   7 #include "../colls_private.h"
   8
   9 namespace simgrid{
  10 namespace smpi{
  11
  12
  13 // now only work with power of two processes
  14
  15 int
  16 Coll_allgather_rhv::allgather(void *sbuf, int send_count,
  17                               MPI_Datatype send_type, void *rbuf,
  18                               int recv_count, MPI_Datatype recv_type,
  19                               MPI_Comm comm)
  20 {
  21   MPI_Status status;
  22   MPI_Aint s_extent, r_extent;
  23
  24   // local int variables
  25   int i, dst, send_base_offset, recv_base_offset, send_chunk, recv_chunk,
  26       send_offset, recv_offset;
  27   int tag = COLL_TAG_ALLGATHER;
  28   unsigned int mask;
  29   int curr_count;
  30
  31   // get size of the communicator, followed by rank
  32   unsigned int num_procs = comm->size();
  33
  34   if((num_procs&(num_procs-1)))
  35     THROWF(arg_error,0, "allgather rhv algorithm can't be used with non power of two number of processes ! ");
  36
  37   unsigned int rank = comm->rank();
  38
  39   // get size of single element's type for send buffer and recv buffer
  40   s_extent = send_type->get_extent();
  41   r_extent = recv_type->get_extent();
  42
  43   // multiply size of each element by number of elements to send or recv
  44   send_chunk = s_extent * send_count;
  45   recv_chunk = r_extent * recv_count;
  46
  47   if (send_chunk != recv_chunk) {
  48     XBT_WARN("MPI_allgather_rhv use default MPI_allgather.");
  49     Coll_allgather_default::allgather(sbuf, send_count, send_type, rbuf, recv_count,
  50                               recv_type, comm);
  51     return MPI_SUCCESS;
  52   }
  53
  54   // compute starting offset location to perform local copy
  55   int size = num_procs / 2;
  56   int base_offset = 0;
  57   mask = 1;
  58   while (mask < num_procs) {
  59     if (rank & mask) {
  60       base_offset += size;
  61     }
  62     mask <<= 1;
  63     size /= 2;
  64   }
  65
  66   //  printf("node %d base_offset %d\n",rank,base_offset);
  67
  68   //perform a remote copy
  69
  70   dst = base_offset;
  71   Request::sendrecv(sbuf, send_count, send_type, dst, tag,
  72                (char *)rbuf + base_offset * recv_chunk, recv_count, recv_type, dst, tag,
  73                comm, &status);
  74
  75
  76   mask >>= 1;
  77   i = 1;
  78   int phase = 0;
  79   curr_count = recv_count;
  80   while (mask >= 1) {
  81     // destination pair for both send and recv
  82     dst = rank ^ mask;
  83
  84     // compute offsets
  85     send_base_offset = base_offset;
  86     if (rank & mask) {
  87       recv_base_offset = base_offset - i;
  88       base_offset -= i;
  89     } else {
  90       recv_base_offset = base_offset + i;
  91     }
  92     send_offset = send_base_offset * recv_chunk;
  93     recv_offset = recv_base_offset * recv_chunk;
  94
  95     //  printf("node %d send to %d in phase %d s_offset = %d r_offset = %d count = %d\n",rank,dst,phase, send_base_offset, recv_base_offset, curr_count);
  96
  97     Request::sendrecv((char *)rbuf + send_offset, curr_count, recv_type, dst, tag,
  98                  (char *)rbuf + recv_offset, curr_count, recv_type, dst, tag,
  99                  comm, &status);
 100
 101
 102     curr_count *= 2;
 103     i *= 2;
 104     mask >>= 1;
 105     phase++;
 106   }
 107
 108   return MPI_SUCCESS;
 109 }
 110
 111
 112 }
 113 }