src/smpi/smpi_topo.c

   1 #include "xbt/sysdep.h"
   2 #include "smpi/smpi.h"
   3 #include "private.h"
   4
   5 #include <math.h>
   6
   7 typedef struct s_smpi_mpi_topology {
   8   int nnodes;
   9   int ndims;
  10   int *dims;
  11   int *periodic;
  12   int *position;
  13 } s_smpi_mpi_topology_t;
  14
  15
  16
  17 void smpi_topo_destroy(MPI_Topology topo) {
  18   if (topo) {
  19     if(topo->dims) {
  20       free(topo->dims);
  21     }
  22     if(topo->periodic) {
  23       free(topo->periodic);
  24     }
  25     if(topo->position) {
  26       free(topo->position);
  27     }
  28     free(topo);
  29   }
  30 }
  31
  32 MPI_Topology smpi_topo_create(int ndims) {
  33   MPI_Topology topo = xbt_malloc(sizeof(*topo));
  34   topo->nnodes = 0;
  35   topo->ndims = ndims;
  36   topo->dims = xbt_malloc(ndims * sizeof(*topo->dims));
  37   topo->periodic = xbt_malloc(ndims * sizeof(*topo->periodic));
  38   topo->position = xbt_malloc(ndims * sizeof(*topo->position));
  39   return topo;
  40 }
  41
  42 /* reorder is ignored, don't know what would be the consequences of a dumb
  43  * reordering but neither do I see the point of reordering*/
  44 int smpi_mpi_cart_create(MPI_Comm comm_old, int ndims, int dims[],
  45                      int periods[], int reorder, MPI_Comm *comm_cart) {
  46   int retval = MPI_SUCCESS;
  47   int i;
  48   MPI_Topology topo;
  49   MPI_Group newGroup, oldGroup;
  50   int rank, nranks,  newSize;
  51
  52
  53
  54   rank = smpi_comm_rank(comm_old);
  55
  56
  57
  58   newSize = 1;
  59   if(ndims != 0) {
  60     topo = smpi_topo_create(ndims);
  61     for (i = 0 ; i < ndims ; i++) {
  62       newSize *= dims[i];
  63     }
  64     if(rank >= newSize) {
  65       *comm_cart = MPI_COMM_NULL;
  66       return retval;
  67     }
  68     oldGroup = smpi_comm_group(comm_old);
  69     newGroup = smpi_group_new(newSize);
  70     for (i = 0 ; i < newSize ; i++) {
  71       smpi_group_set_mapping(newGroup, smpi_group_index(oldGroup, i), i);
  72     }
  73
  74     topo->nnodes = newSize;
  75
  76     memcpy(topo->dims, dims, ndims * sizeof(*topo->dims));
  77     memcpy(topo->periodic, periods, ndims * sizeof(*topo->periodic));
  78
  79     //  code duplication... See smpi_mpi_cart_coords
  80     nranks = newSize;
  81     for (i=0; i<ndims; i++)
  82       {
  83         topo->dims[i]     = dims[i];
  84         topo->periodic[i] = periods[i];
  85         nranks = nranks / dims[i];
  86         /* FIXME: nranks could be zero (?) */
  87         topo->position[i] = rank / nranks;
  88         rank = rank % nranks;
  89       }
  90
  91     *comm_cart = smpi_comm_new(newGroup, topo);
  92   }
  93   else {
  94     if (rank == 0) {
  95       topo = smpi_topo_create(ndims);
  96       *comm_cart = smpi_comm_new(smpi_comm_group(MPI_COMM_SELF), topo);
  97     }
  98     else {
  99       *comm_cart = MPI_COMM_NULL;
 100     }
 101   }
 102   return retval;
 103 }
 104
 105 int smpi_mpi_cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *newcomm) {
 106   MPI_Topology oldTopo = smpi_comm_topo(comm);
 107   int oldNDims = oldTopo->ndims;
 108   int i, j = 0, newNDims, *newDims = NULL, *newPeriodic = NULL;
 109
 110   if (remain_dims == NULL && oldNDims != 0) {
 111     return MPI_ERR_ARG;
 112   }
 113   newNDims = 0;
 114   for (i = 0 ; i < oldNDims ; i++) {
 115     if (remain_dims[i]) newNDims++;
 116   }
 117
 118   if (newNDims > 0) {
 119     newDims = malloc(newNDims * sizeof(*newDims));
 120     newPeriodic = malloc(newNDims * sizeof(*newPeriodic));
 121
 122     // that should not segfault
 123     for (i = 0 ; j < newNDims ; i++) {
 124       if(remain_dims[i]) {
 125         newDims[j] = oldTopo->dims[i];
 126         newPeriodic[j] = oldTopo->periodic[i];
 127         j++;
 128       }
 129     }
 130   }
 131   return smpi_mpi_cart_create(comm, newNDims, newDims, newPeriodic, 0, newcomm);
 132 }
 133
 134
 135
 136
 137 int smpi_mpi_cart_coords(MPI_Comm comm, int rank, int maxdims,
 138                          int coords[]) {
 139   int nnodes;
 140   int i;
 141   MPI_Topology topo = smpi_comm_topo(comm);
 142
 143   nnodes = topo->nnodes;
 144   for ( i=0; i < topo->ndims; i++ ) {
 145     nnodes    = nnodes / topo->dims[i];
 146     coords[i] = rank / nnodes;
 147     rank      = rank % nnodes;
 148   }
 149   return MPI_SUCCESS;
 150 }
 151
 152 int smpi_mpi_cart_get(MPI_Comm comm, int maxdims, int* dims, int* periods, int* coords) {
 153   MPI_Topology topo = smpi_comm_topo(comm);
 154   int i;
 155   for(i = 0 ; i < maxdims ; i++) {
 156     dims[i] = topo->dims[i];
 157     periods[i] = topo->periodic[i];
 158     coords[i] = topo->position[i];
 159   }
 160   return MPI_SUCCESS;
 161 }
 162
 163 int smpi_mpi_cart_rank(MPI_Comm comm, int* coords, int* rank) {
 164   MPI_Topology topo = smpi_comm_topo(comm);
 165   int ndims = topo->ndims;
 166   int multiplier, coord,i;
 167   *rank = 0;
 168   multiplier = 1;
 169
 170
 171
 172   for ( i=ndims-1; i >=0; i-- ) {
 173     coord = coords[i];
 174
 175     /* Should we check first for args correction, then process,
 176      * or check while we work (as it is currently done) ? */
 177       if (coord >= topo->dims[i]) {
 178         if ( topo->periodic[i] ) {
 179         coord = coord % topo->dims[i];
 180         }
 181         else {
 182           // Should I do that ?
 183           *rank = -1;
 184           return MPI_ERR_ARG;
 185         }
 186       }
 187       else if (coord <  0) {
 188         if(topo->periodic[i]) {
 189           coord = coord % topo->dims[i];
 190           if (coord) coord = topo->dims[i] + coord;
 191         }
 192         else {
 193           *rank = -1;
 194           return MPI_ERR_ARG;
 195         }
 196       }
 197
 198       *rank += multiplier * coord;
 199       multiplier *= topo->dims[i];
 200   }
 201   return MPI_SUCCESS;
 202 }
 203
 204 int smpi_mpi_cart_shift(MPI_Comm comm, int direction, int disp,
 205                         int *rank_source, int *rank_dest) {
 206   MPI_Topology topo = smpi_comm_topo(comm);
 207   int position[topo->ndims];
 208
 209
 210   if(topo->ndims == 0) {
 211     return MPI_ERR_ARG;
 212   }
 213   if (topo->ndims < direction) {
 214     return MPI_ERR_DIMS;
 215   }
 216
 217   smpi_mpi_cart_coords(comm, smpi_comm_rank(comm), topo->ndims, position);
 218   position[direction] += disp;
 219
 220   if(position[direction] < 0 || position[direction] >= topo->dims[direction]) {
 221     if(topo->periodic[direction]) {
 222       position[direction] %= topo->dims[direction];
 223       smpi_mpi_cart_rank(comm, position, rank_dest);
 224     }
 225     else {
 226       *rank_dest = MPI_PROC_NULL;
 227     }
 228   }
 229   else {
 230     smpi_mpi_cart_rank(comm, position, rank_dest);
 231   }
 232
 233   position[direction] =  topo->position[direction] - disp;
 234   if(position[direction] < 0 || position[direction] >= topo->dims[direction]) {
 235     if(topo->periodic[direction]) {
 236       position[direction] %= topo->dims[direction];
 237       smpi_mpi_cart_rank(comm, position, rank_source);
 238     }
 239     else {
 240       *rank_source = MPI_PROC_NULL;
 241     }
 242   }
 243   else {
 244     smpi_mpi_cart_rank(comm, position, rank_source);
 245   }
 246
 247   return MPI_SUCCESS;
 248 }
 249
 250 int smpi_mpi_cartdim_get(MPI_Comm comm, int *ndims) {
 251   MPI_Topology topo = smpi_comm_topo(comm);
 252
 253   *ndims = topo->ndims;
 254   return MPI_SUCCESS;
 255 }
 256
 257
 258
 259 // Everything below has been taken from ompi, but could be easily rewritten.
 260
 261 /*
 262  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 263  *                         University Research and Technology
 264  *                         Corporation.  All rights reserved.
 265  * Copyright (c) 2004-2005 The University of Tennessee and The University
 266  *                         of Tennessee Research Foundation.  All rights
 267  *                         reserved.
 268  * Copyright (c) 2004-2014 High Performance Computing Center Stuttgart,
 269  *                         University of Stuttgart.  All rights reserved.
 270  * Copyright (c) 2004-2005 The Regents of the University of California.
 271  *                         All rights reserved.
 272  * Copyright (c) 2012      Los Alamos National Security, LLC.  All rights
 273  *                         reserved.
 274  * Copyright (c) 2014      Intel, Inc. All rights reserved
 275  * $COPYRIGHT$
 276  *
 277  * Additional copyrights may follow
 278  *
 279  * $HEADER$
 280  */
 281
 282
 283 /* static functions */
 284 static int assignnodes(int ndim, int nfactor, int *pfacts,int **pdims);
 285 static int getfactors(int num, int *nfators, int **factors);
 286
 287 /*
 288  * This is a utility function, no need to have anything in the lower
 289  * layer for this at all
 290  */
 291 int smpi_mpi_dims_create(int nnodes, int ndims, int dims[])
 292 {
 293     int i;
 294     int freeprocs;
 295     int freedims;
 296     int nfactors;
 297     int *factors;
 298     int *procs;
 299     int *p;
 300     int err;
 301
 302     /* Get # of free-to-be-assigned processes and # of free dimensions */
 303     freeprocs = nnodes;
 304     freedims = 0;
 305     for (i = 0, p = dims; i < ndims; ++i,++p) {
 306         if (*p == 0) {
 307             ++freedims;
 308         } else if ((*p < 0) || ((nnodes % *p) != 0)) {
 309           return  MPI_ERR_DIMS;
 310
 311         } else {
 312             freeprocs /= *p;
 313         }
 314     }
 315
 316     if (freedims == 0) {
 317        if (freeprocs == 1) {
 318           return MPI_SUCCESS;
 319        }
 320        return MPI_ERR_DIMS;
 321     }
 322
 323     if (freeprocs == 1) {
 324         for (i = 0; i < ndims; ++i, ++dims) {
 325             if (*dims == 0) {
 326                *dims = 1;
 327             }
 328         }
 329         return MPI_SUCCESS;
 330     }
 331
 332     /* Factor the number of free processes */
 333     if (MPI_SUCCESS != (err = getfactors(freeprocs, &nfactors, &factors))) {
 334       return  err;
 335     }
 336
 337     /* Assign free processes to free dimensions */
 338     if (MPI_SUCCESS != (err = assignnodes(freedims, nfactors, factors, &procs))) {
 339       return err;
 340     }
 341
 342     /* Return assignment results */
 343     p = procs;
 344     for (i = 0; i < ndims; ++i, ++dims) {
 345         if (*dims == 0) {
 346            *dims = *p++;
 347         }
 348     }
 349
 350     free((char *) factors);
 351     free((char *) procs);
 352
 353     /* all done */
 354     return MPI_SUCCESS;
 355 }
 356
 357 /*
 358  *  assignnodes
 359  *
 360  *  Function:   - assign processes to dimensions
 361  *          - get "best-balanced" grid
 362  *          - greedy bin-packing algorithm used
 363  *          - sort dimensions in decreasing order
 364  *          - dimensions array dynamically allocated
 365  *  Accepts:    - # of dimensions
 366  *          - # of prime factors
 367  *          - array of prime factors
 368  *          - ptr to array of dimensions (returned value)
 369  *  Returns:    - 0 or ERROR
 370  */
 371 static int
 372 assignnodes(int ndim, int nfactor, int *pfacts, int **pdims)
 373 {
 374     int *bins;
 375     int i, j;
 376     int n;
 377     int f;
 378     int *p;
 379     int *pmin;
 380
 381     if (0 >= ndim) {
 382        return MPI_ERR_DIMS;
 383     }
 384
 385     /* Allocate and initialize the bins */
 386     bins = (int *) malloc((unsigned) ndim * sizeof(int));
 387     if (NULL == bins) {
 388        return MPI_ERR_NO_MEM;
 389     }
 390     *pdims = bins;
 391
 392     for (i = 0, p = bins; i < ndim; ++i, ++p) {
 393         *p = 1;
 394      }
 395
 396     /* Loop assigning factors from the highest to the lowest */
 397     for (j = nfactor - 1; j >= 0; --j) {
 398         f = pfacts[j];
 399         /* Assign a factor to the smallest bin */
 400         pmin = bins;
 401         for (i = 1, p = pmin + 1; i < ndim; ++i, ++p) {
 402             if (*p < *pmin) {
 403                 pmin = p;
 404             }
 405         }
 406         *pmin *= f;
 407      }
 408
 409      /* Sort dimensions in decreasing order (O(n^2) for now) */
 410      for (i = 0, pmin = bins; i < ndim - 1; ++i, ++pmin) {
 411          for (j = i + 1, p = pmin + 1; j < ndim; ++j, ++p) {
 412              if (*p > *pmin) {
 413                 n = *p;
 414                 *p = *pmin;
 415                 *pmin = n;
 416              }
 417          }
 418      }
 419
 420      return MPI_SUCCESS;
 421 }
 422
 423 /*
 424  *  getfactors
 425  *
 426  *  Function:   - factorize a number
 427  *  Accepts:    - number
 428  *          - # prime factors
 429  *          - array of prime factors
 430  *  Returns:    - MPI_SUCCESS or ERROR
 431  */
 432 static int
 433 getfactors(int num, int *nfactors, int **factors) {
 434     int size;
 435     int d;
 436     int i;
 437     int sqrtnum;
 438
 439     if(num  < 2) {
 440         (*nfactors) = 0;
 441         (*factors) = NULL;
 442         return MPI_SUCCESS;
 443     }
 444     /* Allocate the array of prime factors which cannot exceed log_2(num) entries */
 445     sqrtnum = ceil(sqrt(num));
 446     size = ceil(log(num) / log(2));
 447     *factors = (int *) malloc((unsigned) size * sizeof(int));
 448
 449     i = 0;
 450     /* determine all occurences of factor 2 */
 451     while((num % 2) == 0) {
 452         num /= 2;
 453         (*factors)[i++] = 2;
 454     }
 455     /* determine all occurences of uneven prime numbers up to sqrt(num) */
 456     for(d = 3; (num > 1) && (d < sqrtnum); d += 2) {
 457         while((num % d) == 0) {
 458             num /= d;
 459             (*factors)[i++] = d;
 460         }
 461     }
 462     /* as we looped only up to sqrt(num) one factor > sqrt(num) may be left over */
 463     if(num != 1) {
 464         (*factors)[i++] = num;
 465     }
 466     (*nfactors) = i;
 467     return MPI_SUCCESS;
 468 }
 469