src/smpi/smpi_topo.c

   1 #include "xbt/sysdep.h"
   2 #include "smpi/smpi.h"
   3 #include "private.h"
   4
   5 #include <math.h>
   6
   7 typedef struct s_smpi_mpi_topology {
   8   int nnodes;
   9   int ndims;
  10   int *dims;
  11   int *periodic;
  12   int *position;
  13 } s_smpi_mpi_topology_t;
  14
  15
  16
  17 void smpi_topo_destroy(MPI_Topology topo) {
  18   if (topo) {
  19     if(topo->dims) {
  20       free(topo->dims);
  21     }
  22     if(topo->periodic) {
  23       free(topo->periodic);
  24     }
  25     if(topo->position) {
  26       free(topo->position);
  27     }
  28     free(topo);
  29   }
  30 }
  31
  32 MPI_Topology smpi_topo_create(int ndims) {
  33   MPI_Topology topo = xbt_malloc(sizeof(*topo));
  34   topo->nnodes = 0;
  35   topo->ndims = ndims;
  36   topo->dims = xbt_malloc(ndims * sizeof(*topo->dims));
  37   topo->periodic = xbt_malloc(ndims * sizeof(*topo->periodic));
  38   topo->position = xbt_malloc(ndims * sizeof(*topo->position));
  39   return topo;
  40 }
  41
  42 /* reorder is ignored, don't know what would be the consequences of a dumb
  43  * reordering but neither do I see the point of reordering*/
  44 int smpi_mpi_cart_create(MPI_Comm comm_old, int ndims, int dims[],
  45                      int periods[], int reorder, MPI_Comm *comm_cart) {
  46   int retval = MPI_SUCCESS;
  47   int i;
  48   MPI_Topology topo;
  49   MPI_Group newGroup, oldGroup;
  50   int rank, nranks,  newSize;
  51
  52
  53
  54   rank = smpi_comm_rank(comm_old);
  55
  56   // TODO : add somewhere : if topo != NULL, free...
  57   topo = smpi_topo_create(ndims);
  58   newSize = 1;
  59   if(ndims != 0) {
  60     for (i = 0 ; i < ndims ; i++) {
  61       newSize *= dims[i];
  62     }
  63     if(rank >= newSize) {
  64       *comm_cart = MPI_COMM_NULL;
  65       return retval;
  66     }
  67     oldGroup = smpi_comm_group(comm_old);
  68     newGroup = smpi_group_new(newSize);
  69     for (i = 0 ; i < newSize ; i++) {
  70       smpi_group_set_mapping(newGroup, smpi_group_index(oldGroup, i), i);
  71     }
  72
  73     topo->nnodes = newSize;
  74
  75     memcpy(topo->dims, dims, ndims * sizeof(*topo->dims));
  76     memcpy(topo->periodic, periods, ndims * sizeof(*topo->periodic));
  77
  78     //  code duplication... See smpi_mpi_cart_coords
  79     nranks = newSize;
  80     for (i=0; i<ndims; i++)
  81       {
  82         topo->dims[i]     = dims[i];
  83         topo->periodic[i] = periods[i];
  84         nranks = nranks / dims[i];
  85         /* FIXME: nranks could be zero (?) */
  86         topo->position[i] = rank / nranks;
  87         rank = rank % nranks;
  88       }
  89
  90     *comm_cart = smpi_comm_new(newGroup, topo);
  91   }
  92   else {
  93     if (rank == 0) {
  94       *comm_cart = smpi_comm_new(smpi_comm_group(MPI_COMM_SELF), topo);
  95     }
  96     else {
  97       *comm_cart = MPI_COMM_NULL;
  98     }
  99   }
 100   return retval;
 101 }
 102
 103 int smpi_mpi_cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *newcomm) {
 104   MPI_Topology oldTopo = smpi_comm_topo(comm);
 105   int oldNDims = oldTopo->ndims;
 106   int i, j = 0, newNDims, *newDims = NULL, *newPeriodic = NULL;
 107
 108   if (remain_dims == NULL && oldNDims != 0) {
 109     return MPI_ERR_ARG;
 110   }
 111   newNDims = 0;
 112   for (i = 0 ; i < oldNDims ; i++) {
 113     if (remain_dims[i]) newNDims++;
 114   }
 115
 116   if (newNDims > 0) {
 117     newDims = malloc(newNDims * sizeof(*newDims));
 118     newPeriodic = malloc(newNDims * sizeof(*newPeriodic));
 119
 120     // that should not segfault
 121     for (i = 0 ; j < newNDims ; i++) {
 122       if(remain_dims[i]) {
 123         newDims[j] = oldTopo->dims[i];
 124         newPeriodic[j] = oldTopo->periodic[i];
 125         j++;
 126       }
 127     }
 128   }
 129   return smpi_mpi_cart_create(comm, newNDims, newDims, newPeriodic, 0, newcomm);
 130 }
 131
 132
 133
 134
 135 int smpi_mpi_cart_coords(MPI_Comm comm, int rank, int maxdims,
 136                          int coords[]) {
 137   int nnodes;
 138   int i;
 139   MPI_Topology topo = smpi_comm_topo(comm);
 140
 141   nnodes = topo->nnodes;
 142   for ( i=0; i < topo->ndims; i++ ) {
 143     nnodes    = nnodes / topo->dims[i];
 144     coords[i] = rank / nnodes;
 145     rank      = rank % nnodes;
 146   }
 147   return MPI_SUCCESS;
 148 }
 149
 150 int smpi_mpi_cart_get(MPI_Comm comm, int maxdims, int* dims, int* periods, int* coords) {
 151   MPI_Topology topo = smpi_comm_topo(comm);
 152   int i;
 153   for(i = 0 ; i < maxdims ; i++) {
 154     dims[i] = topo->dims[i];
 155     periods[i] = topo->periodic[i];
 156     coords[i] = topo->position[i];
 157   }
 158   return MPI_SUCCESS;
 159 }
 160
 161 int smpi_mpi_cart_rank(MPI_Comm comm, int* coords, int* rank) {
 162   MPI_Topology topo = smpi_comm_topo(comm);
 163   int ndims = topo->ndims;
 164   int multiplier, coord,i;
 165   *rank = 0;
 166   multiplier = 1;
 167
 168
 169
 170   for ( i=ndims-1; i >=0; i-- ) {
 171     coord = coords[i];
 172
 173     /* Should we check first for args correction, then process,
 174      * or check while we work (as it is currently done) ? */
 175       if (coord >= topo->dims[i]) {
 176         if ( topo->periodic[i] ) {
 177         coord = coord % topo->dims[i];
 178         }
 179         else {
 180           // Should I do that ?
 181           *rank = -1;
 182           return MPI_ERR_ARG;
 183         }
 184       }
 185       else if (coord <  0) {
 186         if(topo->periodic[i]) {
 187           coord = coord % topo->dims[i];
 188           if (coord) coord = topo->dims[i] + coord;
 189         }
 190         else {
 191           *rank = -1;
 192           return MPI_ERR_ARG;
 193         }
 194       }
 195
 196       *rank += multiplier * coord;
 197       multiplier *= topo->dims[i];
 198   }
 199   return MPI_SUCCESS;
 200 }
 201
 202 int smpi_mpi_cart_shift(MPI_Comm comm, int direction, int disp,
 203                         int *rank_source, int *rank_dest) {
 204   MPI_Topology topo = smpi_comm_topo(comm);
 205   int position[topo->ndims];
 206
 207
 208   if(topo->ndims == 0) {
 209     return MPI_ERR_ARG;
 210   }
 211   if (topo->ndims < direction) {
 212     return MPI_ERR_DIMS;
 213   }
 214
 215   smpi_mpi_cart_coords(comm, smpi_comm_rank(comm), topo->ndims, position);
 216   position[direction] += disp;
 217
 218   if(position[direction] < 0 || position[direction] >= topo->dims[direction]) {
 219     if(topo->periodic[direction]) {
 220       position[direction] %= topo->dims[direction];
 221       smpi_mpi_cart_rank(comm, position, rank_dest);
 222     }
 223     else {
 224       *rank_dest = MPI_PROC_NULL;
 225     }
 226   }
 227   else {
 228     smpi_mpi_cart_rank(comm, position, rank_dest);
 229   }
 230
 231   position[direction] =  topo->position[direction] - disp;
 232   if(position[direction] < 0 || position[direction] >= topo->dims[direction]) {
 233     if(topo->periodic[direction]) {
 234       position[direction] %= topo->dims[direction];
 235       smpi_mpi_cart_rank(comm, position, rank_source);
 236     }
 237     else {
 238       *rank_source = MPI_PROC_NULL;
 239     }
 240   }
 241   else {
 242     smpi_mpi_cart_rank(comm, position, rank_source);
 243   }
 244
 245   return MPI_SUCCESS;
 246 }
 247
 248 int smpi_mpi_cartdim_get(MPI_Comm comm, int *ndims) {
 249   MPI_Topology topo = smpi_comm_topo(comm);
 250
 251   *ndims = topo->ndims;
 252   return MPI_SUCCESS;
 253 }
 254
 255
 256
 257 // Everything below has been taken from ompi, but could be easily rewritten.
 258
 259 /*
 260  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 261  *                         University Research and Technology
 262  *                         Corporation.  All rights reserved.
 263  * Copyright (c) 2004-2005 The University of Tennessee and The University
 264  *                         of Tennessee Research Foundation.  All rights
 265  *                         reserved.
 266  * Copyright (c) 2004-2014 High Performance Computing Center Stuttgart,
 267  *                         University of Stuttgart.  All rights reserved.
 268  * Copyright (c) 2004-2005 The Regents of the University of California.
 269  *                         All rights reserved.
 270  * Copyright (c) 2012      Los Alamos National Security, LLC.  All rights
 271  *                         reserved.
 272  * Copyright (c) 2014      Intel, Inc. All rights reserved
 273  * $COPYRIGHT$
 274  *
 275  * Additional copyrights may follow
 276  *
 277  * $HEADER$
 278  */
 279
 280
 281 /* static functions */
 282 static int assignnodes(int ndim, int nfactor, int *pfacts,int **pdims);
 283 static int getfactors(int num, int *nfators, int **factors);
 284
 285 /*
 286  * This is a utility function, no need to have anything in the lower
 287  * layer for this at all
 288  */
 289 int smpi_mpi_dims_create(int nnodes, int ndims, int dims[])
 290 {
 291     int i;
 292     int freeprocs;
 293     int freedims;
 294     int nfactors;
 295     int *factors;
 296     int *procs;
 297     int *p;
 298     int err;
 299
 300     /* Get # of free-to-be-assigned processes and # of free dimensions */
 301     freeprocs = nnodes;
 302     freedims = 0;
 303     for (i = 0, p = dims; i < ndims; ++i,++p) {
 304         if (*p == 0) {
 305             ++freedims;
 306         } else if ((*p < 0) || ((nnodes % *p) != 0)) {
 307           return  MPI_ERR_DIMS;
 308
 309         } else {
 310             freeprocs /= *p;
 311         }
 312     }
 313
 314     if (freedims == 0) {
 315        if (freeprocs == 1) {
 316           return MPI_SUCCESS;
 317        }
 318        return MPI_ERR_DIMS;
 319     }
 320
 321     if (freeprocs == 1) {
 322         for (i = 0; i < ndims; ++i, ++dims) {
 323             if (*dims == 0) {
 324                *dims = 1;
 325             }
 326         }
 327         return MPI_SUCCESS;
 328     }
 329
 330     /* Factor the number of free processes */
 331     if (MPI_SUCCESS != (err = getfactors(freeprocs, &nfactors, &factors))) {
 332       return  err;
 333     }
 334
 335     /* Assign free processes to free dimensions */
 336     if (MPI_SUCCESS != (err = assignnodes(freedims, nfactors, factors, &procs))) {
 337       return err;
 338     }
 339
 340     /* Return assignment results */
 341     p = procs;
 342     for (i = 0; i < ndims; ++i, ++dims) {
 343         if (*dims == 0) {
 344            *dims = *p++;
 345         }
 346     }
 347
 348     free((char *) factors);
 349     free((char *) procs);
 350
 351     /* all done */
 352     return MPI_SUCCESS;
 353 }
 354
 355 /*
 356  *  assignnodes
 357  *
 358  *  Function:   - assign processes to dimensions
 359  *          - get "best-balanced" grid
 360  *          - greedy bin-packing algorithm used
 361  *          - sort dimensions in decreasing order
 362  *          - dimensions array dynamically allocated
 363  *  Accepts:    - # of dimensions
 364  *          - # of prime factors
 365  *          - array of prime factors
 366  *          - ptr to array of dimensions (returned value)
 367  *  Returns:    - 0 or ERROR
 368  */
 369 static int
 370 assignnodes(int ndim, int nfactor, int *pfacts, int **pdims)
 371 {
 372     int *bins;
 373     int i, j;
 374     int n;
 375     int f;
 376     int *p;
 377     int *pmin;
 378
 379     if (0 >= ndim) {
 380        return MPI_ERR_DIMS;
 381     }
 382
 383     /* Allocate and initialize the bins */
 384     bins = (int *) malloc((unsigned) ndim * sizeof(int));
 385     if (NULL == bins) {
 386        return MPI_ERR_NO_MEM;
 387     }
 388     *pdims = bins;
 389
 390     for (i = 0, p = bins; i < ndim; ++i, ++p) {
 391         *p = 1;
 392      }
 393
 394     /* Loop assigning factors from the highest to the lowest */
 395     for (j = nfactor - 1; j >= 0; --j) {
 396         f = pfacts[j];
 397         /* Assign a factor to the smallest bin */
 398         pmin = bins;
 399         for (i = 1, p = pmin + 1; i < ndim; ++i, ++p) {
 400             if (*p < *pmin) {
 401                 pmin = p;
 402             }
 403         }
 404         *pmin *= f;
 405      }
 406
 407      /* Sort dimensions in decreasing order (O(n^2) for now) */
 408      for (i = 0, pmin = bins; i < ndim - 1; ++i, ++pmin) {
 409          for (j = i + 1, p = pmin + 1; j < ndim; ++j, ++p) {
 410              if (*p > *pmin) {
 411                 n = *p;
 412                 *p = *pmin;
 413                 *pmin = n;
 414              }
 415          }
 416      }
 417
 418      return MPI_SUCCESS;
 419 }
 420
 421 /*
 422  *  getfactors
 423  *
 424  *  Function:   - factorize a number
 425  *  Accepts:    - number
 426  *          - # prime factors
 427  *          - array of prime factors
 428  *  Returns:    - MPI_SUCCESS or ERROR
 429  */
 430 static int
 431 getfactors(int num, int *nfactors, int **factors) {
 432     int size;
 433     int d;
 434     int i;
 435     int sqrtnum;
 436
 437     if(num  < 2) {
 438         (*nfactors) = 0;
 439         (*factors) = NULL;
 440         return MPI_SUCCESS;
 441     }
 442     /* Allocate the array of prime factors which cannot exceed log_2(num) entries */
 443     sqrtnum = ceil(sqrt(num));
 444     size = ceil(log(num) / log(2));
 445     *factors = (int *) malloc((unsigned) size * sizeof(int));
 446
 447     i = 0;
 448     /* determine all occurences of factor 2 */
 449     while((num % 2) == 0) {
 450         num /= 2;
 451         (*factors)[i++] = 2;
 452     }
 453     /* determine all occurences of uneven prime numbers up to sqrt(num) */
 454     d = 3;
 455     for(d = 3; (num > 1) && (d < sqrtnum); d += 2) {
 456         while((num % d) == 0) {
 457             num /= d;
 458             (*factors)[i++] = d;
 459         }
 460     }
 461     /* as we looped only up to sqrt(num) one factor > sqrt(num) may be left over */
 462     if(num != 1) {
 463         (*factors)[i++] = num;
 464     }
 465     (*nfactors) = i;
 466     return MPI_SUCCESS;
 467 }
 468