src/smpi/smpi_topo.cpp

   1 /* Copyright (c) 2014. The SimGrid Team.
   2  * All rights reserved.                                                     */
   3
   4 /* This program is free software; you can redistribute it and/or modify it
   5  * under the terms of the license (GNU LGPL) which comes with this package. */
   6
   7 #include "xbt/sysdep.h"
   8 #include "smpi/smpi.h"
   9 #include "private.h"
  10
  11 #include <math.h>
  12
  13 typedef struct s_smpi_mpi_cart_topology {
  14     int nnodes;
  15     int ndims;
  16     int *dims;
  17     int *periodic;
  18     int *position;
  19 } s_smpi_mpi_cart_topology_t;
  20
  21 typedef struct s_smpi_mpi_graph_topology {
  22     int nnodes;
  23     int nedges;
  24     int *index;
  25     int *edges;
  26 } s_smpi_mpi_graph_topology_t;
  27
  28 typedef struct s_smpi_dist_graph_topology {
  29     int indegree;
  30     int *in;
  31     int *in_weights;
  32     int outdegree;
  33     int *out;
  34     int *out_weights;
  35     int is_weighted;
  36 } s_smpi_mpi_dist_graph_topology_t;
  37
  38 typedef struct s_smpi_mpi_topology {
  39     MPIR_Topo_type kind;
  40     union topo {
  41         MPIR_Graph_Topology graph;
  42         MPIR_Cart_Topology  cart;
  43         MPIR_Dist_Graph_Topology dist_graph;
  44     } topo;
  45 } s_smpi_mpi_topology_t;
  46
  47 void smpi_topo_destroy(MPI_Topology topo) {
  48     if(topo == NULL) {
  49         return;
  50     }
  51     switch (topo->kind) {
  52     case MPI_GRAPH:
  53         // Not implemented
  54         // smpi_graph_topo_destroy(topo->topo.graph);
  55         break;
  56     case MPI_CART:
  57         smpi_cart_topo_destroy(topo->topo.cart);
  58         break;
  59     case MPI_DIST_GRAPH:
  60         // Not implemented
  61         // smpi_dist_graph_topo_destroy(topo->topo.dist_graph);
  62         break;
  63     default:
  64         return;
  65         break;
  66     }
  67 }
  68
  69 MPI_Topology smpi_topo_create(MPIR_Topo_type kind) {
  70     MPI_Topology newTopo = static_cast<MPI_Topology>(xbt_malloc(sizeof(*newTopo)));
  71     newTopo->kind = kind;
  72     // Allocate and initialize the right topo should be done by the caller
  73     return newTopo;
  74 }
  75
  76 /*******************************************************************************
  77  * Cartesian topologies
  78  ******************************************************************************/
  79 void smpi_cart_topo_destroy(MPIR_Cart_Topology cart) {
  80     if (cart) {
  81         if(cart->dims) {
  82             free(cart->dims);
  83         }
  84         if(cart->periodic) {
  85             free(cart->periodic);
  86         }
  87         if(cart->position) {
  88             free(cart->position);
  89         }
  90         free(cart);
  91     }
  92 }
  93
  94 MPI_Topology smpi_cart_topo_create(int ndims) {
  95     MPI_Topology newTopo = smpi_topo_create(MPI_CART);
  96     MPIR_Cart_Topology newCart = static_cast<MPIR_Cart_Topology>(xbt_malloc(sizeof(*newCart)));
  97     newCart->nnodes = 0;
  98     newCart->ndims = ndims;
  99     newCart->dims = xbt_new(int, ndims);
 100     newCart->periodic = xbt_new(int, ndims);
 101     newCart->position = xbt_new(int, ndims);
 102     newTopo->topo.cart = newCart;
 103     return newTopo;
 104 }
 105
 106 /* reorder is ignored, don't know what would be the consequences of a dumb
 107  * reordering but neither do I see the point of reordering*/
 108 int smpi_mpi_cart_create(MPI_Comm comm_old, int ndims, int dims[],
 109                          int periods[], int reorder, MPI_Comm *comm_cart) {
 110     int retval = MPI_SUCCESS;
 111     int i;
 112     MPI_Topology newCart;
 113     MPI_Group newGroup, oldGroup;
 114     int rank, nranks, newSize;
 115
 116     rank = smpi_comm_rank(comm_old);
 117
 118     newSize = 1;
 119     if(ndims != 0) {
 120         for (i = 0 ; i < ndims ; i++) {
 121             newSize *= dims[i];
 122         }
 123         if(rank >= newSize) {
 124             *comm_cart = MPI_COMM_NULL;
 125             return retval;
 126         }
 127         newCart = smpi_cart_topo_create(ndims);
 128         oldGroup = smpi_comm_group(comm_old);
 129         newGroup = smpi_group_new(newSize);
 130         for (i = 0 ; i < newSize ; i++) {
 131             smpi_group_set_mapping(newGroup, smpi_group_index(oldGroup, i), i);
 132         }
 133
 134         newCart->topo.cart->nnodes = newSize;
 135
 136         /* memcpy(newCart->topo.cart->dims, dims, */
 137         /*        ndims * sizeof(*newCart->topo.cart->dims)); */
 138         /* memcpy(newCart->topo.cart->periodic, periods, */
 139         /*        ndims * sizeof(*newCart->topo.cart->periodic)); */
 140
 141         //  FIXME : code duplication... See smpi_mpi_cart_coords
 142         nranks = newSize;
 143         for (i=0; i<ndims; i++)
 144         {
 145             newCart->topo.cart->dims[i] = dims[i];
 146             newCart->topo.cart->periodic[i] = periods[i];
 147             nranks = nranks / dims[i];
 148             /* FIXME: nranks could be zero (?) */
 149             newCart->topo.cart->position[i] = rank / nranks;
 150             rank = rank % nranks;
 151         }
 152
 153         *comm_cart = smpi_comm_new(newGroup, newCart);
 154     }
 155     else {
 156         if (rank == 0) {
 157             newCart = smpi_cart_topo_create(ndims);
 158             *comm_cart = smpi_comm_new(smpi_comm_group(MPI_COMM_SELF), newCart);
 159         }
 160         else {
 161             *comm_cart = MPI_COMM_NULL;
 162         }
 163     }
 164     return retval;
 165 }
 166
 167 int smpi_mpi_cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *newcomm) {
 168     MPI_Topology oldTopo = smpi_comm_topo(comm);
 169     int oldNDims = oldTopo->topo.cart->ndims;
 170     int i, j = 0, newNDims, *newDims = NULL, *newPeriodic = NULL;
 171
 172     if (remain_dims == NULL && oldNDims != 0) {
 173         return MPI_ERR_ARG;
 174     }
 175     newNDims = 0;
 176     for (i = 0 ; i < oldNDims ; i++) {
 177         if (remain_dims[i]) newNDims++;
 178     }
 179
 180     if (newNDims > 0) {
 181         newDims = xbt_new(int, newNDims);
 182         newPeriodic = xbt_new(int, newNDims);
 183
 184         // that should not segfault
 185         for (i = 0 ; j < newNDims ; i++) {
 186             if(remain_dims[i]) {
 187                 newDims[j] = oldTopo->topo.cart->dims[i];
 188                 newPeriodic[j] = oldTopo->topo.cart->periodic[i];
 189                 j++;
 190             }
 191         }
 192     }
 193     return smpi_mpi_cart_create(comm, newNDims, newDims, newPeriodic, 0, newcomm);
 194 }
 195
 196
 197
 198
 199 int smpi_mpi_cart_coords(MPI_Comm comm, int rank, int maxdims,
 200                          int coords[]) {
 201     int nnodes;
 202     int i;
 203     MPI_Topology topo = smpi_comm_topo(comm);
 204
 205     nnodes = topo->topo.cart->nnodes;
 206     for ( i=0; i < topo->topo.cart->ndims; i++ ) {
 207         nnodes    = nnodes / topo->topo.cart->dims[i];
 208         coords[i] = rank / nnodes;
 209         rank      = rank % nnodes;
 210     }
 211     return MPI_SUCCESS;
 212 }
 213
 214 int smpi_mpi_cart_get(MPI_Comm comm, int maxdims, int* dims, int* periods, int* coords) {
 215     MPI_Topology topo = smpi_comm_topo(comm);
 216     int i;
 217     for(i = 0 ; i < maxdims ; i++) {
 218         dims[i] = topo->topo.cart->dims[i];
 219         periods[i] = topo->topo.cart->periodic[i];
 220         coords[i] = topo->topo.cart->position[i];
 221     }
 222     return MPI_SUCCESS;
 223 }
 224
 225 int smpi_mpi_cart_rank(MPI_Comm comm, int* coords, int* rank) {
 226     MPI_Topology topo = smpi_comm_topo(comm);
 227     int ndims = topo->topo.cart->ndims;
 228     int multiplier, coord,i;
 229     *rank = 0;
 230     multiplier = 1;
 231
 232     for ( i=ndims-1; i >=0; i-- ) {
 233         coord = coords[i];
 234
 235         /* The user can give us whatever coordinates he wants. If one of them is
 236          * out of range, either this dimension is periodic, and then we
 237          * consider the equivalent coordinate inside the bounds, or it is not
 238          * and then it is an error
 239          */
 240         if (coord >= topo->topo.cart->dims[i]) {
 241             if ( topo->topo.cart->periodic[i] ) {
 242                 coord = coord % topo->topo.cart->dims[i];
 243             }
 244             else {
 245                 // Should I do that ?
 246                 *rank = -1;
 247                 return MPI_ERR_ARG;
 248             }
 249         }
 250         else if (coord <  0) {
 251             if(topo->topo.cart->periodic[i]) {
 252                 coord = coord % topo->topo.cart->dims[i];
 253                 if (coord) coord = topo->topo.cart->dims[i] + coord;
 254             }
 255             else {
 256                 *rank = -1;
 257                 return MPI_ERR_ARG;
 258             }
 259         }
 260
 261         *rank += multiplier * coord;
 262         multiplier *= topo->topo.cart->dims[i];
 263     }
 264     return MPI_SUCCESS;
 265 }
 266
 267 int smpi_mpi_cart_shift(MPI_Comm comm, int direction, int disp,
 268                         int *rank_source, int *rank_dest) {
 269     MPI_Topology topo = smpi_comm_topo(comm);
 270     int position[topo->topo.cart->ndims];
 271
 272
 273     if(topo->topo.cart->ndims == 0) {
 274         return MPI_ERR_ARG;
 275     }
 276     if (topo->topo.cart->ndims < direction) {
 277         return MPI_ERR_DIMS;
 278     }
 279
 280     smpi_mpi_cart_coords(comm, smpi_comm_rank(comm), topo->topo.cart->ndims,
 281                          position);
 282     position[direction] += disp;
 283
 284     if(position[direction] < 0 ||
 285        position[direction] >= topo->topo.cart->dims[direction]) {
 286         if(topo->topo.cart->periodic[direction]) {
 287             position[direction] %= topo->topo.cart->dims[direction];
 288             smpi_mpi_cart_rank(comm, position, rank_dest);
 289         }
 290         else {
 291             *rank_dest = MPI_PROC_NULL;
 292         }
 293     }
 294     else {
 295         smpi_mpi_cart_rank(comm, position, rank_dest);
 296     }
 297
 298     position[direction] =  topo->topo.cart->position[direction] - disp;
 299     if(position[direction] < 0 ||
 300        position[direction] >= topo->topo.cart->dims[direction]) {
 301         if(topo->topo.cart->periodic[direction]) {
 302             position[direction] %= topo->topo.cart->dims[direction];
 303             smpi_mpi_cart_rank(comm, position, rank_source);
 304         }
 305         else {
 306             *rank_source = MPI_PROC_NULL;
 307         }
 308     }
 309     else {
 310         smpi_mpi_cart_rank(comm, position, rank_source);
 311     }
 312
 313     return MPI_SUCCESS;
 314 }
 315
 316 int smpi_mpi_cartdim_get(MPI_Comm comm, int *ndims) {
 317     MPI_Topology topo = smpi_comm_topo(comm);
 318
 319     *ndims = topo->topo.cart->ndims;
 320     return MPI_SUCCESS;
 321 }
 322
 323
 324
 325 // Everything below has been taken from ompi, but could be easily rewritten.
 326
 327 /*
 328  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 329  *                         University Research and Technology
 330  *                         Corporation.  All rights reserved.
 331  * Copyright (c) 2004-2005 The University of Tennessee and The University
 332  *                         of Tennessee Research Foundation.  All rights
 333  *                         reserved.
 334  * Copyright (c) 2004-2014 High Performance Computing Center Stuttgart,
 335  *                         University of Stuttgart.  All rights reserved.
 336  * Copyright (c) 2004-2005 The Regents of the University of California.
 337  *                         All rights reserved.
 338  * Copyright (c) 2012      Los Alamos National Security, LLC.  All rights
 339  *                         reserved.
 340  * Copyright (c) 2014      Intel, Inc. All rights reserved
 341  * $COPYRIGHT$
 342  *
 343  * Additional copyrights may follow
 344  *
 345  * $HEADER$
 346  */
 347
 348
 349 /* static functions */
 350 static int assignnodes(int ndim, int nfactor, int *pfacts,int **pdims);
 351 static int getfactors(int num, int *nfators, int **factors);
 352
 353 /*
 354  * This is a utility function, no need to have anything in the lower
 355  * layer for this at all
 356  */
 357 int smpi_mpi_dims_create(int nnodes, int ndims, int dims[])
 358 {
 359     int i;
 360     int freeprocs;
 361     int freedims;
 362     int nfactors;
 363     int *factors;
 364     int *procs;
 365     int *p;
 366     int err;
 367
 368     /* Get # of free-to-be-assigned processes and # of free dimensions */
 369     freeprocs = nnodes;
 370     freedims = 0;
 371     for (i = 0, p = dims; i < ndims; ++i,++p) {
 372         if (*p == 0) {
 373             ++freedims;
 374         } else if ((*p < 0) || ((nnodes % *p) != 0)) {
 375             return  MPI_ERR_DIMS;
 376
 377         } else {
 378             freeprocs /= *p;
 379         }
 380     }
 381
 382     if (freedims == 0) {
 383         if (freeprocs == 1) {
 384             return MPI_SUCCESS;
 385         }
 386         return MPI_ERR_DIMS;
 387     }
 388
 389     if (freeprocs == 1) {
 390         for (i = 0; i < ndims; ++i, ++dims) {
 391             if (*dims == 0) {
 392                 *dims = 1;
 393             }
 394         }
 395         return MPI_SUCCESS;
 396     }
 397
 398     /* Factor the number of free processes */
 399     if (MPI_SUCCESS != (err = getfactors(freeprocs, &nfactors, &factors))) {
 400         return  err;
 401     }
 402
 403     /* Assign free processes to free dimensions */
 404     if (MPI_SUCCESS != (err = assignnodes(freedims, nfactors, factors, &procs))) {
 405         return err;
 406     }
 407
 408     /* Return assignment results */
 409     p = procs;
 410     for (i = 0; i < ndims; ++i, ++dims) {
 411         if (*dims == 0) {
 412             *dims = *p++;
 413         }
 414     }
 415
 416     free((char *) factors);
 417     free((char *) procs);
 418
 419     /* all done */
 420     return MPI_SUCCESS;
 421 }
 422
 423 /*
 424  *  assignnodes
 425  *
 426  *  Function:   - assign processes to dimensions
 427  *          - get "best-balanced" grid
 428  *          - greedy bin-packing algorithm used
 429  *          - sort dimensions in decreasing order
 430  *          - dimensions array dynamically allocated
 431  *  Accepts:    - # of dimensions
 432  *          - # of prime factors
 433  *          - array of prime factors
 434  *          - ptr to array of dimensions (returned value)
 435  *  Returns:    - 0 or ERROR
 436  */
 437 static int
 438 assignnodes(int ndim, int nfactor, int *pfacts, int **pdims)
 439 {
 440     int *bins;
 441     int i, j;
 442     int n;
 443     int f;
 444     int *p;
 445     int *pmin;
 446
 447     if (0 >= ndim) {
 448         return MPI_ERR_DIMS;
 449     }
 450
 451     /* Allocate and initialize the bins */
 452     bins = (int *) malloc((unsigned) ndim * sizeof(int));
 453     if (NULL == bins) {
 454         return MPI_ERR_NO_MEM;
 455     }
 456     *pdims = bins;
 457
 458     for (i = 0, p = bins; i < ndim; ++i, ++p) {
 459         *p = 1;
 460     }
 461
 462     /* Loop assigning factors from the highest to the lowest */
 463     for (j = nfactor - 1; j >= 0; --j) {
 464         f = pfacts[j];
 465         /* Assign a factor to the smallest bin */
 466         pmin = bins;
 467         for (i = 1, p = pmin + 1; i < ndim; ++i, ++p) {
 468             if (*p < *pmin) {
 469                 pmin = p;
 470             }
 471         }
 472         *pmin *= f;
 473     }
 474
 475     /* Sort dimensions in decreasing order (O(n^2) for now) */
 476     for (i = 0, pmin = bins; i < ndim - 1; ++i, ++pmin) {
 477         for (j = i + 1, p = pmin + 1; j < ndim; ++j, ++p) {
 478             if (*p > *pmin) {
 479                 n = *p;
 480                 *p = *pmin;
 481                 *pmin = n;
 482             }
 483         }
 484     }
 485
 486     return MPI_SUCCESS;
 487 }
 488
 489 /*
 490  *  getfactors
 491  *
 492  *  Function:   - factorize a number
 493  *  Accepts:    - number
 494  *          - # prime factors
 495  *          - array of prime factors
 496  *  Returns:    - MPI_SUCCESS or ERROR
 497  */
 498 static int
 499 getfactors(int num, int *nfactors, int **factors) {
 500     int size;
 501     int d;
 502     int i;
 503     int sqrtnum;
 504
 505     if(num  < 2) {
 506         (*nfactors) = 0;
 507         (*factors) = NULL;
 508         return MPI_SUCCESS;
 509     }
 510     /* Allocate the array of prime factors which cannot exceed log_2(num) entries */
 511     sqrtnum = ceil(sqrt(num));
 512     size = ceil(log(num) / log(2));
 513     *factors = (int *) malloc((unsigned) size * sizeof(int));
 514
 515     i = 0;
 516     /* determine all occurences of factor 2 */
 517     while((num % 2) == 0) {
 518         num /= 2;
 519         (*factors)[i++] = 2;
 520     }
 521     /* determine all occurences of uneven prime numbers up to sqrt(num) */
 522     for(d = 3; (num > 1) && (d < sqrtnum); d += 2) {
 523         while((num % d) == 0) {
 524             num /= d;
 525             (*factors)[i++] = d;
 526         }
 527     }
 528     /* as we looped only up to sqrt(num) one factor > sqrt(num) may be left over */
 529     if(num != 1) {
 530         (*factors)[i++] = num;
 531     }
 532     (*nfactors) = i;
 533     return MPI_SUCCESS;
 534 }
 535