src/smpi/smpi_topo.cpp

   1 /* Copyright (c) 2014. The SimGrid Team.
   2  * All rights reserved.                                                     */
   3
   4 /* This program is free software; you can redistribute it and/or modify it
   5  * under the terms of the license (GNU LGPL) which comes with this package. */
   6
   7 #include "xbt/sysdep.h"
   8 #include "smpi/smpi.h"
   9 #include "private.h"
  10
  11 #include <math.h>
  12
  13 typedef struct s_smpi_mpi_cart_topology {
  14     int nnodes;
  15     int ndims;
  16     int *dims;
  17     int *periodic;
  18     int *position;
  19 } s_smpi_mpi_cart_topology_t;
  20
  21 typedef struct s_smpi_mpi_graph_topology {
  22     int nnodes;
  23     int nedges;
  24     int *index;
  25     int *edges;
  26 } s_smpi_mpi_graph_topology_t;
  27
  28 typedef struct s_smpi_dist_graph_topology {
  29     int indegree;
  30     int *in;
  31     int *in_weights;
  32     int outdegree;
  33     int *out;
  34     int *out_weights;
  35     int is_weighted;
  36 } s_smpi_mpi_dist_graph_topology_t;
  37
  38 typedef struct s_smpi_mpi_topology {
  39     MPIR_Topo_type kind;
  40     union topo {
  41         MPIR_Graph_Topology graph;
  42         MPIR_Cart_Topology  cart;
  43         MPIR_Dist_Graph_Topology dist_graph;
  44     } topo;
  45 } s_smpi_mpi_topology_t;
  46
  47 void smpi_topo_destroy(MPI_Topology topo) {
  48     if(topo == NULL) {
  49         return;
  50     }
  51     switch (topo->kind) {
  52     case MPI_GRAPH:
  53         // Not implemented
  54         // smpi_graph_topo_destroy(topo->topo.graph);
  55         break;
  56     case MPI_CART:
  57         smpi_cart_topo_destroy(topo->topo.cart);
  58         break;
  59     case MPI_DIST_GRAPH:
  60         // Not implemented
  61         // smpi_dist_graph_topo_destroy(topo->topo.dist_graph);
  62         break;
  63     default:
  64         return;
  65         break;
  66     }
  67 }
  68
  69 MPI_Topology smpi_topo_create(MPIR_Topo_type kind) {
  70     MPI_Topology newTopo = static_cast<MPI_Topology>(xbt_malloc(sizeof(*newTopo)));
  71     newTopo->kind = kind;
  72     // Allocate and initialize the right topo should be done by the caller
  73     return newTopo;
  74 }
  75
  76 /*******************************************************************************
  77  * Cartesian topologies
  78  ******************************************************************************/
  79 void smpi_cart_topo_destroy(MPIR_Cart_Topology cart) {
  80     if (cart) {
  81         if(cart->dims) {
  82             free(cart->dims);
  83         }
  84         if(cart->periodic) {
  85             free(cart->periodic);
  86         }
  87         if(cart->position) {
  88             free(cart->position);
  89         }
  90         free(cart);
  91     }
  92 }
  93
  94 MPI_Topology smpi_cart_topo_create(int ndims) {
  95     MPI_Topology newTopo = smpi_topo_create(MPI_CART);
  96     MPIR_Cart_Topology newCart = static_cast<MPIR_Cart_Topology>(xbt_malloc(sizeof(*newCart)));
  97     newCart->nnodes = 0;
  98     newCart->ndims = ndims;
  99     newCart->dims = xbt_new(int, ndims);
 100     newCart->periodic = xbt_new(int, ndims);
 101     newCart->position = xbt_new(int, ndims);
 102     newTopo->topo.cart = newCart;
 103     return newTopo;
 104 }
 105
 106 /* reorder is ignored, don't know what would be the consequences of a dumb
 107  * reordering but neither do I see the point of reordering*/
 108 int smpi_mpi_cart_create(MPI_Comm comm_old, int ndims, int dims[], int periods[], int reorder, MPI_Comm *comm_cart) {
 109     int retval = MPI_SUCCESS;
 110     int i;
 111     MPI_Topology newCart;
 112     MPI_Group newGroup, oldGroup;
 113     int rank, nranks, newSize;
 114
 115     rank = smpi_comm_rank(comm_old);
 116
 117     newSize = 1;
 118     if(ndims != 0) {
 119         for (i = 0 ; i < ndims ; i++) {
 120             newSize *= dims[i];
 121         }
 122         if(rank >= newSize) {
 123             *comm_cart = MPI_COMM_NULL;
 124             return retval;
 125         }
 126         newCart = smpi_cart_topo_create(ndims);
 127         oldGroup = smpi_comm_group(comm_old);
 128         newGroup = smpi_group_new(newSize);
 129         for (i = 0 ; i < newSize ; i++) {
 130             smpi_group_set_mapping(newGroup, smpi_group_index(oldGroup, i), i);
 131         }
 132
 133         newCart->topo.cart->nnodes = newSize;
 134
 135         /* memcpy(newCart->topo.cart->dims, dims, ndims * sizeof(*newCart->topo.cart->dims)); */
 136         /* memcpy(newCart->topo.cart->periodic, periods, ndims * sizeof(*newCart->topo.cart->periodic)); */
 137
 138         //  FIXME : code duplication... See smpi_mpi_cart_coords
 139         nranks = newSize;
 140         for (i=0; i<ndims; i++) {
 141             newCart->topo.cart->dims[i] = dims[i];
 142             newCart->topo.cart->periodic[i] = periods[i];
 143             nranks = nranks / dims[i];
 144             /* FIXME: nranks could be zero (?) */
 145             newCart->topo.cart->position[i] = rank / nranks;
 146             rank = rank % nranks;
 147         }
 148
 149         *comm_cart = smpi_comm_new(newGroup, newCart);
 150     } else {
 151         if (rank == 0) {
 152             newCart = smpi_cart_topo_create(ndims);
 153             *comm_cart = smpi_comm_new(smpi_comm_group(MPI_COMM_SELF), newCart);
 154         } else {
 155             *comm_cart = MPI_COMM_NULL;
 156         }
 157     }
 158     return retval;
 159 }
 160
 161 int smpi_mpi_cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *newcomm) {
 162     MPI_Topology oldTopo = smpi_comm_topo(comm);
 163     int oldNDims = oldTopo->topo.cart->ndims;
 164     int i, j = 0, newNDims, *newDims = NULL, *newPeriodic = NULL;
 165
 166     if (remain_dims == NULL && oldNDims != 0) {
 167         return MPI_ERR_ARG;
 168     }
 169     newNDims = 0;
 170     for (i = 0 ; i < oldNDims ; i++) {
 171         if (remain_dims[i]) newNDims++;
 172     }
 173
 174     if (newNDims > 0) {
 175         newDims = xbt_new(int, newNDims);
 176         newPeriodic = xbt_new(int, newNDims);
 177
 178         // that should not segfault
 179         for (i = 0 ; j < newNDims ; i++) {
 180             if(remain_dims[i]) {
 181                 newDims[j] = oldTopo->topo.cart->dims[i];
 182                 newPeriodic[j] = oldTopo->topo.cart->periodic[i];
 183                 j++;
 184             }
 185         }
 186     }
 187     return smpi_mpi_cart_create(comm, newNDims, newDims, newPeriodic, 0, newcomm);
 188 }
 189
 190 int smpi_mpi_cart_coords(MPI_Comm comm, int rank, int maxdims, int coords[]) {
 191     int nnodes;
 192
 193     MPI_Topology topo = smpi_comm_topo(comm);
 194
 195     nnodes = topo->topo.cart->nnodes;
 196     for (int i=0; i < topo->topo.cart->ndims; i++ ) {
 197         nnodes    = nnodes / topo->topo.cart->dims[i];
 198         coords[i] = rank / nnodes;
 199         rank      = rank % nnodes;
 200     }
 201     return MPI_SUCCESS;
 202 }
 203
 204 int smpi_mpi_cart_get(MPI_Comm comm, int maxdims, int* dims, int* periods, int* coords) {
 205     MPI_Topology topo = smpi_comm_topo(comm);
 206
 207     for(int i = 0 ; i < maxdims ; i++) {
 208         dims[i] = topo->topo.cart->dims[i];
 209         periods[i] = topo->topo.cart->periodic[i];
 210         coords[i] = topo->topo.cart->position[i];
 211     }
 212     return MPI_SUCCESS;
 213 }
 214
 215 int smpi_mpi_cart_rank(MPI_Comm comm, int* coords, int* rank) {
 216     MPI_Topology topo = smpi_comm_topo(comm);
 217     int ndims = topo->topo.cart->ndims;
 218     int multiplier, coord,i;
 219     *rank = 0;
 220     multiplier = 1;
 221
 222     for ( i=ndims-1; i >=0; i-- ) {
 223         coord = coords[i];
 224
 225         /* The user can give us whatever coordinates he wants. If one of them is out of range, either this dimension is
 226          * periodic, and we consider the equivalent coordinate inside the bounds, or it's not and then it's an error
 227          */
 228         if (coord >= topo->topo.cart->dims[i]) {
 229             if ( topo->topo.cart->periodic[i] ) {
 230                 coord = coord % topo->topo.cart->dims[i];
 231             } else {
 232                 // Should I do that ?
 233                 *rank = -1;
 234                 return MPI_ERR_ARG;
 235             }
 236         } else if (coord <  0) {
 237             if(topo->topo.cart->periodic[i]) {
 238                 coord = coord % topo->topo.cart->dims[i];
 239                 if (coord) coord = topo->topo.cart->dims[i] + coord;
 240             } else {
 241                 *rank = -1;
 242                 return MPI_ERR_ARG;
 243             }
 244         }
 245
 246         *rank += multiplier * coord;
 247         multiplier *= topo->topo.cart->dims[i];
 248     }
 249     return MPI_SUCCESS;
 250 }
 251
 252 int smpi_mpi_cart_shift(MPI_Comm comm, int direction, int disp, int *rank_source, int *rank_dest) {
 253     MPI_Topology topo = smpi_comm_topo(comm);
 254     int position[topo->topo.cart->ndims];
 255
 256     if(topo->topo.cart->ndims == 0) {
 257         return MPI_ERR_ARG;
 258     }
 259     if (topo->topo.cart->ndims < direction) {
 260         return MPI_ERR_DIMS;
 261     }
 262
 263     smpi_mpi_cart_coords(comm, smpi_comm_rank(comm), topo->topo.cart->ndims, position);
 264     position[direction] += disp;
 265
 266     if(position[direction] < 0 ||
 267        position[direction] >= topo->topo.cart->dims[direction]) {
 268         if(topo->topo.cart->periodic[direction]) {
 269             position[direction] %= topo->topo.cart->dims[direction];
 270             smpi_mpi_cart_rank(comm, position, rank_dest);
 271         } else {
 272             *rank_dest = MPI_PROC_NULL;
 273         }
 274     } else {
 275         smpi_mpi_cart_rank(comm, position, rank_dest);
 276     }
 277
 278     position[direction] =  topo->topo.cart->position[direction] - disp;
 279     if(position[direction] < 0 || position[direction] >= topo->topo.cart->dims[direction]) {
 280         if(topo->topo.cart->periodic[direction]) {
 281             position[direction] %= topo->topo.cart->dims[direction];
 282             smpi_mpi_cart_rank(comm, position, rank_source);
 283         } else {
 284             *rank_source = MPI_PROC_NULL;
 285         }
 286     } else {
 287         smpi_mpi_cart_rank(comm, position, rank_source);
 288     }
 289
 290     return MPI_SUCCESS;
 291 }
 292
 293 int smpi_mpi_cartdim_get(MPI_Comm comm, int *ndims) {
 294     MPI_Topology topo = smpi_comm_topo(comm);
 295
 296     *ndims = topo->topo.cart->ndims;
 297     return MPI_SUCCESS;
 298 }
 299
 300 // Everything below has been taken from ompi, but could be easily rewritten.
 301
 302 /*
 303  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 304  *                         University Research and Technology
 305  *                         Corporation.  All rights reserved.
 306  * Copyright (c) 2004-2005 The University of Tennessee and The University
 307  *                         of Tennessee Research Foundation.  All rights
 308  *                         reserved.
 309  * Copyright (c) 2004-2014 High Performance Computing Center Stuttgart,
 310  *                         University of Stuttgart.  All rights reserved.
 311  * Copyright (c) 2004-2005 The Regents of the University of California.
 312  *                         All rights reserved.
 313  * Copyright (c) 2012      Los Alamos National Security, LLC.  All rights
 314  *                         reserved.
 315  * Copyright (c) 2014      Intel, Inc. All rights reserved
 316  * $COPYRIGHT$
 317  *
 318  * Additional copyrights may follow
 319  *
 320  * $HEADER$
 321  */
 322
 323 /* static functions */
 324 static int assignnodes(int ndim, int nfactor, int *pfacts,int **pdims);
 325 static int getfactors(int num, int *nfators, int **factors);
 326
 327 /*
 328  * This is a utility function, no need to have anything in the lower
 329  * layer for this at all
 330  */
 331 int smpi_mpi_dims_create(int nnodes, int ndims, int dims[])
 332 {
 333     int i;
 334     int freeprocs;
 335     int freedims;
 336     int nfactors;
 337     int *factors;
 338     int *procs;
 339     int *p;
 340     int err;
 341
 342     /* Get # of free-to-be-assigned processes and # of free dimensions */
 343     freeprocs = nnodes;
 344     freedims = 0;
 345     for (i = 0, p = dims; i < ndims; ++i,++p) {
 346         if (*p == 0) {
 347             ++freedims;
 348         } else if ((*p < 0) || ((nnodes % *p) != 0)) {
 349             return  MPI_ERR_DIMS;
 350
 351         } else {
 352             freeprocs /= *p;
 353         }
 354     }
 355
 356     if (freedims == 0) {
 357         if (freeprocs == 1) {
 358             return MPI_SUCCESS;
 359         }
 360         return MPI_ERR_DIMS;
 361     }
 362
 363     if (freeprocs == 1) {
 364         for (i = 0; i < ndims; ++i, ++dims) {
 365             if (*dims == 0) {
 366                 *dims = 1;
 367             }
 368         }
 369         return MPI_SUCCESS;
 370     }
 371
 372     /* Factor the number of free processes */
 373     if (MPI_SUCCESS != (err = getfactors(freeprocs, &nfactors, &factors))) {
 374         return  err;
 375     }
 376
 377     /* Assign free processes to free dimensions */
 378     if (MPI_SUCCESS != (err = assignnodes(freedims, nfactors, factors, &procs))) {
 379         return err;
 380     }
 381
 382     /* Return assignment results */
 383     p = procs;
 384     for (i = 0; i < ndims; ++i, ++dims) {
 385         if (*dims == 0) {
 386             *dims = *p++;
 387         }
 388     }
 389
 390     free((char *) factors);
 391     free((char *) procs);
 392
 393     /* all done */
 394     return MPI_SUCCESS;
 395 }
 396
 397 /*
 398  *  assignnodes
 399  *
 400  *  Function:   - assign processes to dimensions
 401  *          - get "best-balanced" grid
 402  *          - greedy bin-packing algorithm used
 403  *          - sort dimensions in decreasing order
 404  *          - dimensions array dynamically allocated
 405  *  Accepts:    - # of dimensions
 406  *          - # of prime factors
 407  *          - array of prime factors
 408  *          - ptr to array of dimensions (returned value)
 409  *  Returns:    - 0 or ERROR
 410  */
 411 static int
 412 assignnodes(int ndim, int nfactor, int *pfacts, int **pdims)
 413 {
 414     int *bins;
 415     int i, j;
 416     int n;
 417     int f;
 418     int *p;
 419     int *pmin;
 420
 421     if (0 >= ndim) {
 422         return MPI_ERR_DIMS;
 423     }
 424
 425     /* Allocate and initialize the bins */
 426     bins = (int *) malloc((unsigned) ndim * sizeof(int));
 427     if (NULL == bins) {
 428         return MPI_ERR_NO_MEM;
 429     }
 430     *pdims = bins;
 431
 432     for (i = 0, p = bins; i < ndim; ++i, ++p) {
 433         *p = 1;
 434     }
 435
 436     /* Loop assigning factors from the highest to the lowest */
 437     for (j = nfactor - 1; j >= 0; --j) {
 438         f = pfacts[j];
 439         /* Assign a factor to the smallest bin */
 440         pmin = bins;
 441         for (i = 1, p = pmin + 1; i < ndim; ++i, ++p) {
 442             if (*p < *pmin) {
 443                 pmin = p;
 444             }
 445         }
 446         *pmin *= f;
 447     }
 448
 449     /* Sort dimensions in decreasing order (O(n^2) for now) */
 450     for (i = 0, pmin = bins; i < ndim - 1; ++i, ++pmin) {
 451         for (j = i + 1, p = pmin + 1; j < ndim; ++j, ++p) {
 452             if (*p > *pmin) {
 453                 n = *p;
 454                 *p = *pmin;
 455                 *pmin = n;
 456             }
 457         }
 458     }
 459
 460     return MPI_SUCCESS;
 461 }
 462
 463 /*
 464  *  getfactors
 465  *
 466  *  Function:   - factorize a number
 467  *  Accepts:    - number
 468  *          - # prime factors
 469  *          - array of prime factors
 470  *  Returns:    - MPI_SUCCESS or ERROR
 471  */
 472 static int getfactors(int num, int *nfactors, int **factors) {
 473     int size;
 474     int d;
 475     int i;
 476     int sqrtnum;
 477
 478     if(num  < 2) {
 479         (*nfactors) = 0;
 480         (*factors) = NULL;
 481         return MPI_SUCCESS;
 482     }
 483     /* Allocate the array of prime factors which cannot exceed log_2(num) entries */
 484     sqrtnum = ceil(sqrt(num));
 485     size = ceil(log(num) / log(2));
 486     *factors = (int *) malloc((unsigned) size * sizeof(int));
 487
 488     i = 0;
 489     /* determine all occurences of factor 2 */
 490     while((num % 2) == 0) {
 491         num /= 2;
 492         (*factors)[i++] = 2;
 493     }
 494     /* determine all occurences of uneven prime numbers up to sqrt(num) */
 495     for(d = 3; (num > 1) && (d < sqrtnum); d += 2) {
 496         while((num % d) == 0) {
 497             num /= d;
 498             (*factors)[i++] = d;
 499         }
 500     }
 501     /* as we looped only up to sqrt(num) one factor > sqrt(num) may be left over */
 502     if(num != 1) {
 503         (*factors)[i++] = num;
 504     }
 505     (*nfactors) = i;
 506     return MPI_SUCCESS;
 507 }