src/smpi/smpi_topo.cpp

   1 /* Copyright (c) 2014. The SimGrid Team.
   2  * All rights reserved.                                                     */
   3
   4 /* This program is free software; you can redistribute it and/or modify it
   5  * under the terms of the license (GNU LGPL) which comes with this package. */
   6
   7 #include "xbt/sysdep.h"
   8 #include "smpi/smpi.h"
   9 #include "private.h"
  10
  11 #include <math.h>
  12
  13 typedef struct s_smpi_mpi_cart_topology {
  14     int nnodes;
  15     int ndims;
  16     int *dims;
  17     int *periodic;
  18     int *position;
  19 } s_smpi_mpi_cart_topology_t;
  20
  21 typedef struct s_smpi_mpi_graph_topology {
  22     int nnodes;
  23     int nedges;
  24     int *index;
  25     int *edges;
  26 } s_smpi_mpi_graph_topology_t;
  27
  28 typedef struct s_smpi_dist_graph_topology {
  29     int indegree;
  30     int *in;
  31     int *in_weights;
  32     int outdegree;
  33     int *out;
  34     int *out_weights;
  35     int is_weighted;
  36 } s_smpi_mpi_dist_graph_topology_t;
  37
  38 typedef struct s_smpi_mpi_topology {
  39     MPIR_Topo_type kind;
  40     union topo {
  41         MPIR_Graph_Topology graph;
  42         MPIR_Cart_Topology  cart;
  43         MPIR_Dist_Graph_Topology dist_graph;
  44     } topo;
  45 } s_smpi_mpi_topology_t;
  46
  47 void smpi_topo_destroy(MPI_Topology topo) {
  48     if(topo == NULL) {
  49         return;
  50     }
  51     switch (topo->kind) {
  52     case MPI_GRAPH:
  53         // Not implemented
  54         // smpi_graph_topo_destroy(topo->topo.graph);
  55         break;
  56     case MPI_CART:
  57         smpi_cart_topo_destroy(topo->topo.cart);
  58         break;
  59     case MPI_DIST_GRAPH:
  60         // Not implemented
  61         // smpi_dist_graph_topo_destroy(topo->topo.dist_graph);
  62         break;
  63     default:
  64         return;
  65         break;
  66     }
  67 }
  68
  69 MPI_Topology smpi_topo_create(MPIR_Topo_type kind) {
  70     MPI_Topology newTopo = static_cast<MPI_Topology>(xbt_malloc(sizeof(*newTopo)));
  71     newTopo->kind = kind;
  72     // Allocate and initialize the right topo should be done by the caller
  73     return newTopo;
  74 }
  75
  76 /*******************************************************************************
  77  * Cartesian topologies
  78  ******************************************************************************/
  79 void smpi_cart_topo_destroy(MPIR_Cart_Topology cart) {
  80     if (cart) {
  81         if(cart->dims) {
  82             xbt_free(cart->dims);
  83         }
  84         if(cart->periodic) {
  85             xbt_free(cart->periodic);
  86         }
  87         if(cart->position) {
  88             xbt_free(cart->position);
  89         }
  90         xbt_free(cart);
  91     }
  92 }
  93
  94 MPI_Topology smpi_cart_topo_create(int ndims) {
  95     MPI_Topology newTopo = smpi_topo_create(MPI_CART);
  96     MPIR_Cart_Topology newCart = static_cast<MPIR_Cart_Topology>(xbt_malloc(sizeof(*newCart)));
  97     newCart->nnodes = 0;
  98     newCart->ndims = ndims;
  99     newCart->dims = xbt_new(int, ndims);
 100     newCart->periodic = xbt_new(int, ndims);
 101     newCart->position = xbt_new(int, ndims);
 102     newTopo->topo.cart = newCart;
 103     return newTopo;
 104 }
 105
 106 /* reorder is ignored, don't know what would be the consequences of a dumb
 107  * reordering but neither do I see the point of reordering*/
 108 int smpi_mpi_cart_create(MPI_Comm comm_old, int ndims, int dims[], int periods[], int reorder, MPI_Comm *comm_cart) {
 109     int retval = MPI_SUCCESS;
 110     int i;
 111     MPI_Topology newCart;
 112     MPI_Group newGroup, oldGroup;
 113     int rank, nranks, newSize;
 114
 115     rank = smpi_comm_rank(comm_old);
 116
 117     newSize = 1;
 118     if(ndims != 0) {
 119         for (i = 0 ; i < ndims ; i++) {
 120             newSize *= dims[i];
 121         }
 122         if(rank >= newSize) {
 123             *comm_cart = MPI_COMM_NULL;
 124             return retval;
 125         }
 126         newCart = smpi_cart_topo_create(ndims);
 127         oldGroup = smpi_comm_group(comm_old);
 128         newGroup = smpi_group_new(newSize);
 129         for (i = 0 ; i < newSize ; i++) {
 130             smpi_group_set_mapping(newGroup, smpi_group_index(oldGroup, i), i);
 131         }
 132
 133         newCart->topo.cart->nnodes = newSize;
 134
 135         //  FIXME : code duplication... See smpi_mpi_cart_coords
 136         nranks = newSize;
 137         for (i=0; i<ndims; i++) {
 138             newCart->topo.cart->dims[i] = dims[i];
 139             newCart->topo.cart->periodic[i] = periods[i];
 140             nranks = nranks / dims[i];
 141             /* FIXME: nranks could be zero (?) */
 142             newCart->topo.cart->position[i] = rank / nranks;
 143             rank = rank % nranks;
 144         }
 145
 146         *comm_cart = smpi_comm_new(newGroup, newCart);
 147     } else {
 148         if (rank == 0) {
 149             newCart = smpi_cart_topo_create(ndims);
 150             *comm_cart = smpi_comm_new(smpi_group_copy(smpi_comm_group(MPI_COMM_SELF)), newCart);
 151         } else {
 152             *comm_cart = MPI_COMM_NULL;
 153         }
 154     }
 155     return retval;
 156 }
 157
 158 int smpi_mpi_cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *newcomm) {
 159     MPI_Topology oldTopo = smpi_comm_topo(comm);
 160     int oldNDims = oldTopo->topo.cart->ndims;
 161     int i, j = 0, newNDims, *newDims = NULL, *newPeriodic = NULL;
 162
 163     if (remain_dims == NULL && oldNDims != 0) {
 164         return MPI_ERR_ARG;
 165     }
 166     newNDims = 0;
 167     for (i = 0 ; i < oldNDims ; i++) {
 168         if (remain_dims[i])
 169             newNDims++;
 170     }
 171
 172     if (newNDims > 0) {
 173         newDims = xbt_new(int, newNDims);
 174         newPeriodic = xbt_new(int, newNDims);
 175
 176         // that should not segfault
 177         for (i = 0 ; j < newNDims ; i++) {
 178             if(remain_dims[i]) {
 179                 newDims[j] = oldTopo->topo.cart->dims[i];
 180                 newPeriodic[j] = oldTopo->topo.cart->periodic[i];
 181                 j++;
 182             }
 183         }
 184     }
 185     return smpi_mpi_cart_create(comm, newNDims, newDims, newPeriodic, 0, newcomm);
 186 }
 187
 188 int smpi_mpi_cart_coords(MPI_Comm comm, int rank, int maxdims, int coords[]) {
 189     int nnodes;
 190
 191     MPI_Topology topo = smpi_comm_topo(comm);
 192
 193     nnodes = topo->topo.cart->nnodes;
 194     for (int i=0; i < topo->topo.cart->ndims; i++ ) {
 195         nnodes    = nnodes / topo->topo.cart->dims[i];
 196         coords[i] = rank / nnodes;
 197         rank      = rank % nnodes;
 198     }
 199     return MPI_SUCCESS;
 200 }
 201
 202 int smpi_mpi_cart_get(MPI_Comm comm, int maxdims, int* dims, int* periods, int* coords) {
 203     MPI_Topology topo = smpi_comm_topo(comm);
 204     int ndims=topo->topo.cart->ndims < maxdims ? topo->topo.cart->ndims : maxdims;
 205     for(int i = 0 ; i < ndims ; i++) {
 206         dims[i] = topo->topo.cart->dims[i];
 207         periods[i] = topo->topo.cart->periodic[i];
 208         coords[i] = topo->topo.cart->position[i];
 209     }
 210     return MPI_SUCCESS;
 211 }
 212
 213 int smpi_mpi_cart_rank(MPI_Comm comm, int* coords, int* rank) {
 214     MPI_Topology topo = smpi_comm_topo(comm);
 215     int ndims = topo->topo.cart->ndims;
 216     int multiplier, coord,i;
 217     *rank = 0;
 218     multiplier = 1;
 219
 220     for ( i=ndims-1; i >=0; i-- ) {
 221         coord = coords[i];
 222
 223         /* The user can give us whatever coordinates he wants. If one of them is out of range, either this dimension is
 224          * periodic, and we consider the equivalent coordinate inside the bounds, or it's not and then it's an error
 225          */
 226         if (coord >= topo->topo.cart->dims[i]) {
 227             if ( topo->topo.cart->periodic[i] ) {
 228                 coord = coord % topo->topo.cart->dims[i];
 229             } else {
 230                 // Should I do that ?
 231                 *rank = -1;
 232                 return MPI_ERR_ARG;
 233             }
 234         } else if (coord <  0) {
 235             if(topo->topo.cart->periodic[i]) {
 236                 coord = coord % topo->topo.cart->dims[i];
 237                 if (coord)
 238                     coord = topo->topo.cart->dims[i] + coord;
 239             } else {
 240                 *rank = -1;
 241                 return MPI_ERR_ARG;
 242             }
 243         }
 244
 245         *rank += multiplier * coord;
 246         multiplier *= topo->topo.cart->dims[i];
 247     }
 248     return MPI_SUCCESS;
 249 }
 250
 251 int smpi_mpi_cart_shift(MPI_Comm comm, int direction, int disp, int *rank_source, int *rank_dest) {
 252     MPI_Topology topo = smpi_comm_topo(comm);
 253     int position[topo->topo.cart->ndims];
 254
 255     if(topo->topo.cart->ndims == 0) {
 256         return MPI_ERR_ARG;
 257     }
 258     if (topo->topo.cart->ndims < direction) {
 259         return MPI_ERR_DIMS;
 260     }
 261
 262     smpi_mpi_cart_coords(comm, smpi_comm_rank(comm), topo->topo.cart->ndims, position);
 263     position[direction] += disp;
 264
 265     if(position[direction] < 0 ||
 266        position[direction] >= topo->topo.cart->dims[direction]) {
 267         if(topo->topo.cart->periodic[direction]) {
 268             position[direction] %= topo->topo.cart->dims[direction];
 269             smpi_mpi_cart_rank(comm, position, rank_dest);
 270         } else {
 271             *rank_dest = MPI_PROC_NULL;
 272         }
 273     } else {
 274         smpi_mpi_cart_rank(comm, position, rank_dest);
 275     }
 276
 277     position[direction] =  topo->topo.cart->position[direction] - disp;
 278     if(position[direction] < 0 || position[direction] >= topo->topo.cart->dims[direction]) {
 279         if(topo->topo.cart->periodic[direction]) {
 280             position[direction] %= topo->topo.cart->dims[direction];
 281             smpi_mpi_cart_rank(comm, position, rank_source);
 282         } else {
 283             *rank_source = MPI_PROC_NULL;
 284         }
 285     } else {
 286         smpi_mpi_cart_rank(comm, position, rank_source);
 287     }
 288
 289     return MPI_SUCCESS;
 290 }
 291
 292 int smpi_mpi_cartdim_get(MPI_Comm comm, int *ndims) {
 293     MPI_Topology topo = smpi_comm_topo(comm);
 294
 295     *ndims = topo->topo.cart->ndims;
 296     return MPI_SUCCESS;
 297 }
 298
 299 // Everything below has been taken from ompi, but could be easily rewritten.
 300
 301 /*
 302  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 303  *                         University Research and Technology
 304  *                         Corporation.  All rights reserved.
 305  * Copyright (c) 2004-2005 The University of Tennessee and The University
 306  *                         of Tennessee Research Foundation.  All rights
 307  *                         reserved.
 308  * Copyright (c) 2004-2014 High Performance Computing Center Stuttgart,
 309  *                         University of Stuttgart.  All rights reserved.
 310  * Copyright (c) 2004-2005 The Regents of the University of California.
 311  *                         All rights reserved.
 312  * Copyright (c) 2012      Los Alamos National Security, LLC.  All rights
 313  *                         reserved.
 314  * Copyright (c) 2014      Intel, Inc. All rights reserved
 315  * $COPYRIGHT$
 316  *
 317  * Additional copyrights may follow
 318  *
 319  * $HEADER$
 320  */
 321
 322 /* static functions */
 323 static int assignnodes(int ndim, int nfactor, int *pfacts,int **pdims);
 324 static int getfactors(int num, int *nfators, int **factors);
 325
 326 /*
 327  * This is a utility function, no need to have anything in the lower
 328  * layer for this at all
 329  */
 330 int smpi_mpi_dims_create(int nnodes, int ndims, int dims[])
 331 {
 332     int i;
 333     int freeprocs;
 334     int freedims;
 335     int nfactors;
 336     int *factors;
 337     int *procs;
 338     int *p;
 339     int err;
 340
 341     /* Get # of free-to-be-assigned processes and # of free dimensions */
 342     freeprocs = nnodes;
 343     freedims = 0;
 344     for (i = 0, p = dims; i < ndims; ++i,++p) {
 345         if (*p == 0) {
 346             ++freedims;
 347         } else if ((*p < 0) || ((nnodes % *p) != 0)) {
 348             return  MPI_ERR_DIMS;
 349
 350         } else {
 351             freeprocs /= *p;
 352         }
 353     }
 354
 355     if (freedims == 0) {
 356         if (freeprocs == 1) {
 357             return MPI_SUCCESS;
 358         }
 359         return MPI_ERR_DIMS;
 360     }
 361
 362     if (freeprocs == 1) {
 363         for (i = 0; i < ndims; ++i, ++dims) {
 364             if (*dims == 0) {
 365                 *dims = 1;
 366             }
 367         }
 368         return MPI_SUCCESS;
 369     }
 370
 371     /* Factor the number of free processes */
 372     if (MPI_SUCCESS != (err = getfactors(freeprocs, &nfactors, &factors))) {
 373         return  err;
 374     }
 375
 376     /* Assign free processes to free dimensions */
 377     if (MPI_SUCCESS != (err = assignnodes(freedims, nfactors, factors, &procs))) {
 378         return err;
 379     }
 380
 381     /* Return assignment results */
 382     p = procs;
 383     for (i = 0; i < ndims; ++i, ++dims) {
 384         if (*dims == 0) {
 385             *dims = *p++;
 386         }
 387     }
 388
 389     free((char *) factors);
 390     free((char *) procs);
 391
 392     /* all done */
 393     return MPI_SUCCESS;
 394 }
 395
 396 /*
 397  *  assignnodes
 398  *
 399  *  Function:   - assign processes to dimensions
 400  *          - get "best-balanced" grid
 401  *          - greedy bin-packing algorithm used
 402  *          - sort dimensions in decreasing order
 403  *          - dimensions array dynamically allocated
 404  *  Accepts:    - # of dimensions
 405  *          - # of prime factors
 406  *          - array of prime factors
 407  *          - ptr to array of dimensions (returned value)
 408  *  Returns:    - 0 or ERROR
 409  */
 410 static int
 411 assignnodes(int ndim, int nfactor, int *pfacts, int **pdims)
 412 {
 413     int *bins;
 414     int i, j;
 415     int n;
 416     int f;
 417     int *p;
 418     int *pmin;
 419
 420     if (0 >= ndim) {
 421         return MPI_ERR_DIMS;
 422     }
 423
 424     /* Allocate and initialize the bins */
 425     bins = (int *) malloc((unsigned) ndim * sizeof(int));
 426     if (NULL == bins) {
 427         return MPI_ERR_NO_MEM;
 428     }
 429     *pdims = bins;
 430
 431     for (i = 0, p = bins; i < ndim; ++i, ++p) {
 432         *p = 1;
 433     }
 434
 435     /* Loop assigning factors from the highest to the lowest */
 436     for (j = nfactor - 1; j >= 0; --j) {
 437         f = pfacts[j];
 438         /* Assign a factor to the smallest bin */
 439         pmin = bins;
 440         for (i = 1, p = pmin + 1; i < ndim; ++i, ++p) {
 441             if (*p < *pmin) {
 442                 pmin = p;
 443             }
 444         }
 445         *pmin *= f;
 446     }
 447
 448     /* Sort dimensions in decreasing order (O(n^2) for now) */
 449     for (i = 0, pmin = bins; i < ndim - 1; ++i, ++pmin) {
 450         for (j = i + 1, p = pmin + 1; j < ndim; ++j, ++p) {
 451             if (*p > *pmin) {
 452                 n = *p;
 453                 *p = *pmin;
 454                 *pmin = n;
 455             }
 456         }
 457     }
 458
 459     return MPI_SUCCESS;
 460 }
 461
 462 /*
 463  *  getfactors
 464  *
 465  *  Function:   - factorize a number
 466  *  Accepts:    - number
 467  *          - # prime factors
 468  *          - array of prime factors
 469  *  Returns:    - MPI_SUCCESS or ERROR
 470  */
 471 static int getfactors(int num, int *nfactors, int **factors) {
 472     int size;
 473     int d;
 474     int i;
 475     int sqrtnum;
 476
 477     if(num  < 2) {
 478         (*nfactors) = 0;
 479         (*factors) = NULL;
 480         return MPI_SUCCESS;
 481     }
 482     /* Allocate the array of prime factors which cannot exceed log_2(num) entries */
 483     sqrtnum = ceil(sqrt(num));
 484     size = ceil(log(num) / log(2));
 485     *factors = (int *) malloc((unsigned) size * sizeof(int));
 486
 487     i = 0;
 488     /* determine all occurences of factor 2 */
 489     while((num % 2) == 0) {
 490         num /= 2;
 491         (*factors)[i++] = 2;
 492     }
 493     /* determine all occurences of uneven prime numbers up to sqrt(num) */
 494     for(d = 3; (num > 1) && (d < sqrtnum); d += 2) {
 495         while((num % d) == 0) {
 496             num /= d;
 497             (*factors)[i++] = d;
 498         }
 499     }
 500     /* as we looped only up to sqrt(num) one factor > sqrt(num) may be left over */
 501     if(num != 1) {
 502         (*factors)[i++] = num;
 503     }
 504     (*nfactors) = i;
 505     return MPI_SUCCESS;
 506 }