src/smpi/smpi_topo.cpp

   1 /* Copyright (c) 2014. The SimGrid Team.
   2  * All rights reserved.                                                     */
   3
   4 /* This program is free software; you can redistribute it and/or modify it
   5  * under the terms of the license (GNU LGPL) which comes with this package. */
   6
   7 #include "xbt/sysdep.h"
   8 #include "smpi/smpi.h"
   9 #include "private.h"
  10
  11 #include <math.h>
  12
  13 typedef struct s_smpi_mpi_cart_topology {
  14     int nnodes;
  15     int ndims;
  16     int *dims;
  17     int *periodic;
  18     int *position;
  19 } s_smpi_mpi_cart_topology_t;
  20
  21 typedef struct s_smpi_mpi_graph_topology {
  22     int nnodes;
  23     int nedges;
  24     int *index;
  25     int *edges;
  26 } s_smpi_mpi_graph_topology_t;
  27
  28 typedef struct s_smpi_dist_graph_topology {
  29     int indegree;
  30     int *in;
  31     int *in_weights;
  32     int outdegree;
  33     int *out;
  34     int *out_weights;
  35     int is_weighted;
  36 } s_smpi_mpi_dist_graph_topology_t;
  37
  38 typedef struct s_smpi_mpi_topology {
  39     MPIR_Topo_type kind;
  40     union topo {
  41         MPIR_Graph_Topology graph;
  42         MPIR_Cart_Topology  cart;
  43         MPIR_Dist_Graph_Topology dist_graph;
  44     } topo;
  45 } s_smpi_mpi_topology_t;
  46
  47 void smpi_topo_destroy(MPI_Topology topo) {
  48     if(topo == nullptr) {
  49         return;
  50     }
  51     switch (topo->kind) {
  52     case MPI_GRAPH:
  53         // Not implemented
  54         // smpi_graph_topo_destroy(topo->topo.graph);
  55         break;
  56     case MPI_CART:
  57         smpi_cart_topo_destroy(topo->topo.cart);
  58         break;
  59     case MPI_DIST_GRAPH:
  60         // Not implemented
  61         // smpi_dist_graph_topo_destroy(topo->topo.dist_graph);
  62         break;
  63     default:
  64         return;
  65         break;
  66     }
  67     xbt_free(topo);
  68 }
  69
  70 MPI_Topology smpi_topo_create(MPIR_Topo_type kind) {
  71     MPI_Topology newTopo = static_cast<MPI_Topology>(xbt_malloc(sizeof(*newTopo)));
  72     newTopo->kind = kind;
  73     // Allocate and initialize the right topo should be done by the caller
  74     return newTopo;
  75 }
  76
  77 /*******************************************************************************
  78  * Cartesian topologies
  79  ******************************************************************************/
  80 void smpi_cart_topo_destroy(MPIR_Cart_Topology cart) {
  81     if (cart) {
  82         if(cart->dims) {
  83             xbt_free(cart->dims);
  84         }
  85         if(cart->periodic) {
  86             xbt_free(cart->periodic);
  87         }
  88         if(cart->position) {
  89             xbt_free(cart->position);
  90         }
  91         xbt_free(cart);
  92     }
  93 }
  94
  95 MPI_Topology smpi_cart_topo_create(int ndims) {
  96     MPI_Topology newTopo = smpi_topo_create(MPI_CART);
  97     MPIR_Cart_Topology newCart = static_cast<MPIR_Cart_Topology>(xbt_malloc(sizeof(*newCart)));
  98     newCart->nnodes = 0;
  99     newCart->ndims = ndims;
 100     newCart->dims = xbt_new(int, ndims);
 101     newCart->periodic = xbt_new(int, ndims);
 102     newCart->position = xbt_new(int, ndims);
 103     newTopo->topo.cart = newCart;
 104     return newTopo;
 105 }
 106
 107 /* reorder is ignored, don't know what would be the consequences of a dumb
 108  * reordering but neither do I see the point of reordering*/
 109 int smpi_mpi_cart_create(MPI_Comm comm_old, int ndims, int dims[], int periods[], int reorder, MPI_Comm *comm_cart) {
 110     int retval = MPI_SUCCESS;
 111     int i;
 112     MPI_Topology newCart;
 113     MPI_Group newGroup, oldGroup;
 114     int rank, nranks, newSize;
 115
 116     rank = smpi_comm_rank(comm_old);
 117
 118     newSize = 1;
 119     if(ndims != 0) {
 120         for (i = 0 ; i < ndims ; i++) {
 121             newSize *= dims[i];
 122         }
 123         if(rank >= newSize) {
 124             *comm_cart = MPI_COMM_NULL;
 125             return retval;
 126         }
 127         newCart = smpi_cart_topo_create(ndims);
 128         oldGroup = smpi_comm_group(comm_old);
 129         newGroup = smpi_group_new(newSize);
 130         for (i = 0 ; i < newSize ; i++) {
 131             smpi_group_set_mapping(newGroup, smpi_group_index(oldGroup, i), i);
 132         }
 133
 134         newCart->topo.cart->nnodes = newSize;
 135
 136         //  FIXME : code duplication... See smpi_mpi_cart_coords
 137         nranks = newSize;
 138         for (i=0; i<ndims; i++) {
 139             newCart->topo.cart->dims[i] = dims[i];
 140             newCart->topo.cart->periodic[i] = periods[i];
 141             nranks = nranks / dims[i];
 142             /* FIXME: nranks could be zero (?) */
 143             newCart->topo.cart->position[i] = rank / nranks;
 144             rank = rank % nranks;
 145         }
 146
 147         *comm_cart = smpi_comm_new(newGroup, newCart);
 148     } else {
 149         if (rank == 0) {
 150             newCart = smpi_cart_topo_create(ndims);
 151             *comm_cart = smpi_comm_new(smpi_group_copy(smpi_comm_group(MPI_COMM_SELF)), newCart);
 152         } else {
 153             *comm_cart = MPI_COMM_NULL;
 154         }
 155     }
 156     return retval;
 157 }
 158
 159 int smpi_mpi_cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *newcomm) {
 160     MPI_Topology oldTopo = smpi_comm_topo(comm);
 161     int oldNDims = oldTopo->topo.cart->ndims;
 162     int i, j = 0, newNDims, *newDims = nullptr, *newPeriodic = nullptr;
 163
 164     if (remain_dims == nullptr && oldNDims != 0) {
 165         return MPI_ERR_ARG;
 166     }
 167     newNDims = 0;
 168     for (i = 0 ; i < oldNDims ; i++) {
 169         if (remain_dims[i])
 170             newNDims++;
 171     }
 172
 173     if (newNDims > 0) {
 174         newDims = xbt_new(int, newNDims);
 175         newPeriodic = xbt_new(int, newNDims);
 176
 177         // that should not segfault
 178         for (i = 0 ; j < newNDims ; i++) {
 179             if(remain_dims[i]) {
 180                 newDims[j] = oldTopo->topo.cart->dims[i];
 181                 newPeriodic[j] = oldTopo->topo.cart->periodic[i];
 182                 j++;
 183             }
 184         }
 185     }
 186     return smpi_mpi_cart_create(comm, newNDims, newDims, newPeriodic, 0, newcomm);
 187 }
 188
 189 int smpi_mpi_cart_coords(MPI_Comm comm, int rank, int maxdims, int coords[]) {
 190     int nnodes;
 191
 192     MPI_Topology topo = smpi_comm_topo(comm);
 193
 194     nnodes = topo->topo.cart->nnodes;
 195     for (int i=0; i < topo->topo.cart->ndims; i++ ) {
 196         nnodes    = nnodes / topo->topo.cart->dims[i];
 197         coords[i] = rank / nnodes;
 198         rank      = rank % nnodes;
 199     }
 200     return MPI_SUCCESS;
 201 }
 202
 203 int smpi_mpi_cart_get(MPI_Comm comm, int maxdims, int* dims, int* periods, int* coords) {
 204     MPI_Topology topo = smpi_comm_topo(comm);
 205     int ndims=topo->topo.cart->ndims < maxdims ? topo->topo.cart->ndims : maxdims;
 206     for(int i = 0 ; i < ndims ; i++) {
 207         dims[i] = topo->topo.cart->dims[i];
 208         periods[i] = topo->topo.cart->periodic[i];
 209         coords[i] = topo->topo.cart->position[i];
 210     }
 211     return MPI_SUCCESS;
 212 }
 213
 214 int smpi_mpi_cart_rank(MPI_Comm comm, int* coords, int* rank) {
 215     MPI_Topology topo = smpi_comm_topo(comm);
 216     int ndims = topo->topo.cart->ndims;
 217     int multiplier, coord,i;
 218     *rank = 0;
 219     multiplier = 1;
 220
 221     for ( i=ndims-1; i >=0; i-- ) {
 222         coord = coords[i];
 223
 224         /* The user can give us whatever coordinates he wants. If one of them is out of range, either this dimension is
 225          * periodic, and we consider the equivalent coordinate inside the bounds, or it's not and then it's an error
 226          */
 227         if (coord >= topo->topo.cart->dims[i]) {
 228             if ( topo->topo.cart->periodic[i] ) {
 229                 coord = coord % topo->topo.cart->dims[i];
 230             } else {
 231                 // Should I do that ?
 232                 *rank = -1;
 233                 return MPI_ERR_ARG;
 234             }
 235         } else if (coord <  0) {
 236             if(topo->topo.cart->periodic[i]) {
 237                 coord = coord % topo->topo.cart->dims[i];
 238                 if (coord)
 239                     coord = topo->topo.cart->dims[i] + coord;
 240             } else {
 241                 *rank = -1;
 242                 return MPI_ERR_ARG;
 243             }
 244         }
 245
 246         *rank += multiplier * coord;
 247         multiplier *= topo->topo.cart->dims[i];
 248     }
 249     return MPI_SUCCESS;
 250 }
 251
 252 int smpi_mpi_cart_shift(MPI_Comm comm, int direction, int disp, int *rank_source, int *rank_dest) {
 253     MPI_Topology topo = smpi_comm_topo(comm);
 254     int position[topo->topo.cart->ndims];
 255
 256     if(topo->topo.cart->ndims == 0) {
 257         return MPI_ERR_ARG;
 258     }
 259     if (topo->topo.cart->ndims < direction) {
 260         return MPI_ERR_DIMS;
 261     }
 262
 263     smpi_mpi_cart_coords(comm, smpi_comm_rank(comm), topo->topo.cart->ndims, position);
 264     position[direction] += disp;
 265
 266     if(position[direction] < 0 ||
 267        position[direction] >= topo->topo.cart->dims[direction]) {
 268         if(topo->topo.cart->periodic[direction]) {
 269             position[direction] %= topo->topo.cart->dims[direction];
 270             smpi_mpi_cart_rank(comm, position, rank_dest);
 271         } else {
 272             *rank_dest = MPI_PROC_NULL;
 273         }
 274     } else {
 275         smpi_mpi_cart_rank(comm, position, rank_dest);
 276     }
 277
 278     position[direction] =  topo->topo.cart->position[direction] - disp;
 279     if(position[direction] < 0 || position[direction] >= topo->topo.cart->dims[direction]) {
 280         if(topo->topo.cart->periodic[direction]) {
 281             position[direction] %= topo->topo.cart->dims[direction];
 282             smpi_mpi_cart_rank(comm, position, rank_source);
 283         } else {
 284             *rank_source = MPI_PROC_NULL;
 285         }
 286     } else {
 287         smpi_mpi_cart_rank(comm, position, rank_source);
 288     }
 289
 290     return MPI_SUCCESS;
 291 }
 292
 293 int smpi_mpi_cartdim_get(MPI_Comm comm, int *ndims) {
 294     MPI_Topology topo = smpi_comm_topo(comm);
 295
 296     *ndims = topo->topo.cart->ndims;
 297     return MPI_SUCCESS;
 298 }
 299
 300 // Everything below has been taken from ompi, but could be easily rewritten.
 301
 302 /*
 303  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 304  *                         University Research and Technology
 305  *                         Corporation.  All rights reserved.
 306  * Copyright (c) 2004-2005 The University of Tennessee and The University
 307  *                         of Tennessee Research Foundation.  All rights
 308  *                         reserved.
 309  * Copyright (c) 2004-2014 High Performance Computing Center Stuttgart,
 310  *                         University of Stuttgart.  All rights reserved.
 311  * Copyright (c) 2004-2005 The Regents of the University of California.
 312  *                         All rights reserved.
 313  * Copyright (c) 2012      Los Alamos National Security, LLC.  All rights
 314  *                         reserved.
 315  * Copyright (c) 2014      Intel, Inc. All rights reserved
 316  * $COPYRIGHT$
 317  *
 318  * Additional copyrights may follow
 319  *
 320  * $HEADER$
 321  */
 322
 323 /* static functions */
 324 static int assignnodes(int ndim, int nfactor, int *pfacts,int **pdims);
 325 static int getfactors(int num, int *nfators, int **factors);
 326
 327 /*
 328  * This is a utility function, no need to have anything in the lower
 329  * layer for this at all
 330  */
 331 int smpi_mpi_dims_create(int nnodes, int ndims, int dims[])
 332 {
 333     int i;
 334     int freeprocs;
 335     int freedims;
 336     int nfactors;
 337     int *factors;
 338     int *procs;
 339     int *p;
 340     int err;
 341
 342     /* Get # of free-to-be-assigned processes and # of free dimensions */
 343     freeprocs = nnodes;
 344     freedims = 0;
 345     for (i = 0, p = dims; i < ndims; ++i,++p) {
 346         if (*p == 0) {
 347             ++freedims;
 348         } else if ((*p < 0) || ((nnodes % *p) != 0)) {
 349             return  MPI_ERR_DIMS;
 350
 351         } else {
 352             freeprocs /= *p;
 353         }
 354     }
 355
 356     if (freedims == 0) {
 357         if (freeprocs == 1) {
 358             return MPI_SUCCESS;
 359         }
 360         return MPI_ERR_DIMS;
 361     }
 362
 363     if (freeprocs == 1) {
 364         for (i = 0; i < ndims; ++i, ++dims) {
 365             if (*dims == 0) {
 366                 *dims = 1;
 367             }
 368         }
 369         return MPI_SUCCESS;
 370     }
 371
 372     /* Factor the number of free processes */
 373     if (MPI_SUCCESS != (err = getfactors(freeprocs, &nfactors, &factors))) {
 374         return  err;
 375     }
 376
 377     /* Assign free processes to free dimensions */
 378     if (MPI_SUCCESS != (err = assignnodes(freedims, nfactors, factors, &procs))) {
 379         return err;
 380     }
 381
 382     /* Return assignment results */
 383     p = procs;
 384     for (i = 0; i < ndims; ++i, ++dims) {
 385         if (*dims == 0) {
 386             *dims = *p++;
 387         }
 388     }
 389
 390     free((char *) factors);
 391     free((char *) procs);
 392
 393     /* all done */
 394     return MPI_SUCCESS;
 395 }
 396
 397 /*
 398  *  assignnodes
 399  *
 400  *  Function:   - assign processes to dimensions
 401  *          - get "best-balanced" grid
 402  *          - greedy bin-packing algorithm used
 403  *          - sort dimensions in decreasing order
 404  *          - dimensions array dynamically allocated
 405  *  Accepts:    - # of dimensions
 406  *          - # of prime factors
 407  *          - array of prime factors
 408  *          - ptr to array of dimensions (returned value)
 409  *  Returns:    - 0 or ERROR
 410  */
 411 static int
 412 assignnodes(int ndim, int nfactor, int *pfacts, int **pdims)
 413 {
 414     int *bins;
 415     int i, j;
 416     int n;
 417     int f;
 418     int *p;
 419     int *pmin;
 420
 421     if (0 >= ndim) {
 422         return MPI_ERR_DIMS;
 423     }
 424
 425     /* Allocate and initialize the bins */
 426     bins = (int *) malloc((unsigned) ndim * sizeof(int));
 427     if (nullptr == bins) {
 428         return MPI_ERR_NO_MEM;
 429     }
 430     *pdims = bins;
 431
 432     for (i = 0, p = bins; i < ndim; ++i, ++p) {
 433         *p = 1;
 434     }
 435
 436     /* Loop assigning factors from the highest to the lowest */
 437     for (j = nfactor - 1; j >= 0; --j) {
 438         f = pfacts[j];
 439         /* Assign a factor to the smallest bin */
 440         pmin = bins;
 441         for (i = 1, p = pmin + 1; i < ndim; ++i, ++p) {
 442             if (*p < *pmin) {
 443                 pmin = p;
 444             }
 445         }
 446         *pmin *= f;
 447     }
 448
 449     /* Sort dimensions in decreasing order (O(n^2) for now) */
 450     for (i = 0, pmin = bins; i < ndim - 1; ++i, ++pmin) {
 451         for (j = i + 1, p = pmin + 1; j < ndim; ++j, ++p) {
 452             if (*p > *pmin) {
 453                 n = *p;
 454                 *p = *pmin;
 455                 *pmin = n;
 456             }
 457         }
 458     }
 459
 460     return MPI_SUCCESS;
 461 }
 462
 463 /*
 464  *  getfactors
 465  *
 466  *  Function:   - factorize a number
 467  *  Accepts:    - number
 468  *          - # prime factors
 469  *          - array of prime factors
 470  *  Returns:    - MPI_SUCCESS or ERROR
 471  */
 472 static int getfactors(int num, int *nfactors, int **factors) {
 473     int size;
 474     int d;
 475     int i;
 476     int sqrtnum;
 477
 478     if(num  < 2) {
 479         (*nfactors) = 0;
 480         (*factors) = nullptr;
 481         return MPI_SUCCESS;
 482     }
 483     /* Allocate the array of prime factors which cannot exceed log_2(num) entries */
 484     sqrtnum = ceil(sqrt(num));
 485     size = ceil(log(num) / log(2));
 486     *factors = (int *) malloc((unsigned) size * sizeof(int));
 487
 488     i = 0;
 489     /* determine all occurences of factor 2 */
 490     while((num % 2) == 0) {
 491         num /= 2;
 492         (*factors)[i++] = 2;
 493     }
 494     /* determine all occurences of uneven prime numbers up to sqrt(num) */
 495     for(d = 3; (num > 1) && (d < sqrtnum); d += 2) {
 496         while((num % d) == 0) {
 497             num /= d;
 498             (*factors)[i++] = d;
 499         }
 500     }
 501     /* as we looped only up to sqrt(num) one factor > sqrt(num) may be left over */
 502     if(num != 1) {
 503         (*factors)[i++] = num;
 504     }
 505     (*nfactors) = i;
 506     return MPI_SUCCESS;
 507 }