src/smpi/smpi_topo.cpp

   1 /* Copyright (c) 2014, 2017. The SimGrid Team.
   2  * All rights reserved.                                                     */
   3
   4 /* This program is free software; you can redistribute it and/or modify it
   5  * under the terms of the license (GNU LGPL) which comes with this package. */
   6
   7 #include "xbt/sysdep.h"
   8 #include "smpi/smpi.h"
   9 #include "src/smpi/smpi_group.hpp"
  10 #include "private.h"
  11 #include <vector>
  12 #include <math.h>
  13
  14 typedef struct s_smpi_mpi_cart_topology {
  15   int nnodes;
  16   int ndims;
  17   int *dims;
  18   int *periodic;
  19   int *position;
  20 } s_smpi_mpi_cart_topology_t;
  21
  22 typedef struct s_smpi_mpi_graph_topology {
  23   int nnodes;
  24   int nedges;
  25   int *index;
  26   int *edges;
  27 } s_smpi_mpi_graph_topology_t;
  28
  29 typedef struct s_smpi_dist_graph_topology {
  30   int indegree;
  31   int *in;
  32   int *in_weights;
  33   int outdegree;
  34   int *out;
  35   int *out_weights;
  36   int is_weighted;
  37 } s_smpi_mpi_dist_graph_topology_t;
  38
  39 typedef struct s_smpi_mpi_topology {
  40   MPIR_Topo_type kind;
  41   union topo {
  42     MPIR_Graph_Topology graph;
  43     MPIR_Cart_Topology  cart;
  44     MPIR_Dist_Graph_Topology dist_graph;
  45   } topo;
  46 } s_smpi_mpi_topology_t;
  47
  48 void smpi_topo_destroy(MPI_Topology topo) {
  49   if(topo == nullptr) {
  50     return;
  51   }
  52   switch (topo->kind) {
  53   case MPI_CART:
  54     smpi_cart_topo_destroy(topo->topo.cart);
  55     break;
  56   case MPI_GRAPH:
  57     // This topology is not supported by SMPI yet
  58     smpi_graph_topo_destroy(topo->topo.graph);
  59     break;
  60   case MPI_DIST_GRAPH:
  61     // This topology is not supported by SMPI yet
  62      smpi_dist_graph_topo_destroy(topo->topo.dist_graph);
  63     break;
  64   default:
  65     return;
  66   }
  67   xbt_free(topo);
  68 }
  69
  70 MPI_Topology smpi_topo_create(MPIR_Topo_type kind) {
  71   MPI_Topology newTopo = static_cast<MPI_Topology>(xbt_malloc(sizeof(*newTopo)));
  72   newTopo->kind = kind;
  73   // Allocate and initialize the right topo should be done by the caller
  74   return newTopo;
  75 }
  76
  77 void smpi_graph_topo_destroy(MPIR_Graph_Topology graph) {
  78   if (graph) {
  79     delete[] graph->index;
  80     delete[] graph->edges;
  81     xbt_free(graph);
  82   }
  83 }
  84
  85 void smpi_dist_graph_topo_destroy(MPIR_Dist_Graph_Topology dist_graph) {
  86   if (dist_graph) {
  87     delete[] dist_graph->in;
  88     delete[] dist_graph->in_weights;
  89     delete[] dist_graph->out;
  90     delete[] dist_graph->out_weights;
  91     xbt_free(dist_graph);
  92   }
  93 }
  94
  95 /*******************************************************************************
  96  * Cartesian topologies
  97  ******************************************************************************/
  98 void smpi_cart_topo_destroy(MPIR_Cart_Topology cart) {
  99   if (cart) {
 100     delete[] cart->dims;
 101     delete[] cart->periodic;
 102     delete[] cart->position;
 103     xbt_free(cart);
 104   }
 105 }
 106
 107 MPI_Topology smpi_cart_topo_create(int ndims) {
 108     MPI_Topology newTopo = smpi_topo_create(MPI_CART);
 109     MPIR_Cart_Topology newCart = static_cast<MPIR_Cart_Topology>(xbt_malloc(sizeof(*newCart)));
 110     newCart->nnodes = 0;
 111     newCart->ndims = ndims;
 112     newCart->dims = new int[ndims];
 113     newCart->periodic = new int[ndims];
 114     newCart->position = new int[ndims];
 115     newTopo->topo.cart = newCart;
 116     return newTopo;
 117 }
 118
 119 /* reorder is ignored, don't know what would be the consequences of a dumb reordering but neither do I see the point of
 120  * reordering*/
 121 int smpi_mpi_cart_create(MPI_Comm comm_old, int ndims, int dims[], int periods[], int reorder, MPI_Comm *comm_cart) {
 122   int retval = MPI_SUCCESS;
 123   MPI_Topology newCart;
 124   MPI_Group newGroup;
 125   MPI_Group oldGroup;
 126   int nranks;
 127
 128   int rank = smpi_comm_rank(comm_old);
 129
 130   int newSize = 1;
 131   if(ndims != 0) {
 132     for (int i = 0 ; i < ndims ; i++) {
 133       newSize *= dims[i];
 134     }
 135     if(rank >= newSize) {
 136       *comm_cart = MPI_COMM_NULL;
 137       return retval;
 138     }
 139     newCart = smpi_cart_topo_create(ndims);
 140     oldGroup = smpi_comm_group(comm_old);
 141     newGroup = new simgrid::SMPI::Group(newSize);
 142     for (int i = 0 ; i < newSize ; i++) {
 143       newGroup->set_mapping(oldGroup->index(i), i);
 144     }
 145
 146     newCart->topo.cart->nnodes = newSize;
 147
 148     //  FIXME : code duplication... See smpi_mpi_cart_coords
 149     nranks = newSize;
 150     for (int i=0; i<ndims; i++) {
 151       newCart->topo.cart->dims[i] = dims[i];
 152       newCart->topo.cart->periodic[i] = periods[i];
 153       nranks = nranks / dims[i];
 154       /* FIXME: nranks could be zero (?) */
 155       newCart->topo.cart->position[i] = rank / nranks;
 156       rank = rank % nranks;
 157     }
 158
 159     *comm_cart = smpi_comm_new(newGroup, newCart);
 160   } else {
 161     if (rank == 0) {
 162       newCart = smpi_cart_topo_create(ndims);
 163       *comm_cart = smpi_comm_new(new simgrid::SMPI::Group(smpi_comm_group(MPI_COMM_SELF)), newCart);
 164     } else {
 165       *comm_cart = MPI_COMM_NULL;
 166     }
 167   }
 168   return retval;
 169 }
 170
 171 int smpi_mpi_cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *newcomm) {
 172   MPI_Topology oldTopo = smpi_comm_topo(comm);
 173   int oldNDims = oldTopo->topo.cart->ndims;
 174   int j = 0;
 175   int *newDims = nullptr;
 176   int *newPeriodic = nullptr;
 177
 178   if (remain_dims == nullptr && oldNDims != 0) {
 179     return MPI_ERR_ARG;
 180   }
 181   int newNDims = 0;
 182   for (int i = 0 ; i < oldNDims ; i++) {
 183     if (remain_dims[i])
 184       newNDims++;
 185   }
 186
 187   if (newNDims > 0) {
 188     newDims = xbt_new(int, newNDims);
 189     newPeriodic = xbt_new(int, newNDims);
 190
 191     // that should not segfault
 192     for (int i = 0 ; j < newNDims ; i++) {
 193       if(remain_dims[i]) {
 194         newDims[j] = oldTopo->topo.cart->dims[i];
 195         newPeriodic[j] = oldTopo->topo.cart->periodic[i];
 196         j++;
 197       }
 198     }
 199   }
 200   return smpi_mpi_cart_create(comm, newNDims, newDims, newPeriodic, 0, newcomm);
 201 }
 202
 203 int smpi_mpi_cart_coords(MPI_Comm comm, int rank, int maxdims, int coords[]) {
 204   MPI_Topology topo = smpi_comm_topo(comm);
 205   int nnodes = topo->topo.cart->nnodes;
 206   for (int i = 0; i< topo->topo.cart->ndims; i++ ) {
 207     nnodes    = nnodes / topo->topo.cart->dims[i];
 208     coords[i] = rank / nnodes;
 209     rank      = rank % nnodes;
 210   }
 211   return MPI_SUCCESS;
 212 }
 213
 214 int smpi_mpi_cart_get(MPI_Comm comm, int maxdims, int* dims, int* periods, int* coords) {
 215   MPI_Topology topo = smpi_comm_topo(comm);
 216   int ndims=topo->topo.cart->ndims < maxdims ? topo->topo.cart->ndims : maxdims;
 217   for(int i = 0 ; i < ndims ; i++) {
 218     dims[i] = topo->topo.cart->dims[i];
 219     periods[i] = topo->topo.cart->periodic[i];
 220     coords[i] = topo->topo.cart->position[i];
 221   }
 222   return MPI_SUCCESS;
 223 }
 224
 225 int smpi_mpi_cart_rank(MPI_Comm comm, int* coords, int* rank) {
 226   MPI_Topology topo = smpi_comm_topo(comm);
 227   int ndims = topo->topo.cart->ndims;
 228   int coord;
 229   *rank = 0;
 230   int multiplier = 1;
 231
 232   for (int i=ndims-1; i >=0; i-- ) {
 233     coord = coords[i];
 234
 235     /* The user can give us whatever coordinates he wants. If one of them is out of range, either this dimension is
 236      * periodic, and we consider the equivalent coordinate inside the bounds, or it's not and then it's an error
 237      */
 238     if (coord >= topo->topo.cart->dims[i]) {
 239       if ( topo->topo.cart->periodic[i] ) {
 240         coord = coord % topo->topo.cart->dims[i];
 241       } else {
 242         // Should I do that ?
 243         *rank = -1;
 244         return MPI_ERR_ARG;
 245       }
 246     } else if (coord <  0) {
 247       if(topo->topo.cart->periodic[i]) {
 248         coord = coord % topo->topo.cart->dims[i];
 249         if (coord)
 250           coord = topo->topo.cart->dims[i] + coord;
 251       } else {
 252         *rank = -1;
 253         return MPI_ERR_ARG;
 254       }
 255     }
 256
 257     *rank += multiplier * coord;
 258     multiplier *= topo->topo.cart->dims[i];
 259   }
 260   return MPI_SUCCESS;
 261 }
 262
 263 int smpi_mpi_cart_shift(MPI_Comm comm, int direction, int disp, int *rank_source, int *rank_dest) {
 264   MPI_Topology topo = smpi_comm_topo(comm);
 265   int position[topo->topo.cart->ndims];
 266
 267   if(topo->topo.cart->ndims == 0) {
 268     return MPI_ERR_ARG;
 269   }
 270   if (topo->topo.cart->ndims < direction) {
 271     return MPI_ERR_DIMS;
 272   }
 273
 274   smpi_mpi_cart_coords(comm, smpi_comm_rank(comm), topo->topo.cart->ndims, position);
 275   position[direction] += disp;
 276
 277   if(position[direction] < 0 ||
 278       position[direction] >= topo->topo.cart->dims[direction]) {
 279     if(topo->topo.cart->periodic[direction]) {
 280       position[direction] %= topo->topo.cart->dims[direction];
 281       smpi_mpi_cart_rank(comm, position, rank_dest);
 282     } else {
 283       *rank_dest = MPI_PROC_NULL;
 284     }
 285   } else {
 286     smpi_mpi_cart_rank(comm, position, rank_dest);
 287   }
 288
 289   position[direction] =  topo->topo.cart->position[direction] - disp;
 290   if(position[direction] < 0 || position[direction] >= topo->topo.cart->dims[direction]) {
 291     if(topo->topo.cart->periodic[direction]) {
 292       position[direction] %= topo->topo.cart->dims[direction];
 293       smpi_mpi_cart_rank(comm, position, rank_source);
 294     } else {
 295       *rank_source = MPI_PROC_NULL;
 296     }
 297   } else {
 298     smpi_mpi_cart_rank(comm, position, rank_source);
 299   }
 300
 301   return MPI_SUCCESS;
 302 }
 303
 304 int smpi_mpi_cartdim_get(MPI_Comm comm, int *ndims) {
 305   MPI_Topology topo = smpi_comm_topo(comm);
 306
 307   *ndims = topo->topo.cart->ndims;
 308   return MPI_SUCCESS;
 309 }
 310
 311 // Everything below has been taken from ompi, but could be easily rewritten (and partially was to follow sonar rules).
 312
 313 /*
 314  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 315  *                         University Research and Technology
 316  *                         Corporation.  All rights reserved.
 317  * Copyright (c) 2004-2005 The University of Tennessee and The University
 318  *                         of Tennessee Research Foundation.  All rights
 319  *                         reserved.
 320  * Copyright (c) 2004-2014 High Performance Computing Center Stuttgart,
 321  *                         University of Stuttgart.  All rights reserved.
 322  * Copyright (c) 2004-2005 The Regents of the University of California.
 323  *                         All rights reserved.
 324  * Copyright (c) 2012      Los Alamos National Security, LLC.  All rights
 325  *                         reserved.
 326  * Copyright (c) 2014      Intel, Inc. All rights reserved
 327  * $COPYRIGHT$
 328  *
 329  * Additional copyrights may follow
 330  *
 331  * $HEADER$
 332  */
 333
 334 /* static functions */
 335 static int assignnodes(int ndim, int nfactor, int *pfacts,int **pdims);
 336 static int getfactors(int num, int *nfators, int **factors);
 337
 338 /*
 339  * This is a utility function, no need to have anything in the lower layer for this at all
 340  */
 341 int smpi_mpi_dims_create(int nnodes, int ndims, int dims[])
 342 {
 343   /* Get # of free-to-be-assigned processes and # of free dimensions */
 344   int freeprocs = nnodes;
 345   int freedims = 0;
 346   int *p = dims;
 347   for (int i = 0; i < ndims; ++i) {
 348     if (*p == 0) {
 349       ++freedims;
 350     } else if ((*p < 0) || ((nnodes % *p) != 0)) {
 351       return  MPI_ERR_DIMS;
 352
 353     } else {
 354       freeprocs /= *p;
 355     }
 356     p++;
 357   }
 358
 359   if (freedims == 0) {
 360     if (freeprocs == 1) {
 361       return MPI_SUCCESS;
 362     }
 363     return MPI_ERR_DIMS;
 364   }
 365
 366   if (freeprocs == 1) {
 367     for (int i = 0; i < ndims; ++i) {
 368       if (*dims == 0) {
 369         *dims = 1;
 370       }
 371       dims++;
 372     }
 373     return MPI_SUCCESS;
 374   }
 375
 376   /* Factor the number of free processes */
 377   int nfactors;
 378   int *factors;
 379   int err = getfactors(freeprocs, &nfactors, &factors);
 380   if (MPI_SUCCESS != err)
 381     return  err;
 382
 383   /* Assign free processes to free dimensions */
 384   int *procs;
 385   err = assignnodes(freedims, nfactors, factors, &procs);
 386   if (MPI_SUCCESS != err)
 387     return err;
 388
 389   /* Return assignment results */
 390   p = procs;
 391   for (int i = 0; i < ndims; ++i) {
 392     if (*dims == 0) {
 393       *dims = *p++;
 394     }
 395     dims++;
 396   }
 397
 398   delete[] factors;
 399   delete[] procs;
 400
 401   /* all done */
 402   return MPI_SUCCESS;
 403 }
 404
 405 /*
 406  *  assignnodes
 407  *
 408  *  Function:   - assign processes to dimensions
 409  *          - get "best-balanced" grid
 410  *          - greedy bin-packing algorithm used
 411  *          - sort dimensions in decreasing order
 412  *          - dimensions array dynamically allocated
 413  *  Accepts:    - # of dimensions
 414  *          - # of prime factors
 415  *          - array of prime factors
 416  *          - ptr to array of dimensions (returned value)
 417  *  Returns:    - 0 or ERROR
 418  */
 419 static int assignnodes(int ndim, int nfactor, int *pfacts, int **pdims)
 420 {
 421   int *pmin;
 422
 423   if (0 >= ndim) {
 424     return MPI_ERR_DIMS;
 425   }
 426
 427   /* Allocate and initialize the bins */
 428   int *bins = new int[ndim];
 429
 430   *pdims = bins;
 431   int *p = bins;
 432
 433   for (int i = 0 ; i < ndim; ++i) {
 434     *p = 1;
 435     p++;
 436   }
 437   /* Loop assigning factors from the highest to the lowest */
 438   for (int j = nfactor - 1; j >= 0; --j) {
 439     int f = pfacts[j];
 440     /* Assign a factor to the smallest bin */
 441     pmin = bins;
 442     p = pmin + 1;
 443     for (int i = 1; i < ndim; ++i) {
 444       if (*p < *pmin) {
 445         pmin = p;
 446       }
 447       p++;
 448     }
 449     *pmin *= f;
 450   }
 451
 452   /* Sort dimensions in decreasing order (O(n^2) for now) */
 453   pmin = bins;
 454   for (int i = 0; i < ndim - 1; ++i) {
 455     p = pmin + 1;
 456     for (int j = i + 1; j < ndim; ++j) {
 457       if (*p > *pmin) {
 458         int n = *p;
 459         *p = *pmin;
 460         *pmin = n;
 461       }
 462       p++;
 463     }
 464     pmin++;
 465   }
 466
 467   return MPI_SUCCESS;
 468 }
 469
 470 /*
 471  *  getfactors
 472  *
 473  *  Function:   - factorize a number
 474  *  Accepts:    - number
 475  *          - # prime factors
 476  *          - array of prime factors
 477  *  Returns:    - MPI_SUCCESS or ERROR
 478  */
 479 static int getfactors(int num, int *nfactors, int **factors) {
 480   if(num  < 2) {
 481     (*nfactors) = 0;
 482     (*factors) = nullptr;
 483     return MPI_SUCCESS;
 484   }
 485   /* Allocate the array of prime factors which cannot exceed log_2(num) entries */
 486   int sqrtnum = ceil(sqrt(num));
 487   int size = ceil(log(num) / log(2));
 488   *factors = new int[size];
 489
 490   int i = 0;
 491   /* determine all occurrences of factor 2 */
 492   while((num % 2) == 0) {
 493     num /= 2;
 494     (*factors)[i++] = 2;
 495   }
 496   /* determine all occurrences of uneven prime numbers up to sqrt(num) */
 497   for(int d = 3; (num > 1) && (d < sqrtnum); d += 2) {
 498     while((num % d) == 0) {
 499       num /= d;
 500       (*factors)[i++] = d;
 501     }
 502   }
 503   /* as we looped only up to sqrt(num) one factor > sqrt(num) may be left over */
 504   if(num != 1) {
 505     (*factors)[i++] = num;
 506   }
 507   (*nfactors) = i;
 508   return MPI_SUCCESS;
 509 }