src/smpi/smpi_topo.cpp

   1 /* Copyright (c) 2014, 2017. The SimGrid Team.
   2  * All rights reserved.                                                     */
   3
   4 /* This program is free software; you can redistribute it and/or modify it
   5  * under the terms of the license (GNU LGPL) which comes with this package. */
   6
   7 #include "xbt/sysdep.h"
   8 #include "smpi/smpi.h"
   9 #include "private.h"
  10 #include <vector>
  11 #include <math.h>
  12
  13 typedef struct s_smpi_mpi_cart_topology {
  14   int nnodes;
  15   int ndims;
  16   int *dims;
  17   int *periodic;
  18   int *position;
  19 } s_smpi_mpi_cart_topology_t;
  20
  21 typedef struct s_smpi_mpi_graph_topology {
  22   int nnodes;
  23   int nedges;
  24   int *index;
  25   int *edges;
  26 } s_smpi_mpi_graph_topology_t;
  27
  28 typedef struct s_smpi_dist_graph_topology {
  29   int indegree;
  30   int *in;
  31   int *in_weights;
  32   int outdegree;
  33   int *out;
  34   int *out_weights;
  35   int is_weighted;
  36 } s_smpi_mpi_dist_graph_topology_t;
  37
  38 typedef struct s_smpi_mpi_topology {
  39   MPIR_Topo_type kind;
  40   union topo {
  41     MPIR_Graph_Topology graph;
  42     MPIR_Cart_Topology  cart;
  43     MPIR_Dist_Graph_Topology dist_graph;
  44   } topo;
  45 } s_smpi_mpi_topology_t;
  46
  47 void smpi_topo_destroy(MPI_Topology topo) {
  48   if(topo == nullptr) {
  49     return;
  50   }
  51   switch (topo->kind) {
  52   case MPI_CART:
  53     smpi_cart_topo_destroy(topo->topo.cart);
  54     break;
  55   case MPI_GRAPH:
  56     // This topology is not supported by SMPI yet
  57     smpi_graph_topo_destroy(topo->topo.graph);
  58     break;
  59   case MPI_DIST_GRAPH:
  60     // This topology is not supported by SMPI yet
  61      smpi_dist_graph_topo_destroy(topo->topo.dist_graph);
  62     break;
  63   default:
  64     return;
  65   }
  66   xbt_free(topo);
  67 }
  68
  69 MPI_Topology smpi_topo_create(MPIR_Topo_type kind) {
  70   MPI_Topology newTopo = static_cast<MPI_Topology>(xbt_malloc(sizeof(*newTopo)));
  71   newTopo->kind = kind;
  72   // Allocate and initialize the right topo should be done by the caller
  73   return newTopo;
  74 }
  75
  76 void smpi_graph_topo_destroy(MPIR_Graph_Topology graph) {
  77   if (graph) {
  78     delete[] graph->index;
  79     delete[] graph->edges;
  80     xbt_free(graph);
  81   }
  82 }
  83
  84 void smpi_dist_graph_topo_destroy(MPIR_Dist_Graph_Topology dist_graph) {
  85   if (dist_graph) {
  86     delete[] dist_graph->in;
  87     delete[] dist_graph->in_weights;
  88     delete[] dist_graph->out;
  89     delete[] dist_graph->out_weights;
  90     xbt_free(dist_graph);
  91   }
  92 }
  93
  94 /*******************************************************************************
  95  * Cartesian topologies
  96  ******************************************************************************/
  97 void smpi_cart_topo_destroy(MPIR_Cart_Topology cart) {
  98   if (cart) {
  99     delete[] cart->dims;
 100     delete[] cart->periodic;
 101     delete[] cart->position;
 102     xbt_free(cart);
 103   }
 104 }
 105
 106 MPI_Topology smpi_cart_topo_create(int ndims) {
 107     MPI_Topology newTopo = smpi_topo_create(MPI_CART);
 108     MPIR_Cart_Topology newCart = static_cast<MPIR_Cart_Topology>(xbt_malloc(sizeof(*newCart)));
 109     newCart->nnodes = 0;
 110     newCart->ndims = ndims;
 111     newCart->dims = new int[ndims];
 112     newCart->periodic = new int[ndims];
 113     newCart->position = new int[ndims];
 114     newTopo->topo.cart = newCart;
 115     return newTopo;
 116 }
 117
 118 /* reorder is ignored, don't know what would be the consequences of a dumb reordering but neither do I see the point of
 119  * reordering*/
 120 int smpi_mpi_cart_create(MPI_Comm comm_old, int ndims, int dims[], int periods[], int reorder, MPI_Comm *comm_cart) {
 121   int retval = MPI_SUCCESS;
 122   MPI_Topology newCart;
 123   MPI_Group newGroup;
 124   MPI_Group oldGroup;
 125   int nranks;
 126
 127   int rank = smpi_comm_rank(comm_old);
 128
 129   int newSize = 1;
 130   if(ndims != 0) {
 131     for (int i = 0 ; i < ndims ; i++) {
 132       newSize *= dims[i];
 133     }
 134     if(rank >= newSize) {
 135       *comm_cart = MPI_COMM_NULL;
 136       return retval;
 137     }
 138     newCart = smpi_cart_topo_create(ndims);
 139     oldGroup = smpi_comm_group(comm_old);
 140     newGroup = smpi_group_new(newSize);
 141     for (int i = 0 ; i < newSize ; i++) {
 142       smpi_group_set_mapping(newGroup, smpi_group_index(oldGroup, i), i);
 143     }
 144
 145     newCart->topo.cart->nnodes = newSize;
 146
 147     //  FIXME : code duplication... See smpi_mpi_cart_coords
 148     nranks = newSize;
 149     for (int i=0; i<ndims; i++) {
 150       newCart->topo.cart->dims[i] = dims[i];
 151       newCart->topo.cart->periodic[i] = periods[i];
 152       nranks = nranks / dims[i];
 153       /* FIXME: nranks could be zero (?) */
 154       newCart->topo.cart->position[i] = rank / nranks;
 155       rank = rank % nranks;
 156     }
 157
 158     *comm_cart = smpi_comm_new(newGroup, newCart);
 159   } else {
 160     if (rank == 0) {
 161       newCart = smpi_cart_topo_create(ndims);
 162       *comm_cart = smpi_comm_new(smpi_group_copy(smpi_comm_group(MPI_COMM_SELF)), newCart);
 163     } else {
 164       *comm_cart = MPI_COMM_NULL;
 165     }
 166   }
 167   return retval;
 168 }
 169
 170 int smpi_mpi_cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *newcomm) {
 171   MPI_Topology oldTopo = smpi_comm_topo(comm);
 172   int oldNDims = oldTopo->topo.cart->ndims;
 173   int j = 0;
 174   int *newDims = nullptr;
 175   int *newPeriodic = nullptr;
 176
 177   if (remain_dims == nullptr && oldNDims != 0) {
 178     return MPI_ERR_ARG;
 179   }
 180   int newNDims = 0;
 181   for (int i = 0 ; i < oldNDims ; i++) {
 182     if (remain_dims[i])
 183       newNDims++;
 184   }
 185
 186   if (newNDims > 0) {
 187     newDims = xbt_new(int, newNDims);
 188     newPeriodic = xbt_new(int, newNDims);
 189
 190     // that should not segfault
 191     for (int i = 0 ; j < newNDims ; i++) {
 192       if(remain_dims[i]) {
 193         newDims[j] = oldTopo->topo.cart->dims[i];
 194         newPeriodic[j] = oldTopo->topo.cart->periodic[i];
 195         j++;
 196       }
 197     }
 198   }
 199   return smpi_mpi_cart_create(comm, newNDims, newDims, newPeriodic, 0, newcomm);
 200 }
 201
 202 int smpi_mpi_cart_coords(MPI_Comm comm, int rank, int maxdims, int coords[]) {
 203   MPI_Topology topo = smpi_comm_topo(comm);
 204   int nnodes = topo->topo.cart->nnodes;
 205   for (int i = 0; i< topo->topo.cart->ndims; i++ ) {
 206     nnodes    = nnodes / topo->topo.cart->dims[i];
 207     coords[i] = rank / nnodes;
 208     rank      = rank % nnodes;
 209   }
 210   return MPI_SUCCESS;
 211 }
 212
 213 int smpi_mpi_cart_get(MPI_Comm comm, int maxdims, int* dims, int* periods, int* coords) {
 214   MPI_Topology topo = smpi_comm_topo(comm);
 215   int ndims=topo->topo.cart->ndims < maxdims ? topo->topo.cart->ndims : maxdims;
 216   for(int i = 0 ; i < ndims ; i++) {
 217     dims[i] = topo->topo.cart->dims[i];
 218     periods[i] = topo->topo.cart->periodic[i];
 219     coords[i] = topo->topo.cart->position[i];
 220   }
 221   return MPI_SUCCESS;
 222 }
 223
 224 int smpi_mpi_cart_rank(MPI_Comm comm, int* coords, int* rank) {
 225   MPI_Topology topo = smpi_comm_topo(comm);
 226   int ndims = topo->topo.cart->ndims;
 227   int coord;
 228   *rank = 0;
 229   int multiplier = 1;
 230
 231   for (int i=ndims-1; i >=0; i-- ) {
 232     coord = coords[i];
 233
 234     /* The user can give us whatever coordinates he wants. If one of them is out of range, either this dimension is
 235      * periodic, and we consider the equivalent coordinate inside the bounds, or it's not and then it's an error
 236      */
 237     if (coord >= topo->topo.cart->dims[i]) {
 238       if ( topo->topo.cart->periodic[i] ) {
 239         coord = coord % topo->topo.cart->dims[i];
 240       } else {
 241         // Should I do that ?
 242         *rank = -1;
 243         return MPI_ERR_ARG;
 244       }
 245     } else if (coord <  0) {
 246       if(topo->topo.cart->periodic[i]) {
 247         coord = coord % topo->topo.cart->dims[i];
 248         if (coord)
 249           coord = topo->topo.cart->dims[i] + coord;
 250       } else {
 251         *rank = -1;
 252         return MPI_ERR_ARG;
 253       }
 254     }
 255
 256     *rank += multiplier * coord;
 257     multiplier *= topo->topo.cart->dims[i];
 258   }
 259   return MPI_SUCCESS;
 260 }
 261
 262 int smpi_mpi_cart_shift(MPI_Comm comm, int direction, int disp, int *rank_source, int *rank_dest) {
 263   MPI_Topology topo = smpi_comm_topo(comm);
 264   int position[topo->topo.cart->ndims];
 265
 266   if(topo->topo.cart->ndims == 0) {
 267     return MPI_ERR_ARG;
 268   }
 269   if (topo->topo.cart->ndims < direction) {
 270     return MPI_ERR_DIMS;
 271   }
 272
 273   smpi_mpi_cart_coords(comm, smpi_comm_rank(comm), topo->topo.cart->ndims, position);
 274   position[direction] += disp;
 275
 276   if(position[direction] < 0 ||
 277       position[direction] >= topo->topo.cart->dims[direction]) {
 278     if(topo->topo.cart->periodic[direction]) {
 279       position[direction] %= topo->topo.cart->dims[direction];
 280       smpi_mpi_cart_rank(comm, position, rank_dest);
 281     } else {
 282       *rank_dest = MPI_PROC_NULL;
 283     }
 284   } else {
 285     smpi_mpi_cart_rank(comm, position, rank_dest);
 286   }
 287
 288   position[direction] =  topo->topo.cart->position[direction] - disp;
 289   if(position[direction] < 0 || position[direction] >= topo->topo.cart->dims[direction]) {
 290     if(topo->topo.cart->periodic[direction]) {
 291       position[direction] %= topo->topo.cart->dims[direction];
 292       smpi_mpi_cart_rank(comm, position, rank_source);
 293     } else {
 294       *rank_source = MPI_PROC_NULL;
 295     }
 296   } else {
 297     smpi_mpi_cart_rank(comm, position, rank_source);
 298   }
 299
 300   return MPI_SUCCESS;
 301 }
 302
 303 int smpi_mpi_cartdim_get(MPI_Comm comm, int *ndims) {
 304   MPI_Topology topo = smpi_comm_topo(comm);
 305
 306   *ndims = topo->topo.cart->ndims;
 307   return MPI_SUCCESS;
 308 }
 309
 310 // Everything below has been taken from ompi, but could be easily rewritten (and partially was to follow sonar rules).
 311
 312 /*
 313  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 314  *                         University Research and Technology
 315  *                         Corporation.  All rights reserved.
 316  * Copyright (c) 2004-2005 The University of Tennessee and The University
 317  *                         of Tennessee Research Foundation.  All rights
 318  *                         reserved.
 319  * Copyright (c) 2004-2014 High Performance Computing Center Stuttgart,
 320  *                         University of Stuttgart.  All rights reserved.
 321  * Copyright (c) 2004-2005 The Regents of the University of California.
 322  *                         All rights reserved.
 323  * Copyright (c) 2012      Los Alamos National Security, LLC.  All rights
 324  *                         reserved.
 325  * Copyright (c) 2014      Intel, Inc. All rights reserved
 326  * $COPYRIGHT$
 327  *
 328  * Additional copyrights may follow
 329  *
 330  * $HEADER$
 331  */
 332
 333 /* static functions */
 334 static int assignnodes(int ndim, int nfactor, int *pfacts,int **pdims);
 335 static int getfactors(int num, int *nfators, int **factors);
 336
 337 /*
 338  * This is a utility function, no need to have anything in the lower layer for this at all
 339  */
 340 int smpi_mpi_dims_create(int nnodes, int ndims, int dims[])
 341 {
 342   /* Get # of free-to-be-assigned processes and # of free dimensions */
 343   int freeprocs = nnodes;
 344   int freedims = 0;
 345   int *p = dims;
 346   for (int i = 0; i < ndims; ++i) {
 347     if (*p == 0) {
 348       ++freedims;
 349     } else if ((*p < 0) || ((nnodes % *p) != 0)) {
 350       return  MPI_ERR_DIMS;
 351
 352     } else {
 353       freeprocs /= *p;
 354     }
 355     p++;
 356   }
 357
 358   if (freedims == 0) {
 359     if (freeprocs == 1) {
 360       return MPI_SUCCESS;
 361     }
 362     return MPI_ERR_DIMS;
 363   }
 364
 365   if (freeprocs == 1) {
 366     for (int i = 0; i < ndims; ++i) {
 367       if (*dims == 0) {
 368         *dims = 1;
 369       }
 370       dims++;
 371     }
 372     return MPI_SUCCESS;
 373   }
 374
 375   /* Factor the number of free processes */
 376   int nfactors;
 377   int *factors;
 378   int err = getfactors(freeprocs, &nfactors, &factors);
 379   if (MPI_SUCCESS != err)
 380     return  err;
 381
 382   /* Assign free processes to free dimensions */
 383   int *procs;
 384   err = assignnodes(freedims, nfactors, factors, &procs);
 385   if (MPI_SUCCESS != err)
 386     return err;
 387
 388   /* Return assignment results */
 389   p = procs;
 390   for (int i = 0; i < ndims; ++i) {
 391     if (*dims == 0) {
 392       *dims = *p++;
 393     }
 394     dims++;
 395   }
 396
 397   delete[] factors;
 398   delete[] procs;
 399
 400   /* all done */
 401   return MPI_SUCCESS;
 402 }
 403
 404 /*
 405  *  assignnodes
 406  *
 407  *  Function:   - assign processes to dimensions
 408  *          - get "best-balanced" grid
 409  *          - greedy bin-packing algorithm used
 410  *          - sort dimensions in decreasing order
 411  *          - dimensions array dynamically allocated
 412  *  Accepts:    - # of dimensions
 413  *          - # of prime factors
 414  *          - array of prime factors
 415  *          - ptr to array of dimensions (returned value)
 416  *  Returns:    - 0 or ERROR
 417  */
 418 static int assignnodes(int ndim, int nfactor, int *pfacts, int **pdims)
 419 {
 420   int *pmin;
 421
 422   if (0 >= ndim) {
 423     return MPI_ERR_DIMS;
 424   }
 425
 426   /* Allocate and initialize the bins */
 427   int *bins = new int[ndim];
 428
 429   *pdims = bins;
 430   int *p = bins;
 431
 432   for (int i = 0 ; i < ndim; ++i) {
 433     *p = 1;
 434     p++;
 435   }
 436   /* Loop assigning factors from the highest to the lowest */
 437   for (int j = nfactor - 1; j >= 0; --j) {
 438     int f = pfacts[j];
 439     /* Assign a factor to the smallest bin */
 440     pmin = bins;
 441     p = pmin + 1;
 442     for (int i = 1; i < ndim; ++i) {
 443       if (*p < *pmin) {
 444         pmin = p;
 445       }
 446       p++;
 447     }
 448     *pmin *= f;
 449   }
 450
 451   /* Sort dimensions in decreasing order (O(n^2) for now) */
 452   pmin = bins;
 453   for (int i = 0; i < ndim - 1; ++i) {
 454     p = pmin + 1;
 455     for (int j = i + 1; j < ndim; ++j) {
 456       if (*p > *pmin) {
 457         int n = *p;
 458         *p = *pmin;
 459         *pmin = n;
 460       }
 461       p++;
 462     }
 463     pmin++;
 464   }
 465
 466   return MPI_SUCCESS;
 467 }
 468
 469 /*
 470  *  getfactors
 471  *
 472  *  Function:   - factorize a number
 473  *  Accepts:    - number
 474  *          - # prime factors
 475  *          - array of prime factors
 476  *  Returns:    - MPI_SUCCESS or ERROR
 477  */
 478 static int getfactors(int num, int *nfactors, int **factors) {
 479   if(num  < 2) {
 480     (*nfactors) = 0;
 481     (*factors) = nullptr;
 482     return MPI_SUCCESS;
 483   }
 484   /* Allocate the array of prime factors which cannot exceed log_2(num) entries */
 485   int sqrtnum = ceil(sqrt(num));
 486   int size = ceil(log(num) / log(2));
 487   *factors = new int[size];
 488
 489   int i = 0;
 490   /* determine all occurrences of factor 2 */
 491   while((num % 2) == 0) {
 492     num /= 2;
 493     (*factors)[i++] = 2;
 494   }
 495   /* determine all occurrences of uneven prime numbers up to sqrt(num) */
 496   for(int d = 3; (num > 1) && (d < sqrtnum); d += 2) {
 497     while((num % d) == 0) {
 498       num /= d;
 499       (*factors)[i++] = d;
 500     }
 501   }
 502   /* as we looped only up to sqrt(num) one factor > sqrt(num) may be left over */
 503   if(num != 1) {
 504     (*factors)[i++] = num;
 505   }
 506   (*nfactors) = i;
 507   return MPI_SUCCESS;
 508 }