src/smpi/smpi_topo.c

   1 /* Copyright (c) 2014. The SimGrid Team.
   2  * All rights reserved.                                                     */
   3
   4 /* This program is free software; you can redistribute it and/or modify it
   5  * under the terms of the license (GNU LGPL) which comes with this package. */
   6
   7
   8 #include "xbt/sysdep.h"
   9 #include "smpi/smpi.h"
  10 #include "private.h"
  11
  12 #include <math.h>
  13
  14 typedef struct s_smpi_mpi_cart_topology {
  15     int nnodes;
  16     int ndims;
  17     int *dims;
  18     int *periodic;
  19     int *position;
  20 } s_smpi_mpi_cart_topology_t;
  21
  22 typedef struct s_smpi_mpi_graph_topology {
  23     int nnodes;
  24     int nedges;
  25     int *index;
  26     int *edges;
  27 } s_smpi_mpi_graph_topology_t;
  28
  29 typedef struct s_smpi_dist_graph_topology {
  30     int indegree;
  31     int *in;
  32     int *in_weights;
  33     int outdegree;
  34     int *out;
  35     int *out_weights;
  36     int is_weighted;
  37 } s_smpi_mpi_dist_graph_topology_t;
  38
  39 typedef struct s_smpi_mpi_topology {
  40     MPIR_Topo_type kind;
  41     union topo {
  42         MPIR_Graph_Topology graph;
  43         MPIR_Cart_Topology  cart;
  44         MPIR_Dist_Graph_Topology dist_graph;
  45     } topo;
  46 } s_smpi_mpi_topology_t;
  47
  48 void smpi_topo_destroy(MPI_Topology topo) {
  49     if(topo == NULL) {
  50         return;
  51     }
  52     switch (topo->kind) {
  53     case MPI_GRAPH:
  54         // Not implemented
  55         // smpi_graph_topo_destroy(topo->topo.graph);
  56         break;
  57     case MPI_CART:
  58         smpi_cart_topo_destroy(topo->topo.cart);
  59         break;
  60     case MPI_DIST_GRAPH:
  61         // Not implemented
  62         // smpi_dist_graph_topo_destroy(topo->topo.dist_graph);
  63         break;
  64     default:
  65         return;
  66         break;
  67     }
  68 }
  69
  70 MPI_Topology smpi_topo_create(MPIR_Topo_type kind) {
  71     MPI_Topology newTopo = xbt_malloc(sizeof(*newTopo));
  72     newTopo->kind = kind;
  73     // Allocate and initialize the right topo should be done by the caller
  74     return newTopo;
  75 }
  76
  77 /*******************************************************************************
  78  * Cartesian topologies
  79  ******************************************************************************/
  80 void smpi_cart_topo_destroy(MPIR_Cart_Topology cart) {
  81     if (cart) {
  82         if(cart->dims) {
  83             free(cart->dims);
  84         }
  85         if(cart->periodic) {
  86             free(cart->periodic);
  87         }
  88         if(cart->position) {
  89             free(cart->position);
  90         }
  91         free(cart);
  92     }
  93 }
  94
  95 MPI_Topology smpi_cart_topo_create(int ndims) {
  96     MPI_Topology newTopo = smpi_topo_create(MPI_CART);
  97     MPIR_Cart_Topology newCart = xbt_malloc(sizeof(*newCart));
  98     newCart->nnodes = 0;
  99     newCart->ndims = ndims;
 100     newCart->dims = xbt_malloc(ndims * sizeof(*newCart->dims));
 101     newCart->periodic = xbt_malloc(ndims * sizeof(*newCart->periodic));
 102     newCart->position = xbt_malloc(ndims * sizeof(*newCart->position));
 103     newTopo->topo.cart = newCart;
 104     return newTopo;
 105 }
 106
 107 /* reorder is ignored, don't know what would be the consequences of a dumb
 108  * reordering but neither do I see the point of reordering*/
 109 int smpi_mpi_cart_create(MPI_Comm comm_old, int ndims, int dims[],
 110                          int periods[], int reorder, MPI_Comm *comm_cart) {
 111     int retval = MPI_SUCCESS;
 112     int i;
 113     MPI_Topology newCart;
 114     MPI_Group newGroup, oldGroup;
 115     int rank, nranks, newSize;
 116
 117     rank = smpi_comm_rank(comm_old);
 118
 119     newSize = 1;
 120     if(ndims != 0) {
 121         for (i = 0 ; i < ndims ; i++) {
 122             newSize *= dims[i];
 123         }
 124         if(rank >= newSize) {
 125             *comm_cart = MPI_COMM_NULL;
 126             return retval;
 127         }
 128         newCart = smpi_cart_topo_create(ndims);
 129         oldGroup = smpi_comm_group(comm_old);
 130         newGroup = smpi_group_new(newSize);
 131         for (i = 0 ; i < newSize ; i++) {
 132             smpi_group_set_mapping(newGroup, smpi_group_index(oldGroup, i), i);
 133         }
 134
 135         newCart->topo.cart->nnodes = newSize;
 136
 137         /* memcpy(newCart->topo.cart->dims, dims, */
 138         /*        ndims * sizeof(*newCart->topo.cart->dims)); */
 139         /* memcpy(newCart->topo.cart->periodic, periods, */
 140         /*        ndims * sizeof(*newCart->topo.cart->periodic)); */
 141
 142         //  FIXME : code duplication... See smpi_mpi_cart_coords
 143         nranks = newSize;
 144         for (i=0; i<ndims; i++)
 145         {
 146             newCart->topo.cart->dims[i] = dims[i];
 147             newCart->topo.cart->periodic[i] = periods[i];
 148             nranks = nranks / dims[i];
 149             /* FIXME: nranks could be zero (?) */
 150             newCart->topo.cart->position[i] = rank / nranks;
 151             rank = rank % nranks;
 152         }
 153
 154         *comm_cart = smpi_comm_new(newGroup, newCart);
 155     }
 156     else {
 157         if (rank == 0) {
 158             newCart = smpi_cart_topo_create(ndims);
 159             *comm_cart = smpi_comm_new(smpi_comm_group(MPI_COMM_SELF), newCart);
 160         }
 161         else {
 162             *comm_cart = MPI_COMM_NULL;
 163         }
 164     }
 165     return retval;
 166 }
 167
 168 int smpi_mpi_cart_sub(MPI_Comm comm, const int remain_dims[], MPI_Comm *newcomm) {
 169     MPI_Topology oldTopo = smpi_comm_topo(comm);
 170     int oldNDims = oldTopo->topo.cart->ndims;
 171     int i, j = 0, newNDims, *newDims = NULL, *newPeriodic = NULL;
 172
 173     if (remain_dims == NULL && oldNDims != 0) {
 174         return MPI_ERR_ARG;
 175     }
 176     newNDims = 0;
 177     for (i = 0 ; i < oldNDims ; i++) {
 178         if (remain_dims[i]) newNDims++;
 179     }
 180
 181     if (newNDims > 0) {
 182         newDims = xbt_malloc(newNDims * sizeof(*newDims));
 183         newPeriodic = xbt_malloc(newNDims * sizeof(*newPeriodic));
 184
 185         // that should not segfault
 186         for (i = 0 ; j < newNDims ; i++) {
 187             if(remain_dims[i]) {
 188                 newDims[j] = oldTopo->topo.cart->dims[i];
 189                 newPeriodic[j] = oldTopo->topo.cart->periodic[i];
 190                 j++;
 191             }
 192         }
 193     }
 194     return smpi_mpi_cart_create(comm, newNDims, newDims, newPeriodic, 0, newcomm);
 195 }
 196
 197
 198
 199
 200 int smpi_mpi_cart_coords(MPI_Comm comm, int rank, int maxdims,
 201                          int coords[]) {
 202     int nnodes;
 203     int i;
 204     MPI_Topology topo = smpi_comm_topo(comm);
 205
 206     nnodes = topo->topo.cart->nnodes;
 207     for ( i=0; i < topo->topo.cart->ndims; i++ ) {
 208         nnodes    = nnodes / topo->topo.cart->dims[i];
 209         coords[i] = rank / nnodes;
 210         rank      = rank % nnodes;
 211     }
 212     return MPI_SUCCESS;
 213 }
 214
 215 int smpi_mpi_cart_get(MPI_Comm comm, int maxdims, int* dims, int* periods, int* coords) {
 216     MPI_Topology topo = smpi_comm_topo(comm);
 217     int i;
 218     for(i = 0 ; i < maxdims ; i++) {
 219         dims[i] = topo->topo.cart->dims[i];
 220         periods[i] = topo->topo.cart->periodic[i];
 221         coords[i] = topo->topo.cart->position[i];
 222     }
 223     return MPI_SUCCESS;
 224 }
 225
 226 int smpi_mpi_cart_rank(MPI_Comm comm, int* coords, int* rank) {
 227     MPI_Topology topo = smpi_comm_topo(comm);
 228     int ndims = topo->topo.cart->ndims;
 229     int multiplier, coord,i;
 230     *rank = 0;
 231     multiplier = 1;
 232
 233     for ( i=ndims-1; i >=0; i-- ) {
 234         coord = coords[i];
 235
 236         /* The user can give us whatever coordinates he wants. If one of them is
 237          * out of range, either this dimension is periodic, and then we
 238          * consider the equivalent coordinate inside the bounds, or it is not
 239          * and then it is an error
 240          */
 241         if (coord >= topo->topo.cart->dims[i]) {
 242             if ( topo->topo.cart->periodic[i] ) {
 243                 coord = coord % topo->topo.cart->dims[i];
 244             }
 245             else {
 246                 // Should I do that ?
 247                 *rank = -1;
 248                 return MPI_ERR_ARG;
 249             }
 250         }
 251         else if (coord <  0) {
 252             if(topo->topo.cart->periodic[i]) {
 253                 coord = coord % topo->topo.cart->dims[i];
 254                 if (coord) coord = topo->topo.cart->dims[i] + coord;
 255             }
 256             else {
 257                 *rank = -1;
 258                 return MPI_ERR_ARG;
 259             }
 260         }
 261
 262         *rank += multiplier * coord;
 263         multiplier *= topo->topo.cart->dims[i];
 264     }
 265     return MPI_SUCCESS;
 266 }
 267
 268 int smpi_mpi_cart_shift(MPI_Comm comm, int direction, int disp,
 269                         int *rank_source, int *rank_dest) {
 270     MPI_Topology topo = smpi_comm_topo(comm);
 271     int position[topo->topo.cart->ndims];
 272
 273
 274     if(topo->topo.cart->ndims == 0) {
 275         return MPI_ERR_ARG;
 276     }
 277     if (topo->topo.cart->ndims < direction) {
 278         return MPI_ERR_DIMS;
 279     }
 280
 281     smpi_mpi_cart_coords(comm, smpi_comm_rank(comm), topo->topo.cart->ndims,
 282                          position);
 283     position[direction] += disp;
 284
 285     if(position[direction] < 0 ||
 286        position[direction] >= topo->topo.cart->dims[direction]) {
 287         if(topo->topo.cart->periodic[direction]) {
 288             position[direction] %= topo->topo.cart->dims[direction];
 289             smpi_mpi_cart_rank(comm, position, rank_dest);
 290         }
 291         else {
 292             *rank_dest = MPI_PROC_NULL;
 293         }
 294     }
 295     else {
 296         smpi_mpi_cart_rank(comm, position, rank_dest);
 297     }
 298
 299     position[direction] =  topo->topo.cart->position[direction] - disp;
 300     if(position[direction] < 0 ||
 301        position[direction] >= topo->topo.cart->dims[direction]) {
 302         if(topo->topo.cart->periodic[direction]) {
 303             position[direction] %= topo->topo.cart->dims[direction];
 304             smpi_mpi_cart_rank(comm, position, rank_source);
 305         }
 306         else {
 307             *rank_source = MPI_PROC_NULL;
 308         }
 309     }
 310     else {
 311         smpi_mpi_cart_rank(comm, position, rank_source);
 312     }
 313
 314     return MPI_SUCCESS;
 315 }
 316
 317 int smpi_mpi_cartdim_get(MPI_Comm comm, int *ndims) {
 318     MPI_Topology topo = smpi_comm_topo(comm);
 319
 320     *ndims = topo->topo.cart->ndims;
 321     return MPI_SUCCESS;
 322 }
 323
 324
 325
 326 // Everything below has been taken from ompi, but could be easily rewritten.
 327
 328 /*
 329  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 330  *                         University Research and Technology
 331  *                         Corporation.  All rights reserved.
 332  * Copyright (c) 2004-2005 The University of Tennessee and The University
 333  *                         of Tennessee Research Foundation.  All rights
 334  *                         reserved.
 335  * Copyright (c) 2004-2014 High Performance Computing Center Stuttgart,
 336  *                         University of Stuttgart.  All rights reserved.
 337  * Copyright (c) 2004-2005 The Regents of the University of California.
 338  *                         All rights reserved.
 339  * Copyright (c) 2012      Los Alamos National Security, LLC.  All rights
 340  *                         reserved.
 341  * Copyright (c) 2014      Intel, Inc. All rights reserved
 342  * $COPYRIGHT$
 343  *
 344  * Additional copyrights may follow
 345  *
 346  * $HEADER$
 347  */
 348
 349
 350 /* static functions */
 351 static int assignnodes(int ndim, int nfactor, int *pfacts,int **pdims);
 352 static int getfactors(int num, int *nfators, int **factors);
 353
 354 /*
 355  * This is a utility function, no need to have anything in the lower
 356  * layer for this at all
 357  */
 358 int smpi_mpi_dims_create(int nnodes, int ndims, int dims[])
 359 {
 360     int i;
 361     int freeprocs;
 362     int freedims;
 363     int nfactors;
 364     int *factors;
 365     int *procs;
 366     int *p;
 367     int err;
 368
 369     /* Get # of free-to-be-assigned processes and # of free dimensions */
 370     freeprocs = nnodes;
 371     freedims = 0;
 372     for (i = 0, p = dims; i < ndims; ++i,++p) {
 373         if (*p == 0) {
 374             ++freedims;
 375         } else if ((*p < 0) || ((nnodes % *p) != 0)) {
 376             return  MPI_ERR_DIMS;
 377
 378         } else {
 379             freeprocs /= *p;
 380         }
 381     }
 382
 383     if (freedims == 0) {
 384         if (freeprocs == 1) {
 385             return MPI_SUCCESS;
 386         }
 387         return MPI_ERR_DIMS;
 388     }
 389
 390     if (freeprocs == 1) {
 391         for (i = 0; i < ndims; ++i, ++dims) {
 392             if (*dims == 0) {
 393                 *dims = 1;
 394             }
 395         }
 396         return MPI_SUCCESS;
 397     }
 398
 399     /* Factor the number of free processes */
 400     if (MPI_SUCCESS != (err = getfactors(freeprocs, &nfactors, &factors))) {
 401         return  err;
 402     }
 403
 404     /* Assign free processes to free dimensions */
 405     if (MPI_SUCCESS != (err = assignnodes(freedims, nfactors, factors, &procs))) {
 406         return err;
 407     }
 408
 409     /* Return assignment results */
 410     p = procs;
 411     for (i = 0; i < ndims; ++i, ++dims) {
 412         if (*dims == 0) {
 413             *dims = *p++;
 414         }
 415     }
 416
 417     free((char *) factors);
 418     free((char *) procs);
 419
 420     /* all done */
 421     return MPI_SUCCESS;
 422 }
 423
 424 /*
 425  *  assignnodes
 426  *
 427  *  Function:   - assign processes to dimensions
 428  *          - get "best-balanced" grid
 429  *          - greedy bin-packing algorithm used
 430  *          - sort dimensions in decreasing order
 431  *          - dimensions array dynamically allocated
 432  *  Accepts:    - # of dimensions
 433  *          - # of prime factors
 434  *          - array of prime factors
 435  *          - ptr to array of dimensions (returned value)
 436  *  Returns:    - 0 or ERROR
 437  */
 438 static int
 439 assignnodes(int ndim, int nfactor, int *pfacts, int **pdims)
 440 {
 441     int *bins;
 442     int i, j;
 443     int n;
 444     int f;
 445     int *p;
 446     int *pmin;
 447
 448     if (0 >= ndim) {
 449         return MPI_ERR_DIMS;
 450     }
 451
 452     /* Allocate and initialize the bins */
 453     bins = (int *) malloc((unsigned) ndim * sizeof(int));
 454     if (NULL == bins) {
 455         return MPI_ERR_NO_MEM;
 456     }
 457     *pdims = bins;
 458
 459     for (i = 0, p = bins; i < ndim; ++i, ++p) {
 460         *p = 1;
 461     }
 462
 463     /* Loop assigning factors from the highest to the lowest */
 464     for (j = nfactor - 1; j >= 0; --j) {
 465         f = pfacts[j];
 466         /* Assign a factor to the smallest bin */
 467         pmin = bins;
 468         for (i = 1, p = pmin + 1; i < ndim; ++i, ++p) {
 469             if (*p < *pmin) {
 470                 pmin = p;
 471             }
 472         }
 473         *pmin *= f;
 474     }
 475
 476     /* Sort dimensions in decreasing order (O(n^2) for now) */
 477     for (i = 0, pmin = bins; i < ndim - 1; ++i, ++pmin) {
 478         for (j = i + 1, p = pmin + 1; j < ndim; ++j, ++p) {
 479             if (*p > *pmin) {
 480                 n = *p;
 481                 *p = *pmin;
 482                 *pmin = n;
 483             }
 484         }
 485     }
 486
 487     return MPI_SUCCESS;
 488 }
 489
 490 /*
 491  *  getfactors
 492  *
 493  *  Function:   - factorize a number
 494  *  Accepts:    - number
 495  *          - # prime factors
 496  *          - array of prime factors
 497  *  Returns:    - MPI_SUCCESS or ERROR
 498  */
 499 static int
 500 getfactors(int num, int *nfactors, int **factors) {
 501     int size;
 502     int d;
 503     int i;
 504     int sqrtnum;
 505
 506     if(num  < 2) {
 507         (*nfactors) = 0;
 508         (*factors) = NULL;
 509         return MPI_SUCCESS;
 510     }
 511     /* Allocate the array of prime factors which cannot exceed log_2(num) entries */
 512     sqrtnum = ceil(sqrt(num));
 513     size = ceil(log(num) / log(2));
 514     *factors = (int *) malloc((unsigned) size * sizeof(int));
 515
 516     i = 0;
 517     /* determine all occurences of factor 2 */
 518     while((num % 2) == 0) {
 519         num /= 2;
 520         (*factors)[i++] = 2;
 521     }
 522     /* determine all occurences of uneven prime numbers up to sqrt(num) */
 523     for(d = 3; (num > 1) && (d < sqrtnum); d += 2) {
 524         while((num % d) == 0) {
 525             num /= d;
 526             (*factors)[i++] = d;
 527         }
 528     }
 529     /* as we looped only up to sqrt(num) one factor > sqrt(num) may be left over */
 530     if(num != 1) {
 531         (*factors)[i++] = num;
 532     }
 533     (*nfactors) = i;
 534     return MPI_SUCCESS;
 535 }
 536