examples/smpi/MM/MM_mpi.c

   1 /* Copyright (c) 2012, 2014. The SimGrid Team.
   2  * All rights reserved.                                                     */
   3
   4 /* This program is free software; you can redistribute it and/or modify it
   5  * under the terms of the license (GNU LGPL) which comes with this package. */
   6
   7 /* Block Matrix Multiplication example */
   8
   9 #include "Matrix_init.h"
  10 #include "Summa.h"
  11 #include "xbt/log.h"
  12 #include <xbt/str.h>
  13
  14 #include <stdio.h>
  15 #include <string.h>
  16 #include <mpi.h>
  17 #include <math.h>
  18 #include <getopt.h>
  19
  20 #define CHECK_25D 1
  21
  22 XBT_LOG_NEW_DEFAULT_CATEGORY(MM_mpi, "Messages specific for this msg example");
  23
  24 static double two_dot_five(size_t m, size_t k, size_t n, size_t Block_size, size_t group, size_t key, size_t size_row,
  25                            size_t size_col, size_t NB_groups ){
  26   double *a,  *b,  *c;
  27   double *res = NULL;
  28   /* Split the communicator into groups */
  29
  30   /* Find out my identity in the default communicator */
  31   int myrank;
  32   int NB_proc;
  33   int err;
  34   int useless = 0;
  35
  36   double time, communication_time = 0;
  37   double start_time, end_time; //time mesure
  38   double end_time_intern; //time mesure
  39   double start_time_reduce, end_time_reduce; //time mesure
  40
  41   MPI_Comm my_world;
  42
  43   if ( group >= NB_groups ){
  44     XBT_DEBUG("Not enough group NB_groups : %zu my group id : %zu\n", NB_groups, group);
  45     MPI_Comm_split(MPI_COMM_WORLD, 0, key, &my_world);
  46     return -1;
  47   }else{
  48     MPI_Comm_split(MPI_COMM_WORLD, 1, key, &my_world);
  49   }
  50
  51   MPI_Comm_size (my_world, &NB_proc);
  52
  53   if ( NB_proc < (int)(size_row*size_col*NB_groups) ){
  54     XBT_INFO("Not enough processors NB_proc : %d required : %zu\n", NB_proc, size_row*size_col*NB_groups);
  55     return -1;
  56   }
  57
  58   MPI_Comm group_comm;
  59   MPI_Comm_split(my_world, group, key, &group_comm);
  60
  61   MPI_Comm_rank(group_comm, &myrank);
  62   MPI_Comm_size (group_comm, &NB_proc);
  63   /* for each group start the execution of his */
  64
  65   NB_proc=size_row*size_col;
  66   size_t row = myrank / size_row;
  67   size_t col = myrank % size_row;
  68
  69   /*-------------------------Check some mandatory conditions------------------*/
  70   size_t NB_Block = k / Block_size;
  71   if ( k % Block_size != 0 ){
  72     XBT_INFO("The matrix size has to be proportionnal to the number of blocks: %zu\n", NB_Block);
  73     return -1;
  74   }
  75
  76   if ( size_row > NB_Block || size_col > NB_Block ){
  77     XBT_INFO("Number of blocks is too small compare to the number of processors (%zu,%zu) in a row or a col (%zu)\n",
  78              size_col, size_row, NB_Block);
  79     return -1;
  80   }
  81
  82   if ( NB_Block % size_row != 0 || NB_Block % size_col != 0){
  83     XBT_INFO("The number of Block by processor is not an integer\n");
  84     return -1;
  85   }
  86
  87   if(row >= size_col || col >= size_row){
  88     XBT_INFO( "I'm useless bye!!! col: %zu row: %zu, size_col: %zu , size_row: %zu \n", col,row,size_col,size_row);
  89     useless = 1;
  90   }
  91
  92   if(useless == 1){
  93     /*----------------------Prepare the Communication Layer-------------------*/
  94     /* add useless processor on a new color to execute the matrix
  95      * multiplication with the other processors*/
  96
  97     /* Split comm size_to row and column comms */
  98     MPI_Comm row_comm, col_comm, group_line;
  99     MPI_Comm_split(my_world, myrank, MPI_UNDEFINED, &group_line);
 100     /* color by row, rank by column */
 101     MPI_Comm_split(group_comm, size_row, MPI_UNDEFINED, &row_comm);
 102     /* color by column, rank by row */
 103     MPI_Comm_split(group_comm, size_col, MPI_UNDEFINED, &col_comm);
 104     /*------------------------Communication Layer can be used-----------------*/
 105
 106     return 0;
 107   }
 108   XBT_DEBUG("I'm initialized col: %zu row: %zu, size_col: %zu , size_row: %zu, my rank: %d \n", col,row,size_col,
 109             size_row, myrank);
 110
 111   /*------------------------Initialize the matrices---------------------------*/
 112   /* think about a common interface
 113    *  int pdgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, a, ia, ja, lda, b, ib, jb, ldb,
 114    *              beta, c, ldc, Comm, rank );
 115    */
 116
 117   /*------------------------Prepare the Communication Layer-------------------*/
 118   /* Split comm size_to row and column comms */
 119   MPI_Comm row_comm, col_comm, group_line;
 120   MPI_Comm_split(my_world, myrank, group, &group_line);
 121   /* color by row, rank by column */
 122   MPI_Comm_split(group_comm, row, col, &row_comm);
 123   /* color by column, rank by row */
 124   MPI_Comm_split(group_comm, col, row, &col_comm);
 125   /*-------------------------Communication Layer can be used------------------*/
 126
 127   // matrix sizes
 128   m   = m   / size_col;
 129   n   = n   / size_row;
 130   size_t k_a = k / size_row;
 131   size_t k_b = k / size_col;
 132
 133   /*only on the group 0*/
 134   if( group == 0 ) {
 135     matrices_initialisation(&a, &b, &c, m, k_a, k_b, n, row, col);
 136     if( NB_groups > 1 ) res = malloc( sizeof(double)*m*n );
 137   } else matrices_allocation(&a, &b, &c, m, k_a, k_b, n);
 138
 139   /*-------------------Configuration for Summa algorihtm--------------------*/
 140   /*--------------------Allocation of matrices block-------------------------*/
 141   double *a_Summa, *b_Summa;
 142   blocks_initialisation(&a_Summa, &b_Summa, m, Block_size, n);
 143
 144   /*--------------------Communication types for MPI--------------------------*/
 145   MPI_Datatype Block_a;
 146   MPI_Datatype Block_a_local;
 147   MPI_Datatype Block_b;
 148   MPI_Type_vector(m , Block_size, k_a, MPI_DOUBLE, &Block_a);
 149   MPI_Type_vector(m , Block_size, Block_size, MPI_DOUBLE, &Block_a_local);
 150   MPI_Type_vector(Block_size, n, n, MPI_DOUBLE, &Block_b);
 151   MPI_Type_commit(&Block_a);
 152   MPI_Type_commit(&Block_a_local);
 153   MPI_Type_commit(&Block_b);
 154   /*-------------Communication types for MPI are configured------------------*/
 155
 156   MPI_Barrier(my_world);
 157   start_time = MPI_Wtime();
 158   if( NB_groups > 1 ) {
 159     err = MPI_Bcast(a, m*k_a, MPI_DOUBLE, 0, group_line);
 160     if (err != MPI_SUCCESS) {
 161       perror("Error Bcast A\n");
 162       return -1;
 163     }
 164     err = MPI_Bcast(b, n*k_b, MPI_DOUBLE, 0, group_line);
 165     if (err != MPI_SUCCESS) {
 166       perror("Error Bcast B\n");
 167       return -1;
 168     }
 169     MPI_Barrier(my_world);
 170   }
 171   end_time_intern = MPI_Wtime();
 172   communication_time += end_time_intern - start_time;
 173
 174   XBT_INFO( "group %zu NB_block: %zu, NB_groups %zu\n",group,NB_Block, NB_groups);
 175   XBT_INFO("m %zu,  k_a %zu,  k_b %zu,  n %zu,  Block_size %zu, group*NB_Block/NB_groups %zu, "
 176            "(group+1)*NB_Block/NB_groups %zu, row %zu,  col %zu, size_row %zu,  size_col %zu\n",m, k_a, k_b, n,
 177            Block_size, group*NB_Block/NB_groups, (group+1)*NB_Block/NB_groups,row, col, size_row, size_col);
 178
 179
 180   Summa(a, b, c, k_a, n, n, m, k_a, k_b, n, Block_size, group*NB_Block/NB_groups, (group+1)*NB_Block/NB_groups,
 181           row, col, size_row, size_col, a_Summa, b_Summa, Block_a, Block_a_local, Block_b, row_comm, col_comm, 0);
 182
 183   /*-------------------------End Summa algorihtm-----------------------------*/
 184
 185   MPI_Comm_rank(group_line, &myrank);
 186
 187   MPI_Barrier(my_world);
 188   start_time_reduce = MPI_Wtime();
 189   if( NB_groups > 1 ) {
 190     // a gather is better?
 191     err = MPI_Reduce(c, res, m*n, MPI_DOUBLE, MPI_SUM, 0, group_line);
 192     if (err != MPI_SUCCESS) {
 193       perror("Error Bcast A\n");
 194       return -1;
 195     }
 196   }else{
 197     double *swap= c;
 198     c = res;
 199     res=swap;
 200   }
 201   MPI_Barrier(my_world);
 202   end_time_reduce = MPI_Wtime();
 203
 204   MPI_Barrier(my_world);
 205   end_time = MPI_Wtime();
 206   time = end_time - start_time;
 207   double reduce_time = end_time_reduce - start_time_reduce;
 208   printf("communication time: %e reduce time: %e seconds, total time: %e seconds\n", communication_time, reduce_time,
 209          time);
 210   MPI_Barrier(my_world);
 211
 212 #if CHECK_25D
 213   if(myrank == 0)
 214     check_result(res, a, b, m, n, k_a, k_b, row, col, size_row, size_col);
 215 #endif
 216
 217   // close properly the pragram
 218   MPI_Type_free(&Block_a);
 219   MPI_Type_free(&Block_a_local);
 220   MPI_Type_free(&Block_b);
 221
 222   free(a_Summa);
 223   free(b_Summa);
 224
 225   free( a );
 226   free( b );
 227   if( NB_groups > 1 ) {
 228     free( c );
 229   }
 230   free(res);
 231
 232   MPI_Barrier(MPI_COMM_WORLD);
 233   MPI_Comm_free(&my_world);
 234   MPI_Comm_free(&group_comm);
 235   MPI_Comm_free(&group_line);
 236   MPI_Comm_free(&row_comm);
 237   MPI_Comm_free(&col_comm);
 238   return 0;
 239 }
 240
 241 int main(int argc, char ** argv)
 242 {
 243   size_t    m   = 1024 , n   = 1024 , k = 1024;
 244   size_t    NB_Block = 16;
 245   size_t    Block_size = k/NB_Block ;
 246   size_t    NB_groups = 1, group = 0, key = 0;
 247   /* x index on M
 248      y index on N
 249      Z index on K */
 250
 251   int myrank;
 252   int NB_proc;
 253   size_t row, col, size_row, size_col; //description: vitual processor topology
 254   row = 0;
 255   col = 0;
 256
 257   MPI_Init(&argc, &argv);
 258
 259   /* Find out my identity in the default communicator */
 260   MPI_Comm_rank ( MPI_COMM_WORLD, &myrank );
 261   MPI_Comm_size ( MPI_COMM_WORLD, &NB_proc );
 262
 263   if(NB_proc != 1)
 264     for (size_col=NB_proc/2; NB_proc%size_col; size_col--);
 265   else
 266     size_col = 1;
 267
 268   size_row = NB_proc/size_col;
 269   if (size_row > size_col){
 270     size_col = size_row;
 271     size_row = NB_proc/size_col;
 272   }
 273
 274 #if DEBUG_MPI
 275   size_t loop=1;
 276   while(loop==1);
 277 #endif
 278
 279   int opt = 0;
 280   optind = 1;
 281
 282   //get the parameter from command line
 283   while ((opt = getopt(argc, argv, "hr:c:M:N:K:B:G:g:k:P:")) != -1) {
 284     switch(opt) {
 285       case 'h':
 286         XBT_INFO("Usage: mxm_cblas_test [options]\n"
 287                  "  -M I  M size (default: %zu)\n"
 288                  "  -N I  N size (default: %zu)\n"
 289                  "  -K I  K size (default: %zu)\n"
 290                  "  -B I  Block size on the k dimension (default: %zu)\n"
 291                  "  -G I  Number of processor groups (default: %zu)\n"
 292                  "  -g I  group index (default: %zu)\n"
 293                  "  -k I  group rank (default: %zu)\n"
 294                  "  -r I  processor row size (default: %zu)\n"
 295                  "  -c I  processor col size (default: %zu)\n"
 296                  "  -h  help\n", m, n, k, Block_size, NB_groups, group, key, row, col);
 297         return 0;
 298       case 'M':
 299         m = xbt_str_parse_int(optarg, "Invalid M size: %s");
 300         break;
 301       case 'N':
 302         n = xbt_str_parse_int(optarg, "Invalid N size: %s");
 303         break;
 304       case 'K':
 305         k = xbt_str_parse_int(optarg, "Invalid K size: %s");
 306         break;
 307       case 'B':
 308         Block_size = xbt_str_parse_int(optarg, "Invalid block size: %s");
 309         break;
 310       case 'G':
 311         NB_groups = xbt_str_parse_int(optarg, "Invalid number of processor groups: %s");
 312         break;
 313       case 'g':
 314         group = xbt_str_parse_int(optarg, "Invalid group index: %s");
 315         break;
 316       case 'k':
 317         key = xbt_str_parse_int(optarg, "Invalid group rank: %s");
 318         break;
 319       case 'r':
 320         size_row = xbt_str_parse_int(optarg, "Invalid processor row size: %s");
 321         break;
 322       case 'c':
 323         size_col = xbt_str_parse_int(optarg, "Invalid processor col size: %s");
 324         break;
 325     }
 326   }
 327
 328   two_dot_five( m, k, n, Block_size, group, key, size_row, size_col,  NB_groups);
 329
 330   // close properly the program
 331   MPI_Barrier(MPI_COMM_WORLD);
 332   MPI_Finalize();
 333   return 0;
 334 }