examples/smpi/MM/Summa.c

   1 /*!
   2  * Classical Block Matrix Multiplication example
   3  *
   4  */
   5
   6 #include "Matrix_init.h"
   7 #include "Summa.h"
   8 #include "xbt/log.h"
   9  XBT_LOG_NEW_DEFAULT_CATEGORY(MM_Summa,
  10                              "Messages specific for this msg example");
  11
  12 inline double Summa(
  13                      double *a, double *b, double *c,
  14                      size_t lda, size_t ldb, size_t ldc,
  15                      size_t m, size_t k_a, size_t k_b, size_t n,
  16                      size_t Block_size, size_t start, size_t end,
  17                      size_t row, size_t col, size_t size_row, size_t size_col,
  18                      double *a_local, double *b_local,
  19                      MPI_Datatype Block_a, MPI_Datatype Block_a_local,
  20                      MPI_Datatype Block_b,
  21                      MPI_Comm row_comm, MPI_Comm col_comm, int subs)
  22 {
  23   double *B_a     , *B_b     ; //matrix blocks
  24   size_t err;
  25   //double alpha = 1, beta = 1;  //C := alpha * a * b + beta * c
  26   size_t B_proc_col, B_proc_row; // Number of bloc(row or col) on one processor
  27   B_proc_col =  k_b / Block_size;  // Number of block on one processor
  28   B_proc_row = k_a / Block_size; // Number of block on one processor
  29
  30   //size_t lda = k_a, ldb = n, ldc = n;
  31   size_t lda_local = lda;
  32   size_t ldb_local = ldb;
  33
  34
  35   double time, computation_time = 0, communication_time = 0;
  36   double start_time, end_time; //time mesure
  37   double start_time_intern, end_time_intern; //time mesure
  38
  39
  40
  41
  42   start_time = MPI_Wtime();
  43
  44   /*-------------Distributed Matrix Multiplication algorithm-----------------*/
  45   size_t iter;
  46   for( iter = start; iter < end; iter++ ){
  47     size_t pivot_row, pivot_col, pos_a, pos_b;
  48 #ifdef CYCLIC
  49     // pivot row on processor layer
  50     pivot_row = (iter % size_col);
  51     pivot_col = (iter % size_row);
  52     //position of the block
  53     if(subs == 1){
  54       pos_a = (size_t)((iter - start) / size_row) * Block_size;
  55       pos_b = (size_t)((iter - start) / size_col) * ldb * Block_size;
  56     }else{
  57       pos_a = (size_t)(iter / size_row) * Block_size;
  58       pos_b = (size_t)(iter / size_col) * ldb * Block_size;
  59     }
  60 #else
  61     // pivot row on processor layer
  62     pivot_row = (size_t)(iter / B_proc_col) % size_col;
  63     pivot_col = (size_t)(iter / B_proc_row) % size_row;
  64     //position of the block
  65     if(subs == 1){
  66       pos_a = ((iter - start) % B_proc_row) * Block_size;
  67       pos_b = ((iter - start) % B_proc_col) * ldb * Block_size;
  68     }else{
  69       pos_a = (iter % B_proc_row) * Block_size;
  70       pos_b = (iter % B_proc_col) * ldb * Block_size;
  71     }
  72 #endif
  73     XBT_DEBUG( "pivot: %zu, iter: %zu, B_proc_col: %zu, "
  74                 "size_col:%zu, size_row: %zu\n",
  75                 pivot_row, iter, B_proc_row,size_col,size_row);
  76     MPI_Barrier(row_comm);
  77     MPI_Barrier(col_comm);
  78
  79     start_time_intern = MPI_Wtime();
  80     //Broadcast the row
  81     if(size_row > 1){
  82       MPI_Datatype * Block;
  83       if( pivot_col != col ){
  84         B_a = a_local;
  85         lda_local = Block_size;
  86         XBT_DEBUG("recieve B_a %zu,%zu \n",m , Block_size);
  87         Block = &Block_a_local;
  88       }else{
  89         B_a = a + pos_a;
  90         lda_local = lda;
  91         XBT_DEBUG("sent B_a %zu,%zu \n",m , Block_size);
  92         Block = &Block_a;
  93       }
  94       err = MPI_Bcast(B_a, 1, *Block, pivot_col, row_comm);
  95       if (err != MPI_SUCCESS) {
  96         perror("Error Bcast A\n");
  97         return -1;
  98       }
  99     }else{
 100       B_a = a + pos_a;
 101       XBT_DEBUG("position of B_a: %zu \n", pos_a);
 102     }
 103
 104     //Broadcast the col
 105     if(size_col > 1){
 106       if( pivot_row == row ){
 107         B_b = b + pos_b;
 108         XBT_DEBUG("sent B_b Block_size: %zu, pos:%zu \n",
 109                     ldb, pos_b);
 110       }else{
 111         B_b = b_local;
 112         XBT_DEBUG("recieve B_b %zu,%zu \n", Block_size,n);
 113       }
 114       err = MPI_Bcast(B_b, 1, Block_b, pivot_row, col_comm );
 115       if (err != MPI_SUCCESS) {
 116         perror("Error Bcast B\n");
 117         MPI_Finalize();
 118         exit(-1);
 119       }
 120     }else{
 121       B_b = b + pos_b;
 122       XBT_DEBUG("position of B_b: %zu \n", pos_b);
 123     }
 124     end_time_intern = MPI_Wtime();
 125     communication_time += start_time_intern - end_time_intern;
 126
 127     MPI_Barrier(row_comm);
 128     MPI_Barrier(col_comm);
 129     start_time_intern = MPI_Wtime();
 130     XBT_DEBUG("execute Gemm number: %zu\n", iter);
 131     //We have recieved a line of block and a colomn
 132    //              cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans,
 133    //               m, n, Block_size, alpha, B_a, lda_local, B_b, ldb_local,
 134    //               beta, c, ldc );
 135     int i, j, k;
 136     for(i = 0; i < m; i++)
 137       for(j = 0; j < n; j++)
 138         for(k = 0; k < Block_size; k++)
 139           c[i*ldc+j] += B_a[i*lda_local+k]*B_b[k*ldb_local+j];
 140
 141     end_time_intern = MPI_Wtime();
 142     computation_time += start_time_intern - end_time_intern;
 143
 144   }
 145   MPI_Barrier(row_comm);
 146   MPI_Barrier(col_comm);
 147
 148   end_time = MPI_Wtime();
 149   time = start_time - end_time;
 150   printf("communication time: %le nanoseconds, "
 151          "computation time: %le nanoseconds\n",
 152          communication_time, computation_time);
 153
 154
 155   return time;
 156 }