examples/smpi/MM/Summa.c

   1 /* Copyright (c) 2012-2014. The SimGrid Team.
   2  * All rights reserved.                                                     */
   3
   4 /* This program is free software; you can redistribute it and/or modify it
   5  * under the terms of the license (GNU LGPL) which comes with this package. */
   6
   7 /* Classical Block Matrix Multiplication example */
   8
   9 #include "Matrix_init.h"
  10 #include "Summa.h"
  11 #include "xbt/log.h"
  12 #include <stdio.h>
  13
  14 XBT_LOG_NEW_DEFAULT_CATEGORY(MM_Summa, "Messages specific for this msg example");
  15
  16 double Summa(double *a, double *b, double *c, size_t lda, size_t ldb, size_t ldc,
  17              size_t m, size_t k_a, size_t k_b, size_t n, size_t Block_size, size_t start, size_t end,
  18              size_t row, size_t col, size_t size_row, size_t size_col, double *a_local, double *b_local,
  19              MPI_Datatype Block_a, MPI_Datatype Block_a_local, MPI_Datatype Block_b,
  20              MPI_Comm row_comm, MPI_Comm col_comm, int subs)
  21 {
  22   double *B_a     , *B_b     ; //matrix blocks
  23   size_t err;
  24   //double alpha = 1, beta = 1;  //C := alpha * a * b + beta * c
  25   size_t B_proc_row; // Number of bloc(row or col) on one processor
  26 #ifndef CYCLIC
  27   size_t B_proc_col;
  28   B_proc_col =  k_b / Block_size;  // Number of block on one processor
  29 #endif
  30   B_proc_row = k_a / Block_size; // Number of block on one processor
  31
  32   //size_t lda = k_a, ldb = n, ldc = n;
  33   size_t lda_local = lda;
  34   size_t ldb_local = ldb;
  35
  36   double time, computation_time = 0, communication_time = 0;
  37   double start_time, end_time; //time mesure
  38   double start_time_intern, end_time_intern; //time mesure
  39
  40   start_time = MPI_Wtime();
  41
  42   /*-------------Distributed Matrix Multiplication algorithm-----------------*/
  43   size_t iter;
  44   for( iter = start; iter < end; iter++ ){
  45     size_t pivot_row, pivot_col, pos_a, pos_b;
  46 #ifdef CYCLIC
  47     // pivot row on processor layer
  48     pivot_row = (iter % size_col);
  49     pivot_col = (iter % size_row);
  50     //position of the block
  51     if(subs == 1){
  52       pos_a = (size_t)((iter - start) / size_row) * Block_size;
  53       pos_b = (size_t)((iter - start) / size_col) * ldb * Block_size;
  54     }else{
  55       pos_a = (size_t)(iter / size_row) * Block_size;
  56       pos_b = (size_t)(iter / size_col) * ldb * Block_size;
  57     }
  58 #else
  59     // pivot row on processor layer
  60     pivot_row = (size_t)(iter / B_proc_col) % size_col;
  61     pivot_col = (size_t)(iter / B_proc_row) % size_row;
  62     //position of the block
  63     if(subs == 1){
  64       pos_a = ((iter - start) % B_proc_row) * Block_size;
  65       pos_b = ((iter - start) % B_proc_col) * ldb * Block_size;
  66     }else{
  67       pos_a = (iter % B_proc_row) * Block_size;
  68       pos_b = (iter % B_proc_col) * ldb * Block_size;
  69     }
  70 #endif
  71     XBT_DEBUG( "pivot: %zu, iter: %zu, B_proc_col: %zu, size_col:%zu, size_row: %zu\n",
  72                 pivot_row, iter, B_proc_row,size_col,size_row);
  73 /*    MPI_Barrier(row_comm);*/
  74 /*    MPI_Barrier(col_comm);*/
  75
  76     start_time_intern = MPI_Wtime();
  77     //Broadcast the row
  78     if(size_row > 1){
  79       MPI_Datatype * Block;
  80       if( pivot_col != col ){
  81         B_a = a_local;
  82         lda_local = Block_size;
  83         XBT_DEBUG("recieve B_a %zu,%zu \n",m , Block_size);
  84         Block = &Block_a_local;
  85       }else{
  86         B_a = a + pos_a;
  87         lda_local = lda;
  88         XBT_DEBUG("sent B_a %zu,%zu \n",m , Block_size);
  89         Block = &Block_a;
  90       }
  91       err = MPI_Bcast(B_a, 1, *Block, pivot_col, row_comm);
  92       if (err != MPI_SUCCESS) {
  93         perror("Error Bcast A\n");
  94         return -1;
  95       }
  96     }else{
  97       B_a = a + pos_a;
  98       XBT_DEBUG("position of B_a: %zu \n", pos_a);
  99     }
 100
 101     //Broadcast the col
 102     if(size_col > 1){
 103       if( pivot_row == row ){
 104         B_b = b + pos_b;
 105         XBT_DEBUG("sent B_b Block_size: %zu, pos:%zu \n", ldb, pos_b);
 106       }else{
 107         B_b = b_local;
 108         XBT_DEBUG("recieve B_b %zu,%zu \n", Block_size,n);
 109       }
 110       err = MPI_Bcast(B_b, 1, Block_b, pivot_row, col_comm );
 111       if (err != MPI_SUCCESS) {
 112         perror("Error Bcast B\n");
 113         MPI_Finalize();
 114         exit(-1);
 115       }
 116     }else{
 117       B_b = b + pos_b;
 118       XBT_DEBUG("position of B_b: %zu \n", pos_b);
 119     }
 120     end_time_intern = MPI_Wtime();
 121     communication_time += end_time_intern - start_time_intern;
 122
 123 /*    MPI_Barrier(row_comm);*/
 124 /*    MPI_Barrier(col_comm);*/
 125     start_time_intern = MPI_Wtime();
 126     XBT_DEBUG("execute Gemm number: %zu\n", iter);
 127     //We have recieved a line of block and a colomn
 128    //              cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans,
 129    //               m, n, Block_size, alpha, B_a, lda_local, B_b, ldb_local,
 130    //               beta, c, ldc );
 131     int i, j, k;
 132     for(i = 0; i < m; i++)
 133       for(j = 0; j < n; j++)
 134         for(k = 0; k < Block_size; k++)
 135           c[i*ldc+j] += B_a[i*lda_local+k]*B_b[k*ldb_local+j];
 136
 137     end_time_intern = MPI_Wtime();
 138     computation_time += end_time_intern - start_time_intern;
 139
 140   }
 141   MPI_Barrier(row_comm);
 142   MPI_Barrier(col_comm);
 143
 144   end_time = MPI_Wtime();
 145   time = end_time - start_time ;
 146   printf("communication time: %e seconds, computation time: %e seconds\n", communication_time, computation_time);
 147
 148   return time;
 149 }