docs/source/tuto_smpi/gemm_mpi.cpp

   1 #include "stdio.h"
   2 #include "mpi.h"
   3
   4 const int size = 3000;
   5
   6 float a[size][size];
   7 float b[size][size];
   8 float c[size][size];
   9
  10 void multiply(int istart, int iend)
  11 {
  12     for (int i = istart; i <= iend; ++i){
  13         for (int j = 0; j < size; ++j) {
  14             for (int k = 0; k < size; ++k) {
  15                 c[i][j] += a[i][k] * b[k][j];
  16             }
  17         }
  18     }
  19 }
  20
  21 int main(int argc, char* argv[])
  22 {
  23     int rank, nproc;
  24     int istart, iend;
  25     double start, end;
  26
  27     MPI_Init(&argc, &argv);
  28     MPI_Comm_size(MPI_COMM_WORLD, &nproc);
  29     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  30
  31     // MPI_Barrier(MPI_COMM_WORLD);
  32     // start = MPI_Wtime();
  33
  34     if (rank == 0) {
  35         // Initialize buffers.
  36         for (int i = 0; i < size; ++i) {
  37             for (int j = 0; j < size; ++j) {
  38                 a[i][j] = (float)i + j;
  39                 b[i][j] = (float)i - j;
  40                 c[i][j] = 0.0f;
  41             }
  42         }
  43     }
  44
  45     // Broadcast matrices to all workers.
  46     MPI_Bcast(a, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
  47     MPI_Bcast(b, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
  48     MPI_Bcast(c, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
  49
  50     // Partition work by i-for-loop.
  51     istart = (size / nproc) * rank;
  52     iend = (size / nproc) * (rank + 1) - 1;
  53
  54     // Compute matrix multiplication in [istart,iend]
  55     // of i-for-loop.
  56     // C <- C + A x B
  57     multiply(istart, iend);
  58
  59     // Gather computed results.
  60     MPI_Gather(c + (size/nproc*rank),
  61                size*size/nproc,
  62                MPI_FLOAT,
  63                c + (size/nproc*rank),
  64                size*size/nproc,
  65                MPI_FLOAT,
  66                0,
  67                MPI_COMM_WORLD);
  68
  69     if (rank == 0) {
  70         // Compute remaining multiplications
  71         // when size % nproc > 0.
  72         if (size % nproc > 0) {
  73             multiply((size/nproc)*nproc, size-1);
  74         }
  75     }
  76
  77     // MPI_Barrier(MPI_COMM_WORLD);
  78     // end = MPI_Wtime();
  79
  80     MPI_Finalize();
  81
  82     // if (rank == 0) { /* use time on master node */
  83     //     float msec_total = 0.0f;
  84
  85     //     // Compute and print the performance
  86     //     float msec_per_matrix_mul = end-start;
  87     //     double flops_per_matrix_mul = 2.0 * (double)size * (double)size * (double)size;
  88     //     double giga_flops = (flops_per_matrix_mul * 1.0e-9f) / (msec_per_matrix_mul / 1000.0f);
  89     //     printf(
  90     //         "Performance= %.2f GFlop/s, Time= %.3f msec, Size= %.0f Ops\n",
  91     //         giga_flops,
  92     //         msec_per_matrix_mul,
  93     //         flops_per_matrix_mul);
  94     // }
  95
  96
  97     return 0;
  98 }