docs/source/tuto_smpi/gemm_mpi.cpp

   1 /* Copyright (c) 2019-2023. The SimGrid Team. All rights reserved.          */
   2
   3 /* This program is free software; you can redistribute it and/or modify it
   4  * under the terms of the license (GNU LGPL) which comes with this package. */
   5
   6 #include <mpi.h>
   7 #include <stdio.h>
   8
   9 const int size = 3000;
  10
  11 float a[size][size];
  12 float b[size][size];
  13 float c[size][size];
  14
  15 void multiply(int istart, int iend)
  16 {
  17     for (int i = istart; i <= iend; ++i){
  18         for (int j = 0; j < size; ++j) {
  19             for (int k = 0; k < size; ++k) {
  20                 c[i][j] += a[i][k] * b[k][j];
  21             }
  22         }
  23     }
  24 }
  25
  26 int main(int argc, char* argv[])
  27 {
  28     int rank, nproc;
  29     int istart, iend;
  30     // double start, end;
  31
  32     MPI_Init(&argc, &argv);
  33     MPI_Comm_size(MPI_COMM_WORLD, &nproc);
  34     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  35
  36     // MPI_Barrier(MPI_COMM_WORLD);
  37     // start = MPI_Wtime();
  38
  39     if (rank == 0) {
  40         // Initialize buffers.
  41         for (int i = 0; i < size; ++i) {
  42             for (int j = 0; j < size; ++j) {
  43                 a[i][j] = (float)i + j;
  44                 b[i][j] = (float)i - j;
  45                 c[i][j] = 0.0f;
  46             }
  47         }
  48     }
  49
  50     // Broadcast matrices to all workers.
  51     MPI_Bcast(a, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
  52     MPI_Bcast(b, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
  53     MPI_Bcast(c, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
  54
  55     // Partition work by i-for-loop.
  56     istart = (size / nproc) * rank;
  57     iend = (size / nproc) * (rank + 1) - 1;
  58
  59     // Compute matrix multiplication in [istart,iend]
  60     // of i-for-loop.
  61     // C <- C + A x B
  62     multiply(istart, iend);
  63
  64     // Gather computed results.
  65     MPI_Gather(c + (size/nproc*rank),
  66                size*size/nproc,
  67                MPI_FLOAT,
  68                c + (size/nproc*rank),
  69                size*size/nproc,
  70                MPI_FLOAT,
  71                0,
  72                MPI_COMM_WORLD);
  73
  74     if (rank == 0) {
  75         // Compute remaining multiplications
  76         // when size % nproc > 0.
  77         if (size % nproc > 0) {
  78             multiply((size/nproc)*nproc, size-1);
  79         }
  80     }
  81
  82     // MPI_Barrier(MPI_COMM_WORLD);
  83     // end = MPI_Wtime();
  84
  85     MPI_Finalize();
  86
  87     // if (rank == 0) { /* use time on master node */
  88     //     float msec_total = 0.0f;
  89
  90     //     // Compute and print the performance
  91     //     float msec_per_matrix_mul = end-start;
  92     //     double flops_per_matrix_mul = 2.0 * (double)size * (double)size * (double)size;
  93     //     double giga_flops = (flops_per_matrix_mul * 1.0e-9f) / (msec_per_matrix_mul / 1000.0f);
  94     //     printf(
  95     //         "Performance= %.2f GFlop/s, Time= %.3f msec, Size= %.0f Ops\n",
  96     //         giga_flops,
  97     //         msec_per_matrix_mul,
  98     //         flops_per_matrix_mul);
  99     // }
 100
 101     return 0;
 102 }