examples/smpi/gemm/gemm.c

   1 /* Copyright (c) 2019-2022. The SimGrid Team. All rights reserved.          */
   2
   3 /* This program is free software; you can redistribute it and/or modify it
   4  * under the terms of the license (GNU LGPL) which comes with this package. */
   5
   6 /*==================================================================================================*/
   7 /*# This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.*/
   8 /*# CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).*/
   9 /*#*/
  10 /*# Author(s):*/
  11 /*#   Valeriu Codreanu <valeriu.codreanu@surfsara.nl>*/
  12 /*#*/
  13 /*# ==================================================================================================*/
  14
#include <limits.h>
#include <stdlib.h>
#include <string.h>

#include "stdio.h"
#include "mpi.h"
  17
/* Forward declarations of the two GEMM kernels defined below.
 * Both take row-major size x size matrices stored as flat arrays and
 * accumulate into c the product a x b for the rows istart..iend (inclusive). */
void multiply(float* a, float* b, float* c, int istart, int iend, int size);
void multiply_sampled(float* a, float* b, float* c, int istart, int iend, int size);
  20
  21
  22 void multiply(float* a, float* b, float* c, int istart, int iend, int size)
  23 {
  24     for (int i = istart; i <= iend; ++i) {
  25         for (int j = 0; j < size; ++j) {
  26             for (int k = 0; k < size; ++k) {
  27                 c[i*size+j] += a[i*size+k] * b[k*size+j];
  28             }
  29         }
  30     }
  31 }
  32
  33 void multiply_sampled(float* a, float* b, float* c, int istart, int iend, int size)
  34 {
  35     //for (int i = istart; i <= iend; ++i) {
  36     SMPI_SAMPLE_GLOBAL (int i = istart, i <= iend, ++i, 10, 0.005){
  37         for (int j = 0; j < size; ++j) {
  38             for (int k = 0; k < size; ++k) {
  39                 c[i*size+j] += a[i*size+k] * b[k*size+j];
  40             }
  41         }
  42     }
  43 }
  44
  45 int main(int argc, char* argv[])
  46 {
  47     int rank, nproc;
  48     int istart, iend;
  49     double start, end;
  50
  51     MPI_Init(&argc, &argv);
  52     MPI_Comm_size(MPI_COMM_WORLD, &nproc);
  53     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  54
  55     if(argc<2){
  56       if (rank == 0)
  57         printf("Usage : gemm size \"native/sampling\"\n");
  58       exit(-1);
  59     }
  60
  61     int size=0;
  62     int read = sscanf(argv[1], "%d", &size);
  63     if(read==0){
  64       if (rank == 0)
  65         printf("Invalid argument %s\n", argv[1]);
  66       exit(-1);
  67     }else{
  68       if (rank == 0)
  69         printf("Matrix Size : %dx%d\n",size,size);
  70     }
  71
  72     float *a = (float*)malloc(sizeof(float)*size*size);
  73     float *b = (float*)malloc(sizeof(float)*size*size);
  74     float *c = (float*)malloc(sizeof(float)*size*size);
  75
  76     MPI_Barrier(MPI_COMM_WORLD);
  77     start = MPI_Wtime();
  78
  79     if (rank == 0) {
  80         // Initialize buffers.
  81         for (int i = 0; i < size; ++i) {
  82             for (int j = 0; j < size; ++j) {
  83                 a[i*size+j] = (float)i + j;
  84                 b[i*size+j] = (float)i - j;
  85                 c[i*size+j] = 0.0f;
  86             }
  87         }
  88     }
  89
  90     // Broadcast matrices to all workers.
  91     MPI_Bcast(a, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
  92     MPI_Bcast(b, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
  93     MPI_Bcast(c, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
  94
  95     // Partition work by i-for-loop.
  96     istart = (size / nproc) * rank;
  97     iend = (size / nproc) * (rank + 1) - 1;
  98
  99     // Compute matrix multiplication in [istart,iend]
 100     // of i-for-loop.
 101     // C <- C + A x B
 102     if (strcmp(argv[2], "sampling")){
 103       if (rank == 0)
 104         printf ("Native mode\n");
 105       multiply(a, b, c, istart, iend, size);
 106     }else{
 107       if (rank == 0)
 108         printf ("Sampling mode\n");
 109       multiply_sampled(a, b, c, istart, iend, size);
 110     }
 111
 112     // Gather computed results.
 113     MPI_Gather(c + (size/nproc*rank),
 114                size*size/nproc,
 115                MPI_FLOAT,
 116                c + (size/nproc*rank),
 117                size*size/nproc,
 118                MPI_FLOAT,
 119                0,
 120                MPI_COMM_WORLD);
 121
 122     if (rank == 0) {
 123         // Compute remaining multiplications
 124         // when size % nproc > 0.
 125         if (size % nproc > 0) {
 126             if (strcmp(argv[2], "sampling"))
 127                 multiply(a, b, c, (size/nproc)*nproc, size-1, size);
 128             else
 129                 multiply_sampled(a, b, c, (size/nproc)*nproc, size-1, size);
 130         }
 131     }
 132
 133     MPI_Barrier(MPI_COMM_WORLD);
 134     end = MPI_Wtime();
 135
 136     MPI_Finalize();
 137     free(a);
 138     free(b);
 139     free(c);
 140     if (rank == 0) { /* use time on master node */
 141         //float msec_total = 0.0f;
 142
 143         // Compute and print the performance
 144         float sec_per_matrix_mul = end-start;
 145         double flops_per_matrix_mul = 2.0 * (double)size * (double)size * (double)size;
 146         double giga_flops = (flops_per_matrix_mul * 1.0e-9f) / (sec_per_matrix_mul / 1000.0f);
 147         printf(
 148             "Performance= %.2f GFlop/s, Time= %.3f sec, Size= %.0f Ops\n",
 149             giga_flops,
 150             sec_per_matrix_mul,
 151             flops_per_matrix_mul);
 152     }
 153
 154     return 0;
 155 }