examples/smpi/gemm/gemm.c

   1 /* Copyright (c) 2019-2021. The SimGrid Team. All rights reserved.          */
   2
   3 /* This program is free software; you can redistribute it and/or modify it
   4  * under the terms of the license (GNU LGPL) which comes with this package. */
   5
   6 /*==================================================================================================*/
   7 /*# This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.*/
   8 /*# CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).*/
   9 /*#*/
  10 /*# Author(s):*/
  11 /*#   Valeriu Codreanu <valeriu.codreanu@surfsara.nl>*/
  12 /*#*/
  13 /*# ==================================================================================================*/
  14
  15 #include "stdio.h"
  16 #include "mpi.h"
  17
  18 void multiply(float* a, float* b, float* c, int istart, int iend, int size);
  19 void multiply_sampled(float* a, float* b, float* c, int istart, int iend, int size);
  20
  21
  22 void multiply(float* a, float* b, float* c, int istart, int iend, int size)
  23 {
  24     for (int i = istart; i <= iend; ++i) {
  25         for (int j = 0; j < size; ++j) {
  26             for (int k = 0; k < size; ++k) {
  27                 c[i*size+j] += a[i*size+k] * b[k*size+j];
  28             }
  29         }
  30     }
  31 }
  32
  33 void multiply_sampled(float* a, float* b, float* c, int istart, int iend, int size)
  34 {
  35     //for (int i = istart; i <= iend; ++i) {
  36     SMPI_SAMPLE_GLOBAL (int i = istart, i <= iend, ++i, 10, 0.005){
  37         for (int j = 0; j < size; ++j) {
  38             for (int k = 0; k < size; ++k) {
  39                 c[i*size+j] += a[i*size+k] * b[k*size+j];
  40             }
  41         }
  42     }
  43 }
  44
  45 int main(int argc, char* argv[])
  46 {
  47     int rank, nproc;
  48     int istart, iend;
  49     double start, end;
  50
  51     MPI_Init(&argc, &argv);
  52     MPI_Comm_size(MPI_COMM_WORLD, &nproc);
  53     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  54
  55     if(argc<2){
  56       if (rank == 0)
  57         printf("Usage : gemm size \"native/sampling\"\n");
  58       exit(-1);
  59     }
  60
  61     int size=0;
  62     int read = sscanf(argv[1], "%d", &size);
  63     if(read==0){
  64       if (rank == 0)
  65         printf("Invalid argument %s\n", argv[1]);
  66       exit(-1);
  67     }else{
  68       if (rank == 0)
  69         printf("Matrix Size : %dx%d\n",size,size);
  70     }
  71
  72     float *a = (float*)malloc(sizeof(float)*size*size);
  73     float *b = (float*)malloc(sizeof(float)*size*size);
  74     float *c = (float*)malloc(sizeof(float)*size*size);
  75
  76
  77     MPI_Barrier(MPI_COMM_WORLD);
  78     start = MPI_Wtime();
  79
  80     if (rank == 0) {
  81         // Initialize buffers.
  82         for (int i = 0; i < size; ++i) {
  83             for (int j = 0; j < size; ++j) {
  84                 a[i*size+j] = (float)i + j;
  85                 b[i*size+j] = (float)i - j;
  86                 c[i*size+j] = 0.0f;
  87             }
  88         }
  89     }
  90
  91     // Broadcast matrices to all workers.
  92     MPI_Bcast(a, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
  93     MPI_Bcast(b, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
  94     MPI_Bcast(c, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
  95
  96     // Partition work by i-for-loop.
  97     istart = (size / nproc) * rank;
  98     iend = (size / nproc) * (rank + 1) - 1;
  99
 100     // Compute matrix multiplication in [istart,iend]
 101     // of i-for-loop.
 102     // C <- C + A x B
 103     if (strcmp(argv[2], "sampling")){
 104       if (rank == 0)
 105         printf ("Native mode\n");
 106       multiply(a, b, c, istart, iend, size);
 107     }else{
 108       if (rank == 0)
 109         printf ("Sampling mode\n");
 110       multiply_sampled(a, b, c, istart, iend, size);
 111     }
 112
 113     // Gather computed results.
 114     MPI_Gather(c + (size/nproc*rank),
 115                size*size/nproc,
 116                MPI_FLOAT,
 117                c + (size/nproc*rank),
 118                size*size/nproc,
 119                MPI_FLOAT,
 120                0,
 121                MPI_COMM_WORLD);
 122
 123     if (rank == 0) {
 124         // Compute remaining multiplications
 125         // when size % nproc > 0.
 126         if (size % nproc > 0) {
 127             if (strcmp(argv[2], "sampling"))
 128                 multiply(a, b, c, (size/nproc)*nproc, size-1, size);
 129             else
 130                 multiply_sampled(a, b, c, (size/nproc)*nproc, size-1, size);
 131         }
 132     }
 133
 134     MPI_Barrier(MPI_COMM_WORLD);
 135     end = MPI_Wtime();
 136
 137     MPI_Finalize();
 138     free(a);
 139     free(b);
 140     free(c);
 141     if (rank == 0) { /* use time on master node */
 142         //float msec_total = 0.0f;
 143
 144         // Compute and print the performance
 145         float sec_per_matrix_mul = end-start;
 146         double flops_per_matrix_mul = 2.0 * (double)size * (double)size * (double)size;
 147         double giga_flops = (flops_per_matrix_mul * 1.0e-9f) / (sec_per_matrix_mul / 1000.0f);
 148         printf(
 149             "Performance= %.2f GFlop/s, Time= %.3f sec, Size= %.0f Ops\n",
 150             giga_flops,
 151             sec_per_matrix_mul,
 152             flops_per_matrix_mul);
 153     }
 154
 155
 156     return 0;
 157 }