1 /* Copyright (c) 2019-2021. The SimGrid Team. All rights reserved. */
3 /* This program is free software; you can redistribute it and/or modify it
4 * under the terms of the license (GNU LGPL) which comes with this package. */
6 /*==================================================================================================*/
7 /*# This file is part of the CodeVault project. The project is licensed under Apache Version 2.0.*/
8 /*# CodeVault is part of the EU-project PRACE-4IP (WP7.3.C).*/
11 /*# Valeriu Codreanu <valeriu.codreanu@surfsara.nl>*/
13 /*# ==================================================================================================*/
18 void multiply(float* a, float* b, float* c, int istart, int iend, int size);
19 void multiply_sampled(float* a, float* b, float* c, int istart, int iend, int size);
/* Plain (non-sampled) GEMM kernel: accumulate the product of the row band
 * [istart, iend] of `a` with all of `b` into the same rows of `c`.
 * All matrices are row-major, size x size; `c` rows are updated in place
 * (c += a * b for the given band), so callers must pre-initialize `c`. */
void multiply(float* a, float* b, float* c, int istart, int iend, int size)
{
  for (int row = istart; row <= iend; ++row) {
    const float* a_row = a + row * size;
    float* c_row       = c + row * size;
    for (int col = 0; col < size; ++col) {
      /* Accumulate into a local so the inner loop touches memory less;
       * the sequence of float additions is unchanged, so results match
       * the naive c[i*size+j] += ... formulation exactly. */
      float acc = c_row[col];
      for (int k = 0; k < size; ++k)
        acc += a_row[k] * b[k * size + col];
      c_row[col] = acc;
    }
  }
}
/* Same row-band GEMM as multiply(), but the outer row loop is wrapped in
 * SimGrid's SMPI_SAMPLE_GLOBAL macro so the simulator executes only a sample
 * of the iterations and extrapolates the runtime of the rest. Numerical
 * results therefore differ from multiply() under simulation (skipped
 * iterations never write to c). */
33 void multiply_sampled(float* a, float* b, float* c, int istart, int iend, int size)
35 //for (int i = istart; i <= iend; ++i) {
/* SMPI_SAMPLE_GLOBAL(init, cond, step, 10, 0.005): takes the place of the
 * commented-out for-loop header above; 10 appears to be the max number of
 * benchmarked iterations and 0.005 a precision threshold for stopping
 * earlier — NOTE(review): confirm exact parameter semantics against the
 * SMPI sampling documentation. */
36 SMPI_SAMPLE_GLOBAL (int i = istart, i <= iend, ++i, 10, 0.005){
37 for (int j = 0; j < size; ++j) {
38 for (int k = 0; k < size; ++k) {
/* Row-major accumulation: c[i][j] += a[i][k] * b[k][j]. */
39 c[i*size+j] += a[i*size+k] * b[k*size+j];
/* Benchmark driver. Usage: gemm <size> native|sampling.
 * Rank 0 broadcasts the matrices, every rank multiplies its contiguous band
 * of rows, and the bands are gathered back. NOTE(review): this chunk is
 * missing intermediate lines (declarations of nproc/rank/size/istart/iend,
 * the timing of start/end, closing braces, MPI_Finalize) — the left-edge
 * numbers are the original file's line numbers. */
45 int main(int argc, char* argv[])
51 MPI_Init(&argc, &argv);
52 MPI_Comm_size(MPI_COMM_WORLD, &nproc);
53 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
/* Expect exactly two arguments: matrix size and the mode string. */
57 printf("Usage : gemm size \"native/sampling\"\n");
62 int read = sscanf(argv[1], "%d", &size);
65 printf("Invalid argument %s\n", argv[1]);
69 printf("Matrix Size : %dx%d\n",size,size);
/* Every rank allocates full size*size matrices (only the gathered bands of
 * c end up meaningful on rank 0). NOTE(review): malloc results are not
 * checked — a huge <size> would crash on NULL deref. */
72 float *a = (float*)malloc(sizeof(float)*size*size);
73 float *b = (float*)malloc(sizeof(float)*size*size);
74 float *c = (float*)malloc(sizeof(float)*size*size);
77 MPI_Barrier(MPI_COMM_WORLD);
81 // Initialize buffers.
82 for (int i = 0; i < size; ++i) {
83 for (int j = 0; j < size; ++j) {
84 a[i*size+j] = (float)i + j;
85 b[i*size+j] = (float)i - j;
91 // Broadcast matrices to all workers.
92 MPI_Bcast(a, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
93 MPI_Bcast(b, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
94 MPI_Bcast(c, size*size, MPI_FLOAT, 0,MPI_COMM_WORLD);
96 // Partition work by i-for-loop.
/* Each rank owns rows [rank*(size/nproc), (rank+1)*(size/nproc) - 1];
 * the remainder when nproc does not divide size is handled below. */
97 istart = (size / nproc) * rank;
98 iend = (size / nproc) * (rank + 1) - 1;
100 // Compute matrix multiplication in [istart,iend]
/* strcmp != 0 means argv[2] is NOT "sampling", so nonzero selects the
 * native (unsampled) kernel; the else branch runs the sampled one. */
103 if (strcmp(argv[2], "sampling")){
105 printf ("Native mode\n");
106 multiply(a, b, c, istart, iend, size);
109 printf ("Sampling mode\n");
110 multiply_sampled(a, b, c, istart, iend, size);
113 // Gather computed results.
/* Send/recv offsets both point at this rank's band inside c. */
114 MPI_Gather(c + (size/nproc*rank),
117 c + (size/nproc*rank),
124 // Compute remaining multiplications
125 // when size % nproc > 0.
/* Leftover rows [(size/nproc)*nproc, size-1] — presumably recomputed on
 * rank 0 (or redundantly on every rank) after the gather; the guarding
 * condition is on a dropped line, so verify against the full file. */
126 if (size % nproc > 0) {
127 if (strcmp(argv[2], "sampling"))
128 multiply(a, b, c, (size/nproc)*nproc, size-1, size);
130 multiply_sampled(a, b, c, (size/nproc)*nproc, size-1, size);
134 MPI_Barrier(MPI_COMM_WORLD);
141 if (rank == 0) { /* use time on master node */
142 //float msec_total = 0.0f;
144 // Compute and print the performance
/* NOTE(review): start/end are set on dropped lines; if they come from
 * MPI_Wtime() they are in SECONDS, yet the GFlop/s formula below divides
 * by (sec_per_matrix_mul / 1000), i.e. treats the delta as milliseconds —
 * that would overstate performance by 1000x. Looks copied from a CUDA
 * sample that measured msec; confirm and fix units in the full file. */
145 float sec_per_matrix_mul = end-start;
/* 2*N^3 flops for one N x N GEMM (one mul + one add per inner step). */
146 double flops_per_matrix_mul = 2.0 * (double)size * (double)size * (double)size;
147 double giga_flops = (flops_per_matrix_mul * 1.0e-9f) / (sec_per_matrix_mul / 1000.0f);
149 "Performance= %.2f GFlop/s, Time= %.3f sec, Size= %.0f Ops\n",
152 flops_per_matrix_mul);