examples/smpi/mvmul.c

   1 /* Copyright (c) 2009-2010, 2013-2014. The SimGrid Team.
   2  * All rights reserved.                                                     */
   3
   4 /* This program is free software; you can redistribute it and/or modify it
   5  * under the terms of the license (GNU LGPL) which comes with this package. */
   6
   7 #include <stdio.h>
   8 #include <stdlib.h>
   9 #include <ctype.h>
  10 #include <mpi.h>
  11
  12 #include <xbt/str.h>
  13
  14 #define ITERATIONS         10
  15
  16 #define USAGE_ERROR        1
  17 #define SANITY_ERROR       2
  18 #define GETTIMEOFDAY_ERROR 3
  19
  20 int main(int argc, char *argv[])
  21 {
  22
  23   int size, rank;
  24   int N, n, i, j, k, current_iteration, successful_iterations = 0;
  25   double *matrix = NULL, *vector = NULL, *vcalc, *vcheck;
  26   MPI_Status status;
  27   struct timeval *start_time = NULL, *stop_time = NULL;
  28   long parallel_usecs, parallel_usecs_total =
  29       0, sequential_usecs, sequential_usecs_total = 0;
  30
  31   MPI_Init(&argc, &argv);
  32
  33   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  34   MPI_Comm_size(MPI_COMM_WORLD, &size);
  35
  36   if (0 == rank) {
  37
  38     // root node parses cmdline args
  39     if (2 > argc || !isdigit(*argv[1])) {
  40       printf("usage:\n%s <size>\n", argv[0]);
  41       MPI_Abort(MPI_COMM_WORLD, USAGE_ERROR);
  42       exit(USAGE_ERROR);
  43     }
  44
  45     N = xbt_str_parse_int(argv[1], "Invalid size: %s");
  46
  47     start_time = (struct timeval *) malloc(sizeof(struct timeval));
  48     stop_time = (struct timeval *) malloc(sizeof(struct timeval));
  49
  50   }
  51
  52   for (current_iteration = 0; current_iteration < ITERATIONS;
  53        current_iteration++) {
  54
  55     if (0 == rank) {
  56
  57       matrix = (double *) malloc(N * N * sizeof(double));
  58       vector = (double *) malloc(N * sizeof(double));
  59
  60       for (i = 0; i < N * N; i++) {
  61         matrix[i] = (double) rand() / ((double) RAND_MAX + 1);
  62       }
  63
  64       for (i = 0; i < N; i++) {
  65         vector[i] = (double) rand() / ((double) RAND_MAX + 1);
  66       }
  67
  68       // for the sake of argument, the parallel algorithm begins
  69       // when the root node begins to transmit the matrix to the
  70       // workers.
  71       if (-1 == gettimeofday(start_time, NULL)) {
  72         printf("couldn't set start_time on node 0!\n");
  73         MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
  74         exit(GETTIMEOFDAY_ERROR);
  75       }
  76
  77       for (i = 1; i < size; i++) {
  78         MPI_Send(&N, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
  79       }
  80
  81     } else {
  82       MPI_Recv(&N, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
  83     }
  84
  85     // this algorithm uses at most N processors...
  86     if (rank < N) {
  87
  88       if (size > N)
  89         size = N;
  90       n = N / size + ((rank < (N % size)) ? 1 : 0);
  91
  92       if (0 == rank) {
  93
  94         for (i = 1, j = n; i < size && j < N; i++, j += k) {
  95           k = N / size + ((i < (N % size)) ? 1 : 0);
  96           MPI_Send(matrix + N * j, N * k, MPI_DOUBLE, i, 0,
  97                    MPI_COMM_WORLD);
  98           MPI_Send(vector, N, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
  99         }
 100
 101         // sanity check
 102 #ifdef DEBUG
 103         if (i != size || j != N) {
 104           printf("index calc error: i = %d, size = %d, j = %d, N = %d\n",
 105                  i, size, j, N);
 106           MPI_Abort(MPI_COMM_WORLD, SANITY_ERROR);
 107           exit(SANITY_ERROR);
 108         }
 109 #endif
 110
 111         vcalc = (double *) malloc(N * sizeof(double));
 112
 113       } else {
 114
 115         matrix = (double *) malloc(N * n * sizeof(double));
 116         vector = (double *) malloc(N * sizeof(double));
 117         vcalc = (double *) malloc(n * sizeof(double));
 118
 119         MPI_Recv(matrix, N * n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
 120         MPI_Recv(vector, N, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
 121
 122       }
 123
 124       for (i = 0; i < n; i++) {
 125         vcalc[i] = 0.0;
 126         for (j = 0; j < N; j++) {
 127           vcalc[i] += matrix[N * i + j] * vector[j];
 128         }
 129       }
 130
 131       if (0 != rank) {
 132         MPI_Send(vcalc, n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
 133       } else {
 134
 135         for (i = 1, j = n; i < size && j < N; i++, j += k) {
 136           k = N / size + ((i < (N % size)) ? 1 : 0);
 137           MPI_Recv(vcalc + j, k, MPI_DOUBLE, i, 0, MPI_COMM_WORLD,
 138                    &status);
 139         }
 140
 141         // sanity check
 142 #ifdef DEBUG
 143         if (i != size || j != N) {
 144           printf("index calc error 2: i = %d, size = %d, j = %d, N = %d\n",
 145                  i, size, j, N);
 146           MPI_Abort(MPI_COMM_WORLD, SANITY_ERROR);
 147           exit(SANITY_ERROR);
 148         }
 149 #endif
 150
 151         if (-1 == gettimeofday(stop_time, NULL)) {
 152           printf("couldn't set stop_time on node 0!\n");
 153           MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
 154           exit(GETTIMEOFDAY_ERROR);
 155         }
 156
 157         parallel_usecs =
 158             (stop_time->tv_sec * 1000000 + stop_time->tv_usec) -
 159             (start_time->tv_sec * 1000000 + start_time->tv_usec);
 160
 161         if (-1 == gettimeofday(start_time, NULL)) {
 162           printf("couldn't set start_time on node 0!\n");
 163           MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
 164           exit(GETTIMEOFDAY_ERROR);
 165         }
 166         // calculate serially
 167         vcheck = (double *) malloc(N * sizeof(double));
 168         for (i = 0; i < N; i++) {
 169           vcheck[i] = 0.0;
 170           for (j = 0; j < N; j++) {
 171             vcheck[i] += matrix[N * i + j] * vector[j];
 172           }
 173         }
 174
 175         if (-1 == gettimeofday(stop_time, NULL)) {
 176           printf("couldn't set stop_time on node 0!\n");
 177           MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
 178           exit(GETTIMEOFDAY_ERROR);
 179         }
 180
 181         sequential_usecs =
 182             (stop_time->tv_sec * 1000000 + stop_time->tv_usec) -
 183             (start_time->tv_sec * 1000000 + start_time->tv_usec);
 184
 185         // verify correctness
 186         for (i = 0; i < N && vcalc[i] == vcheck[i]; i++);
 187
 188         printf("prog: blocking, i: %d ", current_iteration);
 189
 190         if (i == N) {
 191           printf
 192               ("ptime: %ld us, stime: %ld us, speedup: %.3f, nodes: %d, efficiency: %.3f\n",
 193                parallel_usecs, sequential_usecs,
 194                (double) sequential_usecs / (double) parallel_usecs, size,
 195                (double) sequential_usecs / ((double) parallel_usecs *
 196                                             (double) size));
 197
 198           parallel_usecs_total += parallel_usecs;
 199           sequential_usecs_total += sequential_usecs;
 200           successful_iterations++;
 201         } else {
 202           printf("parallel calc != serial calc, ");
 203         }
 204
 205         free(vcheck);
 206
 207       }
 208
 209       free(matrix);
 210       free(vector);
 211       free(vcalc);
 212     }
 213
 214   }
 215
 216   if (0 == rank) {
 217     printf("prog: blocking, ");
 218     if (0 < successful_iterations) {
 219       printf
 220           ("iterations: %d, avg. ptime: %.3f us, avg. stime: %.3f us, avg. speedup: %.3f, nodes: %d, avg. efficiency: %.3f\n",
 221            successful_iterations,
 222            (double) parallel_usecs_total / (double) successful_iterations,
 223            (double) sequential_usecs_total /
 224            (double) successful_iterations,
 225            (double) sequential_usecs_total / (double) parallel_usecs_total,
 226            size,
 227            (double) sequential_usecs_total /
 228            ((double) parallel_usecs_total * (double) size));
 229     } else {
 230       printf("no successful iterations!\n");
 231     }
 232
 233     free(start_time);
 234     free(stop_time);
 235
 236   }
 237
 238   MPI_Finalize();
 239
 240   return 0;
 241 }