examples/smpi/mvmul.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <mpi.h>
   4
   5 #define ITERATIONS         10
   6
   7 #define USAGE_ERROR        1
   8 #define SANITY_ERROR       2
   9 #define GETTIMEOFDAY_ERROR 3
  10
  11 int main(int argc, char *argv[])
  12 {
  13
  14   int size, rank;
  15   int N, n, i, j, k, current_iteration, successful_iterations = 0;
  16   double *matrix, *vector, *vcalc, *vcheck;
  17   MPI_Status status;
  18   struct timeval *start_time, *stop_time;
  19   long parallel_usecs, parallel_usecs_total =
  20     0, sequential_usecs, sequential_usecs_total = 0;
  21
  22   MPI_Init(&argc, &argv);
  23
  24   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  25   MPI_Comm_size(MPI_COMM_WORLD, &size);
  26
  27   if (0 == rank) {
  28
  29     // root node parses cmdline args
  30     if (2 > argc || !isdigit(*argv[1])) {
  31       printf("usage:\n%s <size>\n", argv[0]);
  32       MPI_Abort(MPI_COMM_WORLD, USAGE_ERROR);
  33       exit(USAGE_ERROR);
  34     }
  35
  36     N = atoi(argv[1]);
  37
  38     start_time = (struct timeval *) malloc(sizeof(struct timeval));
  39     stop_time = (struct timeval *) malloc(sizeof(struct timeval));
  40
  41   }
  42
  43   for (current_iteration = 0; current_iteration < ITERATIONS;
  44        current_iteration++) {
  45
  46     if (0 == rank) {
  47
  48       matrix = (double *) malloc(N * N * sizeof(double));
  49       vector = (double *) malloc(N * sizeof(double));
  50
  51       for (i = 0; i < N * N; i++) {
  52         matrix[i] = (double) rand() / ((double) RAND_MAX + 1);
  53       }
  54
  55       for (i = 0; i < N; i++) {
  56         vector[i] = (double) rand() / ((double) RAND_MAX + 1);
  57       }
  58
  59       // for the sake of argument, the parallel algorithm begins
  60       // when the root node begins to transmit the matrix to the
  61       // workers.
  62       if (-1 == gettimeofday(start_time, NULL)) {
  63         printf("couldn't set start_time on node 0!\n");
  64         MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
  65         exit(GETTIMEOFDAY_ERROR);
  66       }
  67
  68       for (i = 1; i < size; i++) {
  69         MPI_Send(&N, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
  70       }
  71
  72     } else {
  73       MPI_Recv(&N, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
  74     }
  75
  76     // this algorithm uses at most N processors...
  77     if (rank < N) {
  78
  79       if (size > N)
  80         size = N;
  81       n = N / size + ((rank < (N % size)) ? 1 : 0);
  82
  83       if (0 == rank) {
  84
  85         for (i = 1, j = n; i < size && j < N; i++, j += k) {
  86           k = N / size + ((i < (N % size)) ? 1 : 0);
  87           MPI_Send(matrix + N * j, N * k, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
  88           MPI_Send(vector, N, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
  89         }
  90
  91         // sanity check
  92 #ifdef DEBUG
  93         if (i != size || j != N) {
  94           printf("index calc error: i = %d, size = %d, j = %d, N = %d\n", i,
  95                  size, j, N);
  96           MPI_Abort(MPI_COMM_WORLD, SANITY_ERROR);
  97           exit(SANITY_ERROR);
  98         }
  99 #endif
 100
 101         vcalc = (double *) malloc(N * sizeof(double));
 102
 103       } else {
 104
 105         matrix = (double *) malloc(N * n * sizeof(double));
 106         vector = (double *) malloc(N * sizeof(double));
 107         vcalc = (double *) malloc(n * sizeof(double));
 108
 109         MPI_Recv(matrix, N * n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
 110         MPI_Recv(vector, N, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
 111
 112       }
 113
 114       for (i = 0; i < n; i++) {
 115         vcalc[i] = 0.0;
 116         for (j = 0; j < N; j++) {
 117           vcalc[i] += matrix[N * i + j] * vector[j];
 118         }
 119       }
 120
 121       if (0 != rank) {
 122         MPI_Send(vcalc, n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
 123       } else {
 124
 125         for (i = 1, j = n; i < size && j < N; i++, j += k) {
 126           k = N / size + ((i < (N % size)) ? 1 : 0);
 127           MPI_Recv(vcalc + j, k, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &status);
 128         }
 129
 130         // sanity check
 131 #ifdef DEBUG
 132         if (i != size || j != N) {
 133           printf("index calc error 2: i = %d, size = %d, j = %d, N = %d\n", i,
 134                  size, j, N);
 135           MPI_Abort(MPI_COMM_WORLD, SANITY_ERROR);
 136           exit(SANITY_ERROR);
 137         }
 138 #endif
 139
 140         if (-1 == gettimeofday(stop_time, NULL)) {
 141           printf("couldn't set stop_time on node 0!\n");
 142           MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
 143           exit(GETTIMEOFDAY_ERROR);
 144         }
 145
 146         parallel_usecs =
 147           (stop_time->tv_sec * 1000000 + stop_time->tv_usec) -
 148           (start_time->tv_sec * 1000000 + start_time->tv_usec);
 149
 150         if (-1 == gettimeofday(start_time, NULL)) {
 151           printf("couldn't set start_time on node 0!\n");
 152           MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
 153           exit(GETTIMEOFDAY_ERROR);
 154         }
 155         // calculate serially
 156         vcheck = (double *) malloc(N * sizeof(double));
 157         for (i = 0; i < N; i++) {
 158           vcheck[i] = 0.0;
 159           for (j = 0; j < N; j++) {
 160             vcheck[i] += matrix[N * i + j] * vector[j];
 161           }
 162         }
 163
 164         if (-1 == gettimeofday(stop_time, NULL)) {
 165           printf("couldn't set stop_time on node 0!\n");
 166           MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
 167           exit(GETTIMEOFDAY_ERROR);
 168         }
 169
 170         sequential_usecs =
 171           (stop_time->tv_sec * 1000000 + stop_time->tv_usec) -
 172           (start_time->tv_sec * 1000000 + start_time->tv_usec);
 173
 174         // verify correctness
 175         for (i = 0; i < N && vcalc[i] == vcheck[i]; i++);
 176
 177         printf("prog: blocking, i: %d ", current_iteration);
 178
 179         if (i == N) {
 180           printf
 181             ("ptime: %ld us, stime: %ld us, speedup: %.3f, nodes: %d, efficiency: %.3f\n",
 182              parallel_usecs, sequential_usecs,
 183              (double) sequential_usecs / (double) parallel_usecs, size,
 184              (double) sequential_usecs / ((double) parallel_usecs *
 185                                           (double) size));
 186
 187           parallel_usecs_total += parallel_usecs;
 188           sequential_usecs_total += sequential_usecs;
 189           successful_iterations++;
 190         } else {
 191           printf("parallel calc != serial calc, ");
 192         }
 193
 194         free(vcheck);
 195
 196       }
 197
 198       free(matrix);
 199       free(vector);
 200       free(vcalc);
 201     }
 202
 203   }
 204
 205   if (0 == rank) {
 206     printf("prog: blocking, ");
 207     if (0 < successful_iterations) {
 208       printf
 209         ("iterations: %d, avg. ptime: %.3f us, avg. stime: %.3f us, avg. speedup: %.3f, nodes: %d, avg. efficiency: %.3f\n",
 210          successful_iterations,
 211          (double) parallel_usecs_total / (double) successful_iterations,
 212          (double) sequential_usecs_total / (double) successful_iterations,
 213          (double) sequential_usecs_total / (double) parallel_usecs_total,
 214          size,
 215          (double) sequential_usecs_total / ((double) parallel_usecs_total *
 216                                             (double) size));
 217     } else {
 218       printf("no successful iterations!\n");
 219     }
 220
 221     free(start_time);
 222     free(stop_time);
 223
 224   }
 225
 226   MPI_Finalize();
 227
 228   return 0;
 229 }