examples/smpi/mvmul.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <ctype.h>
   4 #include <mpi.h>
   5
   6 #define ITERATIONS         10
   7
   8 #define USAGE_ERROR        1
   9 #define SANITY_ERROR       2
  10 #define GETTIMEOFDAY_ERROR 3
  11
  12 int main(int argc, char *argv[])
  13 {
  14
  15   int size, rank;
  16   int N, n, i, j, k, current_iteration, successful_iterations = 0;
  17   double *matrix, *vector, *vcalc, *vcheck;
  18   MPI_Status status;
  19   struct timeval *start_time, *stop_time;
  20   long parallel_usecs, parallel_usecs_total =
  21     0, sequential_usecs, sequential_usecs_total = 0;
  22
  23   MPI_Init(&argc, &argv);
  24
  25   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  26   MPI_Comm_size(MPI_COMM_WORLD, &size);
  27
  28   if (0 == rank) {
  29
  30     // root node parses cmdline args
  31     if (2 > argc || !isdigit(*argv[1])) {
  32       printf("usage:\n%s <size>\n", argv[0]);
  33       MPI_Abort(MPI_COMM_WORLD, USAGE_ERROR);
  34       exit(USAGE_ERROR);
  35     }
  36
  37     N = atoi(argv[1]);
  38
  39     start_time = (struct timeval *) malloc(sizeof(struct timeval));
  40     stop_time = (struct timeval *) malloc(sizeof(struct timeval));
  41
  42   }
  43
  44   for (current_iteration = 0; current_iteration < ITERATIONS;
  45        current_iteration++) {
  46
  47     if (0 == rank) {
  48
  49       matrix = (double *) malloc(N * N * sizeof(double));
  50       vector = (double *) malloc(N * sizeof(double));
  51
  52       for (i = 0; i < N * N; i++) {
  53         matrix[i] = (double) rand() / ((double) RAND_MAX + 1);
  54       }
  55
  56       for (i = 0; i < N; i++) {
  57         vector[i] = (double) rand() / ((double) RAND_MAX + 1);
  58       }
  59
  60       // for the sake of argument, the parallel algorithm begins
  61       // when the root node begins to transmit the matrix to the
  62       // workers.
  63       if (-1 == gettimeofday(start_time, NULL)) {
  64         printf("couldn't set start_time on node 0!\n");
  65         MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
  66         exit(GETTIMEOFDAY_ERROR);
  67       }
  68
  69       for (i = 1; i < size; i++) {
  70         MPI_Send(&N, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
  71       }
  72
  73     } else {
  74       MPI_Recv(&N, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
  75     }
  76
  77     // this algorithm uses at most N processors...
  78     if (rank < N) {
  79
  80       if (size > N)
  81         size = N;
  82       n = N / size + ((rank < (N % size)) ? 1 : 0);
  83
  84       if (0 == rank) {
  85
  86         for (i = 1, j = n; i < size && j < N; i++, j += k) {
  87           k = N / size + ((i < (N % size)) ? 1 : 0);
  88           MPI_Send(matrix + N * j, N * k, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
  89           MPI_Send(vector, N, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
  90         }
  91
  92         // sanity check
  93 #ifdef DEBUG
  94         if (i != size || j != N) {
  95           printf("index calc error: i = %d, size = %d, j = %d, N = %d\n", i,
  96                  size, j, N);
  97           MPI_Abort(MPI_COMM_WORLD, SANITY_ERROR);
  98           exit(SANITY_ERROR);
  99         }
 100 #endif
 101
 102         vcalc = (double *) malloc(N * sizeof(double));
 103
 104       } else {
 105
 106         matrix = (double *) malloc(N * n * sizeof(double));
 107         vector = (double *) malloc(N * sizeof(double));
 108         vcalc = (double *) malloc(n * sizeof(double));
 109
 110         MPI_Recv(matrix, N * n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
 111         MPI_Recv(vector, N, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
 112
 113       }
 114
 115       for (i = 0; i < n; i++) {
 116         vcalc[i] = 0.0;
 117         for (j = 0; j < N; j++) {
 118           vcalc[i] += matrix[N * i + j] * vector[j];
 119         }
 120       }
 121
 122       if (0 != rank) {
 123         MPI_Send(vcalc, n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
 124       } else {
 125
 126         for (i = 1, j = n; i < size && j < N; i++, j += k) {
 127           k = N / size + ((i < (N % size)) ? 1 : 0);
 128           MPI_Recv(vcalc + j, k, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &status);
 129         }
 130
 131         // sanity check
 132 #ifdef DEBUG
 133         if (i != size || j != N) {
 134           printf("index calc error 2: i = %d, size = %d, j = %d, N = %d\n", i,
 135                  size, j, N);
 136           MPI_Abort(MPI_COMM_WORLD, SANITY_ERROR);
 137           exit(SANITY_ERROR);
 138         }
 139 #endif
 140
 141         if (-1 == gettimeofday(stop_time, NULL)) {
 142           printf("couldn't set stop_time on node 0!\n");
 143           MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
 144           exit(GETTIMEOFDAY_ERROR);
 145         }
 146
 147         parallel_usecs =
 148           (stop_time->tv_sec * 1000000 + stop_time->tv_usec) -
 149           (start_time->tv_sec * 1000000 + start_time->tv_usec);
 150
 151         if (-1 == gettimeofday(start_time, NULL)) {
 152           printf("couldn't set start_time on node 0!\n");
 153           MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
 154           exit(GETTIMEOFDAY_ERROR);
 155         }
 156         // calculate serially
 157         vcheck = (double *) malloc(N * sizeof(double));
 158         for (i = 0; i < N; i++) {
 159           vcheck[i] = 0.0;
 160           for (j = 0; j < N; j++) {
 161             vcheck[i] += matrix[N * i + j] * vector[j];
 162           }
 163         }
 164
 165         if (-1 == gettimeofday(stop_time, NULL)) {
 166           printf("couldn't set stop_time on node 0!\n");
 167           MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
 168           exit(GETTIMEOFDAY_ERROR);
 169         }
 170
 171         sequential_usecs =
 172           (stop_time->tv_sec * 1000000 + stop_time->tv_usec) -
 173           (start_time->tv_sec * 1000000 + start_time->tv_usec);
 174
 175         // verify correctness
 176         for (i = 0; i < N && vcalc[i] == vcheck[i]; i++);
 177
 178         printf("prog: blocking, i: %d ", current_iteration);
 179
 180         if (i == N) {
 181           printf
 182             ("ptime: %ld us, stime: %ld us, speedup: %.3f, nodes: %d, efficiency: %.3f\n",
 183              parallel_usecs, sequential_usecs,
 184              (double) sequential_usecs / (double) parallel_usecs, size,
 185              (double) sequential_usecs / ((double) parallel_usecs *
 186                                           (double) size));
 187
 188           parallel_usecs_total += parallel_usecs;
 189           sequential_usecs_total += sequential_usecs;
 190           successful_iterations++;
 191         } else {
 192           printf("parallel calc != serial calc, ");
 193         }
 194
 195         free(vcheck);
 196
 197       }
 198
 199       free(matrix);
 200       free(vector);
 201       free(vcalc);
 202     }
 203
 204   }
 205
 206   if (0 == rank) {
 207     printf("prog: blocking, ");
 208     if (0 < successful_iterations) {
 209       printf
 210         ("iterations: %d, avg. ptime: %.3f us, avg. stime: %.3f us, avg. speedup: %.3f, nodes: %d, avg. efficiency: %.3f\n",
 211          successful_iterations,
 212          (double) parallel_usecs_total / (double) successful_iterations,
 213          (double) sequential_usecs_total / (double) successful_iterations,
 214          (double) sequential_usecs_total / (double) parallel_usecs_total,
 215          size,
 216          (double) sequential_usecs_total / ((double) parallel_usecs_total *
 217                                             (double) size));
 218     } else {
 219       printf("no successful iterations!\n");
 220     }
 221
 222     free(start_time);
 223     free(stop_time);
 224
 225   }
 226
 227   MPI_Finalize();
 228
 229   return 0;
 230 }