src/smpi/sample/mvmul.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <mpi.h>
   4
   5 #define ITERATIONS         10
   6
   7 #define USAGE_ERROR        1
   8 #define SANITY_ERROR       2
   9 #define GETTIMEOFDAY_ERROR 3
  10
  11 int main(int argc, char* argv[]) {
  12
  13   int size, rank;
  14   int N, n, i, j, k, current_iteration, successful_iterations = 0;
  15   double *matrix, *vector, *vcalc, *vcheck;
  16   MPI_Status status;
  17   struct timeval *start_time, *stop_time;
  18   long parallel_usecs, parallel_usecs_total = 0, sequential_usecs, sequential_usecs_total = 0;
  19
  20   MPI_Init(&argc, &argv);
  21
  22   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  23   MPI_Comm_size(MPI_COMM_WORLD, &size);
  24
  25   if (0 == rank) {
  26
  27     // root node parses cmdline args
  28     if (2 > argc || !isdigit(*argv[1])) {
  29       printf("usage:\n%s <size>\n", argv[0]);
  30       MPI_Abort(MPI_COMM_WORLD, USAGE_ERROR);
  31       exit(USAGE_ERROR);
  32     }
  33
  34     N = atoi(argv[1]);
  35
  36     start_time     = (struct timeval *)malloc(sizeof(struct timeval));
  37     stop_time      = (struct timeval *)malloc(sizeof(struct timeval));
  38
  39   }
  40
  41   for(current_iteration = 0; current_iteration < ITERATIONS; current_iteration++) {
  42
  43     if (0 == rank) {
  44
  45       matrix         = (double *)malloc(N*N*sizeof(double));
  46       vector         = (double *)malloc(N*sizeof(double));
  47
  48       for(i = 0; i < N*N; i++) {
  49         matrix[i] = (double)rand()/((double)RAND_MAX + 1);
  50       }
  51
  52       for(i = 0; i < N; i++) {
  53         vector[i] = (double)rand()/((double)RAND_MAX + 1);
  54       }
  55
  56       // for the sake of argument, the parallel algorithm begins
  57       // when the root node begins to transmit the matrix to the
  58       // workers.
  59       if (-1 == gettimeofday(start_time, NULL)) {
  60         printf("couldn't set start_time on node 0!\n");
  61         MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
  62         exit(GETTIMEOFDAY_ERROR);
  63       }
  64
  65       for(i = 1; i < size; i++) {
  66         MPI_Send(&N, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
  67       }
  68
  69     } else {
  70       MPI_Recv(&N, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
  71     }
  72
  73     // this algorithm uses at most N processors...
  74     if (rank < N) {
  75
  76       if (size > N) size = N;
  77       n = N / size + ((rank < (N % size)) ? 1 : 0);
  78
  79       if (0 == rank) {
  80
  81         for(i = 1, j = n; i < size && j < N; i++, j+=k) {
  82           k = N / size + ((i < (N % size)) ? 1 : 0);
  83           MPI_Send(matrix+N*j, N*k, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
  84           MPI_Send(vector, N, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
  85         }
  86
  87         // sanity check
  88         #ifdef DEBUG
  89         if(i != size || j != N) {
  90           printf("index calc error: i = %d, size = %d, j = %d, N = %d\n", i, size, j, N);
  91           MPI_Abort(MPI_COMM_WORLD, SANITY_ERROR);
  92           exit(SANITY_ERROR);
  93         }
  94         #endif
  95
  96         vcalc = (double *)malloc(N*sizeof(double));
  97
  98       } else {
  99
 100         matrix = (double *)malloc(N*n*sizeof(double));
 101         vector = (double *)malloc(N*sizeof(double));
 102         vcalc  = (double *)malloc(n*sizeof(double));
 103
 104         MPI_Recv(matrix, N*n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
 105         MPI_Recv(vector, N, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
 106
 107       }
 108
 109       for(i = 0; i < n; i++) {
 110         vcalc[i] = 0.0;
 111         for(j = 0; j < N; j++) {
 112           vcalc[i] += matrix[N*i+j] * vector[j];
 113         }
 114       }
 115
 116       if (0 != rank) {
 117         MPI_Send(vcalc, n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
 118       } else {
 119
 120         for(i = 1, j = n; i < size && j < N; i++, j+=k) {
 121           k = N / size + ((i < (N % size)) ? 1 : 0);
 122           MPI_Recv(vcalc+j, k, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &status);
 123         }
 124
 125         // sanity check
 126         #ifdef DEBUG
 127         if(i != size || j != N) {
 128           printf("index calc error 2: i = %d, size = %d, j = %d, N = %d\n", i, size, j, N);
 129           MPI_Abort(MPI_COMM_WORLD, SANITY_ERROR);
 130           exit(SANITY_ERROR);
 131         }
 132         #endif
 133
 134         if (-1 == gettimeofday(stop_time, NULL)) {
 135           printf("couldn't set stop_time on node 0!\n");
 136           MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
 137           exit(GETTIMEOFDAY_ERROR);
 138         }
 139
 140         parallel_usecs = (stop_time->tv_sec*1000000+stop_time->tv_usec) - (start_time->tv_sec*1000000+start_time->tv_usec);
 141
 142         if (-1 == gettimeofday(start_time, NULL)) {
 143           printf("couldn't set start_time on node 0!\n");
 144           MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
 145           exit(GETTIMEOFDAY_ERROR);
 146         }
 147
 148         // calculate serially
 149         vcheck = (double *)malloc(N*sizeof(double));
 150         for(i = 0; i < N; i++) {
 151           vcheck[i] = 0.0;
 152           for(j = 0; j < N; j++) {
 153             vcheck[i] += matrix[N*i+j] * vector[j];
 154           }
 155         }
 156
 157         if (-1 == gettimeofday(stop_time, NULL)) {
 158           printf("couldn't set stop_time on node 0!\n");
 159           MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
 160           exit(GETTIMEOFDAY_ERROR);
 161         }
 162
 163         sequential_usecs = (stop_time->tv_sec*1000000+stop_time->tv_usec) - (start_time->tv_sec*1000000+start_time->tv_usec);
 164
 165         // verify correctness
 166         for(i = 0; i < N && vcalc[i] == vcheck[i]; i++);
 167
 168         printf("prog: blocking, i: %d ", current_iteration);
 169
 170         if (i == N) {
 171           printf("ptime: %d us, stime: %d us, speedup: %.3f, nodes: %d, efficiency: %.3f\n",
 172               parallel_usecs,
 173               sequential_usecs,
 174               (double)sequential_usecs / (double)parallel_usecs,
 175               size,
 176               (double)sequential_usecs / ((double)parallel_usecs * (double)size));
 177
 178           parallel_usecs_total += parallel_usecs;
 179           sequential_usecs_total += sequential_usecs;
 180           successful_iterations++;
 181         } else {
 182           printf("parallel calc != serial calc, ");
 183         }
 184
 185         free(vcheck);
 186
 187       }
 188
 189       free(matrix);
 190       free(vector);
 191       free(vcalc);
 192     }
 193
 194   }
 195
 196   if(0 == rank) {
 197     printf("prog: blocking, ");
 198     if(0 < successful_iterations) {
 199       printf("iterations: %d, avg. ptime: %d us, avg. stime: %d us, avg. speedup: %.3f, nodes: %d, avg. efficiency: %.3f\n",
 200           successful_iterations,
 201           parallel_usecs_total / successful_iterations,
 202           sequential_usecs_total / successful_iterations,
 203           (double)sequential_usecs_total / (double)parallel_usecs_total,
 204           size,
 205           (double)sequential_usecs_total / ((double)parallel_usecs_total * (double)size));
 206     } else {
 207       printf("no successful iterations!\n");
 208     }
 209
 210     free(start_time);
 211     free(stop_time);
 212
 213   }
 214
 215   MPI_Finalize();
 216
 217   return 0;
 218 }