examples/smpi/mvmul.c

   1 /* Copyright (c) 2009, 2010. The SimGrid Team.
   2  * All rights reserved.                                                     */
   3
   4 /* This program is free software; you can redistribute it and/or modify it
   5  * under the terms of the license (GNU LGPL) which comes with this package. */
   6
   7 #include <stdio.h>
   8 #include <stdlib.h>
   9 #include <ctype.h>
  10 #include <mpi.h>
  11
  12 #define ITERATIONS         10
  13
  14 #define USAGE_ERROR        1
  15 #define SANITY_ERROR       2
  16 #define GETTIMEOFDAY_ERROR 3
  17
  18 int main(int argc, char *argv[])
  19 {
  20
  21   int size, rank;
  22   int N, n, i, j, k, current_iteration, successful_iterations = 0;
  23   double *matrix = NULL, *vector = NULL, *vcalc, *vcheck;
  24   MPI_Status status;
  25   struct timeval *start_time = NULL, *stop_time = NULL;
  26   long parallel_usecs, parallel_usecs_total =
  27       0, sequential_usecs, sequential_usecs_total = 0;
  28
  29   MPI_Init(&argc, &argv);
  30
  31   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  32   MPI_Comm_size(MPI_COMM_WORLD, &size);
  33
  34   if (0 == rank) {
  35
  36     // root node parses cmdline args
  37     if (2 > argc || !isdigit(*argv[1])) {
  38       printf("usage:\n%s <size>\n", argv[0]);
  39       MPI_Abort(MPI_COMM_WORLD, USAGE_ERROR);
  40       exit(USAGE_ERROR);
  41     }
  42
  43     N = atoi(argv[1]);
  44
  45     start_time = (struct timeval *) malloc(sizeof(struct timeval));
  46     stop_time = (struct timeval *) malloc(sizeof(struct timeval));
  47
  48   }
  49
  50   for (current_iteration = 0; current_iteration < ITERATIONS;
  51        current_iteration++) {
  52
  53     if (0 == rank) {
  54
  55       matrix = (double *) malloc(N * N * sizeof(double));
  56       vector = (double *) malloc(N * sizeof(double));
  57
  58       for (i = 0; i < N * N; i++) {
  59         matrix[i] = (double) rand() / ((double) RAND_MAX + 1);
  60       }
  61
  62       for (i = 0; i < N; i++) {
  63         vector[i] = (double) rand() / ((double) RAND_MAX + 1);
  64       }
  65
  66       // for the sake of argument, the parallel algorithm begins
  67       // when the root node begins to transmit the matrix to the
  68       // workers.
  69       if (-1 == gettimeofday(start_time, NULL)) {
  70         printf("couldn't set start_time on node 0!\n");
  71         MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
  72         exit(GETTIMEOFDAY_ERROR);
  73       }
  74
  75       for (i = 1; i < size; i++) {
  76         MPI_Send(&N, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
  77       }
  78
  79     } else {
  80       MPI_Recv(&N, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
  81     }
  82
  83     // this algorithm uses at most N processors...
  84     if (rank < N) {
  85
  86       if (size > N)
  87         size = N;
  88       n = N / size + ((rank < (N % size)) ? 1 : 0);
  89
  90       if (0 == rank) {
  91
  92         for (i = 1, j = n; i < size && j < N; i++, j += k) {
  93           k = N / size + ((i < (N % size)) ? 1 : 0);
  94           MPI_Send(matrix + N * j, N * k, MPI_DOUBLE, i, 0,
  95                    MPI_COMM_WORLD);
  96           MPI_Send(vector, N, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
  97         }
  98
  99         // sanity check
 100 #ifdef DEBUG
 101         if (i != size || j != N) {
 102           printf("index calc error: i = %d, size = %d, j = %d, N = %d\n",
 103                  i, size, j, N);
 104           MPI_Abort(MPI_COMM_WORLD, SANITY_ERROR);
 105           exit(SANITY_ERROR);
 106         }
 107 #endif
 108
 109         vcalc = (double *) malloc(N * sizeof(double));
 110
 111       } else {
 112
 113         matrix = (double *) malloc(N * n * sizeof(double));
 114         vector = (double *) malloc(N * sizeof(double));
 115         vcalc = (double *) malloc(n * sizeof(double));
 116
 117         MPI_Recv(matrix, N * n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
 118         MPI_Recv(vector, N, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
 119
 120       }
 121
 122       for (i = 0; i < n; i++) {
 123         vcalc[i] = 0.0;
 124         for (j = 0; j < N; j++) {
 125           vcalc[i] += matrix[N * i + j] * vector[j];
 126         }
 127       }
 128
 129       if (0 != rank) {
 130         MPI_Send(vcalc, n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
 131       } else {
 132
 133         for (i = 1, j = n; i < size && j < N; i++, j += k) {
 134           k = N / size + ((i < (N % size)) ? 1 : 0);
 135           MPI_Recv(vcalc + j, k, MPI_DOUBLE, i, 0, MPI_COMM_WORLD,
 136                    &status);
 137         }
 138
 139         // sanity check
 140 #ifdef DEBUG
 141         if (i != size || j != N) {
 142           printf("index calc error 2: i = %d, size = %d, j = %d, N = %d\n",
 143                  i, size, j, N);
 144           MPI_Abort(MPI_COMM_WORLD, SANITY_ERROR);
 145           exit(SANITY_ERROR);
 146         }
 147 #endif
 148
 149         if (-1 == gettimeofday(stop_time, NULL)) {
 150           printf("couldn't set stop_time on node 0!\n");
 151           MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
 152           exit(GETTIMEOFDAY_ERROR);
 153         }
 154
 155         parallel_usecs =
 156             (stop_time->tv_sec * 1000000 + stop_time->tv_usec) -
 157             (start_time->tv_sec * 1000000 + start_time->tv_usec);
 158
 159         if (-1 == gettimeofday(start_time, NULL)) {
 160           printf("couldn't set start_time on node 0!\n");
 161           MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
 162           exit(GETTIMEOFDAY_ERROR);
 163         }
 164         // calculate serially
 165         vcheck = (double *) malloc(N * sizeof(double));
 166         for (i = 0; i < N; i++) {
 167           vcheck[i] = 0.0;
 168           for (j = 0; j < N; j++) {
 169             vcheck[i] += matrix[N * i + j] * vector[j];
 170           }
 171         }
 172
 173         if (-1 == gettimeofday(stop_time, NULL)) {
 174           printf("couldn't set stop_time on node 0!\n");
 175           MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
 176           exit(GETTIMEOFDAY_ERROR);
 177         }
 178
 179         sequential_usecs =
 180             (stop_time->tv_sec * 1000000 + stop_time->tv_usec) -
 181             (start_time->tv_sec * 1000000 + start_time->tv_usec);
 182
 183         // verify correctness
 184         for (i = 0; i < N && vcalc[i] == vcheck[i]; i++);
 185
 186         printf("prog: blocking, i: %d ", current_iteration);
 187
 188         if (i == N) {
 189           printf
 190               ("ptime: %ld us, stime: %ld us, speedup: %.3f, nodes: %d, efficiency: %.3f\n",
 191                parallel_usecs, sequential_usecs,
 192                (double) sequential_usecs / (double) parallel_usecs, size,
 193                (double) sequential_usecs / ((double) parallel_usecs *
 194                                             (double) size));
 195
 196           parallel_usecs_total += parallel_usecs;
 197           sequential_usecs_total += sequential_usecs;
 198           successful_iterations++;
 199         } else {
 200           printf("parallel calc != serial calc, ");
 201         }
 202
 203         free(vcheck);
 204
 205       }
 206
 207       free(matrix);
 208       free(vector);
 209       free(vcalc);
 210     }
 211
 212   }
 213
 214   if (0 == rank) {
 215     printf("prog: blocking, ");
 216     if (0 < successful_iterations) {
 217       printf
 218           ("iterations: %d, avg. ptime: %.3f us, avg. stime: %.3f us, avg. speedup: %.3f, nodes: %d, avg. efficiency: %.3f\n",
 219            successful_iterations,
 220            (double) parallel_usecs_total / (double) successful_iterations,
 221            (double) sequential_usecs_total /
 222            (double) successful_iterations,
 223            (double) sequential_usecs_total / (double) parallel_usecs_total,
 224            size,
 225            (double) sequential_usecs_total /
 226            ((double) parallel_usecs_total * (double) size));
 227     } else {
 228       printf("no successful iterations!\n");
 229     }
 230
 231     free(start_time);
 232     free(stop_time);
 233
 234   }
 235
 236   MPI_Finalize();
 237
 238   return 0;
 239 }