#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/time.h>
#include <mpi.h>

/* values for ITERATIONS, USAGE_ERROR and SANITY_ERROR are assumptions */
#define ITERATIONS 10
#define USAGE_ERROR 1
#define SANITY_ERROR 2
#define GETTIMEOFDAY_ERROR 3

int main(int argc, char *argv[])
{
    int rank, size;
    MPI_Status status;
    int N, n, i, j, k, current_iteration, successful_iterations = 0;
    double *matrix, *vector, *vcalc, *vcheck;
    struct timeval *start_time, *stop_time;
    long parallel_usecs, parallel_usecs_total = 0;
    long sequential_usecs, sequential_usecs_total = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
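
    /* rank 0 owns the full matrix and vector, sends each worker a block of
     * rows plus a copy of the vector, and gathers the partial results;
     * every rank, including rank 0, multiplies its own block of rows */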

    if (0 == rank) {
        // root node parses cmdline args
        if (2 > argc || !isdigit(*argv[1])) {
            printf("usage:\n%s <size>\n", argv[0]);
            MPI_Abort(MPI_COMM_WORLD, USAGE_ERROR);
            exit(USAGE_ERROR);
        }
        N = atoi(argv[1]);

        start_time = (struct timeval *) malloc(sizeof(struct timeval));
        stop_time = (struct timeval *) malloc(sizeof(struct timeval));
    }

    for (current_iteration = 0; current_iteration < ITERATIONS;
         current_iteration++) {
        if (0 == rank) {
            matrix = (double *) malloc(N * N * sizeof(double));
            vector = (double *) malloc(N * sizeof(double));

            // fill the matrix and the vector with values in [0, 1)
            for (i = 0; i < N * N; i++) {
                matrix[i] = (double) rand() / ((double) RAND_MAX + 1);
            }
            for (i = 0; i < N; i++) {
                vector[i] = (double) rand() / ((double) RAND_MAX + 1);
            }

            // for the sake of argument, the parallel algorithm begins
            // when the root node begins to transmit the matrix to the
            // other nodes
            if (-1 == gettimeofday(start_time, NULL)) {
                printf("couldn't set start_time on node 0!\n");
                MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
                exit(GETTIMEOFDAY_ERROR);
            }

            for (i = 1; i < size; i++) {
                MPI_Send(&N, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
            }
        } else {
            MPI_Recv(&N, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
        }

        // this algorithm uses at most N processors...
        n = N / size + ((rank < (N % size)) ? 1 : 0);
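        // n is this rank's share of rows: N / size each, plus one extra
        // row for the first N % size ranks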

        if (0 == rank) {
            // send each worker its block of rows and a copy of the vector
            for (i = 1, j = n; i < size && j < N; i++, j += k) {
                k = N / size + ((i < (N % size)) ? 1 : 0);
                MPI_Send(matrix + N * j, N * k, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
                MPI_Send(vector, N, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
            }

            // sanity check
            if (i != size || j != N) {
                printf("index calc error: i = %d, size = %d, j = %d, N = %d\n", i,
                       size, j, N);
                MPI_Abort(MPI_COMM_WORLD, SANITY_ERROR);
                exit(SANITY_ERROR);
            }

            vcalc = (double *) malloc(N * sizeof(double));
        } else {
            matrix = (double *) malloc(N * n * sizeof(double));
            vector = (double *) malloc(N * sizeof(double));
            vcalc = (double *) malloc(n * sizeof(double));

            MPI_Recv(matrix, N * n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
            MPI_Recv(vector, N, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
        }
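
        // every rank multiplies its n local rows by the full vector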
        for (i = 0; i < n; i++) {
            vcalc[i] = 0;
            for (j = 0; j < N; j++) {
                vcalc[i] += matrix[N * i + j] * vector[j];
            }
        }

        if (0 != rank) {
            MPI_Send(vcalc, n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
        } else {
            // gather the partial results from the workers
            for (i = 1, j = n; i < size && j < N; i++, j += k) {
                k = N / size + ((i < (N % size)) ? 1 : 0);
                MPI_Recv(vcalc + j, k, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &status);
            }

            // sanity check
            if (i != size || j != N) {
                printf("index calc error 2: i = %d, size = %d, j = %d, N = %d\n", i,
                       size, j, N);
                MPI_Abort(MPI_COMM_WORLD, SANITY_ERROR);
                exit(SANITY_ERROR);
            }
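
            // the parallel algorithm ends once the root node holds the
            // complete result vector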
            if (-1 == gettimeofday(stop_time, NULL)) {
                printf("couldn't set stop_time on node 0!\n");
                MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
                exit(GETTIMEOFDAY_ERROR);
            }

            parallel_usecs =
                (stop_time->tv_sec * 1000000 + stop_time->tv_usec) -
                (start_time->tv_sec * 1000000 + start_time->tv_usec);

            if (-1 == gettimeofday(start_time, NULL)) {
                printf("couldn't set start_time on node 0!\n");
                MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
                exit(GETTIMEOFDAY_ERROR);
            }

            // calculate serially
            vcheck = (double *) malloc(N * sizeof(double));
            for (i = 0; i < N; i++) {
                vcheck[i] = 0;
                for (j = 0; j < N; j++) {
                    vcheck[i] += matrix[N * i + j] * vector[j];
                }
            }

            if (-1 == gettimeofday(stop_time, NULL)) {
                printf("couldn't set stop_time on node 0!\n");
                MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
                exit(GETTIMEOFDAY_ERROR);
            }

            sequential_usecs =
                (stop_time->tv_sec * 1000000 + stop_time->tv_usec) -
                (start_time->tv_sec * 1000000 + start_time->tv_usec);

            // verify correctness
            for (i = 0; i < N && vcalc[i] == vcheck[i]; i++);
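            // the empty-bodied loop leaves i == N exactly when every
            // element of vcalc matches vcheck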

            printf("prog: blocking, i: %d ", current_iteration);
            if (N == i) {
                printf
                    ("ptime: %ld us, stime: %ld us, speedup: %.3f, nodes: %d, efficiency: %.3f\n",
                     parallel_usecs, sequential_usecs,
                     (double) sequential_usecs / (double) parallel_usecs, size,
                     (double) sequential_usecs / ((double) parallel_usecs *
                                                  (double) size));

                parallel_usecs_total += parallel_usecs;
                sequential_usecs_total += sequential_usecs;
                successful_iterations++;
191 printf("parallel calc != serial calc, ");
206 printf("prog: blocking, ");
207 if (0 < successful_iterations) {
209 ("iterations: %d, avg. ptime: %.3f us, avg. stime: %.3f us, avg. speedup: %.3f, nodes: %d, avg. efficiency: %.3f\n",
210 successful_iterations,
211 (double) parallel_usecs_total / (double) successful_iterations,
212 (double) sequential_usecs_total / (double) successful_iterations,
213 (double) sequential_usecs_total / (double) parallel_usecs_total,
215 (double) sequential_usecs_total / ((double) parallel_usecs_total *
218 printf("no successful iterations!\n");