1 /* Copyright (c) 2009-2010, 2013-2014. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
17 #define SANITY_ERROR 2
18 #define GETTIMEOFDAY_ERROR 3
20 int main(int argc, char *argv[])
/* Benchmark: distributed matrix-vector multiply (y = A*x) using blocking
 * MPI point-to-point calls, timed against a serial recomputation on the
 * root to report speedup and efficiency per iteration and on average.
 * NOTE(review): this view of the file is gap-sampled — declarations of
 * rank/size/status, several braces and else-branches, and the ITERATIONS /
 * USAGE_ERROR definitions are outside the visible lines. */
24   int N, n, i, j, k, current_iteration, successful_iterations = 0;
25   double *matrix = NULL, *vector = NULL, *vcalc, *vcheck;
27   struct timeval *start_time = NULL, *stop_time = NULL;
28   long parallel_usecs, parallel_usecs_total =
29       0, sequential_usecs, sequential_usecs_total = 0;
31   MPI_Init(&argc, &argv);
33   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
34   MPI_Comm_size(MPI_COMM_WORLD, &size);
38   // root node parses cmdline args
     /* Usage check: requires one argument whose first character is a digit.
      * (Full numeric validation is delegated to xbt_str_parse_int below.) */
39   if (2 > argc || !isdigit(*argv[1])) {
40     printf("usage:\n%s <size>\n", argv[0]);
41     MPI_Abort(MPI_COMM_WORLD, USAGE_ERROR);
     /* N is the square matrix dimension, taken from argv[1]. */
45   N = xbt_str_parse_int(argv[1], "Invalid size: %s");
     /* Heap-allocated timestamps reused across iterations.
      * NOTE(review): no matching free() is visible in this view — presumably
      * released (or leaked) past the sampled lines; confirm. */
47   start_time = (struct timeval *) malloc(sizeof(struct timeval));
48   stop_time = (struct timeval *) malloc(sizeof(struct timeval));
52   for (current_iteration = 0; current_iteration < ITERATIONS;
53        current_iteration++) {
       /* Root builds a fresh random N*N matrix and N-vector each iteration.
        * NOTE(review): allocated every iteration with no free() visible here
        * — potential per-iteration leak unless freed in elided lines. */
57     matrix = (double *) malloc(N * N * sizeof(double));
58     vector = (double *) malloc(N * sizeof(double));
       /* Entries uniform in [0, 1): rand()/(RAND_MAX+1) avoids reaching 1.0. */
60     for (i = 0; i < N * N; i++) {
61       matrix[i] = (double) rand() / ((double) RAND_MAX + 1);
64     for (i = 0; i < N; i++) {
65       vector[i] = (double) rand() / ((double) RAND_MAX + 1);
68     // for the sake of argument, the parallel algorithm begins
69     // when the root node begins to transmit the matrix to the
       /* Start of the parallel-phase wall clock (root only). */
71     if (-1 == gettimeofday(start_time, NULL)) {
72       printf("couldn't set start_time on node 0!\n");
73       MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
74       exit(GETTIMEOFDAY_ERROR);
       /* Root broadcasts N to every other rank via blocking sends. */
77     for (i = 1; i < size; i++) {
78       MPI_Send(&N, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
       /* Non-root ranks receive N from rank 0. */
82     MPI_Recv(&N, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
85     // this algorithm uses at most N processors...
       /* n = number of matrix rows owned by this rank: N/size each, with the
        * first N%size ranks taking one extra row. */
90     n = N / size + ((rank < (N % size)) ? 1 : 0);
       /* Root scatters row blocks: rank i gets k rows starting at row j,
        * plus a full copy of the input vector. j accumulates previous
        * ranks' row counts; loop invariant: j == rows assigned so far. */
94     for (i = 1, j = n; i < size && j < N; i++, j += k) {
95       k = N / size + ((i < (N % size)) ? 1 : 0);
96       MPI_Send(matrix + N * j, N * k, MPI_DOUBLE, i, 0,
98       MPI_Send(vector, N, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
       /* Sanity check: after distribution the loop must have visited every
        * rank (i == size) and assigned every row (j == N). */
103    if (i != size || j != N) {
104      printf("index calc error: i = %d, size = %d, j = %d, N = %d\n",
106      MPI_Abort(MPI_COMM_WORLD, SANITY_ERROR);
       /* Root's result buffer holds the full N-vector (it gathers all parts). */
111    vcalc = (double *) malloc(N * sizeof(double));
       /* Non-root path: receive this rank's n rows and the vector from root;
        * local result buffer only needs n entries. */
115    matrix = (double *) malloc(N * n * sizeof(double));
116    vector = (double *) malloc(N * sizeof(double));
117    vcalc = (double *) malloc(n * sizeof(double));
119    MPI_Recv(matrix, N * n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
120    MPI_Recv(vector, N, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
       /* Local compute: each rank multiplies its n rows by the vector.
        * NOTE(review): vcalc[i] is accumulated with +=; its zero-initialization
        * must happen in a line not visible in this view — confirm. */
124    for (i = 0; i < n; i++) {
126      for (j = 0; j < N; j++) {
127        vcalc[i] += matrix[N * i + j] * vector[j];
       /* Non-root ranks ship their n partial results back to root. */
132    MPI_Send(vcalc, n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
       /* Root gathers: rank i's k results land at offset j in vcalc,
        * mirroring the scatter loop's row accounting exactly. */
135    for (i = 1, j = n; i < size && j < N; i++, j += k) {
136      k = N / size + ((i < (N % size)) ? 1 : 0);
137      MPI_Recv(vcalc + j, k, MPI_DOUBLE, i, 0, MPI_COMM_WORLD,
       /* Same post-loop invariant check as on the scatter side. */
143    if (i != size || j != N) {
144      printf("index calc error 2: i = %d, size = %d, j = %d, N = %d\n",
146      MPI_Abort(MPI_COMM_WORLD, SANITY_ERROR);
       /* End of parallel phase. */
151    if (-1 == gettimeofday(stop_time, NULL)) {
152      printf("couldn't set stop_time on node 0!\n");
153      MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
154      exit(GETTIMEOFDAY_ERROR);
       /* Elapsed microseconds for the parallel phase.
        * NOTE(review): tv_sec * 1000000 is computed in the operands' native
        * width before the subtraction — could overflow for very large
        * timestamps if tv_sec is 32-bit; confirm target platform. */
158        (stop_time->tv_sec * 1000000 + stop_time->tv_usec) -
159        (start_time->tv_sec * 1000000 + start_time->tv_usec);
       /* Restart the clock for the serial reference computation. */
161    if (-1 == gettimeofday(start_time, NULL)) {
162      printf("couldn't set start_time on node 0!\n");
163      MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
164      exit(GETTIMEOFDAY_ERROR);
166    // calculate serially
       /* Root recomputes the full product sequentially into vcheck.
        * NOTE(review): as with vcalc, the += accumulation relies on
        * zero-initialization in a line outside this view — confirm. */
167    vcheck = (double *) malloc(N * sizeof(double));
168    for (i = 0; i < N; i++) {
170      for (j = 0; j < N; j++) {
171        vcheck[i] += matrix[N * i + j] * vector[j];
175    if (-1 == gettimeofday(stop_time, NULL)) {
176      printf("couldn't set stop_time on node 0!\n");
177      MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
178      exit(GETTIMEOFDAY_ERROR);
       /* Elapsed microseconds for the serial phase. */
182        (stop_time->tv_sec * 1000000 + stop_time->tv_usec) -
183        (start_time->tv_sec * 1000000 + start_time->tv_usec);
185    // verify correctness
       /* Exact == comparison on doubles: valid here because both versions sum
        * the same products in the same order per element; the loop exits
        * early (i < N) on the first mismatch. */
186    for (i = 0; i < N && vcalc[i] == vcheck[i]; i++);
188    printf("prog: blocking, i: %d ", current_iteration);
       /* Per-iteration stats: speedup = stime/ptime; efficiency = speedup/size. */
192        ("ptime: %ld us, stime: %ld us, speedup: %.3f, nodes: %d, efficiency: %.3f\n",
193         parallel_usecs, sequential_usecs,
194         (double) sequential_usecs / (double) parallel_usecs, size,
195         (double) sequential_usecs / ((double) parallel_usecs *
       /* Only fully-verified iterations contribute to the running totals. */
198      parallel_usecs_total += parallel_usecs;
199      sequential_usecs_total += sequential_usecs;
200      successful_iterations++;
       /* Verification-failure branch (condition is in an elided line). */
202      printf("parallel calc != serial calc, ");
     /* Final summary across all successful iterations (root only).
      * Guard avoids dividing by zero when every iteration failed. */
217  printf("prog: blocking, ");
218  if (0 < successful_iterations) {
220      ("iterations: %d, avg. ptime: %.3f us, avg. stime: %.3f us, avg. speedup: %.3f, nodes: %d, avg. efficiency: %.3f\n",
221       successful_iterations,
222       (double) parallel_usecs_total / (double) successful_iterations,
223       (double) sequential_usecs_total /
224       (double) successful_iterations,
225       (double) sequential_usecs_total / (double) parallel_usecs_total,
227       (double) sequential_usecs_total /
228       ((double) parallel_usecs_total * (double) size));
230    printf("no successful iterations!\n");