#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/time.h>

#define GETTIMEOFDAY_ERROR 3

int main(int argc, char* argv[]) {
    int N, n, i, j, k, current_iteration, successful_iterations = 0;
    int rank, size;
    double *matrix, *vector, *vcalc, *vcheck;
    struct timeval *start_time, *stop_time;
    long parallel_usecs, parallel_usecs_total = 0, sequential_usecs, sequential_usecs_total = 0;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
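    /* rank identifies this process within MPI_COMM_WORLD; size is the
     * total number of processes launched for the run. */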
    // root node parses cmdline args
    if (2 > argc || !isdigit(*argv[1])) {
        printf("usage:\n%s <size>\n", argv[0]);
        MPI_Abort(MPI_COMM_WORLD, USAGE_ERROR);
        exit(USAGE_ERROR);
    }
    N = atoi(argv[1]);
    start_time = (struct timeval *)malloc(sizeof(struct timeval));
    stop_time = (struct timeval *)malloc(sizeof(struct timeval));

    for(current_iteration = 0; current_iteration < ITERATIONS; current_iteration++) {
        matrix = (double *)malloc(N*N*sizeof(double));
        vector = (double *)malloc(N*sizeof(double));

        for(i = 0; i < N*N; i++) {
            matrix[i] = (double)rand()/((double)RAND_MAX + 1);
        }

        for(i = 0; i < N; i++) {
            vector[i] = (double)rand()/((double)RAND_MAX + 1);
        }
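        /* rand() / (RAND_MAX + 1) yields a value in [0, 1), so both the
         * matrix and the vector are filled with doubles strictly below 1. */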
        // for the sake of argument, the parallel algorithm begins
        // when the root node begins to transmit the matrix to the
        // worker nodes
        if (-1 == gettimeofday(start_time, NULL)) {
            printf("couldn't set start_time on node 0!\n");
            MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
            exit(GETTIMEOFDAY_ERROR);
        }
        // root sends the problem size to every worker
        for(i = 1; i < size; i++) {
            MPI_Send(&N, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
        }
        // each worker receives the problem size from the root
        MPI_Recv(&N, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
        // this algorithm uses at most N processors...
        if (size > N) size = N;
        n = N / size + ((rank < (N % size)) ? 1 : 0);
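        /* Block-row distribution: every rank gets floor(N/size) rows and the
         * first (N % size) ranks get one extra row, so the counts sum to N.
         * For example, N = 10 and size = 4 gives row counts 3, 3, 2, 2. */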
        // root distributes k rows of the matrix plus the whole vector to rank i
        for(i = 1, j = n; i < size && j < N; i++, j+=k) {
            k = N / size + ((i < (N % size)) ? 1 : 0);
            MPI_Send(matrix+N*j, N*k, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
            MPI_Send(vector, N, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
        }

        if(i != size || j != N) {
            printf("index calc error: i = %d, size = %d, j = %d, N = %d\n", i, size, j, N);
            MPI_Abort(MPI_COMM_WORLD, SANITY_ERROR);
            exit(SANITY_ERROR);
        }
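        /* Rank 0 keeps its own n rows (rows 0..n-1) and ships rows j..j+k-1
         * to rank i, together with a full copy of the vector; the loop must
         * therefore finish with i == size and j == N, which the sanity check
         * above verifies. */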
        // root holds the full N-element result vector
        vcalc = (double *)malloc(N*sizeof(double));

        // workers hold only their n rows of the matrix, the full vector,
        // and an n-element slice of the result
        matrix = (double *)malloc(N*n*sizeof(double));
        vector = (double *)malloc(N*sizeof(double));
        vcalc = (double *)malloc(n*sizeof(double));

        MPI_Recv(matrix, N*n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
        MPI_Recv(vector, N, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
        for(i = 0; i < n; i++) {
            vcalc[i] = 0.0;
            for(j = 0; j < N; j++) {
                vcalc[i] += matrix[N*i+j] * vector[j];
            }
        }
        // workers return their n partial results to the root
        MPI_Send(vcalc, n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
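        /* Each rank computes its local slice of the product:
         * vcalc[i] = sum over j of matrix[i][j] * vector[j], for its n rows,
         * and the workers send those n values back to rank 0. */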
        // root collects the partial results from the workers
        for(i = 1, j = n; i < size && j < N; i++, j+=k) {
            k = N / size + ((i < (N % size)) ? 1 : 0);
            MPI_Recv(vcalc+j, k, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &status);
        }

        if(i != size || j != N) {
            printf("index calc error 2: i = %d, size = %d, j = %d, N = %d\n", i, size, j, N);
            MPI_Abort(MPI_COMM_WORLD, SANITY_ERROR);
            exit(SANITY_ERROR);
        }
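        /* Rank 0 places rank i's k results at offset j, mirroring the row
         * offsets used when the matrix was distributed, so vcalc ends up
         * holding the complete N-element product. */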
        if (-1 == gettimeofday(stop_time, NULL)) {
            printf("couldn't set stop_time on node 0!\n");
            MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
            exit(GETTIMEOFDAY_ERROR);
        }

        parallel_usecs = (stop_time->tv_sec*1000000+stop_time->tv_usec) - (start_time->tv_sec*1000000+start_time->tv_usec);
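        /* Elapsed time in microseconds: the seconds fields are scaled by 10^6
         * and the microsecond fields are added. (If time_t were 32-bit the
         * multiplication could overflow; widening to a 64-bit type first
         * would be the cautious variant.) */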
        if (-1 == gettimeofday(start_time, NULL)) {
            printf("couldn't set start_time on node 0!\n");
            MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
            exit(GETTIMEOFDAY_ERROR);
        }
        // calculate serially on the root node as a reference
        vcheck = (double *)malloc(N*sizeof(double));
        for(i = 0; i < N; i++) {
            vcheck[i] = 0.0;
            for(j = 0; j < N; j++) {
                vcheck[i] += matrix[N*i+j] * vector[j];
            }
        }
        if (-1 == gettimeofday(stop_time, NULL)) {
            printf("couldn't set stop_time on node 0!\n");
            MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
            exit(GETTIMEOFDAY_ERROR);
        }

        sequential_usecs = (stop_time->tv_sec*1000000+stop_time->tv_usec) - (start_time->tv_sec*1000000+start_time->tv_usec);
        // verify correctness
        for(i = 0; i < N && vcalc[i] == vcheck[i]; i++);
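        /* The scan stops at the first mismatch, so i == N means every element
         * agrees. Exact equality generally works here because both versions
         * accumulate each dot product in the same order; a tolerance-based
         * comparison would be safer if the summation order ever changed. */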
        if (N == i) {
            printf("prog: blocking, i: %d ", current_iteration);
            printf("ptime: %ld us, stime: %ld us, speedup: %.3f, nodes: %d, efficiency: %.3f\n",
                   parallel_usecs,
                   sequential_usecs,
                   (double)sequential_usecs / (double)parallel_usecs,
                   size,
                   (double)sequential_usecs / ((double)parallel_usecs * (double)size));
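            /* speedup    = sequential time / parallel time,
             * efficiency = speedup / number of nodes (ideal value 1.0). */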
            parallel_usecs_total += parallel_usecs;
            sequential_usecs_total += sequential_usecs;
            successful_iterations++;
        } else {
            printf("parallel calc != serial calc, ");
    // after all iterations: report averages over the successful runs
    printf("prog: blocking, ");
    if(0 < successful_iterations) {
        printf("iterations: %d, avg. ptime: %ld us, avg. stime: %ld us, avg. speedup: %.3f, nodes: %d, avg. efficiency: %.3f\n",
               successful_iterations,
               parallel_usecs_total / successful_iterations,
               sequential_usecs_total / successful_iterations,
               (double)sequential_usecs_total / (double)parallel_usecs_total,
               size,
               (double)sequential_usecs_total / ((double)parallel_usecs_total * (double)size));
    } else {
        printf("no successful iterations!\n");
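    /* Build/run sketch (assumed, not part of the original listing):
     *   mpicc -o blocking blocking.c
     *   mpirun -np 4 ./blocking 1024
     * The source file name, node count, and matrix size are only examples. */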