3 * ICS691: High Performance Computing
5 * Homework 3, Exercise 2, Step 1
17 #define MALLOC_ERROR 2
18 #define GETTIMEOFDAY_ERROR 3
// Allocation helper: main routes every heap buffer it creates through this
// function.
//   rank    - MPI rank of the caller; only used in the failure message
//   varname - name of the variable being allocated; echoed in the failure message
//   size    - number of bytes requested
// NOTE(review): the allocation call, the failure test and the return statement
// are not visible in this excerpt. Presumably the two statements below run only
// when the allocation returns NULL and the pointer is returned otherwise -
// confirm against the full file.
20 void * checked_malloc(int rank, const char * varname, size_t size) {
24 printf("node %d could not malloc memory for %s.\n", rank, varname);
25 MPI_Abort(MPI_COMM_WORLD, MALLOC_ERROR); // asks MPI to abort the processes of MPI_COMM_WORLD, with MALLOC_ERROR as the error code
// Program entry point. Only part of the body is visible in this excerpt; the
// description below covers the visible statements only.
//   argv[1], argv[2] - <N> and <P>; both must start with a digit
//   argv[3]          - optional iteration count (default ITERATIONS)
//   argv[4]          - optional number of steps (default STEPS)
//   argv[5]          - optional step size (default STEP_SIZE)
// Flow: initialise MPI, parse the command line, broadcast P / iterations /
// steps / step_size from rank 0, split MPI_COMM_WORLD into row and column
// communicators, then for every step pick N, allocate n*n integer blocks,
// accumulate C += a[i]*b[j] over k using row/column broadcasts, compare C with
// the check matrix D and print timing statistics.
// NOTE(review): the usage text only mentions <N> <P> [<iterations>], although
// argv[4] and argv[5] are also honoured below.
31 int main(int argc, char* argv[]) {
33 // timing/system variables
34 int iteration, iterations = ITERATIONS;
35 int step, steps = STEPS, step_size = STEP_SIZE;
36 long usecs, total_usecs = 0; // zeroed here because it is accumulated with += after every iteration; any per-step reset is outside this excerpt
37 struct timeval *start_time, *stop_time; // allocated below through checked_malloc
40 // mpi/communications variables
43 MPI_Comm row_comm, col_comm;
45 // algorithm variables
47 int *A, *A_t, *B, *C, *D, *a, *b, *abuf, *bbuf; // a and b are the n-element vectors broadcast in the k loop; abuf/bbuf are n-int buffers whose use is outside this excerpt
50 MPI_Init(&argc, &argv);
52 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
56 MPI_Comm_size(MPI_COMM_WORLD, &size);
58 program = basename(argv[0]);
60 // root node parses cmdline args
62 if (3 > argc || !isdigit((unsigned char)*argv[1]) || !isdigit((unsigned char)*argv[2])) { // cast: isdigit is undefined for negative char values
63 printf("usage:\n%s <N> <P> [<iterations>]\n", program);
64 MPI_Abort(MPI_COMM_WORLD, USAGE_ERROR);
69 //N_start = atoi(argv[1]);
75 if (4 <= argc && isdigit((unsigned char)*argv[3])) { // optional override of the iteration count
76 iterations = atoi(argv[3]);
79 if (5 <= argc && isdigit((unsigned char)*argv[4])) { // optional override of the number of steps
80 steps = atoi(argv[4]);
83 if (6 <= argc && isdigit((unsigned char)*argv[5])) { // optional override of the step size
84 step_size = atoi(argv[5]);
89 printf("P^2 must equal size.\n");
90 MPI_Abort(MPI_COMM_WORLD, USAGE_ERROR);
94 start_time = (struct timeval *)checked_malloc(rank, "start_time", sizeof(struct timeval));
95 stop_time = (struct timeval *)checked_malloc(rank, "stop_time", sizeof(struct timeval));
99 // send command line parameters except N, since it can vary
100 MPI_Bcast(&P, 1, MPI_INT, 0, MPI_COMM_WORLD);
101 MPI_Bcast(&iterations, 1, MPI_INT, 0, MPI_COMM_WORLD);
102 MPI_Bcast(&steps, 1, MPI_INT, 0, MPI_COMM_WORLD);
103 MPI_Bcast(&step_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
108 // create row/column communicators
109 MPI_Comm_split(MPI_COMM_WORLD, row, col, &row_comm); // colour = row, key = col: same-row processes share row_comm, ranked by col
110 MPI_Comm_split(MPI_COMM_WORLD, col, row, &col_comm); // colour = col, key = row: same-column processes share col_comm, ranked by row
112 for (step = 0; step < steps; step++) {
117 N = N_start + step * step_size; // problem size grows by step_size on every step
119 printf("P must divide N and %d does not divide %d.\n", P, N);
124 MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
126 // if root passes N = -1, skip this round
127 if (-1 == N) continue;
131 // initialize matrix components
132 A = (int *)checked_malloc(rank, "A", n*n*sizeof(int)); // each local block holds n*n ints; presumably n = N / P (assignment not visible) - TODO confirm
133 A_t = (int *)checked_malloc(rank, "A_t", n*n*sizeof(int));
134 B = (int *)checked_malloc(rank, "B", n*n*sizeof(int));
135 C = (int *)checked_malloc(rank, "C", n*n*sizeof(int));
136 D = (int *)checked_malloc(rank, "D", n*n*sizeof(int));
138 for (i = 0; i < n; i++) {
139 for (j = 0; j < n; j++) {
147 // D is the check matrix
149 for (k = 0; k < N; k++) {
152 D[n*i+j] += (I+k) * k;
159 abuf = (int *)checked_malloc(rank, "abuf", n*sizeof(int));
160 bbuf = (int *)checked_malloc(rank, "bbuf", n*sizeof(int));
162 for (iteration = 0; iteration < iterations; iteration++) {
164 for (i = 0; i < n*n; i++) {
168 // node zero sets start time
169 if (0 == rank && -1 == gettimeofday(start_time, NULL)) {
170 printf("couldn't set start_time on node 0!\n");
171 MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
172 exit(GETTIMEOFDAY_ERROR);
175 // populate transpose of A
176 for (i = 0; i < n; i++) {
177 for (j = 0; j < n; j++) {
178 A_t[n*i+j] = A[n*j+i];
182 // perform calculations
183 for (k = 0; k < N; k++) {
197 MPI_Bcast(a, n, MPI_INT, k/n, row_comm); // root within the row communicator is rank k/n
198 MPI_Bcast(b, n, MPI_INT, k/n, col_comm); // root within the column communicator is rank k/n
200 for (i = 0; i < n; i++) {
201 for (j = 0; j < n; j++) {
202 C[n*i+j] += a[i] * b[j]; // add the outer product of a and b into the local block of C
208 // node zero sets stop time
209 if (0 == rank && -1 == gettimeofday(stop_time, NULL)) {
210 printf("couldn't set stop_time on node 0!\n");
211 MPI_Abort(MPI_COMM_WORLD, GETTIMEOFDAY_ERROR);
212 exit(GETTIMEOFDAY_ERROR);
216 for (i = 0; i < n*n && C[i] == D[i]; i++); // empty body: i stops at the first mismatch, or reaches n*n when C equals D
218 MPI_Reduce(&j, &k, 1, MPI_INT, MPI_LAND, 0, MPI_COMM_WORLD); // logical AND of every rank's j lands in k on rank 0; the assignment to j is not visible here
220 // node zero prints stats
222 usecs = (stop_time->tv_sec*1000000+stop_time->tv_usec) - (start_time->tv_sec*1000000+start_time->tv_usec); // elapsed wall-clock time in microseconds
223 printf("prog: %s, N: %d, P: %d, procs: %d, time: %ld us, check: %d\n", program, N, P, P*P, usecs, k); // %ld: usecs is a long
224 total_usecs += usecs;
229 // node 0 prints final stats
231 printf("prog: %s, N: %d, P: %d, procs: %d, iterations: %d, avg. time: %ld us\n",
232 program, N, P, P*P, iterations, total_usecs / iterations); // long / int yields long, hence %ld above
235 // free data structures