1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
3 * (C) 2008 by Argonne National Laboratory.
4 * See COPYRIGHT in top-level directory.
7 * This code may be used to test the performance of some of the
8 * noncontiguous datatype operations, including vector and indexed
9 * pack and unpack operations. To simplify the use of this code for
10 * tuning an MPI implementation, it uses no communication, just the
11 * MPI_Pack and MPI_Unpack routines. In addition, the individual tests are
12 * in separate routines, making it easier to compare the compiler-generated
13 * code for the user (manual) pack/unpack with the code used by
14 * the MPI implementation. Further, to be fair to the MPI implementation,
15 * the routines are passed the source and destination buffers; this ensures
16 * that the compiler can't optimize for statically allocated buffers.
23 /* Needed for restrict and const definitions */
24 #include "mpitestconf.h"
/* Verbosity flag, zero by default.  NOTE(review): the code that sets and
   reads it is not visible in this excerpt -- presumably tied to the
   MPITEST_VERBOSE check in main; confirm. */
26 static int verbose = 0;
/* Relative tolerance (10%).  Report prints its "too slow" message when the
   MPI time exceeds the user time by more than THRESHOLD * maxTime. */
29 #define THRESHOLD 0.10
/* Limit on the value returned by noise() for a set of trial timings; the
   test routines compare against it before printing "Too much noise;
   discarding measurement". */
30 #define VARIANCE_THRESHOLD ((THRESHOLD * THRESHOLD) / 2)
/* Helper over an array of doubles (list) and its element count (count).
   Only the loop over the count entries is visible in this excerpt; judging
   by the name and by the callers it presumably returns the arithmetic mean
   of list[0..count-1] -- TODO confirm against the full body. */
33 double mean(double *list, int count);
34 double mean(double *list, int count)
/* Visit every one of the count entries. */
40 for (i = 0; i < count; i++)
/* Measures how far the count values in list scatter around their mean: each
   value is divided by mean(list, count) and the squared deviations of those
   ratios from 1 are accumulated in retval.  Callers compare the result with
   VARIANCE_THRESHOLD.
   NOTE(review): the initialisation of retval, any normalisation of the sum,
   the release of margin and the value returned on allocation failure are
   not visible in this excerpt. */
47 double noise(double *list, int count);
48 double noise(double *list, int count)
50 double *margin, retval;
/* Scratch array for the normalised samples, one entry per value in list. */
53 if (!(margin = malloc(count * sizeof(double)))) {
54 printf("Unable to allocate memory\n");
/* Normalise each sample by the mean.  mean() is re-evaluated on every
   iteration although list is not modified inside this loop. */
58 for (i = 0; i < count; i++)
59 margin[i] = list[i] / mean(list, count);
/* Accumulate the squared deviation of each normalised sample from 1. */
62 for (i = 0; i < count; i++) {
63 retval += ((margin[i] - 1) * (margin[i] - 1));
73 /* Here are the tests */
75 /* Test packing a vector of individual doubles */
76 /* We don't use restrict in the function args because assignments between
77 restrict pointers are not valid in C and some compilers, such as the
78 IBM xlc compilers, flag that use as an error. */
/* Times the packing of a strided vector of doubles in two ways: with user
   code (output tagged "USER") and with MPI_Pack applied to an
   MPI_Type_vector datatype (output tagged "MPI").
     n            number of blocks in the vector type, one double each
     stride       stride argument handed to MPI_Type_vector
     avgTimeUser  out: mean(t, NTRIALS) / N_REPS for the user version
     avgTimeMPI   out: mean(t, NTRIALS) / N_REPS for the MPI version
     dest         buffer receiving the packed data; n * sizeof(double) bytes
                  are offered to MPI_Pack
     src          strided source data
   NOTE(review): the user pack loop, the return value and the control flow
   after a noisy measurement are not visible in this excerpt. */
79 int TestVecPackDouble(int n, int stride,
80 double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src);
81 int TestVecPackDouble(int n, int stride,
82 double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src)
/* restrict-qualified locals; presumably set from dest and src for the user
   loop -- the assignments are not visible here. */
84 double *restrict d_dest;
85 const double *restrict d_src;
/* t holds one timing sample per trial. */
88 double t1, t2, t[NTRIALS];
93 printf("TestVecPackDouble (USER): ");
/* Each of the NTRIALS trials runs N_REPS repetitions. */
94 for (j = 0; j < NTRIALS; j++) {
96 for (rep = 0; rep < N_REPS; rep++) {
/* Wall-clock time elapsed since t1. */
105 t2 = MPI_Wtime() - t1;
108 printf("%.3f ", t[j]);
/* Print the noise figure for this set of trials in brackets. */
111 printf("[%.3f]\n", noise(t, NTRIALS));
112 /* If there is too much noise, discard the test */
/* noise() is evaluated a second time for the comparison. */
113 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
117 printf("Too much noise; discarding measurement\n");
/* Mean trial time spread over the N_REPS repetitions of a trial. */
120 *avgTimeUser = mean(t, NTRIALS) / N_REPS;
122 /* MPI Vector code */
/* Vector type: n blocks of one MPI_DOUBLE with the given stride. */
123 MPI_Type_vector(n, 1, stride, MPI_DOUBLE, &vectype);
124 MPI_Type_commit(&vectype);
127 printf("TestVecPackDouble (MPI): ");
/* Same trial/repetition structure as the user measurement. */
128 for (j = 0; j < NTRIALS; j++) {
130 for (rep = 0; rep < N_REPS; rep++) {
/* Pack one vectype item from src into dest; the const qualifier on src is
   cast away for the MPI call. */
132 MPI_Pack((void *) src, 1, vectype, dest, n * sizeof(double), &position, MPI_COMM_SELF);
134 t2 = MPI_Wtime() - t1;
137 printf("%.3f ", t[j]);
140 printf("[%.3f]\n", noise(t, NTRIALS));
141 /* If there is too much noise, discard the test */
142 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
146 printf("Too much noise; discarding measurement\n");
/* Mean trial time spread over the N_REPS repetitions of a trial. */
149 *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
/* Release the datatype created above. */
152 MPI_Type_free(&vectype);
157 /* Test unpacking a vector of individual doubles */
158 /* See above for why restrict is not used in the function args */
/* Times the unpacking of packed doubles into a strided vector in two ways:
   with user code (output tagged "USER") and with MPI_Unpack applied to an
   MPI_Type_vector datatype (output tagged "MPI").
     n            number of blocks in the vector type, one double each
     stride       stride argument handed to MPI_Type_vector
     avgTimeUser  out: mean(t, NTRIALS) / N_REPS for the user version
     avgTimeMPI   out: mean(t, NTRIALS) / N_REPS for the MPI version
     dest         strided destination described by the vector type
     src          packed input; n * sizeof(double) bytes are given to
                  MPI_Unpack as the input size
   NOTE(review): the user unpack loop, the return value and the control flow
   after a noisy measurement are not visible in this excerpt. */
159 int TestVecUnPackDouble(int n, int stride,
160 double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src);
161 int TestVecUnPackDouble(int n, int stride,
162 double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src)
/* restrict-qualified locals; presumably set from dest and src for the user
   loop -- the assignments are not visible here. */
164 double *restrict d_dest;
165 const double *restrict d_src;
/* t holds one timing sample per trial. */
168 double t1, t2, t[NTRIALS];
169 MPI_Datatype vectype;
173 printf("TestVecUnPackDouble (USER): ");
/* Each of the NTRIALS trials runs N_REPS repetitions. */
174 for (j = 0; j < NTRIALS; j++) {
176 for (rep = 0; rep < N_REPS; rep++) {
/* Wall-clock time elapsed since t1. */
185 t2 = MPI_Wtime() - t1;
188 printf("%.3f ", t[j]);
/* Print the noise figure for this set of trials in brackets. */
191 printf("[%.3f]\n", noise(t, NTRIALS));
192 /* If there is too much noise, discard the test */
/* noise() is evaluated a second time for the comparison. */
193 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
197 printf("Too much noise; discarding measurement\n");
/* Mean trial time spread over the N_REPS repetitions of a trial. */
200 *avgTimeUser = mean(t, NTRIALS) / N_REPS;
202 /* MPI Vector code */
/* Vector type: n blocks of one MPI_DOUBLE with the given stride. */
203 MPI_Type_vector(n, 1, stride, MPI_DOUBLE, &vectype);
204 MPI_Type_commit(&vectype);
207 printf("TestVecUnPackDouble (MPI): ");
/* Same trial/repetition structure as the user measurement. */
208 for (j = 0; j < NTRIALS; j++) {
210 for (rep = 0; rep < N_REPS; rep++) {
/* Unpack from the packed buffer src into one vectype item at dest; the
   const qualifier on src is cast away for the MPI call. */
212 MPI_Unpack((void *) src, n * sizeof(double),
213 &position, dest, 1, vectype, MPI_COMM_SELF);
215 t2 = MPI_Wtime() - t1;
218 printf("%.3f ", t[j]);
221 printf("[%.3f]\n", noise(t, NTRIALS));
222 /* If there is too much noise, discard the test */
223 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
227 printf("Too much noise; discarding measurement\n");
/* Mean trial time spread over the N_REPS repetitions of a trial. */
230 *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
/* Release the datatype created above. */
233 MPI_Type_free(&vectype);
238 /* Test packing a vector of 2-individual doubles */
239 /* See above for why restrict is not used in the function args */
/* Times the packing of a strided vector whose blocks hold two doubles each,
   in two ways: with user code (output tagged "USER") and with MPI_Pack
   applied to an MPI_Type_vector datatype (output tagged "MPI").
     n            number of blocks in the vector type, two doubles each
     stride       stride argument handed to MPI_Type_vector
     avgTimeUser  out: mean(t, NTRIALS) / N_REPS for the user version
     avgTimeMPI   out: mean(t, NTRIALS) / N_REPS for the MPI version
     dest         buffer receiving the packed data; 2 * n * sizeof(double)
                  bytes are offered to MPI_Pack
     src          strided source data
   NOTE(review): the loop that drives the user copy, the return value and the
   control flow after a noisy measurement are not visible in this excerpt. */
240 int TestVecPack2Double(int n, int stride,
241 double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src);
242 int TestVecPack2Double(int n, int stride,
243 double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src)
/* restrict-qualified locals used by the user copy below. */
245 double *restrict d_dest;
246 const double *restrict d_src;
/* t holds one timing sample per trial. */
249 double t1, t2, t[NTRIALS];
250 MPI_Datatype vectype;
254 printf("TestVecPack2Double (USER): ");
/* Each of the NTRIALS trials runs N_REPS repetitions. */
255 for (j = 0; j < NTRIALS; j++) {
257 for (rep = 0; rep < N_REPS; rep++) {
/* Copy the two adjacent doubles at d_src to consecutive slots of the packed
   output, advancing d_dest after each store. */
262 *d_dest++ = d_src[0];
263 *d_dest++ = d_src[1];
/* Wall-clock time elapsed since t1. */
267 t2 = MPI_Wtime() - t1;
270 printf("%.3f ", t[j]);
/* Print the noise figure for this set of trials in brackets. */
273 printf("[%.3f]\n", noise(t, NTRIALS));
274 /* If there is too much noise, discard the test */
/* noise() is evaluated a second time for the comparison. */
275 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
279 printf("Too much noise; discarding measurement\n");
/* Mean trial time spread over the N_REPS repetitions of a trial. */
282 *avgTimeUser = mean(t, NTRIALS) / N_REPS;
284 /* MPI Vector code */
/* Vector type: n blocks of two MPI_DOUBLEs with the given stride. */
285 MPI_Type_vector(n, 2, stride, MPI_DOUBLE, &vectype);
286 MPI_Type_commit(&vectype);
289 printf("TestVecPack2Double (MPI): ");
/* Same trial/repetition structure as the user measurement. */
290 for (j = 0; j < NTRIALS; j++) {
292 for (rep = 0; rep < N_REPS; rep++) {
/* Pack one vectype item from src into dest; the const qualifier on src is
   cast away for the MPI call. */
294 MPI_Pack((void *) src, 1, vectype, dest, 2 * n * sizeof(double),
295 &position, MPI_COMM_SELF);
297 t2 = MPI_Wtime() - t1;
300 printf("%.3f ", t[j]);
303 printf("[%.3f]\n", noise(t, NTRIALS));
304 /* If there is too much noise, discard the test */
305 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
309 printf("Too much noise; discarding measurement\n");
/* Mean trial time spread over the N_REPS repetitions of a trial. */
312 *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
/* Release the datatype created above. */
314 MPI_Type_free(&vectype);
319 /* This creates an indexed type that is like a vector (for simplicity
320 of construction). There is a possibility that the MPI implementation
321 will recognize and simplify this (e.g., in MPI_Type_commit); if so,
322 let us know and we'll add a version that is not as regular. */
324 /* See above for why restrict is not used in the function args */
/* Times the packing of n doubles located at displacements i * stride in two
   ways: with user code that gathers through a displacement array (output
   tagged "USER") and with MPI_Pack applied to an indexed-block datatype
   built from the same displacements (output tagged "MPI").
     n            number of doubles / entries in the displacement array
     stride       spacing, in doubles, between consecutive displacements
     avgTimeUser  out: mean(t, NTRIALS) / N_REPS for the user version
     avgTimeMPI   out: mean(t, NTRIALS) / N_REPS for the MPI version
     dest         buffer receiving the packed data; n * sizeof(double) bytes
                  are offered to MPI_Pack
     src          source data indexed by the displacements
   NOTE(review): the return value, the release of displs and the control flow
   after a noisy measurement are not visible in this excerpt. */
325 int TestIndexPackDouble(int n, int stride,
326 double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src);
327 int TestIndexPackDouble(int n, int stride,
328 double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src)
/* restrict-qualified locals used by the user gather loop below. */
330 double *restrict d_dest;
331 const double *restrict d_src;
334 int *restrict displs = 0;
/* t holds one timing sample per trial. */
335 double t1, t2, t[NTRIALS];
336 MPI_Datatype indextype;
/* Build the displacement table: entry i is i * stride.
   NOTE(review): the malloc result is written to without a NULL check. */
338 displs = (int *) malloc(n * sizeof(int));
339 for (i = 0; i < n; i++)
340 displs[i] = i * stride;
344 printf("TestIndexPackDouble (USER): ");
/* Each of the NTRIALS trials runs N_REPS repetitions. */
345 for (j = 0; j < NTRIALS; j++) {
347 for (rep = 0; rep < N_REPS; rep++) {
/* Gather: fetch the double at each displacement and append it to the
   packed output. */
351 for (i = 0; i < n; i++) {
352 *d_dest++ = d_src[displs[i]];
/* Wall-clock time elapsed since t1. */
355 t2 = MPI_Wtime() - t1;
358 printf("%.3f ", t[j]);
/* Print the noise figure for this set of trials in brackets. */
361 printf("[%.3f]\n", noise(t, NTRIALS));
362 /* If there is too much noise, discard the test */
/* noise() is evaluated a second time for the comparison. */
363 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
367 printf("Too much noise; discarding measurement\n");
/* Mean trial time spread over the N_REPS repetitions of a trial. */
370 *avgTimeUser = mean(t, NTRIALS) / N_REPS;
/* Indexed-block type: n blocks of one MPI_DOUBLE at the displacements in
   displs. */
373 MPI_Type_create_indexed_block(n, 1, displs, MPI_DOUBLE, &indextype);
374 MPI_Type_commit(&indextype);
379 printf("TestIndexPackDouble (MPI): ");
/* Same trial/repetition structure as the user measurement. */
380 for (j = 0; j < NTRIALS; j++) {
382 for (rep = 0; rep < N_REPS; rep++) {
/* Pack one indextype item from src into dest; the const qualifier on src is
   cast away for the MPI call. */
384 MPI_Pack((void *) src, 1, indextype, dest, n * sizeof(double),
385 &position, MPI_COMM_SELF);
387 t2 = MPI_Wtime() - t1;
390 printf("%.3f ", t[j]);
393 printf("[%.3f]\n", noise(t, NTRIALS));
394 /* If there is too much noise, discard the test */
395 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
399 printf("Too much noise; discarding measurement\n");
/* Mean trial time spread over the N_REPS repetitions of a trial. */
402 *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
/* Release the datatype created above. */
404 MPI_Type_free(&indextype);
/* Prints the MPI and user average times of one test and flags the case in
   which the MPI version is slower than the user version by more than the
   THRESHOLD fraction of the larger time.
     name         label printed at the start of each output line
     packname     operation name used in the "too slow" message ("Pack" or
                  "Unpack" at the call sites in main)
     avgTimeMPI   average time measured for the MPI version
     avgTimeUser  average time measured for the user version
   NOTE(review): the return statement is not visible in this excerpt; main
   adds the result to errs, so it is presumably nonzero for a flagged test
   and 0 otherwise -- confirm. */
409 int Report(const char *name, const char *packname, double avgTimeMPI, double avgTimeUser);
410 int Report(const char *name, const char *packname, double avgTimeMPI, double avgTimeUser)
412 double diffTime, maxTime;
415 /* Move this into a common routine */
/* Difference of the two times.  The negation below presumably sits under a
   sign test (not visible here) so that diffTime ends up non-negative. */
416 diffTime = avgTimeMPI - avgTimeUser;
418 diffTime = -diffTime;
/* maxTime is the scale for the relative difference: avgTimeMPI when it is
   the larger of the two. */
419 if (avgTimeMPI > avgTimeUser)
420 maxTime = avgTimeMPI;
422 maxTime = avgTimeUser;
/* Columns: name, MPI time, user time, difference as a percentage of
   maxTime. */
425 printf("%-30s:\t%g\t%g\t(%g%%)\n", name,
426 avgTimeMPI, avgTimeUser, 100 * (diffTime / maxTime));
/* Only a slower MPI result is flagged, and only when the gap exceeds
   THRESHOLD * maxTime. */
429 if (avgTimeMPI > avgTimeUser && (diffTime > THRESHOLD * maxTime)) {
431 printf("%s:\tMPI %s code is too slow: MPI %g\t User %g\n",
432 name, packname, avgTimeMPI, avgTimeUser);
438 /* Finally, here's the main program */
/* Initialises MPI, allocates and zero-fills the source and destination
   buffers, runs each timing test, passes every pair of averages to Report
   and sums Report's results in errs, which feeds the final summary.
   NOTE(review): the assignments of n and stride, the declarations of dest
   and src, the release of the buffers, the conditions around the summary
   messages and the end of the function are not visible in this excerpt. */
439 int main(int argc, char *argv[])
441 int n, stride, errs = 0;
443 double avgTimeUser, avgTimeMPI;
445 MPI_Init(&argc, &argv);
/* The MPITEST_VERBOSE environment variable is consulted here; the statement
   it guards is not visible. */
446 if (getenv("MPITEST_VERBOSE"))
/* dest: room for n packed doubles; src: room for n * (1 + stride) doubles.
   NOTE(review): both malloc results go straight to memset without a NULL
   check. */
451 dest = (void *) malloc(n * sizeof(double));
452 src = (void *) malloc(n * ((1 + stride) * sizeof(double)));
453 /* Touch the source and destination arrays */
454 memset(src, 0, n * (1 + stride) * sizeof(double));
455 memset(dest, 0, n * sizeof(double));
457 TestVecPackDouble(n, stride, &avgTimeUser, &avgTimeMPI, dest, src);
458 errs += Report("VecPackDouble", "Pack", avgTimeMPI, avgTimeUser);
/* For the unpack test the roles swap: src is passed as the destination and
   dest as the packed input. */
460 TestVecUnPackDouble(n, stride, &avgTimeUser, &avgTimeMPI, src, dest);
461 errs += Report("VecUnPackDouble", "Unpack", avgTimeMPI, avgTimeUser);
/* The indexed-pack results are reported under the label "VecIndexDouble". */
463 TestIndexPackDouble(n, stride, &avgTimeUser, &avgTimeMPI, dest, src);
464 errs += Report("VecIndexDouble", "Pack", avgTimeMPI, avgTimeUser);
/* Buffers for the two-doubles-per-block test: 2 * n packed doubles and a
   source of (1 + n) * (1 + stride) doubles.
   NOTE(review): again no NULL check before memset. */
469 dest = (void *) malloc(2 * n * sizeof(double));
470 src = (void *) malloc((1 + n) * ((1 + stride) * sizeof(double)));
471 memset(dest, 0, 2 * n * sizeof(double));
472 memset(src, 0, (1 + n) * (1 + stride) * sizeof(double));
473 TestVecPack2Double(n, stride, &avgTimeUser, &avgTimeMPI, dest, src);
474 errs += Report("VecPack2Double", "Pack", avgTimeMPI, avgTimeUser);
/* Summary messages; the second one reports the accumulated errs count. */
482 printf(" No Errors\n");
485 printf(" Found %d performance problems\n", errs);