1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
3 * (C) 2008 by Argonne National Laboratory.
4 * See COPYRIGHT in top-level directory.
7 * This code may be used to test the performance of some of the
8 * noncontiguous datatype operations, including vector and indexed
9 * pack and unpack operations. To simplify the use of this code for
10 * tuning an MPI implementation, it uses no communication, just the
11 * MPI_Pack and MPI_Unpack routines. In addition, the individual tests are
12 * in separate routines, making it easier to compare the compiler-generated
13 * code for the user (manual) pack/unpack with the code used by
14 * the MPI implementation. Further, to be fair to the MPI implementation,
15 * the routines are passed the source and destination buffers; this ensures
16 * that the compiler can't optimize for statically allocated buffers.
23 /* Needed for restrict and const definitions */
24 #include "mpitestconf.h"
/* Set to 1 by main() when the MPITEST_VERBOSE environment variable is present;
   the test routines print their per-trial timings only when it is nonzero. */
26 static int verbose = 0;
/* Relative tolerance used by Report(): the MPI path is flagged when it is
   slower than the user code by more than this fraction of the larger time. */
29 #define THRESHOLD 0.10
/* Limit on the value returned by noise(); a timing set above this limit
   triggers the "Too much noise" message in the test routines. */
30 #define VARIANCE_THRESHOLD ((THRESHOLD * THRESHOLD) / 2)
/* mean - the visible code walks all `count` entries of `list`.
 * NOTE(review): presumably returns their arithmetic mean (callers divide the
 * result by N_REPS to obtain a per-repetition time); confirm the accumulation
 * and return statements against the full body. */
33 double mean(double *list, int count);
34 double mean(double *list, int count)
40 for (i = 0; i < count; i++)
/* noise - measure the relative spread of the `count` timings in `list`.
 * Each sample is divided by mean(list, count), and the squared deviations of
 * those ratios from 1 are accumulated.  Callers compare the result against
 * VARIANCE_THRESHOLD.
 * NOTE(review): any final scaling of the sum, and the value returned when the
 * allocation fails, should be confirmed against the full body. */
47 double noise(double *list, int count);
48 double noise(double *list, int count)
50 double *margin, retval;
/* Scratch array: one normalized sample per timing. */
53 if (!(margin = malloc(count * sizeof(double)))) {
54 printf("Unable to allocate memory\n");
/* NOTE(review): mean(list, count) does not depend on i, yet it is re-evaluated
   for every element of this loop. */
58 for (i = 0; i < count; i++)
59 margin[i] = list[i] / mean(list, count);
/* Sum of squared deviations of the normalized samples from 1. */
62 for (i = 0; i < count; i++) {
63 retval += ((margin[i] - 1) * (margin[i] - 1));
/* A sum of squares of finite values is never negative, so this sign flip is
   purely defensive. */
66 if (retval < 0) retval = -retval;
71 /* Here are the tests */
73 /* Test packing a vector of individual doubles */
74 /* We don't use restrict in the function args because assignments between
75 restrict pointers are not valid in C and some compilers, such as the
76 IBM xlc compilers, flag that use as an error.*/
/* TestVecPackDouble - compare a (USER) timing section against MPI_Pack applied
 * to an MPI_Type_vector of n blocks of one MPI_DOUBLE.
 *   n, stride    - block count and stride passed to MPI_Type_vector
 *   avgTimeUser  - out: mean(t, NTRIALS) / N_REPS for the USER section
 *   avgTimeMPI   - out: mean(t, NTRIALS) / N_REPS for the MPI section
 *   dest, src    - caller-supplied buffers; MPI_Pack reads src and writes dest
 * NOTE(review): the meaning of the int return value, and what follows the
 * "Too much noise" message, should be confirmed against the full body. */
77 int TestVecPackDouble( int n, int stride,
78 double *avgTimeUser, double *avgTimeMPI,
79 double *dest, const double *src );
80 int TestVecPackDouble( int n, int stride,
81 double *avgTimeUser, double *avgTimeMPI,
82 double *dest, const double *src )
84 double *restrict d_dest;
85 const double *restrict d_src;
/* t[] holds one timing per trial. */
88 double t1, t2, t[NTRIALS];
92 if (verbose) printf("TestVecPackDouble (USER): ");
93 for (j = 0; j < NTRIALS; j++) {
95 for (rep=0; rep<N_REPS; rep++) {
/* Wall-clock time elapsed since t1. */
104 t2 = MPI_Wtime() - t1;
106 if (verbose) printf("%.3f ", t[j]);
/* NOTE(review): when verbose is set, noise() is evaluated here and again in
   the threshold check below. */
108 if (verbose) printf("[%.3f]\n", noise(t, NTRIALS));
109 /* If there is too much noise, discard the test */
110 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
114 printf("Too much noise; discarding measurement\n");
/* Average over the NTRIALS trials, scaled down by N_REPS. */
117 *avgTimeUser = mean(t, NTRIALS) / N_REPS;
119 /* MPI Vector code */
/* n blocks of one MPI_DOUBLE each; consecutive blocks start `stride` doubles
   apart. */
120 MPI_Type_vector( n, 1, stride, MPI_DOUBLE, &vectype );
121 MPI_Type_commit( &vectype );
123 if (verbose) printf("TestVecPackDouble (MPI): ");
124 for (j = 0; j < NTRIALS; j++) {
126 for (rep=0; rep<N_REPS; rep++) {
/* Pack one vectype element from src into dest; the output buffer size is
   given as n*sizeof(double) bytes. */
128 MPI_Pack( (void *)src, 1, vectype, dest, n*sizeof(double),
129 &position, MPI_COMM_SELF );
131 t2 = MPI_Wtime() - t1;
133 if (verbose) printf("%.3f ", t[j]);
135 if (verbose) printf("[%.3f]\n", noise(t, NTRIALS));
136 /* If there is too much noise, discard the test */
137 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
141 printf("Too much noise; discarding measurement\n");
144 *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
/* Release the derived datatype created above. */
147 MPI_Type_free( &vectype );
152 /* Test unpacking a vector of individual doubles */
153 /* See above for why restrict is not used in the function args */
/* TestVecUnPackDouble - compare a (USER) timing section against MPI_Unpack
 * applied to an MPI_Type_vector of n blocks of one MPI_DOUBLE.
 *   n, stride    - block count and stride passed to MPI_Type_vector
 *   avgTimeUser  - out: mean(t, NTRIALS) / N_REPS for the USER section
 *   avgTimeMPI   - out: mean(t, NTRIALS) / N_REPS for the MPI section
 *   dest, src    - caller-supplied buffers; MPI_Unpack reads the packed input
 *                  from src and writes one vectype element into dest
 * NOTE(review): the meaning of the int return value, and what follows the
 * "Too much noise" message, should be confirmed against the full body. */
154 int TestVecUnPackDouble( int n, int stride,
155 double *avgTimeUser, double *avgTimeMPI,
156 double *dest, const double *src );
157 int TestVecUnPackDouble( int n, int stride,
158 double *avgTimeUser, double *avgTimeMPI,
159 double *dest, const double *src )
161 double *restrict d_dest;
162 const double *restrict d_src;
/* t[] holds one timing per trial. */
165 double t1, t2, t[NTRIALS];
166 MPI_Datatype vectype;
169 if (verbose) printf("TestVecUnPackDouble (USER): ");
170 for (j = 0; j < NTRIALS; j++) {
172 for (rep=0; rep<N_REPS; rep++) {
/* Wall-clock time elapsed since t1. */
181 t2 = MPI_Wtime() - t1;
183 if (verbose) printf("%.3f ", t[j]);
/* NOTE(review): when verbose is set, noise() is evaluated here and again in
   the threshold check below. */
185 if (verbose) printf("[%.3f]\n", noise(t, NTRIALS));
186 /* If there is too much noise, discard the test */
187 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
191 printf("Too much noise; discarding measurement\n");
/* Average over the NTRIALS trials, scaled down by N_REPS. */
194 *avgTimeUser = mean(t, NTRIALS) / N_REPS;
196 /* MPI Vector code */
/* n blocks of one MPI_DOUBLE each; consecutive blocks start `stride` doubles
   apart. */
197 MPI_Type_vector( n, 1, stride, MPI_DOUBLE, &vectype );
198 MPI_Type_commit( &vectype );
200 if (verbose) printf("TestVecUnPackDouble (MPI): ");
201 for (j = 0; j < NTRIALS; j++) {
203 for (rep=0; rep<N_REPS; rep++) {
/* Unpack n*sizeof(double) bytes of packed input from src into one vectype
   element located at dest. */
205 MPI_Unpack( (void *)src, n*sizeof(double),
206 &position, dest, 1, vectype, MPI_COMM_SELF );
208 t2 = MPI_Wtime() - t1;
210 if (verbose) printf("%.3f ", t[j]);
212 if (verbose) printf("[%.3f]\n", noise(t, NTRIALS));
213 /* If there is too much noise, discard the test */
214 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
218 printf("Too much noise; discarding measurement\n");
221 *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
/* Release the derived datatype created above. */
224 MPI_Type_free( &vectype );
229 /* Test packing a vector of 2-individual doubles */
230 /* See above for why restrict is not used in the function args */
/* TestVecPack2Double - compare a user loop that copies two adjacent doubles
 * per step against MPI_Pack applied to an MPI_Type_vector of n blocks of two
 * MPI_DOUBLEs.
 *   n, stride    - block count and stride passed to MPI_Type_vector
 *   avgTimeUser  - out: mean(t, NTRIALS) / N_REPS for the USER section
 *   avgTimeMPI   - out: mean(t, NTRIALS) / N_REPS for the MPI section
 *   dest, src    - caller-supplied buffers; MPI_Pack reads src and writes dest
 * NOTE(review): the meaning of the int return value, and what follows the
 * "Too much noise" message, should be confirmed against the full body. */
231 int TestVecPack2Double( int n, int stride,
232 double *avgTimeUser, double *avgTimeMPI,
233 double *dest, const double *src );
234 int TestVecPack2Double( int n, int stride,
235 double *avgTimeUser, double *avgTimeMPI,
236 double *dest, const double *src )
238 double *restrict d_dest;
239 const double *restrict d_src;
/* t[] holds one timing per trial. */
242 double t1, t2, t[NTRIALS];
243 MPI_Datatype vectype;
246 if (verbose) printf("TestVecPack2Double (USER): ");
247 for (j = 0; j < NTRIALS; j++) {
249 for (rep=0; rep<N_REPS; rep++) {
/* Copy the two adjacent doubles at d_src to consecutive slots of the packed
   output. */
254 *d_dest++ = d_src[0];
255 *d_dest++ = d_src[1];
/* Wall-clock time elapsed since t1. */
259 t2 = MPI_Wtime() - t1;
261 if (verbose) printf("%.3f ", t[j]);
/* NOTE(review): when verbose is set, noise() is evaluated here and again in
   the threshold check below. */
263 if (verbose) printf("[%.3f]\n", noise(t, NTRIALS));
264 /* If there is too much noise, discard the test */
265 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
269 printf("Too much noise; discarding measurement\n");
/* Average over the NTRIALS trials, scaled down by N_REPS. */
272 *avgTimeUser = mean(t, NTRIALS) / N_REPS;
274 /* MPI Vector code */
/* n blocks of two MPI_DOUBLEs each; consecutive blocks start `stride` doubles
   apart. */
275 MPI_Type_vector( n, 2, stride, MPI_DOUBLE, &vectype );
276 MPI_Type_commit( &vectype );
278 if (verbose) printf("TestVecPack2Double (MPI): ");
279 for (j = 0; j < NTRIALS; j++) {
281 for (rep=0; rep<N_REPS; rep++) {
/* Pack one vectype element from src into dest; the output buffer size is
   given as 2*n*sizeof(double) bytes. */
283 MPI_Pack( (void *)src, 1, vectype, dest, 2*n*sizeof(double),
284 &position, MPI_COMM_SELF );
286 t2 = MPI_Wtime() - t1;
288 if (verbose) printf("%.3f ", t[j]);
290 if (verbose) printf("[%.3f]\n", noise(t, NTRIALS));
291 /* If there is too much noise, discard the test */
292 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
296 printf("Too much noise; discarding measurement\n");
299 *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
/* Release the derived datatype created above. */
301 MPI_Type_free( &vectype );
306 /* This creates an indexed type that is like a vector (for simplicity
307 of construction). There is a possibility that the MPI implementation
308 will recognize and simplify this (e.g., in MPI_Type_commit); if so,
309 let us know and we'll add a version that is not as regular
311 /* See above for why restrict is not used in the function args */
/* TestIndexPackDouble - compare a user gather loop over a displacement table
 * against MPI_Pack applied to an indexed-block datatype built from the same
 * table (displs[i] = i * stride, one MPI_DOUBLE per block).
 *   n, stride    - number of entries and spacing used to fill displs
 *   avgTimeUser  - out: mean(t, NTRIALS) / N_REPS for the USER section
 *   avgTimeMPI   - out: mean(t, NTRIALS) / N_REPS for the MPI section
 *   dest, src    - caller-supplied buffers; MPI_Pack reads src and writes dest
 * NOTE(review): the meaning of the int return value, what follows the
 * "Too much noise" message, and whether displs is released should be
 * confirmed against the full body. */
312 int TestIndexPackDouble( int n, int stride,
313 double *avgTimeUser, double *avgTimeMPI,
314 double *dest, const double *src );
315 int TestIndexPackDouble( int n, int stride,
316 double *avgTimeUser, double *avgTimeMPI,
317 double *dest, const double *src )
319 double *restrict d_dest;
320 const double *restrict d_src;
323 int *restrict displs = 0;
/* t[] holds one timing per trial. */
324 double t1, t2, t[NTRIALS];
325 MPI_Datatype indextype;
/* Displacement table with the same regular layout as a strided vector.
   NOTE(review): the malloc result is written through on the next line
   without a NULL check. */
327 displs = (int *)malloc( n * sizeof(int) );
328 for (i=0; i<n; i++) displs[i] = i * stride;
331 if (verbose) printf("TestIndexPackDouble (USER): ");
332 for (j = 0; j < NTRIALS; j++) {
334 for (rep=0; rep<N_REPS; rep++) {
/* Gather: element i of the packed output comes from d_src[displs[i]]. */
338 for (i=0; i<n; i++) {
339 *d_dest++ = d_src[displs[i]];
/* Wall-clock time elapsed since t1. */
342 t2 = MPI_Wtime() - t1;
344 if (verbose) printf("%.3f ", t[j]);
/* NOTE(review): when verbose is set, noise() is evaluated here and again in
   the threshold check below. */
346 if (verbose) printf("[%.3f]\n", noise(t, NTRIALS));
347 /* If there is too much noise, discard the test */
348 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
352 printf("Too much noise; discarding measurement\n");
/* Average over the NTRIALS trials, scaled down by N_REPS. */
355 *avgTimeUser = mean(t, NTRIALS) / N_REPS;
/* n blocks of one MPI_DOUBLE each, placed at the displacements in displs. */
358 MPI_Type_create_indexed_block( n, 1, displs, MPI_DOUBLE, &indextype );
359 MPI_Type_commit( &indextype );
363 if (verbose) printf("TestIndexPackDouble (MPI): ");
364 for (j = 0; j < NTRIALS; j++) {
366 for (rep=0; rep<N_REPS; rep++) {
/* Pack one indextype element from src into dest; the output buffer size is
   given as n*sizeof(double) bytes. */
368 MPI_Pack( (void *)src, 1, indextype, dest, n*sizeof(double),
369 &position, MPI_COMM_SELF );
371 t2 = MPI_Wtime() - t1;
373 if (verbose) printf("%.3f ", t[j]);
375 if (verbose) printf("[%.3f]\n", noise(t, NTRIALS));
376 /* If there is too much noise, discard the test */
377 if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
381 printf("Too much noise; discarding measurement\n");
384 *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
/* Release the derived datatype created above. */
386 MPI_Type_free( &indextype );
/* Report - compare the MPI and user timings for one test.
 *   name        - test label printed at the start of each message
 *   packname    - operation name inserted into the "too slow" message
 *   avgTimeMPI  - average time measured for the MPI routine
 *   avgTimeUser - average time measured for the user code
 * main() adds the return value to its count of performance problems.
 * NOTE(review): presumably nonzero only when the "too slow" branch is taken;
 * confirm against the full body. */
391 int Report( const char *name, const char *packname,
392 double avgTimeMPI, double avgTimeUser );
393 int Report( const char *name, const char *packname,
394 double avgTimeMPI, double avgTimeUser )
396 double diffTime, maxTime;
399 /* Move this into a common routine */
/* diffTime = |avgTimeMPI - avgTimeUser|, maxTime = the larger of the two. */
400 diffTime = avgTimeMPI - avgTimeUser;
401 if (diffTime < 0) diffTime = - diffTime;
402 if (avgTimeMPI > avgTimeUser) maxTime = avgTimeMPI;
403 else maxTime = avgTimeUser;
/* Summary line: name, MPI time, user time, relative difference in percent.
   NOTE(review): if both averages are 0 the percentage is computed as 0/0. */
406 printf( "%-30s:\t%g\t%g\t(%g%%)\n", name,
407 avgTimeMPI, avgTimeUser,
408 100 * (diffTime / maxTime) );
/* Only an MPI time that is slower than the user time by more than THRESHOLD
   (relative to maxTime) is reported as a problem. */
411 if (avgTimeMPI > avgTimeUser && (diffTime > THRESHOLD * maxTime)) {
413 printf( "%s:\tMPI %s code is too slow: MPI %g\t User %g\n",
414 name, packname, avgTimeMPI, avgTimeUser );
420 /* Finally, here's the main program */
/* main - run each pack/unpack timing test and summarize the results.
 * Initializes MPI, enables verbose output when MPITEST_VERBOSE is set in the
 * environment, allocates and zeroes the buffers, runs the four tests, and adds
 * each Report() result to errs.
 * NOTE(review): where n and stride get their values, and the condition that
 * selects between the two final messages, should be confirmed against the
 * full body. */
421 int main( int argc, char *argv[] )
423 int n, stride, err, errs = 0;
425 double avgTimeUser, avgTimeMPI;
427 MPI_Init( &argc, &argv );
428 if (getenv("MPITEST_VERBOSE")) verbose = 1;
/* dest holds n packed doubles; src holds n * (1+stride) doubles.
   NOTE(review): both malloc results are handed to memset below without a
   NULL check. */
432 dest = (void *)malloc( n * sizeof(double) );
433 src = (void *)malloc( n * ((1+stride)*sizeof(double)) );
434 /* Touch the source and destination arrays */
435 memset( src, 0, n * (1+stride)*sizeof(double) );
436 memset( dest, 0, n * sizeof(double) );
/* Each test fills avgTimeUser/avgTimeMPI; Report() compares the two. */
438 err = TestVecPackDouble( n, stride, &avgTimeUser, &avgTimeMPI,
440 errs += Report( "VecPackDouble", "Pack", avgTimeMPI, avgTimeUser );
442 err = TestVecUnPackDouble( n, stride, &avgTimeUser, &avgTimeMPI,
444 errs += Report( "VecUnPackDouble", "Unpack", avgTimeMPI, avgTimeUser );
446 err = TestIndexPackDouble( n, stride, &avgTimeUser, &avgTimeMPI,
448 errs += Report( "VecIndexDouble", "Pack", avgTimeMPI, avgTimeUser );
/* Buffers for the two-doubles-per-block test: dest holds 2*n doubles and src
   holds (1+n) * (1+stride) doubles.
   NOTE(review): again no NULL check before memset. */
453 dest = (void *)malloc( 2*n * sizeof(double) );
454 src = (void *)malloc( (1 + n) * ((1+stride)*sizeof(double)) );
455 memset( dest, 0, 2*n * sizeof(double) );
456 memset( src, 0, (1+n) * (1+stride)*sizeof(double) );
457 err = TestVecPack2Double( n, stride, &avgTimeUser, &avgTimeMPI,
459 errs += Report( "VecPack2Double", "Pack", avgTimeMPI, avgTimeUser );
/* Final summary: either the success message or the number of performance
   problems accumulated in errs. */
467 printf( " No Errors\n" );
470 printf( " Found %d performance problems\n", errs );