From aac4ba41ea27fe8168bebed009abebd7e5c3f006 Mon Sep 17 00:00:00 2001 From: degomme Date: Sat, 11 Feb 2017 21:48:07 +0100 Subject: [PATCH] Update perf --- .../smpi/mpich3-test/perf/CMakeLists.txt | 4 +- teshsuite/smpi/mpich3-test/perf/allredtrace.c | 160 ++-- teshsuite/smpi/mpich3-test/perf/commcreatep.c | 93 +-- teshsuite/smpi/mpich3-test/perf/dtpack.c | 764 +++++++++--------- teshsuite/smpi/mpich3-test/perf/indexperf.c | 194 ++--- teshsuite/smpi/mpich3-test/perf/manyrma.c | 552 ++++++------- teshsuite/smpi/mpich3-test/perf/nestvec.c | 200 ++--- teshsuite/smpi/mpich3-test/perf/nestvec2.c | 241 +++--- .../smpi/mpich3-test/perf/non_zero_root.c | 105 +-- teshsuite/smpi/mpich3-test/perf/sendrecvl.c | 371 +++++---- teshsuite/smpi/mpich3-test/perf/testlist | 8 +- teshsuite/smpi/mpich3-test/perf/timer.c | 48 +- .../smpi/mpich3-test/perf/transp-datatype.c | 124 ++- teshsuite/smpi/mpich3-test/perf/twovec.c | 49 +- 14 files changed, 1471 insertions(+), 1442 deletions(-) diff --git a/teshsuite/smpi/mpich3-test/perf/CMakeLists.txt b/teshsuite/smpi/mpich3-test/perf/CMakeLists.txt index afeca4ccd5..9bdedeb313 100644 --- a/teshsuite/smpi/mpich3-test/perf/CMakeLists.txt +++ b/teshsuite/smpi/mpich3-test/perf/CMakeLists.txt @@ -9,8 +9,8 @@ if(enable_smpi AND enable_smpi_MPICH3_testsuite) include_directories(BEFORE "${CMAKE_HOME_DIRECTORY}/include/smpi") include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../include/") - foreach(file commcreatep non_zero_root sendrecvl timer transp-datatype twovec) - #not compiled files dtpack indexperf manyrma nestvec2 nestvec + foreach(file commcreatep non_zero_root sendrecvl timer transp-datatype twovec dtpack indexperf nestvec2 nestvec) + #not compiled files manyrma add_executable(${file} ${file}.c) target_link_libraries(${file} simgrid mtest_c) endforeach() diff --git a/teshsuite/smpi/mpich3-test/perf/allredtrace.c b/teshsuite/smpi/mpich3-test/perf/allredtrace.c index 070271be19..8fb077bfbf 100644 --- a/teshsuite/smpi/mpich3-test/perf/allredtrace.c +++ b/teshsuite/smpi/mpich3-test/perf/allredtrace.c @@ -5,13 +5,13 @@ */ /* - * This code is intended to test the trace overhead when using an + * This code is intended to test the trace overhead when using an * MPI tracing package. To perform the test, follow these steps: * * 1) Run with the versbose mode selected to determine the delay argument * to use in subsequent tests: * mpiexec -n 4096 allredtrace -v - * Assume that the computed delay count is 6237; that value is used in + * Assume that the computed delay count is 6237; that value is used in * the following. * * 2) Run with an explicit delay count, without tracing enabled: @@ -20,7 +20,7 @@ * 3) Build allredtrace with tracing enabled, then run: * mpiexec -n 4096 allredtrace -delaycount 6237 * - * Compare the total times. The tracing version should take slightly + * Compare the total times. The tracing version should take slightly * longer but no more than, for example, 15%. */ #include "mpi.h" @@ -30,126 +30,132 @@ static int verbose = 0; static int lCount = 0; -void Delay( int ); -void SetupDelay( double ); +void Delay(int); +void SetupDelay(double); -int main( int argc, char *argv[] ) +int main(int argc, char *argv[]) { double usecPerCall = 100; double t, t1, tsum; int i, nLoop = 100; int rank; - MPI_Init( &argc, &argv ); - MPI_Comm_rank( MPI_COMM_WORLD, &rank ); - - /* Process arguments. We allow the delay count to be set from the - command line to ensure reproducibility*/ - for (i=1; i 10 * tick) nLoop = nLoop / 2; - - /* Compare measured delay */ - if (t > 2*sec) { - lCount = lCount / 2; - if (direction == 1) break; - direction = -1; - } - else if (t < sec / 2) { - lCount = lCount * 2; - if (direction == -1) break; - direction = 1; - } - else if (t < sec) { - /* sec/2 <= t < sec , so estimate the lCount to hit sec */ - lCount = (sec/t) * lCount; - } - else - break; + t = MPI_Wtime(); + for (i = 0; i < nLoop; i++) { + Delay(lCount); + } + t = MPI_Wtime() - t; + t = t / nLoop; + if (verbose) + printf("lCount = %d, time = %e\n", lCount, t); + if (t > 10 * tick) + nLoop = nLoop / 2; + + /* Compare measured delay */ + if (t > 2 * sec) { + lCount = lCount / 2; + if (direction == 1) + break; + direction = -1; + } + else if (t < sec / 2) { + lCount = lCount * 2; + if (direction == -1) + break; + direction = 1; + } + else if (t < sec) { + /* sec/2 <= t < sec , so estimate the lCount to hit sec */ + lCount = (sec / t) * lCount; + } + else + break; } - if (verbose) printf( "lCount = %d, t = %e\n", lCount, t ); + if (verbose) + printf("lCount = %d, t = %e\n", lCount, t); /* Should coordinate with the other processes - take the max? */ } -double delayCounter = 0; -void Delay( int count ) +volatile double delayCounter = 0; +void Delay(int count) { int i; delayCounter = 0.0; - for (i=0; i 0) { - rdiff = (t[i] - t[i-1]) / t[i]; - if (rdiff >= 4) { - errs++; - fprintf( stderr, "Relative difference between group of size %d and %d is %e exceeds 4\n", - gsize[i-1], gsize[i], rdiff ); - } - } - } + for (i = 4; i <= maxi; i++) { + double rdiff; + if (t[i] > 0) { + rdiff = (t[i] - t[i - 1]) / t[i]; + if (rdiff >= 4) { + errs++; + fprintf(stderr, + "Relative difference between group of size %d and %d is %e exceeds 4\n", + gsize[i - 1], gsize[i], rdiff); + } + } + } } - MTest_Finalize( errs ); + MTest_Finalize(errs); MPI_Finalize(); diff --git a/teshsuite/smpi/mpich3-test/perf/dtpack.c b/teshsuite/smpi/mpich3-test/perf/dtpack.c index a31a55dc20..8b2e05318e 100644 --- a/teshsuite/smpi/mpich3-test/perf/dtpack.c +++ b/teshsuite/smpi/mpich3-test/perf/dtpack.c @@ -4,13 +4,13 @@ * See COPYRIGHT in top-level directory. */ /* - * This code may be used to test the performance of some of the + * This code may be used to test the performance of some of the * noncontiguous datatype operations, including vector and indexed - * pack and unpack operations. To simplify the use of this code for + * pack and unpack operations. To simplify the use of this code for * tuning an MPI implementation, it uses no communication, just the * MPI_Pack and MPI_Unpack routines. In addition, the individual tests are * in separate routines, making it easier to compare the compiler-generated - * code for the user (manual) pack/unpack with the code used by + * code for the user (manual) pack/unpack with the code used by * the MPI implementation. Further, to be fair to the MPI implementation, * the routines are passed the source and destination buffers; this ensures * that the compiler can't optimize for statically allocated buffers. @@ -33,441 +33,455 @@ static int verbose = 0; double mean(double *list, int count); double mean(double *list, int count) { - double retval; - int i; + double retval; + int i; - retval = 0; - for (i = 0; i < count; i++) - retval += list[i]; - retval /= count; + retval = 0; + for (i = 0; i < count; i++) + retval += list[i]; + retval /= count; - return retval; + return retval; } double noise(double *list, int count); double noise(double *list, int count) { - double *margin, retval; - int i; + double *margin, retval; + int i; - if (!(margin = malloc(count * sizeof(double)))) { - printf("Unable to allocate memory\n"); - return -1; - } + if (!(margin = malloc(count * sizeof(double)))) { + printf("Unable to allocate memory\n"); + return -1; + } - for (i = 0; i < count; i++) - margin[i] = list[i] / mean(list, count); + for (i = 0; i < count; i++) + margin[i] = list[i] / mean(list, count); - retval = 0; - for (i = 0; i < count; i++) { - retval += ((margin[i] - 1) * (margin[i] - 1)); - } - retval /= count; - if (retval < 0) retval = -retval; + retval = 0; + for (i = 0; i < count; i++) { + retval += ((margin[i] - 1) * (margin[i] - 1)); + } + retval /= count; + if (retval < 0) + retval = -retval; - return retval; + return retval; } /* Here are the tests */ /* Test packing a vector of individual doubles */ /* We don't use restrict in the function args because assignments between - restrict pointers is not valid in C and some compilers, such as the + restrict pointers is not valid in C and some compilers, such as the IBM xlc compilers, flag that use as an error.*/ -int TestVecPackDouble( int n, int stride, - double *avgTimeUser, double *avgTimeMPI, - double *dest, const double *src ); -int TestVecPackDouble( int n, int stride, - double *avgTimeUser, double *avgTimeMPI, - double *dest, const double *src ) +int TestVecPackDouble(int n, int stride, + double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src); +int TestVecPackDouble(int n, int stride, + double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src) { - double *restrict d_dest; - const double *restrict d_src; - register int i, j; - int rep, position; - double t1, t2, t[NTRIALS]; - MPI_Datatype vectype; - - /* User code */ - if (verbose) printf("TestVecPackDouble (USER): "); - for (j = 0; j < NTRIALS; j++) { - t1 = MPI_Wtime(); - for (rep=0; rep VARIANCE_THRESHOLD) { - *avgTimeUser = 0; - *avgTimeMPI = 0; - if (verbose) - printf("Too much noise; discarding measurement\n"); - return 0; - } - *avgTimeUser = mean(t, NTRIALS) / N_REPS; - - /* MPI Vector code */ - MPI_Type_vector( n, 1, stride, MPI_DOUBLE, &vectype ); - MPI_Type_commit( &vectype ); - - if (verbose) printf("TestVecPackDouble (MPI): "); - for (j = 0; j < NTRIALS; j++) { - t1 = MPI_Wtime(); - for (rep=0; rep VARIANCE_THRESHOLD) { - *avgTimeUser = 0; - *avgTimeMPI = 0; - if (verbose) - printf("Too much noise; discarding measurement\n"); - } - else { - *avgTimeMPI = mean(t, NTRIALS) / N_REPS; - } - - MPI_Type_free( &vectype ); - - return 0; + double *restrict d_dest; + const double *restrict d_src; + register int i, j; + int rep, position; + double t1, t2, t[NTRIALS]; + MPI_Datatype vectype; + + /* User code */ + if (verbose) + printf("TestVecPackDouble (USER): "); + for (j = 0; j < NTRIALS; j++) { + t1 = MPI_Wtime(); + for (rep = 0; rep < N_REPS; rep++) { + i = n; + d_dest = dest; + d_src = src; + while (i--) { + *d_dest++ = *d_src; + d_src += stride; + } + } + t2 = MPI_Wtime() - t1; + t[j] = t2; + if (verbose) + printf("%.3f ", t[j]); + } + if (verbose) + printf("[%.3f]\n", noise(t, NTRIALS)); + /* If there is too much noise, discard the test */ + if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) { + *avgTimeUser = 0; + *avgTimeMPI = 0; + if (verbose) + printf("Too much noise; discarding measurement\n"); + return 0; + } + *avgTimeUser = mean(t, NTRIALS) / N_REPS; + + /* MPI Vector code */ + MPI_Type_vector(n, 1, stride, MPI_DOUBLE, &vectype); + MPI_Type_commit(&vectype); + + if (verbose) + printf("TestVecPackDouble (MPI): "); + for (j = 0; j < NTRIALS; j++) { + t1 = MPI_Wtime(); + for (rep = 0; rep < N_REPS; rep++) { + position = 0; + MPI_Pack((void *) src, 1, vectype, dest, n * sizeof(double), &position, MPI_COMM_SELF); + } + t2 = MPI_Wtime() - t1; + t[j] = t2; + if (verbose) + printf("%.3f ", t[j]); + } + if (verbose) + printf("[%.3f]\n", noise(t, NTRIALS)); + /* If there is too much noise, discard the test */ + if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) { + *avgTimeUser = 0; + *avgTimeMPI = 0; + if (verbose) + printf("Too much noise; discarding measurement\n"); + } + else { + *avgTimeMPI = mean(t, NTRIALS) / N_REPS; + } + + MPI_Type_free(&vectype); + + return 0; } /* Test unpacking a vector of individual doubles */ /* See above for why restrict is not used in the function args */ -int TestVecUnPackDouble( int n, int stride, - double *avgTimeUser, double *avgTimeMPI, - double *dest, const double *src ); -int TestVecUnPackDouble( int n, int stride, - double *avgTimeUser, double *avgTimeMPI, - double *dest, const double *src ) +int TestVecUnPackDouble(int n, int stride, + double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src); +int TestVecUnPackDouble(int n, int stride, + double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src) { - double *restrict d_dest; - const double *restrict d_src; - register int i, j; - int rep, position; - double t1, t2, t[NTRIALS]; - MPI_Datatype vectype; - - /* User code */ - if (verbose) printf("TestVecUnPackDouble (USER): "); - for (j = 0; j < NTRIALS; j++) { - t1 = MPI_Wtime(); - for (rep=0; rep VARIANCE_THRESHOLD) { - *avgTimeUser = 0; - *avgTimeMPI = 0; - if (verbose) - printf("Too much noise; discarding measurement\n"); - return 0; - } - *avgTimeUser = mean(t, NTRIALS) / N_REPS; - - /* MPI Vector code */ - MPI_Type_vector( n, 1, stride, MPI_DOUBLE, &vectype ); - MPI_Type_commit( &vectype ); - - if (verbose) printf("TestVecUnPackDouble (MPI): "); - for (j = 0; j < NTRIALS; j++) { - t1 = MPI_Wtime(); - for (rep=0; rep VARIANCE_THRESHOLD) { - *avgTimeUser = 0; - *avgTimeMPI = 0; - if (verbose) - printf("Too much noise; discarding measurement\n"); - } - else { - *avgTimeMPI = mean(t, NTRIALS) / N_REPS; - } - - MPI_Type_free( &vectype ); - - return 0; + double *restrict d_dest; + const double *restrict d_src; + register int i, j; + int rep, position; + double t1, t2, t[NTRIALS]; + MPI_Datatype vectype; + + /* User code */ + if (verbose) + printf("TestVecUnPackDouble (USER): "); + for (j = 0; j < NTRIALS; j++) { + t1 = MPI_Wtime(); + for (rep = 0; rep < N_REPS; rep++) { + i = n; + d_dest = dest; + d_src = src; + while (i--) { + *d_dest = *d_src++; + d_dest += stride; + } + } + t2 = MPI_Wtime() - t1; + t[j] = t2; + if (verbose) + printf("%.3f ", t[j]); + } + if (verbose) + printf("[%.3f]\n", noise(t, NTRIALS)); + /* If there is too much noise, discard the test */ + if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) { + *avgTimeUser = 0; + *avgTimeMPI = 0; + if (verbose) + printf("Too much noise; discarding measurement\n"); + return 0; + } + *avgTimeUser = mean(t, NTRIALS) / N_REPS; + + /* MPI Vector code */ + MPI_Type_vector(n, 1, stride, MPI_DOUBLE, &vectype); + MPI_Type_commit(&vectype); + + if (verbose) + printf("TestVecUnPackDouble (MPI): "); + for (j = 0; j < NTRIALS; j++) { + t1 = MPI_Wtime(); + for (rep = 0; rep < N_REPS; rep++) { + position = 0; + MPI_Unpack((void *) src, n * sizeof(double), + &position, dest, 1, vectype, MPI_COMM_SELF); + } + t2 = MPI_Wtime() - t1; + t[j] = t2; + if (verbose) + printf("%.3f ", t[j]); + } + if (verbose) + printf("[%.3f]\n", noise(t, NTRIALS)); + /* If there is too much noise, discard the test */ + if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) { + *avgTimeUser = 0; + *avgTimeMPI = 0; + if (verbose) + printf("Too much noise; discarding measurement\n"); + } + else { + *avgTimeMPI = mean(t, NTRIALS) / N_REPS; + } + + MPI_Type_free(&vectype); + + return 0; } /* Test packing a vector of 2-individual doubles */ /* See above for why restrict is not used in the function args */ -int TestVecPack2Double( int n, int stride, - double *avgTimeUser, double *avgTimeMPI, - double *dest, const double *src ); -int TestVecPack2Double( int n, int stride, - double *avgTimeUser, double *avgTimeMPI, - double *dest, const double *src ) +int TestVecPack2Double(int n, int stride, + double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src); +int TestVecPack2Double(int n, int stride, + double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src) { - double *restrict d_dest; - const double *restrict d_src; - register int i, j; - int rep, position; - double t1, t2, t[NTRIALS]; - MPI_Datatype vectype; - - /* User code */ - if (verbose) printf("TestVecPack2Double (USER): "); - for (j = 0; j < NTRIALS; j++) { - t1 = MPI_Wtime(); - for (rep=0; rep VARIANCE_THRESHOLD) { - *avgTimeUser = 0; - *avgTimeMPI = 0; - if (verbose) - printf("Too much noise; discarding measurement\n"); - return 0; - } - *avgTimeUser = mean(t, NTRIALS) / N_REPS; - - /* MPI Vector code */ - MPI_Type_vector( n, 2, stride, MPI_DOUBLE, &vectype ); - MPI_Type_commit( &vectype ); - - if (verbose) printf("TestVecPack2Double (MPI): "); - for (j = 0; j < NTRIALS; j++) { - t1 = MPI_Wtime(); - for (rep=0; rep VARIANCE_THRESHOLD) { - *avgTimeUser = 0; - *avgTimeMPI = 0; - if (verbose) - printf("Too much noise; discarding measurement\n"); - } - else { - *avgTimeMPI = mean(t, NTRIALS) / N_REPS; - } - MPI_Type_free( &vectype ); - - return 0; + double *restrict d_dest; + const double *restrict d_src; + register int i, j; + int rep, position; + double t1, t2, t[NTRIALS]; + MPI_Datatype vectype; + + /* User code */ + if (verbose) + printf("TestVecPack2Double (USER): "); + for (j = 0; j < NTRIALS; j++) { + t1 = MPI_Wtime(); + for (rep = 0; rep < N_REPS; rep++) { + i = n; + d_dest = dest; + d_src = src; + while (i--) { + *d_dest++ = d_src[0]; + *d_dest++ = d_src[1]; + d_src += stride; + } + } + t2 = MPI_Wtime() - t1; + t[j] = t2; + if (verbose) + printf("%.3f ", t[j]); + } + if (verbose) + printf("[%.3f]\n", noise(t, NTRIALS)); + /* If there is too much noise, discard the test */ + if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) { + *avgTimeUser = 0; + *avgTimeMPI = 0; + if (verbose) + printf("Too much noise; discarding measurement\n"); + return 0; + } + *avgTimeUser = mean(t, NTRIALS) / N_REPS; + + /* MPI Vector code */ + MPI_Type_vector(n, 2, stride, MPI_DOUBLE, &vectype); + MPI_Type_commit(&vectype); + + if (verbose) + printf("TestVecPack2Double (MPI): "); + for (j = 0; j < NTRIALS; j++) { + t1 = MPI_Wtime(); + for (rep = 0; rep < N_REPS; rep++) { + position = 0; + MPI_Pack((void *) src, 1, vectype, dest, 2 * n * sizeof(double), + &position, MPI_COMM_SELF); + } + t2 = MPI_Wtime() - t1; + t[j] = t2; + if (verbose) + printf("%.3f ", t[j]); + } + if (verbose) + printf("[%.3f]\n", noise(t, NTRIALS)); + /* If there is too much noise, discard the test */ + if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) { + *avgTimeUser = 0; + *avgTimeMPI = 0; + if (verbose) + printf("Too much noise; discarding measurement\n"); + } + else { + *avgTimeMPI = mean(t, NTRIALS) / N_REPS; + } + MPI_Type_free(&vectype); + + return 0; } /* This creates an indexed type that is like a vector (for simplicity - of construction). There is a possibility that the MPI implementation + of construction). There is a possibility that the MPI implementation will recognize and simplify this (e.g., in MPI_Type_commit); if so, - let us know and we'll add a version that is not as regular + let us know and we'll add a version that is not as regular */ /* See above for why restrict is not used in the function args */ -int TestIndexPackDouble( int n, int stride, - double *avgTimeUser, double *avgTimeMPI, - double *dest, const double *src ); -int TestIndexPackDouble( int n, int stride, - double *avgTimeUser, double *avgTimeMPI, - double *dest, const double *src ) +int TestIndexPackDouble(int n, int stride, + double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src); +int TestIndexPackDouble(int n, int stride, + double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src) { - double *restrict d_dest; - const double *restrict d_src; - register int i, j; - int rep, position; - int *restrict displs = 0; - double t1, t2, t[NTRIALS]; - MPI_Datatype indextype; - - displs = (int *)malloc( n * sizeof(int) ); - for (i=0; i VARIANCE_THRESHOLD) { - *avgTimeUser = 0; - *avgTimeMPI = 0; - if (verbose) - printf("Too much noise; discarding measurement\n"); - return 0; - } - *avgTimeUser = mean(t, NTRIALS) / N_REPS; - - /* MPI Index code */ - MPI_Type_create_indexed_block( n, 1, displs, MPI_DOUBLE, &indextype ); - MPI_Type_commit( &indextype ); - - free( displs ); - - if (verbose) printf("TestIndexPackDouble (MPI): "); - for (j = 0; j < NTRIALS; j++) { - t1 = MPI_Wtime(); - for (rep=0; rep VARIANCE_THRESHOLD) { - *avgTimeUser = 0; - *avgTimeMPI = 0; - if (verbose) - printf("Too much noise; discarding measurement\n"); - } - else { - *avgTimeMPI = mean(t, NTRIALS) / N_REPS; - } - MPI_Type_free( &indextype ); - - return 0; + double *restrict d_dest; + const double *restrict d_src; + register int i, j; + int rep, position; + int *restrict displs = 0; + double t1, t2, t[NTRIALS]; + MPI_Datatype indextype; + + displs = (int *) malloc(n * sizeof(int)); + for (i = 0; i < n; i++) + displs[i] = i * stride; + + /* User code */ + if (verbose) + printf("TestIndexPackDouble (USER): "); + for (j = 0; j < NTRIALS; j++) { + t1 = MPI_Wtime(); + for (rep = 0; rep < N_REPS; rep++) { + i = n; + d_dest = dest; + d_src = src; + for (i = 0; i < n; i++) { + *d_dest++ = d_src[displs[i]]; + } + } + t2 = MPI_Wtime() - t1; + t[j] = t2; + if (verbose) + printf("%.3f ", t[j]); + } + if (verbose) + printf("[%.3f]\n", noise(t, NTRIALS)); + /* If there is too much noise, discard the test */ + if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) { + *avgTimeUser = 0; + *avgTimeMPI = 0; + if (verbose) + printf("Too much noise; discarding measurement\n"); + return 0; + } + *avgTimeUser = mean(t, NTRIALS) / N_REPS; + + /* MPI Index code */ + MPI_Type_create_indexed_block(n, 1, displs, MPI_DOUBLE, &indextype); + MPI_Type_commit(&indextype); + + free(displs); + + if (verbose) + printf("TestIndexPackDouble (MPI): "); + for (j = 0; j < NTRIALS; j++) { + t1 = MPI_Wtime(); + for (rep = 0; rep < N_REPS; rep++) { + position = 0; + MPI_Pack((void *) src, 1, indextype, dest, n * sizeof(double), + &position, MPI_COMM_SELF); + } + t2 = MPI_Wtime() - t1; + t[j] = t2; + if (verbose) + printf("%.3f ", t[j]); + } + if (verbose) + printf("[%.3f]\n", noise(t, NTRIALS)); + /* If there is too much noise, discard the test */ + if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) { + *avgTimeUser = 0; + *avgTimeMPI = 0; + if (verbose) + printf("Too much noise; discarding measurement\n"); + } + else { + *avgTimeMPI = mean(t, NTRIALS) / N_REPS; + } + MPI_Type_free(&indextype); + + return 0; } -int Report( const char *name, const char *packname, - double avgTimeMPI, double avgTimeUser ); -int Report( const char *name, const char *packname, - double avgTimeMPI, double avgTimeUser ) +int Report(const char *name, const char *packname, double avgTimeMPI, double avgTimeUser); +int Report(const char *name, const char *packname, double avgTimeMPI, double avgTimeUser) { - double diffTime, maxTime; - int errs=0; - - /* Move this into a common routine */ - diffTime = avgTimeMPI - avgTimeUser; - if (diffTime < 0) diffTime = - diffTime; - if (avgTimeMPI > avgTimeUser) maxTime = avgTimeMPI; - else maxTime = avgTimeUser; - - if (verbose) { - printf( "%-30s:\t%g\t%g\t(%g%%)\n", name, - avgTimeMPI, avgTimeUser, - 100 * (diffTime / maxTime) ); - fflush(stdout); - } - if (avgTimeMPI > avgTimeUser && (diffTime > THRESHOLD * maxTime)) { - errs++; - printf( "%s:\tMPI %s code is too slow: MPI %g\t User %g\n", - name, packname, avgTimeMPI, avgTimeUser ); - } - - return errs; + double diffTime, maxTime; + int errs = 0; + + /* Move this into a common routine */ + diffTime = avgTimeMPI - avgTimeUser; + if (diffTime < 0) + diffTime = -diffTime; + if (avgTimeMPI > avgTimeUser) + maxTime = avgTimeMPI; + else + maxTime = avgTimeUser; + + if (verbose) { + printf("%-30s:\t%g\t%g\t(%g%%)\n", name, + avgTimeMPI, avgTimeUser, 100 * (diffTime / maxTime)); + fflush(stdout); + } + if (avgTimeMPI > avgTimeUser && (diffTime > THRESHOLD * maxTime)) { + errs++; + printf("%s:\tMPI %s code is too slow: MPI %g\t User %g\n", + name, packname, avgTimeMPI, avgTimeUser); + } + + return errs; } /* Finally, here's the main program */ -int main( int argc, char *argv[] ) +int main(int argc, char *argv[]) { - int n, stride, err, errs = 0; + int n, stride, errs = 0; void *dest, *src; double avgTimeUser, avgTimeMPI; - MPI_Init( &argc, &argv ); - if (getenv("MPITEST_VERBOSE")) verbose = 1; + MPI_Init(&argc, &argv); + if (getenv("MPITEST_VERBOSE")) + verbose = 1; - n = 30000; + n = 30000; stride = 4; - dest = (void *)malloc( n * sizeof(double) ); - src = (void *)malloc( n * ((1+stride)*sizeof(double)) ); + dest = (void *) malloc(n * sizeof(double)); + src = (void *) malloc(n * ((1 + stride) * sizeof(double))); /* Touch the source and destination arrays */ - memset( src, 0, n * (1+stride)*sizeof(double) ); - memset( dest, 0, n * sizeof(double) ); + memset(src, 0, n * (1 + stride) * sizeof(double)); + memset(dest, 0, n * sizeof(double)); - err = TestVecPackDouble( n, stride, &avgTimeUser, &avgTimeMPI, - dest, src ); - errs += Report( "VecPackDouble", "Pack", avgTimeMPI, avgTimeUser ); + TestVecPackDouble(n, stride, &avgTimeUser, &avgTimeMPI, dest, src); + errs += Report("VecPackDouble", "Pack", avgTimeMPI, avgTimeUser); - err = TestVecUnPackDouble( n, stride, &avgTimeUser, &avgTimeMPI, - src, dest ); - errs += Report( "VecUnPackDouble", "Unpack", avgTimeMPI, avgTimeUser ); + TestVecUnPackDouble(n, stride, &avgTimeUser, &avgTimeMPI, src, dest); + errs += Report("VecUnPackDouble", "Unpack", avgTimeMPI, avgTimeUser); - err = TestIndexPackDouble( n, stride, &avgTimeUser, &avgTimeMPI, - dest, src ); - errs += Report( "VecIndexDouble", "Pack", avgTimeMPI, avgTimeUser ); + TestIndexPackDouble(n, stride, &avgTimeUser, &avgTimeMPI, dest, src); + errs += Report("VecIndexDouble", "Pack", avgTimeMPI, avgTimeUser); free(dest); free(src); - - dest = (void *)malloc( 2*n * sizeof(double) ); - src = (void *)malloc( (1 + n) * ((1+stride)*sizeof(double)) ); - memset( dest, 0, 2*n * sizeof(double) ); - memset( src, 0, (1+n) * (1+stride)*sizeof(double) ); - err = TestVecPack2Double( n, stride, &avgTimeUser, &avgTimeMPI, - dest, src ); - errs += Report( "VecPack2Double", "Pack", avgTimeMPI, avgTimeUser ); + + dest = (void *) malloc(2 * n * sizeof(double)); + src = (void *) malloc((1 + n) * ((1 + stride) * sizeof(double))); + memset(dest, 0, 2 * n * sizeof(double)); + memset(src, 0, (1 + n) * (1 + stride) * sizeof(double)); + TestVecPack2Double(n, stride, &avgTimeUser, &avgTimeMPI, dest, src); + errs += Report("VecPack2Double", "Pack", avgTimeMPI, avgTimeUser); free(dest); free(src); - + if (errs == 0) { - printf( " No Errors\n" ); + printf(" No Errors\n"); } else { - printf( " Found %d performance problems\n", errs ); + printf(" Found %d performance problems\n", errs); } fflush(stdout); diff --git a/teshsuite/smpi/mpich3-test/perf/indexperf.c b/teshsuite/smpi/mpich3-test/perf/indexperf.c index e05463bc89..38c3822533 100644 --- a/teshsuite/smpi/mpich3-test/perf/indexperf.c +++ b/teshsuite/smpi/mpich3-test/perf/indexperf.c @@ -23,163 +23,163 @@ static int verbose = 0; -int main( int argc, char **argv ) +int main(int argc, char **argv) { - double *inbuf, *outbuf, *outbuf2; - MPI_Aint lb, extent; - int *index_displacement; - int icount, errs=0; - int i, packsize, position, inbufsize; + double *inbuf, *outbuf, *outbuf2; + MPI_Aint lb, extent; + int *index_displacement; + int icount, errs = 0; + int i, packsize, position, inbufsize; MPI_Datatype itype1, stype1; - double t0, t1; - double tpack, tspack, tmanual; - int ntry; + double t0, t1; + double tpack, tspack, tmanual; + int ntry; - MPI_Init( &argc, &argv ); + MPI_Init(&argc, &argv); icount = 2014; /* Create a simple block indexed datatype */ - index_displacement = (int *)malloc( icount * sizeof(int) ); + index_displacement = (int *) malloc(icount * sizeof(int)); if (!index_displacement) { - fprintf( stderr, "Unable to allocated index array of size %d\n", - icount ); - MPI_Abort( MPI_COMM_WORLD, 1 ); + fprintf(stderr, "Unable to allocated index array of size %d\n", icount); + MPI_Abort(MPI_COMM_WORLD, 1); } - for (i=0; i= wsize) destRank = destRank - wsize; + while (destRank >= wsize) + destRank = destRank - wsize; srcRank = wrank - 1; - if (srcRank < 0) srcRank += wsize; + if (srcRank < 0) + srcRank += wsize; /* Create groups for PSCW */ - MPI_Comm_group( MPI_COMM_WORLD, &wgroup ); - MPI_Group_incl( wgroup, 1, &destRank, &accessGroup ); - MPI_Group_incl( wgroup, 1, &srcRank, &exposureGroup ); - MPI_Group_free( &wgroup ); + MPI_Comm_group(MPI_COMM_WORLD, &wgroup); + MPI_Group_incl(wgroup, 1, &destRank, &accessGroup); + MPI_Group_incl(wgroup, 1, &srcRank, &exposureGroup); + MPI_Group_free(&wgroup); arraysize = maxSz * MAX_COUNT; - arraybuffer = (int*)malloc( arraysize * sizeof(int) ); + arraybuffer = (int *) malloc(arraysize * sizeof(int)); if (!arraybuffer) { - fprintf( stderr, "Unable to allocate %d words\n", arraysize ); - MPI_Abort( MPI_COMM_WORLD, 1 ); + fprintf(stderr, "Unable to allocate %d words\n", arraysize); + MPI_Abort(MPI_COMM_WORLD, 1); } - MPI_Win_create( arraybuffer, arraysize*sizeof(int), (int)sizeof(int), - MPI_INFO_NULL, MPI_COMM_WORLD, &win ); + MPI_Win_create(arraybuffer, arraysize * sizeof(int), (int) sizeof(int), + MPI_INFO_NULL, MPI_COMM_WORLD, &win); /* FIXME: we need a test on performance consistency. - The test needs to have both a relative growth limit and - an absolute limit. - */ + * The test needs to have both a relative growth limit and + * an absolute limit. + */ if (maxCount > MAX_COUNT) { - fprintf( stderr, "MaxCount must not exceed %d\n", MAX_COUNT ); - MPI_Abort( MPI_COMM_WORLD, 1 ); + fprintf(stderr, "MaxCount must not exceed %d\n", MAX_COUNT); + MPI_Abort(MPI_COMM_WORLD, 1); } if ((syncChoice & SYNC_FENCE) && (rmaChoice & RMA_ACC)) { - for (sz=1; sz<=maxSz; sz = sz + sz) { - if (wrank == 0) - printf( "Accumulate with fence, %d elements\n", sz ); - cnt = 1; - while (cnt <= maxCount) { - RunAccFence( win, destRank, cnt, sz, t ); - if (wrank == 0) { - PrintResults( cnt, t ); - } - cnt = 2 * cnt; - } - } + for (sz = 1; sz <= maxSz; sz = sz + sz) { + if (wrank == 0) + printf("Accumulate with fence, %d elements\n", sz); + cnt = 1; + while (cnt <= maxCount) { + RunAccFence(win, destRank, cnt, sz, t); + if (wrank == 0) { + PrintResults(cnt, t); + } + cnt = 2 * cnt; + } + } } if ((syncChoice & SYNC_LOCK) && (rmaChoice & RMA_ACC)) { - for (sz=1; sz<=maxSz; sz = sz + sz) { - if (wrank == 0) - printf( "Accumulate with lock, %d elements\n", sz ); - cnt = 1; - while (cnt <= maxCount) { - RunAccLock( win, destRank, cnt, sz, t ); - if (wrank == 0) { - PrintResults( cnt, t ); - } - cnt = 2 * cnt; - } - } + for (sz = 1; sz <= maxSz; sz = sz + sz) { + if (wrank == 0) + printf("Accumulate with lock, %d elements\n", sz); + cnt = 1; + while (cnt <= maxCount) { + RunAccLock(win, destRank, cnt, sz, t); + if (wrank == 0) { + PrintResults(cnt, t); + } + cnt = 2 * cnt; + } + } } if ((syncChoice & SYNC_FENCE) && (rmaChoice & RMA_PUT)) { - for (sz=1; sz<=maxSz; sz = sz + sz) { - if (wrank == 0) - printf( "Put with fence, %d elements\n", sz ); - cnt = 1; - while (cnt <= maxCount) { - RunPutFence( win, destRank, cnt, sz, t ); - if (wrank == 0) { - PrintResults( cnt, t ); - } - cnt = 2 * cnt; - } - } + for (sz = 1; sz <= maxSz; sz = sz + sz) { + if (wrank == 0) + printf("Put with fence, %d elements\n", sz); + cnt = 1; + while (cnt <= maxCount) { + RunPutFence(win, destRank, cnt, sz, t); + if (wrank == 0) { + PrintResults(cnt, t); + } + cnt = 2 * cnt; + } + } } if ((syncChoice & SYNC_LOCK) && (rmaChoice & RMA_PUT)) { - for (sz=1; sz<=maxSz; sz = sz + sz) { - if (wrank == 0) - printf( "Put with lock, %d elements\n", sz ); - cnt = 1; - while (cnt <= maxCount) { - RunPutLock( win, destRank, cnt, sz, t ); - if (wrank == 0) { - PrintResults( cnt, t ); - } - cnt = 2 * cnt; - } - } + for (sz = 1; sz <= maxSz; sz = sz + sz) { + if (wrank == 0) + printf("Put with lock, %d elements\n", sz); + cnt = 1; + while (cnt <= maxCount) { + RunPutLock(win, destRank, cnt, sz, t); + if (wrank == 0) { + PrintResults(cnt, t); + } + cnt = 2 * cnt; + } + } } if ((syncChoice & SYNC_PSCW) && (rmaChoice & RMA_PUT)) { - for (sz=1; sz<=maxSz; sz = sz + sz) { - if (wrank == 0) - printf( "Put with pscw, %d elements\n", sz ); - cnt = 1; - while (cnt <= maxCount) { - RunPutPSCW( win, destRank, cnt, sz, - exposureGroup, accessGroup, t ); - if (wrank == 0) { - PrintResults( cnt, t ); - } - cnt = 2 * cnt; - } - } + for (sz = 1; sz <= maxSz; sz = sz + sz) { + if (wrank == 0) + printf("Put with pscw, %d elements\n", sz); + cnt = 1; + while (cnt <= maxCount) { + RunPutPSCW(win, destRank, cnt, sz, exposureGroup, accessGroup, t); + if (wrank == 0) { + PrintResults(cnt, t); + } + cnt = 2 * cnt; + } + } } if ((syncChoice & SYNC_PSCW) && (rmaChoice & RMA_ACC)) { - for (sz=1; sz<=maxSz; sz = sz + sz) { - if (wrank == 0) - printf( "Accumulate with pscw, %d elements\n", sz ); - cnt = 1; - while (cnt <= maxCount) { - RunAccPSCW( win, destRank, cnt, sz, - exposureGroup, accessGroup, t ); - if (wrank == 0) { - PrintResults( cnt, t ); - } - cnt = 2 * cnt; - } - } + for (sz = 1; sz <= maxSz; sz = sz + sz) { + if (wrank == 0) + printf("Accumulate with pscw, %d elements\n", sz); + cnt = 1; + while (cnt <= maxCount) { + RunAccPSCW(win, destRank, cnt, sz, exposureGroup, accessGroup, t); + if (wrank == 0) { + PrintResults(cnt, t); + } + cnt = 2 * cnt; + } + } } - MPI_Win_free( &win ); + MPI_Win_free(&win); + + MPI_Group_free(&accessGroup); + MPI_Group_free(&exposureGroup); - MPI_Group_free( &accessGroup ); - MPI_Group_free( &exposureGroup ); - MPI_Finalize(); return 0; } -void RunAccFence( MPI_Win win, int destRank, int cnt, int sz, timing t[] ) +void RunAccFence(MPI_Win win, int destRank, int cnt, int sz, timing t[]) { int k, i, j, one = 1; - for (k=0; k 0) rate = (long)(cnt) / d2; - /* count, op, sync, op/each, sync/each, rate */ - printf( "%d\t%e\t%e\t%e\t%e\t%ld\n", cnt, - d1, d2, - d1 / cnt, d2 / cnt, rate ); + long rate = 0; + /* Use the minimum times because they are more stable - if timing + * accuracy is an issue, use the min over multiple trials */ + d1 = minD1; + d2 = minD2; + /* d1 = d1 / MAX_RUNS; d2 = d2 / MAX_RUNS); */ + if (d2 > 0) + rate = (long) (cnt) / d2; + /* count, op, sync, op/each, sync/each, rate */ + printf("%d\t%e\t%e\t%e\t%e\t%ld\n", cnt, d1, d2, d1 / cnt, d2 / cnt, rate); } } diff --git a/teshsuite/smpi/mpich3-test/perf/nestvec.c b/teshsuite/smpi/mpich3-test/perf/nestvec.c index 494f847f20..6ee12f5e65 100644 --- a/teshsuite/smpi/mpich3-test/perf/nestvec.c +++ b/teshsuite/smpi/mpich3-test/perf/nestvec.c @@ -30,161 +30,161 @@ static int verbose = 0; -int main( int argc, char **argv ) +int main(int argc, char **argv) { - int vcount = 16, vblock = vcount*vcount/2, vstride=2*vcount*vblock; - int v2stride, typesize, packsize, i, position, errs = 0; - char *inbuf, *outbuf, *outbuf2; + int vcount = 16, vblock = vcount * vcount / 2, vstride = 2 * vcount * vblock; + int v2stride, typesize, packsize, i, position, errs = 0; + char *inbuf, *outbuf, *outbuf2; MPI_Datatype ft1type, ft2type, ft3type; MPI_Datatype ftopttype; - MPI_Aint lb, extent; - double t0, t1; - double tpack, tmanual, tpackopt; - int ntry; - - MPI_Init( &argc, &argv ); - - MPI_Type_contiguous( 6, MPI_FLOAT, &ft1type ); - MPI_Type_size( ft1type, &typesize ); + MPI_Aint lb, extent; + double t0, t1; + double tpack, tmanual, tpackopt; + int ntry; + + MPI_Init(&argc, &argv); + + MPI_Type_contiguous(6, MPI_FLOAT, &ft1type); + MPI_Type_size(ft1type, &typesize); v2stride = vcount * vcount * vcount * vcount * typesize; - MPI_Type_vector( vcount, vblock, vstride, ft1type, &ft2type ); - MPI_Type_create_hvector( 2, 1, v2stride, ft2type, &ft3type ); - MPI_Type_commit( &ft3type ); - MPI_Type_free( &ft1type ); - MPI_Type_free( &ft2type ); + MPI_Type_vector(vcount, vblock, vstride, ft1type, &ft2type); + MPI_Type_create_hvector(2, 1, v2stride, ft2type, &ft3type); + MPI_Type_commit(&ft3type); + MPI_Type_free(&ft1type); + MPI_Type_free(&ft2type); #if defined(MPICH) && defined(PRINT_DATATYPE_INTERNALS) /* To use MPIDU_Datatype_debug to print the datatype internals, - you must configure MPICH with --enable-g=log */ + * you must configure MPICH with --enable-g=log */ if (verbose) { - printf( "Original datatype:\n" ); - MPIDU_Datatype_debug( ft3type, 10 ); + printf("Original datatype:\n"); + MPIDU_Datatype_debug(ft3type, 10); } #endif /* The same type, but without using the contiguous type */ - MPI_Type_vector( vcount, 6*vblock, 6*vstride, MPI_FLOAT, &ft2type ); - MPI_Type_create_hvector( 2, 1, v2stride, ft2type, &ftopttype ); - MPI_Type_commit( &ftopttype ); - MPI_Type_free( &ft2type ); + MPI_Type_vector(vcount, 6 * vblock, 6 * vstride, MPI_FLOAT, &ft2type); + MPI_Type_create_hvector(2, 1, v2stride, ft2type, &ftopttype); + MPI_Type_commit(&ftopttype); + MPI_Type_free(&ft2type); #if defined(MPICH) && defined(PRINT_DATATYPE_INTERNALS) if (verbose) { - printf( "\n\nMerged datatype:\n" ); - MPIDU_Datatype_debug( ftopttype, 10 ); + printf("\n\nMerged datatype:\n"); + MPIDU_Datatype_debug(ftopttype, 10); } #endif - MPI_Type_get_extent( ft3type, &lb, &extent ); - MPI_Type_size( ft3type, &typesize ); + MPI_Type_get_extent(ft3type, &lb, &extent); + MPI_Type_size(ft3type, &typesize); - MPI_Pack_size( 1, ft3type, MPI_COMM_WORLD, &packsize ); + MPI_Pack_size(1, ft3type, MPI_COMM_WORLD, &packsize); - inbuf = (char *)malloc( extent ); - outbuf = (char *)malloc( packsize ); - outbuf2 = (char *)malloc( packsize ); + inbuf = (char *) malloc(extent); + outbuf = (char *) malloc(packsize); + outbuf2 = (char *) malloc(packsize); if (!inbuf) { - fprintf( stderr, "Unable to allocate %ld for inbuf\n", (long)extent ); - MPI_Abort( MPI_COMM_WORLD, 1 ); + fprintf(stderr, "Unable to allocate %ld for inbuf\n", (long) extent); + MPI_Abort(MPI_COMM_WORLD, 1); } if (!outbuf) { - fprintf( stderr, "Unable to allocate %ld for outbuf\n", (long)packsize ); - MPI_Abort( MPI_COMM_WORLD, 1 ); + fprintf(stderr, "Unable to allocate %ld for outbuf\n", (long) packsize); + MPI_Abort(MPI_COMM_WORLD, 1); } if (!outbuf2) { - fprintf( stderr, "Unable to allocate %ld for outbuf2\n", (long)packsize ); - MPI_Abort( MPI_COMM_WORLD, 1 ); + fprintf(stderr, "Unable to allocate %ld for outbuf2\n", (long) packsize); + MPI_Abort(MPI_COMM_WORLD, 1); } - for (i=0; i (1 + ERROR_MARGIN)) { /* If the difference is more than 10%, it's an error */ - printf("%.3f\t%.3f\n", 1000000.0 * ts, 1000000.0 * t); - printf("Too much difference in performance\n"); - } - else printf(" No Errors\n"); - } - - MPI_Finalize(); - free(sbuf); - free(rbuf); - return 0; + /* Print out the results */ + if (!rank) { + if ((ts / t) > (1 + ERROR_MARGIN)) { /* If the difference is more than 10%, it's an error */ + printf("%.3f\t%.3f\n", 1000000.0 * ts, 1000000.0 * t); + printf("Too much difference in performance\n"); + } + else + printf(" No Errors\n"); + } + + MPI_Finalize(); + + return 0; } diff --git a/teshsuite/smpi/mpich3-test/perf/sendrecvl.c b/teshsuite/smpi/mpich3-test/perf/sendrecvl.c index 1a54a70a63..74703d63ca 100644 --- a/teshsuite/smpi/mpich3-test/perf/sendrecvl.c +++ b/teshsuite/smpi/mpich3-test/perf/sendrecvl.c @@ -14,11 +14,11 @@ #include #define MAXTESTS 32 -#define ERROR_MARGIN 1.0 /* FIXME: This number is pretty much randomly chosen */ +#define ERROR_MARGIN 1.0 /* FIXME: This number is pretty much randomly chosen */ static int verbose = 0; -int main( int argc, char *argv[] ) +int main(int argc, char *argv[]) { int wsize, wrank, partner, len, maxlen, k, reps, repsleft; double t1; @@ -26,239 +26,234 @@ int main( int argc, char *argv[] ) char *rbuf, *sbuf; double times[3][MAXTESTS]; - MPI_Init( &argc, &argv ); - if (getenv("MPITEST_VERBOSE")) verbose = 1; + MPI_Init(&argc, &argv); + if (getenv("MPITEST_VERBOSE")) + verbose = 1; + + MPI_Comm_size(MPI_COMM_WORLD, &wsize); + MPI_Comm_rank(MPI_COMM_WORLD, &wrank); - MPI_Comm_size( MPI_COMM_WORLD, &wsize ); - MPI_Comm_rank( MPI_COMM_WORLD, &wrank ); - if (wsize < 2) { - fprintf( stderr, "This program requires at least 2 processes\n" ); - MPI_Abort( MPI_COMM_WORLD, 1 ); + fprintf(stderr, "This program requires at least 2 processes\n"); + MPI_Abort(MPI_COMM_WORLD, 1); } /* Set partner based on whether rank is odd or even */ if (wrank & 0x1) { - partner = wrank - 1; + partner = wrank - 1; } else if (wrank < wsize - 1) { - partner = wrank + 1; + partner = wrank + 1; } - else - /* Handle wsize odd */ - partner = MPI_PROC_NULL; + else + /* Handle wsize odd */ + partner = MPI_PROC_NULL; /* Allocate and initialize buffers */ - maxlen = 1024*1024; - rbuf = (char *)malloc( maxlen ); - sbuf = (char *)malloc( maxlen ); + maxlen = 1024 * 1024; + rbuf = (char *) malloc(maxlen); + sbuf = (char *) malloc(maxlen); if (!rbuf || !sbuf) { - fprintf( stderr, "Could not allocate %d byte buffers\n", maxlen ); - MPI_Abort( MPI_COMM_WORLD, 2 ); + fprintf(stderr, "Could not allocate %d byte buffers\n", maxlen); + MPI_Abort(MPI_COMM_WORLD, 2); } - for (k=0; k 0) { - t1 = t1 * 1.e6; - if (verbose) - printf( "%d\t%g\t%g\n", len, t1, len/t1 ); - } - else { - t1 = t1 * 1.e6; - if (verbose) - printf( "%d\t%g\tINF\n", len, t1 ); - } - if (verbose) - fflush( stdout ); - } + for (k = 0; k < 20; k++) { + /* We use a simple linear form for the number of tests to + * reduce the impact of the granularity of the timer */ + reps = 50 - k; + repsleft = reps; + /* Make sure that both processes are ready to start */ + MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0, + MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + t1 = MPI_Wtime(); + while (repsleft--) { + MPI_Irecv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, &rreq); + MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD); + MPI_Wait(&rreq, MPI_STATUS_IGNORE); + } + t1 = MPI_Wtime() - t1; + times[0][k] = t1 / reps; + if (wrank == 0) { + t1 = t1 / reps; + if (t1 > 0) { + t1 = t1 * 1.e6; + if (verbose) + printf("%d\t%g\t%g\n", len, t1, len / t1); + } + else { + t1 = t1 * 1.e6; + if (verbose) + printf("%d\t%g\tINF\n", len, t1); + } + if (verbose) + fflush(stdout); + } - len *= 2; + len *= 2; } - MPI_Barrier( MPI_COMM_WORLD ); + MPI_Barrier(MPI_COMM_WORLD); /* Test Sendrecv, head to head */ if (wrank == 0 && verbose) { - printf( "Sendrecv\n" ); - printf( "len\ttime (usec)\trate (MB/s)\n" ); + printf("Sendrecv\n"); + printf("len\ttime (usec)\trate (MB/s)\n"); } /* Send powers of 2 bytes */ len = 1; - for (k=0; k<20; k++) { - /* We use a simple linear form for the number of tests to - reduce the impact of the granularity of the timer */ - reps = 50-k; - repsleft = reps; - /* Make sure that both processes are ready to start */ - MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, partner, 0, - MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, - MPI_STATUS_IGNORE ); - t1 = MPI_Wtime(); - while (repsleft--) { - MPI_Sendrecv( sbuf, len, MPI_BYTE, partner, k, - rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, - MPI_STATUS_IGNORE ); - } - t1 = MPI_Wtime() - t1; - times[1][k] = t1 / reps; - if (wrank == 0) { - t1 = t1 / reps; - if (t1 > 0) { - t1 = t1 * 1.e6; - if (verbose) - printf( "%d\t%g\t%g\n", len, t1, len/t1 ); - } - else { - t1 = t1 * 1.e6; - if (verbose) - printf( "%d\t%g\tINF\n", len, t1 ); - } - if (verbose) - fflush( stdout ); - } + for (k = 0; k < 20; k++) { + /* We use a simple linear form for the number of tests to + * reduce the impact of the granularity of the timer */ + reps = 50 - k; + repsleft = reps; + /* Make sure that both processes are ready to start */ + MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0, + MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + t1 = MPI_Wtime(); + while (repsleft--) { + MPI_Sendrecv(sbuf, len, MPI_BYTE, partner, k, + rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + t1 = MPI_Wtime() - t1; + times[1][k] = t1 / reps; + if (wrank == 0) { + t1 = t1 / reps; + if (t1 > 0) { + t1 = t1 * 1.e6; + if (verbose) + printf("%d\t%g\t%g\n", len, t1, len / t1); + } + else { + t1 = t1 * 1.e6; + if (verbose) + printf("%d\t%g\tINF\n", len, t1); + } + if (verbose) + fflush(stdout); + } - len *= 2; + len *= 2; } - MPI_Barrier( MPI_COMM_WORLD ); + MPI_Barrier(MPI_COMM_WORLD); /* Test Send/recv, ping-pong */ if (wrank == 0 && verbose) { - printf( "Pingpong\n" ); - printf( "len\ttime (usec)\trate (MB/s)\n" ); + printf("Pingpong\n"); + printf("len\ttime (usec)\trate (MB/s)\n"); } /* Send powers of 2 bytes */ len = 1; - for (k=0; k<20; k++) { - /* We use a simple linear form for the number of tests to - reduce the impact of the granularity of the timer */ - reps = 50-k; - repsleft = reps; - /* Make sure that both processes are ready to start */ - MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, partner, 0, - MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, - MPI_STATUS_IGNORE ); - t1 = MPI_Wtime(); - while (repsleft--) { - if (wrank & 0x1) { - MPI_Send( sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD ); - MPI_Recv( rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, - MPI_STATUS_IGNORE ); - } - else { - MPI_Recv( rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, - MPI_STATUS_IGNORE ); - MPI_Send( sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD ); - } - } - t1 = MPI_Wtime() - t1; - times[2][k] = t1 / reps; - if (wrank == 0) { - t1 = t1 / reps; - if (t1 > 0) { - t1 = t1 * 1.e6; - if (verbose) - printf( "%d\t%g\t%g\n", len, t1, len/t1 ); - } - else { - t1 = t1 * 1.e6; - if (verbose) - printf( "%d\t%g\tINF\n", len, t1 ); - } - if (verbose) - fflush( stdout ); - } + for (k = 0; k < 20; k++) { + /* We use a simple linear form for the number of tests to + * reduce the impact of the granularity of the timer */ + reps = 50 - k; + repsleft = reps; + /* Make sure that both processes are ready to start */ + MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0, + MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + t1 = MPI_Wtime(); + while (repsleft--) { + if (wrank & 0x1) { + MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD); + MPI_Recv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + else { + MPI_Recv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD); + } + } + t1 = MPI_Wtime() - t1; + times[2][k] = t1 / reps; + if (wrank == 0) { + t1 = t1 / reps; + if (t1 > 0) { + t1 = t1 * 1.e6; + if (verbose) + printf("%d\t%g\t%g\n", len, t1, len / t1); + } + else { + t1 = t1 * 1.e6; + if (verbose) + printf("%d\t%g\tINF\n", len, t1); + } + if (verbose) + fflush(stdout); + } - len *= 2; + len *= 2; } - - + + /* At this point, we could optionally analyze the results and report - success or failure based on some criteria, such as near monotone - increases in bandwidth. This test was created because of a - fall-off in performance noted in the ch3:sock device:channel */ + * success or failure based on some criteria, such as near monotone + * increases in bandwidth. This test was created because of a + * fall-off in performance noted in the ch3:sock device:channel */ if (wrank == 0) { - int nPerfErrors = 0; - len = 1; - for (k=0; k<20; k++) { - double T0,T1,T2; - T0 = times[0][k] * 1.e6; - T1 = times[1][k] * 1.e6; - T2 = times[2][k] * 1.e6; - if (verbose) - printf( "%d\t%12.2f\t%12.2f\t%12.2f\n", len, T0, T1, T2 ); - /* Lets look at long messages only */ - if (k > 10) { - double T0Old, T1Old, T2Old; - T0Old = times[0][k-1] * 1.0e6; - T1Old = times[1][k-1] * 1.0e6; - T2Old = times[2][k-1] * 1.0e6; - if (T0 > (2+ERROR_MARGIN) * T0Old) { - nPerfErrors++; - if (verbose) - printf( "Irecv-Send:\t%d\t%12.2f\t%12.2f\n", len, T0Old, T0 ); - } - if (T1 > (2+ERROR_MARGIN) * T1Old) { - nPerfErrors++; - if (verbose) - printf( "Sendrecv:\t%d\t%12.2f\t%12.2f\n", len, T1Old, T1 ); - } - if (T2 > (2+ERROR_MARGIN) * T2Old) { - nPerfErrors++; - if (verbose) - printf( "Pingpong:\t%d\t%12.2f\t%12.2f\n", len, T2Old, T2 ); - } - } - len *= 2; - } - if (nPerfErrors > 8) { - /* Allow for 1-2 errors for eager-rendezvous shifting - * point and cache effects. There should be a better way - * of doing this. */ - printf( " Found %d performance errors\n", nPerfErrors ); - } - else { - printf( " No Errors\n" ); - } - fflush( stdout ); + int nPerfErrors = 0; + len = 1; + for (k = 0; k < 20; k++) { + double T0, T1, T2; + T0 = times[0][k] * 1.e6; + T1 = times[1][k] * 1.e6; + T2 = times[2][k] * 1.e6; + if (verbose) + printf("%d\t%12.2f\t%12.2f\t%12.2f\n", len, T0, T1, T2); + /* Lets look at long messages only */ + if (k > 10) { + double T0Old, T1Old, T2Old; + T0Old = times[0][k - 1] * 1.0e6; + T1Old = times[1][k - 1] * 1.0e6; + T2Old = times[2][k - 1] * 1.0e6; + if (T0 > (2 + ERROR_MARGIN) * T0Old) { + nPerfErrors++; + if (verbose) + printf("Irecv-Send:\t%d\t%12.2f\t%12.2f\n", len, T0Old, T0); + } + if (T1 > (2 + ERROR_MARGIN) * T1Old) { + nPerfErrors++; + if (verbose) + printf("Sendrecv:\t%d\t%12.2f\t%12.2f\n", len, T1Old, T1); + } + if (T2 > (2 + ERROR_MARGIN) * T2Old) { + nPerfErrors++; + if (verbose) + printf("Pingpong:\t%d\t%12.2f\t%12.2f\n", len, T2Old, T2); + } + } + len *= 2; + } + if (nPerfErrors > 8) { + /* Allow for 1-2 errors for eager-rendezvous shifting + * point and cache effects. There should be a better way + * of doing this. */ + printf(" Found %d performance errors\n", nPerfErrors); + } + else { + printf(" No Errors\n"); + } + fflush(stdout); } - free( sbuf ); - free( rbuf ); + free(sbuf); + free(rbuf); MPI_Finalize(); diff --git a/teshsuite/smpi/mpich3-test/perf/testlist b/teshsuite/smpi/mpich3-test/perf/testlist index 096a5bb51c..03ddbe37f2 100644 --- a/teshsuite/smpi/mpich3-test/perf/testlist +++ b/teshsuite/smpi/mpich3-test/perf/testlist @@ -2,10 +2,10 @@ transp-datatype 2 sendrecvl 2 twovec 1 xfail=ticket1788 #Need MPI_Pack -#dtpack 1 xfail=ticket1789 -#nestvec 1 xfail=ticket1788 -#nestvec2 1 xfail=ticket1788 -#indexperf 1 xfail=ticket1788 +dtpack 1 xfail=ticket1789 +nestvec 1 xfail=ticket1788 +nestvec2 1 xfail=ticket1788 +indexperf 1 xfail=ticket1788 non_zero_root 4 timer 1 # The commcreatep test looks at how communicator creation scales with group diff --git a/teshsuite/smpi/mpich3-test/perf/timer.c b/teshsuite/smpi/mpich3-test/perf/timer.c index 1a778c4493..4a77d18b34 100644 --- a/teshsuite/smpi/mpich3-test/perf/timer.c +++ b/teshsuite/smpi/mpich3-test/perf/timer.c @@ -4,7 +4,7 @@ * See COPYRIGHT in top-level directory. */ -/* +/* * Check that the timer produces monotone nondecreasing times and that * the Tick is reasonable */ @@ -17,51 +17,53 @@ static int verbose = 0; #define MAX_TIMER_TEST 5000 -int main(int argc, char* argv[]) +int main(int argc, char *argv[]) { double t1[MAX_TIMER_TEST], tick[MAX_TIMER_TEST], tickval; double minDiff, maxDiff, diff; int i, nZeros = 0; int errs = 0; - MTest_Init(&argc,&argv); + MTest_Init(&argc, &argv); - for (i=0; i maxDiff) maxDiff = diff; + nZeros = 0; + for (i = 1; i < MAX_TIMER_TEST; i++) { + diff = t1[i] - t1[i - 1]; + if (diff == 0.0) + nZeros++; + else if (diff < minDiff) + minDiff = diff; + if (diff > maxDiff) + maxDiff = diff; } /* Are the time diff values and tick values consistent */ if (verbose) { - printf( "Tick = %e, timer range = [%e,%e]\n", tickval, minDiff, - maxDiff ); - if (nZeros) printf( "Wtime difference was 0 %d times\n", nZeros ); - } + printf("Tick = %e, timer range = [%e,%e]\n", tickval, minDiff, maxDiff); + if (nZeros) + printf("Wtime difference was 0 %d times\n", nZeros); + } MTest_Finalize(errs); MPI_Finalize(); diff --git a/teshsuite/smpi/mpich3-test/perf/transp-datatype.c b/teshsuite/smpi/mpich3-test/perf/transp-datatype.c index b7ebc278b3..430831b6d0 100644 --- a/teshsuite/smpi/mpich3-test/perf/transp-datatype.c +++ b/teshsuite/smpi/mpich3-test/perf/transp-datatype.c @@ -21,11 +21,11 @@ #define SIZE 100 #define ITER 100 -int main(int argc, char* argv[]) +int main(int argc, char *argv[]) { int i, j, k; - static double a[SIZE][SIZE],b[SIZE][SIZE]; - double t1,t2,t,ts,tst; + static double a[SIZE][SIZE], b[SIZE][SIZE]; + double t1, t2, t, ts, tst; double temp; int myrank, mysize, errs = 0; MPI_Status status; @@ -33,94 +33,92 @@ int main(int argc, char* argv[]) MPI_Datatype col, xpose; - MTest_Init( &argc, &argv ); - MPI_Comm_rank(MPI_COMM_WORLD,&myrank); - MPI_Comm_size( MPI_COMM_WORLD, &mysize ); + MTest_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + MPI_Comm_size(MPI_COMM_WORLD, &mysize); if (mysize != 2) { - fprintf( stderr, "This test must be run with 2 processes\n" ); - MPI_Abort( MPI_COMM_WORLD, 1 ); + fprintf(stderr, "This test must be run with 2 processes\n"); + MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Type_extent(MPI_DOUBLE, &sizeofreal); - + MPI_Type_vector(SIZE, 1, SIZE, MPI_DOUBLE, &col); MPI_Type_hvector(SIZE, 1, sizeofreal, col, &xpose); MPI_Type_commit(&xpose); /* Preset the arrays so that they're in memory */ - for (i=0; i 2 * tst) { - errs ++; - fprintf( stderr, "Transpose time with datatypes is more than twice time without datatypes\n" ); - fprintf( stderr, "%f\t%f\t%f\n", t, ts, tst ); - } + /* if t and tst are too different, then there is a performance + * problem in the handling of the datatypes */ + + if (t > 2 * tst) { + errs++; + fprintf(stderr, + "Transpose time with datatypes is more than twice time without datatypes\n"); + fprintf(stderr, "%f\t%f\t%f\n", t, ts, tst); + } } MPI_Type_free(&col); MPI_Type_free(&xpose); - MTest_Finalize( errs ); + MTest_Finalize(errs); MPI_Finalize(); return 0; } diff --git a/teshsuite/smpi/mpich3-test/perf/twovec.c b/teshsuite/smpi/mpich3-test/perf/twovec.c index 653e8ced7b..e99e18ec17 100644 --- a/teshsuite/smpi/mpich3-test/perf/twovec.c +++ b/teshsuite/smpi/mpich3-test/perf/twovec.c @@ -21,7 +21,7 @@ */ #define SKIP 4 -#define NUM_SIZES 15 +#define NUM_SIZES 16 #define FRACTION 1.0 /* Don't make the number of loops too high; we create so many @@ -39,7 +39,7 @@ int main(int argc, char *argv[]) MPI_Init(&argc, &argv); tmean = 0; - size = 1; + size = 1; for (i = -SKIP; i < NUM_SIZES; i++) { nrows = ncols = size; @@ -55,8 +55,8 @@ int main(int argc, char *argv[]) t[i] = MPI_Wtime() - ttmp; if (t[i] < 100 * MPI_Wtick()) { /* Time is too inaccurate to use. Set to zero. - Consider increasing the LOOPS value to make this - time large enough */ + * Consider increasing the LOOPS value to make this + * time large enough */ t[i] = 0; } tmean += t[i]; @@ -73,31 +73,32 @@ int main(int argc, char *argv[]) tmean /= NUM_SIZES; /* Now, analyze the times to see that they do not grow too fast - as a function of size. As that is a vague criteria, we do the - following as a simple test: - Compute the mean of the first half and the second half of the - data - Compare the two means - If the mean of the second half is more than FRACTION times the - mean of the first half, then the time may be growing too fast. + * as a function of size. As that is a vague criteria, we do the + * following as a simple test: + * Compute the mean of the first half and the second half of the + * data + * Compare the two means + * If the mean of the second half is more than FRACTION times the + * mean of the first half, then the time may be growing too fast. */ tMeanLower = tMeanHigher = 0; - for (i=0; i 0 && tMeanHigher > (1 + FRACTION) * tMeanLower) errs++; + * FRACTION here - the goal is to detect significant growth in + * execution time as the size increases, and there is no MPI + * standard requirement here to meet. + * + * If the times were too small, then the test also passes - the + * goal is to find implementation problems that lead to excessive + * time in these routines. + */ + if (tMeanLower > 0 && tMeanHigher > (1 + FRACTION) * tMeanLower) + errs++; if (errs) { fprintf(stderr, "too much difference in performance: "); -- 2.20.1