X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/01cfa7276bea34f747662c55a1ce2f2f7d38c1c6..aac4ba41ea27fe8168bebed009abebd7e5c3f006:/teshsuite/smpi/mpich3-test/perf/sendrecvl.c diff --git a/teshsuite/smpi/mpich3-test/perf/sendrecvl.c b/teshsuite/smpi/mpich3-test/perf/sendrecvl.c index 1a54a70a63..74703d63ca 100644 --- a/teshsuite/smpi/mpich3-test/perf/sendrecvl.c +++ b/teshsuite/smpi/mpich3-test/perf/sendrecvl.c @@ -14,11 +14,11 @@ #include #define MAXTESTS 32 -#define ERROR_MARGIN 1.0 /* FIXME: This number is pretty much randomly chosen */ +#define ERROR_MARGIN 1.0 /* FIXME: This number is pretty much randomly chosen */ static int verbose = 0; -int main( int argc, char *argv[] ) +int main(int argc, char *argv[]) { int wsize, wrank, partner, len, maxlen, k, reps, repsleft; double t1; @@ -26,239 +26,234 @@ int main( int argc, char *argv[] ) char *rbuf, *sbuf; double times[3][MAXTESTS]; - MPI_Init( &argc, &argv ); - if (getenv("MPITEST_VERBOSE")) verbose = 1; + MPI_Init(&argc, &argv); + if (getenv("MPITEST_VERBOSE")) + verbose = 1; + + MPI_Comm_size(MPI_COMM_WORLD, &wsize); + MPI_Comm_rank(MPI_COMM_WORLD, &wrank); - MPI_Comm_size( MPI_COMM_WORLD, &wsize ); - MPI_Comm_rank( MPI_COMM_WORLD, &wrank ); - if (wsize < 2) { - fprintf( stderr, "This program requires at least 2 processes\n" ); - MPI_Abort( MPI_COMM_WORLD, 1 ); + fprintf(stderr, "This program requires at least 2 processes\n"); + MPI_Abort(MPI_COMM_WORLD, 1); } /* Set partner based on whether rank is odd or even */ if (wrank & 0x1) { - partner = wrank - 1; + partner = wrank - 1; } else if (wrank < wsize - 1) { - partner = wrank + 1; + partner = wrank + 1; } - else - /* Handle wsize odd */ - partner = MPI_PROC_NULL; + else + /* Handle wsize odd */ + partner = MPI_PROC_NULL; /* Allocate and initialize buffers */ - maxlen = 1024*1024; - rbuf = (char *)malloc( maxlen ); - sbuf = (char *)malloc( maxlen ); + maxlen = 1024 * 1024; + rbuf = (char *) malloc(maxlen); + sbuf = (char *) malloc(maxlen); if (!rbuf || !sbuf) { - fprintf( stderr, "Could not allocate %d byte buffers\n", maxlen ); - MPI_Abort( MPI_COMM_WORLD, 2 ); + fprintf(stderr, "Could not allocate %d byte buffers\n", maxlen); + MPI_Abort(MPI_COMM_WORLD, 2); } - for (k=0; k 0) { - t1 = t1 * 1.e6; - if (verbose) - printf( "%d\t%g\t%g\n", len, t1, len/t1 ); - } - else { - t1 = t1 * 1.e6; - if (verbose) - printf( "%d\t%g\tINF\n", len, t1 ); - } - if (verbose) - fflush( stdout ); - } + for (k = 0; k < 20; k++) { + /* We use a simple linear form for the number of tests to + * reduce the impact of the granularity of the timer */ + reps = 50 - k; + repsleft = reps; + /* Make sure that both processes are ready to start */ + MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0, + MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + t1 = MPI_Wtime(); + while (repsleft--) { + MPI_Irecv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, &rreq); + MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD); + MPI_Wait(&rreq, MPI_STATUS_IGNORE); + } + t1 = MPI_Wtime() - t1; + times[0][k] = t1 / reps; + if (wrank == 0) { + t1 = t1 / reps; + if (t1 > 0) { + t1 = t1 * 1.e6; + if (verbose) + printf("%d\t%g\t%g\n", len, t1, len / t1); + } + else { + t1 = t1 * 1.e6; + if (verbose) + printf("%d\t%g\tINF\n", len, t1); + } + if (verbose) + fflush(stdout); + } - len *= 2; + len *= 2; } - MPI_Barrier( MPI_COMM_WORLD ); + MPI_Barrier(MPI_COMM_WORLD); /* Test Sendrecv, head to head */ if (wrank == 0 && verbose) { - printf( "Sendrecv\n" ); - printf( "len\ttime (usec)\trate (MB/s)\n" ); + printf("Sendrecv\n"); + printf("len\ttime (usec)\trate (MB/s)\n"); } /* Send powers of 2 bytes */ len = 1; - for (k=0; k<20; k++) { - /* We use a simple linear form for the number of tests to - reduce the impact of the granularity of the timer */ - reps = 50-k; - repsleft = reps; - /* Make sure that both processes are ready to start */ - MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, partner, 0, - MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, - MPI_STATUS_IGNORE ); - t1 = MPI_Wtime(); - while (repsleft--) { - MPI_Sendrecv( sbuf, len, MPI_BYTE, partner, k, - rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, - MPI_STATUS_IGNORE ); - } - t1 = MPI_Wtime() - t1; - times[1][k] = t1 / reps; - if (wrank == 0) { - t1 = t1 / reps; - if (t1 > 0) { - t1 = t1 * 1.e6; - if (verbose) - printf( "%d\t%g\t%g\n", len, t1, len/t1 ); - } - else { - t1 = t1 * 1.e6; - if (verbose) - printf( "%d\t%g\tINF\n", len, t1 ); - } - if (verbose) - fflush( stdout ); - } + for (k = 0; k < 20; k++) { + /* We use a simple linear form for the number of tests to + * reduce the impact of the granularity of the timer */ + reps = 50 - k; + repsleft = reps; + /* Make sure that both processes are ready to start */ + MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0, + MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + t1 = MPI_Wtime(); + while (repsleft--) { + MPI_Sendrecv(sbuf, len, MPI_BYTE, partner, k, + rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + t1 = MPI_Wtime() - t1; + times[1][k] = t1 / reps; + if (wrank == 0) { + t1 = t1 / reps; + if (t1 > 0) { + t1 = t1 * 1.e6; + if (verbose) + printf("%d\t%g\t%g\n", len, t1, len / t1); + } + else { + t1 = t1 * 1.e6; + if (verbose) + printf("%d\t%g\tINF\n", len, t1); + } + if (verbose) + fflush(stdout); + } - len *= 2; + len *= 2; } - MPI_Barrier( MPI_COMM_WORLD ); + MPI_Barrier(MPI_COMM_WORLD); /* Test Send/recv, ping-pong */ if (wrank == 0 && verbose) { - printf( "Pingpong\n" ); - printf( "len\ttime (usec)\trate (MB/s)\n" ); + printf("Pingpong\n"); + printf("len\ttime (usec)\trate (MB/s)\n"); } /* Send powers of 2 bytes */ len = 1; - for (k=0; k<20; k++) { - /* We use a simple linear form for the number of tests to - reduce the impact of the granularity of the timer */ - reps = 50-k; - repsleft = reps; - /* Make sure that both processes are ready to start */ - MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, partner, 0, - MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, - MPI_STATUS_IGNORE ); - t1 = MPI_Wtime(); - while (repsleft--) { - if (wrank & 0x1) { - MPI_Send( sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD ); - MPI_Recv( rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, - MPI_STATUS_IGNORE ); - } - else { - MPI_Recv( rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, - MPI_STATUS_IGNORE ); - MPI_Send( sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD ); - } - } - t1 = MPI_Wtime() - t1; - times[2][k] = t1 / reps; - if (wrank == 0) { - t1 = t1 / reps; - if (t1 > 0) { - t1 = t1 * 1.e6; - if (verbose) - printf( "%d\t%g\t%g\n", len, t1, len/t1 ); - } - else { - t1 = t1 * 1.e6; - if (verbose) - printf( "%d\t%g\tINF\n", len, t1 ); - } - if (verbose) - fflush( stdout ); - } + for (k = 0; k < 20; k++) { + /* We use a simple linear form for the number of tests to + * reduce the impact of the granularity of the timer */ + reps = 50 - k; + repsleft = reps; + /* Make sure that both processes are ready to start */ + MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0, + MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + t1 = MPI_Wtime(); + while (repsleft--) { + if (wrank & 0x1) { + MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD); + MPI_Recv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + else { + MPI_Recv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD); + } + } + t1 = MPI_Wtime() - t1; + times[2][k] = t1 / reps; + if (wrank == 0) { + t1 = t1 / reps; + if (t1 > 0) { + t1 = t1 * 1.e6; + if (verbose) + printf("%d\t%g\t%g\n", len, t1, len / t1); + } + else { + t1 = t1 * 1.e6; + if (verbose) + printf("%d\t%g\tINF\n", len, t1); + } + if (verbose) + fflush(stdout); + } - len *= 2; + len *= 2; } - - + + /* At this point, we could optionally analyze the results and report - success or failure based on some criteria, such as near monotone - increases in bandwidth. This test was created because of a - fall-off in performance noted in the ch3:sock device:channel */ + * success or failure based on some criteria, such as near monotone + * increases in bandwidth. This test was created because of a + * fall-off in performance noted in the ch3:sock device:channel */ if (wrank == 0) { - int nPerfErrors = 0; - len = 1; - for (k=0; k<20; k++) { - double T0,T1,T2; - T0 = times[0][k] * 1.e6; - T1 = times[1][k] * 1.e6; - T2 = times[2][k] * 1.e6; - if (verbose) - printf( "%d\t%12.2f\t%12.2f\t%12.2f\n", len, T0, T1, T2 ); - /* Lets look at long messages only */ - if (k > 10) { - double T0Old, T1Old, T2Old; - T0Old = times[0][k-1] * 1.0e6; - T1Old = times[1][k-1] * 1.0e6; - T2Old = times[2][k-1] * 1.0e6; - if (T0 > (2+ERROR_MARGIN) * T0Old) { - nPerfErrors++; - if (verbose) - printf( "Irecv-Send:\t%d\t%12.2f\t%12.2f\n", len, T0Old, T0 ); - } - if (T1 > (2+ERROR_MARGIN) * T1Old) { - nPerfErrors++; - if (verbose) - printf( "Sendrecv:\t%d\t%12.2f\t%12.2f\n", len, T1Old, T1 ); - } - if (T2 > (2+ERROR_MARGIN) * T2Old) { - nPerfErrors++; - if (verbose) - printf( "Pingpong:\t%d\t%12.2f\t%12.2f\n", len, T2Old, T2 ); - } - } - len *= 2; - } - if (nPerfErrors > 8) { - /* Allow for 1-2 errors for eager-rendezvous shifting - * point and cache effects. There should be a better way - * of doing this. */ - printf( " Found %d performance errors\n", nPerfErrors ); - } - else { - printf( " No Errors\n" ); - } - fflush( stdout ); + int nPerfErrors = 0; + len = 1; + for (k = 0; k < 20; k++) { + double T0, T1, T2; + T0 = times[0][k] * 1.e6; + T1 = times[1][k] * 1.e6; + T2 = times[2][k] * 1.e6; + if (verbose) + printf("%d\t%12.2f\t%12.2f\t%12.2f\n", len, T0, T1, T2); + /* Lets look at long messages only */ + if (k > 10) { + double T0Old, T1Old, T2Old; + T0Old = times[0][k - 1] * 1.0e6; + T1Old = times[1][k - 1] * 1.0e6; + T2Old = times[2][k - 1] * 1.0e6; + if (T0 > (2 + ERROR_MARGIN) * T0Old) { + nPerfErrors++; + if (verbose) + printf("Irecv-Send:\t%d\t%12.2f\t%12.2f\n", len, T0Old, T0); + } + if (T1 > (2 + ERROR_MARGIN) * T1Old) { + nPerfErrors++; + if (verbose) + printf("Sendrecv:\t%d\t%12.2f\t%12.2f\n", len, T1Old, T1); + } + if (T2 > (2 + ERROR_MARGIN) * T2Old) { + nPerfErrors++; + if (verbose) + printf("Pingpong:\t%d\t%12.2f\t%12.2f\n", len, T2Old, T2); + } + } + len *= 2; + } + if (nPerfErrors > 8) { + /* Allow for 1-2 errors for eager-rendezvous shifting + * point and cache effects. There should be a better way + * of doing this. */ + printf(" Found %d performance errors\n", nPerfErrors); + } + else { + printf(" No Errors\n"); + } + fflush(stdout); } - free( sbuf ); - free( rbuf ); + free(sbuf); + free(rbuf); MPI_Finalize();