X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/6b514200338a9ba7dbb0f0b8f0d244bd74daa07a..0f3d85e5d91158b55c5dcd30d0400059574e4425:/teshsuite/smpi/mpich3-test/perf/sendrecvl.c

diff --git a/teshsuite/smpi/mpich3-test/perf/sendrecvl.c b/teshsuite/smpi/mpich3-test/perf/sendrecvl.c
new file mode 100644
index 0000000000..f0d6843533
--- /dev/null
+++ b/teshsuite/smpi/mpich3-test/perf/sendrecvl.c
@@ -0,0 +1,272 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2006 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+/* This program provides a simple test of send-receive performance between
+   two (or more) processes.  This is sometimes called a head-to-head or
+   ping-ping test, as both processes send at the same time.
+*/
+
+#include "mpi.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+#define MAXTESTS 32
+#define ERROR_MARGIN 1.0 /* FIXME: This number is pretty much randomly chosen */
+
+static int verbose = 0;
+
+int main( int argc, char *argv[] )
+{
+    int wsize, wrank, partner, len, maxlen, k, reps, repsleft;
+    double t1;
+    MPI_Request rreq;
+    char *rbuf, *sbuf;
+    double times[3][MAXTESTS];
+
+    MPI_Init( &argc, &argv );
+    if (getenv("MPITEST_VERBOSE")) verbose = 1;
+
+    MPI_Comm_size( MPI_COMM_WORLD, &wsize );
+    MPI_Comm_rank( MPI_COMM_WORLD, &wrank );
+
+    if (wsize < 2) {
+        fprintf( stderr, "This program requires at least 2 processes\n" );
+        MPI_Abort( MPI_COMM_WORLD, 1 );
+    }
+    /* Set partner based on whether rank is odd or even */
+    if (wrank & 0x1) {
+        partner = wrank - 1;
+    }
+    else if (wrank < wsize - 1) {
+        partner = wrank + 1;
+    }
+    else
+        /* Handle wsize odd */
+        partner = MPI_PROC_NULL;
+
+    /* Allocate and initialize buffers */
+    maxlen = 1024*1024;
+    rbuf = (char *)malloc( maxlen );
+    sbuf = (char *)malloc( maxlen );
+    if (!rbuf || !sbuf) {
+        fprintf( stderr, "Could not allocate %d byte buffers\n", maxlen );
+        MPI_Abort( MPI_COMM_WORLD, 2 );
+    }
+    for (k=0; k<maxlen; k++) {
+        rbuf[k] = (char)(-1);
+        sbuf[k] = (char)(k & 0x7f);
+    }
+
+    /* Test Irecv and send, head to head */
+    if (wrank == 0 && verbose) {
+        printf( "Irecv-send\n" );
+        printf( "len\ttime (usec)\trate (MB/s)\n" );
+    }
+
+    /* Send powers of 2 bytes */
+    len = 1;
+    for (k=0; k<20; k++) {
+        /* We use a simple linear form for the number of tests to
+           reduce the impact of the granularity of the timer */
+        reps     = 50-k;
+        repsleft = reps;
+        /* Make sure that both processes are ready to start */
+        MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
+                      MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD,
+                      MPI_STATUS_IGNORE );
+        t1 = MPI_Wtime();
+        while (repsleft--) {
+            MPI_Irecv( rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, &rreq );
+            MPI_Send( sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD );
+            MPI_Wait( &rreq, MPI_STATUS_IGNORE );
+        }
+        t1 = MPI_Wtime() - t1;
+        times[0][k] = t1 / reps;
+        if (wrank == 0) {
+            t1 = t1 / reps;
+            if (t1 > 0) {
+                double rate;
+                rate = (len / t1) / 1.e6;   /* MB/s; t1 is still in seconds here */
+                t1 = t1 * 1.e6;             /* convert to microseconds */
+                if (verbose)
+                    printf( "%d\t%g\t%g\n", len, t1, rate );
+            }
+            else {
+                t1 = t1 * 1.e6;
+                if (verbose)
+                    printf( "%d\t%g\tINF\n", len, t1 );
+            }
+            if (verbose)
+                fflush( stdout );
+        }
+
+        len *= 2;
+    }
+
+    MPI_Barrier( MPI_COMM_WORLD );
+
+    /* Test Sendrecv, head to head */
+    if (wrank == 0 && verbose) {
+        printf( "Sendrecv\n" );
+        printf( "len\ttime (usec)\trate (MB/s)\n" );
+    }
+
+    /* Send powers of 2 bytes */
+    len = 1;
+    for (k=0; k<20; k++) {
+        /* We use a simple linear form for the number of tests to
+           reduce the impact of the granularity of the timer */
+        reps     = 50-k;
+        repsleft = reps;
+        /* Make sure that both processes are ready to start */
+        MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
+                      MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD,
+                      MPI_STATUS_IGNORE );
+        t1 = MPI_Wtime();
+        while (repsleft--) {
+            MPI_Sendrecv( sbuf, len, MPI_BYTE, partner, k,
+                          rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD,
+                          MPI_STATUS_IGNORE );
+        }
+        t1 = MPI_Wtime() - t1;
+        times[1][k] = t1 / reps;
+        if (wrank == 0) {
+            t1 = t1 / reps;
+            if (t1 > 0) {
+                double rate;
+                rate = (len / t1) / 1.e6;
+                t1 = t1 * 1.e6;
+                if (verbose)
+                    printf( "%d\t%g\t%g\n", len, t1, rate );
+            }
+            else {
+                t1 = t1 * 1.e6;
+                if (verbose)
+                    printf( "%d\t%g\tINF\n", len, t1 );
+            }
+            if (verbose)
+                fflush( stdout );
+        }
+
+        len *= 2;
+    }
+
+    MPI_Barrier( MPI_COMM_WORLD );
+
+    /* Test Send/recv, ping-pong */
+    if (wrank == 0 && verbose) {
+        printf( "Pingpong\n" );
+        printf( "len\ttime (usec)\trate (MB/s)\n" );
+    }
+
+    /* Send powers of 2 bytes */
+    len = 1;
+    for (k=0; k<20; k++) {
+        /* We use a simple linear form for the number of tests to
+           reduce the impact of the granularity of the timer */
+        reps     = 50-k;
+        repsleft = reps;
+        /* Make sure that both processes are ready to start */
+        MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
+                      MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD,
+                      MPI_STATUS_IGNORE );
+        t1 = MPI_Wtime();
+        while (repsleft--) {
+            if (wrank & 0x1) {
+                MPI_Send( sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD );
+                MPI_Recv( rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD,
+                          MPI_STATUS_IGNORE );
+            }
+            else {
+                MPI_Recv( rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD,
+                          MPI_STATUS_IGNORE );
+                MPI_Send( sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD );
+            }
+        }
+        t1 = MPI_Wtime() - t1;
+        times[2][k] = t1 / reps;
+        if (wrank == 0) {
+            t1 = t1 / reps;
+            if (t1 > 0) {
+                double rate;
+                rate = (len / t1) / 1.e6;
+                t1 = t1 * 1.e6;
+                if (verbose)
+                    printf( "%d\t%g\t%g\n", len, t1, rate );
+            }
+            else {
+                t1 = t1 * 1.e6;
+                if (verbose)
+                    printf( "%d\t%g\tINF\n", len, t1 );
+            }
+            if (verbose)
+                fflush( stdout );
+        }
+
+        len *= 2;
+    }
+
+    /* At this point, we could optionally analyze the results and report
+       success or failure based on some criteria, such as near monotone
+       increases in bandwidth.  This test was created because of a
+       fall-off in performance noted in the ch3:sock device:channel */
+
+    if (wrank == 0) {
+        int nPerfErrors = 0;
+        len = 1;
+        for (k=0; k<20; k++) {
+            double T0, T1, T2;
+            T0 = times[0][k] * 1.e6;
+            T1 = times[1][k] * 1.e6;
+            T2 = times[2][k] * 1.e6;
+            if (verbose)
+                printf( "%d\t%12.2f\t%12.2f\t%12.2f\n", len, T0, T1, T2 );
+            /* Let's look at long messages only */
+            if (k > 10) {
+                double T0Old, T1Old, T2Old;
+                T0Old = times[0][k-1] * 1.0e6;
+                T1Old = times[1][k-1] * 1.0e6;
+                T2Old = times[2][k-1] * 1.0e6;
+                /* Flag any case where doubling the message size more than
+                   doubles the time, with ERROR_MARGIN as slack */
+                if (T0 > (2+ERROR_MARGIN) * T0Old) {
+                    nPerfErrors++;
+                    if (verbose)
+                        printf( "Irecv-Send:\t%d\t%12.2f\t%12.2f\n", len, T0Old, T0 );
+                }
+                if (T1 > (2+ERROR_MARGIN) * T1Old) {
+                    nPerfErrors++;
+                    if (verbose)
+                        printf( "Sendrecv:\t%d\t%12.2f\t%12.2f\n", len, T1Old, T1 );
+                }
+                if (T2 > (2+ERROR_MARGIN) * T2Old) {
+                    nPerfErrors++;
+                    if (verbose)
+                        printf( "Pingpong:\t%d\t%12.2f\t%12.2f\n", len, T2Old, T2 );
+                }
+            }
+            len *= 2;
+        }
+        if (nPerfErrors > 8) {
+            /* Allow for a few errors due to the eager-rendezvous shifting
+             * point and cache effects.  There should be a better way
+             * of doing this. */
+            printf( " Found %d performance errors\n", nPerfErrors );
+        }
+        else {
+            printf( " No Errors\n" );
+        }
+        fflush( stdout );
+    }
+
+    free( sbuf );
+    free( rbuf );
+
+    MPI_Finalize();
+
+    return 0;
+}
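For reference, the ping-pong timing pattern used above is easy to reproduce in isolation. The sketch below is illustrative only and not part of the patch; it assumes an MPI installation providing mpicc/mpirun, fixes one message size and repetition count instead of sweeping them, and uses the same zero-byte MPI_Sendrecv trick to synchronize the two partners before starting the clock.

/* pingpong_sketch.c -- a minimal sketch, not part of the test suite.
   Build: mpicc pingpong_sketch.c -o pingpong_sketch
   Run:   mpirun -np 2 ./pingpong_sketch */
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>

int main( int argc, char *argv[] )
{
    int rank, size, partner, r;
    int len = 1024, reps = 50;
    double t;
    char *buf;

    MPI_Init( &argc, &argv );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    MPI_Comm_size( MPI_COMM_WORLD, &size );
    if (size < 2) {
        fprintf( stderr, "This sketch requires at least 2 processes\n" );
        MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    buf = (char *)malloc( len );

    if (rank < 2) {                 /* only ranks 0 and 1 participate */
        partner = 1 - rank;
        /* synchronize both partners before starting the clock */
        MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
                      MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
                      MPI_COMM_WORLD, MPI_STATUS_IGNORE );
        t = MPI_Wtime();
        for (r = 0; r < reps; r++) {
            if (rank == 0) {        /* rank 0 sends first ... */
                MPI_Send( buf, len, MPI_BYTE, partner, 1, MPI_COMM_WORLD );
                MPI_Recv( buf, len, MPI_BYTE, partner, 1, MPI_COMM_WORLD,
                          MPI_STATUS_IGNORE );
            }
            else {                  /* ... and rank 1 echoes it back */
                MPI_Recv( buf, len, MPI_BYTE, partner, 1, MPI_COMM_WORLD,
                          MPI_STATUS_IGNORE );
                MPI_Send( buf, len, MPI_BYTE, partner, 1, MPI_COMM_WORLD );
            }
        }
        t = (MPI_Wtime() - t) / reps;
        if (rank == 0)
            printf( "%d bytes: %g usec per round trip\n", len, t * 1.e6 );
    }

    free( buf );
    MPI_Finalize();
    return 0;
}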