1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
3 * (C) 2006 by Argonne National Laboratory.
4 * See COPYRIGHT in top-level directory.
7 /* This program provides a simple test of send-receive performance between
8 two (or more) processes. This is sometimes called head-to-head or
9 ping-ping test, as both processes send at the same time.
/* Slack added to the expected 2x growth in time per step: main() reports a
   performance error when a time exceeds (2+ERROR_MARGIN) times the time
   recorded for the previous message length. */
17 #define ERROR_MARGIN 1.0 /* FIXME: This number is pretty much randomly chosen */
/* Set to 1 in main() when the MPITEST_VERBOSE environment variable is
   present; rank 0 then prints a header for each test section. */
19 static int verbose = 0;
21 int main( int argc, char *argv[] )
23 int wsize, wrank, partner, len, maxlen, k, reps, repsleft;
27 double times[3][MAXTESTS];
29 MPI_Init( &argc, &argv );
30 if (getenv("MPITEST_VERBOSE")) verbose = 1;
32 MPI_Comm_size( MPI_COMM_WORLD, &wsize );
33 MPI_Comm_rank( MPI_COMM_WORLD, &wrank );
36 fprintf( stderr, "This program requires at least 2 processes\n" );
37 MPI_Abort( MPI_COMM_WORLD, 1 );
39 /* Set partner based on whether rank is odd or even */
43 else if (wrank < wsize - 1) {
47 /* Handle wsize odd */
48 partner = MPI_PROC_NULL;
50 /* Allocate and initialize buffers */
52 rbuf = (char *)malloc( maxlen );
53 sbuf = (char *)malloc( maxlen );
55 fprintf( stderr, "Could not allocate %d byte buffers\n", maxlen );
56 MPI_Abort( MPI_COMM_WORLD, 2 );
58 for (k=0; k<maxlen; k++) {
63 MPI_Barrier( MPI_COMM_WORLD );
65 /* Test Irecv and send, head to head */
66 if (wrank == 0 && verbose) {
67 printf( "Irecv-send\n" );
68 printf( "len\ttime \trate\n" );
71 /* Send powers of 2 bytes */
73 for (k=0; k<20; k++) {
74 /* We use a simple linear form for the number of tests to
75 reduce the impact of the granularity of the timer */
78 /* Make sure that both processes are ready to start */
79 MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
80 MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD,
84 MPI_Irecv( rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, &rreq );
85 MPI_Send( sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD );
86 MPI_Wait( &rreq, MPI_STATUS_IGNORE );
88 t1 = MPI_Wtime() - t1;
89 times[0][k] = t1 / reps;
94 rate = (len / t1) / 1.e6;
97 printf( "%d\t%g\t%g\n", len, t1, len/t1 );
102 printf( "%d\t%g\tINF\n", len, t1 );
111 MPI_Barrier( MPI_COMM_WORLD );
113 /* Test Sendrecv, head to head */
114 if (wrank == 0 && verbose) {
115 printf( "Sendrecv\n" );
116 printf( "len\ttime (usec)\trate (MB/s)\n" );
119 /* Send powers of 2 bytes */
121 for (k=0; k<20; k++) {
122 /* We use a simple linear form for the number of tests to
123 reduce the impact of the granularity of the timer */
126 /* Make sure that both processes are ready to start */
127 MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
128 MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD,
132 MPI_Sendrecv( sbuf, len, MPI_BYTE, partner, k,
133 rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD,
136 t1 = MPI_Wtime() - t1;
137 times[1][k] = t1 / reps;
142 rate = (len / t1) / 1.e6;
145 printf( "%d\t%g\t%g\n", len, t1, len/t1 );
150 printf( "%d\t%g\tINF\n", len, t1 );
159 MPI_Barrier( MPI_COMM_WORLD );
161 /* Test Send/recv, ping-pong */
162 if (wrank == 0 && verbose) {
163 printf( "Pingpong\n" );
164 printf( "len\ttime (usec)\trate (MB/s)\n" );
167 /* Send powers of 2 bytes */
169 for (k=0; k<20; k++) {
170 /* We use a simple linear form for the number of tests to
171 reduce the impact of the granularity of the timer */
174 /* Make sure that both processes are ready to start */
175 MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
176 MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD,
181 MPI_Send( sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD );
182 MPI_Recv( rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD,
186 MPI_Recv( rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD,
188 MPI_Send( sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD );
191 t1 = MPI_Wtime() - t1;
192 times[2][k] = t1 / reps;
197 rate = (len / t1) / 1.e6;
200 printf( "%d\t%g\t%g\n", len, t1, len/t1 );
205 printf( "%d\t%g\tINF\n", len, t1 );
215 /* At this point, we could optionally analyze the results and report
216 success or failure based on some criteria, such as near monotone
217 increases in bandwidth. This test was created because of a
218 fall-off in performance noted in the ch3:sock device:channel */
223 for (k=0; k<20; k++) {
225 T0 = times[0][k] * 1.e6;
226 T1 = times[1][k] * 1.e6;
227 T2 = times[2][k] * 1.e6;
229 printf( "%d\t%12.2f\t%12.2f\t%12.2f\n", len, T0, T1, T2 );
230 /* Let's look at long messages only */
232 double T0Old, T1Old, T2Old;
233 T0Old = times[0][k-1] * 1.0e6;
234 T1Old = times[1][k-1] * 1.0e6;
235 T2Old = times[2][k-1] * 1.0e6;
236 if (T0 > (2+ERROR_MARGIN) * T0Old) {
239 printf( "Irecv-Send:\t%d\t%12.2f\t%12.2f\n", len, T0Old, T0 );
241 if (T1 > (2+ERROR_MARGIN) * T1Old) {
244 printf( "Sendrecv:\t%d\t%12.2f\t%12.2f\n", len, T1Old, T1 );
246 if (T2 > (2+ERROR_MARGIN) * T2Old) {
249 printf( "Pingpong:\t%d\t%12.2f\t%12.2f\n", len, T2Old, T2 );
254 if (nPerfErrors > 8) {
255 /* Allow for 1-2 errors for eager-rendezvous shifting
256 * point and cache effects. There should be a better way
258 printf( " Found %d performance errors\n", nPerfErrors );
261 printf( " No Errors\n" );