1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 * (C) 2010 by Argonne National Laboratory.
 * See COPYRIGHT in top-level directory.
 */
/* This test is a simplification of the one in perf/manyrma.c that tests
   for correct handling of the case where many RMA operations occur between
   synchronization events.
   This is one of the ways that RMA may be used, and is used in the
   reference implementation of the graph500 benchmark. */
18 #define MAX_COUNT 65536*4/16
19 #define MAX_RMA_SIZE 2 /* 16 in manyrma performance test */
22 typedef enum { SYNC_NONE=0,
23 SYNC_ALL=-1, SYNC_FENCE=1, SYNC_LOCK=2, SYNC_PSCW=4 } sync_t;
24 typedef enum { RMA_NONE=0, RMA_ALL=-1, RMA_PUT=1, RMA_ACC=2, RMA_GET=4 } rma_t;
25 /* Note GET not yet implemented */
26 /* By default, run only a subset of the available tests, to keep the
27 total runtime reasonably short. Command line arguments may be used
28 to run other tests. */
29 sync_t syncChoice = SYNC_FENCE;
30 rma_t rmaChoice = RMA_ACC;
32 static int verbose = 0;
34 void RunAccFence( MPI_Win win, int destRank, int cnt, int sz );
35 void RunAccLock( MPI_Win win, int destRank, int cnt, int sz );
36 void RunPutFence( MPI_Win win, int destRank, int cnt, int sz );
37 void RunPutLock( MPI_Win win, int destRank, int cnt, int sz );
38 void RunAccPSCW( MPI_Win win, int destRank, int cnt, int sz,
39 MPI_Group exposureGroup, MPI_Group accessGroup );
40 void RunPutPSCW( MPI_Win win, int destRank, int cnt, int sz,
41 MPI_Group exposureGroup, MPI_Group accessGroup );
43 int main( int argc, char *argv[] )
45 int arraysize, i, cnt, sz, maxCount=MAX_COUNT, *arraybuffer;
46 int wrank, wsize, destRank, srcRank;
48 MPI_Group wgroup, accessGroup, exposureGroup;
49 int maxSz = MAX_RMA_SIZE;
51 MPI_Init( &argc, &argv );
53 for (i=1; i<argc; i++) {
54 if (strcmp( argv[i], "-put" ) == 0) {
55 if (rmaChoice == RMA_ALL) rmaChoice = RMA_NONE;
58 else if (strcmp( argv[i], "-acc" ) == 0) {
59 if (rmaChoice == RMA_ALL) rmaChoice = RMA_NONE;
62 else if (strcmp( argv[i], "-fence" ) == 0) {
63 if (syncChoice == SYNC_ALL) syncChoice = SYNC_NONE;
64 syncChoice |= SYNC_FENCE;
66 else if (strcmp( argv[i], "-lock" ) == 0) {
67 if (syncChoice == SYNC_ALL) syncChoice = SYNC_NONE;
68 syncChoice |= SYNC_LOCK;
70 else if (strcmp( argv[i], "-pscw" ) == 0) {
71 if (syncChoice == SYNC_ALL) syncChoice = SYNC_NONE;
72 syncChoice |= SYNC_PSCW;
74 else if (strcmp( argv[i], "-maxsz" ) == 0) {
76 maxSz = atoi( argv[i] );
78 else if (strcmp( argv[i], "-maxcount" ) == 0) {
80 maxCount = atoi( argv[i] );
83 fprintf( stderr, "Unrecognized argument %s\n", argv[i] );
84 fprintf( stderr, "%s [ -put ] [ -acc ] [ -lock ] [ -fence ] [ -pscw ] [ -maxsz msgsize ]\n", argv[0] );
85 MPI_Abort( MPI_COMM_WORLD, 1 );
89 MPI_Comm_rank( MPI_COMM_WORLD, &wrank );
90 MPI_Comm_size( MPI_COMM_WORLD, &wsize );
92 while (destRank >= wsize) destRank = destRank - wsize;
94 if (srcRank < 0) srcRank += wsize;
96 /* Create groups for PSCW */
97 MPI_Comm_group( MPI_COMM_WORLD, &wgroup );
98 MPI_Group_incl( wgroup, 1, &destRank, &accessGroup );
99 MPI_Group_incl( wgroup, 1, &srcRank, &exposureGroup );
100 MPI_Group_free( &wgroup );
102 arraysize = maxSz * MAX_COUNT;
103 arraybuffer = (int*)malloc( arraysize * sizeof(int) );
105 fprintf( stderr, "Unable to allocate %d words\n", arraysize );
106 MPI_Abort( MPI_COMM_WORLD, 1 );
109 MPI_Win_create( arraybuffer, arraysize*sizeof(int), (int)sizeof(int),
110 MPI_INFO_NULL, MPI_COMM_WORLD, &win );
112 if (maxCount > MAX_COUNT) {
113 fprintf( stderr, "MaxCount must not exceed %d\n", MAX_COUNT );
114 MPI_Abort( MPI_COMM_WORLD, 1 );
117 if ((syncChoice & SYNC_FENCE) && (rmaChoice & RMA_ACC)) {
118 for (sz=1; sz<=maxSz; sz = sz + sz) {
119 if (wrank == 0 && verbose)
120 printf( "Accumulate with fence, %d elements\n", sz );
122 while (cnt <= maxCount) {
123 RunAccFence( win, destRank, cnt, sz );
129 if ((syncChoice & SYNC_LOCK) && (rmaChoice & RMA_ACC)) {
130 for (sz=1; sz<=maxSz; sz = sz + sz) {
131 if (wrank == 0 && verbose)
132 printf( "Accumulate with lock, %d elements\n", sz );
134 while (cnt <= maxCount) {
135 RunAccLock( win, destRank, cnt, sz );
141 if ((syncChoice & SYNC_FENCE) && (rmaChoice & RMA_PUT)) {
142 for (sz=1; sz<=maxSz; sz = sz + sz) {
143 if (wrank == 0 && verbose)
144 printf( "Put with fence, %d elements\n", sz );
146 while (cnt <= maxCount) {
147 RunPutFence( win, destRank, cnt, sz );
153 if ((syncChoice & SYNC_LOCK) && (rmaChoice & RMA_PUT)) {
154 for (sz=1; sz<=maxSz; sz = sz + sz) {
155 if (wrank == 0 && verbose)
156 printf( "Put with lock, %d elements\n", sz );
158 while (cnt <= maxCount) {
159 RunPutLock( win, destRank, cnt, sz );
165 if ((syncChoice & SYNC_PSCW) && (rmaChoice & RMA_PUT)) {
166 for (sz=1; sz<=maxSz; sz = sz + sz) {
167 if (wrank == 0 && verbose)
168 printf( "Put with pscw, %d elements\n", sz );
170 while (cnt <= maxCount) {
171 RunPutPSCW( win, destRank, cnt, sz,
172 exposureGroup, accessGroup );
178 if ((syncChoice & SYNC_PSCW) && (rmaChoice & RMA_ACC)) {
179 for (sz=1; sz<=maxSz; sz = sz + sz) {
180 if (wrank == 0 && verbose)
181 printf( "Accumulate with pscw, %d elements\n", sz );
183 while (cnt <= maxCount) {
184 RunAccPSCW( win, destRank, cnt, sz,
185 exposureGroup, accessGroup );
191 MPI_Win_free( &win );
193 MPI_Group_free( &accessGroup );
194 MPI_Group_free( &exposureGroup );
196 /* If we get here without timing out or failing, we succeeded */
197 if (wrank == 0) printf( " No Errors\n" );
204 void RunAccFence( MPI_Win win, int destRank, int cnt, int sz )
206 int k, i, j, one = 1;
208 for (k=0; k<MAX_RUNS; k++) {
209 MPI_Barrier( MPI_COMM_WORLD );
210 MPI_Win_fence( 0, win );
212 for (i=0; i<cnt; i++) {
213 MPI_Accumulate( &one, sz, MPI_INT, destRank,
214 j, sz, MPI_INT, MPI_SUM, win );
217 MPI_Win_fence( 0, win );
221 void RunAccLock( MPI_Win win, int destRank, int cnt, int sz )
223 int k, i, j, one = 1;
225 for (k=0; k<MAX_RUNS; k++) {
226 MPI_Barrier( MPI_COMM_WORLD );
227 MPI_Win_lock( MPI_LOCK_SHARED, destRank, 0, win );
229 for (i=0; i<cnt; i++) {
230 MPI_Accumulate( &one, sz, MPI_INT, destRank,
231 j, sz, MPI_INT, MPI_SUM, win );
234 MPI_Win_unlock( destRank, win );
238 void RunPutFence( MPI_Win win, int destRank, int cnt, int sz )
240 int k, i, j, one = 1;
242 for (k=0; k<MAX_RUNS; k++) {
243 MPI_Barrier( MPI_COMM_WORLD );
244 MPI_Win_fence( 0, win );
246 for (i=0; i<cnt; i++) {
247 MPI_Put( &one, sz, MPI_INT, destRank,
248 j, sz, MPI_INT, win );
251 MPI_Win_fence( 0, win );
255 void RunPutLock( MPI_Win win, int destRank, int cnt, int sz )
257 int k, i, j, one = 1;
259 for (k=0; k<MAX_RUNS; k++) {
260 MPI_Barrier( MPI_COMM_WORLD );
261 MPI_Win_lock( MPI_LOCK_SHARED, destRank, 0, win );
263 for (i=0; i<cnt; i++) {
264 MPI_Put( &one, sz, MPI_INT, destRank, j, sz, MPI_INT, win );
267 MPI_Win_unlock( destRank, win );
271 void RunPutPSCW( MPI_Win win, int destRank, int cnt, int sz,
272 MPI_Group exposureGroup, MPI_Group accessGroup )
274 int k, i, j, one = 1;
276 for (k=0; k<MAX_RUNS; k++) {
277 MPI_Barrier( MPI_COMM_WORLD );
278 MPI_Win_post( exposureGroup, 0, win );
279 MPI_Win_start( accessGroup, 0, win );
281 for (i=0; i<cnt; i++) {
282 MPI_Put( &one, sz, MPI_INT, destRank, j, sz, MPI_INT, win );
285 MPI_Win_complete( win );
290 void RunAccPSCW( MPI_Win win, int destRank, int cnt, int sz,
291 MPI_Group exposureGroup, MPI_Group accessGroup )
293 int k, i, j, one = 1;
295 for (k=0; k<MAX_RUNS; k++) {
296 MPI_Barrier( MPI_COMM_WORLD );
297 MPI_Win_post( exposureGroup, 0, win );
298 MPI_Win_start( accessGroup, 0, win );
300 for (i=0; i<cnt; i++) {
301 MPI_Accumulate( &one, sz, MPI_INT, destRank,
302 j, sz, MPI_INT, MPI_SUM, win );
305 MPI_Win_complete( win );