/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 * (C) 2010 by Argonne National Laboratory.
 *     See COPYRIGHT in top-level directory.
 */
/* This test is a simplification of the one in perf/manyrma.c that tests
   for correct handling of the case where many RMA operations occur between
   synchronization events.
   This is one of the ways that RMA may be used, and is used in the
   reference implementation of the graph500 benchmark. */
/* Test-size bounds: MAX_COUNT is the largest number of RMA operations
   issued within a single synchronization epoch; MAX_RMA_SIZE is the
   largest number of MPI_INT elements moved per operation.  The macro
   value is parenthesized so it expands safely inside expressions such
   as maxSz * MAX_COUNT. */
#define MAX_COUNT (65536*4/16)
#define MAX_RMA_SIZE 2 /* 16 in manyrma performance test */
/* Each driver repeats its epoch MAX_RUNS times (see the Run* functions). */
#define MAX_RUNS 8
/* Stop doubling cnt for a given size once one iteration exceeds this. */
#define MAX_ITER_TIME 5.0 /* seconds */

/* Bit flags selecting which synchronization methods to exercise.
   SYNC_ALL (-1) has every bit set, so any (flag & SYNC_ALL) test passes. */
typedef enum { SYNC_NONE = 0,
    SYNC_ALL = -1, SYNC_FENCE = 1, SYNC_LOCK = 2, SYNC_PSCW = 4
} sync_t;
/* Bit flags selecting which RMA operations to exercise. */
typedef enum { RMA_NONE = 0, RMA_ALL = -1, RMA_PUT = 1, RMA_ACC = 2, RMA_GET = 4 } rma_t;
/* Note GET not yet implemented */

/* By default, run only a subset of the available tests, to keep the
   total runtime reasonably short.  Command line arguments may be used
   to run other tests. */
sync_t syncChoice = SYNC_FENCE;
rma_t rmaChoice = RMA_ACC;

static int verbose = 0;
36 void RunAccFence(MPI_Win win, int destRank, int cnt, int sz);
37 void RunAccLock(MPI_Win win, int destRank, int cnt, int sz);
38 void RunPutFence(MPI_Win win, int destRank, int cnt, int sz);
39 void RunPutLock(MPI_Win win, int destRank, int cnt, int sz);
40 void RunAccPSCW(MPI_Win win, int destRank, int cnt, int sz,
41 MPI_Group exposureGroup, MPI_Group accessGroup);
42 void RunPutPSCW(MPI_Win win, int destRank, int cnt, int sz,
43 MPI_Group exposureGroup, MPI_Group accessGroup);
45 int main(int argc, char *argv[])
47 int arraysize, i, cnt, sz, maxCount = MAX_COUNT, *arraybuffer;
48 int wrank, wsize, destRank, srcRank;
50 MPI_Group wgroup, accessGroup, exposureGroup;
51 int maxSz = MAX_RMA_SIZE;
54 MPI_Init(&argc, &argv);
56 for (i = 1; i < argc; i++) {
57 if (strcmp(argv[i], "-put") == 0) {
58 if (rmaChoice == RMA_ALL)
62 else if (strcmp(argv[i], "-acc") == 0) {
63 if (rmaChoice == RMA_ALL)
67 else if (strcmp(argv[i], "-fence") == 0) {
68 if (syncChoice == SYNC_ALL)
69 syncChoice = SYNC_NONE;
70 syncChoice |= SYNC_FENCE;
72 else if (strcmp(argv[i], "-lock") == 0) {
73 if (syncChoice == SYNC_ALL)
74 syncChoice = SYNC_NONE;
75 syncChoice |= SYNC_LOCK;
77 else if (strcmp(argv[i], "-pscw") == 0) {
78 if (syncChoice == SYNC_ALL)
79 syncChoice = SYNC_NONE;
80 syncChoice |= SYNC_PSCW;
82 else if (strcmp(argv[i], "-maxsz") == 0) {
84 maxSz = atoi(argv[i]);
86 else if (strcmp(argv[i], "-maxcount") == 0) {
88 maxCount = atoi(argv[i]);
91 fprintf(stderr, "Unrecognized argument %s\n", argv[i]);
93 "%s [ -put ] [ -acc ] [ -lock ] [ -fence ] [ -pscw ] [ -maxsz msgsize ]\n",
95 MPI_Abort(MPI_COMM_WORLD, 1);
99 MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
100 MPI_Comm_size(MPI_COMM_WORLD, &wsize);
101 destRank = wrank + 1;
102 while (destRank >= wsize)
103 destRank = destRank - wsize;
108 /* Create groups for PSCW */
109 MPI_Comm_group(MPI_COMM_WORLD, &wgroup);
110 MPI_Group_incl(wgroup, 1, &destRank, &accessGroup);
111 MPI_Group_incl(wgroup, 1, &srcRank, &exposureGroup);
112 MPI_Group_free(&wgroup);
114 arraysize = maxSz * MAX_COUNT;
115 #ifdef USE_WIN_ALLOCATE
116 MPI_Win_allocate(arraysize * sizeof(int), (int) sizeof(int), MPI_INFO_NULL,
117 MPI_COMM_WORLD, &arraybuffer, &win);
119 fprintf(stderr, "Unable to allocate %d words\n", arraysize);
120 MPI_Abort(MPI_COMM_WORLD, 1);
123 arraybuffer = (int *) malloc(arraysize * sizeof(int));
125 fprintf(stderr, "Unable to allocate %d words\n", arraysize);
126 MPI_Abort(MPI_COMM_WORLD, 1);
129 MPI_Win_create(arraybuffer, arraysize * sizeof(int), (int) sizeof(int),
130 MPI_INFO_NULL, MPI_COMM_WORLD, &win);
133 if (maxCount > MAX_COUNT) {
134 fprintf(stderr, "MaxCount must not exceed %d\n", MAX_COUNT);
135 MPI_Abort(MPI_COMM_WORLD, 1);
138 if ((syncChoice & SYNC_FENCE) && (rmaChoice & RMA_ACC)) {
139 for (sz = 1; sz <= maxSz; sz = sz + sz) {
140 if (wrank == 0 && verbose)
141 printf("Accumulate with fence, %d elements\n", sz);
142 for (cnt = 1; cnt <= maxCount; cnt *= 2) {
144 RunAccFence(win, destRank, cnt, sz);
146 if (end - start > MAX_ITER_TIME)
152 if ((syncChoice & SYNC_LOCK) && (rmaChoice & RMA_ACC)) {
153 for (sz = 1; sz <= maxSz; sz = sz + sz) {
154 if (wrank == 0 && verbose)
155 printf("Accumulate with lock, %d elements\n", sz);
156 for (cnt = 1; cnt <= maxCount; cnt *= 2) {
158 RunAccLock(win, destRank, cnt, sz);
160 if (end - start > MAX_ITER_TIME)
166 if ((syncChoice & SYNC_FENCE) && (rmaChoice & RMA_PUT)) {
167 for (sz = 1; sz <= maxSz; sz = sz + sz) {
168 if (wrank == 0 && verbose)
169 printf("Put with fence, %d elements\n", sz);
170 for (cnt = 1; cnt <= maxCount; cnt *= 2) {
172 RunPutFence(win, destRank, cnt, sz);
174 if (end - start > MAX_ITER_TIME)
180 if ((syncChoice & SYNC_LOCK) && (rmaChoice & RMA_PUT)) {
181 for (sz = 1; sz <= maxSz; sz = sz + sz) {
182 if (wrank == 0 && verbose)
183 printf("Put with lock, %d elements\n", sz);
184 for (cnt = 1; cnt <= maxCount; cnt *= 2) {
186 RunPutLock(win, destRank, cnt, sz);
188 if (end - start > MAX_ITER_TIME)
194 if ((syncChoice & SYNC_PSCW) && (rmaChoice & RMA_PUT)) {
195 for (sz = 1; sz <= maxSz; sz = sz + sz) {
196 if (wrank == 0 && verbose)
197 printf("Put with pscw, %d elements\n", sz);
198 for (cnt = 1; cnt <= maxCount; cnt *= 2) {
200 RunPutPSCW(win, destRank, cnt, sz, exposureGroup, accessGroup);
202 if (end - start > MAX_ITER_TIME)
208 if ((syncChoice & SYNC_PSCW) && (rmaChoice & RMA_ACC)) {
209 for (sz = 1; sz <= maxSz; sz = sz + sz) {
210 if (wrank == 0 && verbose)
211 printf("Accumulate with pscw, %d elements\n", sz);
212 for (cnt = 1; cnt <= maxCount; cnt *= 2) {
214 RunAccPSCW(win, destRank, cnt, sz, exposureGroup, accessGroup);
216 if (end - start > MAX_ITER_TIME)
224 #ifndef USE_WIN_ALLOCATE
228 MPI_Group_free(&accessGroup);
229 MPI_Group_free(&exposureGroup);
231 /* If we get here without timing out or failing, we succeeded */
233 printf(" No Errors\n");
240 void RunAccFence(MPI_Win win, int destRank, int cnt, int sz)
242 int k, i, j, one = 1;
244 for (k = 0; k < MAX_RUNS; k++) {
245 MPI_Barrier(MPI_COMM_WORLD);
246 MPI_Win_fence(0, win);
248 for (i = 0; i < cnt; i++) {
249 MPI_Accumulate(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
252 MPI_Win_fence(0, win);
256 void RunAccLock(MPI_Win win, int destRank, int cnt, int sz)
258 int k, i, j, one = 1;
260 for (k = 0; k < MAX_RUNS; k++) {
261 MPI_Barrier(MPI_COMM_WORLD);
262 MPI_Win_lock(MPI_LOCK_SHARED, destRank, 0, win);
264 for (i = 0; i < cnt; i++) {
265 MPI_Accumulate(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
268 MPI_Win_unlock(destRank, win);
272 void RunPutFence(MPI_Win win, int destRank, int cnt, int sz)
274 int k, i, j, one = 1;
276 for (k = 0; k < MAX_RUNS; k++) {
277 MPI_Barrier(MPI_COMM_WORLD);
278 MPI_Win_fence(0, win);
280 for (i = 0; i < cnt; i++) {
281 MPI_Put(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
284 MPI_Win_fence(0, win);
288 void RunPutLock(MPI_Win win, int destRank, int cnt, int sz)
290 int k, i, j, one = 1;
292 for (k = 0; k < MAX_RUNS; k++) {
293 MPI_Barrier(MPI_COMM_WORLD);
294 MPI_Win_lock(MPI_LOCK_SHARED, destRank, 0, win);
296 for (i = 0; i < cnt; i++) {
297 MPI_Put(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
300 MPI_Win_unlock(destRank, win);
304 void RunPutPSCW(MPI_Win win, int destRank, int cnt, int sz,
305 MPI_Group exposureGroup, MPI_Group accessGroup)
307 int k, i, j, one = 1;
309 for (k = 0; k < MAX_RUNS; k++) {
310 MPI_Barrier(MPI_COMM_WORLD);
311 MPI_Win_post(exposureGroup, 0, win);
312 MPI_Win_start(accessGroup, 0, win);
314 for (i = 0; i < cnt; i++) {
315 MPI_Put(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
318 MPI_Win_complete(win);
323 void RunAccPSCW(MPI_Win win, int destRank, int cnt, int sz,
324 MPI_Group exposureGroup, MPI_Group accessGroup)
326 int k, i, j, one = 1;
328 for (k = 0; k < MAX_RUNS; k++) {
329 MPI_Barrier(MPI_COMM_WORLD);
330 MPI_Win_post(exposureGroup, 0, win);
331 MPI_Win_start(accessGroup, 0, win);
333 for (i = 0; i < cnt; i++) {
334 MPI_Accumulate(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
337 MPI_Win_complete(win);