1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
3 * (C) 2010 by Argonne National Laboratory.
4 * See COPYRIGHT in top-level directory.
7 /* This test is a simplification of the one in perf/manyrma.c that tests
8 for correct handling of the case where many RMA operations occur between
9 synchronization events.
10 This is one of the ways that RMA may be used, and is used in the
11 reference implementation of the graph500 benchmark.
/* Upper bound on the number of RMA operations issued between two
 * synchronization calls; also used to size the window buffer.
 * The replacement list is parenthesized so the macro behaves as a single
 * value inside larger expressions (e.g. `x / MAX_COUNT`, `n * MAX_COUNT`). */
#define MAX_COUNT (65536 * 4 / 16)
/* Default upper bound on the element count of a single RMA operation. */
#define MAX_RMA_SIZE 2  /* 16 in manyrma performance test */
/* Threshold compared against the elapsed time (end - start) of one run. */
#define MAX_ITER_TIME 5.0       /* seconds */
/* Selector for the synchronization methods to test.  The non-zero values
 * are distinct bits, so choices can be combined with |= and tested with &
 * (as main does); SYNC_ALL (-1) has every bit set and SYNC_NONE clears all.
 * NOTE(review): the closing line of this typedef is not present in this
 * listing. */
23 typedef enum { SYNC_NONE = 0,
24 SYNC_ALL = -1, SYNC_FENCE = 1, SYNC_LOCK = 2, SYNC_PSCW = 4
/* Selector for the RMA operations to test, using the same bit-flag scheme:
 * RMA_ALL (-1) has every bit set, RMA_NONE clears all. */
26 typedef enum { RMA_NONE = 0, RMA_ALL = -1, RMA_PUT = 1, RMA_ACC = 2, RMA_GET = 4 } rma_t;
27 /* Note: GET is not yet implemented */
28 /* By default, run only a subset of the available tests, to keep the
29 total runtime reasonably short. Command line arguments may be used
30 to run other tests. */
/* Synchronization methods selected for this run; defaults to fence only. */
31 sync_t syncChoice = SYNC_FENCE;
/* RMA operations selected for this run; defaults to accumulate only. */
32 rma_t rmaChoice = RMA_ACC;
/* When non-zero, rank 0 prints a progress line for each element size. */
34 static int verbose = 0;
/* Test drivers, one per (operation, synchronization) pair.
 * Common parameters:
 *   win      - window the RMA operations target
 *   destRank - target rank of every operation
 *   cnt      - number of RMA operations issued per synchronization epoch
 *   sz       - element count (MPI_INT) of each operation
 * The PSCW variants additionally take the groups passed to MPI_Win_post
 * (exposureGroup) and MPI_Win_start (accessGroup). */
36 void RunAccFence(MPI_Win win, int destRank, int cnt, int sz);
37 void RunAccLock(MPI_Win win, int destRank, int cnt, int sz);
38 void RunPutFence(MPI_Win win, int destRank, int cnt, int sz);
39 void RunPutLock(MPI_Win win, int destRank, int cnt, int sz);
40 void RunAccPSCW(MPI_Win win, int destRank, int cnt, int sz,
41 MPI_Group exposureGroup, MPI_Group accessGroup);
42 void RunPutPSCW(MPI_Win win, int destRank, int cnt, int sz,
43 MPI_Group exposureGroup, MPI_Group accessGroup);
/* Test entry point.
 * Parses the command line to choose which RMA operations (-put, -acc) and
 * synchronization methods (-fence, -lock, -pscw) to exercise and the limits
 * (-maxsz, -maxcount), builds the PSCW groups and the window, then runs each
 * selected combination with sz doubling from 1 up to maxSz and cnt doubling
 * from 1 up to maxCount.
 * NOTE(review): this listing omits a number of lines of the function (the
 * embedded line numbers skip), including braces, some declarations and the
 * statements that assign start/end; only the visible statements are
 * described below. */
45 int main(int argc, char *argv[])
47 int arraysize, i, cnt, sz, maxCount = MAX_COUNT, *arraybuffer;
48 int wrank, wsize, destRank, srcRank;
50 MPI_Group wgroup, accessGroup, exposureGroup;
51 int maxSz = MAX_RMA_SIZE;
54 MPI_Init(&argc, &argv);
/* Command-line parsing.  For the sync options, an "all" selection is first
 * reset to "none" so that explicit options build up exactly the requested
 * set of flags. */
56 for (i = 1; i < argc; i++) {
57 if (strcmp(argv[i], "-put") == 0) {
58 if (rmaChoice == RMA_ALL)
62 else if (strcmp(argv[i], "-acc") == 0) {
63 if (rmaChoice == RMA_ALL)
67 else if (strcmp(argv[i], "-fence") == 0) {
68 if (syncChoice == SYNC_ALL)
69 syncChoice = SYNC_NONE;
70 syncChoice |= SYNC_FENCE;
72 else if (strcmp(argv[i], "-lock") == 0) {
73 if (syncChoice == SYNC_ALL)
74 syncChoice = SYNC_NONE;
75 syncChoice |= SYNC_LOCK;
77 else if (strcmp(argv[i], "-pscw") == 0) {
78 if (syncChoice == SYNC_ALL)
79 syncChoice = SYNC_NONE;
80 syncChoice |= SYNC_PSCW;
82 else if (strcmp(argv[i], "-maxsz") == 0) {
/* NOTE(review): atoi reports no conversion errors, and no check that a value
 * argument actually follows the option is visible here -- confirm. */
84 maxSz = atoi(argv[i]);
86 else if (strcmp(argv[i], "-maxcount") == 0) {
88 maxCount = atoi(argv[i]);
/* Unknown option: report it, print the usage line and abort the job.
 * NOTE(review): the usage string does not mention -maxcount, although that
 * option is accepted above. */
91 fprintf(stderr, "Unrecognized argument %s\n", argv[i]);
93 "%s [ -put ] [ -acc ] [ -lock ] [ -fence ] [ -pscw ] [ -maxsz msgsize ]\n",
95 MPI_Abort(MPI_COMM_WORLD, 1);
99 MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
100 MPI_Comm_size(MPI_COMM_WORLD, &wsize);
/* Target is the next rank, wrapping around past the last rank. */
101 destRank = wrank + 1;
102 while (destRank >= wsize)
103 destRank = destRank - wsize;
/* accessGroup holds only destRank and exposureGroup holds only srcRank.
 * NOTE(review): the assignment to srcRank is not visible in this listing. */
108 /* Create groups for PSCW */
109 MPI_Comm_group(MPI_COMM_WORLD, &wgroup);
110 MPI_Group_incl(wgroup, 1, &destRank, &accessGroup);
111 MPI_Group_incl(wgroup, 1, &srcRank, &exposureGroup);
112 MPI_Group_free(&wgroup);
/* The window buffer holds maxSz * MAX_COUNT ints and is exposed with a
 * displacement unit of sizeof(int). */
114 arraysize = maxSz * MAX_COUNT;
115 arraybuffer = (int *) malloc(arraysize * sizeof(int));
117 fprintf(stderr, "Unable to allocate %d words\n", arraysize);
118 MPI_Abort(MPI_COMM_WORLD, 1);
121 MPI_Win_create(arraybuffer, arraysize * sizeof(int), (int) sizeof(int),
122 MPI_INFO_NULL, MPI_COMM_WORLD, &win);
/* The buffer is sized from MAX_COUNT, not maxCount, so a larger maxCount is
 * rejected.  NOTE(review): this check runs only after the window has been
 * created; it could be done right after argument parsing. */
124 if (maxCount > MAX_COUNT) {
125 fprintf(stderr, "MaxCount must not exceed %d\n", MAX_COUNT);
126 MPI_Abort(MPI_COMM_WORLD, 1);
/* One section per (sync, op) combination.  Each is entered only when both
 * the sync flag and the op flag are selected.  The action taken when
 * end - start exceeds MAX_ITER_TIME is not visible in this listing. */
129 if ((syncChoice & SYNC_FENCE) && (rmaChoice & RMA_ACC)) {
130 for (sz = 1; sz <= maxSz; sz = sz + sz) {
131 if (wrank == 0 && verbose)
132 printf("Accumulate with fence, %d elements\n", sz);
133 for (cnt = 1; cnt <= maxCount; cnt *= 2) {
135 RunAccFence(win, destRank, cnt, sz);
137 if (end - start > MAX_ITER_TIME)
143 if ((syncChoice & SYNC_LOCK) && (rmaChoice & RMA_ACC)) {
144 for (sz = 1; sz <= maxSz; sz = sz + sz) {
145 if (wrank == 0 && verbose)
146 printf("Accumulate with lock, %d elements\n", sz);
147 for (cnt = 1; cnt <= maxCount; cnt *= 2) {
149 RunAccLock(win, destRank, cnt, sz);
151 if (end - start > MAX_ITER_TIME)
157 if ((syncChoice & SYNC_FENCE) && (rmaChoice & RMA_PUT)) {
158 for (sz = 1; sz <= maxSz; sz = sz + sz) {
159 if (wrank == 0 && verbose)
160 printf("Put with fence, %d elements\n", sz);
161 for (cnt = 1; cnt <= maxCount; cnt *= 2) {
163 RunPutFence(win, destRank, cnt, sz);
165 if (end - start > MAX_ITER_TIME)
171 if ((syncChoice & SYNC_LOCK) && (rmaChoice & RMA_PUT)) {
172 for (sz = 1; sz <= maxSz; sz = sz + sz) {
173 if (wrank == 0 && verbose)
174 printf("Put with lock, %d elements\n", sz);
175 for (cnt = 1; cnt <= maxCount; cnt *= 2) {
177 RunPutLock(win, destRank, cnt, sz);
179 if (end - start > MAX_ITER_TIME)
185 if ((syncChoice & SYNC_PSCW) && (rmaChoice & RMA_PUT)) {
186 for (sz = 1; sz <= maxSz; sz = sz + sz) {
187 if (wrank == 0 && verbose)
188 printf("Put with pscw, %d elements\n", sz);
189 for (cnt = 1; cnt <= maxCount; cnt *= 2) {
191 RunPutPSCW(win, destRank, cnt, sz, exposureGroup, accessGroup);
193 if (end - start > MAX_ITER_TIME)
199 if ((syncChoice & SYNC_PSCW) && (rmaChoice & RMA_ACC)) {
200 for (sz = 1; sz <= maxSz; sz = sz + sz) {
201 if (wrank == 0 && verbose)
202 printf("Accumulate with pscw, %d elements\n", sz);
203 for (cnt = 1; cnt <= maxCount; cnt *= 2) {
205 RunAccPSCW(win, destRank, cnt, sz, exposureGroup, accessGroup);
207 if (end - start > MAX_ITER_TIME)
/* Release the PSCW groups created above. */
215 MPI_Group_free(&accessGroup);
216 MPI_Group_free(&exposureGroup);
218 /* If we get here without timing out or failing, we succeeded */
220 printf(" No Errors\n");
/* Accumulate test with fence synchronization.
 * For each of MAX_RUNS repetitions: barrier, opening fence, cnt
 * MPI_Accumulate (MPI_SUM) calls of sz MPI_INT elements to displacement j
 * on destRank, closing fence.
 * NOTE(review): the statements that set and advance j are not visible in
 * this listing.
 * NOTE(review): the origin buffer is the single int `one`, but the origin
 * count is sz; for sz > 1 the call reads past `one` -- confirm and use an
 * sz-element origin buffer. */
227 void RunAccFence(MPI_Win win, int destRank, int cnt, int sz)
229 int k, i, j, one = 1;
231 for (k = 0; k < MAX_RUNS; k++) {
232 MPI_Barrier(MPI_COMM_WORLD);
233 MPI_Win_fence(0, win);
235 for (i = 0; i < cnt; i++) {
236 MPI_Accumulate(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
239 MPI_Win_fence(0, win);
/* Accumulate test with passive-target (lock/unlock) synchronization.
 * For each of MAX_RUNS repetitions: barrier, shared lock on destRank, cnt
 * MPI_Accumulate (MPI_SUM) calls of sz MPI_INT elements to displacement j
 * on destRank, unlock.
 * NOTE(review): the statements that set and advance j are not visible in
 * this listing.
 * NOTE(review): the origin buffer is the single int `one`, but the origin
 * count is sz; for sz > 1 the call reads past `one` -- confirm and use an
 * sz-element origin buffer. */
243 void RunAccLock(MPI_Win win, int destRank, int cnt, int sz)
245 int k, i, j, one = 1;
247 for (k = 0; k < MAX_RUNS; k++) {
248 MPI_Barrier(MPI_COMM_WORLD);
249 MPI_Win_lock(MPI_LOCK_SHARED, destRank, 0, win);
251 for (i = 0; i < cnt; i++) {
252 MPI_Accumulate(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
255 MPI_Win_unlock(destRank, win);
/* Put test with fence synchronization.
 * For each of MAX_RUNS repetitions: barrier, opening fence, cnt MPI_Put
 * calls of sz MPI_INT elements to displacement j on destRank, closing fence.
 * NOTE(review): the statements that set and advance j are not visible in
 * this listing.
 * NOTE(review): the origin buffer is the single int `one`, but the origin
 * count is sz; for sz > 1 the call reads past `one` -- confirm and use an
 * sz-element origin buffer. */
259 void RunPutFence(MPI_Win win, int destRank, int cnt, int sz)
261 int k, i, j, one = 1;
263 for (k = 0; k < MAX_RUNS; k++) {
264 MPI_Barrier(MPI_COMM_WORLD);
265 MPI_Win_fence(0, win);
267 for (i = 0; i < cnt; i++) {
268 MPI_Put(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
271 MPI_Win_fence(0, win);
/* Put test with passive-target (lock/unlock) synchronization.
 * For each of MAX_RUNS repetitions: barrier, shared lock on destRank, cnt
 * MPI_Put calls of sz MPI_INT elements to displacement j on destRank,
 * unlock.
 * NOTE(review): the statements that set and advance j are not visible in
 * this listing.
 * NOTE(review): the origin buffer is the single int `one`, but the origin
 * count is sz; for sz > 1 the call reads past `one` -- confirm and use an
 * sz-element origin buffer. */
275 void RunPutLock(MPI_Win win, int destRank, int cnt, int sz)
277 int k, i, j, one = 1;
279 for (k = 0; k < MAX_RUNS; k++) {
280 MPI_Barrier(MPI_COMM_WORLD);
281 MPI_Win_lock(MPI_LOCK_SHARED, destRank, 0, win);
283 for (i = 0; i < cnt; i++) {
284 MPI_Put(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
287 MPI_Win_unlock(destRank, win);
/* Put test with post/start/complete synchronization.
 * For each of MAX_RUNS repetitions: barrier, MPI_Win_post to exposureGroup,
 * MPI_Win_start on accessGroup, cnt MPI_Put calls of sz MPI_INT elements to
 * displacement j on destRank, MPI_Win_complete.
 * NOTE(review): the statements that set and advance j, and the call that
 * closes the exposure epoch opened by MPI_Win_post, are not visible in this
 * listing -- confirm against the full file.
 * NOTE(review): the origin buffer is the single int `one`, but the origin
 * count is sz; for sz > 1 the call reads past `one` -- confirm and use an
 * sz-element origin buffer. */
291 void RunPutPSCW(MPI_Win win, int destRank, int cnt, int sz,
292 MPI_Group exposureGroup, MPI_Group accessGroup)
294 int k, i, j, one = 1;
296 for (k = 0; k < MAX_RUNS; k++) {
297 MPI_Barrier(MPI_COMM_WORLD);
298 MPI_Win_post(exposureGroup, 0, win);
299 MPI_Win_start(accessGroup, 0, win);
301 for (i = 0; i < cnt; i++) {
302 MPI_Put(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
305 MPI_Win_complete(win);
/* Accumulate test with post/start/complete synchronization.
 * For each of MAX_RUNS repetitions: barrier, MPI_Win_post to exposureGroup,
 * MPI_Win_start on accessGroup, cnt MPI_Accumulate (MPI_SUM) calls of sz
 * MPI_INT elements to displacement j on destRank, MPI_Win_complete.
 * NOTE(review): the statements that set and advance j, and the call that
 * closes the exposure epoch opened by MPI_Win_post, are not visible in this
 * listing -- confirm against the full file.
 * NOTE(review): the origin buffer is the single int `one`, but the origin
 * count is sz; for sz > 1 the call reads past `one` -- confirm and use an
 * sz-element origin buffer. */
310 void RunAccPSCW(MPI_Win win, int destRank, int cnt, int sz,
311 MPI_Group exposureGroup, MPI_Group accessGroup)
313 int k, i, j, one = 1;
315 for (k = 0; k < MAX_RUNS; k++) {
316 MPI_Barrier(MPI_COMM_WORLD);
317 MPI_Win_post(exposureGroup, 0, win);
318 MPI_Win_start(accessGroup, 0, win);
320 for (i = 0; i < cnt; i++) {
321 MPI_Accumulate(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
324 MPI_Win_complete(win);