/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 * (C) 2011 by Argonne National Laboratory.
 *     See COPYRIGHT in top-level directory.
 */
/* This test executes multiple nonblocking collective (NBC) MPI routines
 * concurrently and manages their completion with a variety of routines
 * (MPI_{Wait,Test}{,_all,_any,_some}).  It also throws a few point-to-point
 * operations into the mix.
 *
 * Possible improvements:
 * - post operations on multiple comms from multiple threads
 */
#include "mpi.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>

/* USE_STRICT_MPI may be defined in mpitestconf.h */
#include "mpitestconf.h"
/* Constants that control the high level test harness behavior. */
/* MAIN_ITERATIONS is how many NBC ops the test will attempt to issue. */
#define MAIN_ITERATIONS (100000)
/* WINDOW is the maximum number of outstanding NBC requests at any given time */
#define WINDOW (20)
/* we sleep with probability 1/CHANCE_OF_SLEEP */
#define CHANCE_OF_SLEEP (1000)
/* JITTER_DELAY is denominated in microseconds (us) */
#define JITTER_DELAY (50000)    /* 0.05 seconds */
/* NUM_COMMS is the number of communicators on which ops will be posted */
#define NUM_COMMS (5)
/* Constants that control behavior of the individual testing operations.
 * Altering these can help to explore the testing space, but increasing them too
 * much can consume too much memory (often O(n^2) usage). */
/* FIXME is COUNT==10 too limiting? should we try a larger count too (~500)? */
#define COUNT (10)
#define PRIME (17)
#define my_assert(cond_)                                                              \
    do {                                                                              \
        if (!(cond_)) {                                                               \
            fprintf(stderr, "assertion (%s) failed on line %d\n", #cond_, __LINE__);  \
            MPI_Abort(MPI_COMM_WORLD, 1);                                             \
        }                                                                             \
    } while (0)
/* Since MPICH is currently the only NBC implementation in existence, just use
 * this quick-and-dirty #ifdef to decide whether to test the nonblocking
 * collectives.  Eventually we can add a configure option or configure test, or
 * the MPI-3 standard will be released and these can be gated on an MPI_VERSION
 * check. */
#if !defined(USE_STRICT_MPI) && defined(MPICH)
#define TEST_NBC_ROUTINES 1
#endif
#if defined(TEST_NBC_ROUTINES)
/* Intended to act like "rand_r", but we can be sure that it will exist and be
 * consistent across all of comm world.  Returns a number in the range
 * [0, GEN_PRN_MAX]. */
#define GEN_PRN_MAX (4294967291-1)
static unsigned int gen_prn(unsigned int x)
{
    /* a simple "multiplicative congruential method" PRNG, with parameters:
     *   m=4294967291, largest 32-bit prime
     *   a=279470273, good primitive root of m from "TABLES OF LINEAR
     *                CONGRUENTIAL GENERATORS OF DIFFERENT SIZES AND GOOD
     *                LATTICE STRUCTURE", by Pierre L'Ecuyer */
    return (279470273UL * (unsigned long)x) % 4294967291UL;
}
/* given a random unsigned int value "rndval_" from gen_prn, this evaluates to a
 * value in the range [min_,max_) */
#define rand_range(rndval_,min_,max_) \
    ((unsigned int)((min_) + ((rndval_) * (1.0 / (GEN_PRN_MAX+1.0)) * ((max_) - (min_)))))
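/* The sketch below is illustrative and not part of the original test: it shows
 * the intended usage pattern, in which a process repeatedly feeds gen_prn's
 * output back into itself and maps each value into a range with rand_range, so
 * that all ranks starting from the same seed make identical "random" choices.
 * The function name and the literal 21 (the number of NBC cases used further
 * down) are assumptions for illustration only; the block is compiled out. */
#if 0
static void example_prn_usage(void)
{
    unsigned int seq = 0x10bc;                   /* same seed on every rank */
    unsigned int comm_idx, case_idx;

    seq = gen_prn(seq);                          /* advance the shared sequence */
    comm_idx = rand_range(seq, 0, NUM_COMMS);    /* e.g. pick a communicator */
    case_idx = rand_range(gen_prn(seq), 0, 21);  /* e.g. pick which NBC op to post */
    (void) comm_idx;
    (void) case_idx;
}
#endif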
static void sum_fn(void *invec, void *inoutvec, int *len, MPI_Datatype *datatype)
{
    int i;
    int *in = invec;
    int *inout = inoutvec;
    for (i = 0; i < *len; ++i) {
        inout[i] = in[i] + inout[i];
    }
}
/* used to keep track of buffers that should be freed after the corresponding
 * operation has completed */
struct laundry {
    int case_num;               /* which test case initiated this req/laundry */
    MPI_Comm comm;
    int *buf;
    int *recvbuf;
    int *sendcounts;
    int *recvcounts;
    int *sdispls;
    int *rdispls;
    MPI_Datatype *sendtypes;
    MPI_Datatype *recvtypes;
};
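/* Illustrative sketch, not part of the original test: one window slot's life
 * cycle as main() below drives it -- post an operation (which stashes its
 * buffers in the laundry), complete it by any of the wait/test flavors, verify
 * the received data, then free the stashed buffers.  "slot_lifecycle_example"
 * is a hypothetical helper shown only for exposition and is compiled out. */
#if 0
static void slot_lifecycle_example(MPI_Comm comm, unsigned int rndnum)
{
    struct laundry l;
    MPI_Request req = MPI_REQUEST_NULL;

    memset(&l, 0, sizeof(l));
    l.case_num = -1;
    start_random_nonblocking(comm, rndnum, &req, &l);   /* post + record buffers */
    MPI_Wait(&req, MPI_STATUS_IGNORE);                  /* any completion routine works */
    check_after_completion(&l);                         /* validate the results */
    cleanup_laundry(&l);                                /* free the recorded buffers */
}
#endif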
static void cleanup_laundry(struct laundry *l)
{
    l->case_num = -1;
    l->comm = MPI_COMM_NULL;
    if (l->buf) free(l->buf);
    if (l->recvbuf) free(l->recvbuf);
    if (l->sendcounts) free(l->sendcounts);
    if (l->recvcounts) free(l->recvcounts);
    if (l->sdispls) free(l->sdispls);
    if (l->rdispls) free(l->rdispls);
    if (l->sendtypes) free(l->sendtypes);
    if (l->recvtypes) free(l->recvtypes);
}
/* Starts a "random" operation on "comm" corresponding to "rndnum" and returns
 * in (*req) a request handle corresponding to that operation.  This call should
 * be considered collective over comm (with a consistent value for "rndnum"),
 * even though the operation may only be a point-to-point request. */
static void start_random_nonblocking(MPI_Comm comm, unsigned int rndnum, MPI_Request *req, struct laundry *l)
{
    int i, j, rank, size;
    int *buf = NULL;
    int *recvbuf = NULL;
    int *sendcounts = NULL;
    int *recvcounts = NULL;
    int *sdispls = NULL;
    int *rdispls = NULL;
    MPI_Datatype *sendtypes = NULL;
    MPI_Datatype *recvtypes = NULL;
    char *buf_alias = NULL;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    *req = MPI_REQUEST_NULL;
    l->comm = comm;

    l->buf = buf = malloc(COUNT*size*sizeof(int));
    l->recvbuf = recvbuf = malloc(COUNT*size*sizeof(int));
    l->sendcounts = sendcounts = malloc(size*sizeof(int));
    l->recvcounts = recvcounts = malloc(size*sizeof(int));
    l->sdispls = sdispls = malloc(size*sizeof(int));
    l->rdispls = rdispls = malloc(size*sizeof(int));
    l->sendtypes = sendtypes = malloc(size*sizeof(MPI_Datatype));
    l->recvtypes = recvtypes = malloc(size*sizeof(MPI_Datatype));
#define NUM_CASES (21)
    l->case_num = rand_range(rndnum, 0, NUM_CASES);
    switch (l->case_num) {
        case 0:        /* MPI_Ibcast */
            for (i = 0; i < COUNT; ++i) {

            MPI_Ibcast(buf, COUNT, MPI_INT, 0, comm, req);

        case 1:        /* MPI_Ibcast (again, but designed to stress scatter/allgather impls) */
            /* FIXME fiddle with PRIME and buffer allocation s.t. PRIME is much larger (1021?) */
            buf_alias = (char *)buf;
            my_assert(COUNT*size*sizeof(int) > PRIME); /* sanity */
            for (i = 0; i < PRIME; ++i) {

            for (i = PRIME; i < COUNT * size * sizeof(int); ++i) {

            MPI_Ibcast(buf, PRIME, MPI_SIGNED_CHAR, 0, comm, req);

        case 2:        /* MPI_Ibarrier */
            MPI_Ibarrier(comm, req);

        case 3:        /* MPI_Ireduce */
            for (i = 0; i < COUNT; ++i) {
                recvbuf[i] = 0xdeadbeef;

            MPI_Ireduce(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, 0, comm, req);

        case 4:        /* same again, use a user op and free it before the wait */
            MPI_Op op = MPI_OP_NULL;
            MPI_Op_create(sum_fn, /*commute=*/1, &op);
            for (i = 0; i < COUNT; ++i) {
                recvbuf[i] = 0xdeadbeef;

            MPI_Ireduce(buf, recvbuf, COUNT, MPI_INT, op, 0, comm, req);

        case 5:        /* MPI_Iallreduce */
            for (i = 0; i < COUNT; ++i) {
                recvbuf[i] = 0xdeadbeef;

            MPI_Iallreduce(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);
        case 6:        /* MPI_Ialltoallv (a weak test, neither irregular nor sparse) */
            for (i = 0; i < size; ++i) {
                sendcounts[i] = COUNT;
                recvcounts[i] = COUNT;
                sdispls[i] = COUNT * i;
                rdispls[i] = COUNT * i;
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + (i * j);
                    recvbuf[i*COUNT+j] = 0xdeadbeef;

            MPI_Ialltoallv(buf, sendcounts, sdispls, MPI_INT, recvbuf, recvcounts, rdispls, MPI_INT, comm, req);
        case 7:        /* MPI_Igather */
            for (i = 0; i < size*COUNT; ++i) {
                recvbuf[i] = 0xdeadbeef;

            MPI_Igather(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, 0, comm, req);

        case 8:        /* same test again, just use a dup'ed datatype and free it before the wait */
            MPI_Datatype type = MPI_DATATYPE_NULL;
            MPI_Type_dup(MPI_INT, &type);
            for (i = 0; i < size*COUNT; ++i) {
                recvbuf[i] = 0xdeadbeef;

            MPI_Igather(buf, COUNT, MPI_INT, recvbuf, COUNT, type, 0, comm, req);
            MPI_Type_free(&type);       /* should cause implementations that don't refcount
                                         * correctly to blow up or hang in the wait */
        case 9:        /* MPI_Iscatter */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    if (rank == 0)
                        buf[i*COUNT+j] = i + j;
                    else
                        buf[i*COUNT+j] = 0xdeadbeef;
                    recvbuf[i*COUNT+j] = 0xdeadbeef;

            MPI_Iscatter(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, 0, comm, req);
        case 10:       /* MPI_Iscatterv */
            for (i = 0; i < size; ++i) {
                /* weak test, just test the regular case where all counts are equal */
                sendcounts[i] = COUNT;
                sdispls[i] = i * COUNT;
                for (j = 0; j < COUNT; ++j) {
                    if (rank == 0)
                        buf[i*COUNT+j] = i + j;
                    else
                        buf[i*COUNT+j] = 0xdeadbeef;
                    recvbuf[i*COUNT+j] = 0xdeadbeef;

            MPI_Iscatterv(buf, sendcounts, sdispls, MPI_INT, recvbuf, COUNT, MPI_INT, 0, comm, req);
        case 11:       /* MPI_Ireduce_scatter */
            for (i = 0; i < size; ++i) {
                recvcounts[i] = COUNT;
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + i;
                    recvbuf[i*COUNT+j] = 0xdeadbeef;

            MPI_Ireduce_scatter(buf, recvbuf, recvcounts, MPI_INT, MPI_SUM, comm, req);

        case 12:       /* MPI_Ireduce_scatter_block */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + i;
                    recvbuf[i*COUNT+j] = 0xdeadbeef;

            MPI_Ireduce_scatter_block(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);
        case 13:       /* MPI_Igatherv */
            for (i = 0; i < size*COUNT; ++i) {
                recvbuf[i] = 0xdeadbeef;

            for (i = 0; i < COUNT; ++i) {

            for (i = 0; i < size; ++i) {
                recvcounts[i] = COUNT;
                rdispls[i] = i * COUNT;

            MPI_Igatherv(buf, COUNT, MPI_INT, recvbuf, recvcounts, rdispls, MPI_INT, 0, comm, req);
        case 14:       /* MPI_Ialltoall */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + (i * j);
                    recvbuf[i*COUNT+j] = 0xdeadbeef;

            MPI_Ialltoall(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, comm, req);

        case 15:       /* MPI_Iallgather */
            for (i = 0; i < size*COUNT; ++i) {
                recvbuf[i] = 0xdeadbeef;

            MPI_Iallgather(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, comm, req);

        case 16:       /* MPI_Iallgatherv */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    recvbuf[i*COUNT+j] = 0xdeadbeef;

                recvcounts[i] = COUNT;
                rdispls[i] = i * COUNT;

            for (i = 0; i < COUNT; ++i)

            MPI_Iallgatherv(buf, COUNT, MPI_INT, recvbuf, recvcounts, rdispls, MPI_INT, comm, req);
        case 17:       /* MPI_Iscan */
            for (i = 0; i < COUNT; ++i) {
                recvbuf[i] = 0xdeadbeef;

            MPI_Iscan(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);

        case 18:       /* MPI_Iexscan */
            for (i = 0; i < COUNT; ++i) {
                recvbuf[i] = 0xdeadbeef;

            MPI_Iexscan(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);

        case 19:       /* MPI_Ialltoallw (a weak test, neither irregular nor sparse) */
            for (i = 0; i < size; ++i) {
                sendcounts[i] = COUNT;
                recvcounts[i] = COUNT;
                sdispls[i] = COUNT * i * sizeof(int);
                rdispls[i] = COUNT * i * sizeof(int);
                sendtypes[i] = MPI_INT;
                recvtypes[i] = MPI_INT;
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + (i * j);
                    recvbuf[i*COUNT+j] = 0xdeadbeef;

            MPI_Ialltoallw(buf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, req);
        case 20:       /* basic pt2pt MPI_Isend/MPI_Irecv pairing */
            /* even ranks send to odd ranks, but only if we have a full pair */
            if ((rank % 2 != 0) || (rank != size-1)) {
                for (j = 0; j < COUNT; ++j) {
                    recvbuf[j] = 0xdeadbeef;

                if (rank % 2 == 0) {
                    MPI_Isend(buf, COUNT, MPI_INT, rank+1, 5, comm, req);
                }
                else {
                    MPI_Irecv(recvbuf, COUNT, MPI_INT, rank-1, 5, comm, req);
                }

        default:
            fprintf(stderr, "unexpected value for l->case_num=%d\n", (l->case_num));
static void check_after_completion(struct laundry *l)
{
    int i, j, rank, size;
    MPI_Comm comm = l->comm;
    int *buf = l->buf;
    int *recvbuf = l->recvbuf;
    int *sendcounts = l->sendcounts;
    int *recvcounts = l->recvcounts;
    int *sdispls = l->sdispls;
    int *rdispls = l->rdispls;
    MPI_Datatype *sendtypes = l->sendtypes;
    MPI_Datatype *recvtypes = l->recvtypes;
    char *buf_alias = (char *)buf;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    /* these cases all correspond to cases in start_random_nonblocking */
    switch (l->case_num) {
        case 0:        /* MPI_Ibcast */
            for (i = 0; i < COUNT; ++i) {
                if (buf[i] != i)
                    printf("buf[%d]=%d i=%d\n", i, buf[i], i);
                my_assert(buf[i] == i);
        case 1:        /* MPI_Ibcast (again, but designed to stress scatter/allgather impls) */
            for (i = 0; i < PRIME; ++i) {
                if (buf_alias[i] != i)
                    printf("buf_alias[%d]=%d i=%d\n", i, buf_alias[i], i);
                my_assert(buf_alias[i] == i);

        case 2:        /* MPI_Ibarrier */
            /* nothing to check */

        case 3:        /* MPI_Ireduce */
            for (i = 0; i < COUNT; ++i) {
                if (recvbuf[i] != ((size * (size-1) / 2) + (i * size)))
                    printf("got recvbuf[%d]=%d, expected %d\n", i, recvbuf[i], ((size * (size-1) / 2) + (i * size)));
                my_assert(recvbuf[i] == ((size * (size-1) / 2) + (i * size)));

        case 4:        /* same again, use a user op and free it before the wait */
            for (i = 0; i < COUNT; ++i) {
                if (recvbuf[i] != ((size * (size-1) / 2) + (i * size)))
                    printf("got recvbuf[%d]=%d, expected %d\n", i, recvbuf[i], ((size * (size-1) / 2) + (i * size)));
                my_assert(recvbuf[i] == ((size * (size-1) / 2) + (i * size)));

        case 5:        /* MPI_Iallreduce */
            for (i = 0; i < COUNT; ++i) {
                if (recvbuf[i] != ((size * (size-1) / 2) + (i * size)))
                    printf("got recvbuf[%d]=%d, expected %d\n", i, recvbuf[i], ((size * (size-1) / 2) + (i * size)));
                my_assert(recvbuf[i] == ((size * (size-1) / 2) + (i * size)));

        case 6:        /* MPI_Ialltoallv (a weak test, neither irregular nor sparse) */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    /*printf("recvbuf[%d*COUNT+%d]=%d, expecting %d\n", i, j, recvbuf[i*COUNT+j], (i + (rank * j)));*/
                    my_assert(recvbuf[i*COUNT+j] == (i + (rank * j)));
        case 7:        /* MPI_Igather */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    my_assert(recvbuf[i*COUNT+j] == i + j);

            for (i = 0; i < size*COUNT; ++i) {
                my_assert(recvbuf[i] == 0xdeadbeef);

        case 8:        /* same test again, just use a dup'ed datatype and free it before the wait */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    my_assert(recvbuf[i*COUNT+j] == i + j);

            for (i = 0; i < size*COUNT; ++i) {
                my_assert(recvbuf[i] == 0xdeadbeef);

        case 9:        /* MPI_Iscatter */
            for (j = 0; j < COUNT; ++j) {
                my_assert(recvbuf[j] == rank + j);

            for (i = 0; i < size*COUNT; ++i) {
                /* check we didn't corrupt the sendbuf somehow */
                my_assert(buf[i] == 0xdeadbeef);
        case 10:       /* MPI_Iscatterv */
            for (j = 0; j < COUNT; ++j) {
                my_assert(recvbuf[j] == rank + j);

            for (i = 0; i < size*COUNT; ++i) {
                /* check we didn't corrupt the sendbuf somehow */
                my_assert(buf[i] == 0xdeadbeef);

            for (i = 1; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    /* check we didn't corrupt the rest of the recvbuf */
                    my_assert(recvbuf[i*COUNT+j] == 0xdeadbeef);

        case 11:       /* MPI_Ireduce_scatter */
            for (j = 0; j < COUNT; ++j) {
                my_assert(recvbuf[j] == (size * rank + ((size - 1) * size) / 2));

            for (i = 1; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    /* check we didn't corrupt the rest of the recvbuf */
                    my_assert(recvbuf[i*COUNT+j] == 0xdeadbeef);

        case 12:       /* MPI_Ireduce_scatter_block */
            for (j = 0; j < COUNT; ++j) {
                my_assert(recvbuf[j] == (size * rank + ((size - 1) * size) / 2));

            for (i = 1; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    /* check we didn't corrupt the rest of the recvbuf */
                    my_assert(recvbuf[i*COUNT+j] == 0xdeadbeef);

        case 13:       /* MPI_Igatherv */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    my_assert(recvbuf[i*COUNT+j] == i + j);

            for (i = 0; i < size*COUNT; ++i) {
                my_assert(recvbuf[i] == 0xdeadbeef);
        case 14:       /* MPI_Ialltoall */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    /*printf("recvbuf[%d*COUNT+%d]=%d, expecting %d\n", i, j, recvbuf[i*COUNT+j], (i + (rank * j)));*/
                    my_assert(recvbuf[i*COUNT+j] == (i + (rank * j)));
        case 15:       /* MPI_Iallgather */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    my_assert(recvbuf[i*COUNT+j] == i + j);

        case 16:       /* MPI_Iallgatherv */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    my_assert(recvbuf[i*COUNT+j] == i + j);

        case 17:       /* MPI_Iscan */
            for (i = 0; i < COUNT; ++i) {
                my_assert(recvbuf[i] == ((rank * (rank+1) / 2) + (i * (rank + 1))));

        case 18:       /* MPI_Iexscan */
            for (i = 0; i < COUNT; ++i) {
                if (rank == 0)
                    my_assert(recvbuf[i] == 0xdeadbeef);
                else
                    my_assert(recvbuf[i] == ((rank * (rank+1) / 2) + (i * (rank + 1)) - (rank + i)));

        case 19:       /* MPI_Ialltoallw (a weak test, neither irregular nor sparse) */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    /*printf("recvbuf[%d*COUNT+%d]=%d, expecting %d\n", i, j, recvbuf[i*COUNT+j], (i + (rank * j)));*/
                    my_assert(recvbuf[i*COUNT+j] == (i + (rank * j)));

        case 20:       /* basic pt2pt MPI_Isend/MPI_Irecv pairing */
            /* even ranks send to odd ranks, but only if we have a full pair */
            if ((rank % 2 != 0) || (rank != size-1)) {
                for (j = 0; j < COUNT; ++j) {
                    /* only odd procs did a recv */
                    if (rank % 2 == 0) {
                        my_assert(recvbuf[j] == 0xdeadbeef);
                    }
                    else {
                        if (recvbuf[j] != j) printf("recvbuf[%d]=%d j=%d\n", j, recvbuf[j], j);
                        my_assert(recvbuf[j] == j);
                    }
655 printf("invalid case_num (%d) detected\n", l->case_num);
static void complete_something_somehow(unsigned int rndnum, int numreqs, MPI_Request reqs[], int *outcount, int indices[])
{
    int i, idx, flag;

#define COMPLETION_CASES (8)
    switch (rand_range(rndnum, 0, COMPLETION_CASES)) {
            MPI_Waitall(numreqs, reqs, MPI_STATUSES_IGNORE);
            for (i = 0; i < numreqs; ++i) {

            MPI_Testsome(numreqs, reqs, outcount, indices, MPI_STATUSES_IGNORE);
            if (*outcount == MPI_UNDEFINED) {

            MPI_Waitsome(numreqs, reqs, outcount, indices, MPI_STATUSES_IGNORE);
            if (*outcount == MPI_UNDEFINED) {

            MPI_Waitany(numreqs, reqs, &idx, MPI_STATUS_IGNORE);
            if (idx == MPI_UNDEFINED) {

            MPI_Testany(numreqs, reqs, &idx, &flag, MPI_STATUS_IGNORE);
            if (idx == MPI_UNDEFINED) {

            MPI_Testall(numreqs, reqs, &flag, MPI_STATUSES_IGNORE);
            for (i = 0; i < numreqs; ++i) {

            /* select a new random index and wait on it */
            rndnum = gen_prn(rndnum);
            idx = rand_range(rndnum, 0, numreqs);
            MPI_Wait(&reqs[idx], MPI_STATUS_IGNORE);

            /* select a new random index and test it */
            rndnum = gen_prn(rndnum);
            idx = rand_range(rndnum, 0, numreqs);
            MPI_Test(&reqs[idx], &flag, MPI_STATUS_IGNORE);
            *outcount = (flag ? 1 : 0);
    }
#undef COMPLETION_CASES
}
#endif /* defined(TEST_NBC_ROUTINES) */
int main(int argc, char **argv)
{
    int errs = 0;
#if defined(TEST_NBC_ROUTINES)
    int i, num_posted, num_completed;
    unsigned int seed = 0x10bc;
    unsigned int post_seq, complete_seq;
    struct laundry larr[WINDOW];
    MPI_Request reqs[WINDOW];
    int outcount;
    int indices[WINDOW];
    MPI_Comm comms[NUM_COMMS];
    MPI_Comm comm;
#endif
    int wrank, wsize;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
    MPI_Comm_size(MPI_COMM_WORLD, &wsize);
#if defined(TEST_NBC_ROUTINES)
    /* it is critical that all processes in the communicator start with a
     * consistent value for "post_seq" */
    post_seq = complete_seq = gen_prn(seed);

    num_posted = 0;
    num_completed = 0;
    /* construct all of the communicators, just dups of comm world for now */
    for (i = 0; i < NUM_COMMS; ++i) {
        MPI_Comm_dup(MPI_COMM_WORLD, &comms[i]);
    }
    /* fill the entire window of ops */
    for (i = 0; i < WINDOW; ++i) {
        reqs[i] = MPI_REQUEST_NULL;
        memset(&larr[i], 0, sizeof(struct laundry));
        larr[i].case_num = -1;

        /* randomly select a comm, using a new seed to avoid correlating
         * particular kinds of NBC ops with particular communicators */
        comm = comms[rand_range(gen_prn(post_seq), 0, NUM_COMMS)];
        start_random_nonblocking(comm, post_seq, &reqs[i], &larr[i]);
        ++num_posted;
        post_seq = gen_prn(post_seq);
    }
    /* now loop repeatedly, completing ops with "random" completion functions,
     * until we've posted and completed MAIN_ITERATIONS ops */
    while (num_completed < MAIN_ITERATIONS) {
        complete_something_somehow(complete_seq, WINDOW, reqs, &outcount, indices);
        complete_seq = gen_prn(complete_seq);
        for (i = 0; i < outcount; ++i) {
            int idx = indices[i];
            assert(reqs[idx] == MPI_REQUEST_NULL);
            if (larr[idx].case_num != -1) {
                check_after_completion(&larr[idx]);
                cleanup_laundry(&larr[idx]);
                ++num_completed;
                if (num_posted < MAIN_ITERATIONS) {
                    comm = comms[rand_range(gen_prn(post_seq), 0, NUM_COMMS)];
                    start_random_nonblocking(comm, post_seq, &reqs[idx], &larr[idx]);
                    ++num_posted;
                    post_seq = gen_prn(post_seq);
                }
            }
        }

        /* "randomly" and infrequently introduce some jitter into the system */
        if (0 == rand_range(gen_prn(complete_seq + wrank), 0, CHANCE_OF_SLEEP)) {
            usleep(JITTER_DELAY);       /* take a short nap */
        }
    }
    for (i = 0; i < NUM_COMMS; ++i) {
        MPI_Comm_free(&comms[i]);
    }
#endif /* defined(TEST_NBC_ROUTINES) */

    if (wrank == 0) {
        if (errs)
            printf("found %d errors\n", errs);
        else
            printf(" No errors\n");
    }

    MPI_Finalize();

    return 0;
}