/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *  (C) 2003 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */
/*            MPI-3 distributed linked list construction example
 *            --------------------------------------------------
 *
 *  Construct a distributed shared linked list using proposed MPI-3 dynamic
 *  windows.  Initially process 0 creates the head of the list, attaches it to
 *  the window, and broadcasts the pointer to all processes.  All processes then
 *  concurrently append NUM_ELEMS new elements to the list.  When a process
 *  attempts to attach its element to the tail of the list, it may discover that
 *  its tail pointer is stale and must chase ahead to the new tail before the
 *  element can be attached.
 */
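/* Append protocol sketch (a summary of the code below): each element's next
 * pointer is a (rank, disp) pair initialized to nil.  A process appends by
 * (1) MPI_Compare_and_swap on the tail's next.rank, claiming the slot only if
 * it is still nil, and (2) publishing its element's displacement into the
 * tail's next.disp with an atomic replace.  A process that loses the CAS
 * learns the winner's rank from the swap result and chases the new tail.
 */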
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <mpi.h>

#define NUM_ELEMS    32
#define NPROBE       100
#define ELEM_PER_ROW 16
/* Linked list pointer */
typedef struct {
    int rank;
    MPI_Aint disp;
} llist_ptr_t;

/* Linked list element */
typedef struct {
    int value;
    llist_ptr_t next;
} llist_elem_t;
static const llist_ptr_t nil = { -1, (MPI_Aint) MPI_BOTTOM };
static const int verbose = 0;

/* List of locally allocated list elements. */
static llist_elem_t **my_elems = NULL;
static int my_elems_size  = 0;
static int my_elems_count = 0;
/* Allocate a new shared linked list element */
MPI_Aint alloc_elem(int value, MPI_Win win) {
    MPI_Aint disp;
    llist_elem_t *elem_ptr;
    /* Allocate the new element and register it with the window */
    MPI_Alloc_mem(sizeof(llist_elem_t), MPI_INFO_NULL, &elem_ptr);
    elem_ptr->value = value;
    elem_ptr->next  = nil;
    MPI_Win_attach(win, elem_ptr, sizeof(llist_elem_t));
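
    /* NOTE: with a dynamic window, locally allocated memory becomes remotely
       accessible only after MPI_Win_attach, and remote processes address it by
       its absolute address (obtained via MPI_Get_address below). */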
    /* Add the element to the list of local elements so we can free it later. */
    if (my_elems_size == my_elems_count) {
        my_elems_size += 100;
        my_elems = realloc(my_elems, my_elems_size * sizeof(void*));
    }
    my_elems[my_elems_count] = elem_ptr;
    my_elems_count++;
    MPI_Get_address(elem_ptr, &disp);
    return disp;
}
int main(int argc, char **argv) {
    int procid, nproc, i;
    MPI_Win llist_win;
    llist_ptr_t head_ptr, tail_ptr;
    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &procid);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &llist_win);
    /* Process 0 creates the head node */
    if (procid == 0)
        head_ptr.disp = alloc_elem(-1, llist_win);
    /* Broadcast the head pointer to everyone */
    head_ptr.rank = 0;
    MPI_Bcast(&head_ptr.disp, 1, MPI_AINT, 0, MPI_COMM_WORLD);
    tail_ptr = head_ptr;
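
    /* Every process starts with its tail pointer at the head; a stale tail is
       detected and advanced inside the append loop below. */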
    /* All processes concurrently append NUM_ELEMS elements to the list */
    for (i = 0; i < NUM_ELEMS; i++) {
        llist_ptr_t new_elem_ptr;
        int success;
        /* Create a new list element and register it with the window */
        new_elem_ptr.rank = procid;
        new_elem_ptr.disp = alloc_elem(procid, llist_win);
        /* Append the new node to the list. This might take multiple attempts if
           others have already appended and our tail pointer is stale. */
        do {
            llist_ptr_t next_tail_ptr = nil;
            MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);
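
            /* The target displacement below is computed with host pointer
               arithmetic: tail_ptr.disp holds the absolute address of the
               remote element, so taking the address of its next.rank field
               yields that field's displacement without touching remote memory. */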
            MPI_Compare_and_swap((void*) &new_elem_ptr.rank, (void*) &nil.rank,
                                 (void*) &next_tail_ptr.rank, MPI_INT, tail_ptr.rank,
                                 (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.rank),
                                 llist_win);
            MPI_Win_unlock(tail_ptr.rank, llist_win);
            success = (next_tail_ptr.rank == nil.rank);
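
            /* The CAS returned the tail's previous next.rank: nil means this
               process claimed the tail slot; otherwise it holds the rank of
               the winning process, i.e. half of the new tail pointer. */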
            if (success) {
                int i, flag;
                MPI_Aint result;

                MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);
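
                /* Having won the CAS, this process exclusively owns the tail's
                   next.disp field, so publishing the displacement needs only an
                   atomic replace, not another compare-and-swap. */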
                MPI_Fetch_and_op(&new_elem_ptr.disp, &result, MPI_AINT, tail_ptr.rank,
                                 (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp),
                                 MPI_REPLACE, llist_win);
                /* Note: accumulate is faster, since we don't need the result.
                   Replacing it with Fetch_and_op creates a more complete test case. */
#if 0
                MPI_Accumulate(&new_elem_ptr.disp, 1, MPI_AINT, tail_ptr.rank,
                               (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp), 1,
                               MPI_AINT, MPI_REPLACE, llist_win);
#endif
                MPI_Win_unlock(tail_ptr.rank, llist_win);
                tail_ptr = new_elem_ptr;
                /* For implementations that use pt-to-pt messaging, force progress
                   for other processes' RMA operations. */
                for (i = 0; i < NPROBE; i++)
                    MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag,
                               MPI_STATUS_IGNORE);
            } else {
                /* Tail pointer is stale, fetch the displacement. May take
                   multiple tries if it is being updated. */
                do {
                    MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);

                    MPI_Fetch_and_op(NULL, &next_tail_ptr.disp, MPI_AINT, tail_ptr.rank,
                                     (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp),
                                     MPI_NO_OP, llist_win);

                    MPI_Win_unlock(tail_ptr.rank, llist_win);
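
                    /* Poll until the process that won the CAS publishes a
                       non-nil displacement for its element. */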
                } while (next_tail_ptr.disp == nil.disp);
                tail_ptr = next_tail_ptr;
            }
        } while (!success);
    }
    MPI_Barrier(MPI_COMM_WORLD);
    /* Traverse the list and verify that all processes inserted exactly the
       correct number of elements. */
    if (procid == 0) {
        int errors = 0;
        int *counts, count = 0;
        counts = (int*) malloc(sizeof(int) * nproc);
        assert(counts != NULL);

        for (i = 0; i < nproc; i++)
            counts[i] = 0;

        tail_ptr = head_ptr;
        /* Walk the list and tally up the number of elements inserted by each rank */
        while (tail_ptr.disp != nil.disp) {
            llist_elem_t elem;
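
            /* After the barrier the list is quiescent, so reading the whole
               element with a plain MPI_Get under a shared lock is race-free. */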
            MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);

            MPI_Get(&elem, sizeof(llist_elem_t), MPI_BYTE,
                    tail_ptr.rank, tail_ptr.disp, sizeof(llist_elem_t), MPI_BYTE, llist_win);

            MPI_Win_unlock(tail_ptr.rank, llist_win);

            tail_ptr = elem.next;
            /* This is not the root */
            if (elem.value >= 0) {
                assert(elem.value >= 0 && elem.value < nproc);
                counts[elem.value]++;
                count++;

                if (verbose) {
                    int last_elem = tail_ptr.disp == nil.disp;
                    printf("%2d%s", elem.value, last_elem ? "" : " -> ");
                    if (count % ELEM_PER_ROW == 0 && !last_elem)
                        printf("\n");
                }
            }

            /* This is the root */
            else {
                assert(elem.value == -1);
            }
        }

        if (verbose)
            printf("\n\n");
        /* Verify the counts we collected */
        for (i = 0; i < nproc; i++) {
            int expected = NUM_ELEMS;

            if (counts[i] != expected) {
                printf("Error: Rank %d inserted %d elements, expected %d\n",
                       i, counts[i], expected);
                errors++;
            }
        }
        printf("%s\n", errors == 0 ? " No Errors" : "FAIL");
        free(counts);
    }
    MPI_Win_free(&llist_win);
    /* Free all the elements in the list */
    for ( ; my_elems_count > 0; my_elems_count--)
        MPI_Free_mem(my_elems[my_elems_count-1]);

    MPI_Finalize();
    return 0;
}
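
/* Hypothetical build-and-run example (file and binary names assumed; any
 * MPI-3 implementation should work):
 *
 *     mpicc -o linked_list linked_list.c
 *     mpiexec -n 4 ./linked_list
 *
 * On success, rank 0 prints " No Errors". */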