/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *  (C) 2003 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

/*            MPI-3 distributed linked list construction example
 *            --------------------------------------------------
 *
 * Construct a distributed shared linked list using proposed MPI-3 dynamic
 * windows. Initially process 0 creates the head of the list, attaches it to
 * the window, and broadcasts the pointer to all processes. All processes then
 * concurrently append NUM_ELEMS new elements to the list. When a process
 * attempts to attach its element to the tail of the list it may discover that
 * its tail pointer is stale and it must chase ahead to the new tail before
 * the element can be attached.
 */
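
/* The append protocol uses two atomic operations per link: a compare-and-swap
 * on next.rank claims the tail slot (only one appender can replace the nil
 * rank), and a fetch-and-op then publishes next.disp. A process that reads a
 * valid rank but a still-nil displacement knows the link is being written and
 * retries. Run with any number of processes, e.g. "mpiexec -n 4 ./a.out". */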

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <mpi.h>

#define NUM_ELEMS 32    /* Elements appended by each process (value chosen arbitrarily) */
#define NPROBE 100      /* Iprobe calls used to drive the MPI progress engine */
#define ELEM_PER_ROW 16

/* Linked list pointer */
typedef struct {
    int rank;           /* Rank on which the element lives */
    MPI_Aint disp;      /* Absolute address of the element on that rank */
} llist_ptr_t;

/* Linked list element */
typedef struct {
    int value;
    llist_ptr_t next;
} llist_elem_t;
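
/* In a dynamic window, target displacements are absolute addresses obtained
 * with MPI_Get_address, so a remote "pointer" must carry both the owning rank
 * and that address. The sentinel nil (rank -1, MPI_BOTTOM) marks the absence
 * of a next element. */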

static const llist_ptr_t nil = { -1, (MPI_Aint) MPI_BOTTOM };

static const int verbose = 0;

/* Allocate a new shared linked list element */
static MPI_Aint alloc_elem(int value, MPI_Win win, llist_elem_t ***my_elems,
                           int *my_elems_size, int *my_elems_count)
{
    MPI_Aint disp;
    llist_elem_t *elem_ptr;

    /* Allocate the new element and register it with the window */
    MPI_Alloc_mem(sizeof(llist_elem_t), MPI_INFO_NULL, &elem_ptr);
    elem_ptr->value = value;
    elem_ptr->next = nil;
    MPI_Win_attach(win, elem_ptr, sizeof(llist_elem_t));
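
    /* The element must be attached before any remote process accesses it
     * through the window; remote ranks will address it by the absolute
     * address computed below. */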

    /* Add the element to the list of local elements so we can free it later. */
    if (*my_elems_size == *my_elems_count) {
        *my_elems_size += 100;
        *my_elems = realloc(*my_elems, *my_elems_size * sizeof(void *));
        assert(*my_elems != NULL);
    }
    (*my_elems)[*my_elems_count] = elem_ptr;
    (*my_elems_count)++;

    MPI_Get_address(elem_ptr, &disp);
    return disp;
}

int main(int argc, char **argv)
{
    int procid, nproc, i;
    MPI_Win llist_win;
    llist_ptr_t head_ptr, tail_ptr;
    /* List of locally allocated list elements. */
    llist_elem_t **my_elems = NULL;
    int my_elems_size = 0;
    int my_elems_count = 0;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &procid);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &llist_win);
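
    /* A dynamic window initially exposes no memory; list elements are
     * attached on the fly by alloc_elem as they are created. */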

    /* Process 0 creates the head node */
    if (procid == 0)
        head_ptr.disp = alloc_elem(-1, llist_win, &my_elems, &my_elems_size, &my_elems_count);

    /* Broadcast the head pointer to everyone */
    head_ptr.rank = 0;
    MPI_Bcast(&head_ptr.disp, 1, MPI_AINT, 0, MPI_COMM_WORLD);
    tail_ptr = head_ptr;
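
    /* Only the displacement needs broadcasting: every process already knows
     * the head lives on rank 0, and each starts its tail chase from the head. */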

    /* All processes concurrently append NUM_ELEMS elements to the list */
    for (i = 0; i < NUM_ELEMS; i++) {
        llist_ptr_t new_elem_ptr;
        int success;

        /* Create a new list element and register it with the window */
        new_elem_ptr.rank = procid;
        new_elem_ptr.disp = alloc_elem(procid, llist_win, &my_elems, &my_elems_size, &my_elems_count);

        /* Append the new node to the list. This might take multiple attempts if
         * others have already appended and our tail pointer is stale. */
        do {
            llist_ptr_t next_tail_ptr = nil;

            MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);

            MPI_Compare_and_swap((void *) &new_elem_ptr.rank, (void *) &nil.rank,
                                 (void *) &next_tail_ptr.rank, MPI_INT, tail_ptr.rank,
                                 (MPI_Aint) &(((llist_elem_t *) tail_ptr.disp)->next.rank),
                                 llist_win);

            MPI_Win_unlock(tail_ptr.rank, llist_win);
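
            /* The CAS returns the prior value of next.rank in next_tail_ptr.rank:
             * nil.rank means we claimed the tail slot; anything else is the rank
             * of the element that was appended ahead of us. */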
            success = (next_tail_ptr.rank == nil.rank);

            if (success) {
                int i, flag;
                MPI_Aint result;

                MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);

                MPI_Fetch_and_op(&new_elem_ptr.disp, &result, MPI_AINT, tail_ptr.rank,
                                 (MPI_Aint) &(((llist_elem_t *) tail_ptr.disp)->next.disp),
                                 MPI_REPLACE, llist_win);
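
                /* Publishing rank and displacement with two separate atomics is
                 * safe because a reader that sees the new rank but a nil
                 * displacement treats the link as incomplete and retries (see
                 * the stale-pointer loop below). */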

                /* Note: accumulate is faster, since we don't need the result. Replacing with
                 * Fetch_and_op to create a more complete test case. */
                /*
                 * MPI_Accumulate(&new_elem_ptr.disp, 1, MPI_AINT, tail_ptr.rank,
                 *                (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp), 1,
                 *                MPI_AINT, MPI_REPLACE, llist_win);
                 */

                MPI_Win_unlock(tail_ptr.rank, llist_win);
                tail_ptr = new_elem_ptr;

                /* For implementations that use pt-to-pt messaging, force progress for other
                 * processes' RMA operations. */
                for (i = 0; i < NPROBE; i++)
                    MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag,
                               MPI_STATUS_IGNORE);
            } else {
                /* Tail pointer is stale, fetch the displacement. May take multiple tries
                 * if it is being updated. */
                do {
                    MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);

                    MPI_Fetch_and_op(NULL, &next_tail_ptr.disp, MPI_AINT, tail_ptr.rank,
                                     (MPI_Aint) &(((llist_elem_t *) tail_ptr.disp)->next.disp),
                                     MPI_NO_OP, llist_win);

                    MPI_Win_unlock(tail_ptr.rank, llist_win);
                } while (next_tail_ptr.disp == nil.disp);
                tail_ptr = next_tail_ptr;
            }
        } while (!success);
    }

    MPI_Barrier(MPI_COMM_WORLD);
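
    /* Every element is now linked in, so rank 0 can traverse the list with
     * plain MPI_Get without racing against concurrent writers. */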

    /* Traverse the list and verify that all processes inserted exactly the correct
     * number of elements. */
    if (procid == 0) {
        int errors = 0;
        int *counts, count = 0;

        counts = (int *) malloc(sizeof(int) * nproc);
        assert(counts != NULL);

        for (i = 0; i < nproc; i++)
            counts[i] = 0;

        tail_ptr = head_ptr;

        /* Walk the list and tally up the number of elements inserted by each rank */
        while (tail_ptr.disp != nil.disp) {
            llist_elem_t elem;

            MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);

            MPI_Get(&elem, sizeof(llist_elem_t), MPI_BYTE,
                    tail_ptr.rank, tail_ptr.disp, sizeof(llist_elem_t), MPI_BYTE, llist_win);

            MPI_Win_unlock(tail_ptr.rank, llist_win);

            tail_ptr = elem.next;
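
            /* The element is fetched as raw bytes, which is portable here
             * because every rank runs the same executable and therefore shares
             * the same llist_elem_t layout. */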

            /* This is not the root */
            if (elem.value >= 0) {
                assert(elem.value >= 0 && elem.value < nproc);
                counts[elem.value]++;
                count++;

                if (verbose) {
                    int last_elem = tail_ptr.disp == nil.disp;
                    printf("%2d%s", elem.value, last_elem ? "" : " -> ");
                    if (count % ELEM_PER_ROW == 0 && !last_elem)
                        printf("\n");
                }
            } else {
                /* This is the root */
                assert(elem.value == -1);
            }
        }

        if (verbose)
            printf("\n\n");

        /* Verify the counts we collected */
        for (i = 0; i < nproc; i++) {
            int expected = NUM_ELEMS;

            if (counts[i] != expected) {
                printf("Error: Rank %d inserted %d elements, expected %d\n", i, counts[i],
                       expected);
                errors++;
            }
        }

        printf("%s\n", errors == 0 ? " No Errors" : "FAIL");
        free(counts);
    }

    MPI_Win_free(&llist_win);
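
    /* The window is freed before the element buffers are released with
     * MPI_Free_mem, since they may still be attached to it. */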

    /* Free all the elements in the list */
    for (; my_elems_count > 0; my_elems_count--)
        MPI_Free_mem(my_elems[my_elems_count - 1]);
    free(my_elems);

    MPI_Finalize();
    return 0;
}