add MPICH3 rma tests (15 out of 88 should be passing now)

[simgrid.git] / teshsuite / smpi / mpich3-test / rma / mcs-mutex.c
diff --git a/teshsuite/smpi/mpich3-test/rma/mcs-mutex.c b/teshsuite/smpi/mpich3-test/rma/mcs-mutex.c

new file mode 100644 (file)

index 0000000..533fbdb
--- /dev/null
+++ b/teshsuite/smpi/mpich3-test/rma/mcs-mutex.c
@@ -0,0 +1,216 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2013 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <strings.h>
+
+#include <mpi.h>
+#include "mcs-mutex.h"
+
+/* TODO: Make these mutex operations no-ops for sequential runs */
+
+/** Create an MCS mutex.  Collective on comm.
+  *
+  * @param[out] comm communicator containing all processes that will use the
+  *                  mutex
+  * @param[out] tail_rank rank of the process in comm that holds the tail
+  *                  pointer
+  * @param[out] hdl  handle to the mutex
+  * @return          MPI status
+  */
+int MCS_Mutex_create(int tail_rank, MPI_Comm comm, MCS_Mutex * hdl_out)
+{
+    int rank, nproc;
+    MCS_Mutex hdl;
+
+    hdl = malloc(sizeof(struct mcs_mutex_s));
+    assert(hdl != NULL);
+
+    MPI_Comm_dup(comm, &hdl->comm);
+
+    MPI_Comm_rank(hdl->comm, &rank);
+    MPI_Comm_size(hdl->comm, &nproc);
+
+    hdl->tail_rank = tail_rank;
+
+#ifdef USE_WIN_SHARED
+    MPI_Win_allocate_shared(2*sizeof(int), sizeof(int), MPI_INFO_NULL,
+                            hdl->comm, &hdl->base, &hdl->window);
+#else
+    MPI_Win_allocate(2*sizeof(int), sizeof(int), MPI_INFO_NULL, hdl->comm,
+                     &hdl->base, &hdl->window);
+#endif
+
+    MPI_Win_lock_all(0, hdl->window);
+
+    hdl->base[0] = MPI_PROC_NULL;
+    hdl->base[1] = MPI_PROC_NULL;
+
+    MPI_Win_sync(hdl->window);
+    MPI_Barrier(hdl->comm);
+
+    *hdl_out = hdl;
+    return MPI_SUCCESS;
+}
+
+
+/** Free an MCS mutex.  Collective on ranks in the communicator used at the
+  * time of creation.
+  *
+  * @param[in] hdl handle to the group that will be freed
+  * @return        MPI status
+  */
+int MCS_Mutex_free(MCS_Mutex * hdl_ptr)
+{
+    MCS_Mutex hdl = *hdl_ptr;
+
+    MPI_Win_unlock_all(hdl->window);
+
+    MPI_Win_free(&hdl->window);
+    MPI_Comm_free(&hdl->comm);
+
+    free(hdl);
+    hdl_ptr = NULL;
+
+    return MPI_SUCCESS;
+}
+
+
+/** Lock a mutex.
+  *
+  * @param[in] hdl   Handle to the mutex
+  * @return          MPI status
+  */
+int MCS_Mutex_lock(MCS_Mutex hdl)
+{
+    int rank, nproc;
+    int prev;
+
+    MPI_Comm_rank(hdl->comm, &rank);
+    MPI_Comm_size(hdl->comm, &nproc);
+
+    /* This store is safe, since it cannot happen concurrently with a remote
+     * write */
+    hdl->base[MCS_MTX_ELEM_DISP] = MPI_PROC_NULL;
+    MPI_Win_sync(hdl->window);
+
+    MPI_Fetch_and_op(&rank, &prev, MPI_INT, hdl->tail_rank, MCS_MTX_TAIL_DISP,
+                     MPI_REPLACE, hdl->window);
+    MPI_Win_flush(hdl->tail_rank, hdl->window);
+
+    /* If there was a previous tail, update their next pointer and wait for
+     * notification.  Otherwise, the mutex was successfully acquired. */
+    if (prev != MPI_PROC_NULL) {
+        /* Wait for notification */
+        MPI_Status status;
+
+        MPI_Accumulate(&rank, 1, MPI_INT, prev, MCS_MTX_ELEM_DISP, 1, MPI_INT, MPI_REPLACE, hdl->window);
+        MPI_Win_flush(prev, hdl->window);
+
+        debug_print("%2d: LOCK   - waiting for notification from %d\n", rank, prev);
+        MPI_Recv(NULL, 0, MPI_BYTE, prev, MCS_MUTEX_TAG, hdl->comm, &status);
+    }
+
+    debug_print("%2d: LOCK   - lock acquired\n", rank);
+
+    return MPI_SUCCESS;
+}
+
+
+/** Attempt to acquire a mutex.
+  *
+  * @param[in] hdl   Handle to the mutex
+  * @param[out] success Indicates whether the mutex was acquired
+  * @return          MPI status
+  */
+int MCS_Mutex_trylock(MCS_Mutex hdl, int *success)
+{
+    int rank, nproc;
+    int tail, nil = MPI_PROC_NULL;
+
+    MPI_Comm_rank(hdl->comm, &rank);
+    MPI_Comm_size(hdl->comm, &nproc);
+
+    /* This store is safe, since it cannot happen concurrently with a remote
+     * write */
+    hdl->base[MCS_MTX_ELEM_DISP] = MPI_PROC_NULL;
+    MPI_Win_sync(hdl->window);
+
+    /* Check if the lock is available and claim it if it is. */
+    MPI_Compare_and_swap(&rank, &nil, &tail, MPI_INT, hdl->tail_rank,
+                         MCS_MTX_TAIL_DISP, hdl->window);
+    MPI_Win_flush(hdl->tail_rank, hdl->window);
+
+    /* If the old tail was MPI_PROC_NULL, we have claimed the mutex */
+    *success = (tail == nil);
+
+    debug_print("%2d: TRYLOCK - %s\n", rank, (*success) ? "Success" : "Non-success");
+
+    return MPI_SUCCESS;
+}
+
+
+/** Unlock a mutex.
+  *
+  * @param[in] hdl   Handle to the mutex
+  * @return          MPI status
+  */
+int MCS_Mutex_unlock(MCS_Mutex hdl)
+{
+    int rank, nproc, next;
+
+    MPI_Comm_rank(hdl->comm, &rank);
+    MPI_Comm_size(hdl->comm, &nproc);
+
+    MPI_Win_sync(hdl->window);
+
+    /* Read my next pointer.  FOP is used since another process may write to
+     * this location concurrent with this read. */
+    MPI_Fetch_and_op(NULL, &next, MPI_INT, rank, MCS_MTX_ELEM_DISP, MPI_NO_OP,
+                     hdl->window);
+    MPI_Win_flush(rank, hdl->window);
+
+    if ( next == MPI_PROC_NULL) {
+        int tail;
+        int nil = MPI_PROC_NULL;
+
+        /* Check if we are the at the tail of the lock queue.  If so, we're
+         * done.  If not, we need to send notification. */
+        MPI_Compare_and_swap(&nil, &rank, &tail, MPI_INT, hdl->tail_rank,
+                             MCS_MTX_TAIL_DISP, hdl->window);
+        MPI_Win_flush(hdl->tail_rank, hdl->window);
+
+        if (tail != rank) {
+            debug_print("%2d: UNLOCK - waiting for next pointer (tail = %d)\n", rank, tail);
+            assert(tail >= 0 && tail < nproc);
+
+            for (;;) {
+                int flag;
+
+                MPI_Fetch_and_op(NULL, &next, MPI_INT, rank, MCS_MTX_ELEM_DISP,
+                                 MPI_NO_OP, hdl->window);
+
+                MPI_Win_flush(rank, hdl->window);
+                if (next != MPI_PROC_NULL) break;
+
+                MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag,
+                           MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+    /* Notify the next waiting process */
+    if (next != MPI_PROC_NULL) {
+        debug_print("%2d: UNLOCK - notifying %d\n", rank, next);
+        MPI_Send(NULL, 0, MPI_BYTE, next, MCS_MUTEX_TAG, hdl->comm);
+    }
+
+    debug_print("%2d: UNLOCK - lock released\n", rank);
+
+    return MPI_SUCCESS;
+}