X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/2b4010b2a5fda082278a02a35a9e9eb18b58e110..826897d361add5db3272a9810e70371a40ba1660:/teshsuite/smpi/mpich3-test/rma/mcs-mutex.c

diff --git a/teshsuite/smpi/mpich3-test/rma/mcs-mutex.c b/teshsuite/smpi/mpich3-test/rma/mcs-mutex.c
new file mode 100644
index 0000000000..533fbdb882
--- /dev/null
+++ b/teshsuite/smpi/mpich3-test/rma/mcs-mutex.c
@@ -0,0 +1,216 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ * (C) 2013 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include <mpi.h>
+#include "mcs-mutex.h"
+
+/* TODO: Make these mutex operations no-ops for sequential runs */
+
+/** Create an MCS mutex. Collective on comm.
+ *
+ * @param[in]  tail_rank rank of the process in comm that holds the tail
+ *                       pointer
+ * @param[in]  comm      communicator containing all processes that will use
+ *                       the mutex
+ * @param[out] hdl_out   handle to the mutex
+ * @return               MPI status
+ */
+int MCS_Mutex_create(int tail_rank, MPI_Comm comm, MCS_Mutex * hdl_out)
+{
+    int rank, nproc;
+    MCS_Mutex hdl;
+
+    hdl = malloc(sizeof(struct mcs_mutex_s));
+    assert(hdl != NULL);
+
+    MPI_Comm_dup(comm, &hdl->comm);
+
+    MPI_Comm_rank(hdl->comm, &rank);
+    MPI_Comm_size(hdl->comm, &nproc);
+
+    hdl->tail_rank = tail_rank;
+
+#ifdef USE_WIN_SHARED
+    MPI_Win_allocate_shared(2*sizeof(int), sizeof(int), MPI_INFO_NULL,
+                            hdl->comm, &hdl->base, &hdl->window);
+#else
+    MPI_Win_allocate(2*sizeof(int), sizeof(int), MPI_INFO_NULL, hdl->comm,
+                     &hdl->base, &hdl->window);
+#endif
+
+    MPI_Win_lock_all(0, hdl->window);
+
+    hdl->base[0] = MPI_PROC_NULL;
+    hdl->base[1] = MPI_PROC_NULL;
+
+    MPI_Win_sync(hdl->window);
+    MPI_Barrier(hdl->comm);
+
+    *hdl_out = hdl;
+    return MPI_SUCCESS;
+}
+
+
+/** Free an MCS mutex. Collective on ranks in the communicator used at the
+ * time of creation.
+ *
+ * @param[inout] hdl_ptr handle to the mutex that will be freed
+ * @return               MPI status
+ */
+int MCS_Mutex_free(MCS_Mutex * hdl_ptr)
+{
+    MCS_Mutex hdl = *hdl_ptr;
+
+    MPI_Win_unlock_all(hdl->window);
+
+    MPI_Win_free(&hdl->window);
+    MPI_Comm_free(&hdl->comm);
+
+    free(hdl);
+    *hdl_ptr = NULL;
+
+    return MPI_SUCCESS;
+}
+
+
+/** Lock a mutex.
+ *
+ * @param[in] hdl Handle to the mutex
+ * @return        MPI status
+ */
+int MCS_Mutex_lock(MCS_Mutex hdl)
+{
+    int rank, nproc;
+    int prev;
+
+    MPI_Comm_rank(hdl->comm, &rank);
+    MPI_Comm_size(hdl->comm, &nproc);
+
+    /* This store is safe, since it cannot happen concurrently with a remote
+     * write */
+    hdl->base[MCS_MTX_ELEM_DISP] = MPI_PROC_NULL;
+    MPI_Win_sync(hdl->window);
+
+    MPI_Fetch_and_op(&rank, &prev, MPI_INT, hdl->tail_rank, MCS_MTX_TAIL_DISP,
+                     MPI_REPLACE, hdl->window);
+    MPI_Win_flush(hdl->tail_rank, hdl->window);
+
+    /* If there was a previous tail, update its next pointer and wait for
+     * notification. Otherwise, the mutex was successfully acquired. */
+    if (prev != MPI_PROC_NULL) {
+        /* Wait for notification */
+        MPI_Status status;
+
+        MPI_Accumulate(&rank, 1, MPI_INT, prev, MCS_MTX_ELEM_DISP, 1, MPI_INT,
+                       MPI_REPLACE, hdl->window);
+        MPI_Win_flush(prev, hdl->window);
+
+        debug_print("%2d: LOCK - waiting for notification from %d\n", rank, prev);
+        MPI_Recv(NULL, 0, MPI_BYTE, prev, MCS_MUTEX_TAG, hdl->comm, &status);
+    }
+
+    debug_print("%2d: LOCK - lock acquired\n", rank);
+
+    return MPI_SUCCESS;
+}
+
+
+/** Attempt to acquire a mutex without blocking.
+ *
+ * @param[in]  hdl     Handle to the mutex
+ * @param[out] success Indicates whether the mutex was acquired
+ * @return             MPI status
+ */
+int MCS_Mutex_trylock(MCS_Mutex hdl, int *success)
+{
+    int rank, nproc;
+    int tail, nil = MPI_PROC_NULL;
+
+    MPI_Comm_rank(hdl->comm, &rank);
+    MPI_Comm_size(hdl->comm, &nproc);
+
+    /* This store is safe, since it cannot happen concurrently with a remote
+     * write */
+    hdl->base[MCS_MTX_ELEM_DISP] = MPI_PROC_NULL;
+    MPI_Win_sync(hdl->window);
+
+    /* Check if the lock is available and claim it if it is. */
+    MPI_Compare_and_swap(&rank, &nil, &tail, MPI_INT, hdl->tail_rank,
+                         MCS_MTX_TAIL_DISP, hdl->window);
+    MPI_Win_flush(hdl->tail_rank, hdl->window);
+
+    /* If the old tail was MPI_PROC_NULL, we have claimed the mutex */
+    *success = (tail == nil);
+
+    debug_print("%2d: TRYLOCK - %s\n", rank, (*success) ? "Success" : "Failure");
+
+    return MPI_SUCCESS;
+}
+
+
+/** Unlock a mutex.
+ *
+ * @param[in] hdl Handle to the mutex
+ * @return        MPI status
+ */
+int MCS_Mutex_unlock(MCS_Mutex hdl)
+{
+    int rank, nproc, next;
+
+    MPI_Comm_rank(hdl->comm, &rank);
+    MPI_Comm_size(hdl->comm, &nproc);
+
+    MPI_Win_sync(hdl->window);
+
+    /* Read my next pointer. FOP is used since another process may write to
+     * this location concurrently with this read. */
+    MPI_Fetch_and_op(NULL, &next, MPI_INT, rank, MCS_MTX_ELEM_DISP, MPI_NO_OP,
+                     hdl->window);
+    MPI_Win_flush(rank, hdl->window);
+
+    if (next == MPI_PROC_NULL) {
+        int tail;
+        int nil = MPI_PROC_NULL;
+
+        /* Check if we are at the tail of the lock queue. If so, we're
+         * done. If not, we need to send notification. */
+        MPI_Compare_and_swap(&nil, &rank, &tail, MPI_INT, hdl->tail_rank,
+                             MCS_MTX_TAIL_DISP, hdl->window);
+        MPI_Win_flush(hdl->tail_rank, hdl->window);
+
+        if (tail != rank) {
+            debug_print("%2d: UNLOCK - waiting for next pointer (tail = %d)\n", rank, tail);
+            assert(tail >= 0 && tail < nproc);
+
+            for (;;) {
+                int flag;
+
+                MPI_Fetch_and_op(NULL, &next, MPI_INT, rank, MCS_MTX_ELEM_DISP,
+                                 MPI_NO_OP, hdl->window);
+
+                MPI_Win_flush(rank, hdl->window);
+                if (next != MPI_PROC_NULL) break;
+
+                MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag,
+                           MPI_STATUS_IGNORE);
+            }
+        }
+    }
+
+    /* Notify the next waiting process */
+    if (next != MPI_PROC_NULL) {
+        debug_print("%2d: UNLOCK - notifying %d\n", rank, next);
+        MPI_Send(NULL, 0, MPI_BYTE, next, MCS_MUTEX_TAG, hdl->comm);
+    }
+
+    debug_print("%2d: UNLOCK - lock released\n", rank);
+
+    return MPI_SUCCESS;
+}
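
For context, a minimal usage sketch of the interface this patch adds. It relies only on what the diff itself shows -- MCS_Mutex_create(tail_rank, comm, &hdl), MCS_Mutex_lock(hdl), MCS_Mutex_unlock(hdl) and MCS_Mutex_free(&hdl), declared in "mcs-mutex.h" -- plus standard MPI setup; the driver below (main, MPI_COMM_WORLD, rank 0 as tail host, the printf in the critical section) is illustrative and is not part of the test suite.

    #include <stdio.h>
    #include <mpi.h>
    #include "mcs-mutex.h"

    int main(int argc, char **argv)
    {
        int rank;
        MCS_Mutex mtx;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);

        /* Collective: every rank in the communicator joins the mutex;
         * rank 0 hosts the tail pointer of the MCS queue. */
        MCS_Mutex_create(0, MPI_COMM_WORLD, &mtx);

        MCS_Mutex_lock(mtx);
        printf("rank %d is in the critical section\n", rank);  /* protected work */
        MCS_Mutex_unlock(mtx);

        MCS_Mutex_free(&mtx);   /* collective, like create */
        MPI_Finalize();
        return 0;
    }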