else {
/* Check that the group was created (and that any errors were
caught) */
- MPI_Group_size( group_array[i], &group_size );
+ rc = MPI_Group_size( group_array[i], &group_size );
if (group_size != size) {
fprintf( stderr, "Group number %d not correct (size = %d)\n",
         i, group_size );
#include <math.h>
#include "mpi.h"
-/* Make sure datatype creation is independent of data size */
+/* Make sure datatype creation is independent of data size
+ Note, however, that there is no guarantee or expectation
+ that the time would be constant. In particular, some
+ optimizations might take more time than others.
+
+ The real goal of this is to ensure that the time to create
+ a datatype doesn't increase strongly with the number of elements
+   within the datatype, particularly for datatypes that, like these,
+   describe quite simple patterns.
+ */
#define SKIP 4
#define NUM_SIZES 16
-#define FRACTION 0.2
+#define FRACTION 1.0
/* Don't make the number of loops too high; we create LOOPS
 * datatypes before trying to free any of them */
{
MPI_Datatype column[LOOPS], xpose[LOOPS];
double t[NUM_SIZES], ttmp, tmean;
+ double tMeanLower, tMeanHigher;
int size;
int i, j, errs = 0, nrows, ncols;
MPI_Init(&argc, &argv);
tmean = 0;
- size = 1;
- for (i = 0; i < NUM_SIZES + SKIP; i++) {
+ size = 1;
+ for (i = -SKIP; i < NUM_SIZES; i++) {
nrows = ncols = size;
ttmp = MPI_Wtime();
MPI_Type_commit(&xpose[j]);
}
- if (i >= SKIP) {
- t[i - SKIP] = MPI_Wtime() - ttmp;
- tmean += t[i - SKIP];
+ if (i >= 0) {
+ t[i] = MPI_Wtime() - ttmp;
+ if (t[i] < 100 * MPI_Wtick()) {
+ /* Time is too inaccurate to use. Set to zero.
+ Consider increasing the LOOPS value to make this
+ time large enough */
+ t[i] = 0;
+ }
+ tmean += t[i];
}
for (j = 0; j < LOOPS; j++) {
MPI_Type_free(&column[j]);
}
- if (i >= SKIP)
+ if (i >= 0)
size *= 2;
}
tmean /= NUM_SIZES;
- /* Now, analyze the times to see that they are nearly independent
- * of size */
- for (i = 0; i < NUM_SIZES; i++) {
- /* The difference between the value and the mean is more than
- * a "FRACTION" of mean. */
- if (fabs(t[i] - tmean) > (FRACTION * tmean))
- errs++;
- }
+    /* Now, analyze the times to see that they do not grow too fast
+       as a function of size. As that is a vague criterion, we apply the
+       following simple test:
+       compute the mean of the first half and of the second half of the
+       data, and compare the two means.
+       If the mean of the second half is more than FRACTION times the
+       mean of the first half, then the time may be growing too fast.
+     */
+ tMeanLower = tMeanHigher = 0;
+ for (i=0; i<NUM_SIZES/2; i++)
+ tMeanLower += t[i];
+ tMeanLower /= (NUM_SIZES/2);
+ for (i=NUM_SIZES/2; i<NUM_SIZES; i++)
+ tMeanHigher += t[i];
+ tMeanHigher /= (NUM_SIZES - NUM_SIZES/2);
+    /* A large value (even 1 or greater) is a good choice for
+       FRACTION here - the goal is to detect significant growth in
+       execution time as the size increases, and there is no MPI
+       standard requirement to meet here.
+
+       If the times were too small, then the test also passes - the
+       goal is to find implementation problems that lead to excessive
+       time in these routines.
+     */
+ if (tMeanLower > 0 && tMeanHigher > (1 + FRACTION) * tMeanLower) errs++;
if (errs) {
fprintf(stderr, "too much difference in performance: ");
add_executable(anyall anyall.c)
add_executable(bottom bottom.c)
+# add_executable(big_count_status big_count_status.c)
# add_executable(bsend1 bsend1.c)
# add_executable(bsend2 bsend2.c)
# add_executable(bsend3 bsend3.c)
add_executable(inactivereq inactivereq.c)
add_executable(isendself isendself.c)
add_executable(isendselfprobe isendselfprobe.c)
+ add_executable(issendselfcancel issendselfcancel.c)
# add_executable(large_message large_message.c)
add_executable(mprobe mprobe.c)
add_executable(pingping pingping.c)
add_executable(waittestnull waittestnull.c)
target_link_libraries(anyall simgrid mtest_c)
+# target_link_libraries(big_count_status simgrid mtest_c)
target_link_libraries(bottom simgrid mtest_c)
# target_link_libraries(bsend1 simgrid mtest_c)
# target_link_libraries(bsend2 simgrid mtest_c)
target_link_libraries(inactivereq simgrid mtest_c)
target_link_libraries(isendself simgrid mtest_c)
target_link_libraries(isendselfprobe simgrid mtest_c)
+ target_link_libraries(issendselfcancel simgrid mtest_c)
# target_link_libraries(large_message simgrid mtest_c)
target_link_libraries(mprobe simgrid mtest_c)
target_link_libraries(pingping simgrid mtest_c)
set(examples_src
${examples_src}
${CMAKE_CURRENT_SOURCE_DIR}/anyall.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/big_count_status.c
${CMAKE_CURRENT_SOURCE_DIR}/bottom.c
${CMAKE_CURRENT_SOURCE_DIR}/bsend1.c
${CMAKE_CURRENT_SOURCE_DIR}/bsend2.c
${CMAKE_CURRENT_SOURCE_DIR}/inactivereq.c
${CMAKE_CURRENT_SOURCE_DIR}/isendself.c
${CMAKE_CURRENT_SOURCE_DIR}/isendselfprobe.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/issendselfcancel.c
${CMAKE_CURRENT_SOURCE_DIR}/large_message.c
${CMAKE_CURRENT_SOURCE_DIR}/mprobe.c
${CMAKE_CURRENT_SOURCE_DIR}/pingping.c
--- /dev/null
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ * (C) 2013 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include <mpi.h>
+#include <assert.h>
+#include <stdio.h>
+
+int test_count(MPI_Count count)
+{
+ MPI_Status stat;
+ int cancelled, cancelled2;
+ MPI_Count bcount, bcount2;
+ int nerrs = 0;
+
+ bcount = count;
+ cancelled = 0;
+ MPI_Status_set_cancelled(&stat, cancelled);
+ MPI_Status_set_elements_x(&stat, MPI_BYTE, bcount);
+ MPI_Get_elements_x(&stat, MPI_BYTE, &bcount2);
+ MPI_Test_cancelled(&stat, &cancelled2);
+ if (bcount != bcount2) {
+        fprintf(stderr, "Count Error: expected %llx, got %llx\n",
+                (long long) bcount, (long long) bcount2);
+ nerrs++;
+ }
+ if (cancelled != cancelled2) {
+ fprintf(stderr, "Cancelled Error: expected %d, got %d\n", cancelled, cancelled2);
+ nerrs++;
+ }
+ return nerrs;
+}
+
+int main(int argc, char **argv)
+{
+ int nerrors = 0;
+
+ MPI_Init(&argc, &argv);
+ /* baseline: this tiny value should pose no problems */
+ nerrors += test_count(60);
+ /* one with no next-to-high-bits set */
+ nerrors += test_count(0x3654321f71234567);
+ /* masking after shift can help the count_high, but count_low is still
+ * wrong */
+ nerrors += test_count(0x7654321f71234567);
+ /* original problematic count reported by Artem Yalozo */
+ nerrors += test_count(0x7654321ff1234567);
+
+ if (nerrors != 0) {
+ fprintf(stderr, "found %d errors\n", nerrors);
+ }
+ else {
+ printf(" No Errors\n");
+ }
+ MPI_Finalize();
+ return 0;
+}
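+
+/* Illustrative sketch (an assumed failure mode, not part of the test):
+ * an implementation that stores an MPI_Count in two 32-bit halves can
+ * lose bits when reassembling it:
+ *
+ *     int low  = (int) count;            // low 32 bits (may be negative)
+ *     int high = (int) (count >> 32);    // high 32 bits
+ *     MPI_Count back = ((MPI_Count) high << 32) | (unsigned int) low;
+ *
+ * Without the (unsigned int) cast, a negative low half sign-extends and
+ * corrupts the high half; the magic values above are chosen so that such
+ * mistakes change the round-tripped count. */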
}
/* Initiate the bsends */
- MPI_Bsend( msg1, msgsize, MPI_CHAR, dest, 0, comm );
- MPI_Bsend( msg2, msgsize, MPI_CHAR, dest, 0, comm );
- MPI_Bsend( msg3, msgsize, MPI_CHAR, dest, 0, comm );
+ MPI_Bsend( msg1, msgsize, MPI_UNSIGNED_CHAR, dest, 0, comm );
+ MPI_Bsend( msg2, msgsize, MPI_UNSIGNED_CHAR, dest, 0, comm );
+ MPI_Bsend( msg3, msgsize, MPI_UNSIGNED_CHAR, dest, 0, comm );
/* Synchronize with our partner */
- MPI_Sendrecv( 0, 0, MPI_CHAR, dest, 10,
- 0, 0, MPI_CHAR, dest, 10, comm, MPI_STATUS_IGNORE );
+ MPI_Sendrecv( NULL, 0, MPI_UNSIGNED_CHAR, dest, 10,
+ NULL, 0, MPI_UNSIGNED_CHAR, dest, 10, comm, MPI_STATUS_IGNORE );
/* Detach the buffers. There should be pending operations */
MPI_Buffer_detach ( &bufp, &outsize );
}
/* Wait for the synchronize */
- MPI_Sendrecv( 0, 0, MPI_CHAR, source, 10,
- 0, 0, MPI_CHAR, source, 10, comm, MPI_STATUS_IGNORE );
+ MPI_Sendrecv( NULL, 0, MPI_UNSIGNED_CHAR, source, 10,
+ NULL, 0, MPI_UNSIGNED_CHAR, source, 10, comm, MPI_STATUS_IGNORE );
/* Wait 2 seconds */
tstart = MPI_Wtime();
while (MPI_Wtime() - tstart < 2.0) ;
/* Now receive the messages */
- MPI_Recv( msg1, msgsize, MPI_CHAR, source, 0, comm, &status1 );
- MPI_Recv( msg2, msgsize, MPI_CHAR, source, 0, comm, &status2 );
- MPI_Recv( msg3, msgsize, MPI_CHAR, source, 0, comm, &status3 );
+ MPI_Recv( msg1, msgsize, MPI_UNSIGNED_CHAR, source, 0, comm, &status1 );
+ MPI_Recv( msg2, msgsize, MPI_UNSIGNED_CHAR, source, 0, comm, &status2 );
+ MPI_Recv( msg3, msgsize, MPI_UNSIGNED_CHAR, source, 0, comm, &status3 );
/* Check that we have the correct data */
for (i=0; i<msgsize; i++) {
--- /dev/null
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ * (C) 2014 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "mpi.h"
+
+int main(int argc, char **argv)
+{
+ MPI_Request req;
+ MPI_Status status;
+
+ MPI_Init(&argc, &argv);
+
+ MPI_Issend(NULL, 0, MPI_BYTE, 0, 123, MPI_COMM_SELF, &req);
+
+ MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_SELF, &status);
+ assert(status.MPI_SOURCE == 0);
+ assert(status.MPI_TAG == 123);
+
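+    /* The probe above has already matched the synchronous send. Note
+     * that MPI_Cancel only requests cancellation; it never sets the
+     * handle to MPI_REQUEST_NULL, so the assertion below must hold. */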
+ MPI_Cancel(&req);
+ assert(req != MPI_REQUEST_NULL);
+
+ MPI_Request_free(&req);
+
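+    /* The freed ssend is still pending, so post a matching receive.
+     * Cancelling a receive that has already matched should fail, and
+     * MPI_Wait then completes it; either way the request finishes. */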
+ MPI_Irecv(NULL, 0, MPI_BYTE, 0, 123, MPI_COMM_SELF, &req);
+ MPI_Cancel(&req);
+ MPI_Wait(&req, &status);
+
+ printf(" No Errors\n");
+
+ MPI_Finalize();
+ return 0;
+}
int i;
int count, size;
int self, npes;
+ double secs;
MPI_Request request[MAXPES];
MPI_Status status;
}
for (size = 1; size <= MYBUFSIZE ; size += size) {
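+    /* Timing idiom: start from -MPI_Wtime() and add MPI_Wtime() after
+       the timed region, leaving the elapsed interval in secs. */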
+ secs = -MPI_Wtime ();
for (count = 0; count < NUM_RUNS; count++) {
MPI_Barrier (MPI_COMM_WORLD);
}
MPI_Barrier (MPI_COMM_WORLD);
+ secs += MPI_Wtime ();
if (self == 0) {
+ secs = secs / (double) NUM_RUNS;
MTestPrintfMsg( 1, "length = %d ints\n", size );
}
}
int *buf[MAX_NMSGS];
for (i=0; i<nmsg; i++) {
- buf[i] = (int *)malloc( msgSize );
+ buf[i] = (int *)malloc( msgSize * sizeof(int) );
if (!buf[i]) {
fprintf( stderr, "Unable to allocate %d bytes\n",
         (int)(msgSize * sizeof(int)) );
MPI_STATUS_IGNORE );
/* Try to fill up the outgoing message buffers */
for (i=0; i<nmsg; i++) {
- MPI_Isend( buf[i], msgSize, MPI_CHAR, partner, testnum, comm,
+ MPI_Isend( buf[i], msgSize, MPI_INT, partner, testnum, comm,
&r[i] );
}
for (i=0; i<nmsg; i++) {
- MPI_Recv( buf[i], msgSize, MPI_CHAR, partner, testnum, comm,
+ MPI_Recv( buf[i], msgSize, MPI_INT, partner, testnum, comm,
MPI_STATUS_IGNORE );
}
MPI_Waitall( nmsg, r, MPI_STATUSES_IGNORE );
/* Try to fill up the outgoing message buffers */
tsend = MPI_Wtime();
for (i=0; i<nmsg; i++) {
- MPI_Isend( buf[i], msgSize, MPI_CHAR, partner, testnum, comm,
+ MPI_Isend( buf[i], msgSize, MPI_INT, partner, testnum, comm,
&r[i] );
}
tsend = MPI_Wtime() - tsend;
for (i=0; i<nmsg; i++) {
- MPI_Recv( buf[i], msgSize, MPI_CHAR, partner, testnum, comm,
+ MPI_Recv( buf[i], msgSize, MPI_INT, partner, testnum, comm,
MPI_STATUS_IGNORE );
}
MPI_Waitall( nmsg, r, MPI_STATUSES_IGNORE );
#bsendalign 2
#bsendpending 2
isendself 1
+issendselfcancel 1
#needs MPI_Buffer_attach, MPI_Bsend, MPI_Buffer_detach
#bsendfrag 2
#needs MPI_Intercomm_create
# perhaps disable in the release tarball
#large_message 3
mprobe 2 mpiversion=3.0
+big_count_status 1 mpiversion=3.0
add_executable(accfence2_am accfence2_am.c)
add_executable(accfence2 accfence2.c)
# add_executable(accpscw1 accpscw1.c)
+# add_executable(acc-loc acc-loc.c)
# add_executable(adlb_mimic1 adlb_mimic1.c)
add_executable(allocmem allocmem.c)
# add_executable(attrorderwin attrorderwin.c)
+# add_executable(badrma badrma.c)
# add_executable(baseattrwin baseattrwin.c)
# add_executable(compare_and_swap compare_and_swap.c)
# add_executable(contention_put contention_put.c)
# add_executable(contention_putget contention_putget.c)
# add_executable(contig_displ contig_displ.c)
add_executable(epochtest epochtest.c)
+# add_executable(fence_shm fence_shm.c)
# add_executable(fetchandadd_am fetchandadd_am.c)
# add_executable(fetchandadd fetchandadd.c)
# add_executable(fetchandadd_tree_am fetchandadd_tree_am.c)
# add_executable(locknull locknull.c)
# add_executable(lockopts lockopts.c)
# add_executable(manyrma2 manyrma2.c)
+ add_executable(manyrma3 manyrma3.c)
# add_executable(mcs-mutex mcs-mutex.c)
# add_executable(mixedsync mixedsync.c)
# add_executable(mutex_bench mutex_bench.c)
# add_executable(reqops reqops.c)
# add_executable(rmanull rmanull.c)
# add_executable(rmazero rmazero.c)
+# add_executable(rma-contig rma-contig.c)
# add_executable(selfrma selfrma.c)
# add_executable(strided_acc_indexed strided_acc_indexed.c)
# add_executable(strided_acc_onelock strided_acc_onelock.c)
# add_executable(win_shared win_shared.c)
# add_executable(win_shared_noncontig win_shared_noncontig.c)
# add_executable(win_shared_noncontig_put win_shared_noncontig_put.c)
+# add_executable(win_large_shm win_large_shm.c)
+# add_executable(win_zero win_zero.c)
# add_executable(wintest wintest.c)
target_link_libraries(accfence2_am simgrid mtest_c)
target_link_libraries(accfence2 simgrid mtest_c)
# target_link_libraries(accpscw1 simgrid mtest_c)
+# target_link_libraries(acc-loc simgrid mtest_c)
# target_link_libraries(adlb_mimic1 simgrid mtest_c)
target_link_libraries(allocmem simgrid mtest_c)
# target_link_libraries(attrorderwin simgrid mtest_c)
+# target_link_libraries(badrma simgrid mtest_c)
# target_link_libraries(baseattrwin simgrid mtest_c)
# target_link_libraries(compare_and_swap simgrid mtest_c)
# target_link_libraries(contention_put simgrid mtest_c)
# target_link_libraries(contention_putget simgrid mtest_c)
# target_link_libraries(contig_displ simgrid mtest_c)
target_link_libraries(epochtest simgrid mtest_c)
+# target_link_libraries(fence_shm simgrid mtest_c)
# target_link_libraries(fetchandadd_am simgrid mtest_c)
# target_link_libraries(fetchandadd simgrid mtest_c)
# target_link_libraries(fetchandadd_tree_am simgrid mtest_c)
# target_link_libraries(locknull simgrid mtest_c)
# target_link_libraries(lockopts simgrid mtest_c)
# target_link_libraries(manyrma2 simgrid mtest_c)
+ target_link_libraries(manyrma3 simgrid mtest_c)
# target_link_libraries(mcs-mutex simgrid mtest_c)
# target_link_libraries(mixedsync simgrid mtest_c)
# target_link_libraries(mutex_bench simgrid mtest_c)
# target_link_libraries(putpscw1 simgrid mtest_c)
# target_link_libraries(req_example simgrid mtest_c)
# target_link_libraries(reqops simgrid mtest_c)
+# target_link_libraries(rma-contig simgrid mtest_c)
# target_link_libraries(rmanull simgrid mtest_c)
# target_link_libraries(rmazero simgrid mtest_c)
# target_link_libraries(selfrma simgrid mtest_c)
# target_link_libraries(win_shared simgrid mtest_c)
# target_link_libraries(win_shared_noncontig simgrid mtest_c)
# target_link_libraries(win_shared_noncontig_put simgrid mtest_c)
+# target_link_libraries(win_large_shm simgrid mtest_c)
+# target_link_libraries(win_zero simgrid mtest_c)
# target_link_libraries(wintest simgrid mtest_c)
set_target_properties(accfence1 PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
set_target_properties(accfence2_am PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
set_target_properties(accfence2 PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
+# set_target_properties(acc-loc PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(accpscw1 PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(adlb_mimic1 PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
set_target_properties(allocmem PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(attrorderwin PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
+# set_target_properties(badrma PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(baseattrwin PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(compare_and_swap PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(contention_put PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(contention_putget PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(contig_displ PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
set_target_properties(epochtest PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
+# set_target_properties(fence_shm PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(fetchandadd_am PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(fetchandadd PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(fetchandadd_tree_am PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(locknull PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(lockopts PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(manyrma2 PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
+ set_target_properties(manyrma3 PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(mcs-mutex PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(mixedsync PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(mutex_bench PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(reqops PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(rmanull PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(rmazero PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
+# set_target_properties(rma-contig PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(selfrma PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(strided_acc_indexed PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(strided_acc_onelock PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(win_shared PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(win_shared_noncontig PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(win_shared_noncontig_put PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
+# set_target_properties(win_large_shm PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
+# set_target_properties(win_zero PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
# set_target_properties(wintest PROPERTIES COMPILE_FLAGS "${MPICH_FLAGS}")
endif()
${CMAKE_CURRENT_SOURCE_DIR}/accfence1.c
${CMAKE_CURRENT_SOURCE_DIR}/accfence2_am.c
${CMAKE_CURRENT_SOURCE_DIR}/accfence2.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/acc-loc.c
${CMAKE_CURRENT_SOURCE_DIR}/accpscw1.c
${CMAKE_CURRENT_SOURCE_DIR}/adlb_mimic1.c
${CMAKE_CURRENT_SOURCE_DIR}/allocmem.c
${CMAKE_CURRENT_SOURCE_DIR}/attrorderwin.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/badrma.c
${CMAKE_CURRENT_SOURCE_DIR}/baseattrwin.c
${CMAKE_CURRENT_SOURCE_DIR}/compare_and_swap.c
${CMAKE_CURRENT_SOURCE_DIR}/contention_put.c
${CMAKE_CURRENT_SOURCE_DIR}/contention_putget.c
${CMAKE_CURRENT_SOURCE_DIR}/contig_displ.c
${CMAKE_CURRENT_SOURCE_DIR}/epochtest.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/fence_shm.c
${CMAKE_CURRENT_SOURCE_DIR}/fetchandadd_am.c
${CMAKE_CURRENT_SOURCE_DIR}/fetchandadd.c
${CMAKE_CURRENT_SOURCE_DIR}/fetchandadd_tree_am.c
${CMAKE_CURRENT_SOURCE_DIR}/locknull.c
${CMAKE_CURRENT_SOURCE_DIR}/lockopts.c
${CMAKE_CURRENT_SOURCE_DIR}/manyrma2.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/manyrma3.c
${CMAKE_CURRENT_SOURCE_DIR}/mcs-mutex.c
${CMAKE_CURRENT_SOURCE_DIR}/mixedsync.c
${CMAKE_CURRENT_SOURCE_DIR}/mutex_bench.c
${CMAKE_CURRENT_SOURCE_DIR}/reqops.c
${CMAKE_CURRENT_SOURCE_DIR}/rmanull.c
${CMAKE_CURRENT_SOURCE_DIR}/rmazero.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/rma-contig.c
${CMAKE_CURRENT_SOURCE_DIR}/selfrma.c
${CMAKE_CURRENT_SOURCE_DIR}/strided_acc_indexed.c
${CMAKE_CURRENT_SOURCE_DIR}/strided_acc_onelock.c
${CMAKE_CURRENT_SOURCE_DIR}/win_dynamic_acc.c
${CMAKE_CURRENT_SOURCE_DIR}/win_flavors.c
${CMAKE_CURRENT_SOURCE_DIR}/win_info.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/win_large_shm.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/win_zero.c
${CMAKE_CURRENT_SOURCE_DIR}/winname.c
${CMAKE_CURRENT_SOURCE_DIR}/win_shared.c
${CMAKE_CURRENT_SOURCE_DIR}/win_shared_noncontig.c
--- /dev/null
+/*
+ * (C) 2006 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ *
+ * Portions of this code were written by Intel Corporation.
+ * Copyright (C) 2011-2012 Intel Corporation. Intel provides this material
+ * to Argonne National Laboratory subject to Software Grant and Corporate
+ * Contributor License Agreement dated February 8, 2012.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <mpi.h>
+#include "mpitest.h"
+
+typedef struct {
+ int val;
+ int loc;
+} twoint_t;
+
+static int errors = 0;
+
+int main(int argc, char **argv) {
+ int me, nproc;
+ twoint_t *data = NULL;
+ twoint_t mine;
+ MPI_Win win;
+
+ MTest_Init(&argc, &argv);
+
+ MPI_Comm_rank(MPI_COMM_WORLD, &me);
+ MPI_Comm_size(MPI_COMM_WORLD, &nproc);
+
+ if (me == 0) {
+ MPI_Alloc_mem(sizeof(twoint_t), MPI_INFO_NULL, &data);
+ }
+
+ MPI_Win_create(data, me == 0 ? sizeof(twoint_t) : 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);
+ MPI_Win_fence(MPI_MODE_NOPRECEDE, win);
+
+ /* All processes perform MAXLOC and MINLOC operations on a 2INT on rank 0.
+ * The loc is the origin process' rank, and the value is (nproc-me). In
+ * the case of MAXLOC, rank 0 should win and in the case of MINLOC, rank
+ * nproc-1 should win.
+ */
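+    /* For example, with nproc = 4 the contributions {val, loc} are
+     * {4,0}, {3,1}, {2,2}, {1,3}; MAXLOC selects {4,0} and MINLOC
+     * selects {1,3}. */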
+
+ /** Test MAXLOC **/
+
+ if (me == 0) {
+ data->val = 0;
+ data->loc = -1;
+ }
+ MPI_Win_fence(0, win);
+
+ mine.loc = me;
+ mine.val = nproc - me;
+ MPI_Accumulate(&mine, 1, MPI_2INT, 0, 0, 1, MPI_2INT, MPI_MAXLOC, win);
+ MPI_Win_fence(0, win);
+
+ if (me == 0 && (data->loc != 0 || data->val != nproc)) {
+ errors++;
+ printf("Expected: { loc = %d, val = %d } Actual: { loc = %d, val = %d }\n",
+ 0, nproc, data->loc, data->val);
+ }
+
+ /** Test MINLOC **/
+
+ if (me == 0) {
+ data->val = nproc;
+ data->loc = -1;
+ }
+ MPI_Win_fence(0, win);
+
+ mine.loc = me;
+ mine.val = nproc - me;
+ MPI_Accumulate(&mine, 1, MPI_2INT, 0, 0, 1, MPI_2INT, MPI_MINLOC, win);
+ MPI_Win_fence(0, win);
+
+ if (me == 0 && (data->loc != nproc-1 || data->val != 1)) {
+ errors++;
+ printf("Expected: { loc = %d, val = %d } Actual: { loc = %d, val = %d }\n",
+ nproc-1, 1, data->loc, data->val);
+ }
+
+ /* All processes perform MAXLOC and MINLOC operations on a 2INT on rank 0.
+ * The loc is the origin process' rank, and the value is 1. In both cases,
+ * rank 0 should win because the values are equal and it has the lowest
+ * loc.
+ */
+
+ /** Test MAXLOC **/
+
+ if (me == 0) {
+ data->val = 0;
+ data->loc = -1;
+ }
+ MPI_Win_fence(MPI_MODE_NOSUCCEED, win);
+
+ mine.loc = me;
+ mine.val = 1;
+
+ MPI_Win_lock(MPI_LOCK_SHARED, 0, MPI_MODE_NOCHECK, win);
+ MPI_Accumulate(&mine, 1, MPI_2INT, 0, 0, 1, MPI_2INT, MPI_MAXLOC, win);
+ MPI_Win_unlock(0, win);
+
+ MPI_Barrier(MPI_COMM_WORLD);
+
+ if (me == 0 && (data->loc != 0 || data->val != 1)) {
+ errors++;
+ printf("Expected: { loc = %d, val = %d } Actual: { loc = %d, val = %d }\n",
+ 0, 1, data->loc, data->val);
+ }
+ MPI_Win_fence(MPI_MODE_NOPRECEDE, win);
+
+ /** Test MINLOC **/
+
+ if (me == 0) {
+ data->val = nproc;
+ data->loc = -1;
+ }
+ MPI_Win_fence(MPI_MODE_NOSUCCEED, win);
+
+ mine.loc = me;
+ mine.val = 1;
+
+ MPI_Win_lock(MPI_LOCK_SHARED, 0, MPI_MODE_NOCHECK, win);
+ MPI_Accumulate(&mine, 1, MPI_2INT, 0, 0, 1, MPI_2INT, MPI_MINLOC, win);
+ MPI_Win_unlock(0, win);
+
+ MPI_Barrier(MPI_COMM_WORLD);
+
+ if (me == 0 && (data->loc != 0 || data->val != 1)) {
+ errors++;
+ printf("Expected: { loc = %d, val = %d } Actual: { loc = %d, val = %d }\n",
+ 0, 1, data->loc, data->val);
+ }
+
+ MPI_Win_free(&win);
+
+ if (me == 0) {
+ MPI_Free_mem(data);
+ }
+
+ MTest_Finalize(errors);
+ MPI_Finalize();
+ return 0;
+}
--- /dev/null
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ * (C) 2001 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include "mpi.h"
+#include "stdio.h"
+
+#define SIZE 100
+
+MPI_Win win;
+int win_buf[SIZE], origin_buf[SIZE], result_buf[SIZE];
+
+int do_test(int origin_count, MPI_Datatype origin_type, int result_count,
+ MPI_Datatype result_type, int target_count, MPI_Datatype target_type)
+{
+ int errs = 0, ret, origin_type_size, result_type_size;
+
+ ret = MPI_Put(origin_buf, origin_count, origin_type, 1, 0, target_count, target_type, win);
+ if (ret)
+ errs++;
+
+ ret = MPI_Get(origin_buf, origin_count, origin_type, 1, 0, target_count, target_type, win);
+ if (ret)
+ errs++;
+
+ ret = MPI_Accumulate(origin_buf, origin_count, origin_type, 1, 0, target_count,
+ target_type, MPI_SUM, win);
+ if (ret)
+ errs++;
+
+ ret = MPI_Get_accumulate(origin_buf, origin_count, origin_type, result_buf, result_count,
+ result_type, 1, 0, target_count, target_type, MPI_SUM, win);
+ if (ret)
+ errs++;
+
+ MPI_Type_size(origin_type, &origin_type_size);
+ MPI_Type_size(result_type, &result_type_size);
+
+ if (origin_count == 0 || origin_type_size == 0) {
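+        /* The test assumes a zero-byte transfer is valid even with a
+         * NULL origin pointer, since no data is ever dereferenced. */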
+ ret = MPI_Put(NULL, origin_count, origin_type, 1, 0, target_count, target_type, win);
+ if (ret)
+ errs++;
+
+ ret = MPI_Get(NULL, origin_count, origin_type, 1, 0, target_count, target_type, win);
+ if (ret)
+ errs++;
+
+ ret = MPI_Accumulate(NULL, origin_count, origin_type, 1, 0, target_count, target_type,
+ MPI_SUM, win);
+ if (ret)
+ errs++;
+
+ ret = MPI_Get_accumulate(NULL, origin_count, origin_type, result_buf, result_count,
+ result_type, 1, 0, target_count, target_type, MPI_SUM, win);
+ if (ret)
+ errs++;
+
+ if (result_count == 0 || result_type_size == 0) {
+ ret = MPI_Get_accumulate(NULL, origin_count, origin_type, NULL, result_count,
+ result_type, 1, 0, target_count, target_type, MPI_SUM, win);
+ if (ret)
+ errs++;
+ }
+ }
+
+ return errs;
+}
+
+int main(int argc, char *argv[])
+{
+ int rank, nprocs, i, j, k;
+ int errs = 0;
+ MPI_Datatype types[4];
+
+ MPI_Init(&argc, &argv);
+ MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+ if (nprocs < 2) {
+ printf("Run this program with 2 or more processes\n");
+ MPI_Abort(MPI_COMM_WORLD, 1);
+ }
+
+ /* types[0] is of zero size. Everything else is non-zero size */
+ MPI_Type_contiguous(0, MPI_INT, &types[0]);
+ MPI_Type_commit(&types[0]);
+
+ MPI_Type_contiguous(1, MPI_INT, &types[1]);
+ MPI_Type_commit(&types[1]);
+
+ types[2] = MPI_INT;
+ types[3] = MPI_DOUBLE;
+
+ MPI_Win_create(win_buf, SIZE * sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
+
+ MPI_Win_fence(0, win);
+
+ if (rank == 0) {
+ /* zero-count */
+ for (i = 0; i < 4; i++)
+ for (j = 0; j < 4; j++)
+ for (k = 0; k < 4; k++)
+ do_test(0, types[i], 0, types[j], 0, types[k]);
+
+ /* single zero-size datatype, but non-zero count */
+ for (i = 1; i < 4; i++) {
+ for (j = 1; j < 4; j++) {
+ do_test(1, types[0], 0, types[i], 0, types[j]);
+ do_test(0, types[i], 1, types[0], 0, types[j]);
+ do_test(0, types[i], 0, types[j], 1, types[0]);
+ }
+ }
+
+ /* two zero-size datatypes, but non-zero count */
+ for (i = 1; i < 4; i++) {
+ do_test(1, types[0], 1, types[0], 0, types[i]);
+ do_test(1, types[0], 0, types[i], 1, types[0]);
+ do_test(0, types[i], 1, types[0], 1, types[0]);
+
+ do_test(1, types[0], 2, types[0], 0, types[i]);
+ do_test(2, types[0], 1, types[0], 0, types[i]);
+
+ do_test(1, types[0], 0, types[i], 2, types[0]);
+ do_test(2, types[0], 0, types[i], 1, types[0]);
+
+ do_test(0, types[i], 1, types[0], 2, types[0]);
+ do_test(0, types[i], 2, types[0], 1, types[0]);
+ }
+
+ /* three zero-size datatypes, but non-zero count */
+ do_test(1, types[0], 1, types[0], 1, types[0]);
+ do_test(1, types[0], 1, types[0], 2, types[0]);
+ do_test(1, types[0], 2, types[0], 1, types[0]);
+ do_test(1, types[0], 2, types[0], 2, types[0]);
+ do_test(2, types[0], 1, types[0], 1, types[0]);
+ do_test(2, types[0], 1, types[0], 2, types[0]);
+ do_test(2, types[0], 2, types[0], 1, types[0]);
+ }
+ MPI_Win_fence(0, win);
+
+ MPI_Win_free(&win);
+ MPI_Type_free(&types[0]);
+ MPI_Type_free(&types[1]);
+
+ if (!errs && !rank)
+ printf(" No Errors\n");
+
+ MPI_Finalize();
+
+ return 0;
+}
--- /dev/null
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ * (C) 2014 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include <stdio.h>
+#include "mpi.h"
+#include "mpitest.h"
+
+#define ELEM_PER_PROC 1
+
+static int errors = 0;
+
+int main(int argc, char *argv[])
+{
+ int rank, nprocs;
+ int shm_rank, shm_nprocs;
+ MPI_Comm shm_comm;
+ MPI_Win shm_win;
+ int *my_base;
+ int one = 1;
+ int result_data;
+
+ MTest_Init(&argc, &argv);
+
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+ MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+
+    /* This test needs at least two processes in the shared-memory
+       communicator; the RMA checks below are skipped otherwise. */
+
+ MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL, &shm_comm);
+
+ MPI_Comm_rank(shm_comm, &shm_rank);
+ MPI_Comm_size(shm_comm, &shm_nprocs);
+
+ if (shm_nprocs >= 2) {
+ MPI_Win_allocate_shared(sizeof(int) * ELEM_PER_PROC, sizeof(int), MPI_INFO_NULL,
+ shm_comm, &my_base, &shm_win);
+
+ /* Test for FENCE with no asserts. */
+
+ if (shm_rank == 1) {
+ *my_base = 0;
+
+ MPI_Win_fence(0, shm_win);
+ MPI_Win_fence(0, shm_win);
+
+ if (my_base[0] != one) {
+ errors++;
+ printf("Expected: my_base[0] = %d Actual: my_base[0] = %d\n", one, my_base[0]);
+ }
+ }
+
+ if (shm_rank == 0) {
+ MPI_Win_fence(0, shm_win);
+ MPI_Put(&one, 1, MPI_INT, 1, 0, 1, MPI_INT, shm_win);
+ MPI_Win_fence(0, shm_win);
+ }
+
+ /* Test for FENCE with assert MPI_MODE_NOPRECEDE. */
+
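+        /* The put in the lock/unlock epoch below completes at
+         * MPI_Win_unlock, so the later fence may assert
+         * MPI_MODE_NOPRECEDE: no locally issued RMA is still
+         * outstanding when the fence is called. */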
+ if (shm_rank == 1) {
+ MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 1, 0, shm_win);
+ MPI_Put(&one, 1, MPI_INT, 1, 0, 1, MPI_INT, shm_win);
+ MPI_Win_unlock(1, shm_win);
+
+ MPI_Win_fence(MPI_MODE_NOPRECEDE, shm_win);
+ MPI_Win_fence(0, shm_win);
+ }
+
+ if (shm_rank == 0) {
+ result_data = 0;
+ MPI_Win_fence(MPI_MODE_NOPRECEDE, shm_win);
+ MPI_Get(&result_data, 1, MPI_INT, 1, 0, 1, MPI_INT, shm_win);
+ MPI_Win_fence(0, shm_win);
+
+ if (result_data != one) {
+ errors++;
+ printf("Expected: result_data = %d Actual: result_data = %d\n",
+ one, result_data);
+ }
+ }
+
+ MPI_Win_free(&shm_win);
+ }
+
+ MPI_Comm_free(&shm_comm);
+
+ MTest_Finalize(errors);
+ MPI_Finalize();
+ return 0;
+}
for (j = 0; j < nproc; j++) {
TYPE_C rank_cnv = (TYPE_C) rank;
MPI_Fetch_and_op(&rank_cnv, &res_ptr[j], TYPE_MPI, j, rank, MPI_SUM, win);
- res_ptr[j] = i*rank;
}
MPI_Win_fence(MPI_MODE_NOSUCCEED, win);
MPI_Barrier(MPI_COMM_WORLD);
for (j = 0; j < nproc; j++) {
TYPE_C rank_cnv = (TYPE_C) rank;
MPI_Fetch_and_op(&rank_cnv, &res_ptr[j], TYPE_MPI, j, rank, MPI_SUM, win);
- res_ptr[j] = i*rank;
}
MPI_Win_unlock_all(win);
MPI_Barrier(MPI_COMM_WORLD);
for (j = 0; j < nproc; j++) {
TYPE_C rank_cnv = (TYPE_C) rank;
MPI_Fetch_and_op(&rank_cnv, &res_ptr[j], TYPE_MPI, j, rank, MPI_SUM, win);
- res_ptr[j] = i*rank;
MPI_Win_flush(j, win);
}
MPI_Win_unlock_all(win);
/* This test is a simplification of the one in perf/manyrma.c that tests
for correct handling of the case where many RMA operations occur between
synchronization events.
- This is one of the ways that RMA may be used, and is used in the
+ This is one of the ways that RMA may be used, and is used in the
reference implementation of the graph500 benchmark.
*/
#include "mpi.h"
#include <string.h>
#define MAX_COUNT 65536*4/16
-#define MAX_RMA_SIZE 2 /* 16 in manyrma performance test */
+#define MAX_RMA_SIZE 2 /* 16 in manyrma performance test */
#define MAX_RUNS 10
+#define MAX_ITER_TIME 5.0 /* seconds */
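+/* Stop doubling the operation count for a given transfer size once a
+   single timed run exceeds MAX_ITER_TIME, keeping total runtime bounded. */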
-typedef enum { SYNC_NONE=0,
- SYNC_ALL=-1, SYNC_FENCE=1, SYNC_LOCK=2, SYNC_PSCW=4 } sync_t;
-typedef enum { RMA_NONE=0, RMA_ALL=-1, RMA_PUT=1, RMA_ACC=2, RMA_GET=4 } rma_t;
+typedef enum { SYNC_NONE = 0,
+ SYNC_ALL = -1, SYNC_FENCE = 1, SYNC_LOCK = 2, SYNC_PSCW = 4
+} sync_t;
+typedef enum { RMA_NONE = 0, RMA_ALL = -1, RMA_PUT = 1, RMA_ACC = 2, RMA_GET = 4 } rma_t;
/* Note GET not yet implemented */
/* By default, run only a subset of the available tests, to keep the
total runtime reasonably short. Command line arguments may be used
static int verbose = 0;
-void RunAccFence( MPI_Win win, int destRank, int cnt, int sz );
-void RunAccLock( MPI_Win win, int destRank, int cnt, int sz );
-void RunPutFence( MPI_Win win, int destRank, int cnt, int sz );
-void RunPutLock( MPI_Win win, int destRank, int cnt, int sz );
-void RunAccPSCW( MPI_Win win, int destRank, int cnt, int sz,
- MPI_Group exposureGroup, MPI_Group accessGroup );
-void RunPutPSCW( MPI_Win win, int destRank, int cnt, int sz,
- MPI_Group exposureGroup, MPI_Group accessGroup );
+void RunAccFence(MPI_Win win, int destRank, int cnt, int sz);
+void RunAccLock(MPI_Win win, int destRank, int cnt, int sz);
+void RunPutFence(MPI_Win win, int destRank, int cnt, int sz);
+void RunPutLock(MPI_Win win, int destRank, int cnt, int sz);
+void RunAccPSCW(MPI_Win win, int destRank, int cnt, int sz,
+ MPI_Group exposureGroup, MPI_Group accessGroup);
+void RunPutPSCW(MPI_Win win, int destRank, int cnt, int sz,
+ MPI_Group exposureGroup, MPI_Group accessGroup);
-int main( int argc, char *argv[] )
+int main(int argc, char *argv[])
{
- int arraysize, i, cnt, sz, maxCount=MAX_COUNT, *arraybuffer;
+ int arraysize, i, cnt, sz, maxCount = MAX_COUNT, *arraybuffer;
int wrank, wsize, destRank, srcRank;
MPI_Win win;
MPI_Group wgroup, accessGroup, exposureGroup;
- int maxSz = MAX_RMA_SIZE;
-
- MPI_Init( &argc, &argv );
-
- for (i=1; i<argc; i++) {
- if (strcmp( argv[i], "-put" ) == 0) {
- if (rmaChoice == RMA_ALL) rmaChoice = RMA_NONE;
- rmaChoice |= RMA_PUT;
- }
- else if (strcmp( argv[i], "-acc" ) == 0) {
- if (rmaChoice == RMA_ALL) rmaChoice = RMA_NONE;
- rmaChoice |= RMA_ACC;
- }
- else if (strcmp( argv[i], "-fence" ) == 0) {
- if (syncChoice == SYNC_ALL) syncChoice = SYNC_NONE;
- syncChoice |= SYNC_FENCE;
- }
- else if (strcmp( argv[i], "-lock" ) == 0) {
- if (syncChoice == SYNC_ALL) syncChoice = SYNC_NONE;
- syncChoice |= SYNC_LOCK;
- }
- else if (strcmp( argv[i], "-pscw" ) == 0) {
- if (syncChoice == SYNC_ALL) syncChoice = SYNC_NONE;
- syncChoice |= SYNC_PSCW;
- }
- else if (strcmp( argv[i], "-maxsz" ) == 0) {
- i++;
- maxSz = atoi( argv[i] );
- }
- else if (strcmp( argv[i], "-maxcount" ) == 0) {
- i++;
- maxCount = atoi( argv[i] );
- }
- else {
- fprintf( stderr, "Unrecognized argument %s\n", argv[i] );
- fprintf( stderr, "%s [ -put ] [ -acc ] [ -lock ] [ -fence ] [ -pscw ] [ -maxsz msgsize ]\n", argv[0] );
- MPI_Abort( MPI_COMM_WORLD, 1 );
- }
+ int maxSz = MAX_RMA_SIZE;
+ double start, end;
+
+ MPI_Init(&argc, &argv);
+
+ for (i = 1; i < argc; i++) {
+ if (strcmp(argv[i], "-put") == 0) {
+ if (rmaChoice == RMA_ALL)
+ rmaChoice = RMA_NONE;
+ rmaChoice |= RMA_PUT;
+ }
+ else if (strcmp(argv[i], "-acc") == 0) {
+ if (rmaChoice == RMA_ALL)
+ rmaChoice = RMA_NONE;
+ rmaChoice |= RMA_ACC;
+ }
+ else if (strcmp(argv[i], "-fence") == 0) {
+ if (syncChoice == SYNC_ALL)
+ syncChoice = SYNC_NONE;
+ syncChoice |= SYNC_FENCE;
+ }
+ else if (strcmp(argv[i], "-lock") == 0) {
+ if (syncChoice == SYNC_ALL)
+ syncChoice = SYNC_NONE;
+ syncChoice |= SYNC_LOCK;
+ }
+ else if (strcmp(argv[i], "-pscw") == 0) {
+ if (syncChoice == SYNC_ALL)
+ syncChoice = SYNC_NONE;
+ syncChoice |= SYNC_PSCW;
+ }
+ else if (strcmp(argv[i], "-maxsz") == 0) {
+ i++;
+ maxSz = atoi(argv[i]);
+ }
+ else if (strcmp(argv[i], "-maxcount") == 0) {
+ i++;
+ maxCount = atoi(argv[i]);
+ }
+ else {
+ fprintf(stderr, "Unrecognized argument %s\n", argv[i]);
+ fprintf(stderr,
+ "%s [ -put ] [ -acc ] [ -lock ] [ -fence ] [ -pscw ] [ -maxsz msgsize ]\n",
+ argv[0]);
+ MPI_Abort(MPI_COMM_WORLD, 1);
+ }
}
-
- MPI_Comm_rank( MPI_COMM_WORLD, &wrank );
- MPI_Comm_size( MPI_COMM_WORLD, &wsize );
+
+ MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
+ MPI_Comm_size(MPI_COMM_WORLD, &wsize);
destRank = wrank + 1;
- while (destRank >= wsize) destRank = destRank - wsize;
+ while (destRank >= wsize)
+ destRank = destRank - wsize;
srcRank = wrank - 1;
- if (srcRank < 0) srcRank += wsize;
+ if (srcRank < 0)
+ srcRank += wsize;
/* Create groups for PSCW */
- MPI_Comm_group( MPI_COMM_WORLD, &wgroup );
- MPI_Group_incl( wgroup, 1, &destRank, &accessGroup );
- MPI_Group_incl( wgroup, 1, &srcRank, &exposureGroup );
- MPI_Group_free( &wgroup );
+ MPI_Comm_group(MPI_COMM_WORLD, &wgroup);
+ MPI_Group_incl(wgroup, 1, &destRank, &accessGroup);
+ MPI_Group_incl(wgroup, 1, &srcRank, &exposureGroup);
+ MPI_Group_free(&wgroup);
arraysize = maxSz * MAX_COUNT;
- arraybuffer = (int*)malloc( arraysize * sizeof(int) );
+ arraybuffer = (int *) malloc(arraysize * sizeof(int));
if (!arraybuffer) {
- fprintf( stderr, "Unable to allocate %d words\n", arraysize );
- MPI_Abort( MPI_COMM_WORLD, 1 );
+ fprintf(stderr, "Unable to allocate %d words\n", arraysize);
+ MPI_Abort(MPI_COMM_WORLD, 1);
}
- MPI_Win_create( arraybuffer, arraysize*sizeof(int), (int)sizeof(int),
- MPI_INFO_NULL, MPI_COMM_WORLD, &win );
+ MPI_Win_create(arraybuffer, arraysize * sizeof(int), (int) sizeof(int),
+ MPI_INFO_NULL, MPI_COMM_WORLD, &win);
if (maxCount > MAX_COUNT) {
- fprintf( stderr, "MaxCount must not exceed %d\n", MAX_COUNT );
- MPI_Abort( MPI_COMM_WORLD, 1 );
+ fprintf(stderr, "MaxCount must not exceed %d\n", MAX_COUNT);
+ MPI_Abort(MPI_COMM_WORLD, 1);
}
if ((syncChoice & SYNC_FENCE) && (rmaChoice & RMA_ACC)) {
- for (sz=1; sz<=maxSz; sz = sz + sz) {
- if (wrank == 0 && verbose)
- printf( "Accumulate with fence, %d elements\n", sz );
- cnt = 1;
- while (cnt <= maxCount) {
- RunAccFence( win, destRank, cnt, sz );
- cnt = 2 * cnt;
- }
- }
+ for (sz = 1; sz <= maxSz; sz = sz + sz) {
+ if (wrank == 0 && verbose)
+ printf("Accumulate with fence, %d elements\n", sz);
+ for (cnt = 1; cnt <= maxCount; cnt *= 2) {
+ start = MPI_Wtime();
+ RunAccFence(win, destRank, cnt, sz);
+ end = MPI_Wtime();
+ if (end - start > MAX_ITER_TIME)
+ break;
+ }
+ }
}
if ((syncChoice & SYNC_LOCK) && (rmaChoice & RMA_ACC)) {
- for (sz=1; sz<=maxSz; sz = sz + sz) {
- if (wrank == 0 && verbose)
- printf( "Accumulate with lock, %d elements\n", sz );
- cnt = 1;
- while (cnt <= maxCount) {
- RunAccLock( win, destRank, cnt, sz );
- cnt = 2 * cnt;
- }
- }
+ for (sz = 1; sz <= maxSz; sz = sz + sz) {
+ if (wrank == 0 && verbose)
+ printf("Accumulate with lock, %d elements\n", sz);
+ for (cnt = 1; cnt <= maxCount; cnt *= 2) {
+ start = MPI_Wtime();
+ RunAccLock(win, destRank, cnt, sz);
+ end = MPI_Wtime();
+ if (end - start > MAX_ITER_TIME)
+ break;
+ }
+ }
}
if ((syncChoice & SYNC_FENCE) && (rmaChoice & RMA_PUT)) {
- for (sz=1; sz<=maxSz; sz = sz + sz) {
- if (wrank == 0 && verbose)
- printf( "Put with fence, %d elements\n", sz );
- cnt = 1;
- while (cnt <= maxCount) {
- RunPutFence( win, destRank, cnt, sz );
- cnt = 2 * cnt;
- }
- }
+ for (sz = 1; sz <= maxSz; sz = sz + sz) {
+ if (wrank == 0 && verbose)
+ printf("Put with fence, %d elements\n", sz);
+ for (cnt = 1; cnt <= maxCount; cnt *= 2) {
+ start = MPI_Wtime();
+ RunPutFence(win, destRank, cnt, sz);
+ end = MPI_Wtime();
+ if (end - start > MAX_ITER_TIME)
+ break;
+ }
+ }
}
if ((syncChoice & SYNC_LOCK) && (rmaChoice & RMA_PUT)) {
- for (sz=1; sz<=maxSz; sz = sz + sz) {
- if (wrank == 0 && verbose)
- printf( "Put with lock, %d elements\n", sz );
- cnt = 1;
- while (cnt <= maxCount) {
- RunPutLock( win, destRank, cnt, sz );
- cnt = 2 * cnt;
- }
- }
+ for (sz = 1; sz <= maxSz; sz = sz + sz) {
+ if (wrank == 0 && verbose)
+ printf("Put with lock, %d elements\n", sz);
+ for (cnt = 1; cnt <= maxCount; cnt *= 2) {
+ start = MPI_Wtime();
+ RunPutLock(win, destRank, cnt, sz);
+ end = MPI_Wtime();
+ if (end - start > MAX_ITER_TIME)
+ break;
+ }
+ }
}
if ((syncChoice & SYNC_PSCW) && (rmaChoice & RMA_PUT)) {
- for (sz=1; sz<=maxSz; sz = sz + sz) {
- if (wrank == 0 && verbose)
- printf( "Put with pscw, %d elements\n", sz );
- cnt = 1;
- while (cnt <= maxCount) {
- RunPutPSCW( win, destRank, cnt, sz,
- exposureGroup, accessGroup );
- cnt = 2 * cnt;
- }
- }
+ for (sz = 1; sz <= maxSz; sz = sz + sz) {
+ if (wrank == 0 && verbose)
+ printf("Put with pscw, %d elements\n", sz);
+ for (cnt = 1; cnt <= maxCount; cnt *= 2) {
+ start = MPI_Wtime();
+ RunPutPSCW(win, destRank, cnt, sz, exposureGroup, accessGroup);
+ end = MPI_Wtime();
+ if (end - start > MAX_ITER_TIME)
+ break;
+ }
+ }
}
if ((syncChoice & SYNC_PSCW) && (rmaChoice & RMA_ACC)) {
- for (sz=1; sz<=maxSz; sz = sz + sz) {
- if (wrank == 0 && verbose)
- printf( "Accumulate with pscw, %d elements\n", sz );
- cnt = 1;
- while (cnt <= maxCount) {
- RunAccPSCW( win, destRank, cnt, sz,
- exposureGroup, accessGroup );
- cnt = 2 * cnt;
- }
- }
+ for (sz = 1; sz <= maxSz; sz = sz + sz) {
+ if (wrank == 0 && verbose)
+ printf("Accumulate with pscw, %d elements\n", sz);
+ for (cnt = 1; cnt <= maxCount; cnt *= 2) {
+ start = MPI_Wtime();
+ RunAccPSCW(win, destRank, cnt, sz, exposureGroup, accessGroup);
+ end = MPI_Wtime();
+ if (end - start > MAX_ITER_TIME)
+ break;
+ }
+ }
}
- MPI_Win_free( &win );
+ MPI_Win_free(&win);
- MPI_Group_free( &accessGroup );
- MPI_Group_free( &exposureGroup );
+ MPI_Group_free(&accessGroup);
+ MPI_Group_free(&exposureGroup);
/* If we get here without timing out or failing, we succeeded */
- if (wrank == 0) printf( " No Errors\n" );
-
+ if (wrank == 0)
+ printf(" No Errors\n");
+
MPI_Finalize();
return 0;
}
-void RunAccFence( MPI_Win win, int destRank, int cnt, int sz )
+void RunAccFence(MPI_Win win, int destRank, int cnt, int sz)
{
int k, i, j, one = 1;
- for (k=0; k<MAX_RUNS; k++) {
- MPI_Barrier( MPI_COMM_WORLD );
- MPI_Win_fence( 0, win );
- j = 0;
- for (i=0; i<cnt; i++) {
- MPI_Accumulate( &one, sz, MPI_INT, destRank,
- j, sz, MPI_INT, MPI_SUM, win );
- j += sz;
- }
- MPI_Win_fence( 0, win );
+ for (k = 0; k < MAX_RUNS; k++) {
+ MPI_Barrier(MPI_COMM_WORLD);
+ MPI_Win_fence(0, win);
+ j = 0;
+ for (i = 0; i < cnt; i++) {
+ MPI_Accumulate(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
+ j += sz;
+ }
+ MPI_Win_fence(0, win);
}
}
-void RunAccLock( MPI_Win win, int destRank, int cnt, int sz )
+void RunAccLock(MPI_Win win, int destRank, int cnt, int sz)
{
int k, i, j, one = 1;
- for (k=0; k<MAX_RUNS; k++) {
- MPI_Barrier( MPI_COMM_WORLD );
- MPI_Win_lock( MPI_LOCK_SHARED, destRank, 0, win );
- j = 0;
- for (i=0; i<cnt; i++) {
- MPI_Accumulate( &one, sz, MPI_INT, destRank,
- j, sz, MPI_INT, MPI_SUM, win );
- j += sz;
- }
- MPI_Win_unlock( destRank, win );
+ for (k = 0; k < MAX_RUNS; k++) {
+ MPI_Barrier(MPI_COMM_WORLD);
+ MPI_Win_lock(MPI_LOCK_SHARED, destRank, 0, win);
+ j = 0;
+ for (i = 0; i < cnt; i++) {
+ MPI_Accumulate(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
+ j += sz;
+ }
+ MPI_Win_unlock(destRank, win);
}
}
-void RunPutFence( MPI_Win win, int destRank, int cnt, int sz )
+void RunPutFence(MPI_Win win, int destRank, int cnt, int sz)
{
int k, i, j, one = 1;
- for (k=0; k<MAX_RUNS; k++) {
- MPI_Barrier( MPI_COMM_WORLD );
- MPI_Win_fence( 0, win );
- j = 0;
- for (i=0; i<cnt; i++) {
- MPI_Put( &one, sz, MPI_INT, destRank,
- j, sz, MPI_INT, win );
- j += sz;
- }
- MPI_Win_fence( 0, win );
+ for (k = 0; k < MAX_RUNS; k++) {
+ MPI_Barrier(MPI_COMM_WORLD);
+ MPI_Win_fence(0, win);
+ j = 0;
+ for (i = 0; i < cnt; i++) {
+ MPI_Put(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
+ j += sz;
+ }
+ MPI_Win_fence(0, win);
}
}
-void RunPutLock( MPI_Win win, int destRank, int cnt, int sz )
+void RunPutLock(MPI_Win win, int destRank, int cnt, int sz)
{
int k, i, j, one = 1;
- for (k=0; k<MAX_RUNS; k++) {
- MPI_Barrier( MPI_COMM_WORLD );
- MPI_Win_lock( MPI_LOCK_SHARED, destRank, 0, win );
- j = 0;
- for (i=0; i<cnt; i++) {
- MPI_Put( &one, sz, MPI_INT, destRank, j, sz, MPI_INT, win );
- j += sz;
- }
- MPI_Win_unlock( destRank, win );
+ for (k = 0; k < MAX_RUNS; k++) {
+ MPI_Barrier(MPI_COMM_WORLD);
+ MPI_Win_lock(MPI_LOCK_SHARED, destRank, 0, win);
+ j = 0;
+ for (i = 0; i < cnt; i++) {
+ MPI_Put(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
+ j += sz;
+ }
+ MPI_Win_unlock(destRank, win);
}
}
-void RunPutPSCW( MPI_Win win, int destRank, int cnt, int sz,
- MPI_Group exposureGroup, MPI_Group accessGroup )
+void RunPutPSCW(MPI_Win win, int destRank, int cnt, int sz,
+ MPI_Group exposureGroup, MPI_Group accessGroup)
{
int k, i, j, one = 1;
- for (k=0; k<MAX_RUNS; k++) {
- MPI_Barrier( MPI_COMM_WORLD );
- MPI_Win_post( exposureGroup, 0, win );
- MPI_Win_start( accessGroup, 0, win );
- j = 0;
- for (i=0; i<cnt; i++) {
- MPI_Put( &one, sz, MPI_INT, destRank, j, sz, MPI_INT, win );
- j += sz;
- }
- MPI_Win_complete( win );
- MPI_Win_wait( win );
+ for (k = 0; k < MAX_RUNS; k++) {
+ MPI_Barrier(MPI_COMM_WORLD);
+ MPI_Win_post(exposureGroup, 0, win);
+ MPI_Win_start(accessGroup, 0, win);
+ j = 0;
+ for (i = 0; i < cnt; i++) {
+ MPI_Put(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
+ j += sz;
+ }
+ MPI_Win_complete(win);
+ MPI_Win_wait(win);
}
}
-void RunAccPSCW( MPI_Win win, int destRank, int cnt, int sz,
- MPI_Group exposureGroup, MPI_Group accessGroup )
+void RunAccPSCW(MPI_Win win, int destRank, int cnt, int sz,
+ MPI_Group exposureGroup, MPI_Group accessGroup)
{
int k, i, j, one = 1;
- for (k=0; k<MAX_RUNS; k++) {
- MPI_Barrier( MPI_COMM_WORLD );
- MPI_Win_post( exposureGroup, 0, win );
- MPI_Win_start( accessGroup, 0, win );
- j = 0;
- for (i=0; i<cnt; i++) {
- MPI_Accumulate( &one, sz, MPI_INT, destRank,
- j, sz, MPI_INT, MPI_SUM, win );
- j += sz;
- }
- MPI_Win_complete( win );
- MPI_Win_wait( win );
+ for (k = 0; k < MAX_RUNS; k++) {
+ MPI_Barrier(MPI_COMM_WORLD);
+ MPI_Win_post(exposureGroup, 0, win);
+ MPI_Win_start(accessGroup, 0, win);
+ j = 0;
+ for (i = 0; i < cnt; i++) {
+ MPI_Accumulate(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
+ j += sz;
+ }
+ MPI_Win_complete(win);
+ MPI_Win_wait(win);
}
}
--- /dev/null
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ * (C) 2013 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include "mpi.h"
+#include <stdio.h>
+
+#define MAX_COUNT 4096
+
+int main(int argc, char *argv[])
+{
+ int i, winbuf, one = 1, rank;
+ MPI_Win win;
+
+ MPI_Init(&argc, &argv);
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+ MPI_Win_create(&winbuf, sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
+
+ MPI_Win_fence(0, win);
+ for (i = 0; i < MAX_COUNT; i++)
+ MPI_Accumulate(&one, 1, MPI_INT, 0, 0, 1, MPI_INT, MPI_SUM, win);
+ MPI_Win_fence(0, win);
+
+ MPI_Win_free(&win);
+
+ if (rank == 0)
+ printf(" No Errors\n");
+
+ MPI_Finalize();
+
+ return 0;
+}
MPI_Win_allocate_shared(2*sizeof(int), sizeof(int), MPI_INFO_NULL,
hdl->comm, &hdl->base, &hdl->window);
#else
- MPI_Win_allocate(2*sizeof(int), sizeof(int), MPI_INFO_NULL, hdl->comm,
+#ifdef USE_WIN_ALLOC_SHM
+ MPI_Info_create(&hdl->win_info);
+ MPI_Info_set(hdl->win_info, "alloc_shm", "true");
+#else
+ MPI_Info_create(&hdl->win_info);
+ MPI_Info_set(hdl->win_info, "alloc_shm", "false");
+#endif
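+    /* "alloc_shm" is an implementation-specific info hint (honored by
+       MPICH): "true" requests window memory from a shared-memory
+       segment, "false" forbids it. */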
+ MPI_Win_allocate(2*sizeof(int), sizeof(int), hdl->win_info, hdl->comm,
&hdl->base, &hdl->window);
#endif
MPI_Win_free(&hdl->window);
MPI_Comm_free(&hdl->comm);
+#ifndef USE_WIN_SHARED
+ MPI_Info_free(&hdl->win_info);
+#endif
free(hdl);
hdl_ptr = NULL;
MPI_Comm comm;
MPI_Win window;
int *base;
+ MPI_Info win_info;
};
typedef struct mcs_mutex_s * MCS_Mutex;
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
- * Copyright (C) 2013. See COPYRIGHT in top-level directory.
+ *
+ * (C) 2013 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
*/
/** MPI Mutex test -- James Dinan <dinan@mcs.anl.gov>
MPI_Request get_req;
double *baseptr;
double data[M][N]; /* M buffers of length N */
+ MPI_Info win_info;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
assert(M < NSTEPS);
- MPI_Win_allocate(NSTEPS*N*sizeof(double), sizeof(double), MPI_INFO_NULL,
+ MPI_Info_create(&win_info);
+
+#ifdef USE_WIN_ALLOC_SHM
+ MPI_Info_set(win_info, "alloc_shm", "true");
+#else
+ MPI_Info_set(win_info, "alloc_shm", "false");
+#endif
+
+ MPI_Win_allocate(NSTEPS*N*sizeof(double), sizeof(double), win_info,
MPI_COMM_WORLD, &baseptr, &win);
MPI_Win_lock_all(0, win);
MPI_Win_free(&win);
+ MPI_Info_free(&win_info);
+
MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
if (rank == 0 && all_errors == 0)
--- /dev/null
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ * (C) 2013 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <mpi.h>
+
+#define MAX_DATA_SIZE (1024*128*16)
+#define MAX_NUM_ITERATIONS (8192*4)
+#define MIN_NUM_ITERATIONS 8
+#define NUM_WARMUP_ITER 1
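+/* The first NUM_WARMUP_ITER iterations of each timing loop are excluded
+   from the measurement: the timer starts once the warmup completes. */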
+
+const int verbose = 0;
+static int rank;
+
+void run_test(int lock_mode, int lock_assert)
+{
+ int nproc, test_iter, target_rank, data_size;
+ int *buf, *win_buf;
+ MPI_Win win;
+
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+ MPI_Comm_size(MPI_COMM_WORLD, &nproc);
+
+ if (rank == 0 && verbose) {
+ printf("Starting one-sided contiguous performance test with %d processes\n", nproc);
+
+ printf("Synchronization mode: ");
+
+ switch (lock_mode) {
+ case MPI_LOCK_EXCLUSIVE:
+ printf("Exclusive lock");
+ break;
+ case MPI_LOCK_SHARED:
+ printf("Shared lock");
+ break;
+ default:
+ printf("Unknown lock");
+ break;
+ }
+
+ if (lock_assert & MPI_MODE_NOCHECK)
+ printf(", MPI_MODE_NOCHECK");
+
+ printf("\n");
+ }
+
+ MPI_Alloc_mem(MAX_DATA_SIZE, MPI_INFO_NULL, &buf);
+ MPI_Alloc_mem(MAX_DATA_SIZE, MPI_INFO_NULL, &win_buf);
+ memset(buf, rank, MAX_DATA_SIZE);
+ memset(win_buf, rank, MAX_DATA_SIZE);
+ MPI_Win_create(win_buf, MAX_DATA_SIZE, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);
+
+ if (rank == 0 && verbose)
+ printf("%12s %12s %12s %12s %12s %12s %12s %12s\n", "Trg. Rank", "Xfer Size",
+ "Get (usec)", "Put (usec)", "Acc (usec)",
+ "Get (MiB/s)", "Put (MiB/s)", "Acc (MiB/s)");
+
+ for (target_rank = 0; rank == 0 && target_rank < nproc; target_rank++) {
+ for (data_size = sizeof(double); data_size <= MAX_DATA_SIZE; data_size *= 2) {
+ double t_get, t_put, t_acc;
+ int num_iter = MAX_NUM_ITERATIONS;
+
+ /* Scale the number of iterations by log_2 of the data size, so
+ * that we run each test for a reasonable amount of time. */
+ {
+ int t = data_size, my_log2 = 0;
+ while (t >>= 1)
+ my_log2++;
+ if (my_log2)
+ num_iter = (num_iter / my_log2 < MIN_NUM_ITERATIONS) ?
+ MIN_NUM_ITERATIONS : num_iter / my_log2;
+ }
+
+ for (test_iter = 0; test_iter < num_iter + NUM_WARMUP_ITER; test_iter++) {
+ if (test_iter == NUM_WARMUP_ITER)
+ t_get = MPI_Wtime();
+
+ MPI_Win_lock(lock_mode, target_rank, lock_assert, win);
+ MPI_Get(buf, data_size, MPI_BYTE, target_rank, 0, data_size, MPI_BYTE, win);
+ MPI_Win_unlock(target_rank, win);
+ }
+ t_get = (MPI_Wtime() - t_get) / num_iter;
+
+ for (test_iter = 0; test_iter < num_iter + NUM_WARMUP_ITER; test_iter++) {
+ if (test_iter == NUM_WARMUP_ITER)
+ t_put = MPI_Wtime();
+
+ MPI_Win_lock(lock_mode, target_rank, lock_assert, win);
+ MPI_Put(buf, data_size, MPI_BYTE, target_rank, 0, data_size, MPI_BYTE, win);
+ MPI_Win_unlock(target_rank, win);
+ }
+ t_put = (MPI_Wtime() - t_put) / num_iter;
+
+ for (test_iter = 0; test_iter < num_iter + NUM_WARMUP_ITER; test_iter++) {
+ if (test_iter == NUM_WARMUP_ITER)
+ t_acc = MPI_Wtime();
+
+ MPI_Win_lock(lock_mode, target_rank, lock_assert, win);
+ MPI_Accumulate(buf, data_size / sizeof(int), MPI_INT, target_rank,
+ 0, data_size / sizeof(int), MPI_INT, MPI_SUM, win);
+ MPI_Win_unlock(target_rank, win);
+ }
+ t_acc = (MPI_Wtime() - t_acc) / num_iter;
+
+ if (rank == 0 && verbose)
+ printf("%12d %12d %12.3f %12.3f %12.3f %12.3f %12.3f %12.3f\n", target_rank,
+ data_size, t_get * 1.0e6, t_put * 1.0e6, t_acc * 1.0e6,
+ data_size / (1024.0 * 1024.0) / t_get, data_size / (1024.0 * 1024.0) / t_put,
+ data_size / (1024.0 * 1024.0) / t_acc);
+ }
+ }
+
+ MPI_Barrier(MPI_COMM_WORLD);
+
+ MPI_Win_free(&win);
+ MPI_Free_mem(win_buf);
+ MPI_Free_mem(buf);
+}
+
+int main(int argc, char **argv)
+{
+ MPI_Init(&argc, &argv);
+
+ run_test(MPI_LOCK_EXCLUSIVE, 0);
+ run_test(MPI_LOCK_EXCLUSIVE, MPI_MODE_NOCHECK);
+ run_test(MPI_LOCK_SHARED, 0);
+ run_test(MPI_LOCK_SHARED, MPI_MODE_NOCHECK);
+
+ MPI_Finalize();
+
+ if (rank == 0)
+ printf(" No Errors\n");
+
+ return 0;
+}
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
- * (C) 2013 by Argonne National Laboratory.
+ * (C) 2010 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include "mpi.h"
MPI_Comm comm;
MPI_Win win;
MPI_Request req;
+ MPI_Datatype derived_dtp;
MTest_Init( &argc, &argv );
MPI_Abort( MPI_COMM_WORLD, 1 );
}
+ MPI_Type_contiguous(2, MPI_INT, &derived_dtp);
+ MPI_Type_commit(&derived_dtp);
+
/* The following loop is used to run through a series of communicators
* that are subsets of MPI_COMM_WORLD, of size 1 or greater. */
while (MTestGetIntracommGeneral( &comm, 1, 1 )) {
MPI_Comm_rank( comm, &rank );
MPI_Comm_size( comm, &size );
- MPI_Win_create( buf, bufsize, sizeof(int), MPI_INFO_NULL, comm, &win );
+ MPI_Win_create( buf, bufsize, 2*sizeof(int), MPI_INFO_NULL, comm, &win );
/* To improve reporting of problems about operations, we
change the error handler to errors return */
MPI_Win_set_errhandler( win, MPI_ERRORS_RETURN );
MPI_Accumulate( rmabuf, count, MPI_INT, TARGET,
0, count, MPI_INT, MPI_SUM, win );
);
+ TEST_FENCE_OP("Accumulate_derived",
+ MPI_Accumulate( rmabuf, count, derived_dtp, TARGET,
+ 0, count, derived_dtp, MPI_SUM, win );
+ );
TEST_FENCE_OP("Get accumulate",
MPI_Get_accumulate( rmabuf, count, MPI_INT, result,
count, MPI_INT, TARGET, 0,
MPI_Accumulate( rmabuf, count, MPI_INT, TARGET, 0,
count, MPI_INT, MPI_SUM, win );
);
+ TEST_PT_OP("Accumulate_derived",
+ MPI_Accumulate( rmabuf, count, derived_dtp, TARGET, 0,
+ count, derived_dtp, MPI_SUM, win );
+ );
TEST_PT_OP("Get accumulate",
MPI_Get_accumulate( rmabuf, count, MPI_INT, result, count,
MPI_INT, TARGET, 0, count,
MPI_Raccumulate( rmabuf, count, MPI_INT, TARGET, 0,
count, MPI_INT, MPI_SUM, win, &req );
);
+ TEST_REQ_OP("Raccumulate_derived", req,
+ MPI_Raccumulate( rmabuf, count, derived_dtp, TARGET, 0,
+ count, derived_dtp, MPI_SUM, win, &req );
+ );
TEST_REQ_OP("Rget_accumulate", req,
MPI_Rget_accumulate( rmabuf, count, MPI_INT, result,
count, MPI_INT, TARGET, 0,
MTestFreeComm(&comm);
}
+ MPI_Type_free(&derived_dtp);
+
free( result );
free( buf );
free( rmabuf );
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
- * (C) 2012 by Argonne National Laboratory.
+ * (C) 2011 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
- * (C) 2012 by Argonne National Laboratory.
+ * (C) 2011 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#put_bottom 2
#win_flavors 4 mpiversion=3.0
#manyrma2 2 timeLimit=500
+manyrma3 2
#win_shared 4 mpiversion=3.0
#win_shared_noncontig 4 mpiversion=3.0
#win_shared_noncontig_put 4 mpiversion=3.0
#linked_list_bench_lock_excl 4 mpiversion=3.0
#linked_list_bench_lock_shr 4 mpiversion=3.0
#linked_list_bench_lock_shr_nocheck 4 mpiversion=3.0
+#badrma 2 mpiversion=3.0
+#acc-loc 4
+#fence_shm 2 mpiversion=3.0
#mutex_bench 4 mpiversion=3.0
#mutex_bench_shared 4 mpiversion=3.0
+
+## This test is not strictly correct. This was meant to test out the
+## case when MPI_Test is not nonblocking. However, we ended up
+## assuming that MPI_Win_lock will be nonblocking. That is not
+## specified by the standard and might not be true. Commenting this
+## out until we find a better way to test the original problem with
+## MPI_Test.
+# nb_test 2 mpiversion=3.0 xfail=ticket1910
#include <mpi.h>
#include "mpitest.h"
-#define ITER 100
+#define ITER_PER_RANK 25
const int verbose = 0;
int i, j, rank, nproc;
int errors = 0, all_errors = 0;
int val = 0, one = 1;
+ int iter;
MPI_Aint *val_ptrs;
MPI_Win dyn_win;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
+ iter = ITER_PER_RANK * nproc;
+
val_ptrs = malloc(nproc * sizeof(MPI_Aint));
MPI_Get_address(&val, &val_ptrs[rank]);
MPI_COMM_WORLD);
MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &dyn_win);
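+    /* Attach the memory the accumulates actually target: the address
+       of val on each rank was gathered into val_ptrs above. */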
- MPI_Win_attach(dyn_win, &one, sizeof(int));
+ MPI_Win_attach(dyn_win, &val, sizeof(int));
- for (i = 0; i < ITER; i++) {
+ for (i = 0; i < iter; i++) {
MPI_Win_fence(MPI_MODE_NOPRECEDE, dyn_win);
MPI_Accumulate(&one, 1, MPI_INT, i%nproc, val_ptrs[i%nproc], 1, MPI_INT, MPI_SUM, dyn_win);
MPI_Win_fence(MPI_MODE_NOSUCCEED, dyn_win);
MPI_Barrier(MPI_COMM_WORLD);
/* Read and verify my data */
- if ( val != ITER ) {
+ if ( val != iter ) {
errors++;
- printf("%d -- Got %d, expected %d\n", rank, val, ITER);
+ printf("%d -- Got %d, expected %d\n", rank, val, iter);
}
- MPI_Win_detach(dyn_win, &one);
+ MPI_Win_detach(dyn_win, &val);
MPI_Win_free(&dyn_win);
MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Info_get(info_out, "same_size", MPI_MAX_INFO_VAL, buf, &flag);
if (flag && VERBOSE) printf("%d: same_size = %s\n", rank, buf);
+ MPI_Info_get(info_out, "alloc_shm", MPI_MAX_INFO_VAL, buf, &flag);
+ if (flag && VERBOSE) printf("%d: alloc_shm = %s\n", rank, buf);
+
MPI_Info_free(&info_in);
MPI_Info_free(&info_out);
MPI_Win_free(&win);
--- /dev/null
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ * (C) 2001 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+/* Test MPI_WIN_ALLOCATE and MPI_WIN_ALLOCATE_SHARED when allocating
+   SHM memory with a size of 1GB per process */
+
+#include "mpi.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+int main(int argc, char **argv) {
+ int my_rank, shared_rank;
+ void *mybase = NULL;
+ MPI_Win win;
+ MPI_Info win_info;
+ MPI_Comm shared_comm;
+ int shm_win_size = 1024 * 1024 * 1024 * sizeof(char); /* 1GB */
+
+ MPI_Init(&argc, &argv);
+
+ MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+
+ MPI_Info_create(&win_info);
+ MPI_Info_set(win_info, (char*)"alloc_shm", (char*)"true");
+
+ MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, my_rank, MPI_INFO_NULL, &shared_comm);
+
+ MPI_Comm_rank(shared_comm, &shared_rank);
+
+ /* every process allocates a 1GB window */
+ MPI_Win_allocate(shm_win_size, sizeof(char), win_info, MPI_COMM_WORLD, &mybase, &win);
+
+ MPI_Win_free(&win);
+
+ MPI_Win_allocate_shared(shm_win_size, sizeof(char), win_info, shared_comm, &mybase, &win);
+
+ MPI_Win_free(&win);
+
+ /* some processes allocate 1GB and some processes allocate zero bytes */
+ if (my_rank % 2 == 0)
+ MPI_Win_allocate(shm_win_size, sizeof(char), win_info, MPI_COMM_WORLD, &mybase, &win);
+ else
+ MPI_Win_allocate(0, sizeof(char), win_info, MPI_COMM_WORLD, &mybase, &win);
+
+ MPI_Win_free(&win);
+
+ if (shared_rank % 2 == 0)
+ MPI_Win_allocate_shared(shm_win_size, sizeof(char), win_info, shared_comm, &mybase, &win);
+ else
+ MPI_Win_allocate_shared(0, sizeof(char), win_info, shared_comm, &mybase, &win);
+
+ MPI_Win_free(&win);
+
+ /* some processes allocate 1GB and some allocate half as much */
+ if (my_rank % 2 == 0)
+ MPI_Win_allocate(shm_win_size, sizeof(char), win_info, MPI_COMM_WORLD, &mybase, &win);
+ else
+ MPI_Win_allocate(shm_win_size/2, sizeof(char), win_info, MPI_COMM_WORLD, &mybase, &win);
+
+ MPI_Win_free(&win);
+
+ /* some processes allocate 1GB and some allocate half as much */
+ if (shared_rank % 2 == 0)
+ MPI_Win_allocate_shared(shm_win_size, sizeof(char), win_info, shared_comm, &mybase, &win);
+ else
+ MPI_Win_allocate_shared(shm_win_size/2, sizeof(char), win_info, shared_comm, &mybase, &win);
+
+ MPI_Win_free(&win);
+
+ MPI_Comm_free(&shared_comm);
+
+ MPI_Info_free(&win_info);
+
+ if (my_rank == 0)
+ printf(" No Errors\n");
+
+ MPI_Finalize();
+
+ return 0;
+}
/* Locate absolute base */
MPI_Win_shared_query(shm_win, MPI_PROC_NULL, &size, &disp_unit, &base);
+ /* make sure the query returned the right values */
+ if (disp_unit != sizeof(int))
+ errors++;
+ if (size != ELEM_PER_PROC * sizeof(int))
+ errors++;
+ if ((shm_rank == 0) && (base != my_base))
+ errors++;
+ if (shm_rank && (base == my_base))
+ errors++;
+
if (verbose) printf("%d -- size = %d baseptr = %p my_baseptr = %p\n", shm_rank,
(int) size, (void*) base, (void*) my_base);
- assert(size == ELEM_PER_PROC * sizeof(int));
- if (shm_rank == 0)
- assert(base == my_base);
- else
- assert(base != my_base);
-
MPI_Win_lock_all(MPI_MODE_NOCHECK, shm_win);
/* Write to all my data */
MPI_Aint size;
MPI_Win_shared_query(shm_win, i, &size, &disp_unit, &base);
- assert(size == ELEM_PER_PROC * sizeof(int));
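+ /* An implementation may round each segment up (e.g., to a page
+    boundary), so only a lower bound on the queried size can be
+    asserted. */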
+ assert(size >= ELEM_PER_PROC * sizeof(int));
for (j = 0; j < ELEM_PER_PROC; j++) {
if ( base[j] != j ) {
MPI_Win_shared_query(shm_win, i, &size, &disp_unit, &base);
if (i % 2 == 0) {
- assert(size == ELEM_PER_PROC * sizeof(int));
+ assert(size >= ELEM_PER_PROC * sizeof(int));
for (j = 0; j < ELEM_PER_PROC; j++) {
if ( base[j] != j ) {
--- /dev/null
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ * (C) 2001 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+/* test MPI_WIN_ALLOCATE_SHARED, MPI_WIN_ALLOCATE, and MPI_WIN_CREATE
+   when the memory region is 0 bytes on some or all processes. */
+
+#include "mpi.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char **argv)
+{
+ MPI_Win win;
+ void *win_buf = NULL;
+ int world_rank, shm_rank;
+ MPI_Comm shm_comm;
+
+ MPI_Init(&argc, &argv);
+ MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+
+ MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, world_rank, MPI_INFO_NULL, &shm_comm);
+
+ MPI_Comm_rank(shm_comm, &shm_rank);
+
+ /* Some ranks allocate zero bytes */
+ if (shm_rank % 2 == 0)
+ MPI_Win_allocate_shared(0, sizeof(char), MPI_INFO_NULL, shm_comm, &win_buf, &win);
+ else
+ MPI_Win_allocate_shared(1, sizeof(char), MPI_INFO_NULL, shm_comm, &win_buf, &win);
+ MPI_Win_free(&win);
+
+ if (world_rank % 2 == 0)
+ MPI_Win_allocate(0, sizeof(char), MPI_INFO_NULL, MPI_COMM_WORLD, &win_buf, &win);
+ else
+ MPI_Win_allocate(1, sizeof(char), MPI_INFO_NULL, MPI_COMM_WORLD, &win_buf, &win);
+ MPI_Win_free(&win);
+
+ win_buf = NULL;
+ if (world_rank % 2 == 0)
+ MPI_Win_create(NULL, 0, sizeof(char), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
+ else {
+ win_buf = (void *) malloc(sizeof(char));
+ MPI_Win_create(win_buf, 1, sizeof(char), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
+ }
+ MPI_Win_free(&win);
+ if (win_buf)
+ free(win_buf);
+
+ /* All ranks allocate zero bytes */
+ MPI_Win_allocate_shared(0, sizeof(char), MPI_INFO_NULL, shm_comm, &win_buf, &win);
+ MPI_Win_free(&win);
+ MPI_Win_allocate(0, sizeof(char), MPI_INFO_NULL, MPI_COMM_WORLD, &win_buf, &win);
+ MPI_Win_free(&win);
+ MPI_Win_create(NULL, 0, sizeof(char), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
+ MPI_Win_free(&win);
+
+ MPI_Comm_free(&shm_comm);
+
+ if (world_rank == 0)
+ printf(" No Errors\n");
+
+ MPI_Finalize();
+
+ return 0;
+}
# add_executable(dgraph_unwgt dgraph_unwgt.c)
#add_executable(dims1 dims1.c)
#add_executable(dims2 dims2.c)
+#add_executable(dims3 dims3.c)
+#add_executable(dims4 dims4.c)
# add_executable(distgraph1 distgraph1.c)
# add_executable(graphcr2 graphcr2.c)
# add_executable(graphcr graphcr.c)
# target_link_libraries(dgraph_unwgt simgrid mtest_c)
# target_link_libraries(dims1 simgrid mtest_c)
# target_link_libraries(dims2 simgrid mtest_c)
+# target_link_libraries(dims3 simgrid mtest_c)
+# target_link_libraries(dims4 simgrid mtest_c)
# target_link_libraries(distgraph1 simgrid mtest_c)
# target_link_libraries(graphcr2 simgrid mtest_c)
# target_link_libraries(graphcr simgrid mtest_c)
${CMAKE_CURRENT_SOURCE_DIR}/dgraph_unwgt.c
${CMAKE_CURRENT_SOURCE_DIR}/dims1.c
${CMAKE_CURRENT_SOURCE_DIR}/dims2.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/dims3.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/dims4.c
${CMAKE_CURRENT_SOURCE_DIR}/distgraph1.c
${CMAKE_CURRENT_SOURCE_DIR}/graphcr2.c
${CMAKE_CURRENT_SOURCE_DIR}/graphcr.c
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ * (C) 2011 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
--- /dev/null
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ * (C) 2003 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
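+
+/* Check that MPI_Dims_create produces a reasonably balanced
+   decomposition for 3D and 4D grids over power-of-two node counts:
+   no dimension should be more than twice the next one. */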
+#include "mpi.h"
+#include <stdio.h>
+#include "mpitest.h"
+
+static inline void print_err(int *dims, int ndims)
+{
+ int i;
+
+ printf("[ ");
+ for (i = 0; i < ndims; i++)
+ printf("%d ", dims[i]);
+ printf("] Suboptimal distribution!\n");
+}
+
+int main(int argc, char **argv)
+{
+ int errs = 0;
+ int dims[4], ndims, nnodes;
+
+ MTest_Init(&argc, &argv);
+
+ for (ndims = 3; ndims <= 4; ndims++) {
+ for (nnodes = 2; nnodes <= 4096; nnodes *= 2) {
+ int i;
+ for (i = 0; i < ndims; i++)
+ dims[i] = 0;
+
+ MPI_Dims_create(nnodes, ndims, dims);
+
+ /* Checking: MPI_Dims_create returns the dimensions in
+    non-increasing order, so the decomposition is flagged as
+    suboptimal if any dimension is more than twice the next one. */
+ for (i = 0; i < ndims - 1; i++)
+ if (dims[i] / 2 > dims[i + 1]) {
+ print_err(dims, ndims);
+ ++errs;
+ break;
+ }
+ }
+ }
+
+ MTest_Finalize(errs);
+ MPI_Finalize();
+
+ return 0;
+}
--- /dev/null
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ * (C) 2011 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
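+
+/* Check MPI_Dims_create with a very large process count
+   (10^9 = 1000^3); the expected balanced decomposition is
+   1000 x 1000 x 1000. */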
+#include "mpi.h"
+#include <stdio.h>
+
+int main(int argc, char **argv)
+{
+ int nproc = (1000 * 1000 * 1000);
+ int ret[3] = {0, 0, 0};
+ int errs = 0, i, rank;
+
+ MPI_Init(&argc, &argv);
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+ MPI_Dims_create(nproc, 3, ret);
+
+ for (i = 0; i < 3; i++)
+     if (ret[i] != 1000) {
+         printf("dims[%d] = %d, expected 1000\n", i, ret[i]);
+         errs++;
+     }
+
+ if (!errs && rank == 0)
+     printf(" No Errors\n");
+
+ MPI_Finalize();
+ return 0;
+}
#need MPI_Dims_create
#dims1 4
#dims2 1
+#dims3 1
+#dims4 1
#need MPI_Error_class, MPI_Comm_remote_size, MPI_Graph_map
#graphmap1 4
#need MPI_Topo_test, MPI_Cart_create