IF(HAVE_TRACING)
ADD_TESH(tesh-smpi-replay-ti-tracing --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/pingpong --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/pingpong TI_output.tesh)
ENDIF()
- FOREACH (GATHER_COLL default ompi mpich ompi_basic_linear ompi_linear_sync ompi_binomial mvapich2)
+ FOREACH (GATHER_COLL default ompi mpich ompi_basic_linear ompi_linear_sync ompi_binomial mvapich2 impi)
ADD_TESH(tesh-smpi-gather-coll-${GATHER_COLL} --cfg smpi/gather:${GATHER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/gather --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/gather gather_coll.tesh)
ENDFOREACH()
FOREACH (ALLGATHER_COLL default 2dmesh 3dmesh bruck GB loosely_lr
NTSLR NTSLR_NB pair rdb rhv ring SMP_NTS
- smp_simple spreading_simple ompi mpich ompi_neighborexchange mvapich2)
+ smp_simple spreading_simple ompi mpich ompi_neighborexchange mvapich2 impi)
ADD_TESH(tesh-smpi-allgather-coll-${ALLGATHER_COLL} --cfg smpi/allgather:${ALLGATHER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allgather --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgather allgather_coll.tesh)
ENDFOREACH()
- FOREACH (ALLGATHERV_COLL default GB pair ring ompi mpich ompi_neighborexchange ompi_bruck mpich_rdb mpich_ring mvapich2)
+ FOREACH (ALLGATHERV_COLL default GB pair ring ompi mpich ompi_neighborexchange ompi_bruck mpich_rdb mpich_ring mvapich2 impi)
ADD_TESH(tesh-smpi-allgatherv-coll-${ALLGATHERV_COLL} --cfg smpi/allgatherv:${ALLGATHERV_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allgatherv --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgatherv allgatherv_coll.tesh)
ENDFOREACH()
FOREACH (ALLREDUCE_COLL default lr rab1 rab2 rab_rdb
rdb smp_binomial smp_binomial_pipeline
- smp_rdb smp_rsag smp_rsag_lr smp_rsag_rab redbcast ompi mpich ompi_ring_segmented mvapich2 mvapich2_rs)
+ smp_rdb smp_rsag smp_rsag_lr smp_rsag_rab redbcast ompi mpich ompi_ring_segmented mvapich2 mvapich2_rs impi)
ADD_TESH(tesh-smpi-allreduce-coll-${ALLREDUCE_COLL} --cfg smpi/allreduce:${ALLREDUCE_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allreduce --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allreduce allreduce_coll.tesh)
ENDFOREACH()
FOREACH (ALLREDUCE_COLL_LARGE ompi_ring_segmented)
ADD_TESH(tesh-smpi-allreduce-coll-large-${ALLREDUCE_COLL_LARGE} --cfg smpi/allreduce:${ALLREDUCE_COLL_LARGE} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allreduce --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allreduce allreduce_coll_large.tesh)
ENDFOREACH()
- FOREACH (ALLTOALL_COLL 2dmesh 3dmesh pair pair_one_barrier pair_light_barrier
+ # One alltoall tesh test per algorithm name. Names are whitespace-separated: a comma would become part of the name.
+ FOREACH (ALLTOALL_COLL 2dmesh 3dmesh pair pair_rma pair_one_barrier pair_light_barrier
pair_mpi_barrier rdb ring ring_light_barrier
ring_mpi_barrier ring_one_barrier
- bruck basic_linear ompi mpich mvapich2 mvapich2_scatter_dest)
+ bruck basic_linear ompi mpich mvapich2 mvapich2_scatter_dest impi)
ADD_TESH(tesh-smpi-alltoall-coll-${ALLTOALL_COLL} --cfg smpi/alltoall:${ALLTOALL_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/alltoall --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/alltoall alltoall_coll.tesh)
ENDFOREACH()
FOREACH (ALLTOALLV_COLL default pair pair_light_barrier pair_mpi_barrier
pair_one_barrier ring ring_light_barrier
- ring_mpi_barrier ring_one_barrier bruck ompi mpich mvapich2 ompi_basic_linear)
+ ring_mpi_barrier ring_one_barrier bruck ompi mpich mvapich2 ompi_basic_linear impi)
ADD_TESH(tesh-smpi-alltoallv-coll-${ALLTOALLV_COLL} --cfg smpi/alltoallv:${ALLTOALLV_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/alltoallv --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/alltoallv alltoallv_coll.tesh)
ENDFOREACH()
FOREACH (BCAST_COLL default arrival_pattern_aware arrival_pattern_aware_wait arrival_scatter
binomial_tree flattree flattree_pipeline NTSB NTSL NTSL_Isend scatter_LR_allgather
- scatter_rdb_allgather SMP_binary SMP_binomial SMP_linear ompi mpich ompi_split_bintree ompi_pipeline mvapich2)
+ scatter_rdb_allgather SMP_binary SMP_binomial SMP_linear ompi mpich ompi_split_bintree ompi_pipeline mvapich2 impi)
ADD_TESH(tesh-smpi-bcast-coll-${BCAST_COLL} --cfg smpi/bcast:${BCAST_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/bcast --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/bcast bcast_coll.tesh)
ENDFOREACH()
- FOREACH (REDUCE_COLL default arrival_pattern_aware binomial flat_tree NTSL scatter_gather ompi mpich ompi_chain ompi_binary ompi_basic_linear ompi_binomial ompi_in_order_binary mvapich2 mvapich2_knomial rab)
- FOREACH (REDUCE_COLL default arrival_pattern_aware binomial flat_tree NTSL scatter_gather ompi mpich ompi_chain ompi_binary ompi_basic_linear ompi_binomial ompi_in_order_binary mvapich2 mvapich2_knomial)
+ FOREACH (REDUCE_COLL default arrival_pattern_aware binomial flat_tree NTSL scatter_gather ompi mpich ompi_chain ompi_binary ompi_basic_linear ompi_binomial ompi_in_order_binary mvapich2 mvapich2_knomial impi rab)
ADD_TESH(tesh-smpi-reduce-coll-${REDUCE_COLL} --cfg smpi/reduce:${REDUCE_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/reduce --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/reduce reduce_coll.tesh)
ENDFOREACH()
- FOREACH (REDUCE_SCATTER_COLL default ompi mpich ompi_basic_recursivehalving ompi_ring mpich_noncomm mpich_pair mvapich2 mpich_rdb)
+ FOREACH (REDUCE_SCATTER_COLL default ompi mpich ompi_basic_recursivehalving ompi_ring mpich_noncomm mpich_pair mvapich2 mpich_rdb impi)
ADD_TESH(tesh-smpi-reduce-scatter-coll-${REDUCE_SCATTER_COLL} --cfg smpi/reduce_scatter:${REDUCE_SCATTER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/reduce --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/reduce reduce_scatter_coll.tesh)
ENDFOREACH()
- FOREACH (SCATTER_COLL default ompi mpich ompi_basic_linear ompi_binomial mvapich2)
+ FOREACH (SCATTER_COLL default ompi mpich ompi_basic_linear ompi_binomial mvapich2 impi)
ADD_TESH(tesh-smpi-scatter-coll-${SCATTER_COLL} --cfg smpi/scatter:${SCATTER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/scatter --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/scatter scatter_coll.tesh)
ENDFOREACH()
- FOREACH (BARRIER_COLL default ompi mpich ompi_basic_linear ompi_tree ompi_bruck ompi_recursivedoubling ompi_doublering mvapich2_pair mvapich2)
+ FOREACH (BARRIER_COLL default ompi mpich ompi_basic_linear ompi_tree ompi_bruck ompi_recursivedoubling ompi_doublering mvapich2_pair mvapich2 impi)
ADD_TESH(tesh-smpi-barrier-coll-${BARRIER_COLL} --cfg smpi/barrier:${BARRIER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/barrier --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/barrier barrier_coll.tesh)
ENDFOREACH()
# END TESH TESTS
IF(enable_smpi_MPICH3_testsuite)
ADD_TEST(test-smpi-mpich3-coll-thread ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:thread -execarg=--cfg=smpi/privatize_global_variables:yes)
- ADD_TEST(test-smpi-mpich3-coll-ompi-thread ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:thread -execarg=--cfg=smpi/coll_selector:ompi -execarg=--cfg=smpi/send_is_detached_thres:0 -execarg=--cfg=smpi/privatize_global_variables:yes -execarg=--cfg=smpi/bcast:binomial_tree)
- ADD_TEST(test-smpi-mpich3-coll-mpich-thread ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:thread -execarg=--cfg=smpi/coll_selector:mpich -execarg=--cfg=smpi/privatize_global_variables:yes)
- ADD_TEST(test-smpi-mpich3-coll-mvapich2-thread ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:thread -execarg=--cfg=smpi/coll_selector:mvapich2 -execarg=--cfg=smpi/privatize_global_variables:yes)
- SET_TESTS_PROPERTIES(test-smpi-mpich3-coll-thread test-smpi-mpich3-coll-ompi-thread test-smpi-mpich3-coll-mpich-thread test-smpi-mpich3-coll-mvapich2-thread PROPERTIES PASS_REGULAR_EXPRESSION "tests passed!")
+ SET_TESTS_PROPERTIES(test-smpi-mpich3-coll-thread PROPERTIES PASS_REGULAR_EXPRESSION "tests passed!")
IF(CONTEXT_UCONTEXT)
ADD_TEST(test-smpi-mpich3-coll-ompi-ucontext ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:ucontext -execarg=--cfg=smpi/coll_selector:ompi -execarg=--cfg=smpi/send_is_detached_thres:0 -execarg=--cfg=smpi/privatize_global_variables:yes -execarg=--cfg=smpi/bcast:binomial_tree)
SET_TESTS_PROPERTIES(test-smpi-mpich3-coll-ompi-ucontext PROPERTIES PASS_REGULAR_EXPRESSION "tests passed!")
ENDIF()
IF(HAVE_RAWCTX)
ADD_TEST(test-smpi-mpich3-coll-mpich-raw ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:raw -execarg=--cfg=smpi/coll_selector:mpich -execarg=--cfg=smpi/privatize_global_variables:yes)
- SET_TESTS_PROPERTIES(test-smpi-mpich3-coll-mpich-raw PROPERTIES PASS_REGULAR_EXPRESSION "tests passed!")
+ ADD_TEST(test-smpi-mpich3-coll-ompi-raw ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:raw -execarg=--cfg=smpi/coll_selector:ompi -execarg=--cfg=smpi/send_is_detached_thres:0 -execarg=--cfg=smpi/privatize_global_variables:yes -execarg=--cfg=smpi/bcast:binomial_tree)
+ ADD_TEST(test-smpi-mpich3-coll-mpich-raw ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:raw -execarg=--cfg=smpi/coll_selector:mpich -execarg=--cfg=smpi/privatize_global_variables:yes)
+ ADD_TEST(test-smpi-mpich3-coll-mvapich2-raw ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:raw -execarg=--cfg=smpi/coll_selector:mvapich2 -execarg=--cfg=smpi/privatize_global_variables:yes)
+ ADD_TEST(test-smpi-mpich3-coll-impi-raw ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:raw -execarg=--cfg=smpi/coll_selector:impi -execarg=--cfg=smpi/privatize_global_variables:yes)
+ # Each raw-context collective run passes only when its output matches "tests passed!".
+ SET_TESTS_PROPERTIES(test-smpi-mpich3-coll-ompi-raw test-smpi-mpich3-coll-mpich-raw test-smpi-mpich3-coll-mvapich2-raw test-smpi-mpich3-coll-impi-raw PROPERTIES PASS_REGULAR_EXPRESSION "tests passed!")
ENDIF()
IF(HAVE_RAWCTX)
ADD_TEST(test-smpi-mpich3-attr-raw ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/attr perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/attr -tests=testlist -execarg=--cfg=contexts/factory:raw)
COLL_APPLY(action, COLL_GATHER_SIG, ompi_linear_sync) COLL_sep \
COLL_APPLY(action, COLL_GATHER_SIG, mpich) COLL_sep \
COLL_APPLY(action, COLL_GATHER_SIG, mvapich2) COLL_sep \
+COLL_APPLY(action, COLL_GATHER_SIG, impi) COLL_sep \
COLL_APPLY(action, COLL_GATHER_SIG, automatic)
COLL_APPLY(action, COLL_ALLGATHER_SIG, ompi_neighborexchange) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHER_SIG, mvapich2) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHER_SIG, mpich) COLL_sep \
+COLL_APPLY(action, COLL_ALLGATHER_SIG, impi) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHER_SIG, automatic)
COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich_rdb) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich_ring) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHERV_SIG, mvapich2) COLL_sep \
+COLL_APPLY(action, COLL_ALLGATHERV_SIG, impi) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHERV_SIG, automatic)
COLL_ALLGATHERVS(COLL_PROTO, COLL_NOsep)
COLL_APPLY(action, COLL_ALLREDUCE_SIG, mpich) COLL_sep \
COLL_APPLY(action, COLL_ALLREDUCE_SIG, mvapich2) COLL_sep \
COLL_APPLY(action, COLL_ALLREDUCE_SIG, mvapich2_rs) COLL_sep \
+COLL_APPLY(action, COLL_ALLREDUCE_SIG, impi) COLL_sep \
COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab) COLL_sep \
COLL_APPLY(action, COLL_ALLREDUCE_SIG, automatic)
COLL_APPLY(action, COLL_ALLTOALL_SIG, mvapich2_scatter_dest) COLL_sep \
COLL_APPLY(action, COLL_ALLTOALL_SIG, ompi) COLL_sep \
COLL_APPLY(action, COLL_ALLTOALL_SIG, mpich) COLL_sep \
+COLL_APPLY(action, COLL_ALLTOALL_SIG, impi) COLL_sep \
COLL_APPLY(action, COLL_ALLTOALL_SIG, automatic)
COLL_ALLTOALLS(COLL_PROTO, COLL_NOsep)
COLL_APPLY(action, COLL_ALLTOALLV_SIG, mpich) COLL_sep \
COLL_APPLY(action, COLL_ALLTOALLV_SIG, ompi_basic_linear) COLL_sep \
COLL_APPLY(action, COLL_ALLTOALLV_SIG, mvapich2) COLL_sep \
+COLL_APPLY(action, COLL_ALLTOALLV_SIG, impi) COLL_sep \
COLL_APPLY(action, COLL_ALLTOALLV_SIG, automatic)
COLL_ALLTOALLVS(COLL_PROTO, COLL_NOsep)
COLL_APPLY(action, COLL_BCAST_SIG, ompi_pipeline) COLL_sep \
COLL_APPLY(action, COLL_BCAST_SIG, mpich) COLL_sep \
COLL_APPLY(action, COLL_BCAST_SIG, mvapich2) COLL_sep \
+COLL_APPLY(action, COLL_BCAST_SIG, impi) COLL_sep \
COLL_APPLY(action, COLL_BCAST_SIG, automatic)
COLL_BCASTS(COLL_PROTO, COLL_NOsep)
COLL_APPLY(action, COLL_REDUCE_SIG, mpich) COLL_sep \
COLL_APPLY(action, COLL_REDUCE_SIG, mvapich2) COLL_sep \
COLL_APPLY(action, COLL_REDUCE_SIG, mvapich2_knomial) COLL_sep \
+COLL_APPLY(action, COLL_REDUCE_SIG, impi) COLL_sep \
COLL_APPLY(action, COLL_REDUCE_SIG, rab) COLL_sep \
COLL_APPLY(action, COLL_REDUCE_SIG, automatic)
COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, mpich_rdb) COLL_sep \
COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, mpich_noncomm) COLL_sep \
COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, mvapich2) COLL_sep \
+COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, impi) COLL_sep \
COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, automatic)
COLL_APPLY(action, COLL_SCATTER_SIG, ompi_binomial) COLL_sep \
COLL_APPLY(action, COLL_SCATTER_SIG, mpich) COLL_sep \
COLL_APPLY(action, COLL_SCATTER_SIG, mvapich2) COLL_sep \
+COLL_APPLY(action, COLL_SCATTER_SIG, impi) COLL_sep \
COLL_APPLY(action, COLL_SCATTER_SIG, automatic)
COLL_SCATTERS(COLL_PROTO, COLL_NOsep)
COLL_APPLY(action, COLL_BARRIER_SIG, mpich) COLL_sep \
COLL_APPLY(action, COLL_BARRIER_SIG, mvapich2_pair) COLL_sep \
COLL_APPLY(action, COLL_BARRIER_SIG, mvapich2) COLL_sep \
+COLL_APPLY(action, COLL_BARRIER_SIG, impi) COLL_sep \
COLL_APPLY(action, COLL_BARRIER_SIG, automatic)
COLL_BARRIERS(COLL_PROTO, COLL_NOsep)
--- /dev/null
+/* selector for collective algorithms mimicking the choices made by Intel MPI (see the tuning tables below) */
+
+/* Copyright (c) 2009-2010, 2013-2014. The SimGrid Team.
+ * All rights reserved. */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+#include "colls_private.h"
+
+
+// This selector is based on information gathered on the Stampede cluster, with Intel MPI 4.1.3.049, and from the intel reference manual. The data was gathered launching one process/node. With other settings, selection will be different (more SMP aware algorithms, for instance)
+
+
+#define INTEL_MAX_NB_THRESHOLDS 32
+
+typedef struct { /* one (size threshold, algorithm) pair of a tuning-table row */
+ int max_size; /* threshold that IMPI_COLL_SELECT compares against the computed block_dsize */
+ int algo; /* 1-based algorithm number; the selector indexes the functions table with algo-1 */
+} intel_tuning_table_element_element;
+
+typedef struct { /* one tuning-table row, picked by IMPI_COLL_SELECT from the communicator size */
+ int max_num_proc; /* the selector skips this row while comm_size >= max_num_proc */
+ int num_elems; /* number of entries of elems[] that the selector is allowed to scan */
+ intel_tuning_table_element_element elems[INTEL_MAX_NB_THRESHOLDS]; /* (max_size, algo) pairs, scanned in order */
+} intel_tuning_table_element;
+
+/*
+I_MPI_ADJUST_ALLREDUCE
+
+MPI_Allreduce
+
+1 - Recursive doubling algorithm
+2 - Rabenseifner's algorithm
+3 - Reduce + Bcast algorithm
+4 - Topology aware Reduce + Bcast algorithm
+5 - Binomial gather + scatter algorithm
+6 - Topology aware binomial gather + scatter algorithm
+7 - Shumilin's ring algorithm
+8 - Ring algorithm
+
+
+As Shumilin's ring algorithm is unknown, it defaults to the ring algorithm.
+*/
+
+
+int (*intel_allreduce_functions_table[])(void *sendbuf,
+ void *recvbuf,
+ int count,
+ MPI_Datatype datatype,
+ MPI_Op op, MPI_Comm comm) ={
+ smpi_coll_tuned_allreduce_rdb,
+ smpi_coll_tuned_allreduce_rab1,
+ smpi_coll_tuned_allreduce_redbcast,
+ smpi_coll_tuned_allreduce_redbcast,
+ smpi_coll_tuned_allreduce_smp_binomial,
+ smpi_coll_tuned_allreduce_smp_binomial,
+ smpi_coll_tuned_allreduce_ompi_ring_segmented,
+ smpi_coll_tuned_allreduce_ompi_ring_segmented
+};
+
+intel_tuning_table_element intel_allreduce_table[] =
+{
+ { 2,9,{
+ {6,7},
+ {85,1},
+ {192,7},
+ {853,1},
+ {1279,7},
+ {16684,1},
+ {34279,8},
+ {1681224,3},
+ {2147483647,7}
+ }
+ },
+ { 4, 8,{
+ {16,7},
+ {47,1},
+ {2062,7},
+ {16699,1},
+ {33627,7},
+ {70732,8},
+ {1300705,3},
+ {2147483647,8}
+ }
+ },
+ {8,8,{
+ {118,1},
+ {146,4},
+ {16760,1},
+ {36364,6},
+ {136239,8},
+ {315710,7},
+ {3220366,3},
+ {2147483647,8}
+ }
+ },
+ {16,7,{
+ {934,1},
+ {1160,6},
+ {15505,1},
+ {52730,2},
+ {300705,8},
+ {563680,7},
+ {2147483647,3}
+ }
+ },
+ {2147483647,11,{
+ {5,6},
+ {11,4},
+ {182,1},
+ {700,6},
+ {1450,4},
+ {11146,1},
+ {25539,6},
+ {37634,4},
+ {93784,6},
+ {817658,2},
+ {2147483647,3}
+ }
+ }
+};
+
+
+
+/*I_MPI_ADJUST_ALLTOALL
+
+MPI_Alltoall
+
+1. Bruck's algorithm
+2. Isend/Irecv + waitall algorithm
+3. Pair wise exchange algorithm
+4. Plum's algorithm
+
+*/
+
+
+intel_tuning_table_element intel_alltoall_table[] =
+{
+ { 2,1,
+ {
+ {2147483647,3}
+ }
+ },
+ { 4,2,
+ {
+ {0,4},
+ {2147483647,2}
+ }
+ },
+ {8,1,
+ {
+ {2147483647,2}
+ }
+ },
+ {16,5,
+ {
+ {0,3},
+ {84645,2},
+ {167570,3},
+ {413152,4},
+ {2147483647,2}
+ }
+ },
+ {32,6,
+ {
+ {61,1},
+ {164,2},
+ {696,1},
+ {143254,2},
+ {387024,3},
+ {2147483647,2}
+ },
+ },
+ {64,4,
+ {
+ {523,1},
+ {146088,2},
+ {488989,4},
+ {2147483647,2}
+ }
+ },
+ {2147483647,3,
+ {
+ {270,1},
+ {628,4},
+ {2147483647,2}
+ }
+ }
+};
+int (*intel_alltoall_functions_table[])(void *sbuf, int scount,
+ MPI_Datatype sdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
+ MPI_Comm comm) ={
+ smpi_coll_tuned_alltoall_bruck,
+ smpi_coll_tuned_alltoall_mvapich2_scatter_dest,
+ smpi_coll_tuned_alltoall_pair,
+ smpi_coll_tuned_alltoall_pair//Plum is proprietary ? (and super efficient)
+};
+
+/*I_MPI_ADJUST_BARRIER
+
+MPI_Barrier
+
+1. Dissemination algorithm
+2. Recursive doubling algorithm
+3. Topology aware dissemination algorithm
+4. Topology aware recursive doubling algorithm
+5. Binomial gather + scatter algorithm
+6. Topology aware binomial gather + scatter algorithm
+
+*/
+/* Barrier entry used for the "gather + scatter" slots of the Intel table.
+ * It delegates to the default SMPI barrier (which performs an antibcast/bcast)
+ * and always reports MPI_SUCCESS. */
+static int intel_barrier_gather_scatter(MPI_Comm comm)
+{
+  smpi_mpi_barrier(comm);
+
+  return MPI_SUCCESS;
+}
+
+int (*intel_barrier_functions_table[])(MPI_Comm comm) ={
+ smpi_coll_tuned_barrier_ompi_basic_linear,
+ smpi_coll_tuned_barrier_ompi_recursivedoubling,
+ smpi_coll_tuned_barrier_ompi_basic_linear,
+ smpi_coll_tuned_barrier_ompi_recursivedoubling,
+ intel_barrier_gather_scatter,
+ intel_barrier_gather_scatter
+};
+
+intel_tuning_table_element intel_barrier_table[] =
+{
+ {2,1,
+ {
+ {2147483647,2}
+ }
+ },
+ {4,1,
+ {
+ {2147483647,6}
+ }
+ },
+ {8,1,
+ {
+ {2147483647,1}
+ }
+ },
+ {64,1,
+ {
+ {2147483647,2}
+ }
+ },
+ {2147483647,1,
+ {
+ {2147483647,6}
+ }
+ }
+};
+
+
+/*I_MPI_ADJUST_BCAST
+
+MPI_Bcast
+
+1. Binomial algorithm
+2. Recursive doubling algorithm
+3. Ring algorithm
+4. Topology aware binomial algorithm
+5. Topology aware recursive doubling algorithm
+6. Topology aware ring algorithm
+7. Shumilin's bcast algorithm
+*/
+
+int (*intel_bcast_functions_table[])(void *buff, int count,
+ MPI_Datatype datatype, int root,
+ MPI_Comm comm) ={
+ smpi_coll_tuned_bcast_binomial_tree,
+ //smpi_coll_tuned_bcast_scatter_rdb_allgather,
+ smpi_coll_tuned_bcast_NTSL,
+ smpi_coll_tuned_bcast_NTSL,
+ smpi_coll_tuned_bcast_SMP_binomial,
+ //smpi_coll_tuned_bcast_scatter_rdb_allgather,
+ smpi_coll_tuned_bcast_NTSL,
+ smpi_coll_tuned_bcast_SMP_linear,
+ smpi_coll_tuned_bcast_mvapich2,//we don't know shumilin's algo'
+};
+
+intel_tuning_table_element intel_bcast_table[] =
+{
+ {2,9,
+ {
+ {1,2},
+ {402,7},
+ {682,5},
+ {1433,4},
+ {5734,7},
+ {21845,1},
+ {95963,6},
+ {409897,5},
+ {2147483647,1}
+ }
+ },
+ {4,1,
+ {
+ {2147483647,7}
+ }
+ },
+ {8,11,
+ {
+ {3,6},
+ {4,7},
+ {25,6},
+ {256,1},
+ {682,6},
+ {1264,1},
+ {2234,6},
+ {6655,5},
+ {16336,1},
+ {3998434,7},
+ {2147483647,6}
+ }
+ },
+ {2147483647,1,
+ {
+ {2147483647,7}
+ }
+ }
+};
+
+
+/*I_MPI_ADJUST_REDUCE
+
+MPI_Reduce
+
+1. Shumilin's algorithm
+2. Binomial algorithm
+3. Topology aware Shumilin's algorithm
+4. Topology aware binomial algorithm
+5. Rabenseifner's algorithm
+6. Topology aware Rabenseifner's algorithm
+
+*/
+
+int (*intel_reduce_functions_table[])(void *sendbuf, void *recvbuf,
+ int count, MPI_Datatype datatype,
+ MPI_Op op, int root,
+ MPI_Comm comm) ={
+ smpi_coll_tuned_reduce_mvapich2,
+ smpi_coll_tuned_reduce_binomial,
+ smpi_coll_tuned_reduce_mvapich2,
+ smpi_coll_tuned_reduce_binomial,
+ smpi_coll_tuned_reduce_rab,
+ smpi_coll_tuned_reduce_rab
+};
+
+intel_tuning_table_element intel_reduce_table[] =
+{
+ {2147483647,1,
+ {
+ {2147483647,1}
+ }
+ }
+};
+
+/* I_MPI_ADJUST_REDUCE_SCATTER
+
+MPI_Reduce_scatter
+
+1. Recursive halving algorithm
+2. Pair wise exchange algorithm
+3. Recursive doubling algorithm
+4. Reduce + Scatterv algorithm
+5. Topology aware Reduce + Scatterv algorithm
+
+*/
+/* Reduce_scatter entry used for the "Reduce + Scatterv" slots of the Intel table.
+ * It forwards every argument to smpi_mpi_reduce_scatter() and always reports
+ * MPI_SUCCESS. */
+static int intel_reduce_scatter_reduce_scatterv(void *sbuf, void *rbuf,
+                                                int *rcounts,
+                                                MPI_Datatype dtype,
+                                                MPI_Op op,
+                                                MPI_Comm comm)
+{
+  smpi_mpi_reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm);
+
+  return MPI_SUCCESS;
+}
+
+/* Reduce_scatter entry used for the "recursive halving" slot of the Intel table.
+ * Non-commutative operations go to the mvapich2 implementation; commutative
+ * ones go to the OpenMPI basic recursive-halving implementation. */
+static int intel_reduce_scatter_recursivehalving(void *sbuf, void *rbuf,
+                                                 int *rcounts,
+                                                 MPI_Datatype dtype,
+                                                 MPI_Op op,
+                                                 MPI_Comm comm)
+{
+  if (!smpi_op_is_commute(op))
+    return smpi_coll_tuned_reduce_scatter_mvapich2(sbuf, rbuf, rcounts,dtype, op,comm);
+
+  return smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(sbuf, rbuf, rcounts,dtype, op,comm);
+}
+
+int (*intel_reduce_scatter_functions_table[])( void *sbuf, void *rbuf,
+ int *rcounts,
+ MPI_Datatype dtype,
+ MPI_Op op,
+ MPI_Comm comm
+ ) ={
+ intel_reduce_scatter_recursivehalving,
+ smpi_coll_tuned_reduce_scatter_mpich_pair,
+ smpi_coll_tuned_reduce_scatter_mpich_rdb,
+ intel_reduce_scatter_reduce_scatterv,
+ intel_reduce_scatter_reduce_scatterv
+};
+
+intel_tuning_table_element intel_reduce_scatter_table[] =
+{
+ {2,5,
+ {
+ {5,4},
+ {522429,2},
+ {1375877,5},
+ {2932736,2},
+ {2147483647,5}
+ }
+ },
+ {4,9,
+ {
+ {4,4},
+ {15,1},
+ {120,3},
+ {651,1},
+ {12188,3},
+ {33890,1},
+ {572117,2},
+ {1410202,5},
+ {2147483647,2}
+ }
+ },
+ {8,7,
+ {
+ {4,4},
+ {2263,1},
+ {25007,3},
+ {34861,1},
+ {169625,2},
+ {2734000,4},
+ {2147483647,2}
+ }
+ },
+ {16,5,
+ {
+ {4,4},
+ {14228,1},
+ {46084,3},
+ {522139,2},
+ {2147483647,5}
+ }
+ },
+ {32,5,
+ {
+ {4,4},
+ {27516,1},
+ {61693,3},
+ {2483469,2},
+ {2147483647,5}
+ }
+ },
+ {64,4,
+ {
+ {0,3},
+ {4,4},
+ {100396,1},
+ {2147483647,2}
+ }
+ },
+ {2147483647,6,
+ {
+ {0,3},
+ {4,4},
+ {186926,1},
+ {278259,3},
+ {1500100,2},
+ {2147483647,5}
+ }
+ }
+};
+
+/* I_MPI_ADJUST_ALLGATHER
+
+MPI_Allgather
+
+1. Recursive doubling algorithm
+2. Bruck's algorithm
+3. Ring algorithm
+4. Topology aware Gatherv + Bcast algorithm
+
+*/
+
+int (*intel_allgather_functions_table[])(void *sbuf, int scount,
+ MPI_Datatype sdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
+ MPI_Comm comm
+ ) ={
+ smpi_coll_tuned_allgather_rdb,
+ smpi_coll_tuned_allgather_bruck,
+ smpi_coll_tuned_allgather_ring,
+ smpi_coll_tuned_allgather_GB
+};
+
+intel_tuning_table_element intel_allgather_table[] =
+{
+ {4,11,
+ {
+ {1,4},
+ {384,1},
+ {1533,4},
+ {3296,1},
+ {10763,4},
+ {31816,3},
+ {193343,4},
+ {405857,3},
+ {597626,4},
+ {1844323,3},
+ {2147483647,4}
+ }
+ },
+ {8,10,
+ {
+ {12,4},
+ {46,1},
+ {205,4},
+ {3422,2},
+ {4200,4},
+ {8748,1},
+ {24080,3},
+ {33244,4},
+ {371159,1},
+ {2147483647,3}
+ }
+ },
+ {16, 8,
+ {
+ {3,4},
+ {53,1},
+ {100,4},
+ {170,1},
+ {6077,4},
+ {127644,1},
+ {143741,4},
+ {2147483647,3}
+ }
+ },
+ {2147483647,10,
+ {
+ {184,1},
+ {320,4},
+ {759,1},
+ {1219,4},
+ {2633,1},
+ {8259,4},
+ {123678,1},
+ {160801,4},
+ {284341,1},
+ {2147483647,4}
+ }
+ }
+};
+
+/* I_MPI_ADJUST_ALLGATHERV
+
+MPI_Allgatherv
+
+1. Recursive doubling algorithm
+2. Bruck's algorithm
+3. Ring algorithm
+4. Topology aware Gatherv + Bcast algorithm
+
+*/
+
+int (*intel_allgatherv_functions_table[])(void *sbuf, int scount,
+ MPI_Datatype sdtype,
+ void* rbuf, int *rcounts,
+ int *rdispls,
+ MPI_Datatype rdtype,
+ MPI_Comm comm
+ ) ={
+ smpi_coll_tuned_allgatherv_mpich_rdb,
+ smpi_coll_tuned_allgatherv_ompi_bruck,
+ smpi_coll_tuned_allgatherv_ring,
+ smpi_coll_tuned_allgatherv_GB
+};
+
+intel_tuning_table_element intel_allgatherv_table[] =
+{
+ {2,3,
+ {
+ {259668,3},
+ {635750,4},
+ {2147483647,3}
+ }
+ },
+ {4,7,
+ {
+ {1,1},
+ {5,4},
+ {46,1},
+ {2590,2},
+ {1177259,3},
+ {2767234,4},
+ {2147483647,3}
+ }
+ },
+ {8, 6,
+ {
+ {99,2},
+ {143,1},
+ {4646,2},
+ {63522,3},
+ {2187806,4},
+ {2147483647,3}
+ }
+ },
+ {2147483647,7,
+ {
+ {1,1},
+ {5,4},
+ {46,1},
+ {2590,2},
+ {1177259,3},
+ {2767234,4},
+ {2147483647,3}
+ }
+ }
+};
+
+
+/* I_MPI_ADJUST_GATHER
+
+MPI_Gather
+
+1. Binomial algorithm
+2. Topology aware binomial algorithm
+3. Shumilin's algorithm
+
+*/
+
+int (*intel_gather_functions_table[])(void *sbuf, int scount,
+ MPI_Datatype sdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
+ int root,
+ MPI_Comm comm
+ ) ={
+ smpi_coll_tuned_gather_ompi_binomial,
+ smpi_coll_tuned_gather_ompi_binomial,
+ smpi_coll_tuned_gather_mvapich2
+};
+
+intel_tuning_table_element intel_gather_table[] = /* gather tuning rows: {max_num_proc, num_elems, {{max_size, algo}, ...}} */
+{
+ {8,3,
+ {
+ {17561,3},
+ {44791,2},
+ {2147483647,3}
+ }
+ },
+ {16,7,
+ {
+ {16932,3},
+ {84425,2},
+ {158363,3},
+ {702801,2},
+ {1341444,3},
+ {2413569,2},
+ {2147483647,3}
+ }
+ },
+ {2147483647,4, /* last row: its max_num_proc is the largest int value */
+ {
+ {47187,3},
+ {349696,2},
+ {2147483647,3},
+ {2147483647,1} /* NOTE(review): same threshold as the previous entry, so it looks unreachable for sizes below 2147483647 - confirm intent */
+ }
+ }
+};
+
+
+/* I_MPI_ADJUST_SCATTER
+
+MPI_Scatter
+
+1. Binomial algorithm
+2. Topology aware binomial algorithm
+3. Shumilin's algorithm
+
+*/
+
+int (*intel_scatter_functions_table[])(void *sbuf, int scount,
+ MPI_Datatype sdtype,
+ void* rbuf, int rcount,
+ MPI_Datatype rdtype,
+ int root, MPI_Comm comm
+ ) ={
+ smpi_coll_tuned_scatter_ompi_binomial,
+ smpi_coll_tuned_scatter_ompi_binomial,
+ smpi_coll_tuned_scatter_mvapich2
+};
+
+intel_tuning_table_element intel_scatter_table[] = /* scatter tuning rows: {max_num_proc, num_elems, {{max_size, algo}, ...}} */
+{
+ {2,2,
+ {
+ {16391,1},
+ {2147483647,3}
+ }
+ },
+ {4,6,
+ {
+ {16723,3},
+ {153541,2},
+ {425631,3},
+ {794142,2},
+ {1257027,3},
+ {2147483647,2}
+ }
+ },
+ {8,7,
+ {
+ {2633,3},
+ {6144,2},
+ {14043,3},
+ {24576,2},
+ {107995,3},
+ {1752729,2},
+ {2147483647,3}
+ }
+ },
+ {16,7,
+ {
+ {2043,3},
+ {2252,2},
+ {17749,3},
+ {106020,2},
+ {628654,3},
+ {3751354,2},
+ {2147483647,3}
+ }
+ },
+ {2147483647,4, /* NOTE(review): num_elems is 4 but five entries are listed below - confirm which one is intended */
+ {
+ {65907,3},
+ {245132,2},
+ {1042439,3},
+ {2147483647,2},
+ {2147483647,1} /* NOTE(review): lies beyond the declared num_elems and repeats the previous threshold - confirm intent */
+ }
+ }
+};
+
+
+
+/* I_MPI_ADJUST_ALLTOALLV
+
+MPI_Alltoallv
+
+1. Isend/Irecv + waitall algorithm
+2. Plum's algorithm
+
+*/
+
+int (*intel_alltoallv_functions_table[])(void *sbuf, int *scounts, int *sdisps,
+ MPI_Datatype sdtype,
+ void *rbuf, int *rcounts, int *rdisps,
+ MPI_Datatype rdtype,
+ MPI_Comm comm
+ ) ={
+ smpi_coll_tuned_alltoallv_ompi_basic_linear,
+ smpi_coll_tuned_alltoallv_bruck
+};
+
+intel_tuning_table_element intel_alltoallv_table[] =
+{
+ {2147483647,1,
+ {
+ {2147483647,1}
+ }
+ }
+};
+
+
+//These are collected from table 3.5-2 of the Intel MPI Reference Manual
+
+
+#define SIZECOMP_reduce_scatter\
+ int total_message_size = 0;\
+ for (i = 0; i < comm_size; i++) { \
+ total_message_size += rcounts[i];\
+ }\
+ size_t block_dsize = total_message_size*smpi_datatype_size(dtype);\
+
+#define SIZECOMP_allreduce\
+ size_t block_dsize =rcount * smpi_datatype_size(dtype);
+
+#define SIZECOMP_alltoall\
+ size_t block_dsize =send_count * smpi_datatype_size(send_type);
+
+#define SIZECOMP_bcast\
+ size_t block_dsize =count * smpi_datatype_size(datatype);
+
+#define SIZECOMP_reduce\
+ size_t block_dsize =count * smpi_datatype_size(datatype);
+
+#define SIZECOMP_barrier\
+ size_t block_dsize = 1;
+
+#define SIZECOMP_allgather\
+ size_t block_dsize =recv_count * smpi_datatype_size(recv_type);
+
+#define SIZECOMP_allgatherv\
+ int total_message_size = 0;\
+ for (i = 0; i < comm_size; i++) { \
+ total_message_size += recv_count[i];\
+ }\
+ size_t block_dsize = total_message_size*smpi_datatype_size(recv_type);
+
+#define SIZECOMP_gather\
+ int rank = smpi_comm_rank(comm);\
+ size_t block_dsize = (send_buff == MPI_IN_PLACE || rank ==root) ?\
+ recv_count * smpi_datatype_size(recv_type) :\
+ send_count * smpi_datatype_size(send_type);
+
+#define SIZECOMP_scatter\
+ int rank = smpi_comm_rank(comm);\
+ size_t block_dsize = (sendbuf == MPI_IN_PLACE || rank !=root ) ?\
+ recvcount * smpi_datatype_size(recvtype) :\
+ sendcount * smpi_datatype_size(sendtype);
+
+#define SIZECOMP_alltoallv\
+ size_t block_dsize = 1;
+
+/* IMPI_COLL_SELECT(cat, ret, args, args2)
+ *
+ * Defines `ret smpi_coll_tuned_<cat>_impi(args)`, the Intel-MPI-like selector
+ * for collective <cat>:
+ *  1. SIZECOMP_<cat> computes block_dsize from the call arguments (it may use
+ *     the local `i` as a loop counter, hence the reset afterwards);
+ *  2. the first row of intel_<cat>_table whose max_num_proc is strictly
+ *     greater than comm_size is picked, or the last row if there is none;
+ *  3. inside that row, the first entry whose max_size is strictly greater than
+ *     block_dsize is picked, or the last declared entry if there is none;
+ *  4. the call is forwarded (with args2) to intel_<cat>_functions_table[algo-1].
+ *
+ * Both searches test the index before reading the table and stop on the last
+ * valid entry, so the selector never reads past the rows of the table nor past
+ * num_elems (where a zero-initialised algo would index the functions table
+ * at -1).
+ */
+#define IMPI_COLL_SELECT(cat, ret, args, args2)\
+ret smpi_coll_tuned_ ## cat ## _impi (COLL_UNPAREN args)\
+{\
+  int comm_size = smpi_comm_size(comm);\
+  int i = 0;\
+  SIZECOMP_ ## cat\
+  i = 0;\
+  int j = 0;\
+  int nb_rows = (int)(sizeof(intel_ ## cat ## _table) / sizeof(intel_ ## cat ## _table[0]));\
+  while (i < nb_rows - 1\
+         && comm_size >= intel_ ## cat ## _table[i].max_num_proc)\
+    i++;\
+  while (j < intel_ ## cat ## _table[i].num_elems - 1\
+         && block_dsize >= (size_t) intel_ ## cat ## _table[i].elems[j].max_size)\
+    j++;\
+  return (intel_ ## cat ## _functions_table[intel_ ## cat ## _table[i].elems[j].algo-1]\
+    args2);\
+}
+
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLREDUCE_SIG, (sbuf, rbuf, rcount, dtype, op, comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_GATHER_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, root, comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHER_SIG, (send_buff,send_count,send_type,recv_buff,recv_count,recv_type,comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLTOALL_SIG,(send_buff, send_count, send_type, recv_buff, recv_count, recv_type,comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLTOALLV_SIG, (send_buff, send_counts, send_disps, send_type, recv_buff, recv_counts, recv_disps, recv_type, comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_BCAST_SIG , (buf, count, datatype, root, comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_REDUCE_SIG,(buf,rbuf, count, datatype, op, root, comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_REDUCE_SCATTER_SIG ,(sbuf,rbuf, rcounts,dtype,op,comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_SCATTER_SIG ,(sendbuf, sendcount, sendtype,recvbuf, recvcount, recvtype,root, comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_BARRIER_SIG,(comm));
+