Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Add Intel MPI (impi) selector.
authorAugustin Degomme <augustin.degomme@imag.fr>
Mon, 28 Jul 2014 14:35:17 +0000 (16:35 +0200)
committerAugustin Degomme <augustin.degomme@imag.fr>
Mon, 28 Jul 2014 15:11:32 +0000 (17:11 +0200)
Thresholds were obtained on the Stampede cluster by activating debug output, with 1 process per node.
The algorithm list is taken from the Intel MPI documentation, which is available on Intel's website.

Known problems:
- SMP is not taken into account for now (the selection logic and the thresholds change in that case)
- some algorithms are unavailable (proprietary/undocumented), such as Shumilin's or Plum's, so other algorithms are substituted in these cases, which makes the selection less faithful.

buildtools/Cmake/AddTests.cmake
buildtools/Cmake/DefinePackages.cmake
src/smpi/colls/colls.h
src/smpi/colls/smpi_intel_mpi_selector.c [new file with mode: 0644]
src/smpi/smpi_mpi_dt.c

index 207d064..9f97524 100644 (file)
@@ -371,68 +371,68 @@ IF(NOT enable_memcheck)
     IF(HAVE_TRACING) # the TI_output.tesh replay test is only registered when HAVE_TRACING is set
       ADD_TESH(tesh-smpi-replay-ti-tracing       --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/pingpong --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/pingpong TI_output.tesh)
     ENDIF()
-    FOREACH (GATHER_COLL default ompi mpich ompi_basic_linear ompi_linear_sync ompi_binomial mvapich2)
+    FOREACH (GATHER_COLL default ompi mpich ompi_basic_linear ompi_linear_sync ompi_binomial mvapich2 impi) # one tesh test per gather algorithm; impi is the Intel MPI selector added by this commit
       ADD_TESH(tesh-smpi-gather-coll-${GATHER_COLL} --cfg smpi/gather:${GATHER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/gather --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/gather gather_coll.tesh)
     ENDFOREACH()
     FOREACH (ALLGATHER_COLL default  2dmesh 3dmesh bruck GB loosely_lr
                             NTSLR NTSLR_NB pair rdb  rhv ring SMP_NTS
-                            smp_simple spreading_simple ompi mpich ompi_neighborexchange mvapich2)
+                            smp_simple spreading_simple ompi mpich ompi_neighborexchange mvapich2 impi) # one tesh test per allgather algorithm; impi is the Intel MPI selector added by this commit
       ADD_TESH(tesh-smpi-allgather-coll-${ALLGATHER_COLL} --cfg smpi/allgather:${ALLGATHER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allgather --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgather allgather_coll.tesh)
     ENDFOREACH()
-    FOREACH (ALLGATHERV_COLL default GB pair ring ompi mpich ompi_neighborexchange ompi_bruck mpich_rdb mpich_ring mvapich2)
+    FOREACH (ALLGATHERV_COLL default GB pair ring ompi mpich ompi_neighborexchange ompi_bruck mpich_rdb mpich_ring mvapich2 impi) # one tesh test per allgatherv algorithm; impi is the Intel MPI selector added by this commit
       ADD_TESH(tesh-smpi-allgatherv-coll-${ALLGATHERV_COLL} --cfg smpi/allgatherv:${ALLGATHERV_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allgatherv --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgatherv allgatherv_coll.tesh)
     ENDFOREACH()
     FOREACH (ALLREDUCE_COLL default lr rab1 rab2 rab_rdb
                             rdb smp_binomial smp_binomial_pipeline
-                            smp_rdb smp_rsag smp_rsag_lr smp_rsag_rab redbcast ompi mpich ompi_ring_segmented mvapich2 mvapich2_rs)
+                            smp_rdb smp_rsag smp_rsag_lr smp_rsag_rab redbcast ompi mpich ompi_ring_segmented mvapich2 mvapich2_rs impi) # one tesh test per allreduce algorithm; impi is the Intel MPI selector added by this commit
       ADD_TESH(tesh-smpi-allreduce-coll-${ALLREDUCE_COLL} --cfg smpi/allreduce:${ALLREDUCE_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allreduce --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allreduce allreduce_coll.tesh)
     ENDFOREACH()
     FOREACH (ALLREDUCE_COLL_LARGE ompi_ring_segmented) # allreduce_coll_large.tesh is only run for the algorithms listed here
       ADD_TESH(tesh-smpi-allreduce-coll-large-${ALLREDUCE_COLL_LARGE} --cfg smpi/allreduce:${ALLREDUCE_COLL_LARGE} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allreduce --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allreduce allreduce_coll_large.tesh)
     ENDFOREACH()
-    FOREACH (ALLTOALL_COLL 2dmesh 3dmesh pair pair_one_barrier pair_light_barrier
+    FOREACH (ALLTOALL_COLL 2dmesh 3dmesh pair pair_rma pair_one_barrier pair_light_barrier
                            pair_mpi_barrier rdb ring ring_light_barrier
                            ring_mpi_barrier ring_one_barrier
-                           bruck basic_linear ompi mpich mvapich2 mvapich2_scatter_dest)
+                           bruck basic_linear ompi mpich mvapich2 mvapich2_scatter_dest impi) # one tesh test per alltoall algorithm; items are whitespace-separated (no commas), impi is the Intel MPI selector
       ADD_TESH(tesh-smpi-alltoall-coll-${ALLTOALL_COLL} --cfg smpi/alltoall:${ALLTOALL_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/alltoall --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/alltoall alltoall_coll.tesh)
     ENDFOREACH()
     FOREACH (ALLTOALLV_COLL default pair pair_light_barrier pair_mpi_barrier
                             pair_one_barrier  ring ring_light_barrier
-                            ring_mpi_barrier ring_one_barrier bruck ompi mpich mvapich2 ompi_basic_linear)
+                            ring_mpi_barrier ring_one_barrier bruck ompi mpich mvapich2 ompi_basic_linear impi) # one tesh test per alltoallv algorithm; impi is the Intel MPI selector added by this commit
       ADD_TESH(tesh-smpi-alltoallv-coll-${ALLTOALLV_COLL} --cfg smpi/alltoallv:${ALLTOALLV_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/alltoallv --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/alltoallv alltoallv_coll.tesh)
     ENDFOREACH()
     FOREACH (BCAST_COLL default arrival_pattern_aware arrival_pattern_aware_wait arrival_scatter
                         binomial_tree flattree flattree_pipeline NTSB NTSL NTSL_Isend scatter_LR_allgather
-                        scatter_rdb_allgather SMP_binary SMP_binomial SMP_linear ompi mpich ompi_split_bintree ompi_pipeline mvapich2)
+                        scatter_rdb_allgather SMP_binary SMP_binomial SMP_linear ompi mpich ompi_split_bintree ompi_pipeline mvapich2 impi) # one tesh test per bcast algorithm; impi is the Intel MPI selector added by this commit
       ADD_TESH(tesh-smpi-bcast-coll-${BCAST_COLL} --cfg smpi/bcast:${BCAST_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/bcast --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/bcast bcast_coll.tesh)
     ENDFOREACH()
-    FOREACH (REDUCE_COLL default arrival_pattern_aware binomial flat_tree NTSL scatter_gather ompi mpich ompi_chain ompi_binary ompi_basic_linear ompi_binomial ompi_in_order_binary mvapich2 mvapich2_knomial rab)
-    FOREACH (REDUCE_COLL default arrival_pattern_aware binomial flat_tree NTSL scatter_gather ompi mpich ompi_chain ompi_binary ompi_basic_linear ompi_binomial ompi_in_order_binary mvapich2 mvapich2_knomial)
+    FOREACH (REDUCE_COLL default arrival_pattern_aware binomial flat_tree NTSL scatter_gather ompi mpich ompi_chain ompi_binary ompi_basic_linear ompi_binomial ompi_in_order_binary mvapich2 mvapich2_knomial impi rab) # one tesh test per reduce algorithm; impi is the Intel MPI selector added by this commit
       ADD_TESH(tesh-smpi-reduce-coll-${REDUCE_COLL} --cfg smpi/reduce:${REDUCE_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/reduce --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/reduce reduce_coll.tesh)
     ENDFOREACH()
-    FOREACH (REDUCE_SCATTER_COLL default  ompi mpich ompi_basic_recursivehalving ompi_ring mpich_noncomm mpich_pair mvapich2 mpich_rdb)
+    FOREACH (REDUCE_SCATTER_COLL default  ompi mpich ompi_basic_recursivehalving ompi_ring mpich_noncomm mpich_pair mvapich2 mpich_rdb impi) # one tesh test per reduce_scatter algorithm; note that bindir and --cd below point at the reduce directory
       ADD_TESH(tesh-smpi-reduce-scatter-coll-${REDUCE_SCATTER_COLL} --cfg smpi/reduce_scatter:${REDUCE_SCATTER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/reduce --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/reduce reduce_scatter_coll.tesh)
     ENDFOREACH()
-    FOREACH (SCATTER_COLL default  ompi mpich ompi_basic_linear ompi_binomial mvapich2)
+    FOREACH (SCATTER_COLL default  ompi mpich ompi_basic_linear ompi_binomial mvapich2 impi) # one tesh test per scatter algorithm; impi is the Intel MPI selector added by this commit
       ADD_TESH(tesh-smpi-scatter-coll-${SCATTER_COLL} --cfg smpi/scatter:${SCATTER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/scatter --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/scatter scatter_coll.tesh)
     ENDFOREACH()
-    FOREACH (BARRIER_COLL default  ompi mpich ompi_basic_linear ompi_tree ompi_bruck ompi_recursivedoubling ompi_doublering mvapich2_pair mvapich2)
+    FOREACH (BARRIER_COLL default  ompi mpich ompi_basic_linear ompi_tree ompi_bruck ompi_recursivedoubling ompi_doublering mvapich2_pair mvapich2 impi) # one tesh test per barrier algorithm; impi is the Intel MPI selector added by this commit
       ADD_TESH(tesh-smpi-barrier-coll-${BARRIER_COLL} --cfg smpi/barrier:${BARRIER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/barrier --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/barrier barrier_coll.tesh)
     ENDFOREACH()
     # END TESH TESTS
     IF(enable_smpi_MPICH3_testsuite)
       ADD_TEST(test-smpi-mpich3-coll-thread      ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:thread -execarg=--cfg=smpi/privatize_global_variables:yes)
-      ADD_TEST(test-smpi-mpich3-coll-ompi-thread ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:thread -execarg=--cfg=smpi/coll_selector:ompi -execarg=--cfg=smpi/send_is_detached_thres:0 -execarg=--cfg=smpi/privatize_global_variables:yes -execarg=--cfg=smpi/bcast:binomial_tree)
-      ADD_TEST(test-smpi-mpich3-coll-mpich-thread ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:thread -execarg=--cfg=smpi/coll_selector:mpich -execarg=--cfg=smpi/privatize_global_variables:yes)
-      ADD_TEST(test-smpi-mpich3-coll-mvapich2-thread ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:thread -execarg=--cfg=smpi/coll_selector:mvapich2 -execarg=--cfg=smpi/privatize_global_variables:yes)
-      SET_TESTS_PROPERTIES(test-smpi-mpich3-coll-thread test-smpi-mpich3-coll-ompi-thread test-smpi-mpich3-coll-mpich-thread test-smpi-mpich3-coll-mvapich2-thread   PROPERTIES PASS_REGULAR_EXPRESSION "tests passed!")
+      SET_TESTS_PROPERTIES(test-smpi-mpich3-coll-thread    PROPERTIES PASS_REGULAR_EXPRESSION "tests passed!")
       IF(CONTEXT_UCONTEXT) # mpich3 collective suite with the ompi selector on the ucontext factory; only registered when CONTEXT_UCONTEXT is set
         ADD_TEST(test-smpi-mpich3-coll-ompi-ucontext ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:ucontext -execarg=--cfg=smpi/coll_selector:ompi -execarg=--cfg=smpi/send_is_detached_thres:0 -execarg=--cfg=smpi/privatize_global_variables:yes -execarg=--cfg=smpi/bcast:binomial_tree)
         SET_TESTS_PROPERTIES(test-smpi-mpich3-coll-ompi-ucontext PROPERTIES PASS_REGULAR_EXPRESSION "tests passed!") # the test passes only if its output matches "tests passed!"
       ENDIF()
       IF(HAVE_RAWCTX)
         ADD_TEST(test-smpi-mpich3-coll-mpich-raw   ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:raw -execarg=--cfg=smpi/coll_selector:mpich -execarg=--cfg=smpi/privatize_global_variables:yes)
-        SET_TESTS_PROPERTIES(test-smpi-mpich3-coll-mpich-raw PROPERTIES PASS_REGULAR_EXPRESSION "tests passed!")
+      ADD_TEST(test-smpi-mpich3-coll-ompi-raw ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:raw -execarg=--cfg=smpi/coll_selector:ompi -execarg=--cfg=smpi/send_is_detached_thres:0 -execarg=--cfg=smpi/privatize_global_variables:yes -execarg=--cfg=smpi/bcast:binomial_tree)
+      ADD_TEST(test-smpi-mpich3-coll-mpich-raw ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:raw -execarg=--cfg=smpi/coll_selector:mpich -execarg=--cfg=smpi/privatize_global_variables:yes)
+      ADD_TEST(test-smpi-mpich3-coll-mvapich2-raw ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:raw -execarg=--cfg=smpi/coll_selector:mvapich2 -execarg=--cfg=smpi/privatize_global_variables:yes)
+      ADD_TEST(test-smpi-mpich3-coll-impi-raw ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/coll perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/coll -tests=testlist -execarg=--cfg=contexts/factory:raw -execarg=--cfg=smpi/coll_selector:impi -execarg=--cfg=smpi/privatize_global_variables:yes)
+        SET_TESTS_PROPERTIES(test-smpi-mpich3-coll-mpich-raw test-smpi-mpich3-coll-ompi-raw test-smpi-mpich3-coll-mpich-raw test-smpi-mpich3-coll-mvapich2-raw test-smpi-mpich3-coll-impi-raw  PROPERTIES PASS_REGULAR_EXPRESSION "tests passed!")
       ENDIF()
       IF(HAVE_RAWCTX)
         ADD_TEST(test-smpi-mpich3-attr-raw       ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/teshsuite/smpi/mpich3-test/attr perl ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/runtests -mpiexec=${CMAKE_BINARY_DIR}/smpi_script/bin/smpirun -srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich3-test/attr -tests=testlist -execarg=--cfg=contexts/factory:raw)
index ae8123a..8c920a4 100644 (file)
@@ -226,6 +226,7 @@ set(SMPI_SRC
   src/smpi/colls/scatter-ompi.c
   src/smpi/colls/smpi_automatic_selector.c
   src/smpi/colls/smpi_mpich_selector.c
+  src/smpi/colls/smpi_intel_mpi_selector.c
   src/smpi/colls/smpi_openmpi_selector.c
   src/smpi/colls/smpi_mvapich2_selector.c
   src/smpi/instr_smpi.c
index 2b7d8a1..463a933 100644 (file)
@@ -42,6 +42,7 @@ COLL_APPLY(action, COLL_GATHER_SIG, ompi_binomial) COLL_sep \
 COLL_APPLY(action, COLL_GATHER_SIG, ompi_linear_sync) COLL_sep \
 COLL_APPLY(action, COLL_GATHER_SIG, mpich) COLL_sep \
 COLL_APPLY(action, COLL_GATHER_SIG, mvapich2) COLL_sep \
+COLL_APPLY(action, COLL_GATHER_SIG, impi) COLL_sep \
 COLL_APPLY(action, COLL_GATHER_SIG, automatic)
 
 
@@ -75,6 +76,7 @@ COLL_APPLY(action, COLL_ALLGATHER_SIG, ompi) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHER_SIG, ompi_neighborexchange) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHER_SIG, mvapich2) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHER_SIG, mpich) COLL_sep \
+COLL_APPLY(action, COLL_ALLGATHER_SIG, impi) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHER_SIG, automatic)
 
 
@@ -99,6 +101,7 @@ COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich_rdb) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich_ring) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, mvapich2) COLL_sep \
+COLL_APPLY(action, COLL_ALLGATHERV_SIG, impi) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, automatic)
 
 COLL_ALLGATHERVS(COLL_PROTO, COLL_NOsep)
@@ -128,6 +131,7 @@ COLL_APPLY(action, COLL_ALLREDUCE_SIG, ompi_ring_segmented) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, mpich) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, mvapich2) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, mvapich2_rs) COLL_sep \
+COLL_APPLY(action, COLL_ALLREDUCE_SIG, impi) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, automatic)
 
@@ -160,6 +164,7 @@ COLL_APPLY(action, COLL_ALLTOALL_SIG, mvapich2) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALL_SIG, mvapich2_scatter_dest) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALL_SIG, ompi) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALL_SIG, mpich) COLL_sep \
+COLL_APPLY(action, COLL_ALLTOALL_SIG, impi) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALL_SIG, automatic)
 
 COLL_ALLTOALLS(COLL_PROTO, COLL_NOsep)
@@ -186,6 +191,7 @@ COLL_APPLY(action, COLL_ALLTOALLV_SIG, ompi) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALLV_SIG, mpich) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALLV_SIG, ompi_basic_linear) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALLV_SIG, mvapich2) COLL_sep \
+COLL_APPLY(action, COLL_ALLTOALLV_SIG, impi) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALLV_SIG, automatic)
 
 COLL_ALLTOALLVS(COLL_PROTO, COLL_NOsep)
@@ -217,6 +223,7 @@ COLL_APPLY(action, COLL_BCAST_SIG, ompi_split_bintree) COLL_sep \
 COLL_APPLY(action, COLL_BCAST_SIG, ompi_pipeline) COLL_sep \
 COLL_APPLY(action, COLL_BCAST_SIG, mpich) COLL_sep \
 COLL_APPLY(action, COLL_BCAST_SIG, mvapich2)   COLL_sep \
+COLL_APPLY(action, COLL_BCAST_SIG, impi)   COLL_sep \
 COLL_APPLY(action, COLL_BCAST_SIG, automatic)
 
 COLL_BCASTS(COLL_PROTO, COLL_NOsep)
@@ -245,6 +252,7 @@ COLL_APPLY(action, COLL_REDUCE_SIG, ompi_binomial) COLL_sep \
 COLL_APPLY(action, COLL_REDUCE_SIG, mpich) COLL_sep \
 COLL_APPLY(action, COLL_REDUCE_SIG, mvapich2) COLL_sep \
 COLL_APPLY(action, COLL_REDUCE_SIG, mvapich2_knomial) COLL_sep \
+COLL_APPLY(action, COLL_REDUCE_SIG, impi) COLL_sep \
 COLL_APPLY(action, COLL_REDUCE_SIG, rab) COLL_sep \
 COLL_APPLY(action, COLL_REDUCE_SIG, automatic)
 
@@ -266,6 +274,7 @@ COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, mpich_pair) COLL_sep \
 COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, mpich_rdb) COLL_sep \
 COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, mpich_noncomm) COLL_sep \
 COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, mvapich2) COLL_sep \
+COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, impi) COLL_sep \
 COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, automatic)
 
 
@@ -287,6 +296,7 @@ COLL_APPLY(action, COLL_SCATTER_SIG, ompi_basic_linear) COLL_sep \
 COLL_APPLY(action, COLL_SCATTER_SIG, ompi_binomial)  COLL_sep \
 COLL_APPLY(action, COLL_SCATTER_SIG, mpich)   COLL_sep \
 COLL_APPLY(action, COLL_SCATTER_SIG, mvapich2)   COLL_sep \
+COLL_APPLY(action, COLL_SCATTER_SIG, impi)   COLL_sep \
 COLL_APPLY(action, COLL_SCATTER_SIG, automatic)
 
 COLL_SCATTERS(COLL_PROTO, COLL_NOsep)
@@ -308,6 +318,7 @@ COLL_APPLY(action, COLL_BARRIER_SIG, ompi_doublering) COLL_sep \
 COLL_APPLY(action, COLL_BARRIER_SIG, mpich)   COLL_sep \
 COLL_APPLY(action, COLL_BARRIER_SIG, mvapich2_pair)   COLL_sep \
 COLL_APPLY(action, COLL_BARRIER_SIG, mvapich2)   COLL_sep \
+COLL_APPLY(action, COLL_BARRIER_SIG, impi)   COLL_sep \
 COLL_APPLY(action, COLL_BARRIER_SIG, automatic)
 
 COLL_BARRIERS(COLL_PROTO, COLL_NOsep)
diff --git a/src/smpi/colls/smpi_intel_mpi_selector.c b/src/smpi/colls/smpi_intel_mpi_selector.c
new file mode 100644 (file)
index 0000000..a1e94bf
--- /dev/null
@@ -0,0 +1,867 @@
+/* selector for collective algorithms mimicking the algorithm choices made by Intel MPI */
+
+/* Copyright (c) 2009-2010, 2013-2014. The SimGrid Team.
+ * All rights reserved.                                                     */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+#include "colls_private.h"
+
+
+// This selector is based on information gathered on the Stampede cluster, with Intel MPI 4.1.3.049, and from the intel reference manual. The data was gathered launching one process/node. With other settings, selection will be different (more SMP aware algorithms, for instance)
+
+
+#define INTEL_MAX_NB_THRESHOLDS  32
+
+typedef struct { /* one threshold of a tuning table: a {max_size, algo} pair */
+  int max_size; /* presumably the largest size this entry applies to; 2147483647 is used for the last entry of each list in this file. Units (bytes vs. elements) are not visible here -- TODO confirm */
+  int algo; /* algorithm number; presumably the 1-based I_MPI_ADJUST_* number documented above each functions table -- TODO confirm against the lookup code */
+} intel_tuning_table_element_element;
+
+typedef struct { /* one bucket of a tuning table: the thresholds used for a given range of process counts */
+  int max_num_proc; /* presumably the largest number of processes this bucket applies to; 2147483647 is used for the last bucket of each table -- TODO confirm */
+  int num_elems; /* number of entries of elems[] that are filled in */
+  intel_tuning_table_element_element elems[INTEL_MAX_NB_THRESHOLDS]; /* {max_size, algo} thresholds, listed by increasing max_size in the tables of this file */
+} intel_tuning_table_element;
+
+/*
+I_MPI_ADJUST_ALLREDUCE
+
+MPI_Allreduce
+
+1 - Recursive doubling algorithm
+2 - Rabenseifner's algorithm
+3 - Reduce + Bcast algorithm
+4 - Topology aware Reduce + Bcast algorithm
+5 - Binomial gather + scatter algorithm
+6 - Topology aware binomial gather + scatter algorithm
+7 - Shumilin's ring algorithm 
+8 - Ring algorithm
+
+
+As Shumilin's ring algorithm is unknown, it defaults to the ring algorithm.
+*/
+
+
+int (*intel_allreduce_functions_table[])(void *sendbuf,
+      void *recvbuf,
+      int count,
+      MPI_Datatype datatype,
+      MPI_Op op, MPI_Comm comm) ={ /* entries follow the order of the I_MPI_ADJUST_ALLREDUCE list above (algorithm k at index k-1) */
+      smpi_coll_tuned_allreduce_rdb, /* 1 - recursive doubling */
+      smpi_coll_tuned_allreduce_rab1, /* 2 - Rabenseifner */
+      smpi_coll_tuned_allreduce_redbcast, /* 3 - reduce + bcast */
+      smpi_coll_tuned_allreduce_redbcast, /* 4 - topology aware reduce + bcast: same function as 3 */
+      smpi_coll_tuned_allreduce_smp_binomial, /* 5 - binomial gather + scatter */
+      smpi_coll_tuned_allreduce_smp_binomial, /* 6 - topology aware variant: same function as 5 */
+      smpi_coll_tuned_allreduce_ompi_ring_segmented, /* 7 - Shumilin's ring is unknown, so the ring implementation is used */
+      smpi_coll_tuned_allreduce_ompi_ring_segmented /* 8 - ring */
+};
+
+intel_tuning_table_element intel_allreduce_table[] = /* allreduce tuning: buckets by increasing max_num_proc, each holding num_elems {max_size, algo} pairs */
+{
+  { 2,9,{ /* max_num_proc = 2, 9 thresholds */
+    {6,7},
+    {85,1},
+    {192,7},
+    {853,1},
+    {1279,7},
+    {16684,1},
+    {34279,8},
+    {1681224,3},
+    {2147483647,7}
+  }
+  },
+  { 4, 8,{ /* max_num_proc = 4, 8 thresholds */
+    {16,7},
+    {47,1},
+    {2062,7},
+    {16699,1},
+    {33627,7},
+    {70732,8},
+    {1300705,3},
+    {2147483647,8}
+  }
+  },
+  {8,8,{ /* max_num_proc = 8, 8 thresholds */
+    {118,1},
+    {146,4},
+    {16760,1},
+    {36364,6},
+    {136239,8},
+    {315710,7},
+    {3220366,3},
+    {2147483647,8}
+    }
+  },
+  {16,7,{ /* max_num_proc = 16, 7 thresholds */
+    {934,1},
+    {1160,6},
+    {15505,1},
+    {52730,2},
+    {300705,8},
+    {563680,7},
+    {2147483647,3}
+    }
+  },
+  {2147483647,11,{ /* catch-all bucket (max_num_proc = INT_MAX), 11 thresholds */
+    {5,6},
+    {11,4},
+    {182,1},
+    {700,6},
+    {1450,4},
+    {11146,1},
+    {25539,6},
+    {37634,4},
+    {93784,6},
+    {817658,2},
+    {2147483647,3}
+  }
+  }
+};
+
+
+
+/*I_MPI_ADJUST_ALLTOALL 
+
+MPI_Alltoall 
+
+1. Bruck's algorithm 
+2. Isend/Irecv + waitall algorithm 
+3. Pair wise exchange algorithm 
+4. Plum's algorithm
+
+*/
+
+
+intel_tuning_table_element intel_alltoall_table[] = /* alltoall tuning: buckets by increasing max_num_proc, each holding num_elems {max_size, algo} pairs */
+{
+    { 2,1, /* max_num_proc = 2, 1 threshold */
+        {
+        {2147483647,3}
+        }
+    },
+    { 4,2, /* max_num_proc = 4, 2 thresholds */
+        {
+        {0,4},
+        {2147483647,2}
+        }
+    },
+    {8,1, /* max_num_proc = 8, 1 threshold */
+        {
+        {2147483647,2}
+        }
+    },
+    {16,5, /* max_num_proc = 16, 5 thresholds */
+        {
+        {0,3},
+        {84645,2},
+        {167570,3},
+        {413152,4},
+        {2147483647,2}
+        }
+    },
+    {32,6, /* max_num_proc = 32, 6 thresholds */
+        {
+        {61,1},
+        {164,2},
+        {696,1},
+        {143254,2},
+        {387024,3},
+        {2147483647,2}
+        },
+    },
+    {64,4, /* max_num_proc = 64, 4 thresholds */
+        {
+        {523,1},
+        {146088,2},
+        {488989,4},
+        {2147483647,2}
+        }
+    },
+    {2147483647,3, /* catch-all bucket (max_num_proc = INT_MAX), 3 thresholds */
+        {
+        {270,1},
+        {628,4},
+        {2147483647,2}
+        }
+    }
+};
+int (*intel_alltoall_functions_table[])(void *sbuf, int scount, 
+                                             MPI_Datatype sdtype,
+                                             void* rbuf, int rcount, 
+                                             MPI_Datatype rdtype, 
+                                             MPI_Comm comm) ={ /* entries follow the order of the I_MPI_ADJUST_ALLTOALL list above (algorithm k at index k-1) */
+      smpi_coll_tuned_alltoall_bruck, /* 1 - Bruck */
+      smpi_coll_tuned_alltoall_mvapich2_scatter_dest, /* 2 - Isend/Irecv + waitall */
+      smpi_coll_tuned_alltoall_pair, /* 3 - pairwise exchange */
+      smpi_coll_tuned_alltoall_pair//4 - Plum's algorithm is not available (proprietary?), so pairwise exchange is used instead
+};
+
+/*I_MPI_ADJUST_BARRIER 
+
+MPI_Barrier 
+
+1. Dissemination algorithm 
+2. Recursive doubling algorithm 
+3. Topology aware dissemination algorithm 
+4. Topology aware recursive doubling algorithm 
+5. Binomial gather + scatter algorithm 
+6. Topology aware binomial gather + scatter algorithm 
+
+*/
+static int intel_barrier_gather_scatter(MPI_Comm comm){ /* stand-in for the gather + scatter barriers: runs smpi_mpi_barrier on comm and always returns MPI_SUCCESS */
+    //our default barrier performs a antibcast/bcast
+    smpi_mpi_barrier(comm);
+    return MPI_SUCCESS;
+}
+
+int (*intel_barrier_functions_table[])(MPI_Comm comm) ={ /* entries follow the order of the I_MPI_ADJUST_BARRIER list above (algorithm k at index k-1) */
+      smpi_coll_tuned_barrier_ompi_basic_linear, /* 1 - dissemination */
+      smpi_coll_tuned_barrier_ompi_recursivedoubling, /* 2 - recursive doubling */
+      smpi_coll_tuned_barrier_ompi_basic_linear, /* 3 - topology aware dissemination: same function as 1 */
+      smpi_coll_tuned_barrier_ompi_recursivedoubling, /* 4 - topology aware recursive doubling: same function as 2 */
+      intel_barrier_gather_scatter, /* 5 - gather + scatter */
+      intel_barrier_gather_scatter /* 6 - topology aware gather + scatter: same function as 5 */
+};
+
+intel_tuning_table_element intel_barrier_table[] = /* barrier tuning: every bucket has a single entry with max_size = INT_MAX, so only max_num_proc distinguishes them */
+{
+    {2,1, /* max_num_proc = 2 -> algorithm 2 */
+        {
+        {2147483647,2}
+        }
+    },
+    {4,1, /* max_num_proc = 4 -> algorithm 6 */
+        {
+        {2147483647,6}
+        }
+    },
+    {8,1, /* max_num_proc = 8 -> algorithm 1 */
+        {
+        {2147483647,1}
+        }
+    },
+    {64,1, /* max_num_proc = 64 -> algorithm 2 */
+        {
+        {2147483647,2}
+        }
+    },
+    {2147483647,1, /* catch-all bucket -> algorithm 6 */
+        {
+        {2147483647,6}
+        }
+    }
+};
+
+
+/*I_MPI_ADJUST_BCAST 
+
+MPI_Bcast 
+
+1. Binomial algorithm 
+2. Recursive doubling algorithm 
+3. Ring algorithm 
+4. Topology aware binomial algorithm 
+5. Topology aware recursive doubling algorithm 
+6. Topology aware ring algorithm 
+7. Shumilin's bcast algorithm 
+*/
+
+int (*intel_bcast_functions_table[])(void *buff, int count,
+                                          MPI_Datatype datatype, int root,
+                                          MPI_Comm  comm) ={ /* entries follow the order of the I_MPI_ADJUST_BCAST list above (algorithm k at index k-1) */
+      smpi_coll_tuned_bcast_binomial_tree, /* 1 - binomial */
+      //smpi_coll_tuned_bcast_scatter_rdb_allgather,
+      smpi_coll_tuned_bcast_NTSL, /* 2 - recursive doubling slot: NTSL is used, the scatter_rdb_allgather candidate above is disabled */
+      smpi_coll_tuned_bcast_NTSL, /* 3 - ring */
+      smpi_coll_tuned_bcast_SMP_binomial, /* 4 - topology aware binomial */
+      //smpi_coll_tuned_bcast_scatter_rdb_allgather,
+            smpi_coll_tuned_bcast_NTSL, /* 5 - topology aware recursive doubling slot: NTSL again */
+      smpi_coll_tuned_bcast_SMP_linear, /* 6 - topology aware ring */
+      smpi_coll_tuned_bcast_mvapich2,//7 - Shumilin's bcast algorithm is not known, so the mvapich2 bcast is used
+};
+
+intel_tuning_table_element intel_bcast_table[] = /* bcast tuning: buckets by increasing max_num_proc, each holding num_elems {max_size, algo} pairs */
+{
+    {2,9, /* max_num_proc = 2, 9 thresholds */
+        {
+        {1,2},
+        {402,7},
+        {682,5},
+        {1433,4},
+        {5734,7},
+        {21845,1},
+        {95963,6},
+        {409897,5},
+        {2147483647,1}
+        }
+    },
+    {4,1, /* max_num_proc = 4: always algorithm 7 */
+        {
+        {2147483647,7}
+        }
+    },
+    {8,11, /* max_num_proc = 8, 11 thresholds */
+        {
+        {3,6},
+        {4,7},
+        {25,6},
+        {256,1},
+        {682,6},
+        {1264,1},
+        {2234,6},
+        {6655,5},
+        {16336,1},
+        {3998434,7},
+        {2147483647,6}
+        }
+    },
+    {2147483647,1, /* catch-all bucket: always algorithm 7 */
+        {
+        {2147483647,7}
+        }
+    }
+};
+
+
+/*I_MPI_ADJUST_REDUCE 
+
+MPI_Reduce 
+
+1. Shumilin's algorithm 
+2. Binomial algorithm 
+3. Topology aware Shumilin's algorithm 
+4. Topology aware binomial algorithm 
+5. Rabenseifner's algorithm 
+6. Topology aware Rabenseifner's algorithm
+
+*/
+
+int (*intel_reduce_functions_table[])(void *sendbuf, void *recvbuf,
+                                            int count, MPI_Datatype  datatype,
+                                            MPI_Op   op, int root,
+                                            MPI_Comm   comm) ={ /* entries follow the order of the I_MPI_ADJUST_REDUCE list above (algorithm k at index k-1) */
+      smpi_coll_tuned_reduce_mvapich2, /* 1 - Shumilin's algorithm slot: the mvapich2 reduce is used */
+      smpi_coll_tuned_reduce_binomial, /* 2 - binomial */
+      smpi_coll_tuned_reduce_mvapich2, /* 3 - topology aware Shumilin's slot: same function as 1 */
+      smpi_coll_tuned_reduce_binomial, /* 4 - topology aware binomial: same function as 2 */
+      smpi_coll_tuned_reduce_rab, /* 5 - Rabenseifner */
+      smpi_coll_tuned_reduce_rab /* 6 - topology aware Rabenseifner: same function as 5 */
+};
+
+intel_tuning_table_element intel_reduce_table[] = /* reduce tuning: a single catch-all bucket, algorithm 1 for every size */
+{
+    {2147483647,1,
+        {
+        {2147483647,1}
+        }
+    }
+};
+
+/* I_MPI_ADJUST_REDUCE_SCATTER 
+
+MPI_Reduce_scatter 
+
+1. Recursive halving algorithm 
+2. Pair wise exchange algorithm 
+3. Recursive doubling algorithm 
+4. Reduce + Scatterv algorithm 
+5. Topology aware Reduce + Scatterv algorithm 
+
+*/
+static  int intel_reduce_scatter_reduce_scatterv(void *sbuf, void *rbuf, /* stand-in for the Reduce + Scatterv algorithms: forwards all arguments to smpi_mpi_reduce_scatter */
+                                                    int *rcounts,
+                                                    MPI_Datatype dtype,
+                                                    MPI_Op  op,
+                                                    MPI_Comm  comm)
+{
+  smpi_mpi_reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
+  return MPI_SUCCESS; /* always reports success; nothing is propagated from the call above */
+}
+
+static  int  intel_reduce_scatter_recursivehalving(void *sbuf, void *rbuf, /* recursive halving slot: picks an implementation depending on whether op commutes */
+                                                    int *rcounts,
+                                                    MPI_Datatype dtype,
+                                                    MPI_Op  op,
+                                                    MPI_Comm  comm)
+{
+  if(smpi_op_is_commute(op)) /* recursive halving is only used for commutative operations */
+    return smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(sbuf, rbuf, rcounts,dtype, op,comm);
+  else /* otherwise the mvapich2 reduce_scatter is used */
+    return smpi_coll_tuned_reduce_scatter_mvapich2(sbuf, rbuf, rcounts,dtype, op,comm);
+}
+
+int (*intel_reduce_scatter_functions_table[])( void *sbuf, void *rbuf,
+                                                    int *rcounts,
+                                                    MPI_Datatype dtype,
+                                                    MPI_Op  op,
+                                                    MPI_Comm  comm
+                                                    ) ={ /* entries follow the order of the I_MPI_ADJUST_REDUCE_SCATTER list above (algorithm k at index k-1) */
+      intel_reduce_scatter_recursivehalving, /* 1 - recursive halving */
+      smpi_coll_tuned_reduce_scatter_mpich_pair, /* 2 - pairwise exchange */
+      smpi_coll_tuned_reduce_scatter_mpich_rdb, /* 3 - recursive doubling */
+      intel_reduce_scatter_reduce_scatterv, /* 4 - reduce + scatterv */
+      intel_reduce_scatter_reduce_scatterv /* 5 - topology aware reduce + scatterv: same function as 4 */
+};
+
+intel_tuning_table_element intel_reduce_scatter_table[] = /* reduce_scatter tuning: buckets by increasing max_num_proc, each holding num_elems {max_size, algo} pairs */
+{
+    {2,5, /* max_num_proc = 2, 5 thresholds */
+    {
+        {5,4},
+        {522429,2},
+        {1375877,5},
+        {2932736,2},
+        {2147483647,5}
+        }
+    },
+    {4,9, /* max_num_proc = 4, 9 thresholds */
+        {
+        {4,4},
+        {15,1},
+        {120,3},
+        {651,1},
+        {12188,3},
+        {33890,1},
+        {572117,2},
+        {1410202,5},
+        {2147483647,2}
+        }
+    },
+    {8,7, /* max_num_proc = 8, 7 thresholds */
+        {
+        {4,4},
+        {2263,1},
+        {25007,3},
+        {34861,1},
+        {169625,2},
+        {2734000,4},
+        {2147483647,2}
+        }
+    },
+    {16,5, /* max_num_proc = 16, 5 thresholds */
+        {
+        {4,4},
+        {14228,1},
+        {46084,3},
+        {522139,2},
+        {2147483647,5}
+        }
+    },
+    {32,5, /* max_num_proc = 32, 5 thresholds */
+        {
+        {4,4},
+        {27516,1},
+        {61693,3},
+        {2483469,2},
+        {2147483647,5}
+        }
+    },
+    {64,4, /* max_num_proc = 64, 4 thresholds */
+        {
+        {0,3},
+        {4,4},
+        {100396,1},
+        {2147483647,2}
+        }
+    },
+    {2147483647,6, /* catch-all bucket (max_num_proc = INT_MAX), 6 thresholds */
+        {
+        {0,3},
+        {4,4},
+        {186926,1},
+        {278259,3},
+        {1500100,2},
+        {2147483647,5}
+        }
+    }
+};
+
+/* I_MPI_ADJUST_ALLGATHER 
+
+MPI_Allgather 
+
+1. Recursive doubling algorithm 
+2. Bruck's algorithm 
+3. Ring algorithm 
+4. Topology aware Gatherv + Bcast algorithm 
+
+*/
+
+int (*intel_allgather_functions_table[])(void *sbuf, int scount, 
+                                              MPI_Datatype sdtype,
+                                              void* rbuf, int rcount, 
+                                              MPI_Datatype rdtype, 
+                                              MPI_Comm  comm
+                                                    ) ={ /* entries follow the order of the I_MPI_ADJUST_ALLGATHER list above (algorithm k at index k-1) */
+      smpi_coll_tuned_allgather_rdb, /* 1 - recursive doubling */
+      smpi_coll_tuned_allgather_bruck, /* 2 - Bruck */
+      smpi_coll_tuned_allgather_ring, /* 3 - ring */
+      smpi_coll_tuned_allgather_GB /* 4 - topology aware Gatherv + Bcast slot */
+};
+
+intel_tuning_table_element intel_allgather_table[] = /* allgather tuning: buckets by increasing max_num_proc, each holding num_elems {max_size, algo} pairs */
+{
+    {4,11, /* max_num_proc = 4, 11 thresholds */
+        {
+        {1,4},
+        {384,1},
+        {1533,4},
+        {3296,1},
+        {10763,4},
+        {31816,3},
+        {193343,4},
+        {405857,3},
+        {597626,4},
+        {1844323,3},
+        {2147483647,4}
+        }
+    },
+    {8,10, /* max_num_proc = 8, 10 thresholds */
+        {
+        {12,4},
+        {46,1},
+        {205,4},
+        {3422,2},
+        {4200,4},
+        {8748,1},
+        {24080,3},
+        {33244,4},
+        {371159,1},
+        {2147483647,3}
+        }
+    },
+    {16, 8, /* max_num_proc = 16, 8 thresholds */
+        {
+        {3,4},
+        {53,1},
+        {100,4},
+        {170,1},
+        {6077,4},
+        {127644,1},
+        {143741,4},
+        {2147483647,3}
+        }
+    },
+    {2147483647,10, /* catch-all bucket (max_num_proc = INT_MAX), 10 thresholds */
+        {
+        {184,1},
+        {320,4},
+        {759,1},
+        {1219,4},
+        {2633,1},
+        {8259,4},
+        {123678,1},
+        {160801,4},
+        {284341,1},
+        {2147483647,4}
+        }
+    }
+};
+
+/*
+ * MPI_Allgatherv -- algorithm ids as listed for I_MPI_ADJUST_ALLGATHERV:
+ *
+ *   1. Recursive doubling algorithm
+ *   2. Bruck's algorithm
+ *   3. Ring algorithm
+ *   4. Topology aware Gatherv + Bcast algorithm
+ *
+ * IMPI_COLL_SELECT indexes this array with (algo - 1), so slot k-1 holds the
+ * implementation used for id k.
+ */
+int (*intel_allgatherv_functions_table[])(void *send_buf, int send_count, MPI_Datatype send_type,
+                                          void *recv_buf, int *recv_counts, int *recv_displs,
+                                          MPI_Datatype recv_type,
+                                          MPI_Comm comm) = {
+    smpi_coll_tuned_allgatherv_mpich_rdb,  /* id 1 */
+    smpi_coll_tuned_allgatherv_ompi_bruck, /* id 2 */
+    smpi_coll_tuned_allgatherv_ring,       /* id 3 */
+    smpi_coll_tuned_allgatherv_GB          /* id 4 */
+};
+
+/*
+ * Tuning data for MPI_Allgatherv.
+ * Positional initializers; judging from how IMPI_COLL_SELECT reads the table,
+ * each row is presumably
+ *     { max_num_proc, num_elems, { { max_size, algo }, ... } }
+ * (confirm against the intel_tuning_table_element declaration).
+ */
+intel_tuning_table_element intel_allgatherv_table[] = {
+    { 2, 3, {
+        {259668, 3}, {635750, 4}, {2147483647, 3}
+    } },
+    { 4, 7, {
+        {1, 1}, {5, 4}, {46, 1}, {2590, 2}, {1177259, 3}, {2767234, 4},
+        {2147483647, 3}
+    } },
+    { 8, 6, {
+        {99, 2}, {143, 1}, {4646, 2}, {63522, 3}, {2187806, 4},
+        {2147483647, 3}
+    } },
+    { 2147483647, 7, {
+        {1, 1}, {5, 4}, {46, 1}, {2590, 2}, {1177259, 3}, {2767234, 4},
+        {2147483647, 3}
+    } }
+};
+
+
+/*
+ * MPI_Gather -- algorithm ids as listed for I_MPI_ADJUST_GATHER:
+ *
+ *   1. Binomial algorithm
+ *   2. Topology aware binomial algorithm
+ *   3. Shumilin's algorithm
+ *
+ * IMPI_COLL_SELECT indexes this array with (algo - 1), so slot k-1 holds the
+ * implementation used for id k.  Ids 1 and 2 map to the same binomial
+ * implementation, and id 3 maps to the mvapich2 gather rather than to an
+ * implementation of Shumilin's algorithm.
+ */
+int (*intel_gather_functions_table[])(void *send_buf, int send_count, MPI_Datatype send_type,
+                                      void *recv_buf, int recv_count, MPI_Datatype recv_type,
+                                      int root,
+                                      MPI_Comm comm) = {
+    smpi_coll_tuned_gather_ompi_binomial, /* id 1 */
+    smpi_coll_tuned_gather_ompi_binomial, /* id 2 */
+    smpi_coll_tuned_gather_mvapich2       /* id 3 */
+};
+
+/*
+ * Tuning data for MPI_Gather.
+ * Positional initializers; judging from how IMPI_COLL_SELECT reads the table,
+ * each row is presumably
+ *     { max_num_proc, num_elems, { { max_size, algo }, ... } }
+ * (confirm against the intel_tuning_table_element declaration).
+ *
+ * NOTE(review): the last row ends with two thresholds that share the same
+ * upper bound (2147483647); the second one looks unreachable for any size
+ * below that bound -- confirm whether it is intentional.
+ */
+intel_tuning_table_element intel_gather_table[] = {
+    { 8, 3, {
+        {17561, 3}, {44791, 2}, {2147483647, 3}
+    } },
+    { 16, 7, {
+        {16932, 3}, {84425, 2}, {158363, 3}, {702801, 2}, {1341444, 3},
+        {2413569, 2}, {2147483647, 3}
+    } },
+    { 2147483647, 4, {
+        {47187, 3}, {349696, 2}, {2147483647, 3}, {2147483647, 1}
+    } }
+};
+
+
+/*
+ * MPI_Scatter -- algorithm ids as listed for I_MPI_ADJUST_SCATTER:
+ *
+ *   1. Binomial algorithm
+ *   2. Topology aware binomial algorithm
+ *   3. Shumilin's algorithm
+ *
+ * IMPI_COLL_SELECT indexes this array with (algo - 1), so slot k-1 holds the
+ * implementation used for id k.  Ids 1 and 2 map to the same binomial
+ * implementation, and id 3 maps to the mvapich2 scatter rather than to an
+ * implementation of Shumilin's algorithm.
+ */
+int (*intel_scatter_functions_table[])(void *send_buf, int send_count, MPI_Datatype send_type,
+                                       void *recv_buf, int recv_count, MPI_Datatype recv_type,
+                                       int root,
+                                       MPI_Comm comm) = {
+    smpi_coll_tuned_scatter_ompi_binomial, /* id 1 */
+    smpi_coll_tuned_scatter_ompi_binomial, /* id 2 */
+    smpi_coll_tuned_scatter_mvapich2       /* id 3 */
+};
+
+/*
+ * Tuning data for MPI_Scatter.
+ * Positional initializers; judging from how IMPI_COLL_SELECT reads the table,
+ * each row is presumably
+ *     { max_num_proc, num_elems, { { max_size, algo }, ... } }
+ * (confirm against the intel_tuning_table_element declaration).
+ *
+ * NOTE(review): the last row announces 4 thresholds but lists 5 initializers,
+ * the final two sharing the same upper bound (2147483647) -- confirm which of
+ * the count or the list is the intended one.
+ */
+intel_tuning_table_element intel_scatter_table[] = {
+    { 2, 2, {
+        {16391, 1}, {2147483647, 3}
+    } },
+    { 4, 6, {
+        {16723, 3}, {153541, 2}, {425631, 3}, {794142, 2}, {1257027, 3},
+        {2147483647, 2}
+    } },
+    { 8, 7, {
+        {2633, 3}, {6144, 2}, {14043, 3}, {24576, 2}, {107995, 3},
+        {1752729, 2}, {2147483647, 3}
+    } },
+    { 16, 7, {
+        {2043, 3}, {2252, 2}, {17749, 3}, {106020, 2}, {628654, 3},
+        {3751354, 2}, {2147483647, 3}
+    } },
+    { 2147483647, 4, {
+        {65907, 3}, {245132, 2}, {1042439, 3}, {2147483647, 2},
+        {2147483647, 1}
+    } }
+};
+
+
+
+/*
+ * MPI_Alltoallv -- algorithm ids as listed for I_MPI_ADJUST_ALLTOALLV:
+ *
+ *   1. Isend/Irecv + waitall algorithm
+ *   2. Plum's algorithm
+ *
+ * IMPI_COLL_SELECT indexes this array with (algo - 1), so slot k-1 holds the
+ * implementation used for id k.  Id 2 maps to the Bruck alltoallv rather than
+ * to an implementation of Plum's algorithm.
+ */
+int (*intel_alltoallv_functions_table[])(void *send_buf, int *send_counts, int *send_disps,
+                                         MPI_Datatype send_type,
+                                         void *recv_buf, int *recv_counts, int *recv_disps,
+                                         MPI_Datatype recv_type,
+                                         MPI_Comm comm) = {
+    smpi_coll_tuned_alltoallv_ompi_basic_linear, /* id 1 */
+    smpi_coll_tuned_alltoallv_bruck              /* id 2 */
+};
+
+/*
+ * Tuning data for MPI_Alltoallv: a single row with a single threshold, so
+ * algorithm id 1 is the only one this table ever names.
+ * Row layout is presumably { max_num_proc, num_elems, { { max_size, algo } } }
+ * (confirm against the intel_tuning_table_element declaration).
+ */
+intel_tuning_table_element intel_alltoallv_table[] = {
+    { 2147483647, 1, {
+        {2147483647, 1}
+    } }
+};
+
+
+//These are collected from table 3.5-2 of the Intel MPI Reference Manual 
+
+    
+/*
+ * SIZECOMP_<collective>: statement fragments pasted by IMPI_COLL_SELECT at the
+ * top of each generated selector.  Every fragment declares `size_t block_dsize`,
+ * the size that IMPI_COLL_SELECT compares against the max_size thresholds.
+ * They use the selector's parameter names and, where they loop, the `i` and
+ * `comm_size` locals that IMPI_COLL_SELECT declares before pasting them.
+ */
+
+/* reduce_scatter: sum of all rcounts[] entries, times the datatype size. */
+#define SIZECOMP_reduce_scatter\
+    int total_message_size = 0;\
+    i = 0;\
+    while (i < comm_size) {\
+        total_message_size += rcounts[i];\
+        i++;\
+    }\
+    size_t block_dsize = total_message_size * smpi_datatype_size(dtype);
+
+/* allreduce: rcount elements of dtype. */
+#define SIZECOMP_allreduce\
+    size_t block_dsize = rcount * smpi_datatype_size(dtype);
+
+/* alltoall: send_count elements of send_type. */
+#define SIZECOMP_alltoall\
+    size_t block_dsize = send_count * smpi_datatype_size(send_type);
+
+/* bcast: count elements of datatype. */
+#define SIZECOMP_bcast\
+    size_t block_dsize = count * smpi_datatype_size(datatype);
+
+/* reduce: count elements of datatype. */
+#define SIZECOMP_reduce\
+    size_t block_dsize = count * smpi_datatype_size(datatype);
+
+/* barrier: no payload, a constant size of 1 is used. */
+#define SIZECOMP_barrier\
+    size_t block_dsize = 1;
+
+/* allgather: recv_count elements of recv_type. */
+#define SIZECOMP_allgather\
+    size_t block_dsize = recv_count * smpi_datatype_size(recv_type);
+
+/* allgatherv: sum of all recv_count[] entries, times the recv_type size. */
+#define SIZECOMP_allgatherv\
+    int total_message_size = 0;\
+    i = 0;\
+    while (i < comm_size) {\
+        total_message_size += recv_count[i];\
+        i++;\
+    }\
+    size_t block_dsize = total_message_size * smpi_datatype_size(recv_type);
+
+/*
+ * gather: the receive-side size when the send buffer is MPI_IN_PLACE or the
+ * caller is the root, the send-side size otherwise.
+ */
+#define SIZECOMP_gather\
+    int rank = smpi_comm_rank(comm);\
+    size_t block_dsize;\
+    if (send_buff != MPI_IN_PLACE && rank != root)\
+        block_dsize = send_count * smpi_datatype_size(send_type);\
+    else\
+        block_dsize = recv_count * smpi_datatype_size(recv_type);
+
+/*
+ * scatter: the receive-side size when the send buffer is MPI_IN_PLACE or the
+ * caller is not the root, the send-side size otherwise.
+ */
+#define SIZECOMP_scatter\
+    int rank = smpi_comm_rank(comm);\
+    size_t block_dsize;\
+    if (sendbuf != MPI_IN_PLACE && rank == root)\
+        block_dsize = sendcount * smpi_datatype_size(sendtype);\
+    else\
+        block_dsize = recvcount * smpi_datatype_size(recvtype);
+
+/* alltoallv: a constant size of 1 is used. */
+#define SIZECOMP_alltoallv\
+    size_t block_dsize = 1;
+  
+/*
+ * IMPI_COLL_SELECT(cat, ret, args, args2)
+ *
+ * Defines `ret smpi_coll_tuned_<cat>_impi(args)`, the selector for collective
+ * <cat>:
+ *   1. SIZECOMP_<cat> computes block_dsize from the call's arguments;
+ *   2. the first row of intel_<cat>_table whose max_num_proc is greater than
+ *      the communicator size is picked (the last row is the fallback);
+ *   3. inside that row, the first threshold whose max_size is greater than
+ *      block_dsize is picked (the last declared threshold is the fallback);
+ *   4. the call is forwarded, with args2, to entry (algo - 1) of
+ *      intel_<cat>_functions_table, and its result is returned.
+ *
+ * Both searches test the index before reading the table and never advance
+ * past the last row / last declared threshold, so a size or communicator
+ * larger than every bound cannot index outside the initialised data.
+ * The row count is derived with sizeof, which requires intel_<cat>_table to
+ * be an array defined before the macro is instantiated.
+ */
+#define IMPI_COLL_SELECT(cat, ret, args, args2)\
+ret smpi_coll_tuned_ ## cat ## _impi (COLL_UNPAREN args)\
+{\
+    int comm_size = smpi_comm_size(comm);\
+    int i =0;\
+    SIZECOMP_ ## cat\
+    i=0;\
+    int j =0;\
+    int impi_nb_rows = (int)(sizeof(intel_ ## cat ## _table)\
+                             /sizeof(intel_ ## cat ## _table[0]));\
+    while(i+1 < impi_nb_rows\
+        && comm_size>=intel_ ## cat ## _table[i].max_num_proc)\
+      i++;\
+    while(j+1 < intel_ ## cat ## _table[i].num_elems\
+        && block_dsize >=intel_ ## cat ## _table[i].elems[j].max_size)\
+      j++;\
+    return (intel_ ## cat ## _functions_table[intel_ ## cat ## _table[i].elems[j].algo-1]\
+    args2);\
+}
+
+/*
+ * Instantiate IMPI_COLL_SELECT once per collective.  The last argument is the
+ * parenthesised argument list forwarded to the selected algorithm; COLL_APPLY
+ * and the COLL_*_SIG signatures are defined outside this file.
+ */
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHER_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLREDUCE_SIG, (sbuf, rbuf, rcount, dtype, op, comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLTOALL_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLTOALLV_SIG, (send_buff, send_counts, send_disps, send_type, recv_buff, recv_counts, recv_disps, recv_type, comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_BARRIER_SIG, (comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_BCAST_SIG, (buf, count, datatype, root, comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_GATHER_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, root, comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_REDUCE_SIG, (buf, rbuf, count, datatype, op, root, comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_REDUCE_SCATTER_SIG, (sbuf, rbuf, rcounts, dtype, op, comm));
+COLL_APPLY(IMPI_COLL_SELECT, COLL_SCATTER_SIG, (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm));
+
index 07bec7c..bedd205 100644 (file)
@@ -150,7 +150,8 @@ CREATE_MPI_DATATYPE(MPI_PTR, void*);
  */
 int is_datatype_valid(MPI_Datatype datatype) {
     return datatype != MPI_DATATYPE_NULL
-        && (datatype->flags & DT_FLAG_COMMITED);
+        && (datatype->flags & DT_FLAG_COMMITED)
+        && (smpi_datatype_size(datatype)>0); /* a datatype whose size is 0 is now reported as invalid */
 }
 
 size_t smpi_datatype_size(MPI_Datatype datatype)