Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Merge branch 'master' of git+ssh://scm.gforge.inria.fr//gitroot/simgrid/simgrid
author: Jonathan Rouzaud-Cornabas <jonathan.rouzaud-cornabas@ens-lyon.fr>
Wed, 12 Jun 2013 15:13:46 +0000 (17:13 +0200)
committer: Jonathan Rouzaud-Cornabas <jonathan.rouzaud-cornabas@ens-lyon.fr>
Wed, 12 Jun 2013 15:13:46 +0000 (17:13 +0200)
41 files changed:
buildtools/Cmake/AddTests.cmake
buildtools/Cmake/DefinePackages.cmake
buildtools/Cmake/MakeExe.cmake
buildtools/Cmake/MakeJava.cmake
examples/java/io/Node.java
examples/msg/io/file.c
examples/msg/io/file_unlink.c
examples/msg/semaphores/CMakeLists.txt [new file with mode: 0644]
examples/msg/semaphores/synchro.c
examples/smpi/CMakeLists.txt
include/msg/msg.h
include/simgrid/simix.h
src/bindings/java/jmsg_file.c
src/bindings/java/jmsg_file.h
src/bindings/java/org/simgrid/msg/Host.java
src/include/smpi/smpi_interface.h
src/include/surf/surf.h
src/msg/msg_io.c
src/simgrid/sg_config.c
src/simix/smx_io.c
src/simix/smx_io_private.h
src/simix/smx_smurf_private.h
src/simix/smx_user.c
src/smpi/colls/allgather-ompi-neighborexchange.c [new file with mode: 0644]
src/smpi/colls/allgatherv-ompi-neighborexchange.c [new file with mode: 0644]
src/smpi/colls/colls.h
src/smpi/colls/gather-ompi.c [new file with mode: 0644]
src/smpi/colls/smpi_openmpi_selector.c
src/smpi/private.h
src/smpi/smpi_coll.c
src/smpi/smpi_global.c
src/smpi/smpi_mpi_dt.c
src/smpi/smpi_mpi_dt_private.h
src/smpi/smpi_pmpi.c
src/surf/storage.c
src/surf/storage_private.h
src/surf/workstation.c
teshsuite/smpi/CMakeLists.txt
teshsuite/smpi/gather_coll.c [new file with mode: 0644]
teshsuite/smpi/gather_coll.tesh [new file with mode: 0644]
tools/check_dist_archive.exclude

index 139057c..3ec061d 100644 (file)
@@ -371,35 +371,46 @@ if(NOT enable_memcheck)
       ADD_TEST(smpi-replay                      ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi --cd ${CMAKE_BINARY_DIR}/examples/smpi ${CMAKE_HOME_DIRECTORY}/examples/smpi/replay/smpi_replay.tesh)
     endif()
 
+
+    FOREACH (GATHER_COLL default ompi ompi_basic_linear ompi_linear_sync ompi_binomial)
+        ADD_TEST(smpi-gather-coll-${GATHER_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/gather:${GATHER_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/gather_coll.tesh)
+    ENDFOREACH()
+    
     FOREACH (ALLGATHER_COLL default  2dmesh 3dmesh bruck GB loosely_lr lr
                            NTSLR NTSLR_NB pair rdb  rhv ring SMP_NTS
-                           smp_simple spreading_simple ompi)
+                           smp_simple spreading_simple ompi ompi_neighborexchange)
         ADD_TEST(smpi-allgather-coll-${ALLGATHER_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/allgather:${ALLGATHER_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgather_coll.tesh)
     ENDFOREACH()
-    FOREACH (ALLGATHERV_COLL default GB pair ring ompi)
+    
+    FOREACH (ALLGATHERV_COLL default GB pair ring ompi ompi_neighborexchange)
         ADD_TEST(smpi-allgatherv-coll-${ALLGATHERV_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/allgatherv:${ALLGATHERV_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgatherv_coll.tesh)
     ENDFOREACH()
+    
     FOREACH (ALLREDUCE_COLL default lr NTS rab1 rab2 rab_rdb
                            rab_rsag rdb smp_binomial smp_binomial_pipeline
                            smp_rdb smp_rsag smp_rsag_lr smp_rsag_rab redbcast ompi ompi_ring_segmented)
         ADD_TEST(smpi-allreduce-coll-${ALLREDUCE_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/allreduce:${ALLREDUCE_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allreduce_coll.tesh)
     ENDFOREACH()
+    
     FOREACH (ALLTOALL_COLL 2dmesh 3dmesh pair pair_one_barrier pair_light_barrier
                           pair_mpi_barrier rdb ring ring_light_barrier
                           ring_mpi_barrier ring_one_barrier
                           simple bruck basic_linear ompi)
         ADD_TEST(smpi-alltoall-coll-${ALLTOALL_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/alltoall:${ALLTOALL_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/alltoall_coll.tesh)
     ENDFOREACH()
+    
     FOREACH (ALLTOALLV_COLL default pair pair_light_barrier pair_mpi_barrier
                            pair_one_barrier  ring ring_light_barrier
                            ring_mpi_barrier ring_one_barrier bruck ompi)
         ADD_TEST(smpi-alltoallv-coll-${ALLTOALLV_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/alltoallv:${ALLTOALLV_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/alltoallv_coll.tesh)
     ENDFOREACH()
+    
     FOREACH (BCAST_COLL default arrival_nb arrival_pattern_aware arrival_pattern_aware_wait arrival_scatter
                        binomial_tree flattree flattree_pipeline NTSB NTSL NTSL_Isend scatter_LR_allgather
                        scatter_rdb_allgather SMP_binary SMP_binomial SMP_linear ompi ompi_split_bintree ompi_pipeline)
                ADD_TEST(smpi-bcast-coll-${BCAST_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/bcast:${BCAST_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/bcast_coll.tesh)
     ENDFOREACH()
+    
     FOREACH (REDUCE_COLL default arrival_pattern_aware binomial flat_tree NTSL scatter_gather ompi ompi_chain ompi_binary ompi_basic_linear ompi_binomial ompi_in_order_binary)
         ADD_TEST(smpi-reduce-coll-${REDUCE_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/reduce:${REDUCE_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/reduce_coll.tesh)
     ENDFOREACH()
index d22d059..0131674 100644 (file)
@@ -128,9 +128,11 @@ set(SMPI_SRC
   src/smpi/colls/allgather-SMP-NTS.c
   src/smpi/colls/allgather-smp-simple.c
   src/smpi/colls/allgather-spreading-simple.c
+  src/smpi/colls/allgather-ompi-neighborexchange.c
   src/smpi/colls/allgatherv-GB.c  
   src/smpi/colls/allgatherv-pair.c
   src/smpi/colls/allgatherv-ring.c
+  src/smpi/colls/allgatherv-ompi-neighborexchange.c
   src/smpi/colls/allreduce-lr.c
   src/smpi/colls/allreduce-NTS.c
   src/smpi/colls/allreduce-rab1.c
@@ -193,6 +195,7 @@ set(SMPI_SRC
   src/smpi/colls/reduce-NTSL.c
   src/smpi/colls/reduce-scatter-gather.c
   src/smpi/colls/reduce-ompi.c
+  src/smpi/colls/gather-ompi.c
   )
 
 if(SMPI_F2C)
@@ -843,14 +846,11 @@ set(EXAMPLES_CMAKEFILES_TXT
   examples/java/startKillTime/CMakeLists.txt
   examples/java/suspend/CMakeLists.txt
   examples/java/tracing/CMakeLists.txt
-  examples/scala/CMakeLists.txt
-  examples/scala/masterslave/CMakeLists.txt
-  examples/scala/master_slave_bypass/CMakeLists.txt
-  examples/scala/master_slave_kill/CMakeLists.txt
   examples/lua/CMakeLists.txt
   examples/msg/CMakeLists.txt
   examples/msg/actions/CMakeLists.txt
   examples/msg/bittorrent/CMakeLists.txt
+  examples/msg/chainsend/CMakeLists.txt
   examples/msg/chord/CMakeLists.txt
   examples/msg/cloud/CMakeLists.txt
   examples/msg/gpu/CMakeLists.txt
@@ -866,12 +866,16 @@ set(EXAMPLES_CMAKEFILES_TXT
   examples/msg/pmm/CMakeLists.txt
   examples/msg/priority/CMakeLists.txt
   examples/msg/properties/CMakeLists.txt
+  examples/msg/semaphores/CMakeLists.txt
   examples/msg/sendrecv/CMakeLists.txt
-  examples/msg/chainsend/CMakeLists.txt
   examples/msg/start_kill_time/CMakeLists.txt
   examples/msg/suspend/CMakeLists.txt
   examples/msg/token_ring/CMakeLists.txt
   examples/msg/tracing/CMakeLists.txt
+  examples/scala/CMakeLists.txt
+  examples/scala/master_slave_bypass/CMakeLists.txt
+  examples/scala/master_slave_kill/CMakeLists.txt
+  examples/scala/masterslave/CMakeLists.txt
   examples/simdag/CMakeLists.txt
   examples/simdag/dax/CMakeLists.txt
   examples/simdag/dot/CMakeLists.txt
@@ -889,20 +893,20 @@ set(TESHSUITE_CMAKEFILES_TXT
   teshsuite/msg/CMakeLists.txt
   teshsuite/msg/trace/CMakeLists.txt
   teshsuite/simdag/CMakeLists.txt
+  teshsuite/simdag/availability/CMakeLists.txt
   teshsuite/simdag/network/CMakeLists.txt
   teshsuite/simdag/network/mxn/CMakeLists.txt
   teshsuite/simdag/network/p2p/CMakeLists.txt
   teshsuite/simdag/partask/CMakeLists.txt
   teshsuite/simdag/platforms/CMakeLists.txt
-  teshsuite/simdag/availability/CMakeLists.txt
-  teshsuite/xbt/CMakeLists.txt
   teshsuite/smpi/CMakeLists.txt
   teshsuite/smpi/mpich-test/CMakeLists.txt
-  teshsuite/smpi/mpich-test/env/CMakeLists.txt
   teshsuite/smpi/mpich-test/coll/CMakeLists.txt
   teshsuite/smpi/mpich-test/context/CMakeLists.txt
+  teshsuite/smpi/mpich-test/env/CMakeLists.txt
   teshsuite/smpi/mpich-test/profile/CMakeLists.txt
   teshsuite/smpi/mpich-test/pt2pt/CMakeLists.txt
+  teshsuite/xbt/CMakeLists.txt
   )
 
 set(TOOLS_CMAKEFILES_TXT
index 33e9e2e..defe596 100644 (file)
@@ -10,29 +10,6 @@ add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/tools)
 ##################################################################
 
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/tools/tesh)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/tools/graphicator/)
-
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/testsuite/xbt)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/testsuite/surf)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/xbt)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/simdag)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/simdag/network)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/simdag/network/p2p)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/simdag/network/mxn)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/simdag/partask)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/simdag/platforms)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/simdag/availability)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/smpi)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich-test)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich-test/env)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich-test/coll)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich-test/context)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich-test/profile)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich-test/pt2pt)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/msg)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/msg/trace)
-
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/java)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/java/async)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/java/bittorrent)
@@ -52,43 +29,67 @@ add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/java/startKillTime)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/java/suspend)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/java/tracing)
 
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/scala)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/scala/masterslave)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/scala/master_slave_bypass)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/scala/master_slave_kill)
-
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/properties)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/actions)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/migration)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/sendrecv)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/chainsend)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/suspend)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/parallel_task)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/priority)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/masterslave)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/icomms)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/bittorrent)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/chainsend)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/chord)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/kademlia)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/token_ring)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/pmm)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/start_kill_time)
-
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/io)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/cloud)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/gpu)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/tracing)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/mc)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/gtnets)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/icomms)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/io)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/kademlia)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/masterslave)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/mc)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/migration)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/ns3)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/parallel_task)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/pmm)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/priority)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/properties)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/semaphores)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/sendrecv)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/start_kill_time)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/suspend)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/token_ring)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/msg/tracing)
+
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/scala)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/scala/master_slave_bypass)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/scala/master_slave_kill)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/scala/masterslave)
 
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/simdag)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/simdag/dax)
-add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/simdag/goal)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/simdag/dot)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/simdag/goal)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/simdag/metaxml)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/simdag/properties)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/simdag/scheduling)
 
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/smpi)
 add_subdirectory(${CMAKE_HOME_DIRECTORY}/examples/smpi/MM)
+
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/msg)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/msg/trace)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/simdag)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/simdag/availability)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/simdag/network)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/simdag/network/mxn)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/simdag/network/p2p)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/simdag/partask)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/simdag/platforms)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/smpi)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich-test)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich-test/coll)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich-test/context)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich-test/env)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich-test/profile)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/mpich-test/pt2pt)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/teshsuite/xbt)
+
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/testsuite/surf)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/testsuite/xbt)
+
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/tools/graphicator/)
+add_subdirectory(${CMAKE_HOME_DIRECTORY}/tools/tesh)
index a861c10..86d7dd3 100644 (file)
@@ -26,10 +26,11 @@ else()
 endif()
 message("-- [Java] SG_java includes: ${CHECK_INCLUDES}")
 
+target_link_libraries(SG_java simgrid)
+
 if(WIN32)
-  get_target_property(SIMGRID_LIB_NAME_NAME SG_java LIBRARY_OUTPUT_NAME)
   set_target_properties(SG_java PROPERTIES
-    LINK_FLAGS "-Wl,--subsystem,windows,--kill-at ${SIMGRID_LIB_NAME}"
+    LINK_FLAGS "-Wl,--subsystem,windows,--kill-at"
     PREFIX "")
   find_path(PEXPORTS_PATH NAMES pexports.exe PATHS NO_DEFAULT_PATHS)
   message(STATUS "pexports: ${PEXPORTS_PATH}")
@@ -37,8 +38,6 @@ if(WIN32)
     add_custom_command(TARGET SG_java POST_BUILD
       COMMAND ${PEXPORTS_PATH}/pexports.exe ${CMAKE_BINARY_DIR}/SG_java.dll > ${CMAKE_BINARY_DIR}/SG_java.def)
   endif(PEXPORTS_PATH)
-else()
-  target_link_libraries(SG_java simgrid)
 endif()
 
 # Rules to build simgrid.jar
index 80c70fd..353db84 100644 (file)
@@ -45,7 +45,7 @@ public class Node extends Process {
                                filename = FILENAME1;
                }
                Msg.info("Open file " + filename);
-               File file = new File(mount,filename, "rw");
+               File file = new File(mount,filename);
 
                long read = file.read(10000000,1);
                Msg.info("Having read " + read + " on " + filename);
index bcd2d94..84bded3 100644 (file)
@@ -49,13 +49,13 @@ int host(int argc, char *argv[])
 
   XBT_INFO("\tOpen file '%s'",file->name);
 
-  read = MSG_file_read(ptr,10000000,sizeof(char*),file);     // Read for 10MB
+  read = MSG_file_read(ptr,10000000,file);     // Read for 10MB
   XBT_INFO("\tHave read    %zu on %s",read,file->name);
 
-  write = MSG_file_write(ptr,100000,sizeof(char*),file);  // Write for 100KB
+  write = MSG_file_write(ptr,100000,file);  // Write for 100KB
   XBT_INFO("\tHave written %zu on %s",write,file->name);
 
-  read = MSG_file_read(ptr,110000,sizeof(char*),file);     // Read for 110KB
+  read = MSG_file_read(ptr,110000,file);     // Read for 110KB
   XBT_INFO("\tHave read    %zu on %s (of size %zu)",read,file->name,
       MSG_file_get_size(file));
 
index 60dcadc..30c7c17 100644 (file)
@@ -47,7 +47,7 @@ int host(int argc, char *argv[])
   file = MSG_file_open(mount,FILENAME1);
 
   // Write into the new file
-  write = MSG_file_write(ptr,100000,sizeof(char*),file);  // Write for 100Ko
+  write = MSG_file_write(ptr,100000,file);  // Write for 100Ko
   XBT_INFO("\tHave written %zu on %s",write,file->name);
 
   // Close the file
diff --git a/examples/msg/semaphores/CMakeLists.txt b/examples/msg/semaphores/CMakeLists.txt
new file mode 100644 (file)
index 0000000..d8b48dd
--- /dev/null
@@ -0,0 +1,30 @@
+cmake_minimum_required(VERSION 2.6)
+
+set(EXECUTABLE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}")
+
+add_executable(synchro synchro.c)
+
+### Add definitions for compile
+target_link_libraries(synchro simgrid)
+
+set(tesh_files
+  ${tesh_files}
+  PARENT_SCOPE
+  )
+set(xml_files
+  ${xml_files}
+  PARENT_SCOPE
+  )
+set(examples_src
+  ${examples_src}
+  ${CMAKE_CURRENT_SOURCE_DIR}/synchro.c
+  PARENT_SCOPE
+  )
+set(bin_files
+  ${bin_files}
+  PARENT_SCOPE
+  )
+set(txt_files
+  ${txt_files}
+  PARENT_SCOPE
+  )
index b63a759..b66b1d3 100644 (file)
@@ -8,7 +8,7 @@ XBT_LOG_NEW_DEFAULT_CATEGORY(msg_semaphore_example,
 
 msg_sem_t sem;
 
-int peer(int argc, char* argv[]){
+static int peer(int argc, char* argv[]){
 
   int i = 0; 
   
@@ -27,6 +27,8 @@ int peer(int argc, char* argv[]){
   }
   MSG_process_sleep(50);
   XBT_INFO("Done");
+
+  return 0;
 }
 
 int main(int argc, char* argv[]) {
@@ -70,5 +72,5 @@ int main(int argc, char* argv[]) {
 
   msg_error_t res = MSG_main();
   printf("Finished\n");
-  return 0;
+  return (res != MSG_OK);
 }
index 58d9040..669d3e3 100644 (file)
@@ -62,11 +62,14 @@ set(txt_files
   ${CMAKE_CURRENT_SOURCE_DIR}/replay/actions0.txt
   ${CMAKE_CURRENT_SOURCE_DIR}/replay/actions1.txt
   ${CMAKE_CURRENT_SOURCE_DIR}/replay/actions_allReduce.txt
+  ${CMAKE_CURRENT_SOURCE_DIR}/replay/actions_allgatherv.txt
   ${CMAKE_CURRENT_SOURCE_DIR}/replay/actions_alltoall.txt
   ${CMAKE_CURRENT_SOURCE_DIR}/replay/actions_alltoallv.txt
   ${CMAKE_CURRENT_SOURCE_DIR}/replay/actions_barrier.txt
   ${CMAKE_CURRENT_SOURCE_DIR}/replay/actions_bcast.txt
   ${CMAKE_CURRENT_SOURCE_DIR}/replay/actions_bcast_reduce_datatypes.txt
+  ${CMAKE_CURRENT_SOURCE_DIR}/replay/actions_gather.txt
+  ${CMAKE_CURRENT_SOURCE_DIR}/replay/actions_reducescatter.txt
   ${CMAKE_CURRENT_SOURCE_DIR}/replay/actions_waitall.txt
   ${CMAKE_CURRENT_SOURCE_DIR}/replay/actions_with_isend.txt
   ${CMAKE_CURRENT_SOURCE_DIR}/replay/split_traces
index ccf0783..920a80c 100644 (file)
@@ -78,10 +78,8 @@ XBT_PUBLIC(const char *) MSG_environment_as_get_model(msg_as_t as);
 XBT_PUBLIC(xbt_dynar_t) MSG_environment_as_get_hosts(msg_as_t as);
 
 /************************** File handling ***********************************/
-XBT_PUBLIC(size_t) MSG_file_read(void* ptr, size_t size, size_t nmemb,
-                                 msg_file_t fd);
-XBT_PUBLIC(size_t) MSG_file_write(const void* ptr, size_t size, size_t nmemb,
-                                  msg_file_t fd);
+XBT_PUBLIC(size_t) MSG_file_read(void* ptr, size_t size, msg_file_t fd);
+XBT_PUBLIC(size_t) MSG_file_write(const void* ptr, size_t size, msg_file_t fd);
 XBT_PUBLIC(msg_file_t) MSG_file_open(const char* mount, const char* path);
 XBT_PUBLIC(int) MSG_file_close(msg_file_t fd);
 XBT_PUBLIC(size_t) MSG_file_get_size(msg_file_t fd);
index d6252f0..b3305ca 100644 (file)
@@ -460,10 +460,9 @@ XBT_PUBLIC(void) simcall_sem_acquire_timeout(smx_sem_t sem,
                                              double max_duration);
 XBT_PUBLIC(int) simcall_sem_get_capacity(smx_sem_t sem);
 
-XBT_PUBLIC(double) simcall_file_read(void* ptr, size_t size, size_t nmemb,
-                                     smx_file_t fd);
+XBT_PUBLIC(size_t) simcall_file_read(void* ptr, size_t size, smx_file_t fd);
 XBT_PUBLIC(size_t) simcall_file_write(const void* ptr, size_t size,
-                                      size_t nmemb, smx_file_t fd);
+                                      smx_file_t fd);
 XBT_PUBLIC(smx_file_t) simcall_file_open(const char* storage, const char* path);
 XBT_PUBLIC(int) simcall_file_close(smx_file_t fd);
 XBT_PUBLIC(int) simcall_file_unlink(smx_file_t fd);
index 63f4992..d1e57c2 100644 (file)
@@ -36,18 +36,18 @@ Java_org_simgrid_msg_File_open(JNIEnv *env, jobject jfile, jobject jstorage, job
   (*env)->ReleaseStringUTFChars(env, jpath, path);
 }
 JNIEXPORT jlong JNICALL
-Java_org_simgrid_msg_File_read(JNIEnv *env, jobject jfile, jlong jsize, jlong jnmemb) {
+Java_org_simgrid_msg_File_read(JNIEnv *env, jobject jfile, jlong jsize) {
   msg_file_t file = jfile_get_native(env, jfile);
   size_t n;
-  n = MSG_file_read(NULL,(size_t)jsize, (size_t)jnmemb, file);
+  n = MSG_file_read(NULL,(size_t)jsize, file);
   return (jlong)n;
 }
 
 JNIEXPORT jlong JNICALL
-Java_org_simgrid_msg_File_write(JNIEnv *env, jobject jfile, jlong jsize, jlong jnmemb) {
+Java_org_simgrid_msg_File_write(JNIEnv *env, jobject jfile, jlong jsize) {
   msg_file_t file = jfile_get_native(env, jfile);
   size_t n;
-  n = MSG_file_write(NULL, (size_t)jsize, (size_t)jnmemb, file);
+  n = MSG_file_write(NULL, (size_t)jsize, file);
   return (jlong)n;
 }
 JNIEXPORT void JNICALL
index dfa3690..182bef1 100644 (file)
@@ -33,13 +33,13 @@ Java_org_simgrid_msg_File_open(JNIEnv*, jobject, jobject, jobject);
  * Method                      read
  */
 JNIEXPORT jlong JNICALL
-Java_org_simgrid_msg_File_read(JNIEnv*, jobject, jlong, jlong);
+Java_org_simgrid_msg_File_read(JNIEnv*, jobject, jlong);
 /**
  * Class                       org_simgrid_msg_File
  * Method                      write
  */
 JNIEXPORT jlong JNICALL
-Java_org_simgrid_msg_File_write(JNIEnv*, jobject, jlong, jlong);
+Java_org_simgrid_msg_File_write(JNIEnv*, jobject, jlong);
 /**
  * Class                       org_simgrid_msg_File
  * Method                      close
index 11605f9..82a4753 100644 (file)
@@ -110,7 +110,7 @@ public class Host {
      * If there is a need to receive some messages asynchronously, and some not, 
      * two different mailboxes should be used.
      *
-     * @param alias The name of the mailbox 
+     * @param mailboxName The name of the mailbox
      */
     public static native void setAsyncMailbox(String mailboxName);
 
index 9c11e29..77736f9 100644 (file)
@@ -22,6 +22,15 @@ typedef struct mpi_coll_description {
   void *coll;
 } s_mpi_coll_description_t, *mpi_coll_description_t;
 
+
+/** \ingroup MPI gather
+ *  \brief The list of all available allgather collectives
+ */
+XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_gather_description[];
+XBT_PUBLIC_DATA(int (*mpi_coll_gather_fun)
+                (void *, int, MPI_Datatype, void *, int, MPI_Datatype,
+                 int, MPI_Comm));
+                 
 /** \ingroup MPI allgather
  *  \brief The list of all available allgather collectives
  */
index ba5ec83..ae2e61d 100644 (file)
@@ -227,10 +227,10 @@ typedef struct surf_network_model_extension_public {
 typedef struct surf_storage_model_extension_public {
   surf_action_t(*open) (void *storage, const char* mount, const char* path);
   surf_action_t(*close) (void *storage, surf_file_t fd);
-  surf_action_t(*read) (void *storage, void* ptr, double size, size_t nmemb,
+  surf_action_t(*read) (void *storage, void* ptr, size_t size,
                         surf_file_t fd);
   surf_action_t(*write) (void *storage, const void* ptr, size_t size,
-                         size_t nmemb, surf_file_t fd);
+                         surf_file_t fd);
   surf_action_t(*stat) (void *storage, surf_file_t fd);
   surf_action_t(*ls) (void *storage, const char *path);
 } s_surf_model_extension_storage_t;
@@ -263,10 +263,10 @@ typedef struct surf_workstation_model_extension_public {
   surf_action_t(*open) (void *workstation, const char* storage,
                         const char* path);
   surf_action_t(*close) (void *workstation, surf_file_t fd);
-  surf_action_t(*read) (void *workstation, void* ptr, size_t size, size_t nmemb,
+  surf_action_t(*read) (void *workstation, void* ptr, size_t size,
                         surf_file_t fd);
   surf_action_t(*write) (void *workstation, const void* ptr, size_t size,
-                         size_t nmemb, surf_file_t fd);
+                         surf_file_t fd);
   surf_action_t(*stat) (void *workstation, surf_file_t fd);
   int(*unlink) (void *workstation, surf_file_t fd);
   surf_action_t(*ls) (void *workstation, const char* mount, const char *path);
index b15cdb5..798e7e2 100644 (file)
@@ -23,28 +23,26 @@ XBT_LOG_NEW_DEFAULT_SUBCATEGORY(msg_io, msg,
  * \brief Read elements of a file
  *
  * \param ptr buffer to where the data is copied
- * \param size of each element
- * \param nmemb is the number of elements of data to read
+ * \param size of the file to read
  * \param fd is a the file descriptor
  * \return the number of items successfully read
  */
-size_t MSG_file_read(void* ptr, size_t size, size_t nmemb, msg_file_t fd)
+size_t MSG_file_read(void* ptr, size_t size, msg_file_t fd)
 {
-  return simcall_file_read(ptr, size, nmemb, fd->simdata->smx_file);
+  return simcall_file_read(ptr, size, fd->simdata->smx_file);
 }
 
 /** \ingroup msg_file_management
  * \brief Write elements into a file
  *
  * \param ptr buffer from where the data is copied
- * \param size of each element
- * \param nmemb is the number of elements of data to write
+ * \param size of the file to write
  * \param fd is a the file descriptor
  * \return the number of items successfully write
  */
-size_t MSG_file_write(const void* ptr, size_t size, size_t nmemb, msg_file_t fd)
+size_t MSG_file_write(const void* ptr, size_t size, msg_file_t fd)
 {
-  return simcall_file_write(ptr, size, nmemb, fd->simdata->smx_file);
+  return simcall_file_write(ptr, size, fd->simdata->smx_file);
 }
 
 /** \ingroup msg_file_management
index f0ea976..797a0ae 100644 (file)
@@ -247,6 +247,9 @@ static void _sg_cfg_cb__coll(const char *category,
   /* New Module missing */
   find_coll_description(table, val);
 }
+static void _sg_cfg_cb__coll_gather(const char *name, int pos){
+  _sg_cfg_cb__coll("gather", mpi_coll_gather_description, name, pos);
+}
 static void _sg_cfg_cb__coll_allgather(const char *name, int pos){
   _sg_cfg_cb__coll("allgather", mpi_coll_allgather_description, name, pos);
 }
@@ -758,6 +761,12 @@ void sg_config_init(int *argc, char **argv)
                     "Which collective selector to use",
                     xbt_cfgelm_string, &default_value, 1, 1, NULL,
                     NULL);
+                    
+               xbt_cfg_register(&_sg_cfg_set, "smpi/gather",
+                    "Which collective to use for gather",
+                    xbt_cfgelm_string, NULL, 1, 1, &_sg_cfg_cb__coll_gather,
+                    NULL);
+                    
     xbt_cfg_register(&_sg_cfg_set, "smpi/allgather",
                     "Which collective to use for allgather",
                     xbt_cfgelm_string, NULL, 1, 1, &_sg_cfg_cb__coll_allgather,
index 1b00eed..9f4f6dc 100644 (file)
@@ -17,15 +17,15 @@ XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_io, simix,
 
 //SIMIX FILE READ
 void SIMIX_pre_file_read(smx_simcall_t simcall, void *ptr, size_t size,
-                        size_t nmemb, smx_file_t fd)
+                       smx_file_t fd)
 {
-  smx_action_t action = SIMIX_file_read(simcall->issuer, ptr, size, nmemb, fd);
+  smx_action_t action = SIMIX_file_read(simcall->issuer, ptr, size, fd);
   xbt_fifo_push(action->simcalls, simcall);
   simcall->issuer->waiting_action = action;
 }
 
 smx_action_t SIMIX_file_read(smx_process_t process, void* ptr, size_t size,
-                             size_t nmemb, smx_file_t fd)
+                             smx_file_t fd)
 {
   smx_action_t action;
   smx_host_t host = process->smx_host;
@@ -46,7 +46,7 @@ smx_action_t SIMIX_file_read(smx_process_t process, void* ptr, size_t size,
 
   action->io.host = host;
   action->io.surf_io =
-      surf_workstation_model->extension.workstation.read(host, ptr, size, nmemb,
+      surf_workstation_model->extension.workstation.read(host, ptr, size,
                                                          fd->surf_file);
 
   surf_workstation_model->action_data_set(action->io.surf_io, action);
@@ -57,15 +57,15 @@ smx_action_t SIMIX_file_read(smx_process_t process, void* ptr, size_t size,
 
 //SIMIX FILE WRITE
 void SIMIX_pre_file_write(smx_simcall_t simcall, const void *ptr, size_t size,
-                         size_t nmemb, smx_file_t fd)
+                         smx_file_t fd)
 {
-  smx_action_t action = SIMIX_file_write(simcall->issuer, ptr, size, nmemb, fd);
+  smx_action_t action = SIMIX_file_write(simcall->issuer, ptr, size, fd);
   xbt_fifo_push(action->simcalls, simcall);
   simcall->issuer->waiting_action = action;
 }
 
 smx_action_t SIMIX_file_write(smx_process_t process, const void* ptr,
-                              size_t size, size_t nmemb, smx_file_t fd)
+                              size_t size, smx_file_t fd)
 {
   smx_action_t action;
   smx_host_t host = process->smx_host;
@@ -87,7 +87,7 @@ smx_action_t SIMIX_file_write(smx_process_t process, const void* ptr,
   action->io.host = host;
   action->io.surf_io =
       surf_workstation_model->extension.workstation.write(host, ptr, size,
-                                                          nmemb, fd->surf_file);
+                                                          fd->surf_file);
 
   surf_workstation_model->action_data_set(action->io.surf_io, action);
   XBT_DEBUG("Create io action %p", action);
index 0fd7e61..c4f1d65 100644 (file)
@@ -11,9 +11,9 @@
 #include "smx_smurf_private.h"
 
 void SIMIX_pre_file_read(smx_simcall_t simcall, void *ptr, size_t size,
-                        size_t nmemb, smx_file_t fd);
+                        smx_file_t fd);
 void SIMIX_pre_file_write(smx_simcall_t simcall, const void *ptr, size_t size,
-                         size_t nmemb, smx_file_t fd);
+                         smx_file_t fd);
 void SIMIX_pre_file_open(smx_simcall_t simcall, const char* mount,
                         const char* path);
 void SIMIX_pre_file_close(smx_simcall_t simcall, smx_file_t fd);
@@ -23,9 +23,9 @@ void SIMIX_pre_file_ls(smx_simcall_t simcall,
 size_t SIMIX_pre_file_get_size(smx_simcall_t simcall, smx_file_t fd);
 
 smx_action_t SIMIX_file_read(smx_process_t process, void* ptr, size_t size,
-                             size_t nmemb, smx_file_t fd);
+                             smx_file_t fd);
 smx_action_t SIMIX_file_write(smx_process_t process, const void* ptr,
-                              size_t size, size_t nmemb, smx_file_t fd);
+                              size_t size, smx_file_t fd);
 smx_action_t SIMIX_file_open(smx_process_t process, const char* storage,
                              const char* path);
 smx_action_t SIMIX_file_close(smx_process_t process, smx_file_t fd);
index 29144e0..5fccaac 100644 (file)
@@ -339,8 +339,8 @@ ACTION(SIMCALL_SEM_WOULD_BLOCK, sem_would_block, WITH_ANSWER, TINT(result), TSPE
 ACTION(SIMCALL_SEM_ACQUIRE, sem_acquire, WITHOUT_ANSWER, TVOID(result), TSPEC(sem, smx_sem_t)) sep \
 ACTION(SIMCALL_SEM_ACQUIRE_TIMEOUT, sem_acquire_timeout, WITHOUT_ANSWER, TVOID(result), TSPEC(sem, smx_sem_t), TDOUBLE(timeout)) sep \
 ACTION(SIMCALL_SEM_GET_CAPACITY, sem_get_capacity, WITH_ANSWER, TINT(result), TSPEC(sem, smx_sem_t)) sep \
-ACTION(SIMCALL_FILE_READ, file_read, WITHOUT_ANSWER, TDOUBLE(result), TPTR(ptr), TSIZE(size), TSIZE(nmemb), TSPEC(fd, smx_file_t)) sep \
-ACTION(SIMCALL_FILE_WRITE, file_write, WITHOUT_ANSWER, TSIZE(result), TCPTR(ptr), TSIZE(size), TSIZE(nmemb), TSPEC(fd, smx_file_t)) sep \
+ACTION(SIMCALL_FILE_READ, file_read, WITHOUT_ANSWER, TSIZE(result), TPTR(ptr), TSIZE(size), TSPEC(fd, smx_file_t)) sep \
+ACTION(SIMCALL_FILE_WRITE, file_write, WITHOUT_ANSWER, TSIZE(result), TCPTR(ptr), TSIZE(size), TSPEC(fd, smx_file_t)) sep \
 ACTION(SIMCALL_FILE_OPEN, file_open, WITHOUT_ANSWER, TSPEC(result, smx_file_t), TSTRING(mount), TSTRING(path)) sep \
 ACTION(SIMCALL_FILE_CLOSE, file_close, WITHOUT_ANSWER, TINT(result), TSPEC(fd, smx_file_t)) sep \
 ACTION(SIMCALL_FILE_UNLINK, file_unlink, WITH_ANSWER, TINT(result), TSPEC(fd, smx_file_t)) sep \
index d7580c3..544f97b 100644 (file)
@@ -1091,19 +1091,19 @@ int simcall_sem_get_capacity(smx_sem_t sem)
  * \ingroup simix_file_management
  *
  */
-double simcall_file_read(void* ptr, size_t size, size_t nmemb, smx_file_t fd)
+size_t simcall_file_read(void* ptr, size_t size, smx_file_t fd)
 {
-  return simcall_BODY_file_read(ptr, size, nmemb, fd);
+  return simcall_BODY_file_read(ptr, size, fd);
 }
 
 /**
  * \ingroup simix_file_management
  *
  */
-size_t simcall_file_write(const void* ptr, size_t size, size_t nmemb,
+size_t simcall_file_write(const void* ptr, size_t size,
                           smx_file_t fd)
 {
-  return simcall_BODY_file_write(ptr, size, nmemb, fd);
+  return simcall_BODY_file_write(ptr, size, fd);
 }
 
 /**
diff --git a/src/smpi/colls/allgather-ompi-neighborexchange.c b/src/smpi/colls/allgather-ompi-neighborexchange.c
new file mode 100644 (file)
index 0000000..238163e
--- /dev/null
@@ -0,0 +1,175 @@
+/*
+ * ompi_coll_tuned_allgather_intra_neighborexchange
+ *
+ * Function:     allgather using N/2 steps (O(N))
+ * Accepts:      Same arguments as MPI_Allgather
+ * Returns:      MPI_SUCCESS or error code
+ *
+ * Description:  Neighbor Exchange algorithm for allgather.
+ *               Described by Chen et.al. in 
+ *               "Performance Evaluation of Allgather Algorithms on 
+ *                Terascale Linux Cluster with Fast Ethernet",
+ *               Proceedings of the Eighth International Conference on 
+ *               High-Performance Computing in Asia-Pacific Region
+ *               (HPCASIA'05), 2005
+ * 
+ *               Rank r exchanges message with one of its neighbors and
+ *               forwards the data further in the next step.
+ *
+ *               No additional memory requirements.
+ * 
+ * Limitations:  Algorithm works only on even number of processes.
+ *               For odd number of processes we switch to ring algorithm.
+ * 
+ * Example on 6 nodes:
+ *  Initial state
+ *    #     0      1      2      3      4      5
+ *         [0]    [ ]    [ ]    [ ]    [ ]    [ ]
+ *         [ ]    [1]    [ ]    [ ]    [ ]    [ ]
+ *         [ ]    [ ]    [2]    [ ]    [ ]    [ ]
+ *         [ ]    [ ]    [ ]    [3]    [ ]    [ ]
+ *         [ ]    [ ]    [ ]    [ ]    [4]    [ ]
+ *         [ ]    [ ]    [ ]    [ ]    [ ]    [5]
+ *   Step 0:
+ *    #     0      1      2      3      4      5
+ *         [0]    [0]    [ ]    [ ]    [ ]    [ ]
+ *         [1]    [1]    [ ]    [ ]    [ ]    [ ]
+ *         [ ]    [ ]    [2]    [2]    [ ]    [ ]
+ *         [ ]    [ ]    [3]    [3]    [ ]    [ ]
+ *         [ ]    [ ]    [ ]    [ ]    [4]    [4]
+ *         [ ]    [ ]    [ ]    [ ]    [5]    [5]
+ *   Step 1:
+ *    #     0      1      2      3      4      5
+ *         [0]    [0]    [0]    [ ]    [ ]    [0]
+ *         [1]    [1]    [1]    [ ]    [ ]    [1]
+ *         [ ]    [2]    [2]    [2]    [2]    [ ]
+ *         [ ]    [3]    [3]    [3]    [3]    [ ]
+ *         [4]    [ ]    [ ]    [4]    [4]    [4]
+ *         [5]    [ ]    [ ]    [5]    [5]    [5]
+ *   Step 2:
+ *    #     0      1      2      3      4      5
+ *         [0]    [0]    [0]    [0]    [0]    [0]
+ *         [1]    [1]    [1]    [1]    [1]    [1]
+ *         [2]    [2]    [2]    [2]    [2]    [2]
+ *         [3]    [3]    [3]    [3]    [3]    [3]
+ *         [4]    [4]    [4]    [4]    [4]    [4]
+ *         [5]    [5]    [5]    [5]    [5]    [5]
+ */
+ #include "colls_private.h"
+ #define MCA_COLL_BASE_TAG_ALLGATHER 555
+int 
+smpi_coll_tuned_allgather_ompi_neighborexchange(void *sbuf, int scount,
+                                                 MPI_Datatype sdtype,
+                                                 void* rbuf, int rcount,
+                                                 MPI_Datatype rdtype,
+                                                 MPI_Comm comm
+)
+{
+   int line = -1;
+   int rank, size;
+   int neighbor[2], offset_at_step[2], recv_data_from[2], send_data_from;
+   int i, even_rank;
+   int err = 0;
+   ptrdiff_t slb, rlb, sext, rext;
+   char *tmpsend = NULL, *tmprecv = NULL;
+
+   size = smpi_comm_size(comm);
+   rank = smpi_comm_rank(comm);
+
+   if (size % 2) {
+      XBT_DEBUG(
+                   "coll:tuned:allgather_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm", 
+                   size);
+      return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype,
+                                                  rbuf, rcount, rdtype,
+                                                  comm);
+   }
+
+   XBT_DEBUG(
+                "coll:tuned:allgather_intra_neighborexchange rank %d", rank);
+
+   err = smpi_datatype_extent (sdtype, &slb, &sext);
+   if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+   err = smpi_datatype_extent (rdtype, &rlb, &rext);
+   if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+   /* Initialization step:
+      - if send buffer is not MPI_IN_PLACE, copy send buffer to appropriate block
+        of receive buffer
+   */
+   tmprecv = (char*) rbuf + rank * rcount * rext;
+   if (MPI_IN_PLACE != sbuf) {
+      tmpsend = (char*) sbuf;
+      smpi_datatype_copy (tmpsend, scount, sdtype, tmprecv, rcount, rdtype);
+   } 
+
+   /* Determine neighbors, order in which blocks will arrive, etc. */
+   even_rank = !(rank % 2);
+   if (even_rank) {
+      neighbor[0] = (rank + 1) % size;
+      neighbor[1] = (rank - 1 + size) % size;
+      recv_data_from[0] = rank;
+      recv_data_from[1] = rank;
+      offset_at_step[0] = (+2);
+      offset_at_step[1] = (-2);
+   } else {
+      neighbor[0] = (rank - 1 + size) % size;
+      neighbor[1] = (rank + 1) % size;
+      recv_data_from[0] = neighbor[0];
+      recv_data_from[1] = neighbor[0];
+      offset_at_step[0] = (-2);
+      offset_at_step[1] = (+2);
+   }
+
+   /* Communication loop:
+      - First step is special: exchange a single block with neighbor[0].
+      - Rest of the steps: 
+        update recv_data_from according to offset, and 
+        exchange two blocks with appropriate neighbor.
+        the send location becomes previous receive location.
+   */
+   tmprecv = (char*)rbuf + neighbor[0] * rcount * rext;
+   tmpsend = (char*)rbuf + rank * rcount * rext;
+   /* Sendreceive */
+   smpi_mpi_sendrecv(tmpsend, rcount, rdtype, neighbor[0],
+                                  MCA_COLL_BASE_TAG_ALLGATHER,
+                                  tmprecv, rcount, rdtype, neighbor[0],
+                                  MCA_COLL_BASE_TAG_ALLGATHER,
+                                  comm, MPI_STATUS_IGNORE);
+
+   /* Determine initial sending location */
+   if (even_rank) {
+      send_data_from = rank;
+   } else {
+      send_data_from = recv_data_from[0];
+   }
+
+   for (i = 1; i < (size / 2); i++) {
+      const int i_parity = i % 2;
+      recv_data_from[i_parity] = 
+         (recv_data_from[i_parity] + offset_at_step[i_parity] + size) % size;
+
+      tmprecv = (char*)rbuf + recv_data_from[i_parity] * rcount * rext;
+      tmpsend = (char*)rbuf + send_data_from * rcount * rext;
+      
+      /* Sendreceive */
+      smpi_mpi_sendrecv(tmpsend, 2 * rcount, rdtype, 
+                                     neighbor[i_parity], 
+                                     MCA_COLL_BASE_TAG_ALLGATHER,
+                                     tmprecv, 2 * rcount, rdtype,
+                                     neighbor[i_parity],
+                                     MCA_COLL_BASE_TAG_ALLGATHER,
+                                     comm, MPI_STATUS_IGNORE);
+
+      send_data_from = recv_data_from[i_parity];
+   }
+
+   return MPI_SUCCESS;
+
+ err_hndl:
+   XBT_DEBUG( "%s:%4d\tError occurred %d, rank %2d",
+                __FILE__, line, err, rank);
+   return err;
+}
diff --git a/src/smpi/colls/allgatherv-ompi-neighborexchange.c b/src/smpi/colls/allgatherv-ompi-neighborexchange.c
new file mode 100644 (file)
index 0000000..f80e681
--- /dev/null
@@ -0,0 +1,214 @@
+
+/*
+ * ompi_coll_tuned_allgatherv_intra_neighborexchange
+ *
+ * Function:     allgatherv using N/2 steps (O(N))
+ * Accepts:      Same arguments as MPI_Allgatherv
+ * Returns:      MPI_SUCCESS or error code
+ *
+ * Description:  Neighbor Exchange algorithm for allgather adapted for 
+ *               allgatherv.
+ *               Described by Chen et.al. in 
+ *               "Performance Evaluation of Allgather Algorithms on 
+ *                Terascale Linux Cluster with Fast Ethernet",
+ *               Proceedings of the Eighth International Conference on 
+ *               High-Performance Computing in Asia-Pacific Region
+ *               (HPCASIA'05), 2005
+ * 
+ *               Rank r exchanges message with one of its neighbors and
+ *               forwards the data further in the next step.
+ *
+ *               No additional memory requirements.
+ * 
+ * Limitations:  Algorithm works only on even number of processes.
+ *               For odd number of processes we switch to ring algorithm.
+ * 
+ * Example on 6 nodes:
+ *  Initial state
+ *    #     0      1      2      3      4      5
+ *         [0]    [ ]    [ ]    [ ]    [ ]    [ ]
+ *         [ ]    [1]    [ ]    [ ]    [ ]    [ ]
+ *         [ ]    [ ]    [2]    [ ]    [ ]    [ ]
+ *         [ ]    [ ]    [ ]    [3]    [ ]    [ ]
+ *         [ ]    [ ]    [ ]    [ ]    [4]    [ ]
+ *         [ ]    [ ]    [ ]    [ ]    [ ]    [5]
+ *   Step 0:
+ *    #     0      1      2      3      4      5
+ *         [0]    [0]    [ ]    [ ]    [ ]    [ ]
+ *         [1]    [1]    [ ]    [ ]    [ ]    [ ]
+ *         [ ]    [ ]    [2]    [2]    [ ]    [ ]
+ *         [ ]    [ ]    [3]    [3]    [ ]    [ ]
+ *         [ ]    [ ]    [ ]    [ ]    [4]    [4]
+ *         [ ]    [ ]    [ ]    [ ]    [5]    [5]
+ *   Step 1:
+ *    #     0      1      2      3      4      5
+ *         [0]    [0]    [0]    [ ]    [ ]    [0]
+ *         [1]    [1]    [1]    [ ]    [ ]    [1]
+ *         [ ]    [2]    [2]    [2]    [2]    [ ]
+ *         [ ]    [3]    [3]    [3]    [3]    [ ]
+ *         [4]    [ ]    [ ]    [4]    [4]    [4]
+ *         [5]    [ ]    [ ]    [5]    [5]    [5]
+ *   Step 2:
+ *    #     0      1      2      3      4      5
+ *         [0]    [0]    [0]    [0]    [0]    [0]
+ *         [1]    [1]    [1]    [1]    [1]    [1]
+ *         [2]    [2]    [2]    [2]    [2]    [2]
+ *         [3]    [3]    [3]    [3]    [3]    [3]
+ *         [4]    [4]    [4]    [4]    [4]    [4]
+ *         [5]    [5]    [5]    [5]    [5]    [5]
+ */
+ #include "colls_private.h"
+ #define  MCA_COLL_BASE_TAG_ALLGATHERV 444
+int 
+smpi_coll_tuned_allgatherv_ompi_neighborexchange(void *sbuf, int scount,
+                                                  MPI_Datatype sdtype,
+                                                  void* rbuf, int *rcounts, int *rdispls,
+                                                  MPI_Datatype rdtype,
+                                                  MPI_Comm comm)
+{
+    int line = -1;
+    int rank, size;
+    int neighbor[2], offset_at_step[2], recv_data_from[2], send_data_from;
+  
+    int i, even_rank;
+    int err = 0;
+    ptrdiff_t slb, rlb, sext, rext;
+    char *tmpsend = NULL, *tmprecv = NULL;
+
+
+    size = smpi_comm_size(comm);
+    rank = smpi_comm_rank(comm);
+
+    if (size % 2) {
+        XBT_DEBUG(
+                     "coll:tuned:allgatherv_ompi_neighborexchange WARNING: odd size %d, switching to ring algorithm", 
+                     size);
+        return smpi_coll_tuned_allgatherv_ring(sbuf, scount, sdtype,
+                                                     rbuf, rcounts, 
+                                                     rdispls, rdtype,
+                                                     comm);
+    }
+
+    XBT_DEBUG(
+                 "coll:tuned:allgatherv_ompi_neighborexchange rank %d", rank);
+
+    err = smpi_datatype_extent (sdtype, &slb, &sext);
+    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+    err = smpi_datatype_extent (rdtype, &rlb, &rext);
+    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+    /* Initialization step:
+       - if send buffer is not MPI_IN_PLACE, copy send buffer to 
+       the appropriate block of receive buffer
+    */
+    tmprecv = (char*) rbuf + rdispls[rank] * rext;
+    if (MPI_IN_PLACE != sbuf) {
+        tmpsend = (char*) sbuf;
+        err = smpi_datatype_copy(tmpsend, scount, sdtype, 
+                              tmprecv, rcounts[rank], rdtype);
+        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl;  }
+    } 
+
+    /* Determine neighbors, order in which blocks will arrive, etc. */
+    even_rank = !(rank % 2);
+    if (even_rank) {
+        neighbor[0] = (rank + 1) % size;
+        neighbor[1] = (rank - 1 + size) % size;
+        recv_data_from[0] = rank;
+        recv_data_from[1] = rank;
+        offset_at_step[0] = (+2);
+        offset_at_step[1] = (-2);
+    } else {
+        neighbor[0] = (rank - 1 + size) % size;
+        neighbor[1] = (rank + 1) % size;
+        recv_data_from[0] = neighbor[0];
+        recv_data_from[1] = neighbor[0];
+        offset_at_step[0] = (-2);
+        offset_at_step[1] = (+2);
+    }
+
+    /* Communication loop:
+       - First step is special: exchange a single block with neighbor[0].
+       - Rest of the steps: 
+       update recv_data_from according to offset, and 
+       exchange two blocks with appropriate neighbor.
+       the send location becomes previous receive location.
+       Note, we need to create indexed datatype to send and receive these
+       blocks properly.
+    */
+    tmprecv = (char*)rbuf + rdispls[neighbor[0]] * rext;
+    tmpsend = (char*)rbuf + rdispls[rank] * rext;
+    smpi_mpi_sendrecv(tmpsend, rcounts[rank], rdtype, 
+                                   neighbor[0], MCA_COLL_BASE_TAG_ALLGATHERV,
+                                   tmprecv, rcounts[neighbor[0]], rdtype, 
+                                   neighbor[0], MCA_COLL_BASE_TAG_ALLGATHERV,
+                                   comm, MPI_STATUS_IGNORE);
+
+
+
+  
+   
+    /* Determine initial sending counts and displacements*/
+    if (even_rank) {
+        send_data_from = rank;
+    } else {
+        send_data_from = recv_data_from[0];
+    }
+
+    for (i = 1; i < (size / 2); i++) {
+        MPI_Datatype new_rdtype, new_sdtype;
+        int new_scounts[2], new_sdispls[2], new_rcounts[2], new_rdispls[2];
+        const int i_parity = i % 2;
+        recv_data_from[i_parity] = 
+            (recv_data_from[i_parity] + offset_at_step[i_parity] + size) % size;
+
+        /* Create new indexed types for sending and receiving.
+           We are sending data from ranks (send_data_from) and (send_data_from+1)
+           We are receiving data from ranks (recv_data_from[i_parity]) and
+           (recv_data_from[i_parity]+1).
+        */
+        
+        new_scounts[0] = rcounts[send_data_from];
+        new_scounts[1] = rcounts[(send_data_from + 1)];
+        new_sdispls[0] = rdispls[send_data_from];
+        new_sdispls[1] = rdispls[(send_data_from + 1)];
+        err = smpi_datatype_indexed(2, new_scounts, new_sdispls, rdtype, 
+                                      &new_sdtype);
+        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+        smpi_datatype_commit(&new_sdtype);
+
+        new_rcounts[0] = rcounts[recv_data_from[i_parity]];
+        new_rcounts[1] = rcounts[(recv_data_from[i_parity] + 1)];
+        new_rdispls[0] = rdispls[recv_data_from[i_parity]];
+        new_rdispls[1] = rdispls[(recv_data_from[i_parity] + 1)];
+        err = smpi_datatype_indexed(2, new_rcounts, new_rdispls, rdtype, 
+                                      &new_rdtype);
+        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+        smpi_datatype_commit(&new_rdtype);
+      
+        tmprecv = (char*)rbuf;
+        tmpsend = (char*)rbuf;
+      
+        /* Sendreceive */
+        smpi_mpi_sendrecv(tmpsend, 1, new_sdtype, neighbor[i_parity],
+                                       MCA_COLL_BASE_TAG_ALLGATHERV,
+                                       tmprecv, 1, new_rdtype, neighbor[i_parity],
+                                       MCA_COLL_BASE_TAG_ALLGATHERV,
+                                       comm, MPI_STATUS_IGNORE);
+
+        send_data_from = recv_data_from[i_parity];
+      
+        smpi_datatype_free(&new_sdtype);
+        smpi_datatype_free(&new_rdtype);
+    }
+
+    return MPI_SUCCESS;
+
+ err_hndl:
+    XBT_DEBUG(  "%s:%4d\tError occurred %d, rank %2d",
+                 __FILE__, line, err, rank);
+    return err;
+}
index 9af9ad3..4495b4d 100644 (file)
 #define COLL_NOsep 
 #define COLL_NOTHING(...) 
 
+/*************
+ * GATHER *
+ *************/
+#define COLL_GATHER_SIG gather, int, \
+                         (void *send_buff, int send_count, MPI_Datatype send_type, \
+                          void *recv_buff, int recv_count, MPI_Datatype recv_type, \
+                           int root, MPI_Comm comm)
+
+#define COLL_GATHERS(action, COLL_sep) \
+COLL_APPLY(action, COLL_GATHER_SIG, ompi) COLL_sep \
+COLL_APPLY(action, COLL_GATHER_SIG, ompi_basic_linear) COLL_sep \
+COLL_APPLY(action, COLL_GATHER_SIG, ompi_binomial) COLL_sep \
+COLL_APPLY(action, COLL_GATHER_SIG, ompi_linear_sync) \
+
+
+COLL_GATHERS(COLL_PROTO, COLL_NOsep)
 
 /*************
  * ALLGATHER *
@@ -45,7 +61,8 @@ COLL_APPLY(action, COLL_ALLGATHER_SIG, ring) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHER_SIG, SMP_NTS) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHER_SIG, smp_simple) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHER_SIG, spreading_simple) COLL_sep \
-COLL_APPLY(action, COLL_ALLGATHER_SIG, ompi)
+COLL_APPLY(action, COLL_ALLGATHER_SIG, ompi) COLL_sep \
+COLL_APPLY(action, COLL_ALLGATHER_SIG, ompi_neighborexchange)
 
 COLL_ALLGATHERS(COLL_PROTO, COLL_NOsep)
 
@@ -61,7 +78,8 @@ COLL_ALLGATHERS(COLL_PROTO, COLL_NOsep)
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, GB) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, pair) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, ring) COLL_sep \
-COLL_APPLY(action, COLL_ALLGATHERV_SIG, ompi)
+COLL_APPLY(action, COLL_ALLGATHERV_SIG, ompi) COLL_sep \
+COLL_APPLY(action, COLL_ALLGATHERV_SIG, ompi_neighborexchange)
 
 COLL_ALLGATHERVS(COLL_PROTO, COLL_NOsep)
 
diff --git a/src/smpi/colls/gather-ompi.c b/src/smpi/colls/gather-ompi.c
new file mode 100644 (file)
index 0000000..22e6e63
--- /dev/null
@@ -0,0 +1,410 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2009 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "colls_private.h"
+#include "coll_tuned_topo.h"
+
+#define MCA_COLL_BASE_TAG_GATHER 333
+/* Todo: gather_intra_generic, gather_intra_binary, gather_intra_chain,
+ * gather_intra_pipeline, segmentation? */
+int
+smpi_coll_tuned_gather_ompi_binomial(void *sbuf, int scount,
+                                     MPI_Datatype sdtype,
+                                     void *rbuf, int rcount,
+                                     MPI_Datatype rdtype,
+                                     int root,
+                                     MPI_Comm comm)
+{
+    int line = -1;
+    int i;
+    int rank;
+    int vrank;
+    int size;
+    int total_recv = 0;
+    char *ptmp     = NULL;
+    char *tempbuf  = NULL;
+    int err;
+    ompi_coll_tree_t* bmtree;
+    MPI_Status status;
+    MPI_Aint sextent, slb, strue_lb, strue_extent; 
+    MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent;
+
+
+    size = smpi_comm_size(comm);
+    rank = smpi_comm_rank(comm);
+
+    XBT_DEBUG(
+                "smpi_coll_tuned_gather_ompi_binomial rank %d", rank);
+
+    /* create the binomial tree */
+   // COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root );
+    bmtree = ompi_coll_tuned_topo_build_in_order_bmtree(comm, root);
+    // data->cached_in_order_bmtree;
+
+    smpi_datatype_extent(sdtype, &slb, &sextent);
+    smpi_datatype_extent(sdtype, &strue_lb, &strue_extent);
+
+    vrank = (rank - root + size) % size;
+
+    if (rank == root) {
+        smpi_datatype_extent(rdtype, &rlb, &rextent);
+        smpi_datatype_extent(rdtype, &rtrue_lb, &rtrue_extent);
+       if (0 == root){
+           /* root on 0, just use the recv buffer */
+           ptmp = (char *) rbuf;
+           if (sbuf != MPI_IN_PLACE) {
+               err = smpi_datatype_copy(sbuf, scount, sdtype,
+                                     ptmp, rcount, rdtype);
+               if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+           }
+       } else {
+           /* root is not on 0, allocate temp buffer for recv,
+            * rotate data at the end */
+           tempbuf = (char *) malloc(rtrue_extent + (rcount*size - 1) * rextent);
+           if (NULL == tempbuf) {
+               err= MPI_ERR_OTHER; line = __LINE__; goto err_hndl;
+           }
+
+           ptmp = tempbuf - rlb;
+           if (sbuf != MPI_IN_PLACE) {
+               /* copy from sbuf to temp buffer */
+               err = smpi_datatype_copy(sbuf, scount, sdtype,
+                                     ptmp, rcount, rdtype);
+               if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+           } else {
+               /* copy from rbuf to temp buffer  */
+               err = smpi_datatype_copy((char *) rbuf + rank*rextent*rcount, rcount, rdtype, ptmp, rcount, rdtype );
+               if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+           }
+       }
+       total_recv = rcount;
+    } else if (!(vrank % 2)) {
+       /* other non-leaf nodes, allocate temp buffer for data received from
+        * children, the most we need is half of the total data elements due
+        * to the property of binomial tree */
+       tempbuf = (char *) malloc(strue_extent + (scount*size - 1) * sextent);
+       if (NULL == tempbuf) {
+           err= MPI_ERR_OTHER; line = __LINE__; goto err_hndl;
+       }
+
+       ptmp = tempbuf - slb;
+       /* local copy to tempbuf */
+       err = smpi_datatype_copy(sbuf, scount, sdtype,
+                                   ptmp, scount, sdtype);
+       if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+       /* use sdtype,scount as rdtype,rdcount since they are ignored on
+        * non-root procs */
+       rdtype = sdtype;
+       rcount = scount;
+       rextent = sextent;
+       total_recv = rcount;
+    } else {
+       /* leaf nodes, no temp buffer needed, use sdtype,scount as
+        * rdtype,rdcount since they are ignored on non-root procs */
+       ptmp = (char *) sbuf;
+       total_recv = scount;
+    }
+
+    if (!(vrank % 2)) {
+       /* all non-leaf nodes recv from children */
+       for (i = 0; i < bmtree->tree_nextsize; i++) {
+           int mycount = 0, vkid;
+           /* figure out how much data I have to send to this child */
+           vkid = (bmtree->tree_next[i] - root + size) % size;
+           mycount = vkid - vrank;
+           if (mycount > (size - vkid))
+               mycount = size - vkid;
+           mycount *= rcount;
+
+           XBT_DEBUG(
+                        "smpi_coll_tuned_gather_ompi_binomial rank %d recv %d mycount = %d",
+                        rank, bmtree->tree_next[i], mycount);
+
+           smpi_mpi_recv(ptmp + total_recv*rextent, rcount*size-total_recv, rdtype,
+                                   bmtree->tree_next[i], MCA_COLL_BASE_TAG_GATHER,
+                                   comm, &status);
+
+           total_recv += mycount;
+       }
+    }
+
+    if (rank != root) {
+       /* all nodes except root send to parents */
+       XBT_DEBUG(
+                    "smpi_coll_tuned_gather_ompi_binomial rank %d send %d count %d\n",
+                    rank, bmtree->tree_prev, total_recv);
+
+       smpi_mpi_send(ptmp, total_recv, sdtype,
+                               bmtree->tree_prev,
+                               MCA_COLL_BASE_TAG_GATHER,
+                                comm);
+  }
+    if (rank == root) {
+       if (root != 0) {
+           /* rotate received data on root if root != 0 */
+           err = smpi_datatype_copy(ptmp, rcount*(size - root), rdtype,
+                                                (char *) rbuf + rextent*root*rcount, rcount*(size - root), rdtype );
+           if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+
+           err = smpi_datatype_copy( ptmp + rextent*rcount*(size-root), rcount*root,rdtype, 
+                                                (char *) rbuf,rcount*root,rdtype);
+           if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+           free(tempbuf);
+       }
+    } else if (!(vrank % 2)) {
+       /* other non-leaf nodes */
+       free(tempbuf);
+    }
+    return MPI_SUCCESS;
+
+ err_hndl:
+    if (NULL != tempbuf)
+       free(tempbuf);
+
+    XBT_DEBUG(  "%s:%4d\tError occurred %d, rank %2d",
+                __FILE__, line, err, rank);
+    return err;
+}
+
+/*
+ *     gather_intra_linear_sync
+ *
+ *     Function:       - synchronized gather operation with
+ *     Accepts:        - same arguments as MPI_Gather(), first segment size
+ *     Returns:        - MPI_SUCCESS or error code
+ */
+int
+smpi_coll_tuned_gather_ompi_linear_sync(void *sbuf, int scount,
+                                         MPI_Datatype sdtype,
+                                         void *rbuf, int rcount,
+                                         MPI_Datatype rdtype,
+                                         int root, 
+                                         MPI_Comm comm)
+{
+    int i;
+    int ret, line;
+    int rank, size;
+    int first_segment_count;
+    size_t typelng;
+    MPI_Aint extent;
+    MPI_Aint lb;
+
+    int first_segment_size=0;
+    size = smpi_comm_size(comm);
+    rank = smpi_comm_rank(comm);
+    
+    size_t dsize, block_size;
+    if (rank == root) {
+        dsize= smpi_datatype_size(rdtype);
+        block_size = dsize * rcount;
+    } else {
+        dsize=smpi_datatype_size(sdtype);
+        block_size = dsize * scount;
+    }
+    
+     if (block_size > 92160){
+     first_segment_size = 32768;
+     }else{
+     first_segment_size = 1024;
+     }
+
+    XBT_DEBUG(
+                "smpi_coll_tuned_gather_ompi_linear_sync rank %d, segment %d", rank, first_segment_size);
+
+    if (rank != root) {
+        /* Non-root processes:
+           - receive zero byte message from the root,
+           - send the first segment of the data synchronously,
+           - send the second segment of the data.
+        */
+
+        typelng= smpi_datatype_size(sdtype);
+        smpi_datatype_extent(sdtype, &lb, &extent);
+        first_segment_count = scount;
+        COLL_TUNED_COMPUTED_SEGCOUNT( (size_t) first_segment_size, typelng, 
+                                      first_segment_count );
+
+        smpi_mpi_recv(sbuf, 0, MPI_BYTE, root, 
+                                MCA_COLL_BASE_TAG_GATHER,
+                                comm, MPI_STATUS_IGNORE);
+
+        smpi_mpi_send(sbuf, first_segment_count, sdtype, root,
+                                MCA_COLL_BASE_TAG_GATHER,
+                                 comm);
+
+        smpi_mpi_send((char*)sbuf + extent * first_segment_count, 
+                                (scount - first_segment_count), sdtype, 
+                                root, MCA_COLL_BASE_TAG_GATHER,
+                                 comm);
+    }
+
+    else {
+        /* Root process, 
+           - For every non-root node:
+          - post irecv for the first segment of the message
+          - send zero byte message to signal node to send the message
+          - post irecv for the second segment of the message
+          - wait for the first segment to complete
+           - Copy local data if necessary
+           - Waitall for all the second segments to complete.
+       */
+        char *ptmp;
+        MPI_Request *reqs = NULL, first_segment_req;
+        reqs = (MPI_Request *) calloc(size, sizeof(MPI_Request ));
+        if (NULL == reqs) { ret = -1; line = __LINE__; goto error_hndl; }
+        
+        typelng=smpi_datatype_size(rdtype);
+        smpi_datatype_extent(rdtype, &lb, &extent);
+        first_segment_count = rcount;
+        COLL_TUNED_COMPUTED_SEGCOUNT( (size_t)first_segment_size, typelng, 
+                                      first_segment_count );
+
+        ptmp = (char *) rbuf;
+        for (i = 0; i < size; ++i) {
+            if (i == rank) {  
+                /* skip myself */
+                reqs[i] = MPI_REQUEST_NULL; 
+                continue; 
+            } 
+
+            /* irecv for the first segment from i */
+            ptmp = (char*)rbuf + i * rcount * extent;
+            first_segment_req = smpi_mpi_irecv(ptmp, first_segment_count, rdtype, i,
+                                     MCA_COLL_BASE_TAG_GATHER, comm
+                                     );
+            
+            /* send sync message */
+            smpi_mpi_send(rbuf, 0, MPI_BYTE, i,
+                                    MCA_COLL_BASE_TAG_GATHER,
+                                     comm);
+
+            /* irecv for the second segment */
+            ptmp = (char*)rbuf + (i * rcount + first_segment_count) * extent;
+            reqs[i]=smpi_mpi_irecv(ptmp, (rcount - first_segment_count), 
+                                     rdtype, i, MCA_COLL_BASE_TAG_GATHER, comm
+                                     );
+
+            /* wait on the first segment to complete */
+            smpi_mpi_wait(&first_segment_req, MPI_STATUS_IGNORE);
+        }
+
+        /* copy local data if necessary */
+        if (MPI_IN_PLACE != sbuf) {
+            ret = smpi_datatype_copy(sbuf, scount, sdtype,
+                                  (char*)rbuf + rank * rcount * extent, 
+                                  rcount, rdtype);
+            if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
+        }
+        
+        /* wait all second segments to complete */
+        ret = smpi_mpi_waitall(size, reqs, MPI_STATUSES_IGNORE);
+
+        free(reqs);
+    }
+
+    /* All done */
+
+    return MPI_SUCCESS;
+ error_hndl:
+    XBT_DEBUG( 
+                   "ERROR_HNDL: node %d file %s line %d error %d\n", 
+                   rank, __FILE__, line, ret );
+    return ret;
+}
+
+/*
+ * Linear functions are copied from the BASIC coll module
+ * they do not segment the message and are simple implementations
+ * but for some small number of nodes and/or small data sizes they 
+ * are just as fast as tuned/tree based segmenting operations 
+ * and as such may be selected by the decision functions
+ * These are copied into this module due to the way we select modules
+ * in V1. i.e. in V2 we will handle this differently and so will not
+ * have to duplicate code.
+ * JPG following the examples from other coll_tuned implementations. Dec06.
+ */
+
+/* copied function (with appropriate renaming) starts here */
+/*
+ *     gather_intra
+ *
+ *     Function:       - basic gather operation
+ *     Accepts:        - same arguments as MPI_Gather()
+ *     Returns:        - MPI_SUCCESS or error code
+ */
+int
+smpi_coll_tuned_gather_ompi_basic_linear(void *sbuf, int scount,
+                                         MPI_Datatype sdtype,
+                                         void *rbuf, int rcount,
+                                         MPI_Datatype rdtype,
+                                         int root,
+                                         MPI_Comm comm)
+{
+    int i;
+    int err;
+    int rank;
+    int size;
+    char *ptmp;
+    MPI_Aint incr;
+    MPI_Aint extent;
+    MPI_Aint lb;
+
+    size = smpi_comm_size(comm);
+    rank = smpi_comm_rank(comm);
+
+    /* Everyone but root sends data and returns. */
+    XBT_DEBUG(
+                "ompi_coll_tuned_gather_intra_basic_linear rank %d", rank);
+
+    if (rank != root) {
+        smpi_mpi_send(sbuf, scount, sdtype, root,
+                                 MCA_COLL_BASE_TAG_GATHER,
+                                  comm);
+        return MPI_SUCCESS;
+    }
+
+    /* I am the root, loop receiving the data. */
+
+    smpi_datatype_extent(rdtype, &lb, &extent);
+    incr = extent * rcount;
+    for (i = 0, ptmp = (char *) rbuf; i < size; ++i, ptmp += incr) {
+        if (i == rank) {
+            if (MPI_IN_PLACE != sbuf) {
+                err = smpi_datatype_copy(sbuf, scount, sdtype,
+                                      ptmp, rcount, rdtype);
+            } else {
+                err = MPI_SUCCESS;
+            }
+        } else {
+            smpi_mpi_recv(ptmp, rcount, rdtype, i,
+                                    MCA_COLL_BASE_TAG_GATHER,
+                                    comm, MPI_STATUS_IGNORE);
+            err = MPI_SUCCESS;
+        }
+        if (MPI_SUCCESS != err) {
+            return err;
+        }
+    }
+
+    /* All done */
+
+    return MPI_SUCCESS;
+}
index 55f9879..72c3699 100644 (file)
@@ -426,15 +426,15 @@ int smpi_coll_tuned_allgather_ompi(void *sbuf, int scount,
                                                          comm);
         }
     } else {
-        //if (communicator_size % 2) {
+        if (communicator_size % 2) {
             return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype, 
                                                         rbuf, rcount, rdtype, 
                                                         comm);
-        /*} else {
-            return  smpi_coll_tuned_allgather_intra_neighborexchange(sbuf, scount, sdtype,
+        } else {
+            return  smpi_coll_tuned_allgather_ompi_neighborexchange(sbuf, scount, sdtype,
                                                                      rbuf, rcount, rdtype,
-                                                                     comm, module);
-        }*/
+                                                                     comm);
+        }
     }
    
 #if defined(USE_MPICH2_DECISION)
@@ -448,17 +448,17 @@ int smpi_coll_tuned_allgather_ompi(void *sbuf, int scount,
        - for everything else use ring.
     */
     if ((pow2_size == communicator_size) && (total_dsize < 524288)) {
-        return smpi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype, 
+        return smpi_coll_tuned_allgather_rdb(sbuf, scount, sdtype, 
                                                                  rbuf, rcount, rdtype, 
-                                                                 comm, module);
+                                                                 comm);
     } else if (total_dsize <= 81920) { 
-        return smpi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype, 
+        return smpi_coll_tuned_allgather_bruck(sbuf, scount, sdtype, 
                                                      rbuf, rcount, rdtype,
-                                                     comm, module);
+                                                     comm);
     } 
-    return smpi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype, 
+    return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype, 
                                                 rbuf, rcount, rdtype,
-                                                comm, module);
+                                                comm);
 #endif  /* defined(USE_MPICH2_DECISION) */
 }
 
@@ -500,30 +500,30 @@ int smpi_coll_tuned_allgatherv_ompi(void *sbuf, int scount,
                                                       comm);
 
     } else {
-//        if (communicator_size % 2) {
+        if (communicator_size % 2) {
             return smpi_coll_tuned_allgatherv_ring(sbuf, scount, sdtype, 
                                                          rbuf, rcounts, rdispls, rdtype, 
                                                          comm);
-/*        } else {
-            return  smpi_coll_tuned_allgatherv_intra_neighborexchange(sbuf, scount, sdtype,
+        } else {
+            return  smpi_coll_tuned_allgatherv_ompi_neighborexchange(sbuf, scount, sdtype,
                                                                       rbuf, rcounts, rdispls, rdtype, 
-                                                                      comm, module);
-        }*/
+                                                                      comm);
+        }
     }
 }
-/*
+
 int smpi_coll_tuned_gather_ompi(void *sbuf, int scount, 
                                            MPI_Datatype sdtype,
                                            void* rbuf, int rcount, 
                                            MPI_Datatype rdtype, 
                                            int root,
-                                           MPI_Comm  comm,
+                                           MPI_Comm  comm
                                            )
 {
-    const int large_segment_size = 32768;
-    const int small_segment_size = 1024;
+    //const int large_segment_size = 32768;
+    //const int small_segment_size = 1024;
 
-    const size_t large_block_size = 92160;
+    //const size_t large_block_size = 92160;
     const size_t intermediate_block_size = 6000;
     const size_t small_block_size = 1024;
 
@@ -533,46 +533,43 @@ int smpi_coll_tuned_gather_ompi(void *sbuf, int scount,
     int communicator_size, rank;
     size_t dsize, block_size;
 
-    OPAL_OUTPUT((smpi_coll_tuned_stream, 
-                 "smpi_coll_tuned_gather_ompi"));
+    XBT_DEBUG("smpi_coll_tuned_gather_ompi");
 
     communicator_size = smpi_comm_size(comm);
-    rank = ompi_comm_rank(comm);
+    rank = smpi_comm_rank(comm);
 
     // Determine block size 
     if (rank == root) {
-        ompi_datatype_type_size(rdtype, &dsize);
+        dsize = smpi_datatype_size(rdtype);
         block_size = dsize * rcount;
     } else {
-        ompi_datatype_type_size(sdtype, &dsize);
+        dsize = smpi_datatype_size(sdtype);
         block_size = dsize * scount;
     }
 
-    if (block_size > large_block_size) {
-        return smpi_coll_tuned_gather_intra_linear_sync (sbuf, scount, sdtype, 
-                                                         rbuf, rcount, rdtype, 
-                                                         root, comm, module,
-                                                         large_segment_size);
+/*    if (block_size > large_block_size) {*/
+/*        return smpi_coll_tuned_gather_ompi_linear_sync (sbuf, scount, sdtype, */
+/*                                                         rbuf, rcount, rdtype, */
+/*                                                         root, comm);*/
 
-    } else if (block_size > intermediate_block_size) {
-        return smpi_coll_tuned_gather_intra_linear_sync (sbuf, scount, sdtype, 
+/*    } else*/ if (block_size > intermediate_block_size) {
+        return smpi_coll_tuned_gather_ompi_linear_sync (sbuf, scount, sdtype, 
                                                          rbuf, rcount, rdtype, 
-                                                         root, comm, module,
-                                                         small_segment_size);
+                                                         root, comm);
 
     } else if ((communicator_size > large_communicator_size) ||
                ((communicator_size > small_communicator_size) &&
                 (block_size < small_block_size))) {
-        return smpi_coll_tuned_gather_intra_binomial (sbuf, scount, sdtype, 
+        return smpi_coll_tuned_gather_ompi_binomial (sbuf, scount, sdtype, 
                                                       rbuf, rcount, rdtype, 
-                                                      root, comm, module);
+                                                      root, comm);
 
     }
     // Otherwise, use basic linear 
-    return smpi_coll_tuned_gather_intra_basic_linear (sbuf, scount, sdtype, 
+    return smpi_coll_tuned_gather_ompi_basic_linear (sbuf, scount, sdtype, 
                                                       rbuf, rcount, rdtype, 
-                                                      root, comm, module);
-}*/
+                                                      root, comm);
+}
 /*
 int smpi_coll_tuned_scatter_ompi(void *sbuf, int scount, 
                                             MPI_Datatype sdtype,
index 1f7d45d..cf46856 100644 (file)
@@ -114,7 +114,7 @@ void smpi_datatype_use(MPI_Datatype type);
 void smpi_datatype_unuse(MPI_Datatype type);
 
 int smpi_datatype_contiguous(int count, MPI_Datatype old_type,
-                       MPI_Datatype* new_type);
+                       MPI_Datatype* new_type, MPI_Aint lb);
 int smpi_datatype_vector(int count, int blocklen, int stride,
                       MPI_Datatype old_type, MPI_Datatype* new_type);
 
index 503c060..f242c77 100644 (file)
 #include "colls/colls.h"
 #include "simgrid/sg_config.h"
 
+s_mpi_coll_description_t mpi_coll_gather_description[] = {
+  {"default",
+   "gather default collective",
+   smpi_mpi_gather},
+COLL_GATHERS(COLL_DESCRIPTION, COLL_COMMA),
+  {NULL, NULL, NULL}      /* this array must be NULL terminated */
+};
+
+
 s_mpi_coll_description_t mpi_coll_allgather_description[] = {
   {"default",
    "allgather default collective",
@@ -62,7 +71,7 @@ COLL_ALLTOALLVS(COLL_DESCRIPTION, COLL_COMMA),
 
 s_mpi_coll_description_t mpi_coll_bcast_description[] = {
   {"default",
-   "allgather default collective",
+   "bcast default collective",
    smpi_mpi_bcast},
 COLL_BCASTS(COLL_DESCRIPTION, COLL_COMMA),
   {NULL, NULL, NULL}      /* this array must be NULL terminated */
@@ -70,7 +79,7 @@ COLL_BCASTS(COLL_DESCRIPTION, COLL_COMMA),
 
 s_mpi_coll_description_t mpi_coll_reduce_description[] = {
   {"default",
-   "allgather default collective",
+   "reduce default collective",
    smpi_mpi_reduce},
 COLL_REDUCES(COLL_DESCRIPTION, COLL_COMMA),
   {NULL, NULL, NULL}      /* this array must be NULL terminated */
@@ -126,6 +135,7 @@ int find_coll_description(s_mpi_coll_description_t * table,
 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_coll, smpi,
                                 "Logging specific to SMPI (coll)");
 
+int (*mpi_coll_gather_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, int root, MPI_Comm);
 int (*mpi_coll_allgather_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm);
 int (*mpi_coll_allgatherv_fun)(void *, int, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm);
 int (*mpi_coll_allreduce_fun)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm);
index 7fca6d7..189bac1 100644 (file)
@@ -360,6 +360,12 @@ int smpi_main(int (*realmain) (int argc, char *argv[]),int argc, char *argv[])
   SIMIX_function_register_default(realmain);
   SIMIX_launch_application(argv[2]);
 
+  int gather_id = find_coll_description(mpi_coll_gather_description,
+                                           sg_cfg_get_string("smpi/gather"));
+  mpi_coll_gather_fun = (int (*)(void *, int, MPI_Datatype,
+                                   void*, int, MPI_Datatype, int, MPI_Comm))
+                          mpi_coll_gather_description[gather_id].coll;
+
   int allgather_id = find_coll_description(mpi_coll_allgather_description,
                                            sg_cfg_get_string("smpi/allgather"));
   mpi_coll_allgather_fun = (int (*)(void *, int, MPI_Datatype,
index 6041b85..e42513b 100644 (file)
@@ -323,17 +323,100 @@ void smpi_datatype_unuse(MPI_Datatype type){
     smpi_datatype_free(&type);
 }
 
-int smpi_datatype_contiguous(int count, MPI_Datatype old_type, MPI_Datatype* new_type)
+
+
+
+/*
+Contiguous Implementation
+*/
+
+
+/*
+ *  Copies noncontiguous data into contiguous memory.
+ *  @param contiguous_hvector - output hvector
+ *  @param noncontiguous_hvector - input hvector
+ *  @param type - pointer contening :
+ *      - stride - stride of between noncontiguous data, in bytes
+ *      - block_length - the width or height of blocked matrix
+ *      - count - the number of rows of matrix
+ */
+void serialize_contiguous( const void *noncontiguous_hvector,
+                       void *contiguous_hvector,
+                       size_t count,
+                       void *type)
+{
+  s_smpi_mpi_contiguous_t* type_c = (s_smpi_mpi_contiguous_t*)type;
+  char* contiguous_vector_char = (char*)contiguous_hvector;
+  char* noncontiguous_vector_char = (char*)noncontiguous_hvector+type_c->lb;
+  memcpy(contiguous_vector_char,
+           noncontiguous_vector_char, count* type_c->block_count * type_c->size_oldtype);
+}
+/*
+ *  Copies contiguous data into noncontiguous memory.
+ *  @param noncontiguous_vector - output hvector
+ *  @param contiguous_vector - input hvector
+ *  @param type - pointer contening :
+ *      - stride - stride of between noncontiguous data, in bytes
+ *      - block_length - the width or height of blocked matrix
+ *      - count - the number of rows of matrix
+ */
+void unserialize_contiguous( const void *contiguous_vector,
+                         void *noncontiguous_vector,
+                         size_t count,
+                         void *type)
+{
+  s_smpi_mpi_contiguous_t* type_c = (s_smpi_mpi_contiguous_t*)type;
+  char* contiguous_vector_char = (char*)contiguous_vector;
+  char* noncontiguous_vector_char = (char*)noncontiguous_vector+type_c->lb;
+
+  memcpy(noncontiguous_vector_char,
+           contiguous_vector_char, count*  type_c->block_count * type_c->size_oldtype);
+}
+
+void free_contiguous(MPI_Datatype* d){
+}
+
+/*
+ * Create a Sub type contiguous to be able to serialize and unserialize it
+ * the structure s_smpi_mpi_contiguous_t is derived from s_smpi_subtype which
+ * required the functions unserialize and serialize
+ *
+ */
+s_smpi_mpi_contiguous_t* smpi_datatype_contiguous_create( MPI_Aint lb,
+                                                  int block_count,
+                                                  MPI_Datatype old_type,
+                                                  int size_oldtype){
+  s_smpi_mpi_contiguous_t *new_t= xbt_new(s_smpi_mpi_contiguous_t,1);
+  new_t->base.serialize = &serialize_contiguous;
+  new_t->base.unserialize = &unserialize_contiguous;
+  new_t->base.subtype_free = &free_contiguous;
+  new_t->lb = lb;
+  new_t->block_count = block_count;
+  new_t->old_type = old_type;
+  new_t->size_oldtype = size_oldtype;
+  return new_t;
+}
+
+
+
+
+int smpi_datatype_contiguous(int count, MPI_Datatype old_type, MPI_Datatype* new_type, MPI_Aint lb)
 {
   int retval;
   if(old_type->has_subtype){
          //handle this case as a hvector with stride equals to the extent of the datatype
          return smpi_datatype_hvector(count, 1, smpi_datatype_get_extent(old_type), old_type, new_type);
   }
+  
+  s_smpi_mpi_contiguous_t* subtype = smpi_datatype_contiguous_create( lb,
+                                                                count,
+                                                                old_type,
+                                                                smpi_datatype_size(old_type));
+                                                                
   smpi_datatype_create(new_type,
                                          count * smpi_datatype_size(old_type),
-                                         0,count * smpi_datatype_size(old_type),
-                                         0,NULL, DT_FLAG_CONTIGUOUS);
+                                         lb,lb + count * smpi_datatype_size(old_type),
+                                         1,subtype, DT_FLAG_CONTIGUOUS);
   retval=MPI_SUCCESS;
   return retval;
 }
@@ -536,7 +619,7 @@ void serialize_indexed( const void *noncontiguous_indexed,
   s_smpi_mpi_indexed_t* type_c = (s_smpi_mpi_indexed_t*)type;
   int i,j;
   char* contiguous_indexed_char = (char*)contiguous_indexed;
-  char* noncontiguous_indexed_char = (char*)noncontiguous_indexed;
+  char* noncontiguous_indexed_char = (char*)noncontiguous_indexed+type_c->block_indices[0] * type_c->size_oldtype;
   for(j=0; j<count;j++){
     for (i = 0; i < type_c->block_count; i++) {
       if (type_c->old_type->has_subtype == 0)
@@ -570,15 +653,15 @@ void unserialize_indexed( const void *contiguous_indexed,
                          size_t count,
                          void *type)
 {
+
   s_smpi_mpi_indexed_t* type_c = (s_smpi_mpi_indexed_t*)type;
   int i,j;
-
   char* contiguous_indexed_char = (char*)contiguous_indexed;
-  char* noncontiguous_indexed_char = (char*)noncontiguous_indexed;
+  char* noncontiguous_indexed_char = (char*)noncontiguous_indexed+type_c->block_indices[0]*smpi_datatype_get_extent(type_c->old_type);
   for(j=0; j<count;j++){
     for (i = 0; i < type_c->block_count; i++) {
       if (type_c->old_type->has_subtype == 0)
-        memcpy(noncontiguous_indexed_char,
+        memcpy(noncontiguous_indexed_char ,
              contiguous_indexed_char, type_c->block_lengths[i] * type_c->size_oldtype);
       else
         ((s_smpi_subtype_t*)type_c->old_type->substruct)->unserialize( contiguous_indexed_char,
@@ -666,9 +749,12 @@ int smpi_datatype_indexed(int count, int* blocklens, int* indices, MPI_Datatype
      smpi_datatype_create(new_type,  size *
                          smpi_datatype_size(old_type),lb,ub,1, subtype, DT_FLAG_DATA);
   }else{
+    s_smpi_mpi_contiguous_t* subtype = smpi_datatype_contiguous_create( lb,
+                                                                  size,
+                                                                  old_type,
+                                                                  smpi_datatype_size(old_type));
     smpi_datatype_create(new_type,  size *
-                         smpi_datatype_size(old_type),0,size *
-                         smpi_datatype_size(old_type),0, NULL, DT_FLAG_DATA|DT_FLAG_CONTIGUOUS);
+                         smpi_datatype_size(old_type),lb,ub,1, subtype, DT_FLAG_DATA|DT_FLAG_CONTIGUOUS);
   }
   retval=MPI_SUCCESS;
   return retval;
@@ -696,7 +782,7 @@ void serialize_hindexed( const void *noncontiguous_hindexed,
   s_smpi_mpi_hindexed_t* type_c = (s_smpi_mpi_hindexed_t*)type;
   int i,j;
   char* contiguous_hindexed_char = (char*)contiguous_hindexed;
-  char* noncontiguous_hindexed_char = (char*)noncontiguous_hindexed;
+  char* noncontiguous_hindexed_char = (char*)noncontiguous_hindexed+ type_c->block_indices[0];
   for(j=0; j<count;j++){
     for (i = 0; i < type_c->block_count; i++) {
       if (type_c->old_type->has_subtype == 0)
@@ -733,7 +819,7 @@ void unserialize_hindexed( const void *contiguous_hindexed,
   int i,j;
 
   char* contiguous_hindexed_char = (char*)contiguous_hindexed;
-  char* noncontiguous_hindexed_char = (char*)noncontiguous_hindexed;
+  char* noncontiguous_hindexed_char = (char*)noncontiguous_hindexed+ type_c->block_indices[0];
   for(j=0; j<count;j++){
     for (i = 0; i < type_c->block_count; i++) {
       if (type_c->old_type->has_subtype == 0)
@@ -823,9 +909,13 @@ int smpi_datatype_hindexed(int count, int* blocklens, MPI_Aint* indices, MPI_Dat
                          ub
                          ,1, subtype, DT_FLAG_DATA);
   }else{
+    s_smpi_mpi_contiguous_t* subtype = smpi_datatype_contiguous_create( lb,
+                                                                  size,
+                                                                  old_type,
+                                                                  smpi_datatype_size(old_type));
     smpi_datatype_create(new_type,  size * smpi_datatype_size(old_type),
                                             0,size * smpi_datatype_size(old_type),
-                                            0, NULL, DT_FLAG_DATA|DT_FLAG_CONTIGUOUS);
+                                            1, subtype, DT_FLAG_DATA|DT_FLAG_CONTIGUOUS);
   }
   retval=MPI_SUCCESS;
   return retval;
@@ -853,7 +943,7 @@ void serialize_struct( const void *noncontiguous_struct,
   s_smpi_mpi_struct_t* type_c = (s_smpi_mpi_struct_t*)type;
   int i,j;
   char* contiguous_struct_char = (char*)contiguous_struct;
-  char* noncontiguous_struct_char = (char*)noncontiguous_struct;
+  char* noncontiguous_struct_char = (char*)noncontiguous_struct+ type_c->block_indices[0];
   for(j=0; j<count;j++){
     for (i = 0; i < type_c->block_count; i++) {
       if (type_c->old_types[i]->has_subtype == 0)
@@ -891,7 +981,7 @@ void unserialize_struct( const void *contiguous_struct,
   int i,j;
 
   char* contiguous_struct_char = (char*)contiguous_struct;
-  char* noncontiguous_struct_char = (char*)noncontiguous_struct;
+  char* noncontiguous_struct_char = (char*)noncontiguous_struct+ type_c->block_indices[0];
   for(j=0; j<count;j++){
     for (i = 0; i < type_c->block_count; i++) {
       if (type_c->old_types[i]->has_subtype == 0)
@@ -993,7 +1083,11 @@ int smpi_datatype_struct(int count, int* blocklens, MPI_Aint* indices, MPI_Datat
 
     smpi_datatype_create(new_type,  size, lb, ub,1, subtype, DT_FLAG_DATA);
   }else{
-    smpi_datatype_create(new_type,  size, lb, ub,0, NULL, DT_FLAG_DATA|DT_FLAG_CONTIGUOUS);
+    s_smpi_mpi_contiguous_t* subtype = smpi_datatype_contiguous_create( lb,
+                                                                  size,
+                                                                  MPI_CHAR,
+                                                                  1);
+    smpi_datatype_create(new_type,  size, lb, ub,1, subtype, DT_FLAG_DATA|DT_FLAG_CONTIGUOUS);
   }
   return MPI_SUCCESS;
 }
index 656754d..23af198 100644 (file)
@@ -40,6 +40,14 @@ extern MPI_Datatype MPI_PTR;
   used for serialization/unserialization of messages
 */
 
+typedef struct s_smpi_mpi_contiguous{
+  s_smpi_subtype_t base;
+  MPI_Datatype old_type;
+  MPI_Aint lb;
+  size_t size_oldtype;
+  size_t block_count;
+} s_smpi_mpi_contiguous_t;
+
 typedef struct s_smpi_mpi_vector{
   s_smpi_subtype_t base;
   MPI_Datatype old_type;
@@ -90,7 +98,23 @@ typedef struct s_smpi_mpi_struct{
   Functions to handle serialization/unserialization of messages, 3 for each type of MPI_Type
   One for creating the substructure to handle, one for serialization, one for unserialization
 */
+void unserialize_contiguous( const void *contiguous_vector,
+                         void *noncontiguous_vector,
+                         size_t count,
+                         void *type);
 
+void serialize_contiguous( const void *noncontiguous_vector,
+                       void *contiguous_vector,
+                       size_t count,
+                       void *type);
+
+void free_contiguous(MPI_Datatype* type);
+
+s_smpi_mpi_contiguous_t* smpi_datatype_contiguous_create( MPI_Aint lb,
+                                                  int block_count,
+                                                  MPI_Datatype old_type,
+                                                  int size_oldtype);
+                                                  
 void unserialize_vector( const void *contiguous_vector,
                          void *noncontiguous_vector,
                          size_t count,
index 42d6294..783395b 100644 (file)
@@ -1609,7 +1609,7 @@ int PMPI_Gather(void *sendbuf, int sendcount, MPI_Datatype sendtype,
              || recvtype == MPI_DATATYPE_NULL) {
     retval = MPI_ERR_TYPE;
   } else {
-    smpi_mpi_gather(sendbuf, sendcount, sendtype, recvbuf, recvcount,
+    mpi_coll_gather_fun(sendbuf, sendcount, sendtype, recvbuf, recvcount,
                     recvtype, root, comm);
     retval = MPI_SUCCESS;
   }
@@ -2023,7 +2023,7 @@ int PMPI_Type_contiguous(int count, MPI_Datatype old_type, MPI_Datatype* new_typ
   } else if (count<0){
     retval = MPI_ERR_COUNT;
   } else {
-    retval = smpi_datatype_contiguous(count, old_type, new_type);
+    retval = smpi_datatype_contiguous(count, old_type, new_type, 0);
   }
   smpi_bench_begin();
   return retval;
index dbca4f2..b328e8b 100644 (file)
@@ -34,7 +34,7 @@ static xbt_dynar_t storage_list;
 #define GENERIC_LMM_ACTION(action) action->generic_lmm_action
 #define GENERIC_ACTION(action) GENERIC_LMM_ACTION(action).generic_action
 
-static xbt_dict_t parse_storage_content(char *filename, unsigned long *used_size);
+static xbt_dict_t parse_storage_content(char *filename, size_t *used_size);
 static int storage_action_unref(surf_action_t action);
 static void storage_action_state_set(surf_action_t action, e_surf_action_state_t state);
 static surf_action_t storage_action_execute (void *storage, double size, e_surf_action_storage_type_t type);
@@ -46,7 +46,7 @@ static surf_action_t storage_action_ls(void *storage, const char* path)
   xbt_dict_t ls_dict = xbt_dict_new();
 
   char* key;
-  unsigned long size = 0;
+  size_t size = 0;
   xbt_dict_cursor_t cursor = NULL;
 
   xbt_dynar_t dyn = NULL;
@@ -123,8 +123,8 @@ static surf_action_t storage_action_close(void *storage, surf_file_t fd)
   return action;
 }
 
-static surf_action_t storage_action_read(void *storage, void* ptr, double size,
-                                         size_t nmemb, surf_file_t fd)
+static surf_action_t storage_action_read(void *storage, void* ptr, 
+                                        size_t size, surf_file_t fd)
 {
   if(size > fd->size)
     size = fd->size;
@@ -133,8 +133,7 @@ static surf_action_t storage_action_read(void *storage, void* ptr, double size,
 }
 
 static surf_action_t storage_action_write(void *storage, const void* ptr,
-                                          size_t size, size_t nmemb,
-                                          surf_file_t fd)
+                                          size_t size, surf_file_t fd)
 {
   char *filename = fd->name;
   XBT_DEBUG("\tWrite file '%s' size '%zu/%zu'",filename,size,fd->size);
@@ -212,7 +211,7 @@ static void* storage_create_resource(const char* id, const char* model,const cha
   double Bread  = atof(xbt_dict_get(storage_type->properties,"Bread"));
   double Bwrite = atof(xbt_dict_get(storage_type->properties,"Bwrite"));
   double Bconnection   = atof(xbt_dict_get(storage_type->properties,"Bconnection"));
-  XBT_DEBUG("Create resource with Bconnection '%f' Bread '%f' Bwrite '%f' and Size '%ld'",Bconnection,Bread,Bwrite,storage_type->size);
+  XBT_DEBUG("Create resource with Bconnection '%f' Bread '%f' Bwrite '%f' and Size '%lu'",Bconnection,Bread,Bwrite,(unsigned long)storage_type->size);
   storage->constraint       = lmm_constraint_new(storage_maxmin_system, storage, Bconnection);
   storage->constraint_read  = lmm_constraint_new(storage_maxmin_system, storage, Bread);
   storage->constraint_write = lmm_constraint_new(storage_maxmin_system, storage, Bwrite);
@@ -517,7 +516,7 @@ static void storage_parse_storage(sg_platf_storage_cbarg_t storage)
       (void *) xbt_strdup(storage->type_id));
 }
 
-static xbt_dict_t parse_storage_content(char *filename, unsigned long *used_size)
+static xbt_dict_t parse_storage_content(char *filename, size_t *used_size)
 {
   *used_size = 0;
   if ((!filename) || (strcmp(filename, "") == 0))
@@ -534,12 +533,12 @@ static xbt_dict_t parse_storage_content(char *filename, unsigned long *used_size
   size_t len = 0;
   ssize_t read;
   char path[1024];
-  unsigned long size;
+  size_t size;
 
 
   while ((read = xbt_getline(&line, &len, file)) != -1) {
     if (read){
-    if(sscanf(line,"%s %ld",path, &size)==2) {
+    if(sscanf(line,"%s %zu",path, &size)==2) {
         *used_size += size;
         xbt_dict_set(parse_content,path,(void*) size,NULL);
       } else {
index 534b1d0..1d896f5 100644 (file)
@@ -12,7 +12,7 @@ typedef struct s_storage_type {
   char *content;
   char *type_id;
   xbt_dict_t properties;
-  unsigned long size;
+  size_t size;
 } s_storage_type_t, *storage_type_t;
 
 typedef struct s_mount {
@@ -33,8 +33,8 @@ typedef struct storage {
   lmm_constraint_t constraint_write;    /* Constraint for maximum write bandwidth*/
   lmm_constraint_t constraint_read;     /* Constraint for maximum write bandwidth*/
   xbt_dict_t content; /* char * -> s_surf_file_t */
-  unsigned long size;
-  unsigned long used_size;
+  size_t size;
+  size_t used_size;
   xbt_dynar_t write_actions;
 } s_storage_t, *storage_t;
 
index b19db9d..3b3b9df 100644 (file)
@@ -344,21 +344,21 @@ static surf_action_t ws_action_close(void *workstation, surf_file_t fd)
 }
 
 static surf_action_t ws_action_read(void *workstation, void* ptr, size_t size,
-                                    size_t nmemb, surf_file_t fd)
+                                    surf_file_t fd)
 {
   storage_t st = find_storage_on_mount_list(workstation, fd->storage);
   XBT_DEBUG("READ on disk '%s'",st->generic_resource.name);
   surf_model_t model = st->generic_resource.model;
-  return model->extension.storage.read(st, ptr, (double)size, nmemb, fd);
+  return model->extension.storage.read(st, ptr, size, fd);
 }
 
 static surf_action_t ws_action_write(void *workstation, const void* ptr,
-                                     size_t size, size_t nmemb, surf_file_t fd)
+                                     size_t size, surf_file_t fd)
 {
   storage_t st = find_storage_on_mount_list(workstation, fd->storage);
   XBT_DEBUG("WRITE on disk '%s'",st->generic_resource.name);
   surf_model_t model = st->generic_resource.model;
-  return model->extension.storage.write(st,  ptr, size, nmemb, fd);
+  return model->extension.storage.write(st,  ptr, size, fd);
 }
 
 static int ws_file_unlink(void *workstation, surf_file_t fd)
index e74b5a5..78de3f4 100644 (file)
@@ -10,7 +10,7 @@ if(enable_smpi)
   set(EXECUTABLE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}")
   
   include_directories("${CMAKE_HOME_DIRECTORY}/include/smpi")
-
+  add_executable(gather_coll gather_coll.c)
   add_executable(allgather_coll allgather_coll.c)
   add_executable(allgatherv_coll allgatherv_coll.c)
   add_executable(alltoall2 alltoall2.c)
@@ -36,6 +36,7 @@ if(enable_smpi)
   add_executable(indexed_test indexed_test.c)
   add_executable(struct_test struct_test.c)
 
+  target_link_libraries(gather_coll simgrid)
   target_link_libraries(allgather_coll simgrid)
   target_link_libraries(allgatherv_coll simgrid)
   target_link_libraries(alltoall2 simgrid)
@@ -73,6 +74,7 @@ endif()
 
 set(tesh_files
   ${tesh_files}
+  ${CMAKE_CURRENT_SOURCE_DIR}/gather_coll.tesh
   ${CMAKE_CURRENT_SOURCE_DIR}/allgather_coll.tesh
   ${CMAKE_CURRENT_SOURCE_DIR}/allgatherv_coll.tesh  
   ${CMAKE_CURRENT_SOURCE_DIR}/allreduce_coll.tesh
@@ -96,6 +98,7 @@ set(xml_files
   )
 set(examples_src
   ${examples_src}
+  ${CMAKE_CURRENT_SOURCE_DIR}/gather_coll.c
   ${CMAKE_CURRENT_SOURCE_DIR}/allgather_coll.c
   ${CMAKE_CURRENT_SOURCE_DIR}/allgatherv_coll.c  
   ${CMAKE_CURRENT_SOURCE_DIR}/allreduce_coll.c
diff --git a/teshsuite/smpi/gather_coll.c b/teshsuite/smpi/gather_coll.c
new file mode 100644 (file)
index 0000000..a6111b4
--- /dev/null
@@ -0,0 +1,65 @@
+/* Copyright (c) 2009, 2010. The SimGrid Team.
+ * All rights reserved.                                                     */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include "mpi.h"
+
+#ifndef EXIT_SUCCESS
+#define EXIT_SUCCESS 0
+#define EXIT_FAILURE 1
+#endif
+
+int main(int argc, char *argv[])
+{
+  int rank, size;
+  int i;
+  int *sb;
+  int *rb;
+  int status;
+
+  int root = 0;
+
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+  int count = 2;
+  sb = (int *) xbt_malloc(count * sizeof(int));
+  rb = (int *) xbt_malloc(count * size * sizeof(int));
+  
+  for (i = 0; i < count; ++i)
+    sb[i] = rank * count + i;
+  for (i = 0; i < count * size; ++i)  
+    rb[i] = 0;
+
+  printf("[%d] sndbuf=[", rank);
+  for (i = 0; i < count; i++)
+    printf("%d ", sb[i]);
+  printf("]\n");
+
+  status = MPI_Gather(sb, count, MPI_INT, rb, count, MPI_INT, root, MPI_COMM_WORLD);
+
+  if (rank == root) {
+  printf("[%d] rcvbuf=[", rank);
+  for (i = 0; i < count * size; i++)
+    printf("%d ", rb[i]);
+  printf("]\n");
+
+
+    if (status != MPI_SUCCESS) {
+      printf("gather returned %d\n", status);
+      fflush(stdout);
+    }
+  }
+  free(sb);
+  free(rb);
+  MPI_Barrier(MPI_COMM_WORLD);
+  MPI_Finalize();
+  return (EXIT_SUCCESS);
+}
diff --git a/teshsuite/smpi/gather_coll.tesh b/teshsuite/smpi/gather_coll.tesh
new file mode 100644 (file)
index 0000000..c4b40fe
--- /dev/null
@@ -0,0 +1,42 @@
+# Smpi Gather collectives tests
+! setenv LD_LIBRARY_PATH=../../lib
+! output sort
+
+p Test gather
+$ ../../bin/smpirun -map -hostfile ${srcdir:=.}/hostfile -platform ${srcdir:=.}/../../examples/msg/small_platform.xml -np 16 --log=xbt_cfg.thres:critical ./gather_coll
+> You requested to use 16 processes, but there is only 5 processes in your hostfile...
+> [rank 0] -> Tremblay
+> [rank 1] -> Jupiter
+> [rank 2] -> Fafard
+> [rank 3] -> Ginette
+> [rank 4] -> Bourassa
+> [rank 5] -> Tremblay
+> [rank 6] -> Jupiter
+> [rank 7] -> Fafard
+> [rank 8] -> Ginette
+> [rank 9] -> Bourassa
+> [rank 10] -> Tremblay
+> [rank 11] -> Jupiter
+> [rank 12] -> Fafard
+> [rank 13] -> Ginette
+> [rank 14] -> Bourassa
+> [rank 15] -> Tremblay
+> [0.000000] [surf_config/INFO] Switching workstation model to compound since you changed the network and/or cpu model(s)
+> [0] rcvbuf=[0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 ]
+> [0] sndbuf=[0 1 ]
+> [1] sndbuf=[2 3 ]
+> [2] sndbuf=[4 5 ]
+> [3] sndbuf=[6 7 ]
+> [4] sndbuf=[8 9 ]
+> [5] sndbuf=[10 11 ]
+> [6] sndbuf=[12 13 ]
+> [7] sndbuf=[14 15 ]
+> [8] sndbuf=[16 17 ]
+> [9] sndbuf=[18 19 ]
+> [10] sndbuf=[20 21 ]
+> [11] sndbuf=[22 23 ]
+> [12] sndbuf=[24 25 ]
+> [13] sndbuf=[26 27 ]
+> [14] sndbuf=[28 29 ]
+> [15] sndbuf=[30 31 ]
+
index 61a58e5..4ded2ab 100644 (file)
@@ -8,6 +8,7 @@
 - Makefile
 - doc/html/.*
 
++ \.cproject
 + \.gitignore
 + README\.(coding|git)
 + mk_win-dist.sh