From: degomme
Date: Tue, 14 Mar 2017 17:05:56 +0000 (+0100)
Subject: SMPI colls are not really C++. But cleaner than before.
X-Git-Tag: v3_15~117
X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/b7ed19dfcc221d7b3eca182abb5c4a3946671172

SMPI colls are not really C++. But cleaner than before.
Still needs a lot to be considered clean.
---
diff --git a/include/smpi/forward.hpp b/include/smpi/forward.hpp index 7788f55599..c1a210ccf8 100644 --- a/include/smpi/forward.hpp +++ b/include/smpi/forward.hpp @@ -32,6 +32,8 @@ class Type_Struct; class Type_Vector; class Win; +class Colls; + } } diff --git a/src/include/smpi/smpi_interface.h b/src/include/smpi/smpi_interface.h index d5669c4918..74df6907f7 100644 --- a/src/include/smpi/smpi_interface.h +++ b/src/include/smpi/smpi_interface.h @@ -23,66 +23,51 @@ typedef struct mpi_coll_description* mpi_coll_description_t; * \brief The list of all available allgather collectives */ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_gather_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_gather_fun)(void *, int, MPI_Datatype, void *, int, MPI_Datatype, int, MPI_Comm)); /** \ingroup MPI allgather * \brief The list of all available allgather collectives */ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_allgather_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_allgather_fun) (void *, int, MPI_Datatype, void *, int, MPI_Datatype, MPI_Comm)); /** \ingroup MPI allgather * \brief The list of all available allgather collectives */ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_allgatherv_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_allgatherv_fun) (void *, int, MPI_Datatype, void *, int*, int*, MPI_Datatype, MPI_Comm)); /** \ingroup MPI allreduce * \brief The list of all available allgather collectives */ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_allreduce_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_allreduce_fun)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype,MPI_Op op, - MPI_Comm comm)); /** \ingroup MPI alltoall * \brief The list of all available alltoall collectives */ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_alltoall_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_alltoall_fun)(void *, int, MPI_Datatype, void *, int, MPI_Datatype, MPI_Comm)); /** \ingroup MPI alltoallv * \brief The list of all available alltoallv collectives */ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_alltoallv_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_alltoallv_fun)(void *, int*, int*, MPI_Datatype, void *, int*, int*, MPI_Datatype, - MPI_Comm)); /** \ingroup MPI bcast * \brief The list of all available bcast collectives */ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_bcast_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_bcast_fun)(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com)); /** \ingroup MPI reduce * \brief The list of all available reduce collectives */ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_reduce_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_reduce_fun)(void *buf, void *rbuf, int count, MPI_Datatype datatype, - MPI_Op op, int root, MPI_Comm comm)); /** \ingroup MPI reduce_scatter * \brief The list of all available allgather collectives */ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_reduce_scatter_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_reduce_scatter_fun) (void *sbuf, void *rbuf, int *rcounts, - MPI_Datatype dtype, MPI_Op op,MPI_Comm comm)); /** \ingroup MPI scatter * \brief The list of all available allgather collectives */
XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_scatter_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_scatter_fun)(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm)); /** \ingroup MPI barrier * \brief The list of all available allgather collectives @@ -91,7 +76,7 @@ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_barrier_description[]; XBT_PUBLIC_DATA(int (*mpi_coll_barrier_fun)(MPI_Comm comm)); XBT_PUBLIC(void) coll_help(const char *category, s_mpi_coll_description_t * table); -XBT_PUBLIC(int) find_coll_description(s_mpi_coll_description_t * table, char *name, const char *desc); +XBT_PUBLIC(int) find_coll_description(s_mpi_coll_description_t * table, const char *name, const char *desc); XBT_PUBLIC_DATA(void) (*smpi_coll_cleanup_callback)(); XBT_PUBLIC(void) smpi_coll_cleanup_mvapich2(void); diff --git a/src/smpi/colls/allgather/allgather-2dmesh.cpp b/src/smpi/colls/allgather/allgather-2dmesh.cpp index 21be3ac9e2..103b4452b3 100644 --- a/src/smpi/colls/allgather/allgather-2dmesh.cpp +++ b/src/smpi/colls/allgather/allgather-2dmesh.cpp @@ -105,8 +105,13 @@ static int is_2dmesh(int num, int *i, int *j) * "simple" * Auther: Ahmad Faraj ****************************************************************************/ + +namespace simgrid{ +namespace smpi{ + + int -smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype +Coll_allgather_2dmesh::allgather(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) { @@ -192,3 +197,6 @@ smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/allgather/allgather-3dmesh.cpp b/src/smpi/colls/allgather/allgather-3dmesh.cpp index dfe154d01f..2c80059f85 100644 --- a/src/smpi/colls/allgather/allgather-3dmesh.cpp +++ b/src/smpi/colls/allgather/allgather-3dmesh.cpp @@ -92,7 +92,11 @@ static int is_3dmesh(int num, int *i, int *j, int *k) * follows "simple" * Auther: Ahmad Faraj ****************************************************************************/ -int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, +namespace simgrid{ +namespace smpi{ + + +int Coll_allgather_3dmesh::allgather(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -206,3 +210,7 @@ int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, return MPI_SUCCESS; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-GB.cpp b/src/smpi/colls/allgather/allgather-GB.cpp index 1a107b02c9..f50e07f9fa 100644 --- a/src/smpi/colls/allgather/allgather-GB.cpp +++ b/src/smpi/colls/allgather/allgather-GB.cpp @@ -6,17 +6,19 @@ #include "../colls_private.h" +using namespace simgrid::smpi; + // Allgather - gather/bcast algorithm -int smpi_coll_tuned_allgather_GB(void *send_buff, int send_count, +int Coll_allgather_GB::allgather(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) { int num_procs; num_procs = comm->size(); - mpi_coll_gather_fun(send_buff, send_count, send_type, recv_buff, recv_count, recv_type, + Colls::gather(send_buff, send_count, send_type, recv_buff, recv_count, recv_type, 0, comm); - mpi_coll_bcast_fun(recv_buff, (recv_count * num_procs), recv_type, 0, comm); + Colls::bcast(recv_buff, (recv_count * num_procs), recv_type, 0, comm); return 
MPI_SUCCESS; } diff --git a/src/smpi/colls/allgather/allgather-NTSLR-NB.cpp b/src/smpi/colls/allgather/allgather-NTSLR-NB.cpp index dd1018a592..d714abfb1c 100644 --- a/src/smpi/colls/allgather/allgather-NTSLR-NB.cpp +++ b/src/smpi/colls/allgather/allgather-NTSLR-NB.cpp @@ -6,9 +6,13 @@ #include "../colls_private.h" +namespace simgrid{ +namespace smpi{ + + // Allgather-Non-Topoloty-Scecific-Logical-Ring algorithm int -smpi_coll_tuned_allgather_NTSLR_NB(void *sbuf, int scount, MPI_Datatype stype, +Coll_allgather_NTSLR_NB::allgather(void *sbuf, int scount, MPI_Datatype stype, void *rbuf, int rcount, MPI_Datatype rtype, MPI_Comm comm) { @@ -30,7 +34,7 @@ smpi_coll_tuned_allgather_NTSLR_NB(void *sbuf, int scount, MPI_Datatype stype, // irregular case use default MPI fucntions if (scount * sextent != rcount * rextent) { XBT_WARN("MPI_allgather_NTSLR_NB use default MPI_allgather."); - smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); + Coll_allgather_default::allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); return MPI_SUCCESS; } @@ -67,3 +71,6 @@ smpi_coll_tuned_allgather_NTSLR_NB(void *sbuf, int scount, MPI_Datatype stype, return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/allgather/allgather-NTSLR.cpp b/src/smpi/colls/allgather/allgather-NTSLR.cpp index 5191e85e28..5c8faee997 100644 --- a/src/smpi/colls/allgather/allgather-NTSLR.cpp +++ b/src/smpi/colls/allgather/allgather-NTSLR.cpp @@ -6,9 +6,14 @@ #include "../colls_private.h" +namespace simgrid{ +namespace smpi{ + + + // Allgather-Non-Topoloty-Scecific-Logical-Ring algorithm int -smpi_coll_tuned_allgather_NTSLR(void *sbuf, int scount, MPI_Datatype stype, +Coll_allgather_NTSLR::allgather(void *sbuf, int scount, MPI_Datatype stype, void *rbuf, int rcount, MPI_Datatype rtype, MPI_Comm comm) { @@ -26,7 +31,7 @@ smpi_coll_tuned_allgather_NTSLR(void *sbuf, int scount, MPI_Datatype stype, // irregular case use default MPI fucntions if (scount * sextent != rcount * rextent) { XBT_WARN("MPI_allgather_NTSLR use default MPI_allgather."); - smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); + Coll_allgather_default::allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); return MPI_SUCCESS; } @@ -54,3 +59,7 @@ smpi_coll_tuned_allgather_NTSLR(void *sbuf, int scount, MPI_Datatype stype, return MPI_SUCCESS; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-SMP-NTS.cpp b/src/smpi/colls/allgather/allgather-SMP-NTS.cpp index 0ea5ff7d24..571312063e 100644 --- a/src/smpi/colls/allgather/allgather-SMP-NTS.cpp +++ b/src/smpi/colls/allgather/allgather-SMP-NTS.cpp @@ -6,7 +6,11 @@ #include "../colls_private.h" -int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, +namespace simgrid{ +namespace smpi{ + + +int Coll_allgather_SMP_NTS::allgather(void *sbuf, int scount, MPI_Datatype stype, void *rbuf, int rcount, MPI_Datatype rtype, MPI_Comm comm) @@ -42,7 +46,7 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, /* for too small number of processes, use default implementation */ if (comm_size <= num_core) { XBT_WARN("MPI_allgather_SMP_NTS use default MPI_allgather."); - smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); + Coll_allgather_default::allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); return MPI_SUCCESS; } @@ -157,3 +161,7 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, return MPI_SUCCESS; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-bruck.cpp b/src/smpi/colls/allgather/allgather-bruck.cpp index 
68edee294d..7040938b52 100644 --- a/src/smpi/colls/allgather/allgather-bruck.cpp +++ b/src/smpi/colls/allgather/allgather-bruck.cpp @@ -67,7 +67,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Comment: Original bruck algorithm from MPICH is slightly modified by * Ahmad Faraj. ****************************************************************************/ -int smpi_coll_tuned_allgather_bruck(void *send_buff, int send_count, + +namespace simgrid{ +namespace smpi{ + + + +int Coll_allgather_bruck::allgather(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -133,3 +139,7 @@ int smpi_coll_tuned_allgather_bruck(void *send_buff, int send_count, smpi_free_tmp_buffer(tmp_buff); return MPI_SUCCESS; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-loosely-lr.cpp b/src/smpi/colls/allgather/allgather-loosely-lr.cpp index 1d31885092..e7307433a2 100644 --- a/src/smpi/colls/allgather/allgather-loosely-lr.cpp +++ b/src/smpi/colls/allgather/allgather-loosely-lr.cpp @@ -6,7 +6,11 @@ #include "../colls_private.h" -int smpi_coll_tuned_allgather_loosely_lr(void *sbuf, int scount, +namespace simgrid{ +namespace smpi{ + + +int Coll_allgather_loosely_lr::allgather(void *sbuf, int scount, MPI_Datatype stype, void *rbuf, int rcount, MPI_Datatype rtype, MPI_Comm comm) @@ -131,3 +135,7 @@ if(comm->get_leaders_comm()==MPI_COMM_NULL){ return MPI_SUCCESS; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-mvapich-smp.cpp b/src/smpi/colls/allgather/allgather-mvapich-smp.cpp index 32d4aea198..9c5f5b1b80 100644 --- a/src/smpi/colls/allgather/allgather-mvapich-smp.cpp +++ b/src/smpi/colls/allgather/allgather-mvapich-smp.cpp @@ -36,9 +36,9 @@ */ #include "../colls_private.h" +using namespace simgrid::smpi; - -int smpi_coll_tuned_allgather_mvapich2_smp(void *sendbuf,int sendcnt, MPI_Datatype sendtype, +int Coll_allgather_mvapich2_smp::allgather(void *sendbuf,int sendcnt, MPI_Datatype sendtype, void *recvbuf, int recvcnt,MPI_Datatype recvtype, MPI_Comm comm) { @@ -82,7 +82,7 @@ int smpi_coll_tuned_allgather_mvapich2_smp(void *sendbuf,int sendcnt, MPI_Dataty /*If there is just one node, after gather itself, * root has all the data and it can do bcast*/ if(local_rank == 0) { - mpi_errno = mpi_coll_gather_fun(sendbuf, sendcnt,sendtype, + mpi_errno = Colls::gather(sendbuf, sendcnt,sendtype, (void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)), recvcnt, recvtype, 0, shmem_comm); @@ -90,12 +90,12 @@ int smpi_coll_tuned_allgather_mvapich2_smp(void *sendbuf,int sendcnt, MPI_Dataty /*Since in allgather all the processes could have * its own data in place*/ if(sendbuf == MPI_IN_PLACE) { - mpi_errno = mpi_coll_gather_fun((void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)), + mpi_errno = Colls::gather((void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)), recvcnt , recvtype, recvbuf, recvcnt, recvtype, 0, shmem_comm); } else { - mpi_errno = mpi_coll_gather_fun(sendbuf, sendcnt,sendtype, + mpi_errno = Colls::gather(sendbuf, sendcnt,sendtype, recvbuf, recvcnt, recvtype, 0, shmem_comm); } @@ -128,7 +128,7 @@ int smpi_coll_tuned_allgather_mvapich2_smp(void *sendbuf,int sendcnt, MPI_Dataty void* sendbuf=((char*)recvbuf)+recvtype->get_extent()*displs[leader_comm->rank()]; - mpi_errno = mpi_coll_allgatherv_fun(sendbuf, + mpi_errno = Colls::allgatherv(sendbuf, (recvcnt*local_size), recvtype, recvbuf, recvcnts, @@ -141,7 +141,7 @@ int smpi_coll_tuned_allgather_mvapich2_smp(void *sendbuf,int sendcnt, 
MPI_Dataty - mpi_errno = smpi_coll_tuned_allgather_mpich(sendtmpbuf, + mpi_errno = Coll_allgather_mpich::allgather(sendtmpbuf, (recvcnt*local_size), recvtype, recvbuf, (recvcnt*local_size), recvtype, @@ -151,6 +151,6 @@ int smpi_coll_tuned_allgather_mvapich2_smp(void *sendbuf,int sendcnt, MPI_Dataty } /*Bcast the entire data from node leaders to all other cores*/ - mpi_errno = mpi_coll_bcast_fun (recvbuf, recvcnt * size, recvtype, 0, shmem_comm); + mpi_errno = Colls::bcast (recvbuf, recvcnt * size, recvtype, 0, shmem_comm); return mpi_errno; } diff --git a/src/smpi/colls/allgather/allgather-ompi-neighborexchange.cpp b/src/smpi/colls/allgather/allgather-ompi-neighborexchange.cpp index d8f9e8d968..1bb0e53b35 100644 --- a/src/smpi/colls/allgather/allgather-ompi-neighborexchange.cpp +++ b/src/smpi/colls/allgather/allgather-ompi-neighborexchange.cpp @@ -63,8 +63,12 @@ */ #include "../colls_private.h" + +namespace simgrid{ +namespace smpi{ + int -smpi_coll_tuned_allgather_ompi_neighborexchange(void *sbuf, int scount, +Coll_allgather_ompi_neighborexchange::allgather(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, @@ -86,7 +90,7 @@ smpi_coll_tuned_allgather_ompi_neighborexchange(void *sbuf, int scount, XBT_DEBUG( "coll:tuned:allgather_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm", size); - return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype, + return Coll_allgather_ring::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } @@ -178,3 +182,7 @@ smpi_coll_tuned_allgather_ompi_neighborexchange(void *sbuf, int scount, __FILE__, line, err, rank); return err; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-pair.cpp b/src/smpi/colls/allgather/allgather-pair.cpp index cfec7c89fb..553378d68c 100644 --- a/src/smpi/colls/allgather/allgather-pair.cpp +++ b/src/smpi/colls/allgather/allgather-pair.cpp @@ -64,8 +64,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * phases, nodes in pair communicate their data. 
* Auther: Ahmad Faraj ****************************************************************************/ + +namespace simgrid{ +namespace smpi{ + + int -smpi_coll_tuned_allgather_pair(void *send_buff, int send_count, +Coll_allgather_pair::allgather(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -101,3 +106,7 @@ smpi_coll_tuned_allgather_pair(void *send_buff, int send_count, return MPI_SUCCESS; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-rdb.cpp b/src/smpi/colls/allgather/allgather-rdb.cpp index 8fb6efa6ef..6d2950aa42 100644 --- a/src/smpi/colls/allgather/allgather-rdb.cpp +++ b/src/smpi/colls/allgather/allgather-rdb.cpp @@ -6,8 +6,12 @@ #include "../colls_private.h" +namespace simgrid{ +namespace smpi{ + + int -smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count, +Coll_allgather_rdb::allgather(void *sbuf, int send_count, MPI_Datatype send_type, void *rbuf, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -127,3 +131,7 @@ smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count, return success; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-rhv.cpp b/src/smpi/colls/allgather/allgather-rhv.cpp index 8e7f44bd28..12a929db11 100644 --- a/src/smpi/colls/allgather/allgather-rhv.cpp +++ b/src/smpi/colls/allgather/allgather-rhv.cpp @@ -6,10 +6,14 @@ #include "../colls_private.h" +namespace simgrid{ +namespace smpi{ + + // now only work with power of two processes int -smpi_coll_tuned_allgather_rhv(void *sbuf, int send_count, +Coll_allgather_rhv::allgather(void *sbuf, int send_count, MPI_Datatype send_type, void *rbuf, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -42,7 +46,7 @@ smpi_coll_tuned_allgather_rhv(void *sbuf, int send_count, if (send_chunk != recv_chunk) { XBT_WARN("MPI_allgather_rhv use default MPI_allgather."); - smpi_mpi_allgather(sbuf, send_count, send_type, rbuf, recv_count, + Coll_allgather_default::allgather(sbuf, send_count, send_type, rbuf, recv_count, recv_type, comm); return MPI_SUCCESS; } @@ -103,3 +107,7 @@ smpi_coll_tuned_allgather_rhv(void *sbuf, int send_count, return MPI_SUCCESS; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-ring.cpp b/src/smpi/colls/allgather/allgather-ring.cpp index c638b25bd1..14ffc97288 100644 --- a/src/smpi/colls/allgather/allgather-ring.cpp +++ b/src/smpi/colls/allgather/allgather-ring.cpp @@ -63,8 +63,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Descrp: Function works in P - 1 steps. In step i, node j - i -> j -> j+ i. 
* Auther: Ahmad Faraj ****************************************************************************/ + +namespace simgrid{ +namespace smpi{ + + int -smpi_coll_tuned_allgather_ring(void *send_buff, int send_count, +Coll_allgather_ring::allgather(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -97,3 +102,7 @@ smpi_coll_tuned_allgather_ring(void *send_buff, int send_count, return MPI_SUCCESS; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-smp-simple.cpp b/src/smpi/colls/allgather/allgather-smp-simple.cpp index 8d7f190a86..38f1c5c45a 100644 --- a/src/smpi/colls/allgather/allgather-smp-simple.cpp +++ b/src/smpi/colls/allgather/allgather-smp-simple.cpp @@ -6,7 +6,11 @@ #include "../colls_private.h" -int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount, +namespace simgrid{ +namespace smpi{ + + +int Coll_allgather_smp_simple::allgather(void *send_buf, int scount, MPI_Datatype stype, void *recv_buf, int rcount, MPI_Datatype rtype, MPI_Comm comm) @@ -121,3 +125,7 @@ int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount, return MPI_SUCCESS; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-spreading-simple.cpp b/src/smpi/colls/allgather/allgather-spreading-simple.cpp index 539358f2f1..57d0046b79 100644 --- a/src/smpi/colls/allgather/allgather-spreading-simple.cpp +++ b/src/smpi/colls/allgather/allgather-spreading-simple.cpp @@ -66,8 +66,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Auther: Ahmad Faraj ****************************************************************************/ + +namespace simgrid{ +namespace smpi{ + + int -smpi_coll_tuned_allgather_spreading_simple(void *send_buff, int send_count, +Coll_allgather_spreading_simple::allgather(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, @@ -117,3 +122,6 @@ smpi_coll_tuned_allgather_spreading_simple(void *send_buff, int send_count, return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/allgatherv/allgatherv-GB.cpp b/src/smpi/colls/allgatherv/allgatherv-GB.cpp index bd53450ae0..6c41685422 100644 --- a/src/smpi/colls/allgatherv/allgatherv-GB.cpp +++ b/src/smpi/colls/allgatherv/allgatherv-GB.cpp @@ -6,13 +6,16 @@ #include "../colls_private.h" +namespace simgrid{ +namespace smpi{ + // Allgather - gather/bcast algorithm -int smpi_coll_tuned_allgatherv_GB(void *send_buff, int send_count, +int Coll_allgatherv_GB::allgatherv(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm) { - smpi_mpi_gatherv(send_buff, send_count, send_type, recv_buff, recv_counts, + Colls::gatherv(send_buff, send_count, send_type, recv_buff, recv_counts, recv_disps, recv_type, 0, comm); int num_procs, i, current, max = 0; num_procs = comm->size(); @@ -21,7 +24,10 @@ int smpi_coll_tuned_allgatherv_GB(void *send_buff, int send_count, if (current > max) max = current; } - mpi_coll_bcast_fun(recv_buff, max, recv_type, 0, comm); + Colls::bcast(recv_buff, max, recv_type, 0, comm); return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/allgatherv/allgatherv-mpich-rdb.cpp b/src/smpi/colls/allgatherv/allgatherv-mpich-rdb.cpp index c224fbd7d8..5d5f94537d 100644 --- a/src/smpi/colls/allgatherv/allgatherv-mpich-rdb.cpp +++ b/src/smpi/colls/allgatherv/allgatherv-mpich-rdb.cpp @@ -7,7 +7,11 @@ /* Short or medium size message and power-of-two no. of processes. 
Use * recursive doubling algorithm */ #include "../colls_private.h" -int smpi_coll_tuned_allgatherv_mpich_rdb ( + +namespace simgrid{ +namespace smpi{ + +int Coll_allgatherv_mpich_rdb::allgatherv ( void *sendbuf, int sendcount, MPI_Datatype sendtype, @@ -214,3 +218,6 @@ int smpi_coll_tuned_allgatherv_mpich_rdb ( smpi_free_tmp_buffer(tmp_buf_rl); return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/allgatherv/allgatherv-mpich-ring.cpp b/src/smpi/colls/allgatherv/allgatherv-mpich-ring.cpp index 2deb294b6d..273bf6246e 100644 --- a/src/smpi/colls/allgatherv/allgatherv-mpich-ring.cpp +++ b/src/smpi/colls/allgatherv/allgatherv-mpich-ring.cpp @@ -24,8 +24,12 @@ * recv_type: data type of elements being received * comm: communication ****************************************************************************/ + +namespace simgrid{ +namespace smpi{ + int -smpi_coll_tuned_allgatherv_mpich_ring(void *sendbuf, int sendcount, +Coll_allgatherv_mpich_ring::allgatherv(void *sendbuf, int sendcount, MPI_Datatype send_type, void *recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm) @@ -124,3 +128,6 @@ smpi_coll_tuned_allgatherv_mpich_ring(void *sendbuf, int sendcount, return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/allgatherv/allgatherv-ompi-bruck.cpp b/src/smpi/colls/allgatherv/allgatherv-ompi-bruck.cpp index f93667533b..8d28d925d1 100644 --- a/src/smpi/colls/allgatherv/allgatherv-ompi-bruck.cpp +++ b/src/smpi/colls/allgatherv/allgatherv-ompi-bruck.cpp @@ -76,7 +76,11 @@ * [5] [5] [5] [5] [5] [5] [5] * [6] [6] [6] [6] [6] [6] [6] */ -int smpi_coll_tuned_allgatherv_ompi_bruck(void *sbuf, int scount, + +namespace simgrid{ +namespace smpi{ + +int Coll_allgatherv_ompi_bruck::allgatherv(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf, int *rcounts, int *rdispls, @@ -173,3 +177,6 @@ int smpi_coll_tuned_allgatherv_ompi_bruck(void *sbuf, int scount, } + +} +} diff --git a/src/smpi/colls/allgatherv/allgatherv-ompi-neighborexchange.cpp b/src/smpi/colls/allgatherv/allgatherv-ompi-neighborexchange.cpp index 025c8b8779..c24e79e0bf 100644 --- a/src/smpi/colls/allgatherv/allgatherv-ompi-neighborexchange.cpp +++ b/src/smpi/colls/allgatherv/allgatherv-ompi-neighborexchange.cpp @@ -65,8 +65,11 @@ #include "../colls_private.h" +namespace simgrid{ +namespace smpi{ + int -smpi_coll_tuned_allgatherv_ompi_neighborexchange(void *sbuf, int scount, +Coll_allgatherv_ompi_neighborexchange::allgatherv(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int *rcounts, int *rdispls, MPI_Datatype rdtype, @@ -89,7 +92,7 @@ smpi_coll_tuned_allgatherv_ompi_neighborexchange(void *sbuf, int scount, XBT_DEBUG( "coll:tuned:allgatherv_ompi_neighborexchange WARNING: odd size %d, switching to ring algorithm", size); - return smpi_coll_tuned_allgatherv_ring(sbuf, scount, sdtype, + return Coll_allgatherv_ring::allgatherv(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm); @@ -216,3 +219,6 @@ smpi_coll_tuned_allgatherv_ompi_neighborexchange(void *sbuf, int scount, __FILE__, line, err, rank); return err; } + +} +} diff --git a/src/smpi/colls/allgatherv/allgatherv-pair.cpp b/src/smpi/colls/allgatherv/allgatherv-pair.cpp index 438c8634b4..1f6756c53d 100644 --- a/src/smpi/colls/allgatherv/allgatherv-pair.cpp +++ b/src/smpi/colls/allgatherv/allgatherv-pair.cpp @@ -64,8 +64,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * phases, nodes in pair communicate their data. 
* Auther: Ahmad Faraj ****************************************************************************/ + +namespace simgrid{ +namespace smpi{ + int -smpi_coll_tuned_allgatherv_pair(void *send_buff, int send_count, +Coll_allgatherv_pair::allgatherv(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm) @@ -100,3 +104,6 @@ smpi_coll_tuned_allgatherv_pair(void *send_buff, int send_count, return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/allgatherv/allgatherv-ring.cpp b/src/smpi/colls/allgatherv/allgatherv-ring.cpp index 7d7927ec7b..4712c557be 100644 --- a/src/smpi/colls/allgatherv/allgatherv-ring.cpp +++ b/src/smpi/colls/allgatherv/allgatherv-ring.cpp @@ -63,8 +63,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Descrp: Function works in P - 1 steps. In step i, node j - i -> j -> j+ i. * Auther: Ahmad Faraj ****************************************************************************/ + +namespace simgrid{ +namespace smpi{ + int -smpi_coll_tuned_allgatherv_ring(void *send_buff, int send_count, +Coll_allgatherv_ring::allgatherv(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm) @@ -98,3 +102,6 @@ smpi_coll_tuned_allgatherv_ring(void *send_buff, int send_count, return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/allreduce/allreduce-lr.cpp b/src/smpi/colls/allreduce/allreduce-lr.cpp index ab847ee411..5ff1b88ff5 100644 --- a/src/smpi/colls/allreduce/allreduce-lr.cpp +++ b/src/smpi/colls/allreduce/allreduce-lr.cpp @@ -20,7 +20,7 @@ //#include int -smpi_coll_tuned_allreduce_lr(void *sbuf, void *rbuf, int rcount, +Coll_allreduce_lr::allreduce(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { int tag = COLL_TAG_ALLREDUCE; @@ -39,7 +39,7 @@ smpi_coll_tuned_allreduce_lr(void *sbuf, void *rbuf, int rcount, /* when communication size is smaller than number of process (not support) */ if (rcount < size) { XBT_WARN("MPI_allreduce_lr use default MPI_allreduce."); - smpi_mpi_allreduce(sbuf, rbuf, rcount, dtype, op, comm); + Coll_allreduce_default::allreduce(sbuf, rbuf, rcount, dtype, op, comm); return MPI_SUCCESS; } @@ -97,7 +97,7 @@ smpi_coll_tuned_allreduce_lr(void *sbuf, void *rbuf, int rcount, /* when communication size is not divisible by number of process: call the native implementation for the remain chunk at the end of the operation */ if (remainder_flag) { - return mpi_coll_allreduce_fun((char *) sbuf + remainder_offset, + return Colls::allreduce((char *) sbuf + remainder_offset, (char *) rbuf + remainder_offset, remainder, dtype, op, comm); } diff --git a/src/smpi/colls/allreduce/allreduce-mvapich-rs.cpp b/src/smpi/colls/allreduce/allreduce-mvapich-rs.cpp index e8789a11d9..c94bd90758 100644 --- a/src/smpi/colls/allreduce/allreduce-mvapich-rs.cpp +++ b/src/smpi/colls/allreduce/allreduce-mvapich-rs.cpp @@ -23,7 +23,7 @@ #include "../colls_private.h" -int smpi_coll_tuned_allreduce_mvapich2_rs(void *sendbuf, +int Coll_allreduce_mvapich2_rs::allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, diff --git a/src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp b/src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp index d2a2264c82..123de2ce44 100644 --- a/src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp +++ b/src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp @@ -37,10 +37,10 @@ #include "../colls_private.h" 
-#define MPIR_Allreduce_pt2pt_rd_MV2 smpi_coll_tuned_allreduce_rdb -#define MPIR_Allreduce_pt2pt_rs_MV2 smpi_coll_tuned_allreduce_mvapich2_rs +#define MPIR_Allreduce_pt2pt_rd_MV2 Coll_allreduce_rdb::allreduce +#define MPIR_Allreduce_pt2pt_rs_MV2 Coll_allreduce_mvapich2_rs::allreduce -extern int (*MV2_Allreduce_function)(void *sendbuf, +extern int (*MV2_Allreducection)(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, @@ -59,7 +59,7 @@ static int MPIR_Allreduce_reduce_p2p_MV2( void *sendbuf, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - mpi_coll_reduce_fun(sendbuf,recvbuf,count,datatype,op,0,comm); + Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm); return MPI_SUCCESS; } @@ -69,13 +69,13 @@ static int MPIR_Allreduce_reduce_shmem_MV2( void *sendbuf, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - mpi_coll_reduce_fun(sendbuf,recvbuf,count,datatype,op,0,comm); + Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm); return MPI_SUCCESS; } /* general two level allreduce helper function */ -int smpi_coll_tuned_allreduce_mvapich2_two_level(void *sendbuf, +int Coll_allreduce_mvapich2_two_level::allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, @@ -89,9 +89,9 @@ int smpi_coll_tuned_allreduce_mvapich2_two_level(void *sendbuf, //if not set (use of the algo directly, without mvapich2 selector) if(MV2_Allreduce_intra_function==NULL) - MV2_Allreduce_intra_function = smpi_coll_tuned_allreduce_mpich; - if(MV2_Allreduce_function==NULL) - MV2_Allreduce_function = smpi_coll_tuned_allreduce_rdb; + MV2_Allreduce_intra_function = Coll_allreduce_mpich::allreduce; + if(MV2_Allreducection==NULL) + MV2_Allreducection = Coll_allreduce_rdb::allreduce; if(comm->get_leaders_comm()==MPI_COMM_NULL){ comm->init_smp(); @@ -135,7 +135,7 @@ int smpi_coll_tuned_allreduce_mvapich2_two_level(void *sendbuf, void* sendtmpbuf = (char *)smpi_get_tmp_sendbuffer(count*datatype->get_extent()); Datatype::copy(recvbuf, count, datatype,sendtmpbuf, count, datatype); /* inter-node allreduce */ - if(MV2_Allreduce_function == &MPIR_Allreduce_pt2pt_rd_MV2){ + if(MV2_Allreducection == &MPIR_Allreduce_pt2pt_rd_MV2){ mpi_errno = MPIR_Allreduce_pt2pt_rd_MV2(sendtmpbuf, recvbuf, count, datatype, op, leader_comm); @@ -163,7 +163,7 @@ int smpi_coll_tuned_allreduce_mvapich2_two_level(void *sendbuf, /* Broadcasting the mesage from leader to the rest */ /* Note: shared memory broadcast could improve the performance */ - mpi_errno = mpi_coll_bcast_fun(recvbuf, count, datatype, 0, shmem_comm); + mpi_errno = Colls::bcast(recvbuf, count, datatype, 0, shmem_comm); return (mpi_errno); diff --git a/src/smpi/colls/allreduce/allreduce-ompi-ring-segmented.cpp b/src/smpi/colls/allreduce/allreduce-ompi-ring-segmented.cpp index 595a189ff8..8a88359791 100644 --- a/src/smpi/colls/allreduce/allreduce-ompi-ring-segmented.cpp +++ b/src/smpi/colls/allreduce/allreduce-ompi-ring-segmented.cpp @@ -155,7 +155,7 @@ #include "../colls_private.h" int -smpi_coll_tuned_allreduce_ompi_ring_segmented(void *sbuf, void *rbuf, int count, +Coll_allreduce_ompi_ring_segmented::allreduce(void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) @@ -202,7 +202,7 @@ smpi_coll_tuned_allreduce_ompi_ring_segmented(void *sbuf, void *rbuf, int count, /* Special case for count less than size * segcount - use regular ring */ if (count < size * segcount) { XBT_DEBUG( "coll:tuned:allreduce_ring_segmented rank %d/%d, count %d, switching to regular ring", rank, size, count); - return (smpi_coll_tuned_allreduce_lr(sbuf, 
rbuf, count, dtype, op, + return (Coll_allreduce_lr::allreduce(sbuf, rbuf, count, dtype, op, comm)); } diff --git a/src/smpi/colls/allreduce/allreduce-rab-rdb.cpp b/src/smpi/colls/allreduce/allreduce-rab-rdb.cpp index f72175bd09..def46333ee 100644 --- a/src/smpi/colls/allreduce/allreduce-rab-rdb.cpp +++ b/src/smpi/colls/allreduce/allreduce-rab-rdb.cpp @@ -6,7 +6,7 @@ #include "../colls_private.h" -int smpi_coll_tuned_allreduce_rab_rdb(void *sbuff, void *rbuff, int count, +int Coll_allreduce_rab_rdb::allreduce(void *sbuff, void *rbuff, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { diff --git a/src/smpi/colls/allreduce/allreduce-rab1.cpp b/src/smpi/colls/allreduce/allreduce-rab1.cpp index 96c71055af..120abebf46 100644 --- a/src/smpi/colls/allreduce/allreduce-rab1.cpp +++ b/src/smpi/colls/allreduce/allreduce-rab1.cpp @@ -6,9 +6,10 @@ #include "../colls_private.h" //#include +using namespace simgrid::smpi; // NP pow of 2 for now -int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, +int Coll_allreduce_rab1::allreduce(void *sbuff, void *rbuff, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { @@ -68,7 +69,7 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, } memcpy(tmp_buf, (char *) recv + recv_idx * extent, recv_cnt * extent); - mpi_coll_allgather_fun(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm); + Colls::allgather(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm); memcpy(rbuff, recv, count * extent); smpi_free_tmp_buffer(recv); @@ -102,7 +103,7 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, } memcpy(tmp_buf, (char *) rbuff + recv_idx * extent, recv_cnt * extent); - mpi_coll_allgather_fun(tmp_buf, recv_cnt, dtype, rbuff, recv_cnt, dtype, comm); + Colls::allgather(tmp_buf, recv_cnt, dtype, rbuff, recv_cnt, dtype, comm); smpi_free_tmp_buffer(tmp_buf); } diff --git a/src/smpi/colls/allreduce/allreduce-rab2.cpp b/src/smpi/colls/allreduce/allreduce-rab2.cpp index 007a42b842..ac1e98f505 100644 --- a/src/smpi/colls/allreduce/allreduce-rab2.cpp +++ b/src/smpi/colls/allreduce/allreduce-rab2.cpp @@ -6,9 +6,10 @@ #include "../colls_private.h" //#include +using namespace simgrid::smpi; // this requires that count >= NP -int smpi_coll_tuned_allreduce_rab2(void *sbuff, void *rbuff, +int Coll_allreduce_rab2::allreduce(void *sbuff, void *rbuff, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { @@ -46,14 +47,14 @@ int smpi_coll_tuned_allreduce_rab2(void *sbuff, void *rbuff, memcpy(send, sbuff, s_extent * count); - mpi_coll_alltoall_fun(send, send_size, dtype, recv, send_size, dtype, comm); + Colls::alltoall(send, send_size, dtype, recv, send_size, dtype, comm); memcpy(tmp, recv, nbytes); for (i = 1, s_offset = nbytes; i < nprocs; i++, s_offset = i * nbytes) if(op!=MPI_OP_NULL) op->apply( (char *) recv + s_offset, tmp, &send_size, dtype); - mpi_coll_allgather_fun(tmp, send_size, dtype, recv, send_size, dtype, comm); + Colls::allgather(tmp, send_size, dtype, recv, send_size, dtype, comm); memcpy(rbuff, recv, count * s_extent); smpi_free_tmp_buffer(recv); @@ -67,7 +68,7 @@ int smpi_coll_tuned_allreduce_rab2(void *sbuff, void *rbuff, recv = (void *) smpi_get_tmp_recvbuffer(s_extent * send_size * nprocs); - mpi_coll_alltoall_fun(send, send_size, dtype, recv, send_size, dtype, comm); + Colls::alltoall(send, send_size, dtype, recv, send_size, dtype, comm); memcpy((char *) rbuff + r_offset, recv, nbytes); @@ -75,7 +76,7 @@ int smpi_coll_tuned_allreduce_rab2(void *sbuff, void *rbuff, if(op!=MPI_OP_NULL) op->apply( (char *) recv + 
s_offset, (char *) rbuff + r_offset, &send_size, dtype); - mpi_coll_allgather_fun((char *) rbuff + r_offset, send_size, dtype, rbuff, send_size, + Colls::allgather((char *) rbuff + r_offset, send_size, dtype, rbuff, send_size, dtype, comm); smpi_free_tmp_buffer(recv); } diff --git a/src/smpi/colls/allreduce/allreduce-rdb.cpp b/src/smpi/colls/allreduce/allreduce-rdb.cpp index cb99c01b54..82fc3fe9c8 100644 --- a/src/smpi/colls/allreduce/allreduce-rdb.cpp +++ b/src/smpi/colls/allreduce/allreduce-rdb.cpp @@ -7,7 +7,7 @@ #include "../colls_private.h" //#include -int smpi_coll_tuned_allreduce_rdb(void *sbuff, void *rbuff, int count, +int Coll_allreduce_rdb::allreduce(void *sbuff, void *rbuff, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { int nprocs, rank, tag = COLL_TAG_ALLREDUCE; diff --git a/src/smpi/colls/allreduce/allreduce-redbcast.cpp b/src/smpi/colls/allreduce/allreduce-redbcast.cpp index 26b9585300..35e40f17d9 100644 --- a/src/smpi/colls/allreduce/allreduce-redbcast.cpp +++ b/src/smpi/colls/allreduce/allreduce-redbcast.cpp @@ -6,11 +6,11 @@ #include "../colls_private.h" -int smpi_coll_tuned_allreduce_redbcast(void *buf, void *buf2, int count, +int Coll_allreduce_redbcast::allreduce(void *buf, void *buf2, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - mpi_coll_reduce_fun(buf, buf2, count, datatype, op, 0, comm); - mpi_coll_bcast_fun(buf2, count, datatype, 0, comm); + Colls::reduce(buf, buf2, count, datatype, op, 0, comm); + Colls::bcast(buf2, count, datatype, 0, comm); return MPI_SUCCESS; } diff --git a/src/smpi/colls/allreduce/allreduce-smp-binomial-pipeline.cpp b/src/smpi/colls/allreduce/allreduce-smp-binomial-pipeline.cpp index 9484f2652f..9ac2c82281 100644 --- a/src/smpi/colls/allreduce/allreduce-smp-binomial-pipeline.cpp +++ b/src/smpi/colls/allreduce/allreduce-smp-binomial-pipeline.cpp @@ -38,7 +38,7 @@ This fucntion performs all-reduce operation as follow. ** in a pipeline fashion 3) binomial_tree bcast intra-communication between root of each SMP node 4) binomial_tree bcast inside each SMP node */ -int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf, +int Coll_allreduce_smp_binomial_pipeline::allreduce(void *send_buf, void *recv_buf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) diff --git a/src/smpi/colls/allreduce/allreduce-smp-binomial.cpp b/src/smpi/colls/allreduce/allreduce-smp-binomial.cpp index 2545da91f6..7b93bf60ab 100644 --- a/src/smpi/colls/allreduce/allreduce-smp-binomial.cpp +++ b/src/smpi/colls/allreduce/allreduce-smp-binomial.cpp @@ -27,7 +27,7 @@ This fucntion performs all-reduce operation as follow. 3) binomial_tree bcast intra-communication between root of each SMP node 4) binomial_tree bcast inside each SMP node */ -int smpi_coll_tuned_allreduce_smp_binomial(void *send_buf, void *recv_buf, +int Coll_allreduce_smp_binomial::allreduce(void *send_buf, void *recv_buf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { diff --git a/src/smpi/colls/allreduce/allreduce-smp-rdb.cpp b/src/smpi/colls/allreduce/allreduce-smp-rdb.cpp index a5b40f1707..285d354b26 100644 --- a/src/smpi/colls/allreduce/allreduce-smp-rdb.cpp +++ b/src/smpi/colls/allreduce/allreduce-smp-rdb.cpp @@ -26,7 +26,7 @@ This fucntion performs all-reduce operation as follow. 
2) Recursive doubling intra-communication between root of each SMP node 3) binomial_tree bcast inside each SMP node */ -int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, +int Coll_allreduce_smp_rdb::allreduce(void *send_buf, void *recv_buf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { diff --git a/src/smpi/colls/allreduce/allreduce-smp-rsag-lr.cpp b/src/smpi/colls/allreduce/allreduce-smp-rsag-lr.cpp index 23abf0fde2..7e62a835ee 100644 --- a/src/smpi/colls/allreduce/allreduce-smp-rsag-lr.cpp +++ b/src/smpi/colls/allreduce/allreduce-smp-rsag-lr.cpp @@ -14,7 +14,7 @@ This fucntion performs all-reduce operation as follow. 3) allgather - inter between root of each SMP node 4) binomial_tree bcast inside each SMP node */ -int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, +int Coll_allreduce_smp_rsag_lr::allreduce(void *send_buf, void *recv_buf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { diff --git a/src/smpi/colls/allreduce/allreduce-smp-rsag-rab.cpp b/src/smpi/colls/allreduce/allreduce-smp-rsag-rab.cpp index 021279555e..53cde44a07 100644 --- a/src/smpi/colls/allreduce/allreduce-smp-rsag-rab.cpp +++ b/src/smpi/colls/allreduce/allreduce-smp-rsag-rab.cpp @@ -18,7 +18,7 @@ This fucntion performs all-reduce operation as follow. 3) allgather - inter between root of each SMP node 4) binomial_tree bcast inside each SMP node */ -int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count, +int Coll_allreduce_smp_rsag_rab::allreduce(void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { diff --git a/src/smpi/colls/allreduce/allreduce-smp-rsag.cpp b/src/smpi/colls/allreduce/allreduce-smp-rsag.cpp index 4cb5d31b03..f5f5b75bf1 100644 --- a/src/smpi/colls/allreduce/allreduce-smp-rsag.cpp +++ b/src/smpi/colls/allreduce/allreduce-smp-rsag.cpp @@ -13,7 +13,7 @@ This fucntion performs all-reduce operation as follow. 
3) allgather - inter between root of each SMP node 4) binomial_tree bcast inside each SMP node */ -int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, +int Coll_allreduce_smp_rsag::allreduce(void *send_buf, void *recv_buf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { diff --git a/src/smpi/colls/alltoall/alltoall-2dmesh.cpp b/src/smpi/colls/alltoall/alltoall-2dmesh.cpp index ca0702735e..bfc1bc455b 100644 --- a/src/smpi/colls/alltoall/alltoall-2dmesh.cpp +++ b/src/smpi/colls/alltoall/alltoall-2dmesh.cpp @@ -53,7 +53,7 @@ static int alltoall_check_is_2dmesh(int num, int *i, int *j) return 0; } -int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count, +int Coll_alltoall_2dmesh::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) diff --git a/src/smpi/colls/alltoall/alltoall-3dmesh.cpp b/src/smpi/colls/alltoall/alltoall-3dmesh.cpp index 742998e92c..85415517f4 100644 --- a/src/smpi/colls/alltoall/alltoall-3dmesh.cpp +++ b/src/smpi/colls/alltoall/alltoall-3dmesh.cpp @@ -46,7 +46,7 @@ static int alltoall_check_is_3dmesh(int num, int *i, int *j, int *k) return 0; } -int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, +int Coll_alltoall_3dmesh::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) diff --git a/src/smpi/colls/alltoall/alltoall-basic-linear.cpp b/src/smpi/colls/alltoall/alltoall-basic-linear.cpp new file mode 100644 index 0000000000..723c5b6121 --- /dev/null +++ b/src/smpi/colls/alltoall/alltoall-basic-linear.cpp @@ -0,0 +1,68 @@ +/* Copyright (c) 2013-2017. The SimGrid Team. + * All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + + #include "../colls_private.h" + +/*Naive and simple basic alltoall implementation. */ + + +namespace simgrid{ +namespace smpi{ + + +int Coll_alltoall_basic_linear::alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) +{ + int system_tag = 888; + int i; + int count; + MPI_Aint lb = 0, sendext = 0, recvext = 0; + MPI_Request *requests; + + /* Initialize. */ + int rank = comm->rank(); + int size = comm->size(); + XBT_DEBUG("<%d> algorithm alltoall_basic_linear() called.", rank); + sendtype->extent(&lb, &sendext); + recvtype->extent(&lb, &recvext); + /* simple optimization */ + int err = Datatype::copy(static_cast<char*>(sendbuf) + rank * sendcount * sendext, sendcount, sendtype, + static_cast<char*>(recvbuf) + rank * recvcount * recvext, recvcount, recvtype); + if (err == MPI_SUCCESS && size > 1) { + /* Initiate all send/recv to/from others. */ + requests = xbt_new(MPI_Request, 2 * (size - 1)); + /* Post all receives first -- a simple optimization */ + count = 0; + for (i = (rank + 1) % size; i != rank; i = (i + 1) % size) { + requests[count] = Request::irecv_init(static_cast<char*>(recvbuf) + i * recvcount * recvext, recvcount, + recvtype, i, system_tag, comm); + count++; + } + /* Now post all sends in reverse order + * - We would like to minimize the search time through message queue + * when messages actually arrive in the order in which they were posted.
+ TODO: check the previous assertion + */ + for (i = (rank + size - 1) % size; i != rank; i = (i + size - 1) % size) { + requests[count] = Request::isend_init(static_cast<char*>(sendbuf) + i * sendcount * sendext, sendcount, + sendtype, i, system_tag, comm); + count++; + } + /* Wait for them all. */ + Request::startall(count, requests); + XBT_DEBUG("<%d> wait for %d requests", rank, count); + Request::waitall(count, requests, MPI_STATUS_IGNORE); + for(i = 0; i < count; i++) { + if(requests[i]!=MPI_REQUEST_NULL) + Request::unref(&requests[i]); + } + xbt_free(requests); + } + return err; +} + +} +} diff --git a/src/smpi/colls/alltoall/alltoall-bruck.cpp b/src/smpi/colls/alltoall/alltoall-bruck.cpp index 9c0e30cd2a..fa262b7ab2 100644 --- a/src/smpi/colls/alltoall/alltoall-bruck.cpp +++ b/src/smpi/colls/alltoall/alltoall-bruck.cpp @@ -24,8 +24,15 @@ * Auther: MPICH / modified by Ahmad Faraj ****************************************************************************/ + +#include "../colls_private.h" + +namespace simgrid{ +namespace smpi{ + + int -smpi_coll_tuned_alltoall_bruck(void *send_buff, int send_count, +Coll_alltoall_bruck::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -35,7 +42,7 @@ smpi_coll_tuned_alltoall_bruck(void *send_buff, int send_count, MPI_Datatype new_type; int *blocks_length, *disps; - int i, src, dst, rank, num_procs, count, remainder, block, position; + int i, src, dst, rank, num_procs, count, block, position; int pack_size, tag = COLL_TAG_ALLTOALL, pof2 = 1; @@ -79,7 +86,7 @@ smpi_coll_tuned_alltoall_bruck(void *send_buff, int send_count, } MPI_Type_indexed(count, blocks_length, disps, recv_type, &new_type); - smpi_datatype_commit(&new_type); + new_type->commit(); position = 0; MPI_Pack(recv_buff, 1, new_type, tmp_buff, pack_size, &position, comm); @@ -113,3 +120,6 @@ smpi_coll_tuned_alltoall_bruck(void *send_buff, int send_count, smpi_free_tmp_buffer(tmp_buff); return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/alltoall/alltoall-mvapich-scatter-dest.cpp b/src/smpi/colls/alltoall/alltoall-mvapich-scatter-dest.cpp index 047c876779..e812ea33d1 100644 --- a/src/smpi/colls/alltoall/alltoall-mvapich-scatter-dest.cpp +++ b/src/smpi/colls/alltoall/alltoall-mvapich-scatter-dest.cpp @@ -43,7 +43,7 @@ #include "../colls_private.h" -int smpi_coll_tuned_alltoall_mvapich2_scatter_dest( +int Coll_alltoall_mvapich2_scatter_dest::alltoall( void *sendbuf, int sendcount, MPI_Datatype sendtype, diff --git a/src/smpi/colls/alltoall/alltoall-pair-light-barrier.cpp b/src/smpi/colls/alltoall/alltoall-pair-light-barrier.cpp index 6b5f3e8d39..2299e719d6 100644 --- a/src/smpi/colls/alltoall/alltoall-pair-light-barrier.cpp +++ b/src/smpi/colls/alltoall/alltoall-pair-light-barrier.cpp @@ -28,7 +28,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoall_pair_light_barrier(void *send_buff, int send_count, +Coll_alltoall_pair_light_barrier::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, diff --git a/src/smpi/colls/alltoall/alltoall-pair-mpi-barrier.cpp b/src/smpi/colls/alltoall/alltoall-pair-mpi-barrier.cpp index 2493ca433e..9c04f878f3 100644 --- a/src/smpi/colls/alltoall/alltoall-pair-mpi-barrier.cpp +++ b/src/smpi/colls/alltoall/alltoall-pair-mpi-barrier.cpp @@ -28,7 +28,7 @@ ****************************************************************************/ int
-smpi_coll_tuned_alltoall_pair_mpi_barrier(void *send_buff, int send_count, +Coll_alltoall_pair_mpi_barrier::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -54,7 +54,7 @@ smpi_coll_tuned_alltoall_pair_mpi_barrier(void *send_buff, int send_count, for (i = 0; i < num_procs; i++) { src = dst = rank ^ i; - mpi_coll_barrier_fun(comm); + Colls::barrier(comm); Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag, comm, &s); diff --git a/src/smpi/colls/alltoall/alltoall-pair-one-barrier.cpp b/src/smpi/colls/alltoall/alltoall-pair-one-barrier.cpp index 6a12962759..c18406e5ef 100644 --- a/src/smpi/colls/alltoall/alltoall-pair-one-barrier.cpp +++ b/src/smpi/colls/alltoall/alltoall-pair-one-barrier.cpp @@ -27,7 +27,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoall_pair_one_barrier(void *send_buff, int send_count, +Coll_alltoall_pair_one_barrier::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -53,7 +53,7 @@ smpi_coll_tuned_alltoall_pair_one_barrier(void *send_buff, int send_count, send_chunk *= send_count; recv_chunk *= recv_count; - mpi_coll_barrier_fun(comm); + Colls::barrier(comm); for (i = 0; i < num_procs; i++) { src = dst = rank ^ i; Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, diff --git a/src/smpi/colls/alltoall/alltoall-pair.cpp b/src/smpi/colls/alltoall/alltoall-pair.cpp index 4d2956af3d..1f1f5ef9ba 100644 --- a/src/smpi/colls/alltoall/alltoall-pair.cpp +++ b/src/smpi/colls/alltoall/alltoall-pair.cpp @@ -28,7 +28,7 @@ ****************************************************************************/ -int smpi_coll_tuned_alltoall_pair_rma(void *send_buff, int send_count, MPI_Datatype send_type, +int Coll_alltoall_pair_rma::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) { @@ -62,7 +62,7 @@ int smpi_coll_tuned_alltoall_pair_rma(void *send_buff, int send_count, MPI_Datat } -int smpi_coll_tuned_alltoall_pair(void *send_buff, int send_count, +int Coll_alltoall_pair::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) diff --git a/src/smpi/colls/alltoall/alltoall-rdb.cpp b/src/smpi/colls/alltoall/alltoall-rdb.cpp index 92a0722e7d..f41ded7adf 100644 --- a/src/smpi/colls/alltoall/alltoall-rdb.cpp +++ b/src/smpi/colls/alltoall/alltoall-rdb.cpp @@ -27,7 +27,7 @@ * Auther: MPICH / slightly modified by Ahmad Faraj. 
****************************************************************************/ -int smpi_coll_tuned_alltoall_rdb(void *send_buff, int send_count, +int Coll_alltoall_rdb::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) diff --git a/src/smpi/colls/alltoall/alltoall-ring-light-barrier.cpp b/src/smpi/colls/alltoall/alltoall-ring-light-barrier.cpp index f670da3b63..5292eeff23 100644 --- a/src/smpi/colls/alltoall/alltoall-ring-light-barrier.cpp +++ b/src/smpi/colls/alltoall/alltoall-ring-light-barrier.cpp @@ -28,7 +28,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoall_ring_light_barrier(void *send_buff, int send_count, +Coll_alltoall_ring_light_barrier::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, diff --git a/src/smpi/colls/alltoall/alltoall-ring-mpi-barrier.cpp b/src/smpi/colls/alltoall/alltoall-ring-mpi-barrier.cpp index aefe70fe5b..9145378770 100644 --- a/src/smpi/colls/alltoall/alltoall-ring-mpi-barrier.cpp +++ b/src/smpi/colls/alltoall/alltoall-ring-mpi-barrier.cpp @@ -27,7 +27,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoall_ring_mpi_barrier(void *send_buff, int send_count, +Coll_alltoall_ring_mpi_barrier::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -52,7 +52,7 @@ smpi_coll_tuned_alltoall_ring_mpi_barrier(void *send_buff, int send_count, src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; - mpi_coll_barrier_fun(comm); + Colls::barrier(comm); Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag, comm, &s); diff --git a/src/smpi/colls/alltoall/alltoall-ring-one-barrier.cpp b/src/smpi/colls/alltoall/alltoall-ring-one-barrier.cpp index 7ad212aa86..7f168b9dab 100644 --- a/src/smpi/colls/alltoall/alltoall-ring-one-barrier.cpp +++ b/src/smpi/colls/alltoall/alltoall-ring-one-barrier.cpp @@ -26,7 +26,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoall_ring_one_barrier(void *send_buff, int send_count, +Coll_alltoall_ring_one_barrier::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -47,7 +47,7 @@ smpi_coll_tuned_alltoall_ring_one_barrier(void *send_buff, int send_count, send_chunk *= send_count; recv_chunk *= recv_count; - mpi_coll_barrier_fun(comm); + Colls::barrier(comm); for (i = 0; i < num_procs; i++) { src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; diff --git a/src/smpi/colls/alltoall/alltoall-ring.cpp b/src/smpi/colls/alltoall/alltoall-ring.cpp index dea762e39b..9029408630 100644 --- a/src/smpi/colls/alltoall/alltoall-ring.cpp +++ b/src/smpi/colls/alltoall/alltoall-ring.cpp @@ -26,7 +26,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoall_ring(void *send_buff, int send_count, +Coll_alltoall_ring::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) diff --git a/src/smpi/colls/alltoallv/alltoallv-bruck.cpp b/src/smpi/colls/alltoallv/alltoallv-bruck.cpp index 3a6f9e5a03..56d750d7e4 100644 
--- a/src/smpi/colls/alltoallv/alltoallv-bruck.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-bruck.cpp @@ -13,7 +13,7 @@ * FIXME: uh, check smpi_pmpi again, but this routine is called for > 12, not * less... **/ -int smpi_coll_tuned_alltoallv_bruck(void *sendbuf, int *sendcounts, int *senddisps, +int Coll_alltoallv_bruck::alltoallv(void *sendbuf, int *sendcounts, int *senddisps, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *recvdisps, MPI_Datatype recvtype, MPI_Comm comm) @@ -80,7 +80,7 @@ int smpi_coll_tuned_alltoallv_bruck(void *sendbuf, int *sendcounts, int *senddis count++; } /* Wait for them all. */ - //smpi_mpi_startall(count, requests); + //Colls::startall(count, requests); XBT_DEBUG("<%d> wait for %d requests", rank, count); Request::waitall(count, requests, MPI_STATUSES_IGNORE); xbt_free(requests); diff --git a/src/smpi/colls/alltoallv/alltoallv-ompi-basic-linear.cpp b/src/smpi/colls/alltoallv/alltoallv-ompi-basic-linear.cpp index 0dfa76eb26..0ef5fde063 100644 --- a/src/smpi/colls/alltoallv/alltoallv-ompi-basic-linear.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-ompi-basic-linear.cpp @@ -15,7 +15,7 @@ * GEF Oct05 after asking Jeff. */ int -smpi_coll_tuned_alltoallv_ompi_basic_linear(void *sbuf, int *scounts, int *sdisps, +Coll_alltoallv_ompi_basic_linear::alltoallv(void *sbuf, int *scounts, int *sdisps, MPI_Datatype sdtype, void *rbuf, int *rcounts, int *rdisps, MPI_Datatype rdtype, diff --git a/src/smpi/colls/alltoallv/alltoallv-pair-light-barrier.cpp b/src/smpi/colls/alltoallv/alltoallv-pair-light-barrier.cpp index 13b0396291..4159fdea7c 100644 --- a/src/smpi/colls/alltoallv/alltoallv-pair-light-barrier.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-pair-light-barrier.cpp @@ -28,7 +28,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoallv_pair_light_barrier(void *send_buff, int *send_counts, int *send_disps, +Coll_alltoallv_pair_light_barrier::alltoallv(void *send_buff, int *send_counts, int *send_disps, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, diff --git a/src/smpi/colls/alltoallv/alltoallv-pair-mpi-barrier.cpp b/src/smpi/colls/alltoallv/alltoallv-pair-mpi-barrier.cpp index a24b0fb392..76b88726db 100644 --- a/src/smpi/colls/alltoallv/alltoallv-pair-mpi-barrier.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-pair-mpi-barrier.cpp @@ -28,7 +28,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoallv_pair_mpi_barrier(void *send_buff, int *send_counts, int *send_disps, +Coll_alltoallv_pair_mpi_barrier::alltoallv(void *send_buff, int *send_counts, int *send_disps, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm) @@ -51,7 +51,7 @@ smpi_coll_tuned_alltoallv_pair_mpi_barrier(void *send_buff, int *send_counts, in for (i = 0; i < num_procs; i++) { src = dst = rank ^ i; - smpi_mpi_barrier(comm); + Colls::barrier(comm); Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, tag, recv_ptr + recv_disps[src] * recv_chunk, recv_counts[src], recv_type, src, tag, comm, &s); diff --git a/src/smpi/colls/alltoallv/alltoallv-pair-one-barrier.cpp b/src/smpi/colls/alltoallv/alltoallv-pair-one-barrier.cpp index 2118658225..77187552ec 100644 --- a/src/smpi/colls/alltoallv/alltoallv-pair-one-barrier.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-pair-one-barrier.cpp @@ -27,7 +27,7 @@ 
****************************************************************************/ int -smpi_coll_tuned_alltoallv_pair_one_barrier(void *send_buff, int *send_counts, int *send_disps, +Coll_alltoallv_pair_one_barrier::alltoallv(void *send_buff, int *send_counts, int *send_disps, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm) { @@ -49,7 +49,7 @@ smpi_coll_tuned_alltoallv_pair_one_barrier(void *send_buff, int *send_counts, in send_chunk = send_type->get_extent(); recv_chunk = recv_type->get_extent(); - smpi_mpi_barrier(comm); + Colls::barrier(comm); for (i = 0; i < num_procs; i++) { src = dst = rank ^ i; Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, diff --git a/src/smpi/colls/alltoallv/alltoallv-pair.cpp b/src/smpi/colls/alltoallv/alltoallv-pair.cpp index c8e276c4a0..b2ebef67f0 100644 --- a/src/smpi/colls/alltoallv/alltoallv-pair.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-pair.cpp @@ -27,7 +27,7 @@ * Auther: Ahmad Faraj ****************************************************************************/ -int smpi_coll_tuned_alltoallv_pair(void *send_buff, int *send_counts, int *send_disps, +int Coll_alltoallv_pair::alltoallv(void *send_buff, int *send_counts, int *send_disps, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm) diff --git a/src/smpi/colls/alltoallv/alltoallv-ring-light-barrier.cpp b/src/smpi/colls/alltoallv/alltoallv-ring-light-barrier.cpp index eb9ef77798..076771ae17 100644 --- a/src/smpi/colls/alltoallv/alltoallv-ring-light-barrier.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-ring-light-barrier.cpp @@ -28,7 +28,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoallv_ring_light_barrier(void *send_buff, int *send_counts, int *send_disps, +Coll_alltoallv_ring_light_barrier::alltoallv(void *send_buff, int *send_counts, int *send_disps, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, diff --git a/src/smpi/colls/alltoallv/alltoallv-ring-mpi-barrier.cpp b/src/smpi/colls/alltoallv/alltoallv-ring-mpi-barrier.cpp index 840a9d5e0d..c4d616927f 100644 --- a/src/smpi/colls/alltoallv/alltoallv-ring-mpi-barrier.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-ring-mpi-barrier.cpp @@ -27,7 +27,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoallv_ring_mpi_barrier(void *send_buff, int *send_counts, int *send_disps, +Coll_alltoallv_ring_mpi_barrier::alltoallv(void *send_buff, int *send_counts, int *send_disps, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm) @@ -49,7 +49,7 @@ smpi_coll_tuned_alltoallv_ring_mpi_barrier(void *send_buff, int *send_counts, in src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; - smpi_mpi_barrier(comm); + Colls::barrier(comm); Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, tag, recv_ptr + recv_disps[src] * recv_chunk, recv_counts[src], recv_type, src, tag, comm, &s); diff --git a/src/smpi/colls/alltoallv/alltoallv-ring-one-barrier.cpp b/src/smpi/colls/alltoallv/alltoallv-ring-one-barrier.cpp index 1bb68f7fe1..a73f029cf9 100644 --- a/src/smpi/colls/alltoallv/alltoallv-ring-one-barrier.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-ring-one-barrier.cpp @@ -26,7 +26,7 @@ 
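
Note (not part of the patch): the ring alltoall/alltoallv variants renamed in the hunks around here all use the same schedule: at step i every rank receives from (rank - i + num_procs) % num_procs and sends to (rank + i) % num_procs. What differs is the synchronization, which is exactly where these hunks touch the code: the *_mpi_barrier variants now call Colls::barrier(comm) inside the loop, once per step, while the *_one_barrier variants call it a single time before the loop starts. Condensed from the surrounding context lines, the per-step exchange is:

    for (i = 0; i < num_procs; i++) {
      src = (rank - i + num_procs) % num_procs;  /* peer to receive from at step i */
      dst = (rank + i) % num_procs;              /* peer to send to at step i      */
      Colls::barrier(comm);                      /* per-step sync: *_mpi_barrier variants only */
      Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag,
                        recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag,
                        comm, &s);
    }
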
****************************************************************************/ int -smpi_coll_tuned_alltoallv_ring_one_barrier(void *send_buff, int *send_counts, int *send_disps, +Coll_alltoallv_ring_one_barrier::alltoallv(void *send_buff, int *send_counts, int *send_disps, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm) @@ -44,7 +44,7 @@ smpi_coll_tuned_alltoallv_ring_one_barrier(void *send_buff, int *send_counts, in send_chunk = send_type->get_extent(); recv_chunk = recv_type->get_extent(); - smpi_mpi_barrier(comm); + Colls::barrier(comm); for (i = 0; i < num_procs; i++) { src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; diff --git a/src/smpi/colls/alltoallv/alltoallv-ring.cpp b/src/smpi/colls/alltoallv/alltoallv-ring.cpp index 4ac63179aa..09efe30da5 100644 --- a/src/smpi/colls/alltoallv/alltoallv-ring.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-ring.cpp @@ -26,7 +26,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoallv_ring(void *send_buff, int *send_counts, int *send_disps, +Coll_alltoallv_ring::alltoallv(void *send_buff, int *send_counts, int *send_disps, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, diff --git a/src/smpi/colls/barrier/barrier-mvapich2-pair.cpp b/src/smpi/colls/barrier/barrier-mvapich2-pair.cpp index bdcb4456d0..51bc6b6f04 100644 --- a/src/smpi/colls/barrier/barrier-mvapich2-pair.cpp +++ b/src/smpi/colls/barrier/barrier-mvapich2-pair.cpp @@ -42,7 +42,7 @@ #include "../colls_private.h" #include "../coll_tuned_topo.h" -int smpi_coll_tuned_barrier_mvapich2_pair(MPI_Comm comm) +int Coll_barrier_mvapich2_pair::barrier(MPI_Comm comm) { int size, rank; diff --git a/src/smpi/colls/barrier/barrier-ompi.cpp b/src/smpi/colls/barrier/barrier-ompi.cpp index 2b08bdf772..f57b2307ed 100644 --- a/src/smpi/colls/barrier/barrier-ompi.cpp +++ b/src/smpi/colls/barrier/barrier-ompi.cpp @@ -43,7 +43,7 @@ * synchronous gurantee made by last ring of sends are synchronous * */ -int smpi_coll_tuned_barrier_ompi_doublering(MPI_Comm comm +int Coll_barrier_ompi_doublering::barrier(MPI_Comm comm ) { int rank, size; @@ -104,7 +104,7 @@ int smpi_coll_tuned_barrier_ompi_doublering(MPI_Comm comm * To make synchronous, uses sync sends and sync sendrecvs */ -int smpi_coll_tuned_barrier_ompi_recursivedoubling(MPI_Comm comm +int Coll_barrier_ompi_recursivedoubling::barrier(MPI_Comm comm ) { int rank, size, adjsize; @@ -179,7 +179,7 @@ int smpi_coll_tuned_barrier_ompi_recursivedoubling(MPI_Comm comm * To make synchronous, uses sync sends and sync sendrecvs */ -int smpi_coll_tuned_barrier_ompi_bruck(MPI_Comm comm +int Coll_barrier_ompi_bruck::barrier(MPI_Comm comm ) { int rank, size; @@ -212,7 +212,7 @@ int smpi_coll_tuned_barrier_ompi_bruck(MPI_Comm comm * To make synchronous, uses sync sends and sync sendrecvs */ /* special case for two processes */ -int smpi_coll_tuned_barrier_ompi_two_procs(MPI_Comm comm +int Coll_barrier_ompi_two_procs::barrier(MPI_Comm comm ) { int remote; @@ -245,7 +245,7 @@ int smpi_coll_tuned_barrier_ompi_two_procs(MPI_Comm comm /* copied function (with appropriate renaming) starts here */ -int smpi_coll_tuned_barrier_ompi_basic_linear(MPI_Comm comm) +int Coll_barrier_ompi_basic_linear::barrier(MPI_Comm comm) { int i; int size = comm->size(); @@ -297,7 +297,7 @@ int smpi_coll_tuned_barrier_ompi_basic_linear(MPI_Comm comm) * Another recursive doubling type algorithm, but in this case 
* we go up the tree and back down the tree. */ -int smpi_coll_tuned_barrier_ompi_tree(MPI_Comm comm) +int Coll_barrier_ompi_tree::barrier(MPI_Comm comm) { int rank, size, depth; int jump, partner; diff --git a/src/smpi/colls/bcast/bcast-NTSB.cpp b/src/smpi/colls/bcast/bcast-NTSB.cpp index 74225c12d3..e7746da2cd 100644 --- a/src/smpi/colls/bcast/bcast-NTSB.cpp +++ b/src/smpi/colls/bcast/bcast-NTSB.cpp @@ -8,7 +8,7 @@ int bcast_NTSB_segment_size_in_byte = 8192; -int smpi_coll_tuned_bcast_NTSB(void *buf, int count, MPI_Datatype datatype, +int Coll_bcast_NTSB::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { int tag = COLL_TAG_BCAST; @@ -177,7 +177,7 @@ int smpi_coll_tuned_bcast_NTSB(void *buf, int count, MPI_Datatype datatype, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_NTSB use default MPI_bcast."); - smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype, + Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype, root, comm); } diff --git a/src/smpi/colls/bcast/bcast-NTSL-Isend.cpp b/src/smpi/colls/bcast/bcast-NTSL-Isend.cpp index 78cb092044..ee37369f75 100644 --- a/src/smpi/colls/bcast/bcast-NTSL-Isend.cpp +++ b/src/smpi/colls/bcast/bcast-NTSL-Isend.cpp @@ -11,7 +11,7 @@ static int bcast_NTSL_segment_size_in_byte = 8192; /* Non-topology-specific pipelined linear-bcast function 0->1, 1->2 ,2->3, ....., ->last node : in a pipeline fashion */ -int smpi_coll_tuned_bcast_NTSL_Isend(void *buf, int count, MPI_Datatype datatype, +int Coll_bcast_NTSL_Isend::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { int tag = COLL_TAG_BCAST; @@ -124,7 +124,7 @@ int smpi_coll_tuned_bcast_NTSL_Isend(void *buf, int count, MPI_Datatype datatype /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_NTSL_Isend_nb use default MPI_bcast."); - smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype, + Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype, root, comm); } diff --git a/src/smpi/colls/bcast/bcast-NTSL.cpp b/src/smpi/colls/bcast/bcast-NTSL.cpp index 740efeb704..587e8da7d7 100644 --- a/src/smpi/colls/bcast/bcast-NTSL.cpp +++ b/src/smpi/colls/bcast/bcast-NTSL.cpp @@ -11,7 +11,7 @@ static int bcast_NTSL_segment_size_in_byte = 8192; /* Non-topology-specific pipelined linear-bcast function 0->1, 1->2 ,2->3, ....., ->last node : in a pipeline fashion */ -int smpi_coll_tuned_bcast_NTSL(void *buf, int count, MPI_Datatype datatype, +int Coll_bcast_NTSL::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { int tag = COLL_TAG_BCAST; @@ -124,7 +124,7 @@ int smpi_coll_tuned_bcast_NTSL(void *buf, int count, MPI_Datatype datatype, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_arrival_NTSL use default MPI_bcast."); - smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype, + Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype, root, comm); } diff --git a/src/smpi/colls/bcast/bcast-SMP-binary.cpp b/src/smpi/colls/bcast/bcast-SMP-binary.cpp index fa9cf451ae..02971613fd 100644 --- a/src/smpi/colls/bcast/bcast-SMP-binary.cpp +++ b/src/smpi/colls/bcast/bcast-SMP-binary.cpp @@ -9,7 +9,7 @@ int 
bcast_SMP_binary_segment_byte = 8192; -int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, +int Coll_bcast_SMP_binary::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { @@ -33,7 +33,7 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, host_num_core = comm->get_intra_comm()->size(); }else{ //implementation buggy in this case - return smpi_coll_tuned_bcast_mpich( buf , count, datatype, + return Coll_bcast_mpich::bcast( buf , count, datatype, root, comm); } @@ -222,7 +222,7 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, // when count is not divisible by block size, use default BCAST for the remainder if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_SMP_binary use default MPI_bcast."); - smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype, + Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype, root, comm); } diff --git a/src/smpi/colls/bcast/bcast-SMP-binomial.cpp b/src/smpi/colls/bcast/bcast-SMP-binomial.cpp index 5ac33b6cd5..6dfed6a42f 100644 --- a/src/smpi/colls/bcast/bcast-SMP-binomial.cpp +++ b/src/smpi/colls/bcast/bcast-SMP-binomial.cpp @@ -6,7 +6,7 @@ #include "../colls_private.h" -int smpi_coll_tuned_bcast_SMP_binomial(void *buf, int count, +int Coll_bcast_SMP_binomial::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { @@ -27,7 +27,7 @@ int smpi_coll_tuned_bcast_SMP_binomial(void *buf, int count, num_core = comm->get_intra_comm()->size(); }else{ //implementation buggy in this case - return smpi_coll_tuned_bcast_mpich( buf , count, datatype, + return Coll_bcast_mpich::bcast( buf , count, datatype, root, comm); } diff --git a/src/smpi/colls/bcast/bcast-SMP-linear.cpp b/src/smpi/colls/bcast/bcast-SMP-linear.cpp index 356c53ac2a..2b3aee4023 100644 --- a/src/smpi/colls/bcast/bcast-SMP-linear.cpp +++ b/src/smpi/colls/bcast/bcast-SMP-linear.cpp @@ -8,7 +8,7 @@ int bcast_SMP_linear_segment_byte = 8192; -int smpi_coll_tuned_bcast_SMP_linear(void *buf, int count, +int Coll_bcast_SMP_linear::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { @@ -32,7 +32,7 @@ int smpi_coll_tuned_bcast_SMP_linear(void *buf, int count, num_core = comm->get_intra_comm()->size(); }else{ //implementation buggy in this case - return smpi_coll_tuned_bcast_mpich( buf , count, datatype, + return Coll_bcast_mpich::bcast( buf , count, datatype, root, comm); } @@ -53,7 +53,7 @@ int smpi_coll_tuned_bcast_SMP_linear(void *buf, int count, // call native when MPI communication size is too small if (size <= num_core) { XBT_WARN("MPI_bcast_SMP_linear use default MPI_bcast."); - smpi_mpi_bcast(buf, count, datatype, root, comm); + Coll_bcast_default::bcast(buf, count, datatype, root, comm); return MPI_SUCCESS; } // if root is not zero send to rank zero first @@ -170,7 +170,7 @@ int smpi_coll_tuned_bcast_SMP_linear(void *buf, int count, // when count is not divisible by block size, use default BCAST for the remainder if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_SMP_linear use default MPI_bcast."); - smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype, + Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype, root, comm); } diff --git a/src/smpi/colls/bcast/bcast-arrival-pattern-aware-wait.cpp b/src/smpi/colls/bcast/bcast-arrival-pattern-aware-wait.cpp index b070bef4e8..5baa7cf3e9 100644 --- a/src/smpi/colls/bcast/bcast-arrival-pattern-aware-wait.cpp +++ 
b/src/smpi/colls/bcast/bcast-arrival-pattern-aware-wait.cpp @@ -17,7 +17,7 @@ int bcast_arrival_pattern_aware_wait_segment_size_in_byte = 8192; #endif /* Non-topology-specific pipelined linear-bcast function */ -int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, +int Coll_bcast_arrival_pattern_aware_wait::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { @@ -247,7 +247,7 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_arrival_pattern_aware_wait use default MPI_bcast."); - smpi_mpi_bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm); + Colls::bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm); } return MPI_SUCCESS; diff --git a/src/smpi/colls/bcast/bcast-arrival-pattern-aware.cpp b/src/smpi/colls/bcast/bcast-arrival-pattern-aware.cpp index 81eb7ae73f..9138d9290d 100644 --- a/src/smpi/colls/bcast/bcast-arrival-pattern-aware.cpp +++ b/src/smpi/colls/bcast/bcast-arrival-pattern-aware.cpp @@ -12,7 +12,7 @@ static int bcast_NTSL_segment_size_in_byte = 8192; #define MAX_NODE 1024 /* Non-topology-specific pipelined linear-bcast function */ -int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, +int Coll_bcast_arrival_pattern_aware::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { @@ -357,7 +357,7 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_arrival_pattern_aware use default MPI_bcast."); - smpi_mpi_bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm); + Colls::bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm); } return MPI_SUCCESS; diff --git a/src/smpi/colls/bcast/bcast-arrival-scatter.cpp b/src/smpi/colls/bcast/bcast-arrival-scatter.cpp index 2f0ab213c1..578b0f988a 100644 --- a/src/smpi/colls/bcast/bcast-arrival-scatter.cpp +++ b/src/smpi/colls/bcast/bcast-arrival-scatter.cpp @@ -15,7 +15,7 @@ #endif /* Non-topology-specific pipelined linear-bcast function */ -int smpi_coll_tuned_bcast_arrival_scatter(void *buf, int count, +int Coll_bcast_arrival_scatter::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { @@ -59,7 +59,7 @@ int smpi_coll_tuned_bcast_arrival_scatter(void *buf, int count, /* message too small */ if (count < size) { XBT_WARN("MPI_bcast_arrival_scatter use default MPI_bcast."); - smpi_mpi_bcast(buf, count, datatype, root, comm); + Colls::bcast(buf, count, datatype, root, comm); return MPI_SUCCESS; } diff --git a/src/smpi/colls/bcast/bcast-binomial-tree.cpp b/src/smpi/colls/bcast/bcast-binomial-tree.cpp index 0ab2538705..d73682e7fd 100644 --- a/src/smpi/colls/bcast/bcast-binomial-tree.cpp +++ b/src/smpi/colls/bcast/bcast-binomial-tree.cpp @@ -69,7 +69,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
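
Note (not part of the patch): the pipelined bcast implementations touched above (NTSB, NTSL, NTSL-Isend, SMP-binary, SMP-linear, arrival-pattern-aware, ...) share one idiom: the message is cut into fixed-size segments and, when count is not a multiple of the segment, the tail is delegated to the default broadcast, which is the call this commit rewrites from smpi_mpi_bcast() to Colls::bcast(). A hypothetical helper showing that shared tail handling (the segment/increment arithmetic is paraphrased from those files, not quoted):

    /* illustrative sketch only, not code from the patch */
    static void bcast_pipeline_tail(void* buf, int count, int segment, int increment,
                                    MPI_Datatype datatype, int root, MPI_Comm comm)
    {
      int pipe_length = count / segment;  /* full segments already sent through the pipeline */
      int remainder   = count % segment;  /* elements left over after the last full segment  */
      if ((remainder != 0) && (count > segment))
        Colls::bcast((char*)buf + pipe_length * increment, remainder, datatype, root, comm);
    }
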
****************************************************************************/ int -smpi_coll_tuned_bcast_binomial_tree(void *buff, int count, +Coll_bcast_binomial_tree::bcast(void *buff, int count, MPI_Datatype data_type, int root, MPI_Comm comm) { diff --git a/src/smpi/colls/bcast/bcast-flattree-pipeline.cpp b/src/smpi/colls/bcast/bcast-flattree-pipeline.cpp index bac37d28dd..3f283e91bd 100644 --- a/src/smpi/colls/bcast/bcast-flattree-pipeline.cpp +++ b/src/smpi/colls/bcast/bcast-flattree-pipeline.cpp @@ -9,7 +9,7 @@ int flattree_segment_in_byte = 8192; int -smpi_coll_tuned_bcast_flattree_pipeline(void *buff, int count, +Coll_bcast_flattree_pipeline::bcast(void *buff, int count, MPI_Datatype data_type, int root, MPI_Comm comm) { @@ -25,7 +25,7 @@ smpi_coll_tuned_bcast_flattree_pipeline(void *buff, int count, int increment = segment * extent; if (pipe_length==0) { XBT_WARN("MPI_bcast_flattree_pipeline use default MPI_bcast_flattree."); - return smpi_coll_tuned_bcast_flattree(buff, count, data_type, root, comm); + return Coll_bcast_flattree::bcast(buff, count, data_type, root, comm); } rank = comm->rank(); num_procs = comm->size(); diff --git a/src/smpi/colls/bcast/bcast-flattree.cpp b/src/smpi/colls/bcast/bcast-flattree.cpp index 2a307f5f17..b77a1864fa 100644 --- a/src/smpi/colls/bcast/bcast-flattree.cpp +++ b/src/smpi/colls/bcast/bcast-flattree.cpp @@ -7,7 +7,7 @@ #include "../colls_private.h" int -smpi_coll_tuned_bcast_flattree(void *buff, int count, MPI_Datatype data_type, +Coll_bcast_flattree::bcast(void *buff, int count, MPI_Datatype data_type, int root, MPI_Comm comm) { MPI_Request *req_ptr; diff --git a/src/smpi/colls/bcast/bcast-mvapich-smp.cpp b/src/smpi/colls/bcast/bcast-mvapich-smp.cpp index af641acf40..73e5f60d61 100644 --- a/src/smpi/colls/bcast/bcast-mvapich-smp.cpp +++ b/src/smpi/colls/bcast/bcast-mvapich-smp.cpp @@ -51,17 +51,17 @@ extern int mv2_intra_node_knomial_factor; extern int mv2_bcast_two_level_system_size; #define INTRA_NODE_ROOT 0 -#define MPIR_Pipelined_Bcast_Zcpy_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Pipelined_Bcast_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Bcast_binomial_MV2 smpi_coll_tuned_bcast_binomial_tree -#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 smpi_coll_tuned_bcast_scatter_LR_allgather -#define MPIR_Bcast_scatter_doubling_allgather_MV2 smpi_coll_tuned_bcast_scatter_rdb_allgather -#define MPIR_Bcast_scatter_ring_allgather_MV2 smpi_coll_tuned_bcast_scatter_LR_allgather -#define MPIR_Shmem_Bcast_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Bcast_tune_inter_node_helper_MV2 smpi_coll_tuned_bcast_mvapich2_inter_node -#define MPIR_Bcast_inter_node_helper_MV2 smpi_coll_tuned_bcast_mvapich2_inter_node -#define MPIR_Knomial_Bcast_intra_node_MV2 smpi_coll_tuned_bcast_mvapich2_knomial_intra_node -#define MPIR_Bcast_intra_MV2 smpi_coll_tuned_bcast_mvapich2_intra_node +#define MPIR_Pipelined_Bcast_Zcpy_MV2 Coll_bcast_mpich::bcast +#define MPIR_Pipelined_Bcast_MV2 Coll_bcast_mpich::bcast +#define MPIR_Bcast_binomial_MV2 Coll_bcast_binomial_tree::bcast +#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 Coll_bcast_scatter_LR_allgather::bcast +#define MPIR_Bcast_scatter_doubling_allgather_MV2 Coll_bcast_scatter_rdb_allgather::bcast +#define MPIR_Bcast_scatter_ring_allgather_MV2 Coll_bcast_scatter_LR_allgather::bcast +#define MPIR_Shmem_Bcast_MV2 Coll_bcast_mpich::bcast +#define MPIR_Bcast_tune_inter_node_helper_MV2 Coll_bcast_mvapich2_inter_node::bcast +#define MPIR_Bcast_inter_node_helper_MV2 Coll_bcast_mvapich2_inter_node::bcast +#define 
MPIR_Knomial_Bcast_intra_node_MV2 Coll_bcast_mvapich2_knomial_intra_node::bcast +#define MPIR_Bcast_intra_MV2 Coll_bcast_mvapich2_intra_node::bcast extern int zcpy_knomial_factor; extern int mv2_pipelined_zcpy_knomial_factor; @@ -73,7 +73,7 @@ extern int mv2_intra_node_knomial_factor; #define mv2_bcast_large_msg 512*1024 #define mv2_knomial_intra_node_threshold 131072 #define mv2_scatter_rd_inter_leader_bcast 1 -int smpi_coll_tuned_bcast_mvapich2_inter_node(void *buffer, +int Coll_bcast_mvapich2_inter_node::bcast(void *buffer, int count, MPI_Datatype datatype, int root, @@ -91,11 +91,11 @@ int smpi_coll_tuned_bcast_mvapich2_inter_node(void *buffer, if (MV2_Bcast_function==NULL){ - MV2_Bcast_function=smpi_coll_tuned_bcast_mpich; + MV2_Bcast_function=Coll_bcast_mpich::bcast; } if (MV2_Bcast_intra_node_function==NULL){ - MV2_Bcast_intra_node_function= smpi_coll_tuned_bcast_mpich; + MV2_Bcast_intra_node_function= Coll_bcast_mpich::bcast; } if(comm->get_leaders_comm()==MPI_COMM_NULL){ @@ -168,7 +168,7 @@ int smpi_coll_tuned_bcast_mvapich2_inter_node(void *buffer, } -int smpi_coll_tuned_bcast_mvapich2_knomial_intra_node(void *buffer, +int Coll_bcast_mvapich2_knomial_intra_node::bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm) @@ -180,11 +180,11 @@ int smpi_coll_tuned_bcast_mvapich2_knomial_intra_node(void *buffer, int src, dst, mask, relative_rank; int k; if (MV2_Bcast_function==NULL){ - MV2_Bcast_function=smpi_coll_tuned_bcast_mpich; + MV2_Bcast_function=Coll_bcast_mpich::bcast; } if (MV2_Bcast_intra_node_function==NULL){ - MV2_Bcast_intra_node_function= smpi_coll_tuned_bcast_mpich; + MV2_Bcast_intra_node_function= Coll_bcast_mpich::bcast; } if(comm->get_leaders_comm()==MPI_COMM_NULL){ @@ -244,7 +244,7 @@ int smpi_coll_tuned_bcast_mvapich2_knomial_intra_node(void *buffer, } -int smpi_coll_tuned_bcast_mvapich2_intra_node(void *buffer, +int Coll_bcast_mvapich2_intra_node::bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm) @@ -261,11 +261,11 @@ int smpi_coll_tuned_bcast_mvapich2_intra_node(void *buffer, if (count == 0) return MPI_SUCCESS; if (MV2_Bcast_function==NULL){ - MV2_Bcast_function=smpi_coll_tuned_bcast_mpich; + MV2_Bcast_function=Coll_bcast_mpich::bcast; } if (MV2_Bcast_intra_node_function==NULL){ - MV2_Bcast_intra_node_function= smpi_coll_tuned_bcast_mpich; + MV2_Bcast_intra_node_function= Coll_bcast_mpich::bcast; } if(comm->get_leaders_comm()==MPI_COMM_NULL){ diff --git a/src/smpi/colls/bcast/bcast-ompi-pipeline.cpp b/src/smpi/colls/bcast/bcast-ompi-pipeline.cpp index 7f7893b23c..fe0ef548b6 100644 --- a/src/smpi/colls/bcast/bcast-ompi-pipeline.cpp +++ b/src/smpi/colls/bcast/bcast-ompi-pipeline.cpp @@ -10,7 +10,7 @@ #define MAXTREEFANOUT 32 -int smpi_coll_tuned_bcast_ompi_pipeline( void* buffer, +int Coll_bcast_ompi_pipeline::bcast( void* buffer, int original_count, MPI_Datatype datatype, int root, diff --git a/src/smpi/colls/bcast/bcast-ompi-split-bintree.cpp b/src/smpi/colls/bcast/bcast-ompi-split-bintree.cpp index 332d6cdda5..e5dffb31dc 100644 --- a/src/smpi/colls/bcast/bcast-ompi-split-bintree.cpp +++ b/src/smpi/colls/bcast/bcast-ompi-split-bintree.cpp @@ -60,7 +60,7 @@ #define MAXTREEFANOUT 32 int -smpi_coll_tuned_bcast_ompi_split_bintree ( void* buffer, +Coll_bcast_ompi_split_bintree::bcast ( void* buffer, int count, MPI_Datatype datatype, int root, @@ -134,7 +134,7 @@ smpi_coll_tuned_bcast_ompi_split_bintree ( void* buffer, (segsize > counts[0] * type_size) || (segsize > counts[1] * type_size) ) { /* call linear version 
here ! */ - return (smpi_coll_tuned_bcast_SMP_linear ( buffer, count, datatype, + return (Coll_bcast_SMP_linear::bcast ( buffer, count, datatype, root, comm)); } type_extent = datatype->get_extent(); diff --git a/src/smpi/colls/bcast/bcast-scatter-LR-allgather.cpp b/src/smpi/colls/bcast/bcast-scatter-LR-allgather.cpp index c9d2b20023..3649c038e5 100644 --- a/src/smpi/colls/bcast/bcast-scatter-LR-allgather.cpp +++ b/src/smpi/colls/bcast/bcast-scatter-LR-allgather.cpp @@ -68,7 +68,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************/ int -smpi_coll_tuned_bcast_scatter_LR_allgather(void *buff, int count, +Coll_bcast_scatter_LR_allgather::bcast(void *buff, int count, MPI_Datatype data_type, int root, MPI_Comm comm) { diff --git a/src/smpi/colls/bcast/bcast-scatter-rdb-allgather.cpp b/src/smpi/colls/bcast/bcast-scatter-rdb-allgather.cpp index 7b98385f0b..9a94599edb 100644 --- a/src/smpi/colls/bcast/bcast-scatter-rdb-allgather.cpp +++ b/src/smpi/colls/bcast/bcast-scatter-rdb-allgather.cpp @@ -93,7 +93,7 @@ static int scatter_for_bcast( } int -smpi_coll_tuned_bcast_scatter_rdb_allgather ( +Coll_bcast_scatter_rdb_allgather::bcast ( void *buffer, int count, MPI_Datatype datatype, diff --git a/src/smpi/colls/colls.h b/src/smpi/colls/colls.h index 816a0659d1..35fbe1959b 100644 --- a/src/smpi/colls/colls.h +++ b/src/smpi/colls/colls.h @@ -14,13 +14,21 @@ SG_BEGIN_DECL() + +namespace simgrid{ +namespace smpi{ + #define COLL_DESCRIPTION(cat, ret, args, name) \ {# name,\ # cat " " # name " collective",\ - (void*)smpi_coll_tuned_ ## cat ## _ ## name} + (void*) Coll_ ## cat ## _ ## name::cat } #define COLL_PROTO(cat, ret, args, name) \ - ret smpi_coll_tuned_ ## cat ## _ ## name(COLL_UNPAREN args); +class Coll_ ## cat ## _ ## name : public Coll_ ## cat { \ +public: \ +static ret cat (COLL_UNPAREN args); \ +}; + #define COLL_UNPAREN(...) 
__VA_ARGS__ #define COLL_APPLY(action, sig, name) action(sig, name) @@ -31,12 +39,14 @@ SG_BEGIN_DECL() /************* * GATHER * *************/ + #define COLL_GATHER_SIG gather, int, \ (void *send_buff, int send_count, MPI_Datatype send_type, \ void *recv_buff, int recv_count, MPI_Datatype recv_type, \ int root, MPI_Comm comm) #define COLL_GATHERS(action, COLL_sep) \ +COLL_APPLY(action, COLL_GATHER_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_GATHER_SIG, ompi) COLL_sep \ COLL_APPLY(action, COLL_GATHER_SIG, ompi_basic_linear) COLL_sep \ COLL_APPLY(action, COLL_GATHER_SIG, ompi_binomial) COLL_sep \ @@ -47,8 +57,6 @@ COLL_APPLY(action, COLL_GATHER_SIG, mvapich2_two_level) COLL_sep \ COLL_APPLY(action, COLL_GATHER_SIG, impi) COLL_sep \ COLL_APPLY(action, COLL_GATHER_SIG, automatic) - - COLL_GATHERS(COLL_PROTO, COLL_NOsep) /************* @@ -60,6 +68,7 @@ COLL_GATHERS(COLL_PROTO, COLL_NOsep) MPI_Comm comm) #define COLL_ALLGATHERS(action, COLL_sep) \ +COLL_APPLY(action, COLL_ALLGATHER_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHER_SIG, 2dmesh) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHER_SIG, 3dmesh) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHER_SIG, bruck) COLL_sep \ @@ -82,7 +91,6 @@ COLL_APPLY(action, COLL_ALLGATHER_SIG, mpich) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHER_SIG, impi) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHER_SIG, automatic) - COLL_ALLGATHERS(COLL_PROTO, COLL_NOsep) /************** @@ -94,6 +102,7 @@ COLL_ALLGATHERS(COLL_PROTO, COLL_NOsep) MPI_Datatype recv_type, MPI_Comm comm) #define COLL_ALLGATHERVS(action, COLL_sep) \ +COLL_APPLY(action, COLL_ALLGATHERV_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHERV_SIG, GB) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHERV_SIG, pair) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHERV_SIG, ring) COLL_sep \ @@ -117,6 +126,7 @@ COLL_ALLGATHERVS(COLL_PROTO, COLL_NOsep) MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) #define COLL_ALLREDUCES(action, COLL_sep) \ +COLL_APPLY(action, COLL_ALLREDUCE_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, lr) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab1) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab2) COLL_sep \ @@ -141,7 +151,6 @@ COLL_APPLY(action, COLL_ALLREDUCE_SIG, automatic) COLL_ALLREDUCES(COLL_PROTO, COLL_NOsep) - /************ * ALLTOALL * ************/ @@ -151,6 +160,7 @@ COLL_ALLREDUCES(COLL_PROTO, COLL_NOsep) MPI_Comm comm) #define COLL_ALLTOALLS(action, COLL_sep) \ +COLL_APPLY(action, COLL_ALLTOALL_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_ALLTOALL_SIG, 2dmesh) COLL_sep \ COLL_APPLY(action, COLL_ALLTOALL_SIG, 3dmesh) COLL_sep \ COLL_APPLY(action, COLL_ALLTOALL_SIG, basic_linear) COLL_sep \ @@ -183,6 +193,7 @@ COLL_ALLTOALLS(COLL_PROTO, COLL_NOsep) MPI_Comm comm) #define COLL_ALLTOALLVS(action, COLL_sep) \ +COLL_APPLY(action, COLL_ALLTOALLV_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_ALLTOALLV_SIG, bruck) COLL_sep \ COLL_APPLY(action, COLL_ALLTOALLV_SIG, pair) COLL_sep \ COLL_APPLY(action, COLL_ALLTOALLV_SIG, pair_light_barrier) COLL_sep \ @@ -209,6 +220,7 @@ COLL_ALLTOALLVS(COLL_PROTO, COLL_NOsep) int root, MPI_Comm comm) #define COLL_BCASTS(action, COLL_sep) \ +COLL_APPLY(action, COLL_BCAST_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_BCAST_SIG, arrival_pattern_aware) COLL_sep \ COLL_APPLY(action, COLL_BCAST_SIG, arrival_pattern_aware_wait) COLL_sep \ COLL_APPLY(action, COLL_BCAST_SIG, arrival_scatter) COLL_sep \ @@ -245,6 +257,7 @@ COLL_BCASTS(COLL_PROTO, COLL_NOsep) MPI_Op op, int root, MPI_Comm comm) #define 
COLL_REDUCES(action, COLL_sep) \ +COLL_APPLY(action, COLL_REDUCE_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_REDUCE_SIG, arrival_pattern_aware) COLL_sep \ COLL_APPLY(action, COLL_REDUCE_SIG, binomial) COLL_sep \ COLL_APPLY(action, COLL_REDUCE_SIG, flat_tree) COLL_sep \ @@ -275,6 +288,7 @@ COLL_REDUCES(COLL_PROTO, COLL_NOsep) MPI_Datatype dtype,MPI_Op op,MPI_Comm comm) #define COLL_REDUCE_SCATTERS(action, COLL_sep) \ +COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, ompi) COLL_sep \ COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, ompi_basic_recursivehalving) COLL_sep \ COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, ompi_ring) COLL_sep \ @@ -300,6 +314,7 @@ COLL_REDUCE_SCATTERS(COLL_PROTO, COLL_NOsep) int root, MPI_Comm comm) #define COLL_SCATTERS(action, COLL_sep) \ +COLL_APPLY(action, COLL_SCATTER_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_SCATTER_SIG, ompi) COLL_sep \ COLL_APPLY(action, COLL_SCATTER_SIG, ompi_basic_linear) COLL_sep \ COLL_APPLY(action, COLL_SCATTER_SIG, ompi_binomial) COLL_sep \ @@ -319,6 +334,7 @@ COLL_SCATTERS(COLL_PROTO, COLL_NOsep) (MPI_Comm comm) #define COLL_BARRIERS(action, COLL_sep) \ +COLL_APPLY(action, COLL_BARRIER_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_BARRIER_SIG, ompi) COLL_sep \ COLL_APPLY(action, COLL_BARRIER_SIG, ompi_basic_linear) COLL_sep \ COLL_APPLY(action, COLL_BARRIER_SIG, ompi_two_procs) COLL_sep \ @@ -334,6 +350,9 @@ COLL_APPLY(action, COLL_BARRIER_SIG, automatic) COLL_BARRIERS(COLL_PROTO, COLL_NOsep) +} +} + SG_END_DECL() #endif diff --git a/src/smpi/colls/gather/gather-mvapich.cpp b/src/smpi/colls/gather/gather-mvapich.cpp index 97e14116aa..9936d250fe 100644 --- a/src/smpi/colls/gather/gather-mvapich.cpp +++ b/src/smpi/colls/gather/gather-mvapich.cpp @@ -37,9 +37,13 @@ #include "../colls_private.h" -#define MPIR_Gather_MV2_Direct smpi_coll_tuned_gather_ompi_basic_linear -#define MPIR_Gather_MV2_two_level_Direct smpi_coll_tuned_gather_ompi_basic_linear -#define MPIR_Gather_intra smpi_coll_tuned_gather_mpich + + + + +#define MPIR_Gather_MV2_Direct Coll_gather_ompi_basic_linear::gather +#define MPIR_Gather_MV2_two_level_Direct Coll_gather_ompi_basic_linear::gather +#define MPIR_Gather_intra Coll_gather_mpich::gather typedef int (*MV2_Gather_function_ptr) (void *sendbuf, int sendcnt, MPI_Datatype sendtype, @@ -54,6 +58,10 @@ extern MV2_Gather_function_ptr MV2_Gather_intra_node_function; #define TEMP_BUF_HAS_NO_DATA (0) #define TEMP_BUF_HAS_DATA (1) + +namespace simgrid{ +namespace smpi{ + /* sendbuf - (in) sender's buffer * sendcnt - (in) sender's element count * sendtype - (in) sender's data type @@ -121,7 +129,8 @@ static int MPIR_pt_pt_intra_gather( void *sendbuf, int sendcnt, MPI_Datatype sen } -int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf, + +int Coll_gather_mvapich2_two_level::gather(void *sendbuf, int sendcnt, MPI_Datatype sendtype, void *recvbuf, @@ -146,7 +155,7 @@ int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf, //if not set (use of the algo directly, without mvapich2 selector) if(MV2_Gather_intra_node_function==NULL) - MV2_Gather_intra_node_function=smpi_coll_tuned_gather_mpich; + MV2_Gather_intra_node_function= Coll_gather_mpich::gather; if(comm->get_leaders_comm()==MPI_COMM_NULL){ comm->init_smp(); @@ -324,7 +333,7 @@ int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf, recvcnts[i] = node_sizes[i] * recvcnt; } } - smpi_mpi_gatherv(tmp_buf, + Colls::gatherv(tmp_buf, local_size * nbytes, MPI_BYTE, recvbuf, recvcnts, displs, recvtype, 
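
Note (not part of the patch): the colls.h hunk a few chunks above is what generates all of the Coll_* classes used throughout this commit: COLL_PROTO now emits a class deriving from the per-category base instead of a free-function prototype, and COLL_DESCRIPTION stores a pointer to the class's static method. Hand-expanding one entry, COLL_APPLY(COLL_PROTO, COLL_BCAST_SIG, binomial_tree) should yield roughly:

    class Coll_bcast_binomial_tree : public Coll_bcast {
    public:
      static int bcast(void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm);
    };

and the matching COLL_DESCRIPTION entry becomes {"binomial_tree", "bcast binomial_tree collective", (void*)Coll_bcast_binomial_tree::bcast}. This expansion is written out here for illustration; it is not text from the patch.
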
@@ -342,7 +351,7 @@ int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf, recvcnts[i] = node_sizes[i] * nbytes; } } - smpi_mpi_gatherv(tmp_buf, local_size * nbytes, + Colls::gatherv(tmp_buf, local_size * nbytes, MPI_BYTE, leader_gather_buf, recvcnts, displs, MPI_BYTE, leader_root, leader_comm); @@ -410,4 +419,6 @@ int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf, return (mpi_errno); } +} +} diff --git a/src/smpi/colls/gather/gather-ompi.cpp b/src/smpi/colls/gather/gather-ompi.cpp index e5133fd925..ad99d3cae2 100644 --- a/src/smpi/colls/gather/gather-ompi.cpp +++ b/src/smpi/colls/gather/gather-ompi.cpp @@ -22,10 +22,11 @@ #include "../colls_private.h" #include "../coll_tuned_topo.h" -/* Todo: gather_intra_generic, gather_intra_binary, gather_intra_chain, - * gather_intra_pipeline, segmentation? */ -int -smpi_coll_tuned_gather_ompi_binomial(void *sbuf, int scount, +namespace simgrid{ +namespace smpi{ + + +int Coll_gather_ompi_binomial::gather(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf, int rcount, MPI_Datatype rdtype, @@ -194,8 +195,7 @@ smpi_coll_tuned_gather_ompi_binomial(void *sbuf, int scount, * Accepts: - same arguments as MPI_Gather(), first segment size * Returns: - MPI_SUCCESS or error code */ -int -smpi_coll_tuned_gather_ompi_linear_sync(void *sbuf, int scount, +int Coll_gather_ompi_linear_sync::gather(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf, int rcount, MPI_Datatype rdtype, @@ -353,8 +353,7 @@ smpi_coll_tuned_gather_ompi_linear_sync(void *sbuf, int scount, * Accepts: - same arguments as MPI_Gather() * Returns: - MPI_SUCCESS or error code */ -int -smpi_coll_tuned_gather_ompi_basic_linear(void *sbuf, int scount, +int Coll_gather_ompi_basic_linear::gather(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf, int rcount, MPI_Datatype rdtype, @@ -411,3 +410,6 @@ smpi_coll_tuned_gather_ompi_basic_linear(void *sbuf, int scount, return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/reduce/reduce-NTSL.cpp b/src/smpi/colls/reduce/reduce-NTSL.cpp index 22823123eb..167a2c326e 100644 --- a/src/smpi/colls/reduce/reduce-NTSL.cpp +++ b/src/smpi/colls/reduce/reduce-NTSL.cpp @@ -12,7 +12,7 @@ int reduce_NTSL_segment_size_in_byte = 8192; /* Non-topology-specific pipelined linear-bcast function 0->1, 1->2 ,2->3, ....., ->last node : in a pipeline fashion */ -int smpi_coll_tuned_reduce_NTSL(void *buf, void *rbuf, int count, +int Coll_reduce_NTSL::reduce(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) { @@ -142,7 +142,7 @@ int smpi_coll_tuned_reduce_NTSL(void *buf, void *rbuf, int count, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_reduce_NTSL use default MPI_reduce."); - smpi_mpi_reduce((char *)buf + (pipe_length * increment), + Coll_reduce_default::reduce((char *)buf + (pipe_length * increment), (char *)rbuf + (pipe_length * increment), remainder, datatype, op, root, comm); } diff --git a/src/smpi/colls/reduce/reduce-arrival-pattern-aware.cpp b/src/smpi/colls/reduce/reduce-arrival-pattern-aware.cpp index 8e68f07a94..277034fdce 100644 --- a/src/smpi/colls/reduce/reduce-arrival-pattern-aware.cpp +++ b/src/smpi/colls/reduce/reduce-arrival-pattern-aware.cpp @@ -18,7 +18,7 @@ int reduce_arrival_pattern_aware_segment_size_in_byte = 8192; #endif /* Non-topology-specific pipelined linear-reduce function */ -int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, +int 
Coll_reduce_arrival_pattern_aware::reduce(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, @@ -344,7 +344,7 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { - smpi_mpi_reduce((char *)buf + (pipe_length * increment), + Coll_reduce_default::reduce((char *)buf + (pipe_length * increment), (char *)rbuf + (pipe_length * increment), remainder, datatype, op, root, comm); } diff --git a/src/smpi/colls/reduce/reduce-binomial.cpp b/src/smpi/colls/reduce/reduce-binomial.cpp index 05105b20f0..ac0b789fb4 100644 --- a/src/smpi/colls/reduce/reduce-binomial.cpp +++ b/src/smpi/colls/reduce/reduce-binomial.cpp @@ -8,7 +8,7 @@ //#include -int smpi_coll_tuned_reduce_binomial(void *sendbuf, void *recvbuf, int count, +int Coll_reduce_binomial::reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) { diff --git a/src/smpi/colls/reduce/reduce-flat-tree.cpp b/src/smpi/colls/reduce/reduce-flat-tree.cpp index 66e5ebe1a1..76f3c5bc36 100644 --- a/src/smpi/colls/reduce/reduce-flat-tree.cpp +++ b/src/smpi/colls/reduce/reduce-flat-tree.cpp @@ -8,7 +8,7 @@ //#include int -smpi_coll_tuned_reduce_flat_tree(void *sbuf, void *rbuf, int count, +Coll_reduce_flat_tree::reduce(void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, int root, MPI_Comm comm) { diff --git a/src/smpi/colls/reduce/reduce-mvapich-knomial.cpp b/src/smpi/colls/reduce/reduce-mvapich-knomial.cpp index c23f09451b..cbcc1e4b1d 100644 --- a/src/smpi/colls/reduce/reduce-mvapich-knomial.cpp +++ b/src/smpi/colls/reduce/reduce-mvapich-knomial.cpp @@ -114,7 +114,7 @@ static int MPIR_Reduce_knomial_trace(int root, int reduce_knomial_factor, return 0; } -int smpi_coll_tuned_reduce_mvapich2_knomial ( +int Coll_reduce_mvapich2_knomial::reduce ( void *sendbuf, void *recvbuf, int count, diff --git a/src/smpi/colls/reduce/reduce-mvapich-two-level.cpp b/src/smpi/colls/reduce/reduce-mvapich-two-level.cpp index 4905ae0a68..d2c5b63820 100644 --- a/src/smpi/colls/reduce/reduce-mvapich-two-level.cpp +++ b/src/smpi/colls/reduce/reduce-mvapich-two-level.cpp @@ -42,11 +42,11 @@ #define SHMEM_COLL_BLOCK_SIZE (local_size * mv2_g_shmem_coll_max_msg_size) #define mv2_use_knomial_reduce 1 -#define MPIR_Reduce_inter_knomial_wrapper_MV2 smpi_coll_tuned_reduce_mvapich2_knomial -#define MPIR_Reduce_intra_knomial_wrapper_MV2 smpi_coll_tuned_reduce_mvapich2_knomial -#define MPIR_Reduce_binomial_MV2 smpi_coll_tuned_reduce_binomial -#define MPIR_Reduce_redscat_gather_MV2 smpi_coll_tuned_reduce_scatter_gather -#define MPIR_Reduce_shmem_MV2 smpi_coll_tuned_reduce_ompi_basic_linear +#define MPIR_Reduce_inter_knomial_wrapper_MV2 Coll_reduce_mvapich2_knomial::reduce +#define MPIR_Reduce_intra_knomial_wrapper_MV2 Coll_reduce_mvapich2_knomial::reduce +#define MPIR_Reduce_binomial_MV2 Coll_reduce_binomial::reduce +#define MPIR_Reduce_redscat_gather_MV2 Coll_reduce_scatter_gather::reduce +#define MPIR_Reduce_shmem_MV2 Coll_reduce_ompi_basic_linear::reduce extern int (*MV2_Reduce_function)( void *sendbuf, void *recvbuf, @@ -72,7 +72,7 @@ static int (*reduce_fn)(void *sendbuf, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); -int smpi_coll_tuned_reduce_mvapich2_two_level( void *sendbuf, +int Coll_reduce_mvapich2_two_level::reduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, @@ -92,9 +92,9 @@ int 
smpi_coll_tuned_reduce_mvapich2_two_level( void *sendbuf, //if not set (use of the algo directly, without mvapich2 selector) if(MV2_Reduce_function==NULL) - MV2_Reduce_function=smpi_coll_tuned_reduce_mpich; + MV2_Reduce_function=Coll_reduce_mpich::reduce; if(MV2_Reduce_intra_function==NULL) - MV2_Reduce_intra_function=smpi_coll_tuned_reduce_mpich; + MV2_Reduce_intra_function=Coll_reduce_mpich::reduce; if(comm->get_leaders_comm()==MPI_COMM_NULL){ comm->init_smp(); diff --git a/src/smpi/colls/reduce/reduce-ompi.cpp b/src/smpi/colls/reduce/reduce-ompi.cpp index da244bc837..e7c465ca01 100644 --- a/src/smpi/colls/reduce/reduce-ompi.cpp +++ b/src/smpi/colls/reduce/reduce-ompi.cpp @@ -324,7 +324,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi meaning that at least one datatype must fit in the segment ! */ -int smpi_coll_tuned_reduce_ompi_chain( void *sendbuf, void *recvbuf, int count, +int Coll_reduce_ompi_chain::reduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm @@ -352,7 +352,7 @@ int smpi_coll_tuned_reduce_ompi_chain( void *sendbuf, void *recvbuf, int count, } -int smpi_coll_tuned_reduce_ompi_pipeline( void *sendbuf, void *recvbuf, +int Coll_reduce_ompi_pipeline::reduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm ) @@ -397,7 +397,7 @@ int smpi_coll_tuned_reduce_ompi_pipeline( void *sendbuf, void *recvbuf, segcount, 0); } -int smpi_coll_tuned_reduce_ompi_binary( void *sendbuf, void *recvbuf, +int Coll_reduce_ompi_binary::reduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) @@ -428,7 +428,7 @@ int smpi_coll_tuned_reduce_ompi_binary( void *sendbuf, void *recvbuf, segcount, 0); } -int smpi_coll_tuned_reduce_ompi_binomial( void *sendbuf, void *recvbuf, +int Coll_reduce_ompi_binomial::reduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) @@ -476,7 +476,7 @@ int smpi_coll_tuned_reduce_ompi_binomial( void *sendbuf, void *recvbuf, * Acecpts: same as MPI_Reduce() * Returns: MPI_SUCCESS or error code */ -int smpi_coll_tuned_reduce_ompi_in_order_binary( void *sendbuf, void *recvbuf, +int Coll_reduce_ompi_in_order_binary::reduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, @@ -589,7 +589,7 @@ int smpi_coll_tuned_reduce_ompi_in_order_binary( void *sendbuf, void *recvbuf, */ int -smpi_coll_tuned_reduce_ompi_basic_linear(void *sbuf, void *rbuf, int count, +Coll_reduce_ompi_basic_linear::reduce(void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, int root, diff --git a/src/smpi/colls/reduce/reduce-rab.cpp b/src/smpi/colls/reduce/reduce-rab.cpp index 3a0c2c084f..22f34a05d3 100644 --- a/src/smpi/colls/reduce/reduce-rab.cpp +++ b/src/smpi/colls/reduce/reduce-rab.cpp @@ -65,7 +65,7 @@ Exa.: size=13 ==> n=3, r=5 (i.e. 
size == 13 == 2**n+r == 2**3 + 5) - The algoritm needs for the execution of one mpi_coll_reduce_fun + The algoritm needs for the execution of one Colls::reduce - for r==0 exec_time = n*(L1+L2) + buf_lng * (1-1/2**n) * (T1 + T2 + O/d) @@ -207,7 +207,7 @@ Step 5.n) 7: { [(a+b)+(c+d)] + [(e+f)+(g+h)] } + { [(i+j)+k] + [l+m] } for H -For mpi_coll_allreduce_fun: +For Colls::allreduce: ------------------ Step 6.1) @@ -249,7 +249,7 @@ Step 7) on all nodes 0..12 -For mpi_coll_reduce_fun: +For Colls::reduce: --------------- Step 6.0) @@ -921,19 +921,19 @@ static int MPI_I_anyReduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype } /* new_prot */ /*otherwise:*/ if (is_all) - return( mpi_coll_allreduce_fun(Sendbuf, Recvbuf, count, mpi_datatype, mpi_op, comm) ); + return( Colls::allreduce(Sendbuf, Recvbuf, count, mpi_datatype, mpi_op, comm) ); else - return( mpi_coll_reduce_fun(Sendbuf,Recvbuf, count,mpi_datatype,mpi_op, root, comm) ); + return( Colls::reduce(Sendbuf,Recvbuf, count,mpi_datatype,mpi_op, root, comm) ); } #endif /*REDUCE_LIMITS*/ -int smpi_coll_tuned_reduce_rab(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) +int Coll_reduce_rab::reduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) { return( MPI_I_anyReduce(Sendbuf, Recvbuf, count, datatype, op, root, comm, 0) ); } -int smpi_coll_tuned_allreduce_rab(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) +int Coll_allreduce_rab::allreduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { return( MPI_I_anyReduce(Sendbuf, Recvbuf, count, datatype, op, -1, comm, 1) ); } diff --git a/src/smpi/colls/reduce/reduce-scatter-gather.cpp b/src/smpi/colls/reduce/reduce-scatter-gather.cpp index 105a490780..b8ac14235c 100644 --- a/src/smpi/colls/reduce/reduce-scatter-gather.cpp +++ b/src/smpi/colls/reduce/reduce-scatter-gather.cpp @@ -11,7 +11,7 @@ Author: MPICH */ -int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, +int Coll_reduce_scatter_gather::reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) { diff --git a/src/smpi/colls/reduce_scatter/reduce_scatter-mpich.cpp b/src/smpi/colls/reduce_scatter/reduce_scatter-mpich.cpp index f2387ee76b..e10e5b63a7 100644 --- a/src/smpi/colls/reduce_scatter/reduce_scatter-mpich.cpp +++ b/src/smpi/colls/reduce_scatter/reduce_scatter-mpich.cpp @@ -22,7 +22,7 @@ static inline int MPIU_Mirror_permutation(unsigned int x, int bits) } -int smpi_coll_tuned_reduce_scatter_mpich_pair(void *sendbuf, void *recvbuf, int recvcounts[], +int Coll_reduce_scatter_mpich_pair::reduce_scatter(void *sendbuf, void *recvbuf, int recvcounts[], MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { int rank, comm_size, i; @@ -148,7 +148,7 @@ int smpi_coll_tuned_reduce_scatter_mpich_pair(void *sendbuf, void *recvbuf, int } -int smpi_coll_tuned_reduce_scatter_mpich_noncomm(void *sendbuf, void *recvbuf, int recvcounts[], +int Coll_reduce_scatter_mpich_noncomm::reduce_scatter(void *sendbuf, void *recvbuf, int recvcounts[], MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { int mpi_errno = MPI_SUCCESS; @@ -266,7 +266,7 @@ int smpi_coll_tuned_reduce_scatter_mpich_noncomm(void *sendbuf, void *recvbuf, i -int smpi_coll_tuned_reduce_scatter_mpich_rdb(void *sendbuf, void *recvbuf, int recvcounts[], +int Coll_reduce_scatter_mpich_rdb::reduce_scatter(void *sendbuf, void *recvbuf, int 
recvcounts[], MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { int rank, comm_size, i; diff --git a/src/smpi/colls/reduce_scatter/reduce_scatter-ompi.cpp b/src/smpi/colls/reduce_scatter/reduce_scatter-ompi.cpp index 0afa9b72cc..ed96d6f7f0 100644 --- a/src/smpi/colls/reduce_scatter/reduce_scatter-ompi.cpp +++ b/src/smpi/colls/reduce_scatter/reduce_scatter-ompi.cpp @@ -42,7 +42,7 @@ * Limitation: - Works only for commutative operations. */ int -smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(void *sbuf, +Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(void *sbuf, void *rbuf, int *rcounts, MPI_Datatype dtype, @@ -301,7 +301,7 @@ smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(void *sbuf, /* - * smpi_coll_tuned_reduce_scatter_ompi_ring + * Coll_reduce_scatter_ompi_ring::reduce_scatter * * Function: Ring algorithm for reduce_scatter operation * Accepts: Same as MPI_Reduce_scatter() @@ -360,7 +360,7 @@ smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(void *sbuf, * */ int -smpi_coll_tuned_reduce_scatter_ompi_ring(void *sbuf, void *rbuf, int *rcounts, +Coll_reduce_scatter_ompi_ring::reduce_scatter(void *sbuf, void *rbuf, int *rcounts, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm diff --git a/src/smpi/colls/scatter/scatter-mvapich-two-level.cpp b/src/smpi/colls/scatter/scatter-mvapich-two-level.cpp index e76d75625d..d2cb88f90b 100644 --- a/src/smpi/colls/scatter/scatter-mvapich-two-level.cpp +++ b/src/smpi/colls/scatter/scatter-mvapich-two-level.cpp @@ -36,14 +36,14 @@ */ #include "../colls_private.h" -#define MPIR_Scatter_MV2_Binomial smpi_coll_tuned_scatter_ompi_binomial -#define MPIR_Scatter_MV2_Direct smpi_coll_tuned_scatter_ompi_basic_linear +#define MPIR_Scatter_MV2_Binomial Coll_scatter_ompi_binomial::scatter +#define MPIR_Scatter_MV2_Direct Coll_scatter_ompi_basic_linear::scatter extern int (*MV2_Scatter_intra_function) (void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); -int smpi_coll_tuned_scatter_mvapich2_two_level_direct(void *sendbuf, +int Coll_scatter_mvapich2_two_level_direct::scatter(void *sendbuf, int sendcnt, MPI_Datatype sendtype, void *recvbuf, @@ -63,7 +63,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_direct(void *sendbuf, MPI_Comm shmem_comm, leader_comm; //if not set (use of the algo directly, without mvapich2 selector) if(MV2_Scatter_intra_function==NULL) - MV2_Scatter_intra_function=smpi_coll_tuned_scatter_mpich; + MV2_Scatter_intra_function=Coll_scatter_mpich::scatter; if(comm->get_leaders_comm()==MPI_COMM_NULL){ comm->init_smp(); @@ -158,7 +158,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_direct(void *sendbuf, sendcnts[i] = node_sizes[i] * nbytes; } } - smpi_mpi_scatterv(leader_scatter_buf, sendcnts, displs, + Colls::scatterv(leader_scatter_buf, sendcnts, displs, MPI_BYTE, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root, leader_comm); } else { @@ -174,7 +174,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_direct(void *sendbuf, sendcnts[i] = node_sizes[i] * sendcnt; } } - smpi_mpi_scatterv(sendbuf, sendcnts, displs, + Colls::scatterv(sendbuf, sendcnts, displs, sendtype, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root, leader_comm); @@ -225,7 +225,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_direct(void *sendbuf, } -int smpi_coll_tuned_scatter_mvapich2_two_level_binomial(void *sendbuf, +int Coll_scatter_mvapich2_two_level_binomial::scatter(void *sendbuf, int sendcnt, MPI_Datatype sendtype, void *recvbuf, @@ -247,7 
+247,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_binomial(void *sendbuf, //if not set (use of the algo directly, without mvapich2 selector) if(MV2_Scatter_intra_function==NULL) - MV2_Scatter_intra_function=smpi_coll_tuned_scatter_mpich; + MV2_Scatter_intra_function=Coll_scatter_mpich::scatter; if(comm->get_leaders_comm()==MPI_COMM_NULL){ comm->init_smp(); @@ -339,7 +339,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_binomial(void *sendbuf, sendcnts[i] = node_sizes[i] * nbytes; } } - smpi_mpi_scatterv(leader_scatter_buf, sendcnts, displs, + Colls::scatterv(leader_scatter_buf, sendcnts, displs, MPI_BYTE, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root, leader_comm); } else { @@ -355,7 +355,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_binomial(void *sendbuf, sendcnts[i] = node_sizes[i] * sendcnt; } } - smpi_mpi_scatterv(sendbuf, sendcnts, displs, + Colls::scatterv(sendbuf, sendcnts, displs, sendtype, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root, leader_comm); diff --git a/src/smpi/colls/scatter/scatter-ompi.cpp b/src/smpi/colls/scatter/scatter-ompi.cpp index 1aee0d7392..04bf1aefec 100644 --- a/src/smpi/colls/scatter/scatter-ompi.cpp +++ b/src/smpi/colls/scatter/scatter-ompi.cpp @@ -25,7 +25,7 @@ int -smpi_coll_tuned_scatter_ompi_binomial(void *sbuf, int scount, +Coll_scatter_ompi_binomial::scatter(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf, int rcount, MPI_Datatype rdtype, @@ -51,7 +51,7 @@ smpi_coll_tuned_scatter_ompi_binomial(void *sbuf, int scount, rank = comm->rank(); XBT_DEBUG( - "smpi_coll_tuned_scatter_ompi_binomial rank %d", rank); + "Coll_scatter_ompi_binomial::scatter rank %d", rank); /* create the binomial tree */ @@ -191,7 +191,7 @@ smpi_coll_tuned_scatter_ompi_binomial(void *sbuf, int scount, * Returns: - MPI_SUCCESS or error code */ int -smpi_coll_tuned_scatter_ompi_basic_linear(void *sbuf, int scount, +Coll_scatter_ompi_basic_linear::scatter(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf, int rcount, MPI_Datatype rdtype, diff --git a/src/smpi/colls/smpi_automatic_selector.cpp b/src/smpi/colls/smpi_automatic_selector.cpp index f6e5240ffc..af06809efe 100644 --- a/src/smpi/colls/smpi_automatic_selector.cpp +++ b/src/smpi/colls/smpi_automatic_selector.cpp @@ -23,9 +23,8 @@ new_pajeNewEvent (SIMIX_get_clock(), PJ_container_get(cont_name), type, value);\ } - #define AUTOMATIC_COLL_BENCH(cat, ret, args, args2)\ - ret smpi_coll_tuned_ ## cat ## _ ## automatic(COLL_UNPAREN args)\ + ret Coll_ ## cat ## _automatic:: cat (COLL_UNPAREN args)\ {\ double time1, time2, time_min=DBL_MAX;\ int min_coll=-1, global_coll=-1;\ @@ -34,7 +33,7 @@ for (i = 0; mpi_coll_##cat##_description[i].name; i++){\ if(!strcmp(mpi_coll_##cat##_description[i].name, "automatic"))continue;\ if(!strcmp(mpi_coll_##cat##_description[i].name, "default"))continue;\ - smpi_mpi_barrier(comm);\ + Coll_barrier_default::barrier(comm);\ TRACE_AUTO_COLL(cat)\ time1 = SIMIX_get_clock();\ try {\ @@ -46,7 +45,7 @@ }\ time2 = SIMIX_get_clock();\ buf_out=time2-time1;\ - smpi_mpi_reduce((void*)&buf_out,(void*)&buf_in, 1, MPI_DOUBLE, MPI_MAX, 0,comm );\ + Coll_reduce_default::reduce((void*)&buf_out,(void*)&buf_in, 1, MPI_DOUBLE, MPI_MAX, 0,comm );\ if(time2-time1rank(), time_min);\ return (min_coll!=-1)?MPI_SUCCESS:MPI_ERR_INTERN;\ -}\ +} +namespace simgrid{ +namespace smpi{ COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm)); COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_ALLREDUCE_SIG, (sbuf, rbuf, 
rcount, dtype, op, comm)); @@ -77,3 +78,6 @@ COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_REDUCE_SIG,(buf,rbuf, count, datatype, op, COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_REDUCE_SCATTER_SIG ,(sbuf,rbuf, rcounts,dtype,op,comm)); COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_SCATTER_SIG ,(sendbuf, sendcount, sendtype,recvbuf, recvcount, recvtype,root, comm)); COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_BARRIER_SIG,(comm)); + +} +} diff --git a/src/smpi/colls/smpi_default_selector.cpp b/src/smpi/colls/smpi_default_selector.cpp new file mode 100644 index 0000000000..65911df302 --- /dev/null +++ b/src/smpi/colls/smpi_default_selector.cpp @@ -0,0 +1,349 @@ +/* selector with default/naive Simgrid algorithms. These should not be trusted for performance evaluations */ + +/* Copyright (c) 2009-2010, 2013-2014. The SimGrid Team. + * All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +#include "colls_private.h" + +namespace simgrid{ +namespace smpi{ + +int Coll_bcast_default::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) +{ + return Coll_bcast_binomial_tree::bcast(buf, count, datatype, root, comm); +} + +int Coll_barrier_default::barrier(MPI_Comm comm) +{ + return Coll_barrier_ompi_basic_linear::barrier(comm); +} + + +int Coll_gather_default::gather(void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) +{ + int system_tag = COLL_TAG_GATHER; + MPI_Aint lb = 0; + MPI_Aint recvext = 0; + + int rank = comm->rank(); + int size = comm->size(); + if(rank != root) { + // Send buffer to root + Request::send(sendbuf, sendcount, sendtype, root, system_tag, comm); + } else { + recvtype->extent(&lb, &recvext); + // Local copy from root + Datatype::copy(sendbuf, sendcount, sendtype, static_cast(recvbuf) + root * recvcount * recvext, + recvcount, recvtype); + // Receive buffers from senders + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + int index = 0; + for (int src = 0; src < size; src++) { + if(src != root) { + requests[index] = Request::irecv_init(static_cast(recvbuf) + src * recvcount * recvext, recvcount, recvtype, + src, system_tag, comm); + index++; + } + } + // Wait for completion of irecv's. 
+ Request::startall(size - 1, requests); + Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); + for (int src = 0; src < size-1; src++) { + Request::unref(&requests[src]); + } + xbt_free(requests); + } + return MPI_SUCCESS; +} + +int Coll_reduce_scatter_default::reduce_scatter(void *sendbuf, void *recvbuf, int *recvcounts, MPI_Datatype datatype, MPI_Op op, + MPI_Comm comm) +{ + int rank = comm->rank(); + + /* arbitrarily choose root as rank 0 */ + int size = comm->size(); + int count = 0; + int *displs = xbt_new(int, size); + for (int i = 0; i < size; i++) { + displs[i] = count; + count += recvcounts[i]; + } + void *tmpbuf = static_cast(smpi_get_tmp_sendbuffer(count*datatype->get_extent())); + int ret = MPI_SUCCESS; + + ret = Coll_reduce_default::reduce(sendbuf, tmpbuf, count, datatype, op, 0, comm); + if(ret==MPI_SUCCESS) + ret = Colls::scatterv(tmpbuf, recvcounts, displs, datatype, recvbuf, recvcounts[rank], datatype, 0, comm); + xbt_free(displs); + smpi_free_tmp_buffer(tmpbuf); + return ret; +} + + +int Coll_allgather_default::allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf,int recvcount, MPI_Datatype recvtype, MPI_Comm comm) +{ + int system_tag = COLL_TAG_ALLGATHER; + MPI_Aint lb = 0; + MPI_Aint recvext = 0; + MPI_Request *requests; + + int rank = comm->rank(); + int size = comm->size(); + // FIXME: check for errors + recvtype->extent(&lb, &recvext); + // Local copy from self + Datatype::copy(sendbuf, sendcount, sendtype, static_cast(recvbuf) + rank * recvcount * recvext, recvcount, + recvtype); + // Send/Recv buffers to/from others; + requests = xbt_new(MPI_Request, 2 * (size - 1)); + int index = 0; + for (int other = 0; other < size; other++) { + if(other != rank) { + requests[index] = Request::isend_init(sendbuf, sendcount, sendtype, other, system_tag,comm); + index++; + requests[index] = Request::irecv_init(static_cast(recvbuf) + other * recvcount * recvext, recvcount, recvtype, + other, system_tag, comm); + index++; + } + } + // Wait for completion of all comms. + Request::startall(2 * (size - 1), requests); + Request::waitall(2 * (size - 1), requests, MPI_STATUS_IGNORE); + for (int other = 0; other < 2*(size-1); other++) { + Request::unref(&requests[other]); + } + xbt_free(requests); + return MPI_SUCCESS; +} + +int Coll_allgatherv_default::allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, + int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm) +{ + int system_tag = COLL_TAG_ALLGATHERV; + MPI_Aint lb = 0; + MPI_Aint recvext = 0; + + int rank = comm->rank(); + int size = comm->size(); + recvtype->extent(&lb, &recvext); + // Local copy from self + Datatype::copy(sendbuf, sendcount, sendtype, + static_cast(recvbuf) + displs[rank] * recvext,recvcounts[rank], recvtype); + // Send buffers to others; + MPI_Request *requests = xbt_new(MPI_Request, 2 * (size - 1)); + int index = 0; + for (int other = 0; other < size; other++) { + if(other != rank) { + requests[index] = + Request::isend_init(sendbuf, sendcount, sendtype, other, system_tag, comm); + index++; + requests[index] = Request::irecv_init(static_cast(recvbuf) + displs[other] * recvext, recvcounts[other], + recvtype, other, system_tag, comm); + index++; + } + } + // Wait for completion of all comms. 
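
Note (not part of the patch): Coll_reduce_scatter_default, added just above, is a pure composition: it first reduces all count = sum(recvcounts) elements onto rank 0 with Coll_reduce_default::reduce, then scatters them back with Colls::scatterv using the displacements built in the loop. As a small worked example (values chosen only for illustration): with three ranks and recvcounts = {2, 3, 1}, the loop yields displs = {0, 2, 5} and count = 6, so rank 0 accumulates six elements into tmpbuf and then ships elements [0,2) to rank 0, [2,5) to rank 1 and [5,6) to rank 2.
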
+ Request::startall(2 * (size - 1), requests); + Request::waitall(2 * (size - 1), requests, MPI_STATUS_IGNORE); + for (int other = 0; other < 2*(size-1); other++) { + Request::unref(&requests[other]); + } + xbt_free(requests); + return MPI_SUCCESS; +} + +int Coll_scatter_default::scatter(void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) +{ + int system_tag = COLL_TAG_SCATTER; + MPI_Aint lb = 0; + MPI_Aint sendext = 0; + MPI_Request *requests; + + int rank = comm->rank(); + int size = comm->size(); + if(rank != root) { + // Recv buffer from root + Request::recv(recvbuf, recvcount, recvtype, root, system_tag, comm, MPI_STATUS_IGNORE); + } else { + sendtype->extent(&lb, &sendext); + // Local copy from root + if(recvbuf!=MPI_IN_PLACE){ + Datatype::copy(static_cast(sendbuf) + root * sendcount * sendext, + sendcount, sendtype, recvbuf, recvcount, recvtype); + } + // Send buffers to receivers + requests = xbt_new(MPI_Request, size - 1); + int index = 0; + for(int dst = 0; dst < size; dst++) { + if(dst != root) { + requests[index] = Request::isend_init(static_cast(sendbuf) + dst * sendcount * sendext, sendcount, sendtype, + dst, system_tag, comm); + index++; + } + } + // Wait for completion of isend's. + Request::startall(size - 1, requests); + Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); + for (int dst = 0; dst < size-1; dst++) { + Request::unref(&requests[dst]); + } + xbt_free(requests); + } + return MPI_SUCCESS; +} + + + +int Coll_reduce_default::reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, + MPI_Comm comm) +{ + int system_tag = COLL_TAG_REDUCE; + MPI_Aint lb = 0; + MPI_Aint dataext = 0; + + char* sendtmpbuf = static_cast(sendbuf); + + int rank = comm->rank(); + int size = comm->size(); + //non commutative case, use a working algo from openmpi + if(op != MPI_OP_NULL && !op->is_commutative()){ + return Coll_reduce_ompi_basic_linear::reduce(sendtmpbuf, recvbuf, count, datatype, op, root, comm); + } + + if( sendbuf == MPI_IN_PLACE ) { + sendtmpbuf = static_cast(smpi_get_tmp_sendbuffer(count*datatype->get_extent())); + Datatype::copy(recvbuf, count, datatype,sendtmpbuf, count, datatype); + } + + if(rank != root) { + // Send buffer to root + Request::send(sendtmpbuf, count, datatype, root, system_tag, comm); + } else { + datatype->extent(&lb, &dataext); + // Local copy from root + if (sendtmpbuf != nullptr && recvbuf != nullptr) + Datatype::copy(sendtmpbuf, count, datatype, recvbuf, count, datatype); + // Receive buffers from senders + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + void **tmpbufs = xbt_new(void *, size - 1); + int index = 0; + for (int src = 0; src < size; src++) { + if (src != root) { + if (!smpi_process_get_replaying()) + tmpbufs[index] = xbt_malloc(count * dataext); + else + tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); + requests[index] = + Request::irecv_init(tmpbufs[index], count, datatype, src, system_tag, comm); + index++; + } + } + // Wait for completion of irecv's. 
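The completion loop that follows accumulates each peer's contribution into recvbuf as soon as its receive finishes (waitany plus op->apply); the non-commutative case was already diverted to the Open MPI basic-linear variant at the top of the function, so arrival order does not matter here. In plain MPI the same pattern can be written with MPI_Waitany and MPI_Reduce_local, as in this sketch (names are illustrative):

#include <mpi.h>

// Fold incoming contributions into the result in completion order.
// MPI_Reduce_local stands in for SMPI's op->apply(); fine for commutative ops.
static void accumulate_as_they_arrive(void* recvbuf, int count, MPI_Datatype dtype,
                                      MPI_Op op, int nb_peers, MPI_Request* reqs,
                                      void** tmpbufs)
{
  for (int done = 0; done < nb_peers; done++) {
    int idx;
    MPI_Waitany(nb_peers, reqs, &idx, MPI_STATUS_IGNORE);
    if (idx == MPI_UNDEFINED)
      break;                              // no active request left
    MPI_Reduce_local(tmpbufs[idx], recvbuf, count, dtype, op); // recvbuf = tmpbufs[idx] op recvbuf
  }
}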
+ Request::startall(size - 1, requests); + for (int src = 0; src < size - 1; src++) { + index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); + XBT_DEBUG("finished waiting any request with index %d", index); + if(index == MPI_UNDEFINED) { + break; + }else{ + Request::unref(&requests[index]); + } + if(op) /* op can be MPI_OP_NULL that does nothing */ + if(op!=MPI_OP_NULL) op->apply( tmpbufs[index], recvbuf, &count, datatype); + } + for(index = 0; index < size - 1; index++) { + smpi_free_tmp_buffer(tmpbufs[index]); + } + xbt_free(tmpbufs); + xbt_free(requests); + + } + if( sendbuf == MPI_IN_PLACE ) { + smpi_free_tmp_buffer(sendtmpbuf); + } + return MPI_SUCCESS; +} + +int Coll_allreduce_default::allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) +{ + int ret; + ret = Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm); + if(ret==MPI_SUCCESS) + ret = Colls::bcast(recvbuf, count, datatype, 0, comm); + return ret; +} + +int Coll_alltoall_default::alltoall( void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, MPI_Comm comm) +{ + return Coll_alltoall_ompi::alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); +} + + + +int Coll_alltoallv_default::alltoallv(void *sendbuf, int *sendcounts, int *senddisps, MPI_Datatype sendtype, + void *recvbuf, int *recvcounts, int *recvdisps, MPI_Datatype recvtype, MPI_Comm comm) +{ + int system_tag = 889; + int i; + int count; + MPI_Aint lb = 0; + MPI_Aint sendext = 0; + MPI_Aint recvext = 0; + MPI_Request *requests; + + /* Initialize. */ + int rank = comm->rank(); + int size = comm->size(); + XBT_DEBUG("<%d> algorithm basic_alltoallv() called.", rank); + sendtype->extent(&lb, &sendext); + recvtype->extent(&lb, &recvext); + /* Local copy from self */ + int err = Datatype::copy(static_cast(sendbuf) + senddisps[rank] * sendext, sendcounts[rank], sendtype, + static_cast(recvbuf) + recvdisps[rank] * recvext, recvcounts[rank], recvtype); + if (err == MPI_SUCCESS && size > 1) { + /* Initiate all send/recv to/from others. */ + requests = xbt_new(MPI_Request, 2 * (size - 1)); + count = 0; + /* Create all receives that will be posted first */ + for (i = 0; i < size; ++i) { + if (i != rank && recvcounts[i] != 0) { + requests[count] = Request::irecv_init(static_cast(recvbuf) + recvdisps[i] * recvext, + recvcounts[i], recvtype, i, system_tag, comm); + count++; + }else{ + XBT_DEBUG("<%d> skip request creation [src = %d, recvcounts[src] = %d]", rank, i, recvcounts[i]); + } + } + /* Now create all sends */ + for (i = 0; i < size; ++i) { + if (i != rank && sendcounts[i] != 0) { + requests[count] = Request::isend_init(static_cast(sendbuf) + senddisps[i] * sendext, + sendcounts[i], sendtype, i, system_tag, comm); + count++; + }else{ + XBT_DEBUG("<%d> skip request creation [dst = %d, sendcounts[dst] = %d]", rank, i, sendcounts[i]); + } + } + /* Wait for them all. 
*/ + Request::startall(count, requests); + XBT_DEBUG("<%d> wait for %d requests", rank, count); + Request::waitall(count, requests, MPI_STATUS_IGNORE); + for(i = 0; i < count; i++) { + if(requests[i]!=MPI_REQUEST_NULL) + Request::unref(&requests[i]); + } + xbt_free(requests); + } + return err; +} + +} +} + diff --git a/src/smpi/colls/smpi_intel_mpi_selector.cpp b/src/smpi/colls/smpi_intel_mpi_selector.cpp index 574aeaab0e..350f2a56ce 100644 --- a/src/smpi/colls/smpi_intel_mpi_selector.cpp +++ b/src/smpi/colls/smpi_intel_mpi_selector.cpp @@ -53,14 +53,14 @@ int (*intel_allreduce_functions_table[])(void *sendbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) ={ - smpi_coll_tuned_allreduce_rdb, - smpi_coll_tuned_allreduce_rab1, - smpi_coll_tuned_allreduce_redbcast, - smpi_coll_tuned_allreduce_mvapich2_two_level, - smpi_coll_tuned_allreduce_smp_binomial, - smpi_coll_tuned_allreduce_mvapich2_two_level, - smpi_coll_tuned_allreduce_ompi_ring_segmented, - smpi_coll_tuned_allreduce_ompi_ring_segmented + Coll_allreduce_rdb::allreduce, + Coll_allreduce_rab1::allreduce, + Coll_allreduce_redbcast::allreduce, + Coll_allreduce_mvapich2_two_level::allreduce, + Coll_allreduce_smp_binomial::allreduce, + Coll_allreduce_mvapich2_two_level::allreduce, + Coll_allreduce_ompi_ring_segmented::allreduce, + Coll_allreduce_ompi_ring_segmented::allreduce }; intel_tuning_table_element intel_allreduce_table[] = @@ -637,10 +637,10 @@ int (*intel_alltoall_functions_table[])(void *sbuf, int scount, void* rbuf, int rcount, MPI_Datatype rdtype, MPI_Comm comm) ={ - smpi_coll_tuned_alltoall_bruck, - smpi_coll_tuned_alltoall_mvapich2_scatter_dest, - smpi_coll_tuned_alltoall_pair, - smpi_coll_tuned_alltoall_mvapich2//Plum is proprietary ? (and super efficient) + Coll_alltoall_bruck::alltoall, + Coll_alltoall_mvapich2_scatter_dest::alltoall, + Coll_alltoall_pair::alltoall, + Coll_alltoall_mvapich2::alltoall//Plum is proprietary ? 
(and super efficient) }; /*I_MPI_ADJUST_BARRIER @@ -657,15 +657,15 @@ MPI_Barrier */ static int intel_barrier_gather_scatter(MPI_Comm comm){ //our default barrier performs a antibcast/bcast - smpi_mpi_barrier(comm); + Coll_barrier_default::barrier(comm); return MPI_SUCCESS; } int (*intel_barrier_functions_table[])(MPI_Comm comm) ={ - smpi_coll_tuned_barrier_ompi_basic_linear, - smpi_coll_tuned_barrier_ompi_recursivedoubling, - smpi_coll_tuned_barrier_ompi_basic_linear, - smpi_coll_tuned_barrier_ompi_recursivedoubling, + Coll_barrier_ompi_basic_linear::barrier, + Coll_barrier_ompi_recursivedoubling::barrier, + Coll_barrier_ompi_basic_linear::barrier, + Coll_barrier_ompi_recursivedoubling::barrier, intel_barrier_gather_scatter, intel_barrier_gather_scatter }; @@ -799,15 +799,15 @@ MPI_Bcast int (*intel_bcast_functions_table[])(void *buff, int count, MPI_Datatype datatype, int root, MPI_Comm comm) ={ - smpi_coll_tuned_bcast_binomial_tree, - //smpi_coll_tuned_bcast_scatter_rdb_allgather, - smpi_coll_tuned_bcast_NTSL, - smpi_coll_tuned_bcast_NTSL, - smpi_coll_tuned_bcast_SMP_binomial, - //smpi_coll_tuned_bcast_scatter_rdb_allgather, - smpi_coll_tuned_bcast_NTSL, - smpi_coll_tuned_bcast_SMP_linear, - smpi_coll_tuned_bcast_mvapich2,//we don't know shumilin's algo' + Coll_bcast_binomial_tree::bcast, + //Coll_bcast_scatter_rdb_allgather::bcast, + Coll_bcast_NTSL::bcast, + Coll_bcast_NTSL::bcast, + Coll_bcast_SMP_binomial::bcast, + //Coll_bcast_scatter_rdb_allgather::bcast, + Coll_bcast_NTSL::bcast, + Coll_bcast_SMP_linear::bcast, + Coll_bcast_mvapich2::bcast,//we don't know shumilin's algo' }; intel_tuning_table_element intel_bcast_table[] = @@ -969,12 +969,12 @@ int (*intel_reduce_functions_table[])(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) ={ - smpi_coll_tuned_reduce_mvapich2, - smpi_coll_tuned_reduce_binomial, - smpi_coll_tuned_reduce_mvapich2, - smpi_coll_tuned_reduce_mvapich2_two_level, - smpi_coll_tuned_reduce_rab, - smpi_coll_tuned_reduce_rab + Coll_reduce_mvapich2::reduce, + Coll_reduce_binomial::reduce, + Coll_reduce_mvapich2::reduce, + Coll_reduce_mvapich2_two_level::reduce, + Coll_reduce_rab::reduce, + Coll_reduce_rab::reduce }; intel_tuning_table_element intel_reduce_table[] = @@ -1059,7 +1059,7 @@ static int intel_reduce_scatter_reduce_scatterv(void *sbuf, void *rbuf, MPI_Op op, MPI_Comm comm) { - smpi_mpi_reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm); + Coll_reduce_scatter_default::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm); return MPI_SUCCESS; } @@ -1070,9 +1070,9 @@ static int intel_reduce_scatter_recursivehalving(void *sbuf, void *rbuf, MPI_Comm comm) { if(op==MPI_OP_NULL || op->is_commutative()) - return smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(sbuf, rbuf, rcounts,dtype, op,comm); + return Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm); else - return smpi_coll_tuned_reduce_scatter_mvapich2(sbuf, rbuf, rcounts,dtype, op,comm); + return Coll_reduce_scatter_mvapich2::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm); } int (*intel_reduce_scatter_functions_table[])( void *sbuf, void *rbuf, @@ -1082,8 +1082,8 @@ int (*intel_reduce_scatter_functions_table[])( void *sbuf, void *rbuf, MPI_Comm comm ) ={ intel_reduce_scatter_recursivehalving, - smpi_coll_tuned_reduce_scatter_mpich_pair, - smpi_coll_tuned_reduce_scatter_mpich_rdb, + Coll_reduce_scatter_mpich_pair::reduce_scatter, + Coll_reduce_scatter_mpich_rdb::reduce_scatter, 
intel_reduce_scatter_reduce_scatterv, intel_reduce_scatter_reduce_scatterv }; @@ -1491,10 +1491,10 @@ int (*intel_allgather_functions_table[])(void *sbuf, int scount, MPI_Datatype rdtype, MPI_Comm comm ) ={ - smpi_coll_tuned_allgather_rdb, - smpi_coll_tuned_allgather_bruck, - smpi_coll_tuned_allgather_ring, - smpi_coll_tuned_allgather_GB + Coll_allgather_rdb::allgather, + Coll_allgather_bruck::allgather, + Coll_allgather_ring::allgather, + Coll_allgather_GB::allgather }; intel_tuning_table_element intel_allgather_table[] = @@ -1661,10 +1661,10 @@ int (*intel_allgatherv_functions_table[])(void *sbuf, int scount, MPI_Datatype rdtype, MPI_Comm comm ) ={ - smpi_coll_tuned_allgatherv_mpich_rdb, - smpi_coll_tuned_allgatherv_ompi_bruck, - smpi_coll_tuned_allgatherv_ring, - smpi_coll_tuned_allgatherv_GB + Coll_allgatherv_mpich_rdb::allgatherv, + Coll_allgatherv_ompi_bruck::allgatherv, + Coll_allgatherv_ring::allgatherv, + Coll_allgatherv_GB::allgatherv }; intel_tuning_table_element intel_allgatherv_table[] = @@ -1872,9 +1872,9 @@ int (*intel_gather_functions_table[])(void *sbuf, int scount, int root, MPI_Comm comm ) ={ - smpi_coll_tuned_gather_ompi_binomial, - smpi_coll_tuned_gather_ompi_binomial, - smpi_coll_tuned_gather_mvapich2 + Coll_gather_ompi_binomial::gather, + Coll_gather_ompi_binomial::gather, + Coll_gather_mvapich2::gather }; intel_tuning_table_element intel_gather_table[] = @@ -1975,9 +1975,9 @@ int (*intel_scatter_functions_table[])(void *sbuf, int scount, MPI_Datatype rdtype, int root, MPI_Comm comm ) ={ - smpi_coll_tuned_scatter_ompi_binomial, - smpi_coll_tuned_scatter_ompi_binomial, - smpi_coll_tuned_scatter_mvapich2 + Coll_scatter_ompi_binomial::scatter, + Coll_scatter_ompi_binomial::scatter, + Coll_scatter_mvapich2::scatter }; intel_tuning_table_element intel_scatter_table[] = @@ -2149,8 +2149,8 @@ int (*intel_alltoallv_functions_table[])(void *sbuf, int *scounts, int *sdisps, MPI_Datatype rdtype, MPI_Comm comm ) ={ - smpi_coll_tuned_alltoallv_ompi_basic_linear, - smpi_coll_tuned_alltoallv_bruck + Coll_alltoallv_ompi_basic_linear::alltoallv, + Coll_alltoallv_bruck::alltoallv }; intel_tuning_table_element intel_alltoallv_table[] = @@ -2261,7 +2261,7 @@ intel_tuning_table_element intel_alltoallv_table[] = size_t block_dsize = 1; #define IMPI_COLL_SELECT(cat, ret, args, args2)\ -ret smpi_coll_tuned_ ## cat ## _impi (COLL_UNPAREN args)\ +ret Coll_ ## cat ## _impi:: cat (COLL_UNPAREN args)\ {\ int comm_size = comm->size();\ int i =0;\ @@ -2289,6 +2289,9 @@ ret smpi_coll_tuned_ ## cat ## _impi (COLL_UNPAREN args)\ args2);\ } +namespace simgrid{ +namespace smpi{ + COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm)); COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLREDUCE_SIG, (sbuf, rbuf, rcount, dtype, op, comm)); COLL_APPLY(IMPI_COLL_SELECT, COLL_GATHER_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, root, comm)); @@ -2301,3 +2304,5 @@ COLL_APPLY(IMPI_COLL_SELECT, COLL_REDUCE_SCATTER_SIG ,(sbuf,rbuf, rcounts,dtype, COLL_APPLY(IMPI_COLL_SELECT, COLL_SCATTER_SIG ,(sendbuf, sendcount, sendtype,recvbuf, recvcount, recvtype,root, comm)); COLL_APPLY(IMPI_COLL_SELECT, COLL_BARRIER_SIG,(comm)); +} +} diff --git a/src/smpi/colls/smpi_mpich_selector.cpp b/src/smpi/colls/smpi_mpich_selector.cpp index f29b79c500..8a9ffcd40e 100644 --- a/src/smpi/colls/smpi_mpich_selector.cpp +++ b/src/smpi/colls/smpi_mpich_selector.cpp @@ -56,7 +56,7 @@ End Algorithm: MPI_Allreduce */ -int 
smpi_coll_tuned_allreduce_mpich(void *sbuf, void *rbuf, int count, +int Coll_allreduce_mpich::allreduce(void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { size_t dsize, block_dsize; @@ -74,12 +74,12 @@ int smpi_coll_tuned_allreduce_mpich(void *sbuf, void *rbuf, int count, if (block_dsize > large_message && count >= pof2 && (op==MPI_OP_NULL || op->is_commutative())) { //for long messages - return (smpi_coll_tuned_allreduce_rab_rdb (sbuf, rbuf, + return (Coll_allreduce_rab_rdb::allreduce (sbuf, rbuf, count, dtype, op, comm)); }else { //for short ones and count < pof2 - return (smpi_coll_tuned_allreduce_rdb (sbuf, rbuf, + return (Coll_allreduce_rdb::allreduce (sbuf, rbuf, count, dtype, op, comm)); } @@ -132,7 +132,7 @@ int smpi_coll_tuned_allreduce_mpich(void *sbuf, void *rbuf, int count, End Algorithm: MPI_Alltoall */ -int smpi_coll_tuned_alltoall_mpich( void *sbuf, int scount, +int Coll_alltoall_mpich::alltoall( void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, @@ -162,26 +162,26 @@ int smpi_coll_tuned_alltoall_mpich( void *sbuf, int scount, block_dsize = dsize * scount; if ((block_dsize < short_size) && (communicator_size >= 8)) { - return smpi_coll_tuned_alltoall_bruck(sbuf, scount, sdtype, + return Coll_alltoall_bruck::alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } else if (block_dsize < medium_size) { - return smpi_coll_tuned_alltoall_basic_linear(sbuf, scount, sdtype, + return Coll_alltoall_basic_linear::alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); }else if (communicator_size%2){ - return smpi_coll_tuned_alltoall_ring(sbuf, scount, sdtype, + return Coll_alltoall_ring::alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } - return smpi_coll_tuned_alltoall_ring (sbuf, scount, sdtype, + return Coll_alltoall_ring::alltoall (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } -int smpi_coll_tuned_alltoallv_mpich(void *sbuf, int *scounts, int *sdisps, +int Coll_alltoallv_mpich::alltoallv(void *sbuf, int *scounts, int *sdisps, MPI_Datatype sdtype, void *rbuf, int *rcounts, int *rdisps, MPI_Datatype rdtype, @@ -189,15 +189,15 @@ int smpi_coll_tuned_alltoallv_mpich(void *sbuf, int *scounts, int *sdisps, ) { /* For starters, just keep the original algorithm. */ - return smpi_coll_tuned_alltoallv_bruck(sbuf, scounts, sdisps, sdtype, + return Coll_alltoallv_bruck::alltoallv(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps,rdtype, comm); } -int smpi_coll_tuned_barrier_mpich(MPI_Comm comm) +int Coll_barrier_mpich::barrier(MPI_Comm comm) { - return smpi_coll_tuned_barrier_ompi_bruck(comm); + return Coll_barrier_ompi_bruck::barrier(comm); } /* This is the default implementation of broadcast. 
The algorithm is: @@ -243,7 +243,7 @@ int smpi_coll_tuned_barrier_mpich(MPI_Comm comm) */ -int smpi_coll_tuned_bcast_mpich(void *buff, int count, +int Coll_bcast_mpich::bcast(void *buff, int count, MPI_Datatype datatype, int root, MPI_Comm comm ) @@ -267,17 +267,17 @@ int smpi_coll_tuned_bcast_mpich(void *buff, int count, single-element broadcasts */ if ((message_size < small_message_size) || (communicator_size <= 8)) { /* Binomial without segmentation */ - return smpi_coll_tuned_bcast_binomial_tree (buff, count, datatype, + return Coll_bcast_binomial_tree::bcast (buff, count, datatype, root, comm); } else if (message_size < intermediate_message_size && !(communicator_size%2)) { // SplittedBinary with 1KB segments - return smpi_coll_tuned_bcast_scatter_rdb_allgather(buff, count, datatype, + return Coll_bcast_scatter_rdb_allgather::bcast(buff, count, datatype, root, comm); } //Handle large message sizes - return smpi_coll_tuned_bcast_scatter_LR_allgather (buff, count, datatype, + return Coll_bcast_scatter_LR_allgather::bcast (buff, count, datatype, root, comm); } @@ -339,7 +339,7 @@ int smpi_coll_tuned_bcast_mpich(void *buff, int count, */ -int smpi_coll_tuned_reduce_mpich( void *sendbuf, void *recvbuf, +int Coll_reduce_mpich::reduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm @@ -360,9 +360,9 @@ int smpi_coll_tuned_reduce_mpich( void *sendbuf, void *recvbuf, if ((count < pof2) || (message_size < 2048) || (op!=MPI_OP_NULL && !op->is_commutative())) { - return smpi_coll_tuned_reduce_binomial (sendbuf, recvbuf, count, datatype, op, root, comm); + return Coll_reduce_binomial::reduce (sendbuf, recvbuf, count, datatype, op, root, comm); } - return smpi_coll_tuned_reduce_scatter_gather(sendbuf, recvbuf, count, datatype, op, root, comm/*, module, + return Coll_reduce_scatter_gather::reduce(sendbuf, recvbuf, count, datatype, op, root, comm/*, module, segsize, max_requests*/); } @@ -416,7 +416,7 @@ int smpi_coll_tuned_reduce_mpich( void *sendbuf, void *recvbuf, */ -int smpi_coll_tuned_reduce_scatter_mpich( void *sbuf, void *rbuf, +int Coll_reduce_scatter_mpich::reduce_scatter( void *sbuf, void *rbuf, int *rcounts, MPI_Datatype dtype, MPI_Op op, @@ -428,7 +428,7 @@ int smpi_coll_tuned_reduce_scatter_mpich( void *sbuf, void *rbuf, if(sbuf==rbuf)sbuf=MPI_IN_PLACE; //restore MPI_IN_PLACE as these algorithms handle it - XBT_DEBUG("smpi_coll_tuned_reduce_scatter_mpich"); + XBT_DEBUG("Coll_reduce_scatter_mpich::reduce"); comm_size = comm->size(); // We need data size for decision function @@ -438,7 +438,7 @@ int smpi_coll_tuned_reduce_scatter_mpich( void *sbuf, void *rbuf, } if( (op==MPI_OP_NULL || op->is_commutative()) && total_message_size > 524288) { - return smpi_coll_tuned_reduce_scatter_mpich_pair (sbuf, rbuf, rcounts, + return Coll_reduce_scatter_mpich_pair::reduce_scatter (sbuf, rbuf, rcounts, dtype, op, comm); }else if ((op!=MPI_OP_NULL && !op->is_commutative())) { @@ -456,12 +456,12 @@ int smpi_coll_tuned_reduce_scatter_mpich( void *sbuf, void *rbuf, if (pof2 == comm_size && is_block_regular) { /* noncommutative, pof2 size, and block regular */ - return smpi_coll_tuned_reduce_scatter_mpich_noncomm(sbuf, rbuf, rcounts, dtype, op, comm); + return Coll_reduce_scatter_mpich_noncomm::reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm); } - return smpi_coll_tuned_reduce_scatter_mpich_rdb(sbuf, rbuf, rcounts, dtype, op, comm); + return Coll_reduce_scatter_mpich_rdb::reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm); }else{ - return 
smpi_coll_tuned_reduce_scatter_mpich_rdb(sbuf, rbuf, rcounts, dtype, op, comm); + return Coll_reduce_scatter_mpich_rdb::reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm); } } @@ -511,7 +511,7 @@ int smpi_coll_tuned_reduce_scatter_mpich( void *sbuf, void *rbuf, End Algorithm: MPI_Allgather */ -int smpi_coll_tuned_allgather_mpich(void *sbuf, int scount, +int Coll_allgather_mpich::allgather(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, @@ -539,15 +539,15 @@ int smpi_coll_tuned_allgather_mpich(void *sbuf, int scount, - for everything else use ring. */ if ((pow2_size == communicator_size) && (total_dsize < 524288)) { - return smpi_coll_tuned_allgather_rdb(sbuf, scount, sdtype, + return Coll_allgather_rdb::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } else if (total_dsize <= 81920) { - return smpi_coll_tuned_allgather_bruck(sbuf, scount, sdtype, + return Coll_allgather_bruck::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } - return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype, + return Coll_allgather_ring::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } @@ -589,7 +589,7 @@ int smpi_coll_tuned_allgather_mpich(void *sbuf, int scount, End Algorithm: MPI_Allgatherv */ -int smpi_coll_tuned_allgatherv_mpich(void *sbuf, int scount, +int Coll_allgatherv_mpich::allgatherv(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int *rcounts, int *rdispls, @@ -612,15 +612,15 @@ int smpi_coll_tuned_allgatherv_mpich(void *sbuf, int scount, for (pow2_size = 1; pow2_size < communicator_size; pow2_size <<=1); if ((pow2_size == communicator_size) && (total_dsize < 524288)) { - return smpi_coll_tuned_allgatherv_mpich_rdb(sbuf, scount, sdtype, + return Coll_allgatherv_mpich_rdb::allgatherv(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm); } else if (total_dsize <= 81920) { - return smpi_coll_tuned_allgatherv_ompi_bruck(sbuf, scount, sdtype, + return Coll_allgatherv_ompi_bruck::allgatherv(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm); } - return smpi_coll_tuned_allgatherv_mpich_ring(sbuf, scount, sdtype, + return Coll_allgatherv_mpich_ring::allgatherv(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm); } @@ -646,8 +646,10 @@ int smpi_coll_tuned_allgatherv_mpich(void *sbuf, int scount, End Algorithm: MPI_Gather */ +namespace simgrid{ +namespace smpi{ -int smpi_coll_tuned_gather_mpich(void *sbuf, int scount, +int Coll_gather_mpich::gather(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, @@ -655,11 +657,14 @@ int smpi_coll_tuned_gather_mpich(void *sbuf, int scount, MPI_Comm comm ) { - return smpi_coll_tuned_gather_ompi_binomial (sbuf, scount, sdtype, + return Coll_gather_ompi_binomial::gather (sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm); } +} +} + /* This is the default implementation of scatter. 
The algorithm is: Algorithm: MPI_Scatter @@ -682,7 +687,7 @@ int smpi_coll_tuned_gather_mpich(void *sbuf, int scount, */ -int smpi_coll_tuned_scatter_mpich(void *sbuf, int scount, +int Coll_scatter_mpich::scatter(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, @@ -694,7 +699,7 @@ int smpi_coll_tuned_scatter_mpich(void *sbuf, int scount, scount=rcount; sdtype=rdtype; } - int ret= smpi_coll_tuned_scatter_ompi_binomial (sbuf, scount, sdtype, + int ret= Coll_scatter_ompi_binomial::scatter (sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm); if(comm->rank()!=root){ diff --git a/src/smpi/colls/smpi_mvapich2_selector.cpp b/src/smpi/colls/smpi_mvapich2_selector.cpp index 1d6f4c430e..bda8db4287 100644 --- a/src/smpi/colls/smpi_mvapich2_selector.cpp +++ b/src/smpi/colls/smpi_mvapich2_selector.cpp @@ -11,8 +11,11 @@ #include "smpi_mvapich2_selector_stampede.h" +namespace simgrid{ +namespace smpi{ -int smpi_coll_tuned_alltoall_mvapich2( void *sendbuf, int sendcount, + +int Coll_alltoall_mvapich2::alltoall( void *sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, @@ -82,7 +85,7 @@ int smpi_coll_tuned_alltoall_mvapich2( void *sendbuf, int sendcount, return (mpi_errno); } -int smpi_coll_tuned_allgather_mvapich2(void *sendbuf, int sendcount, MPI_Datatype sendtype, +int Coll_allgather_mvapich2::allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) { @@ -149,9 +152,9 @@ int smpi_coll_tuned_allgather_mvapich2(void *sendbuf, int sendcount, MPI_Datatyp } /* Set inter-leader pt */ - MV2_Allgather_function = + MV2_Allgatherction = mv2_allgather_thresholds_table[conf_index][range].inter_leader[range_threshold]. 
- MV2_pt_Allgather_function; + MV2_pt_Allgatherction; is_two_level = mv2_allgather_thresholds_table[conf_index][range].two_level[range_threshold]; @@ -163,7 +166,7 @@ int smpi_coll_tuned_allgather_mvapich2(void *sendbuf, int sendcount, MPI_Datatyp recvbuf, recvcount, recvtype, comm); }else{ - mpi_errno = smpi_coll_tuned_allgather_mpich(sendbuf, sendcount, sendtype, + mpi_errno = Coll_allgather_mpich::allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); } @@ -172,10 +175,10 @@ int smpi_coll_tuned_allgather_mvapich2(void *sendbuf, int sendcount, MPI_Datatyp recvbuf, recvcount, recvtype, comm); } - } else if(MV2_Allgather_function == &MPIR_Allgather_Bruck_MV2 - || MV2_Allgather_function == &MPIR_Allgather_RD_MV2 - || MV2_Allgather_function == &MPIR_Allgather_Ring_MV2) { - mpi_errno = MV2_Allgather_function(sendbuf, sendcount, sendtype, + } else if(MV2_Allgatherction == &MPIR_Allgather_Bruck_MV2 + || MV2_Allgatherction == &MPIR_Allgather_RD_MV2 + || MV2_Allgatherction == &MPIR_Allgather_Ring_MV2) { + mpi_errno = MV2_Allgatherction(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); }else{ @@ -185,8 +188,7 @@ int smpi_coll_tuned_allgather_mvapich2(void *sendbuf, int sendcount, MPI_Datatyp return mpi_errno; } - -int smpi_coll_tuned_gather_mvapich2(void *sendbuf, +int Coll_gather_mvapich2::gather(void *sendbuf, int sendcnt, MPI_Datatype sendtype, void *recvbuf, @@ -255,7 +257,7 @@ int smpi_coll_tuned_gather_mvapich2(void *sendbuf, } else { // Indeed, direct (non SMP-aware)gather is MPICH one - mpi_errno = smpi_coll_tuned_gather_mpich(sendbuf, sendcnt, sendtype, + mpi_errno = Coll_gather_mpich::gather(sendbuf, sendcnt, sendtype, recvbuf, recvcnt, recvtype, root, comm); } @@ -263,8 +265,7 @@ int smpi_coll_tuned_gather_mvapich2(void *sendbuf, return mpi_errno; } - -int smpi_coll_tuned_allgatherv_mvapich2(void *sendbuf, int sendcount, MPI_Datatype sendtype, +int Coll_allgatherv_mvapich2::allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm ) { @@ -330,7 +331,7 @@ int smpi_coll_tuned_allgatherv_mvapich2(void *sendbuf, int sendcount, MPI_Dataty -int smpi_coll_tuned_allreduce_mvapich2(void *sendbuf, +int Coll_allreduce_mvapich2::allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, @@ -378,10 +379,10 @@ int smpi_coll_tuned_allreduce_mvapich2(void *sendbuf, if(mv2_allreduce_thresholds_table[range].mcast_enabled != 1){ while ((range_threshold < (mv2_allreduce_thresholds_table[range].size_inter_table - 1)) && ((mv2_allreduce_thresholds_table[range]. - inter_leader[range_threshold].MV2_pt_Allreduce_function + inter_leader[range_threshold].MV2_pt_Allreducection == &MPIR_Allreduce_mcst_reduce_redscat_gather_MV2) || (mv2_allreduce_thresholds_table[range]. 
- inter_leader[range_threshold].MV2_pt_Allreduce_function + inter_leader[range_threshold].MV2_pt_Allreducection == &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2) )) { range_threshold++; @@ -406,20 +407,20 @@ int smpi_coll_tuned_allreduce_mvapich2(void *sendbuf, range_threshold_intra++; } - MV2_Allreduce_function = mv2_allreduce_thresholds_table[range].inter_leader[range_threshold] - .MV2_pt_Allreduce_function; + MV2_Allreducection = mv2_allreduce_thresholds_table[range].inter_leader[range_threshold] + .MV2_pt_Allreducection; MV2_Allreduce_intra_function = mv2_allreduce_thresholds_table[range].intra_node[range_threshold_intra] - .MV2_pt_Allreduce_function; + .MV2_pt_Allreducection; /* check if mcast is ready, otherwise replace mcast with other algorithm */ - if((MV2_Allreduce_function == &MPIR_Allreduce_mcst_reduce_redscat_gather_MV2)|| - (MV2_Allreduce_function == &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2)){ + if((MV2_Allreducection == &MPIR_Allreduce_mcst_reduce_redscat_gather_MV2)|| + (MV2_Allreducection == &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2)){ { - MV2_Allreduce_function = &MPIR_Allreduce_pt2pt_rd_MV2; + MV2_Allreducection = &MPIR_Allreduce_pt2pt_rd_MV2; } if(is_two_level != 1) { - MV2_Allreduce_function = &MPIR_Allreduce_pt2pt_rd_MV2; + MV2_Allreducection = &MPIR_Allreduce_pt2pt_rd_MV2; } } @@ -436,7 +437,7 @@ int smpi_coll_tuned_allreduce_mvapich2(void *sendbuf, datatype, op, comm); } } else { - mpi_errno = MV2_Allreduce_function(sendbuf, recvbuf, count, + mpi_errno = MV2_Allreducection(sendbuf, recvbuf, count, datatype, op, comm); } } @@ -449,7 +450,7 @@ int smpi_coll_tuned_allreduce_mvapich2(void *sendbuf, } -int smpi_coll_tuned_alltoallv_mvapich2(void *sbuf, int *scounts, int *sdisps, +int Coll_alltoallv_mvapich2::alltoallv(void *sbuf, int *scounts, int *sdisps, MPI_Datatype sdtype, void *rbuf, int *rcounts, int *rdisps, MPI_Datatype rdtype, @@ -458,25 +459,25 @@ int smpi_coll_tuned_alltoallv_mvapich2(void *sbuf, int *scounts, int *sdisps, { if (sbuf == MPI_IN_PLACE) { - return smpi_coll_tuned_alltoallv_ompi_basic_linear(sbuf, scounts, sdisps, sdtype, + return Coll_alltoallv_ompi_basic_linear::alltoallv(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps,rdtype, comm); } else /* For starters, just keep the original algorithm. 
*/ - return smpi_coll_tuned_alltoallv_ring(sbuf, scounts, sdisps, sdtype, + return Coll_alltoallv_ring::alltoallv(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps,rdtype, comm); } -int smpi_coll_tuned_barrier_mvapich2(MPI_Comm comm) +int Coll_barrier_mvapich2::barrier(MPI_Comm comm) { - return smpi_coll_tuned_barrier_mvapich2_pair(comm); + return Coll_barrier_mvapich2_pair::barrier(comm); } -int smpi_coll_tuned_bcast_mvapich2(void *buffer, +int Coll_bcast_mvapich2::bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm) @@ -678,7 +679,7 @@ int smpi_coll_tuned_bcast_mvapich2(void *buffer, -int smpi_coll_tuned_reduce_mvapich2( void *sendbuf, +int Coll_reduce_mvapich2::reduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, @@ -796,7 +797,7 @@ int smpi_coll_tuned_reduce_mvapich2( void *sendbuf, } -int smpi_coll_tuned_reduce_scatter_mvapich2(void *sendbuf, void *recvbuf, int *recvcnts, +int Coll_reduce_scatter_mvapich2::reduce_scatter(void *sendbuf, void *recvbuf, int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { @@ -860,7 +861,7 @@ int smpi_coll_tuned_reduce_scatter_mvapich2(void *sendbuf, void *recvbuf, int *r recvcnts, datatype, op, comm); } - mpi_errno = smpi_coll_tuned_reduce_scatter_mpich_rdb(sendbuf, recvbuf, + mpi_errno = Coll_reduce_scatter_mpich_rdb::reduce_scatter(sendbuf, recvbuf, recvcnts, datatype, op, comm); } @@ -871,7 +872,7 @@ int smpi_coll_tuned_reduce_scatter_mvapich2(void *sendbuf, void *recvbuf, int *r -int smpi_coll_tuned_scatter_mvapich2(void *sendbuf, +int Coll_scatter_mvapich2::scatter(void *sendbuf, int sendcnt, MPI_Datatype sendtype, void *recvbuf, @@ -1004,6 +1005,8 @@ int smpi_coll_tuned_scatter_mvapich2(void *sendbuf, return (mpi_errno); } +} +} void smpi_coll_cleanup_mvapich2(void){ int i=0; if(mv2_alltoall_thresholds_table) diff --git a/src/smpi/colls/smpi_mvapich2_selector_stampede.h b/src/smpi/colls/smpi_mvapich2_selector_stampede.h index e0f2156377..c6a28b8029 100644 --- a/src/smpi/colls/smpi_mvapich2_selector_stampede.h +++ b/src/smpi/colls/smpi_mvapich2_selector_stampede.h @@ -10,6 +10,7 @@ #define MV2_MAX_NB_THRESHOLDS 32 +using namespace simgrid::smpi; typedef struct { int min; @@ -36,11 +37,11 @@ int *mv2_size_alltoall_tuning_table = NULL; mv2_alltoall_tuning_table **mv2_alltoall_thresholds_table = NULL; -#define MPIR_Alltoall_bruck_MV2 smpi_coll_tuned_alltoall_bruck -#define MPIR_Alltoall_RD_MV2 smpi_coll_tuned_alltoall_rdb -#define MPIR_Alltoall_Scatter_dest_MV2 smpi_coll_tuned_alltoall_mvapich2_scatter_dest -#define MPIR_Alltoall_pairwise_MV2 smpi_coll_tuned_alltoall_pair -#define MPIR_Alltoall_inplace_MV2 smpi_coll_tuned_alltoall_ring +#define MPIR_Alltoall_bruck_MV2 Coll_alltoall_bruck::alltoall +#define MPIR_Alltoall_RD_MV2 Coll_alltoall_rdb::alltoall +#define MPIR_Alltoall_Scatter_dest_MV2 Coll_alltoall_mvapich2_scatter_dest::alltoall +#define MPIR_Alltoall_pairwise_MV2 Coll_alltoall_pair::alltoall +#define MPIR_Alltoall_inplace_MV2 Coll_alltoall_ring::alltoall static void init_mv2_alltoall_tables_stampede(){ @@ -294,7 +295,7 @@ static void init_mv2_alltoall_tables_stampede(){ typedef struct { int min; int max; - int (*MV2_pt_Allgather_function)(void *sendbuf, + int (*MV2_pt_Allgatherction)(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, @@ -309,7 +310,7 @@ typedef struct { mv2_allgather_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS]; } mv2_allgather_tuning_table; -int (*MV2_Allgather_function)(void *sendbuf, +int (*MV2_Allgatherction)(void *sendbuf, int sendcount, 
MPI_Datatype sendtype, void *recvbuf, @@ -331,10 +332,10 @@ static int MPIR_Allgather_RD_Allgather_Comm_MV2( void *sendbuf, return 0; } -#define MPIR_Allgather_Bruck_MV2 smpi_coll_tuned_allgather_bruck -#define MPIR_Allgather_RD_MV2 smpi_coll_tuned_allgather_rdb -#define MPIR_Allgather_Ring_MV2 smpi_coll_tuned_allgather_ring -#define MPIR_2lvl_Allgather_MV2 smpi_coll_tuned_allgather_mvapich2_smp +#define MPIR_Allgather_Bruck_MV2 Coll_allgather_bruck::allgather +#define MPIR_Allgather_RD_MV2 Coll_allgather_rdb::allgather +#define MPIR_Allgather_Ring_MV2 Coll_allgather_ring::allgather +#define MPIR_2lvl_Allgather_MV2 Coll_allgather_mvapich2_smp::allgather static void init_mv2_allgather_tables_stampede(){ int i; @@ -590,9 +591,10 @@ MV2_Gather_function_ptr MV2_Gather_inter_leader_function = NULL; MV2_Gather_function_ptr MV2_Gather_intra_node_function = NULL; -#define MPIR_Gather_MV2_Direct smpi_coll_tuned_gather_ompi_basic_linear -#define MPIR_Gather_MV2_two_level_Direct smpi_coll_tuned_gather_mvapich2_two_level -#define MPIR_Gather_intra smpi_coll_tuned_gather_mpich + +#define MPIR_Gather_MV2_Direct Coll_gather_ompi_basic_linear::gather +#define MPIR_Gather_MV2_two_level_Direct Coll_gather_mvapich2_two_level::gather +#define MPIR_Gather_intra Coll_gather_mpich::gather static void init_mv2_gather_tables_stampede(){ @@ -678,9 +680,9 @@ int (*MV2_Allgatherv_function)(void *sendbuf, int mv2_size_allgatherv_tuning_table = 0; mv2_allgatherv_tuning_table *mv2_allgatherv_thresholds_table = NULL; -#define MPIR_Allgatherv_Rec_Doubling_MV2 smpi_coll_tuned_allgatherv_mpich_rdb -#define MPIR_Allgatherv_Bruck_MV2 smpi_coll_tuned_allgatherv_ompi_bruck -#define MPIR_Allgatherv_Ring_MV2 smpi_coll_tuned_allgatherv_mpich_ring +#define MPIR_Allgatherv_Rec_Doubling_MV2 Coll_allgatherv_mpich_rdb::allgatherv +#define MPIR_Allgatherv_Bruck_MV2 Coll_allgatherv_ompi_bruck::allgatherv +#define MPIR_Allgatherv_Ring_MV2 Coll_allgatherv_mpich_ring::allgatherv static void init_mv2_allgatherv_tables_stampede(){ @@ -750,7 +752,7 @@ static void init_mv2_allgatherv_tables_stampede(){ typedef struct { int min; int max; - int (*MV2_pt_Allreduce_function)(void *sendbuf, + int (*MV2_pt_Allreducection)(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, @@ -768,7 +770,7 @@ typedef struct { } mv2_allreduce_tuning_table; -int (*MV2_Allreduce_function)(void *sendbuf, +int (*MV2_Allreducection)(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, @@ -812,7 +814,7 @@ static int MPIR_Allreduce_reduce_p2p_MV2( void *sendbuf, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - mpi_coll_reduce_fun(sendbuf,recvbuf,count,datatype,op,0,comm); + Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm); return MPI_SUCCESS; } @@ -822,13 +824,13 @@ static int MPIR_Allreduce_reduce_shmem_MV2( void *sendbuf, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - mpi_coll_reduce_fun(sendbuf,recvbuf,count,datatype,op,0,comm); + Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm); return MPI_SUCCESS; } -#define MPIR_Allreduce_pt2pt_rd_MV2 smpi_coll_tuned_allreduce_rdb -#define MPIR_Allreduce_pt2pt_rs_MV2 smpi_coll_tuned_allreduce_mvapich2_rs -#define MPIR_Allreduce_two_level_MV2 smpi_coll_tuned_allreduce_mvapich2_two_level +#define MPIR_Allreduce_pt2pt_rd_MV2 Coll_allreduce_rdb::allreduce +#define MPIR_Allreduce_pt2pt_rs_MV2 Coll_allreduce_mvapich2_rs::allreduce +#define MPIR_Allreduce_two_level_MV2 Coll_allreduce_mvapich2_two_level::allreduce static void init_mv2_allreduce_tables_stampede(){ @@ -1018,17 +1020,17 @@ int 
mv2_intra_node_knomial_factor = 4; #define INTRA_NODE_ROOT 0 -#define MPIR_Pipelined_Bcast_Zcpy_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Pipelined_Bcast_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Bcast_binomial_MV2 smpi_coll_tuned_bcast_binomial_tree -#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 smpi_coll_tuned_bcast_scatter_LR_allgather -#define MPIR_Bcast_scatter_doubling_allgather_MV2 smpi_coll_tuned_bcast_scatter_rdb_allgather -#define MPIR_Bcast_scatter_ring_allgather_MV2 smpi_coll_tuned_bcast_scatter_LR_allgather -#define MPIR_Shmem_Bcast_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Bcast_tune_inter_node_helper_MV2 smpi_coll_tuned_bcast_mvapich2_inter_node -#define MPIR_Bcast_inter_node_helper_MV2 smpi_coll_tuned_bcast_mvapich2_inter_node -#define MPIR_Knomial_Bcast_intra_node_MV2 smpi_coll_tuned_bcast_mvapich2_knomial_intra_node -#define MPIR_Bcast_intra_MV2 smpi_coll_tuned_bcast_mvapich2_intra_node +#define MPIR_Pipelined_Bcast_Zcpy_MV2 Coll_bcast_mpich::bcast +#define MPIR_Pipelined_Bcast_MV2 Coll_bcast_mpich::bcast +#define MPIR_Bcast_binomial_MV2 Coll_bcast_binomial_tree::bcast +#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 Coll_bcast_scatter_LR_allgather::bcast +#define MPIR_Bcast_scatter_doubling_allgather_MV2 Coll_bcast_scatter_rdb_allgather::bcast +#define MPIR_Bcast_scatter_ring_allgather_MV2 Coll_bcast_scatter_LR_allgather::bcast +#define MPIR_Shmem_Bcast_MV2 Coll_bcast_mpich::bcast +#define MPIR_Bcast_tune_inter_node_helper_MV2 Coll_bcast_mvapich2_inter_node::bcast +#define MPIR_Bcast_inter_node_helper_MV2 Coll_bcast_mvapich2_inter_node::bcast +#define MPIR_Knomial_Bcast_intra_node_MV2 Coll_bcast_mvapich2_knomial_intra_node::bcast +#define MPIR_Bcast_intra_MV2 Coll_bcast_mvapich2_intra_node::bcast static void init_mv2_bcast_tables_stampede(){ //Stampede, @@ -1291,12 +1293,12 @@ int (*MV2_Reduce_intra_function)( void *sendbuf, MPI_Comm comm_ptr)=NULL; -#define MPIR_Reduce_inter_knomial_wrapper_MV2 smpi_coll_tuned_reduce_mvapich2_knomial -#define MPIR_Reduce_intra_knomial_wrapper_MV2 smpi_coll_tuned_reduce_mvapich2_knomial -#define MPIR_Reduce_binomial_MV2 smpi_coll_tuned_reduce_binomial -#define MPIR_Reduce_redscat_gather_MV2 smpi_coll_tuned_reduce_scatter_gather -#define MPIR_Reduce_shmem_MV2 smpi_coll_tuned_reduce_ompi_basic_linear -#define MPIR_Reduce_two_level_helper_MV2 smpi_coll_tuned_reduce_mvapich2_two_level +#define MPIR_Reduce_inter_knomial_wrapper_MV2 Coll_reduce_mvapich2_knomial::reduce +#define MPIR_Reduce_intra_knomial_wrapper_MV2 Coll_reduce_mvapich2_knomial::reduce +#define MPIR_Reduce_binomial_MV2 Coll_reduce_binomial::reduce +#define MPIR_Reduce_redscat_gather_MV2 Coll_reduce_scatter_gather::reduce +#define MPIR_Reduce_shmem_MV2 Coll_reduce_ompi_basic_linear::reduce +#define MPIR_Reduce_two_level_helper_MV2 Coll_reduce_mvapich2_two_level::reduce static void init_mv2_reduce_tables_stampede(){ @@ -1533,12 +1535,12 @@ static int MPIR_Reduce_Scatter_Basic_MV2(void *sendbuf, MPI_Op op, MPI_Comm comm) { - smpi_mpi_reduce_scatter(sendbuf,recvbuf,recvcnts,datatype,op,comm); + Coll_reduce_scatter_default::reduce_scatter(sendbuf,recvbuf,recvcnts,datatype,op,comm); return MPI_SUCCESS; } -#define MPIR_Reduce_scatter_non_comm_MV2 smpi_coll_tuned_reduce_scatter_mpich_noncomm -#define MPIR_Reduce_scatter_Rec_Halving_MV2 smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving -#define MPIR_Reduce_scatter_Pair_Wise_MV2 smpi_coll_tuned_reduce_scatter_mpich_pair +#define MPIR_Reduce_scatter_non_comm_MV2 Coll_reduce_scatter_mpich_noncomm::reduce_scatter 
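These MPIR_* aliases (the remaining reduce-scatter ones follow just below) keep the MVAPICH2-derived tuning tables unchanged: the tables still store plain C function pointers, and static member functions such as Coll_bcast_binomial_tree::bcast are valid targets for them because they take no implicit this. A small self-contained illustration of that mapping, with made-up class and table names:

#include <mpi.h>

// Static member functions decay to ordinary function pointers, so they can be
// stored in C-style dispatch tables exactly like the old free functions.
// Everything below is illustrative; both variants just forward to MPI_Bcast.
namespace demo {
struct Coll_bcast_binomial {
  static int bcast(void* buf, int count, MPI_Datatype dt, int root, MPI_Comm comm)
  { return MPI_Bcast(buf, count, dt, root, comm); }
};
struct Coll_bcast_flattree {
  static int bcast(void* buf, int count, MPI_Datatype dt, int root, MPI_Comm comm)
  { return MPI_Bcast(buf, count, dt, root, comm); }
};
}

typedef int (*bcast_fn)(void*, int, MPI_Datatype, int, MPI_Comm);

static bcast_fn bcast_table[] = {
    demo::Coll_bcast_binomial::bcast, // no object needed: a static method is a plain function
    demo::Coll_bcast_flattree::bcast,
};

static int dispatch_bcast(int algo, void* buf, int count, MPI_Datatype dt, int root, MPI_Comm comm)
{
  return bcast_table[algo](buf, count, dt, root, comm);
}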
+#define MPIR_Reduce_scatter_Rec_Halving_MV2 Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter +#define MPIR_Reduce_scatter_Pair_Wise_MV2 Coll_reduce_scatter_mpich_pair::reduce_scatter @@ -1661,10 +1663,10 @@ int MPIR_Scatter_mcst_wrap_MV2(void *sendbuf, return 0; } -#define MPIR_Scatter_MV2_Binomial smpi_coll_tuned_scatter_ompi_binomial -#define MPIR_Scatter_MV2_Direct smpi_coll_tuned_scatter_ompi_basic_linear -#define MPIR_Scatter_MV2_two_level_Binomial smpi_coll_tuned_scatter_mvapich2_two_level_binomial -#define MPIR_Scatter_MV2_two_level_Direct smpi_coll_tuned_scatter_mvapich2_two_level_direct +#define MPIR_Scatter_MV2_Binomial Coll_scatter_ompi_binomial::scatter +#define MPIR_Scatter_MV2_Direct Coll_scatter_ompi_basic_linear::scatter +#define MPIR_Scatter_MV2_two_level_Binomial Coll_scatter_mvapich2_two_level_binomial::scatter +#define MPIR_Scatter_MV2_two_level_Direct Coll_scatter_mvapich2_two_level_direct::scatter diff --git a/src/smpi/colls/smpi_openmpi_selector.cpp b/src/smpi/colls/smpi_openmpi_selector.cpp index 67c7a39263..d6d85a2c27 100644 --- a/src/smpi/colls/smpi_openmpi_selector.cpp +++ b/src/smpi/colls/smpi_openmpi_selector.cpp @@ -8,8 +8,10 @@ #include "colls_private.h" +namespace simgrid{ +namespace smpi{ -int smpi_coll_tuned_allreduce_ompi(void *sbuf, void *rbuf, int count, +int Coll_allreduce_ompi::allreduce(void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { size_t dsize, block_dsize; @@ -27,7 +29,7 @@ int smpi_coll_tuned_allreduce_ompi(void *sbuf, void *rbuf, int count, block_dsize = dsize * count; if (block_dsize < intermediate_message) { - return (smpi_coll_tuned_allreduce_rdb (sbuf, rbuf, + return (Coll_allreduce_rdb::allreduce (sbuf, rbuf, count, dtype, op, comm)); } @@ -37,23 +39,23 @@ int smpi_coll_tuned_allreduce_ompi(void *sbuf, void *rbuf, int count, if ((comm_size * segment_size >= block_dsize)) { //FIXME: ok, these are not the right algorithms, try to find closer ones // lr is a good match for allreduce_ring (difference is mainly the use of sendrecv) - return smpi_coll_tuned_allreduce_lr(sbuf, rbuf, count, dtype, + return Coll_allreduce_lr::allreduce(sbuf, rbuf, count, dtype, op, comm); } else { - return (smpi_coll_tuned_allreduce_ompi_ring_segmented (sbuf, rbuf, + return (Coll_allreduce_ompi_ring_segmented::allreduce (sbuf, rbuf, count, dtype, op, comm /*segment_size*/)); } } - return (smpi_coll_tuned_allreduce_redbcast(sbuf, rbuf, count, + return (Coll_allreduce_redbcast::allreduce(sbuf, rbuf, count, dtype, op, comm)); } -int smpi_coll_tuned_alltoall_ompi( void *sbuf, int scount, +int Coll_alltoall_ompi::alltoall( void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, @@ -71,22 +73,22 @@ int smpi_coll_tuned_alltoall_ompi( void *sbuf, int scount, block_dsize = dsize * scount; if ((block_dsize < 200) && (communicator_size > 12)) { - return smpi_coll_tuned_alltoall_bruck(sbuf, scount, sdtype, + return Coll_alltoall_bruck::alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } else if (block_dsize < 3000) { - return smpi_coll_tuned_alltoall_basic_linear(sbuf, scount, sdtype, + return Coll_alltoall_basic_linear::alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } - return smpi_coll_tuned_alltoall_ring (sbuf, scount, sdtype, + return Coll_alltoall_ring::alltoall (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } -int smpi_coll_tuned_alltoallv_ompi(void *sbuf, int *scounts, int *sdisps, +int Coll_alltoallv_ompi::alltoallv(void *sbuf, int *scounts, int *sdisps, 
MPI_Datatype sdtype, void *rbuf, int *rcounts, int *rdisps, MPI_Datatype rdtype, @@ -94,17 +96,17 @@ int smpi_coll_tuned_alltoallv_ompi(void *sbuf, int *scounts, int *sdisps, ) { /* For starters, just keep the original algorithm. */ - return smpi_coll_tuned_alltoallv_ompi_basic_linear(sbuf, scounts, sdisps, sdtype, + return Coll_alltoallv_ompi_basic_linear::alltoallv(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps,rdtype, comm); } -int smpi_coll_tuned_barrier_ompi(MPI_Comm comm) +int Coll_barrier_ompi::barrier(MPI_Comm comm) { int communicator_size = comm->size(); if( 2 == communicator_size ) - return smpi_coll_tuned_barrier_ompi_two_procs(comm); + return Coll_barrier_ompi_two_procs::barrier(comm); /* * Basic optimisation. If we have a power of 2 number of nodes*/ /* * the use the recursive doubling algorithm, otherwise*/ /* * bruck is the one we want.*/ @@ -113,15 +115,15 @@ int smpi_coll_tuned_barrier_ompi(MPI_Comm comm) for( ; communicator_size > 0; communicator_size >>= 1 ) { if( communicator_size & 0x1 ) { if( has_one ) - return smpi_coll_tuned_barrier_ompi_bruck(comm); + return Coll_barrier_ompi_bruck::barrier(comm); has_one = 1; } } } - return smpi_coll_tuned_barrier_ompi_recursivedoubling(comm); + return Coll_barrier_ompi_recursivedoubling::barrier(comm); } -int smpi_coll_tuned_bcast_ompi(void *buff, int count, +int Coll_bcast_ompi::bcast(void *buff, int count, MPI_Datatype datatype, int root, MPI_Comm comm ) @@ -151,12 +153,12 @@ int smpi_coll_tuned_bcast_ompi(void *buff, int count, single-element broadcasts */ if ((message_size < small_message_size) || (count <= 1)) { /* Binomial without segmentation */ - return smpi_coll_tuned_bcast_binomial_tree (buff, count, datatype, + return Coll_bcast_binomial_tree::bcast (buff, count, datatype, root, comm); } else if (message_size < intermediate_message_size) { // SplittedBinary with 1KB segments - return smpi_coll_tuned_bcast_ompi_split_bintree(buff, count, datatype, + return Coll_bcast_ompi_split_bintree::bcast(buff, count, datatype, root, comm); } @@ -164,65 +166,65 @@ int smpi_coll_tuned_bcast_ompi(void *buff, int count, else if (communicator_size < (a_p128 * message_size + b_p128)) { //Pipeline with 128KB segments //segsize = 1024 << 7; - return smpi_coll_tuned_bcast_ompi_pipeline (buff, count, datatype, + return Coll_bcast_ompi_pipeline::bcast (buff, count, datatype, root, comm); } else if (communicator_size < 13) { // Split Binary with 8KB segments - return smpi_coll_tuned_bcast_ompi_split_bintree(buff, count, datatype, + return Coll_bcast_ompi_split_bintree::bcast(buff, count, datatype, root, comm); } else if (communicator_size < (a_p64 * message_size + b_p64)) { // Pipeline with 64KB segments //segsize = 1024 << 6; - return smpi_coll_tuned_bcast_ompi_pipeline (buff, count, datatype, + return Coll_bcast_ompi_pipeline::bcast (buff, count, datatype, root, comm); } else if (communicator_size < (a_p16 * message_size + b_p16)) { //Pipeline with 16KB segments //segsize = 1024 << 4; - return smpi_coll_tuned_bcast_ompi_pipeline (buff, count, datatype, + return Coll_bcast_ompi_pipeline::bcast (buff, count, datatype, root, comm); } /* Pipeline with 8KB segments */ //segsize = 1024 << 3; - return smpi_coll_tuned_bcast_flattree_pipeline (buff, count, datatype, + return Coll_bcast_flattree_pipeline::bcast (buff, count, datatype, root, comm /*segsize*/); #if 0 /* this is based on gige measurements */ if (communicator_size < 4) { - return smpi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm, module); + return 
Coll_bcast_intra_basic_linear::bcast (buff, count, datatype, root, comm, module); } if (communicator_size == 4) { if (message_size < 524288) segsize = 0; else segsize = 16384; - return smpi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, module, segsize); + return Coll_bcast_intra_bintree::bcast (buff, count, datatype, root, comm, module, segsize); } if (communicator_size <= 8 && message_size < 4096) { - return smpi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm, module); + return Coll_bcast_intra_basic_linear::bcast (buff, count, datatype, root, comm, module); } if (communicator_size > 8 && message_size >= 32768 && message_size < 524288) { segsize = 16384; - return smpi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, module, segsize); + return Coll_bcast_intra_bintree::bcast (buff, count, datatype, root, comm, module, segsize); } if (message_size >= 524288) { segsize = 16384; - return smpi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, module, segsize); + return Coll_bcast_intra_pipeline::bcast (buff, count, datatype, root, comm, module, segsize); } segsize = 0; /* once tested can swap this back in */ - /* return smpi_coll_tuned_bcast_intra_bmtree (buff, count, datatype, root, comm, segsize); */ - return smpi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, module, segsize); + /* return Coll_bcast_intra_bmtree::bcast (buff, count, datatype, root, comm, segsize); */ + return Coll_bcast_intra_bintree::bcast (buff, count, datatype, root, comm, module, segsize); #endif /* 0 */ } -int smpi_coll_tuned_reduce_ompi( void *sendbuf, void *recvbuf, +int Coll_reduce_ompi::reduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm @@ -255,35 +257,35 @@ int smpi_coll_tuned_reduce_ompi( void *sendbuf, void *recvbuf, */ if( (op!=MPI_OP_NULL) && !op->is_commutative() ) { if ((communicator_size < 12) && (message_size < 2048)) { - return smpi_coll_tuned_reduce_ompi_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm/*, module*/); + return Coll_reduce_ompi_basic_linear::reduce (sendbuf, recvbuf, count, datatype, op, root, comm/*, module*/); } - return smpi_coll_tuned_reduce_ompi_in_order_binary (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, + return Coll_reduce_ompi_in_order_binary::reduce (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, 0, max_requests*/); } if ((communicator_size < 8) && (message_size < 512)){ /* Linear_0K */ - return smpi_coll_tuned_reduce_ompi_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm); + return Coll_reduce_ompi_basic_linear::reduce (sendbuf, recvbuf, count, datatype, op, root, comm); } else if (((communicator_size < 8) && (message_size < 20480)) || (message_size < 2048) || (count <= 1)) { /* Binomial_0K */ //segsize = 0; - return smpi_coll_tuned_reduce_ompi_binomial(sendbuf, recvbuf, count, datatype, op, root, comm/*, module, + return Coll_reduce_ompi_binomial::reduce(sendbuf, recvbuf, count, datatype, op, root, comm/*, module, segsize, max_requests*/); } else if (communicator_size > (a1 * message_size + b1)) { // Binomial_1K //segsize = 1024; - return smpi_coll_tuned_reduce_ompi_binomial(sendbuf, recvbuf, count, datatype, op, root, comm/*, module, + return Coll_reduce_ompi_binomial::reduce(sendbuf, recvbuf, count, datatype, op, root, comm/*, module, segsize, max_requests*/); } else if (communicator_size > (a2 * message_size + b2)) { // Pipeline_1K //segsize = 1024; - return 
smpi_coll_tuned_reduce_ompi_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, + return Coll_reduce_ompi_pipeline::reduce (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, segsize, max_requests*/); } else if (communicator_size > (a3 * message_size + b3)) { // Binary_32K //segsize = 32*1024; - return smpi_coll_tuned_reduce_ompi_binary( sendbuf, recvbuf, count, datatype, op, root, + return Coll_reduce_ompi_binary::reduce( sendbuf, recvbuf, count, datatype, op, root, comm/*, module, segsize, max_requests*/); } // if (communicator_size > (a4 * message_size + b4)) { @@ -293,7 +295,7 @@ int smpi_coll_tuned_reduce_ompi( void *sendbuf, void *recvbuf, // Pipeline_64K // segsize = 64*1024; // } - return smpi_coll_tuned_reduce_ompi_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, + return Coll_reduce_ompi_pipeline::reduce (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, segsize, max_requests*/); #if 0 @@ -303,8 +305,8 @@ int smpi_coll_tuned_reduce_ompi( void *sendbuf, void *recvbuf, fanout = communicator_size - 1; /* when linear implemented or taken from basic put here, right now using chain as a linear system */ /* it is implemented and I shouldn't be calling a chain with a fanout bigger than MAXTREEFANOUT from topo.h! */ - return smpi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm, module); - /* return smpi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout); */ + return Coll_reduce_intra_basic_linear::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, module); + /* return Coll_reduce_intra_chain::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout); */ } if (message_size < 524288) { if (message_size <= 65536 ) { @@ -316,16 +318,16 @@ int smpi_coll_tuned_reduce_ompi( void *sendbuf, void *recvbuf, } /* later swap this for a binary tree */ /* fanout = 2; */ - return smpi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, module, + return Coll_reduce_intra_chain::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, module, segsize, fanout, max_requests); } segsize = 1024; - return smpi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, module, + return Coll_reduce_intra_pipeline::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, module, segsize, max_requests); #endif /* 0 */ } -int smpi_coll_tuned_reduce_scatter_ompi( void *sbuf, void *rbuf, +int Coll_reduce_scatter_ompi::reduce_scatter( void *sbuf, void *rbuf, int *rcounts, MPI_Datatype dtype, MPI_Op op, @@ -340,7 +342,7 @@ int smpi_coll_tuned_reduce_scatter_ompi( void *sbuf, void *rbuf, const size_t large_message_size = 256 * 1024; int zerocounts = 0; - XBT_DEBUG("smpi_coll_tuned_reduce_scatter_ompi"); + XBT_DEBUG("Coll_reduce_scatter_ompi::reduce_scatter"); comm_size = comm->size(); // We need data size for decision function @@ -354,7 +356,7 @@ int smpi_coll_tuned_reduce_scatter_ompi( void *sbuf, void *rbuf, } if( ((op!=MPI_OP_NULL) && !op->is_commutative()) || (zerocounts)) { - smpi_mpi_reduce_scatter (sbuf, rbuf, rcounts, + Coll_reduce_scatter_default::reduce_scatter (sbuf, rbuf, rcounts, dtype, op, comm); return MPI_SUCCESS; @@ -369,11 +371,11 @@ int smpi_coll_tuned_reduce_scatter_ompi( void *sbuf, void *rbuf, ((total_message_size <= large_message_size) && (pow2 == comm_size)) || (comm_size >= a * total_message_size + b)) { return - 
smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(sbuf, rbuf, rcounts, + Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm); } - return smpi_coll_tuned_reduce_scatter_ompi_ring(sbuf, rbuf, rcounts, + return Coll_reduce_scatter_ompi_ring::reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm); @@ -381,7 +383,7 @@ int smpi_coll_tuned_reduce_scatter_ompi( void *sbuf, void *rbuf, } -int smpi_coll_tuned_allgather_ompi(void *sbuf, int scount, +int Coll_allgather_ompi::allgather(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, @@ -395,7 +397,7 @@ int smpi_coll_tuned_allgather_ompi(void *sbuf, int scount, /* Special case for 2 processes */ if (communicator_size == 2) { - return smpi_coll_tuned_allgather_pair (sbuf, scount, sdtype, + return Coll_allgather_pair::allgather (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm/*, module*/); } @@ -416,21 +418,21 @@ int smpi_coll_tuned_allgather_ompi(void *sbuf, int scount, */ if (total_dsize < 50000) { if (pow2_size == communicator_size) { - return smpi_coll_tuned_allgather_rdb(sbuf, scount, sdtype, + return Coll_allgather_rdb::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } else { - return smpi_coll_tuned_allgather_bruck(sbuf, scount, sdtype, + return Coll_allgather_bruck::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } } else { if (communicator_size % 2) { - return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype, + return Coll_allgather_ring::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } else { - return smpi_coll_tuned_allgather_ompi_neighborexchange(sbuf, scount, sdtype, + return Coll_allgather_ompi_neighborexchange::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } @@ -447,21 +449,21 @@ int smpi_coll_tuned_allgather_ompi(void *sbuf, int scount, - for everything else use ring. */ if ((pow2_size == communicator_size) && (total_dsize < 524288)) { - return smpi_coll_tuned_allgather_rdb(sbuf, scount, sdtype, + return Coll_allgather_rdb::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } else if (total_dsize <= 81920) { - return smpi_coll_tuned_allgather_bruck(sbuf, scount, sdtype, + return Coll_allgather_bruck::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } - return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype, + return Coll_allgather_ring::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); #endif /* defined(USE_MPICH2_DECISION) */ } -int smpi_coll_tuned_allgatherv_ompi(void *sbuf, int scount, +int Coll_allgatherv_ompi::allgatherv(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int *rcounts, int *rdispls, @@ -477,7 +479,7 @@ int smpi_coll_tuned_allgatherv_ompi(void *sbuf, int scount, /* Special case for 2 processes */ if (communicator_size == 2) { - return smpi_coll_tuned_allgatherv_pair(sbuf, scount, sdtype, + return Coll_allgatherv_pair::allgatherv(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm); } @@ -491,27 +493,27 @@ int smpi_coll_tuned_allgatherv_ompi(void *sbuf, int scount, /* Decision based on allgather decision. 
*/ if (total_dsize < 50000) { -/* return smpi_coll_tuned_allgatherv_intra_bruck(sbuf, scount, sdtype, +/* return Coll_allgatherv_intra_bruck::allgatherv(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm, module);*/ - return smpi_coll_tuned_allgatherv_ring(sbuf, scount, sdtype, + return Coll_allgatherv_ring::allgatherv(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm); } else { if (communicator_size % 2) { - return smpi_coll_tuned_allgatherv_ring(sbuf, scount, sdtype, + return Coll_allgatherv_ring::allgatherv(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm); } else { - return smpi_coll_tuned_allgatherv_ompi_neighborexchange(sbuf, scount, sdtype, + return Coll_allgatherv_ompi_neighborexchange::allgatherv(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm); } } } -int smpi_coll_tuned_gather_ompi(void *sbuf, int scount, +int Coll_gather_ompi::gather(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, @@ -552,25 +554,26 @@ int smpi_coll_tuned_gather_ompi(void *sbuf, int scount, /* root, comm);*/ /* } else*/ if (block_size > intermediate_block_size) { - return smpi_coll_tuned_gather_ompi_linear_sync (sbuf, scount, sdtype, + return Coll_gather_ompi_linear_sync::gather (sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm); } else if ((communicator_size > large_communicator_size) || ((communicator_size > small_communicator_size) && (block_size < small_block_size))) { - return smpi_coll_tuned_gather_ompi_binomial (sbuf, scount, sdtype, + return Coll_gather_ompi_binomial::gather (sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm); } // Otherwise, use basic linear - return smpi_coll_tuned_gather_ompi_basic_linear (sbuf, scount, sdtype, + return Coll_gather_ompi_basic_linear::gather (sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm); } -int smpi_coll_tuned_scatter_ompi(void *sbuf, int scount, + +int Coll_scatter_ompi::scatter(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, @@ -582,7 +585,7 @@ int smpi_coll_tuned_scatter_ompi(void *sbuf, int scount, int communicator_size, rank; size_t dsize, block_size; - XBT_DEBUG("smpi_coll_tuned_scatter_ompi"); + XBT_DEBUG("Coll_scatter_ompi::scatter"); communicator_size = comm->size(); rank = comm->rank(); @@ -602,7 +605,7 @@ int smpi_coll_tuned_scatter_ompi(void *sbuf, int scount, scount=rcount; sdtype=rdtype; } - int ret=smpi_coll_tuned_scatter_ompi_binomial (sbuf, scount, sdtype, + int ret=Coll_scatter_ompi_binomial::scatter (sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm); if(rank!=root){ @@ -610,8 +613,10 @@ int smpi_coll_tuned_scatter_ompi(void *sbuf, int scount, } return ret; } - return smpi_coll_tuned_scatter_ompi_basic_linear (sbuf, scount, sdtype, + return Coll_scatter_ompi_basic_linear::scatter (sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm); } +} +} diff --git a/src/smpi/private.h b/src/smpi/private.h index e2c0cda3d6..a324d2de05 100644 --- a/src/smpi/private.h +++ b/src/smpi/private.h @@ -17,6 +17,7 @@ #include "xbt/xbt_os_time.h" #include "src/smpi/smpi_f2c.hpp" #include "src/smpi/smpi_group.hpp" +#include "src/smpi/smpi_coll.hpp" #include "src/smpi/smpi_comm.hpp" #include "src/smpi/smpi_info.hpp" #include "src/smpi/smpi_op.hpp" @@ -134,43 +135,7 @@ XBT_PRIVATE void smpi_mpi_init(); XBT_PRIVATE void smpi_empty_status(MPI_Status * status); XBT_PRIVATE int smpi_mpi_get_count(MPI_Status * status, MPI_Datatype datatype); -XBT_PRIVATE int smpi_info_c2f(MPI_Info info); -XBT_PRIVATE int smpi_info_add_f(MPI_Info 
info); -XBT_PRIVATE MPI_Info smpi_info_f2c(int info); - - -XBT_PRIVATE void smpi_mpi_bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_barrier(MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_gather(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_reduce_scatter(void *sendbuf, void *recvbuf, int *recvcounts, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, int root, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, - int recvcount, MPI_Datatype recvtype, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, - int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_scatter(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_scatterv(void *sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, - void *recvbuf, int recvcount,MPI_Datatype recvtype, int root, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, - MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_allreduce(void *sendbuf, void *recvbuf, int count,MPI_Datatype datatype, MPI_Op op, - MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_scan(void *sendbuf, void *recvbuf, int count,MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, - MPI_Comm comm); - -XBT_PRIVATE int smpi_coll_tuned_alltoall_ompi2(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, - int recvcount, MPI_Datatype recvtype, MPI_Comm comm); -XBT_PRIVATE int smpi_coll_tuned_alltoall_bruck(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, - int recvcount, MPI_Datatype recvtype, MPI_Comm comm); -XBT_PRIVATE int smpi_coll_tuned_alltoall_basic_linear(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); -XBT_PRIVATE int smpi_coll_basic_alltoallv(void *sendbuf, int *sendcounts, int *senddisps, MPI_Datatype sendtype, - void *recvbuf, int *recvcounts, int *recvdisps, MPI_Datatype recvtype, MPI_Comm comm); + XBT_PRIVATE int smpi_comm_keyval_create(MPI_Comm_copy_attr_function* copy_fn, MPI_Comm_delete_attr_function* delete_fn, int* keyval, void* extra_state); XBT_PRIVATE int smpi_comm_keyval_free(int* keyval); diff --git a/src/smpi/smpi_base.cpp b/src/smpi/smpi_base.cpp index 27d4471fce..86f22315cc 100644 --- a/src/smpi/smpi_base.cpp +++ b/src/smpi/smpi_base.cpp @@ -52,460 +52,6 @@ double smpi_mpi_wtime(){ return time; } -void smpi_mpi_bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) -{ - smpi_coll_tuned_bcast_binomial_tree(buf, count, datatype, root, comm); -} - -void smpi_mpi_barrier(MPI_Comm comm) -{ - smpi_coll_tuned_barrier_ompi_basic_linear(comm); -} - -void smpi_mpi_gather(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) -{ - int system_tag = COLL_TAG_GATHER; - MPI_Aint lb = 0; - MPI_Aint 
recvext = 0; - - int rank = comm->rank(); - int size = comm->size(); - if(rank != root) { - // Send buffer to root - Request::send(sendbuf, sendcount, sendtype, root, system_tag, comm); - } else { - recvtype->extent(&lb, &recvext); - // Local copy from root - Datatype::copy(sendbuf, sendcount, sendtype, static_cast(recvbuf) + root * recvcount * recvext, - recvcount, recvtype); - // Receive buffers from senders - MPI_Request *requests = xbt_new(MPI_Request, size - 1); - int index = 0; - for (int src = 0; src < size; src++) { - if(src != root) { - requests[index] = Request::irecv_init(static_cast(recvbuf) + src * recvcount * recvext, recvcount, recvtype, - src, system_tag, comm); - index++; - } - } - // Wait for completion of irecv's. - Request::startall(size - 1, requests); - Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); - for (int src = 0; src < size-1; src++) { - Request::unref(&requests[src]); - } - xbt_free(requests); - } -} - -void smpi_mpi_reduce_scatter(void *sendbuf, void *recvbuf, int *recvcounts, MPI_Datatype datatype, MPI_Op op, - MPI_Comm comm) -{ - int rank = comm->rank(); - - /* arbitrarily choose root as rank 0 */ - int size = comm->size(); - int count = 0; - int *displs = xbt_new(int, size); - for (int i = 0; i < size; i++) { - displs[i] = count; - count += recvcounts[i]; - } - void *tmpbuf = static_cast(smpi_get_tmp_sendbuffer(count*datatype->get_extent())); - - mpi_coll_reduce_fun(sendbuf, tmpbuf, count, datatype, op, 0, comm); - smpi_mpi_scatterv(tmpbuf, recvcounts, displs, datatype, recvbuf, recvcounts[rank], datatype, 0, comm); - xbt_free(displs); - smpi_free_tmp_buffer(tmpbuf); -} - -void smpi_mpi_gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *displs, - MPI_Datatype recvtype, int root, MPI_Comm comm) -{ - int system_tag = COLL_TAG_GATHERV; - MPI_Aint lb = 0; - MPI_Aint recvext = 0; - - int rank = comm->rank(); - int size = comm->size(); - if (rank != root) { - // Send buffer to root - Request::send(sendbuf, sendcount, sendtype, root, system_tag, comm); - } else { - recvtype->extent(&lb, &recvext); - // Local copy from root - Datatype::copy(sendbuf, sendcount, sendtype, static_cast(recvbuf) + displs[root] * recvext, - recvcounts[root], recvtype); - // Receive buffers from senders - MPI_Request *requests = xbt_new(MPI_Request, size - 1); - int index = 0; - for (int src = 0; src < size; src++) { - if(src != root) { - requests[index] = Request::irecv_init(static_cast(recvbuf) + displs[src] * recvext, - recvcounts[src], recvtype, src, system_tag, comm); - index++; - } - } - // Wait for completion of irecv's. 
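
The default implementations removed from smpi_base.cpp in this hunk are not lost: code that needs the plain default explicitly now calls the matching Coll_*_default class (as smpi_comm.cpp does further down with Coll_gather_default and Coll_allreduce_default), while generic code goes through the configurable Colls:: pointers. A hedged sketch of the two call styles, assuming the include path used by private.h in this patch:

#include "src/smpi/smpi_coll.hpp"   // Colls and the generated Coll_* classes
using namespace simgrid::smpi;

// Sketch only; buffers and communicator are assumed valid.
static int gather_two_ints(void* sendbuf, void* recvbuf, MPI_Comm comm) {
  // Generic code goes through the pointer set up by Colls::set_collectives():
  return Colls::gather(sendbuf, 2, MPI_INT, recvbuf, 2, MPI_INT, 0, comm);
  // Code that must not depend on the user's smpi/gather choice pins the algorithm instead:
  //   return Coll_gather_default::gather(sendbuf, 2, MPI_INT, recvbuf, 2, MPI_INT, 0, comm);
}
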
- Request::startall(size - 1, requests); - Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); - for (int src = 0; src < size-1; src++) { - Request::unref(&requests[src]); - } - xbt_free(requests); - } -} - -void smpi_mpi_allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf,int recvcount, MPI_Datatype recvtype, MPI_Comm comm) -{ - int system_tag = COLL_TAG_ALLGATHER; - MPI_Aint lb = 0; - MPI_Aint recvext = 0; - MPI_Request *requests; - - int rank = comm->rank(); - int size = comm->size(); - // FIXME: check for errors - recvtype->extent(&lb, &recvext); - // Local copy from self - Datatype::copy(sendbuf, sendcount, sendtype, static_cast(recvbuf) + rank * recvcount * recvext, recvcount, - recvtype); - // Send/Recv buffers to/from others; - requests = xbt_new(MPI_Request, 2 * (size - 1)); - int index = 0; - for (int other = 0; other < size; other++) { - if(other != rank) { - requests[index] = Request::isend_init(sendbuf, sendcount, sendtype, other, system_tag,comm); - index++; - requests[index] = Request::irecv_init(static_cast(recvbuf) + other * recvcount * recvext, recvcount, recvtype, - other, system_tag, comm); - index++; - } - } - // Wait for completion of all comms. - Request::startall(2 * (size - 1), requests); - Request::waitall(2 * (size - 1), requests, MPI_STATUS_IGNORE); - for (int other = 0; other < 2*(size-1); other++) { - Request::unref(&requests[other]); - } - xbt_free(requests); -} - -void smpi_mpi_allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, - int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm) -{ - int system_tag = COLL_TAG_ALLGATHERV; - MPI_Aint lb = 0; - MPI_Aint recvext = 0; - - int rank = comm->rank(); - int size = comm->size(); - recvtype->extent(&lb, &recvext); - // Local copy from self - Datatype::copy(sendbuf, sendcount, sendtype, - static_cast(recvbuf) + displs[rank] * recvext,recvcounts[rank], recvtype); - // Send buffers to others; - MPI_Request *requests = xbt_new(MPI_Request, 2 * (size - 1)); - int index = 0; - for (int other = 0; other < size; other++) { - if(other != rank) { - requests[index] = - Request::isend_init(sendbuf, sendcount, sendtype, other, system_tag, comm); - index++; - requests[index] = Request::irecv_init(static_cast(recvbuf) + displs[other] * recvext, recvcounts[other], - recvtype, other, system_tag, comm); - index++; - } - } - // Wait for completion of all comms. 
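
All of the implementations being moved in these hunks compute buffer offsets the same way: query the datatype's extent and advance a char pointer by count times extent bytes, never by sizeof. A small sketch of that idiom with invented names (the *v variants use displs[src] * recvext instead of src * recvcount * recvext):

// Where rank `src`'s block lands in a gather/allgather receive buffer.
static void* block_for(void* recvbuf, int src, int recvcount, MPI_Datatype recvtype) {
  MPI_Aint lb = 0;
  MPI_Aint recvext = 0;
  recvtype->extent(&lb, &recvext);   // extent (not size), so datatype padding is honoured
  return static_cast<char*>(recvbuf) + static_cast<MPI_Aint>(src) * recvcount * recvext;
}
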
- Request::startall(2 * (size - 1), requests); - Request::waitall(2 * (size - 1), requests, MPI_STATUS_IGNORE); - for (int other = 0; other < 2*(size-1); other++) { - Request::unref(&requests[other]); - } - xbt_free(requests); -} - -void smpi_mpi_scatter(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) -{ - int system_tag = COLL_TAG_SCATTER; - MPI_Aint lb = 0; - MPI_Aint sendext = 0; - MPI_Request *requests; - - int rank = comm->rank(); - int size = comm->size(); - if(rank != root) { - // Recv buffer from root - Request::recv(recvbuf, recvcount, recvtype, root, system_tag, comm, MPI_STATUS_IGNORE); - } else { - sendtype->extent(&lb, &sendext); - // Local copy from root - if(recvbuf!=MPI_IN_PLACE){ - Datatype::copy(static_cast(sendbuf) + root * sendcount * sendext, - sendcount, sendtype, recvbuf, recvcount, recvtype); - } - // Send buffers to receivers - requests = xbt_new(MPI_Request, size - 1); - int index = 0; - for(int dst = 0; dst < size; dst++) { - if(dst != root) { - requests[index] = Request::isend_init(static_cast(sendbuf) + dst * sendcount * sendext, sendcount, sendtype, - dst, system_tag, comm); - index++; - } - } - // Wait for completion of isend's. - Request::startall(size - 1, requests); - Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); - for (int dst = 0; dst < size-1; dst++) { - Request::unref(&requests[dst]); - } - xbt_free(requests); - } -} - -void smpi_mpi_scatterv(void *sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, - MPI_Datatype recvtype, int root, MPI_Comm comm) -{ - int system_tag = COLL_TAG_SCATTERV; - MPI_Aint lb = 0; - MPI_Aint sendext = 0; - - int rank = comm->rank(); - int size = comm->size(); - if(rank != root) { - // Recv buffer from root - Request::recv(recvbuf, recvcount, recvtype, root, system_tag, comm, MPI_STATUS_IGNORE); - } else { - sendtype->extent(&lb, &sendext); - // Local copy from root - if(recvbuf!=MPI_IN_PLACE){ - Datatype::copy(static_cast(sendbuf) + displs[root] * sendext, sendcounts[root], - sendtype, recvbuf, recvcount, recvtype); - } - // Send buffers to receivers - MPI_Request *requests = xbt_new(MPI_Request, size - 1); - int index = 0; - for (int dst = 0; dst < size; dst++) { - if (dst != root) { - requests[index] = Request::isend_init(static_cast(sendbuf) + displs[dst] * sendext, sendcounts[dst], - sendtype, dst, system_tag, comm); - index++; - } - } - // Wait for completion of isend's. 
- Request::startall(size - 1, requests); - Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); - for (int dst = 0; dst < size-1; dst++) { - Request::unref(&requests[dst]); - } - xbt_free(requests); - } -} - -void smpi_mpi_reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, - MPI_Comm comm) -{ - int system_tag = COLL_TAG_REDUCE; - MPI_Aint lb = 0; - MPI_Aint dataext = 0; - - char* sendtmpbuf = static_cast(sendbuf); - - int rank = comm->rank(); - int size = comm->size(); - //non commutative case, use a working algo from openmpi - if(op != MPI_OP_NULL && !op->is_commutative()){ - smpi_coll_tuned_reduce_ompi_basic_linear(sendtmpbuf, recvbuf, count, datatype, op, root, comm); - return; - } - - if( sendbuf == MPI_IN_PLACE ) { - sendtmpbuf = static_cast(smpi_get_tmp_sendbuffer(count*datatype->get_extent())); - Datatype::copy(recvbuf, count, datatype,sendtmpbuf, count, datatype); - } - - if(rank != root) { - // Send buffer to root - Request::send(sendtmpbuf, count, datatype, root, system_tag, comm); - } else { - datatype->extent(&lb, &dataext); - // Local copy from root - if (sendtmpbuf != nullptr && recvbuf != nullptr) - Datatype::copy(sendtmpbuf, count, datatype, recvbuf, count, datatype); - // Receive buffers from senders - MPI_Request *requests = xbt_new(MPI_Request, size - 1); - void **tmpbufs = xbt_new(void *, size - 1); - int index = 0; - for (int src = 0; src < size; src++) { - if (src != root) { - if (!smpi_process_get_replaying()) - tmpbufs[index] = xbt_malloc(count * dataext); - else - tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); - requests[index] = - Request::irecv_init(tmpbufs[index], count, datatype, src, system_tag, comm); - index++; - } - } - // Wait for completion of irecv's. - Request::startall(size - 1, requests); - for (int src = 0; src < size - 1; src++) { - index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); - XBT_DEBUG("finished waiting any request with index %d", index); - if(index == MPI_UNDEFINED) { - break; - }else{ - Request::unref(&requests[index]); - } - if (op != MPI_OP_NULL) /* op can be MPI_OP_NULL that does nothing */ - op->apply(tmpbufs[index], recvbuf, &count, datatype); - } - for(index = 0; index < size - 1; index++) { - smpi_free_tmp_buffer(tmpbufs[index]); - } - xbt_free(tmpbufs); - xbt_free(requests); - - } - if( sendbuf == MPI_IN_PLACE ) { - smpi_free_tmp_buffer(sendtmpbuf); - } -} - -void smpi_mpi_allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) -{ - smpi_mpi_reduce(sendbuf, recvbuf, count, datatype, op, 0, comm); - smpi_mpi_bcast(recvbuf, count, datatype, 0, comm); -} - -void smpi_mpi_scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) -{ - int system_tag = -888; - MPI_Aint lb = 0; - MPI_Aint dataext = 0; - - int rank = comm->rank(); - int size = comm->size(); - - datatype->extent(&lb, &dataext); - - // Local copy from self - Datatype::copy(sendbuf, count, datatype, recvbuf, count, datatype); - - // Send/Recv buffers to/from others; - MPI_Request *requests = xbt_new(MPI_Request, size - 1); - void **tmpbufs = xbt_new(void *, rank); - int index = 0; - for (int other = 0; other < rank; other++) { - tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); - requests[index] = Request::irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm); - index++; - } - for (int other = rank + 1; other < size; other++) { - requests[index] = Request::isend_init(sendbuf, count, datatype, other, 
system_tag, comm); - index++; - } - // Wait for completion of all comms. - Request::startall(size - 1, requests); - - if(op != MPI_OP_NULL && op->is_commutative()){ - for (int other = 0; other < size - 1; other++) { - index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); - if(index == MPI_UNDEFINED) { - break; - } - if (index < rank) - // #Request is below rank: it's a irecv. - op->apply(tmpbufs[index], recvbuf, &count, datatype); - } - }else{ - //non commutative case, wait in order - for (int other = 0; other < size - 1; other++) { - Request::wait(&(requests[other]), MPI_STATUS_IGNORE); - if (index < rank && op != MPI_OP_NULL) - op->apply(tmpbufs[other], recvbuf, &count, datatype); - } - } - for(index = 0; index < rank; index++) { - smpi_free_tmp_buffer(tmpbufs[index]); - } - for(index = 0; index < size-1; index++) { - Request::unref(&requests[index]); - } - xbt_free(tmpbufs); - xbt_free(requests); -} - -void smpi_mpi_exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) -{ - int system_tag = -888; - MPI_Aint lb = 0; - MPI_Aint dataext = 0; - int recvbuf_is_empty=1; - int rank = comm->rank(); - int size = comm->size(); - - datatype->extent(&lb, &dataext); - - // Send/Recv buffers to/from others; - MPI_Request *requests = xbt_new(MPI_Request, size - 1); - void **tmpbufs = xbt_new(void *, rank); - int index = 0; - for (int other = 0; other < rank; other++) { - tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); - requests[index] = Request::irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm); - index++; - } - for (int other = rank + 1; other < size; other++) { - requests[index] = Request::isend_init(sendbuf, count, datatype, other, system_tag, comm); - index++; - } - // Wait for completion of all comms. 
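
The scan and exscan bodies deleted here reappear essentially unchanged as Colls::scan and Colls::exscan in smpi_coll.cpp below; the only semantic difference between the two is whether a rank's own contribution is included. A small worked sketch with MPI_SUM (the unqualified Colls assumes a using-directive for simgrid::smpi):

static void prefix_sum_demo(int rank) {
  int in = rank + 1;                 // ranks 0..3 would hold 1, 2, 3, 4
  int incl = 0;
  int excl = 0;
  Colls::scan(&in, &incl, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
  // incl per rank: 1, 3, 6, 10 -- own value included
  Colls::exscan(&in, &excl, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
  // excl per rank: (untouched), 1, 3, 6 -- rank 0 receives nothing, recvbuf_is_empty stays set
}
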
- Request::startall(size - 1, requests); - - if(op != MPI_OP_NULL && op->is_commutative()){ - for (int other = 0; other < size - 1; other++) { - index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); - if(index == MPI_UNDEFINED) { - break; - } - if(index < rank) { - if(recvbuf_is_empty){ - Datatype::copy(tmpbufs[index], count, datatype, recvbuf, count, datatype); - recvbuf_is_empty=0; - } else - // #Request is below rank: it's a irecv - op->apply(tmpbufs[index], recvbuf, &count, datatype); - } - } - }else{ - //non commutative case, wait in order - for (int other = 0; other < size - 1; other++) { - Request::wait(&(requests[other]), MPI_STATUS_IGNORE); - if(index < rank) { - if (recvbuf_is_empty) { - Datatype::copy(tmpbufs[other], count, datatype, recvbuf, count, datatype); - recvbuf_is_empty = 0; - } else - if(op!=MPI_OP_NULL) op->apply( tmpbufs[other], recvbuf, &count, datatype); - } - } - } - for(index = 0; index < rank; index++) { - smpi_free_tmp_buffer(tmpbufs[index]); - } - for(index = 0; index < size-1; index++) { - Request::unref(&requests[index]); - } - xbt_free(tmpbufs); - xbt_free(requests); -} - void smpi_empty_status(MPI_Status * status) { if(status != MPI_STATUS_IGNORE) { diff --git a/src/smpi/smpi_coll.cpp b/src/smpi/smpi_coll.cpp index 15d583b86c..37a3612e43 100644 --- a/src/smpi/smpi_coll.cpp +++ b/src/smpi/smpi_coll.cpp @@ -17,51 +17,38 @@ XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_coll, smpi, "Logging specific to SMPI (coll)"); s_mpi_coll_description_t mpi_coll_gather_description[] = { - {"default", "gather default collective", reinterpret_cast(&smpi_mpi_gather)}, COLL_GATHERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */ }; -s_mpi_coll_description_t mpi_coll_allgather_description[] = { {"default", "allgather default collective", - reinterpret_cast(&smpi_mpi_allgather)}, COLL_ALLGATHERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; +s_mpi_coll_description_t mpi_coll_allgather_description[] = { + COLL_ALLGATHERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} +}; -s_mpi_coll_description_t mpi_coll_allgatherv_description[] = {{"default", "allgatherv default collective", - reinterpret_cast(&smpi_mpi_allgatherv)}, COLL_ALLGATHERVS(COLL_DESCRIPTION, COLL_COMMA), +s_mpi_coll_description_t mpi_coll_allgatherv_description[] = { COLL_ALLGATHERVS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */ }; -s_mpi_coll_description_t mpi_coll_allreduce_description[] = { {"default", "allreduce default collective", - reinterpret_cast(&smpi_mpi_allreduce)}, COLL_ALLREDUCES(COLL_DESCRIPTION, COLL_COMMA), +s_mpi_coll_description_t mpi_coll_allreduce_description[] ={ COLL_ALLREDUCES(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */ }; -s_mpi_coll_description_t mpi_coll_reduce_scatter_description[] = {{"default", "reduce_scatter default collective", - reinterpret_cast(&smpi_mpi_reduce_scatter)}, COLL_REDUCE_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), +s_mpi_coll_description_t mpi_coll_reduce_scatter_description[] = {COLL_REDUCE_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */ }; -s_mpi_coll_description_t mpi_coll_scatter_description[] = { {"default", "scatter default collective", - reinterpret_cast(&smpi_mpi_scatter)}, COLL_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; +s_mpi_coll_description_t mpi_coll_scatter_description[] 
={COLL_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; -s_mpi_coll_description_t mpi_coll_barrier_description[] = { {"default", "barrier default collective", - reinterpret_cast(&smpi_mpi_barrier)}, COLL_BARRIERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; +s_mpi_coll_description_t mpi_coll_barrier_description[] ={COLL_BARRIERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; -s_mpi_coll_description_t mpi_coll_alltoall_description[] = { {"default", "Ompi alltoall default collective", - reinterpret_cast(&smpi_coll_tuned_alltoall_ompi2)}, COLL_ALLTOALLS(COLL_DESCRIPTION, COLL_COMMA), - {"bruck", "Alltoall Bruck (SG) collective", - reinterpret_cast(&smpi_coll_tuned_alltoall_bruck)}, - {"basic_linear", "Alltoall basic linear (SG) collective", - reinterpret_cast(&smpi_coll_tuned_alltoall_basic_linear)}, {nullptr, nullptr, nullptr}}; +s_mpi_coll_description_t mpi_coll_alltoall_description[] = {COLL_ALLTOALLS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; -s_mpi_coll_description_t mpi_coll_alltoallv_description[] = { {"default", "Ompi alltoallv default collective", - reinterpret_cast(&smpi_coll_basic_alltoallv)}, COLL_ALLTOALLVS(COLL_DESCRIPTION, COLL_COMMA), +s_mpi_coll_description_t mpi_coll_alltoallv_description[] = {COLL_ALLTOALLVS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */ }; -s_mpi_coll_description_t mpi_coll_bcast_description[] = { {"default", "bcast default collective ", - reinterpret_cast(&smpi_mpi_bcast)}, COLL_BCASTS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; +s_mpi_coll_description_t mpi_coll_bcast_description[] = {COLL_BCASTS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; -s_mpi_coll_description_t mpi_coll_reduce_description[] = { {"default", "reduce default collective", - reinterpret_cast(&smpi_mpi_reduce)}, COLL_REDUCES(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} }; +s_mpi_coll_description_t mpi_coll_reduce_description[] = {COLL_REDUCES(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} }; @@ -73,7 +60,7 @@ void coll_help(const char *category, s_mpi_coll_description_t * table) printf(" %s: %s\n", table[i].name, table[i].description); } -int find_coll_description(s_mpi_coll_description_t * table, char *name, const char *desc) +int find_coll_description(s_mpi_coll_description_t * table, const char *name, const char *desc) { char *name_list = nullptr; int selector_on=0; @@ -108,204 +95,329 @@ int find_coll_description(s_mpi_coll_description_t * table, char *name, const ch return -1; } -int (*mpi_coll_gather_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, int root, MPI_Comm); -int (*mpi_coll_allgather_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); -int (*mpi_coll_allgatherv_fun)(void *, int, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); -int (*mpi_coll_allreduce_fun)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm); -int (*mpi_coll_alltoall_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); -int (*mpi_coll_alltoallv_fun)(void *, int*, int*, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); -int (*mpi_coll_bcast_fun)(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com); -int (*mpi_coll_reduce_fun)(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); -int (*mpi_coll_reduce_scatter_fun)(void *sbuf, void *rbuf, int *rcounts,MPI_Datatype dtype,MPI_Op 
op,MPI_Comm comm); -int (*mpi_coll_scatter_fun)(void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbuf, int recvcount, MPI_Datatype recvtype,int root, MPI_Comm comm); -int (*mpi_coll_barrier_fun)(MPI_Comm comm); void (*smpi_coll_cleanup_callback)(); +namespace simgrid{ +namespace smpi{ + +int (*Colls::gather)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, int root, MPI_Comm); +int (*Colls::allgather)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); +int (*Colls::allgatherv)(void *, int, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); +int (*Colls::allreduce)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm); +int (*Colls::alltoall)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); +int (*Colls::alltoallv)(void *, int*, int*, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); +int (*Colls::bcast)(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com); +int (*Colls::reduce)(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); +int (*Colls::reduce_scatter)(void *sbuf, void *rbuf, int *rcounts,MPI_Datatype dtype,MPI_Op op,MPI_Comm comm); +int (*Colls::scatter)(void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbuf, int recvcount, MPI_Datatype recvtype,int root, MPI_Comm comm); +int (*Colls::barrier)(MPI_Comm comm); + + +#define COLL_SETTER(cat, ret, args, args2)\ +void Colls::set_##cat (const char * name){\ + int id = find_coll_description(mpi_coll_## cat ##_description,\ + name,#cat);\ + cat = reinterpret_cast\ + (mpi_coll_## cat ##_description[id].coll);\ +} + +COLL_APPLY(COLL_SETTER,COLL_GATHER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLGATHER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLGATHERV_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_REDUCE_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLREDUCE_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_REDUCE_SCATTER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_SCATTER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_BARRIER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_BCAST_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLTOALL_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLTOALLV_SIG,""); + + +void Colls::set_collectives(){ + const char* selector_name = static_cast(xbt_cfg_get_string("smpi/coll-selector")); + if (selector_name==nullptr || selector_name[0] == '\0') + selector_name = "default"; + + const char* name = xbt_cfg_get_string("smpi/gather"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_gather(name); + + name = xbt_cfg_get_string("smpi/allgather"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_allgather(name); + + name = xbt_cfg_get_string("smpi/allgatherv"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_allgatherv(name); + + name = xbt_cfg_get_string("smpi/allreduce"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_allreduce(name); + + name = xbt_cfg_get_string("smpi/alltoall"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_alltoall(name); + + name = xbt_cfg_get_string("smpi/alltoallv"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_alltoallv(name); + + name = xbt_cfg_get_string("smpi/reduce"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_reduce(name); -int smpi_coll_tuned_alltoall_ompi2(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, - int recvcount, MPI_Datatype recvtype, MPI_Comm comm) + name = xbt_cfg_get_string("smpi/reduce-scatter"); + if 
(name==nullptr || name[0] == '\0') + name = selector_name; + + set_reduce_scatter(name); + + name = xbt_cfg_get_string("smpi/scatter"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_scatter(name); + + name = xbt_cfg_get_string("smpi/bcast"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_bcast(name); + + name = xbt_cfg_get_string("smpi/barrier"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_barrier(name); +} + + +int Colls::gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *displs, + MPI_Datatype recvtype, int root, MPI_Comm comm) { + int system_tag = COLL_TAG_GATHERV; + MPI_Aint lb = 0; + MPI_Aint recvext = 0; + + int rank = comm->rank(); int size = comm->size(); - int sendsize = sendtype->size() * sendcount; - if (sendsize < 200 && size > 12) { - return smpi_coll_tuned_alltoall_bruck(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); - } else if (sendsize < 3000) { - return smpi_coll_tuned_alltoall_basic_linear(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); + if (rank != root) { + // Send buffer to root + Request::send(sendbuf, sendcount, sendtype, root, system_tag, comm); } else { - return smpi_coll_tuned_alltoall_ring(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); + recvtype->extent(&lb, &recvext); + // Local copy from root + Datatype::copy(sendbuf, sendcount, sendtype, static_cast(recvbuf) + displs[root] * recvext, + recvcounts[root], recvtype); + // Receive buffers from senders + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + int index = 0; + for (int src = 0; src < size; src++) { + if(src != root) { + requests[index] = Request::irecv_init(static_cast(recvbuf) + displs[src] * recvext, + recvcounts[src], recvtype, src, system_tag, comm); + index++; + } + } + // Wait for completion of irecv's. + Request::startall(size - 1, requests); + Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); + for (int src = 0; src < size-1; src++) { + Request::unref(&requests[src]); + } + xbt_free(requests); } + return MPI_SUCCESS; } -/** - * Alltoall Bruck - * - * Openmpi calls this routine when the message size sent to each rank < 2000 bytes and size < 12 - * FIXME: uh, check smpi_pmpi again, but this routine is called for > 12, not less... - **/ -int smpi_coll_tuned_alltoall_bruck(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) + +int Colls::scatterv(void *sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, + MPI_Datatype recvtype, int root, MPI_Comm comm) { - int system_tag = 777; - int i; - int count; - MPI_Aint lb; + int system_tag = COLL_TAG_SCATTERV; + MPI_Aint lb = 0; MPI_Aint sendext = 0; - MPI_Aint recvext = 0; - MPI_Request *requests; - // FIXME: check implementation int rank = comm->rank(); int size = comm->size(); - XBT_DEBUG("<%d> algorithm alltoall_bruck() called.", rank); - sendtype->extent(&lb, &sendext); - recvtype->extent(&lb, &recvext); - /* Local copy from self */ - int err = Datatype::copy(static_cast(sendbuf) + rank * sendcount * sendext, sendcount, sendtype, - static_cast(recvbuf) + rank * recvcount * recvext, recvcount, recvtype); - if (err == MPI_SUCCESS && size > 1) { - /* Initiate all send/recv to/from others. 
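
The COLL_SETTER macro defined a few lines above generates one Colls::set_<collective>() per collective. Since preprocessor output is hard to read in a diff, here is roughly what the reduce instance expands to; the cast's target type is spelled out here from the Colls::reduce declaration, the macro itself builds it from its ret/args parameters:

void Colls::set_reduce(const char* name) {
  int id = find_coll_description(mpi_coll_reduce_description, name, "reduce");
  reduce = reinterpret_cast<int (*)(void*, void*, int, MPI_Datatype, MPI_Op, int, MPI_Comm)>(
      mpi_coll_reduce_description[id].coll);
}
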
*/ - requests = xbt_new(MPI_Request, 2 * (size - 1)); - count = 0; - /* Create all receives that will be posted first */ - for (i = 0; i < size; ++i) { - if (i != rank) { - requests[count] = Request::irecv_init(static_cast(recvbuf) + i * recvcount * recvext, recvcount, - recvtype, i, system_tag, comm); - count++; - }else{ - XBT_DEBUG("<%d> skip request creation [src = %d, recvcount = %d]", rank, i, recvcount); - } + if(rank != root) { + // Recv buffer from root + Request::recv(recvbuf, recvcount, recvtype, root, system_tag, comm, MPI_STATUS_IGNORE); + } else { + sendtype->extent(&lb, &sendext); + // Local copy from root + if(recvbuf!=MPI_IN_PLACE){ + Datatype::copy(static_cast(sendbuf) + displs[root] * sendext, sendcounts[root], + sendtype, recvbuf, recvcount, recvtype); } - /* Now create all sends */ - for (i = 0; i < size; ++i) { - if (i != rank) { - requests[count] = Request::isend_init(static_cast(sendbuf) + i * sendcount * sendext, sendcount, - sendtype, i, system_tag, comm); - count++; - }else{ - XBT_DEBUG("<%d> skip request creation [dst = %d, sendcount = %d]", rank, i, sendcount); + // Send buffers to receivers + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + int index = 0; + for (int dst = 0; dst < size; dst++) { + if (dst != root) { + requests[index] = Request::isend_init(static_cast(sendbuf) + displs[dst] * sendext, sendcounts[dst], + sendtype, dst, system_tag, comm); + index++; } } - /* Wait for them all. */ - Request::startall(count, requests); - XBT_DEBUG("<%d> wait for %d requests", rank, count); - Request::waitall(count, requests, MPI_STATUS_IGNORE); - for(i = 0; i < count; i++) { - if(requests[i]!=MPI_REQUEST_NULL) - Request::unref(&requests[i]); + // Wait for completion of isend's. + Request::startall(size - 1, requests); + Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); + for (int dst = 0; dst < size-1; dst++) { + Request::unref(&requests[dst]); } xbt_free(requests); } return MPI_SUCCESS; } -/** - * Alltoall basic_linear (STARMPI:alltoall-simple) - **/ -int smpi_coll_tuned_alltoall_basic_linear(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) + +int Colls::scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - int system_tag = 888; - int i; - int count; - MPI_Aint lb = 0, sendext = 0, recvext = 0; - MPI_Request *requests; + int system_tag = -888; + MPI_Aint lb = 0; + MPI_Aint dataext = 0; - /* Initialize. */ int rank = comm->rank(); int size = comm->size(); - XBT_DEBUG("<%d> algorithm alltoall_basic_linear() called.", rank); - sendtype->extent(&lb, &sendext); - recvtype->extent(&lb, &recvext); - /* simple optimization */ - int err = Datatype::copy(static_cast(sendbuf) + rank * sendcount * sendext, sendcount, sendtype, - static_cast(recvbuf) + rank * recvcount * recvext, recvcount, recvtype); - if (err == MPI_SUCCESS && size > 1) { - /* Initiate all send/recv to/from others. */ - requests = xbt_new(MPI_Request, 2 * (size - 1)); - /* Post all receives first -- a simple optimization */ - count = 0; - for (i = (rank + 1) % size; i != rank; i = (i + 1) % size) { - requests[count] = Request::irecv_init(static_cast(recvbuf) + i * recvcount * recvext, recvcount, - recvtype, i, system_tag, comm); - count++; - } - /* Now post all sends in reverse order - * - We would like to minimize the search time through message queue - * when messages actually arrive in the order in which they were posted. 
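
The hand-written bruck and basic_linear alltoalls dropped from smpi_coll.cpp are meant to stay reachable through the description table rather than through private helpers, so forcing a particular variant becomes a configuration matter. A hedged usage sketch, assuming "bruck" is among the names generated by COLL_ALLTOALLS as it was listed in the old table:

// Pick the alltoall algorithm by name, then dispatch through the pointer.
static int alltoall_with_bruck(void* sendbuf, void* recvbuf, int count, MPI_Comm comm) {
  Colls::set_alltoall("bruck");      // name assumed to exist in the generated table
  return Colls::alltoall(sendbuf, count, MPI_INT, recvbuf, count, MPI_INT, comm);
}
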
- * TODO: check the previous assertion - */ - for (i = (rank + size - 1) % size; i != rank; i = (i + size - 1) % size) { - requests[count] = Request::isend_init(static_cast(sendbuf) + i * sendcount * sendext, sendcount, - sendtype, i, system_tag, comm); - count++; + + datatype->extent(&lb, &dataext); + + // Local copy from self + Datatype::copy(sendbuf, count, datatype, recvbuf, count, datatype); + + // Send/Recv buffers to/from others; + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + void **tmpbufs = xbt_new(void *, rank); + int index = 0; + for (int other = 0; other < rank; other++) { + tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); + requests[index] = Request::irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm); + index++; + } + for (int other = rank + 1; other < size; other++) { + requests[index] = Request::isend_init(sendbuf, count, datatype, other, system_tag, comm); + index++; + } + // Wait for completion of all comms. + Request::startall(size - 1, requests); + + if(op != MPI_OP_NULL && op->is_commutative()){ + for (int other = 0; other < size - 1; other++) { + index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); + if(index == MPI_UNDEFINED) { + break; + } + if(index < rank) { + // #Request is below rank: it's a irecv + if(op!=MPI_OP_NULL) op->apply( tmpbufs[index], recvbuf, &count, datatype); + } } - /* Wait for them all. */ - Request::startall(count, requests); - XBT_DEBUG("<%d> wait for %d requests", rank, count); - Request::waitall(count, requests, MPI_STATUS_IGNORE); - for(i = 0; i < count; i++) { - if(requests[i]!=MPI_REQUEST_NULL) - Request::unref(&requests[i]); + }else{ + //non commutative case, wait in order + for (int other = 0; other < size - 1; other++) { + Request::wait(&(requests[other]), MPI_STATUS_IGNORE); + if(index < rank) { + if(op!=MPI_OP_NULL) op->apply( tmpbufs[other], recvbuf, &count, datatype); + } } - xbt_free(requests); } - return err; + for(index = 0; index < rank; index++) { + smpi_free_tmp_buffer(tmpbufs[index]); + } + for(index = 0; index < size-1; index++) { + Request::unref(&requests[index]); + } + xbt_free(tmpbufs); + xbt_free(requests); + return MPI_SUCCESS; } -int smpi_coll_basic_alltoallv(void *sendbuf, int *sendcounts, int *senddisps, MPI_Datatype sendtype, - void *recvbuf, int *recvcounts, int *recvdisps, MPI_Datatype recvtype, MPI_Comm comm) +int Colls::exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - int system_tag = 889; - int i; - int count; - MPI_Aint lb = 0; - MPI_Aint sendext = 0; - MPI_Aint recvext = 0; - MPI_Request *requests; - - /* Initialize. */ + int system_tag = -888; + MPI_Aint lb = 0; + MPI_Aint dataext = 0; + int recvbuf_is_empty=1; int rank = comm->rank(); int size = comm->size(); - XBT_DEBUG("<%d> algorithm basic_alltoallv() called.", rank); - sendtype->extent(&lb, &sendext); - recvtype->extent(&lb, &recvext); - /* Local copy from self */ - int err = Datatype::copy(static_cast(sendbuf) + senddisps[rank] * sendext, sendcounts[rank], sendtype, - static_cast(recvbuf) + recvdisps[rank] * recvext, recvcounts[rank], recvtype); - if (err == MPI_SUCCESS && size > 1) { - /* Initiate all send/recv to/from others. 
*/ - requests = xbt_new(MPI_Request, 2 * (size - 1)); - count = 0; - /* Create all receives that will be posted first */ - for (i = 0; i < size; ++i) { - if (i != rank && recvcounts[i] != 0) { - requests[count] = Request::irecv_init(static_cast(recvbuf) + recvdisps[i] * recvext, - recvcounts[i], recvtype, i, system_tag, comm); - count++; - }else{ - XBT_DEBUG("<%d> skip request creation [src = %d, recvcounts[src] = %d]", rank, i, recvcounts[i]); + + datatype->extent(&lb, &dataext); + + // Send/Recv buffers to/from others; + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + void **tmpbufs = xbt_new(void *, rank); + int index = 0; + for (int other = 0; other < rank; other++) { + tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); + requests[index] = Request::irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm); + index++; + } + for (int other = rank + 1; other < size; other++) { + requests[index] = Request::isend_init(sendbuf, count, datatype, other, system_tag, comm); + index++; + } + // Wait for completion of all comms. + Request::startall(size - 1, requests); + + if(op != MPI_OP_NULL && op->is_commutative()){ + for (int other = 0; other < size - 1; other++) { + index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); + if(index == MPI_UNDEFINED) { + break; } - } - /* Now create all sends */ - for (i = 0; i < size; ++i) { - if (i != rank && sendcounts[i] != 0) { - requests[count] = Request::isend_init(static_cast(sendbuf) + senddisps[i] * sendext, - sendcounts[i], sendtype, i, system_tag, comm); - count++; - }else{ - XBT_DEBUG("<%d> skip request creation [dst = %d, sendcounts[dst] = %d]", rank, i, sendcounts[i]); + if(index < rank) { + if(recvbuf_is_empty){ + Datatype::copy(tmpbufs[index], count, datatype, recvbuf, count, datatype); + recvbuf_is_empty=0; + } else + // #Request is below rank: it's a irecv + if(op!=MPI_OP_NULL) op->apply( tmpbufs[index], recvbuf, &count, datatype); } } - /* Wait for them all. */ - Request::startall(count, requests); - XBT_DEBUG("<%d> wait for %d requests", rank, count); - Request::waitall(count, requests, MPI_STATUS_IGNORE); - for(i = 0; i < count; i++) { - if(requests[i]!=MPI_REQUEST_NULL) - Request::unref(&requests[i]); + }else{ + //non commutative case, wait in order + for (int other = 0; other < size - 1; other++) { + Request::wait(&(requests[other]), MPI_STATUS_IGNORE); + if(index < rank) { + if (recvbuf_is_empty) { + Datatype::copy(tmpbufs[other], count, datatype, recvbuf, count, datatype); + recvbuf_is_empty = 0; + } else + if(op!=MPI_OP_NULL) op->apply( tmpbufs[other], recvbuf, &count, datatype); + } } - xbt_free(requests); } - return err; + for(index = 0; index < rank; index++) { + smpi_free_tmp_buffer(tmpbufs[index]); + } + for(index = 0; index < size-1; index++) { + Request::unref(&requests[index]); + } + xbt_free(tmpbufs); + xbt_free(requests); + return MPI_SUCCESS; +} + +} } + + + + + diff --git a/src/smpi/smpi_coll.hpp b/src/smpi/smpi_coll.hpp new file mode 100644 index 0000000000..7728984e27 --- /dev/null +++ b/src/smpi/smpi_coll.hpp @@ -0,0 +1,133 @@ +/*High level handling of collective algorithms*/ +/* Copyright (c) 2009-2010, 2012-2014. The SimGrid Team. + * All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. 
*/ + +#ifndef SMPI_COLL_HPP +#define SMPI_COLL_HPP + +#include + +#include "private.h" + +namespace simgrid{ +namespace smpi{ + +class Colls{ + private: + public: + static void set_collectives(); + static void set_gather(const char* name); + static void set_allgather(const char* name); + static void set_allgatherv(const char* name); + static void set_alltoall(const char* name); + static void set_alltoallv(const char* name); + static void set_allreduce(const char* name); + static void set_reduce(const char* name); + static void set_reduce_scatter(const char* name); + static void set_scatter(const char* name); + static void set_barrier(const char* name); + static void set_bcast(const char* name); + + static void coll_help(const char *category, s_mpi_coll_description_t * table); + + static int (*gather)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, int root, MPI_Comm); + static int (*allgather)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); + static int (*allgatherv)(void *, int, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); + static int (*allreduce)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm); + static int (*alltoall)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); + static int (*alltoallv)(void *, int*, int*, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); + static int (*bcast)(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com); + static int (*reduce)(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); + static int (*reduce_scatter)(void *sbuf, void *rbuf, int *rcounts,MPI_Datatype dtype,MPI_Op op,MPI_Comm comm); + static int (*scatter)(void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbuf, int recvcount, MPI_Datatype recvtype,int root, MPI_Comm comm); + static int (*barrier)(MPI_Comm comm); + +//These fairly unused collectives only have one implementation in SMPI + + static int gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, int root, MPI_Comm comm); + static int scatterv(void *sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); + static int scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); + static int exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); +}; + +class Coll_algo{ + private: + char* description_; + public: + char* description(); +}; + +class Coll_gather : public Coll_algo { + private: + public: + static int gather (void *, int, MPI_Datatype, void*, int, MPI_Datatype, int root, MPI_Comm); +}; + +class Coll_allgather : public Coll_algo { + private: + public: + static int allgather (void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); +}; + +class Coll_allgatherv : public Coll_algo { + private: + public: + static int allgatherv (void *, int, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); +}; + +class Coll_allreduce : public Coll_algo { + private: + public: + static int allreduce (void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm); +}; + +class Coll_alltoall : public Coll_algo { + private: + public: + static int alltoall (void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); +}; + +class Coll_alltoallv : public Coll_algo { + private: + public: + static int alltoallv (void *, int*, int*, 
MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); +}; + +class Coll_bcast : public Coll_algo { + private: + public: + static int bcast (void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com); +}; + +class Coll_reduce : public Coll_algo { + private: + public: + static int reduce (void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); +}; + +class Coll_reduce_scatter : public Coll_algo { + private: + public: + static int reduce_scatter (void *sbuf, void *rbuf, int *rcounts,MPI_Datatype dtype,MPI_Op op,MPI_Comm comm); +}; + +class Coll_scatter : public Coll_algo { + private: + public: + static int scatter (void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbuf, int recvcount, MPI_Datatype recvtype,int root, MPI_Comm comm); +}; + +class Coll_barrier : public Coll_algo { + private: + public: + static int barrier (MPI_Comm); +}; + + +} +} + +#endif diff --git a/src/smpi/smpi_comm.cpp b/src/smpi/smpi_comm.cpp index aa6c13e920..179708675f 100644 --- a/src/smpi/smpi_comm.cpp +++ b/src/smpi/smpi_comm.cpp @@ -219,7 +219,7 @@ MPI_Comm Comm::split(int color, int key) } else { recvbuf = nullptr; } - smpi_mpi_gather(sendbuf, 2, MPI_INT, recvbuf, 2, MPI_INT, 0, this); + Coll_gather_default::gather(sendbuf, 2, MPI_INT, recvbuf, 2, MPI_INT, 0, this); xbt_free(sendbuf); /* Do the actual job */ if(rank == 0) { @@ -393,7 +393,7 @@ void Comm::init_smp(){ leader_list[i]=-1; } - smpi_coll_tuned_allgather_mpich(&leader, 1, MPI_INT , leaders_map, 1, MPI_INT, this); + Coll_allgather_mpich::allgather(&leader, 1, MPI_INT , leaders_map, 1, MPI_INT, this); if(smpi_privatize_global_variables){ //we need to switch as the called function may silently touch global variables smpi_switch_data_segment(smpi_process_index()); @@ -452,7 +452,7 @@ void Comm::init_smp(){ int my_local_size=comm_intra->size(); if(comm_intra->rank()==0) { int* non_uniform_map = xbt_new0(int,leader_group_size); - smpi_coll_tuned_allgather_mpich(&my_local_size, 1, MPI_INT, + Coll_allgather_mpich::allgather(&my_local_size, 1, MPI_INT, non_uniform_map, 1, MPI_INT, leader_comm); for(i=0; i < leader_group_size; i++) { if(non_uniform_map[0] != non_uniform_map[i]) { @@ -467,7 +467,7 @@ void Comm::init_smp(){ } is_uniform_=is_uniform; } - smpi_coll_tuned_bcast_mpich(&(is_uniform_),1, MPI_INT, 0, comm_intra ); + Coll_bcast_mpich::bcast(&(is_uniform_),1, MPI_INT, 0, comm_intra ); if(smpi_privatize_global_variables){ //we need to switch as the called function may silently touch global variables smpi_switch_data_segment(smpi_process_index()); @@ -485,7 +485,7 @@ void Comm::init_smp(){ } int global_blocked; - smpi_mpi_allreduce(&is_blocked, &(global_blocked), 1, MPI_INT, MPI_LAND, this); + Coll_allreduce_default::allreduce(&is_blocked, &(global_blocked), 1, MPI_INT, MPI_LAND, this); if(MPI_COMM_WORLD==MPI_COMM_UNINITIALIZED || this==MPI_COMM_WORLD){ if(this->rank()==0){ diff --git a/src/smpi/smpi_global.cpp b/src/smpi/smpi_global.cpp index 34065aacaf..bda2de80d2 100644 --- a/src/smpi/smpi_global.cpp +++ b/src/smpi/smpi_global.cpp @@ -715,57 +715,8 @@ static void smpi_init_logs(){ } static void smpi_init_options(){ - int gather_id = find_coll_description(mpi_coll_gather_description, xbt_cfg_get_string("smpi/gather"),"gather"); - mpi_coll_gather_fun = reinterpret_cast - (mpi_coll_gather_description[gather_id].coll); - - int allgather_id = find_coll_description(mpi_coll_allgather_description, - xbt_cfg_get_string("smpi/allgather"),"allgather"); - mpi_coll_allgather_fun = reinterpret_cast - 
      (mpi_coll_allgather_description[allgather_id].coll);
-
-  int allgatherv_id = find_coll_description(mpi_coll_allgatherv_description,
-                                            xbt_cfg_get_string("smpi/allgatherv"),"allgatherv");
-  mpi_coll_allgatherv_fun = reinterpret_cast
-      (mpi_coll_allgatherv_description[allgatherv_id].coll);
-
-  int allreduce_id = find_coll_description(mpi_coll_allreduce_description,
-                                           xbt_cfg_get_string("smpi/allreduce"),"allreduce");
-  mpi_coll_allreduce_fun = reinterpret_cast
-      (mpi_coll_allreduce_description[allreduce_id].coll);
-
-  int alltoall_id = find_coll_description(mpi_coll_alltoall_description,
-                                          xbt_cfg_get_string("smpi/alltoall"),"alltoall");
-  mpi_coll_alltoall_fun = reinterpret_cast
-      (mpi_coll_alltoall_description[alltoall_id].coll);
-
-  int alltoallv_id = find_coll_description(mpi_coll_alltoallv_description,
-                                           xbt_cfg_get_string("smpi/alltoallv"),"alltoallv");
-  mpi_coll_alltoallv_fun = reinterpret_cast
-      (mpi_coll_alltoallv_description[alltoallv_id].coll);
-
-  int bcast_id = find_coll_description(mpi_coll_bcast_description, xbt_cfg_get_string("smpi/bcast"),"bcast");
-  mpi_coll_bcast_fun = reinterpret_cast
-      (mpi_coll_bcast_description[bcast_id].coll);
-
-  int reduce_id = find_coll_description(mpi_coll_reduce_description, xbt_cfg_get_string("smpi/reduce"),"reduce");
-  mpi_coll_reduce_fun = reinterpret_cast
-      (mpi_coll_reduce_description[reduce_id].coll);
-
-  int reduce_scatter_id =
-      find_coll_description(mpi_coll_reduce_scatter_description,
-                            xbt_cfg_get_string("smpi/reduce-scatter"),"reduce_scatter");
-  mpi_coll_reduce_scatter_fun = reinterpret_cast
-      (mpi_coll_reduce_scatter_description[reduce_scatter_id].coll);
-
-  int scatter_id = find_coll_description(mpi_coll_scatter_description, xbt_cfg_get_string("smpi/scatter"),"scatter");
-  mpi_coll_scatter_fun = reinterpret_cast
-      (mpi_coll_scatter_description[scatter_id].coll);
-
-  int barrier_id = find_coll_description(mpi_coll_barrier_description, xbt_cfg_get_string("smpi/barrier"),"barrier");
-  mpi_coll_barrier_fun = reinterpret_cast
-      (mpi_coll_barrier_description[barrier_id].coll);
+  Colls::set_collectives();
   smpi_coll_cleanup_callback=nullptr;
   smpi_cpu_threshold = xbt_cfg_get_double("smpi/cpu-threshold");
   smpi_host_speed = xbt_cfg_get_double("smpi/host-speed");
diff --git a/src/smpi/smpi_pmpi.cpp b/src/smpi/smpi_pmpi.cpp
index 6e89a6d9a6..853c980c3b 100644
--- a/src/smpi/smpi_pmpi.cpp
+++ b/src/smpi/smpi_pmpi.cpp
@@ -1386,7 +1386,7 @@ int PMPI_Bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm c
       extra->send_size = count * dt_size_send;
       TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
       if (comm->size() > 1)
-        mpi_coll_bcast_fun(buf, count, datatype, root, comm);
+        Colls::bcast(buf, count, datatype, root, comm);
       retval = MPI_SUCCESS;
       TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
@@ -1409,7 +1409,7 @@ int PMPI_Barrier(MPI_Comm comm)
     extra->type = TRACING_BARRIER;
     TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-    mpi_coll_barrier_fun(comm);
+    Colls::barrier(comm);
     retval = MPI_SUCCESS;
     TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
@@ -1461,7 +1461,7 @@ int PMPI_Gather(void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbu
     TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
-    mpi_coll_gather_fun(sendtmpbuf, sendtmpcount, sendtmptype, recvbuf, recvcount, recvtype, root, comm);
+    Colls::gather(sendtmpbuf, sendtmpcount, sendtmptype, recvbuf, recvcount, recvtype, root, comm);
     retval = MPI_SUCCESS;
     TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
@@ -1521,8 +1521,7 @@ int PMPI_Gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recv
     }
     TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
-    smpi_mpi_gatherv(sendtmpbuf, sendtmpcount, sendtmptype, recvbuf, recvcounts, displs, recvtype, root, comm);
-    retval = MPI_SUCCESS;
+    retval = Colls::gatherv(sendtmpbuf, sendtmpcount, sendtmptype, recvbuf, recvcounts, displs, recvtype, root, comm);
     TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
   }
@@ -1568,7 +1567,7 @@ int PMPI_Allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype,
     TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-    mpi_coll_allgather_fun(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
+    Colls::allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
     retval = MPI_SUCCESS;
     TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
   }
@@ -1620,7 +1619,7 @@ int PMPI_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
     TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-    mpi_coll_allgatherv_fun(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm);
+    Colls::allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm);
     retval = MPI_SUCCESS;
     TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
   }
@@ -1668,7 +1667,7 @@ int PMPI_Scatter(void *sendbuf, int sendcount, MPI_Datatype sendtype,
     extra->recv_size = recvcount * dt_size_recv;
     TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
-    mpi_coll_scatter_fun(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm);
+    Colls::scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm);
     retval = MPI_SUCCESS;
     TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
   }
@@ -1721,9 +1720,8 @@ int PMPI_Scatterv(void *sendbuf, int *sendcounts, int *displs,
     extra->recv_size = recvcount * dt_size_recv;
     TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
-    smpi_mpi_scatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm);
+    retval = Colls::scatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm);
-    retval = MPI_SUCCESS;
     TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
   }
@@ -1756,7 +1754,7 @@ int PMPI_Reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
     TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
-    mpi_coll_reduce_fun(sendbuf, recvbuf, count, datatype, op, root, comm);
+    Colls::reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
     retval = MPI_SUCCESS;
     TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
@@ -1811,7 +1809,7 @@ int PMPI_Allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatyp
     TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-    mpi_coll_allreduce_fun(sendtmpbuf, recvbuf, count, datatype, op, comm);
+    Colls::allreduce(sendtmpbuf, recvbuf, count, datatype, op, comm);
     if( sendbuf == MPI_IN_PLACE )
       xbt_free(sendtmpbuf);
@@ -1849,9 +1847,8 @@ int PMPI_Scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MP
     TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-    smpi_mpi_scan(sendbuf, recvbuf, count, datatype, op, comm);
+    retval = Colls::scan(sendbuf, recvbuf, count, datatype, op, comm);
-    retval = MPI_SUCCESS;
     TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
   }
@@ -1887,8 +1884,8 @@ int PMPI_Exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
     }
     TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-    smpi_mpi_exscan(sendtmpbuf, recvbuf, count, datatype, op, comm);
-    retval = MPI_SUCCESS;
+    retval = Colls::exscan(sendtmpbuf, recvbuf, count, datatype, op, comm);
+
     TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
     if (sendbuf == MPI_IN_PLACE)
       xbt_free(sendtmpbuf);
@@ -1938,7 +1935,7 @@ int PMPI_Reduce_scatter(void *sendbuf, void *recvbuf, int *recvcounts, MPI_Datat
     TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-    mpi_coll_reduce_scatter_fun(sendtmpbuf, recvbuf, recvcounts, datatype, op, comm);
+    Colls::reduce_scatter(sendtmpbuf, recvbuf, recvcounts, datatype, op, comm);
     retval = MPI_SUCCESS;
     TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
@@ -1991,7 +1988,7 @@ int PMPI_Reduce_scatter_block(void *sendbuf, void *recvbuf, int recvcount,
     int* recvcounts = static_cast(xbt_malloc(count * sizeof(int)));
     for (int i = 0; i < count; i++)
       recvcounts[i] = recvcount;
-    mpi_coll_reduce_scatter_fun(sendtmpbuf, recvbuf, recvcounts, datatype, op, comm);
+    Colls::reduce_scatter(sendtmpbuf, recvbuf, recvcounts, datatype, op, comm);
     xbt_free(recvcounts);
     retval = MPI_SUCCESS;
@@ -2044,7 +2041,7 @@ int PMPI_Alltoall(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* rec
     TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-    retval = mpi_coll_alltoall_fun(sendtmpbuf, sendtmpcount, sendtmptype, recvbuf, recvcount, recvtype, comm);
+    retval = Colls::alltoall(sendtmpbuf, sendtmpcount, sendtmptype, recvbuf, recvcount, recvtype, comm);
     TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
@@ -2117,7 +2114,7 @@ int PMPI_Alltoallv(void* sendbuf, int* sendcounts, int* senddisps, MPI_Datatype
     }
     extra->num_processes = size;
     TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-    retval = mpi_coll_alltoallv_fun(sendtmpbuf, sendtmpcounts, sendtmpdisps, sendtmptype, recvbuf, recvcounts,
+    retval = Colls::alltoallv(sendtmpbuf, sendtmpcounts, sendtmpdisps, sendtmptype, recvbuf, recvcounts,
                                     recvdisps, recvtype, comm);
     TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
diff --git a/src/smpi/smpi_replay.cpp b/src/smpi/smpi_replay.cpp
index 282eb70e7d..3292e7c398 100644
--- a/src/smpi/smpi_replay.cpp
+++ b/src/smpi/smpi_replay.cpp
@@ -10,6 +10,8 @@
 #define KEY_SIZE (sizeof(int) * 2 + 1)
+using namespace simgrid::smpi;
+
 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_replay,smpi,"Trace Replay with SMPI");
 int communicator_size = 0;
@@ -445,7 +447,7 @@ static void action_barrier(const char *const *action){
   extra->type = TRACING_BARRIER;
   TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-  mpi_coll_barrier_fun(MPI_COMM_WORLD);
+  Colls::barrier(MPI_COMM_WORLD);
   TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
   log_timed_action (action, clock);
@@ -477,7 +479,7 @@ static void action_bcast(const char *const *action)
   TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
   void *sendbuf = smpi_get_tmp_sendbuffer(size* MPI_CURRENT_TYPE->size());
-  mpi_coll_bcast_fun(sendbuf, size, MPI_CURRENT_TYPE, root, MPI_COMM_WORLD);
+  Colls::bcast(sendbuf, size, MPI_CURRENT_TYPE, root, MPI_COMM_WORLD);
   TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
   log_timed_action (action, clock);
@@ -511,7 +513,7 @@ static void action_reduce(const char *const *action)
   void *recvbuf = smpi_get_tmp_sendbuffer(comm_size* MPI_CURRENT_TYPE->size());
   void *sendbuf = smpi_get_tmp_sendbuffer(comm_size* MPI_CURRENT_TYPE->size());
-  mpi_coll_reduce_fun(sendbuf, recvbuf, comm_size, MPI_CURRENT_TYPE, MPI_OP_NULL, root, MPI_COMM_WORLD);
+  Colls::reduce(sendbuf, recvbuf, comm_size, MPI_CURRENT_TYPE, MPI_OP_NULL, root, MPI_COMM_WORLD);
   smpi_execute_flops(comp_size);
   TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
@@ -539,7 +541,7 @@ static void action_allReduce(const char *const *action) {
   void *recvbuf = smpi_get_tmp_sendbuffer(comm_size* MPI_CURRENT_TYPE->size());
   void *sendbuf = smpi_get_tmp_sendbuffer(comm_size* MPI_CURRENT_TYPE->size());
-  mpi_coll_allreduce_fun(sendbuf, recvbuf, comm_size, MPI_CURRENT_TYPE, MPI_OP_NULL, MPI_COMM_WORLD);
+  Colls::allreduce(sendbuf, recvbuf, comm_size, MPI_CURRENT_TYPE, MPI_OP_NULL, MPI_COMM_WORLD);
   smpi_execute_flops(comp_size);
   TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
@@ -574,7 +576,7 @@ static void action_allToAll(const char *const *action) {
   TRACE_smpi_collective_in(rank, -1, __FUNCTION__,extra);
-  mpi_coll_alltoall_fun(send, send_size, MPI_CURRENT_TYPE, recv, recv_size, MPI_CURRENT_TYPE2, MPI_COMM_WORLD);
+  Colls::alltoall(send, send_size, MPI_CURRENT_TYPE, recv, recv_size, MPI_CURRENT_TYPE2, MPI_COMM_WORLD);
   TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
   log_timed_action (action, clock);
@@ -622,7 +624,7 @@ static void action_gather(const char *const *action) {
   TRACE_smpi_collective_in(smpi_process_index(), root, __FUNCTION__, extra);
-  mpi_coll_gather_fun(send, send_size, MPI_CURRENT_TYPE, recv, recv_size, MPI_CURRENT_TYPE2, root, MPI_COMM_WORLD);
+  Colls::gather(send, send_size, MPI_CURRENT_TYPE, recv, recv_size, MPI_CURRENT_TYPE2, root, MPI_COMM_WORLD);
   TRACE_smpi_collective_out(smpi_process_index(), -1, __FUNCTION__);
   log_timed_action (action, clock);
@@ -680,7 +682,7 @@ static void action_gatherv(const char *const *action) {
   TRACE_smpi_collective_in(smpi_process_index(), root, __FUNCTION__, extra);
-  smpi_mpi_gatherv(send, send_size, MPI_CURRENT_TYPE, recv, recvcounts, disps, MPI_CURRENT_TYPE2, root, MPI_COMM_WORLD);
+  Colls::gatherv(send, send_size, MPI_CURRENT_TYPE, recv, recvcounts, disps, MPI_CURRENT_TYPE2, root, MPI_COMM_WORLD);
   TRACE_smpi_collective_out(smpi_process_index(), -1, __FUNCTION__);
   log_timed_action (action, clock);
@@ -726,7 +728,7 @@ static void action_reducescatter(const char *const *action) {
   void *sendbuf = smpi_get_tmp_sendbuffer(size* MPI_CURRENT_TYPE->size());
   void *recvbuf = smpi_get_tmp_recvbuffer(size* MPI_CURRENT_TYPE->size());
-  mpi_coll_reduce_scatter_fun(sendbuf, recvbuf, recvcounts, MPI_CURRENT_TYPE, MPI_OP_NULL, MPI_COMM_WORLD);
+  Colls::reduce_scatter(sendbuf, recvbuf, recvcounts, MPI_CURRENT_TYPE, MPI_OP_NULL, MPI_COMM_WORLD);
   smpi_execute_flops(comp_size);
   TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
@@ -769,7 +771,7 @@ static void action_allgather(const char *const *action) {
   TRACE_smpi_collective_in(rank, -1, __FUNCTION__,extra);
-  mpi_coll_allgather_fun(sendbuf, sendcount, MPI_CURRENT_TYPE, recvbuf, recvcount, MPI_CURRENT_TYPE2, MPI_COMM_WORLD);
+  Colls::allgather(sendbuf, sendcount, MPI_CURRENT_TYPE, recvbuf, recvcount, MPI_CURRENT_TYPE2, MPI_COMM_WORLD);
   TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
   log_timed_action (action, clock);
@@ -821,7 +823,7 @@ static void action_allgatherv(const char *const *action) {
   TRACE_smpi_collective_in(rank, -1, __FUNCTION__,extra);
-  mpi_coll_allgatherv_fun(sendbuf, sendcount, MPI_CURRENT_TYPE, recvbuf, recvcounts, disps, MPI_CURRENT_TYPE2,
+  Colls::allgatherv(sendbuf, sendcount, MPI_CURRENT_TYPE, recvbuf, recvcounts, disps, MPI_CURRENT_TYPE2,
                     MPI_COMM_WORLD);
   TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
@@ -885,7 +887,7 @@ static void action_allToAllv(const char *const *action) {
   TRACE_smpi_collective_in(rank, -1, __FUNCTION__,extra);
-  mpi_coll_alltoallv_fun(sendbuf, sendcounts, senddisps, MPI_CURRENT_TYPE,recvbuf, recvcounts, recvdisps,
+  Colls::alltoallv(sendbuf, sendcounts, senddisps, MPI_CURRENT_TYPE,recvbuf, recvcounts, recvdisps,
                    MPI_CURRENT_TYPE, MPI_COMM_WORLD);
   TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
diff --git a/src/smpi/smpi_win.cpp b/src/smpi/smpi_win.cpp
index de5d18cc33..c12c4581b5 100644
--- a/src/smpi/smpi_win.cpp
+++ b/src/smpi/smpi_win.cpp
@@ -29,12 +29,12 @@ Win::Win(void *base, MPI_Aint size, int disp_unit, MPI_Info info, MPI_Comm comm)
   if(rank==0){
     bar_ = MSG_barrier_init(comm_size);
   }
-  mpi_coll_allgather_fun(&(connected_wins_[rank]), sizeof(MPI_Win), MPI_BYTE, connected_wins_, sizeof(MPI_Win),
+  Colls::allgather(&(connected_wins_[rank]), sizeof(MPI_Win), MPI_BYTE, connected_wins_, sizeof(MPI_Win),
                          MPI_BYTE, comm);
-  mpi_coll_bcast_fun(&(bar_), sizeof(msg_bar_t), MPI_BYTE, 0, comm);
+  Colls::bcast(&(bar_), sizeof(msg_bar_t), MPI_BYTE, 0, comm);
-  mpi_coll_barrier_fun(comm);
+  Colls::barrier(comm);
 }
 Win::~Win(){
@@ -51,7 +51,7 @@ Win::~Win(){
     MPI_Info_free(&info_);
   }
-  mpi_coll_barrier_fun(comm_);
+  Colls::barrier(comm_);
   int rank=comm_->rank();
   if(rank == 0)
     MSG_barrier_destroy(bar_);
diff --git a/teshsuite/smpi/coll-allgather/coll-allgather.tesh b/teshsuite/smpi/coll-allgather/coll-allgather.tesh
index 6778ee1c5c..5c99a9d730 100644
--- a/teshsuite/smpi/coll-allgather/coll-allgather.tesh
+++ b/teshsuite/smpi/coll-allgather/coll-allgather.tesh
@@ -2,7 +2,7 @@
 ! setenv LD_LIBRARY_PATH=../../lib
 ! output sort
-p Test all to all
+p Test allgather
 $ ${bindir:=.}/../../../bin/smpirun -map -hostfile ../hostfile_coll -platform ../../../examples/platforms/small_platform.xml -np 16 --log=xbt_cfg.thres:critical ${bindir:=.}/coll-allgather --log=smpi_kernel.thres:warning --log=smpi_coll.thres:error
 > [rank 0] -> Tremblay
 > [rank 1] -> Tremblay
diff --git a/tools/cmake/DefinePackages.cmake b/tools/cmake/DefinePackages.cmake
index 8b9365bad1..9c63deb29c 100644
--- a/tools/cmake/DefinePackages.cmake
+++ b/tools/cmake/DefinePackages.cmake
@@ -140,9 +140,10 @@ set(SMPI_SRC
   src/smpi/colls/allreduce/allreduce-smp-rsag.cpp
   src/smpi/colls/allreduce/allreduce-mvapich-rs.cpp
   src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp
+  src/smpi/colls/alltoall/alltoall-basic-linear.cpp
   src/smpi/colls/alltoall/alltoall-2dmesh.cpp
   src/smpi/colls/alltoall/alltoall-3dmesh.cpp
-#  src/smpi/colls/alltoall/alltoall-bruck.cpp
+  src/smpi/colls/alltoall/alltoall-bruck.cpp
   src/smpi/colls/alltoall/alltoall-pair-light-barrier.cpp
   src/smpi/colls/alltoall/alltoall-pair-mpi-barrier.cpp
   src/smpi/colls/alltoall/alltoall-pair-one-barrier.cpp
@@ -200,6 +201,7 @@ set(SMPI_SRC
   src/smpi/colls/scatter/scatter-ompi.cpp
   src/smpi/colls/scatter/scatter-mvapich-two-level.cpp
   src/smpi/colls/smpi_automatic_selector.cpp
+  src/smpi/colls/smpi_default_selector.cpp
   src/smpi/colls/smpi_mpich_selector.cpp
   src/smpi/colls/smpi_intel_mpi_selector.cpp
   src/smpi/colls/smpi_openmpi_selector.cpp
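
Note on the pattern used throughout this patch: every call site now goes through static methods of the Colls class, and the per-collective function-pointer lookups are folded into a single Colls::set_collectives() done at initialization. The following stand-alone sketch is illustrative only, not SimGrid code: the toy Comm type, the bcast_flat/bcast_binomial algorithms, the bcast_impl pointer and the std::map lookup are all assumptions made to show how such a facade can replace per-collective global function pointers.

// Minimal sketch (hypothetical names), C++11.
#include <cstdio>
#include <map>
#include <string>

struct Comm { int size; };            // stand-in for MPI_Comm

class Colls {
  // one settable implementation pointer per collective, hidden behind a static method
  static int (*bcast_impl)(void* buf, int count, int root, Comm* comm);
public:
  // pick an implementation by name, e.g. from a "smpi/bcast"-style config entry
  static void set_collectives(const std::string& bcast_name);
  // every caller uses the same entry point, whatever algorithm was selected
  static int bcast(void* buf, int count, int root, Comm* comm) {
    return bcast_impl(buf, count, root, comm);
  }
};

// two toy algorithms playing the role of the real per-algorithm source files
static int bcast_flat(void*, int count, int root, Comm* comm) {
  std::printf("flat bcast: %d elements, root %d, %d ranks\n", count, root, comm->size);
  return 0;
}
static int bcast_binomial(void*, int count, int root, Comm* comm) {
  std::printf("binomial bcast: %d elements, root %d, %d ranks\n", count, root, comm->size);
  return 0;
}

int (*Colls::bcast_impl)(void*, int, int, Comm*) = bcast_flat;

void Colls::set_collectives(const std::string& bcast_name) {
  // the name-to-function table replaces a scan over a description array
  static const std::map<std::string, int (*)(void*, int, int, Comm*)> table = {
      {"flat", bcast_flat}, {"binomial", bcast_binomial}};
  bcast_impl = table.at(bcast_name);
}

int main() {
  Comm world{16};
  Colls::set_collectives("binomial");     // done once at startup
  Colls::bcast(nullptr, 1024, 0, &world); // call sites never change
}

Because call sites only ever name Colls::bcast(), swapping the selected algorithm touches a single place, which is the effect the "+ Colls::set_collectives();" line above has on the old block of reinterpret_cast assignments.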