continue cleanup of redundant algorithms

author Augustin Degomme <degomme@idpann.imag.fr>
Tue, 17 Sep 2013 16:35:22 +0000 (18:35 +0200)
committer Augustin Degomme <degomme@idpann.imag.fr>
Wed, 18 Sep 2013 14:12:36 +0000 (16:12 +0200)
diff --git a/buildtools/Cmake/AddTests.cmake b/buildtools/Cmake/AddTests.cmake

index 84aff1b..24ef7a3 100644 (file)
--- a/buildtools/Cmake/AddTests.cmake
+++ b/buildtools/Cmake/AddTests.cmake
@@ -405,7 +405,7 @@ if(NOT enable_memcheck)
      ENDFOREACH()
  
      FOREACH (ALLREDUCE_COLL default lr rab1 rab2 rab_rdb
-                            rab_rsag rdb smp_binomial smp_binomial_pipeline
+                            rdb smp_binomial smp_binomial_pipeline
                              smp_rdb smp_rsag smp_rsag_lr smp_rsag_rab redbcast ompi mpich ompi_ring_segmented)
        ADD_TEST(smpi-allreduce-coll-${ALLREDUCE_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/allreduce:${ALLREDUCE_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allreduce_coll.tesh)
      ENDFOREACH()
diff --git a/buildtools/Cmake/DefinePackages.cmake b/buildtools/Cmake/DefinePackages.cmake

index b89bc38..7a749a6 100644 (file)
--- a/buildtools/Cmake/DefinePackages.cmake
+++ b/buildtools/Cmake/DefinePackages.cmake
@@ -141,8 +141,6 @@ set(SMPI_SRC
    src/smpi/colls/allreduce-rab1.c
    src/smpi/colls/allreduce-rab2.c
    src/smpi/colls/allreduce-rab-rdb.c
-#  src/smpi/colls/allreduce-rab-reduce-scatter.c
-  src/smpi/colls/allreduce-rab-rsag.c
    src/smpi/colls/allreduce-rdb.c
    src/smpi/colls/allreduce-redbcast.c
    src/smpi/colls/allreduce-smp-binomial.c
diff --git a/src/smpi/colls/allreduce-rab-reduce-scatter.c b/src/smpi/colls/allreduce-rab-reduce-scatter.c

deleted file mode 100644 (file)

index 13abd17..0000000
--- a/src/smpi/colls/allreduce-rab-reduce-scatter.c
+++ /dev/null
@@ -1,521 +0,0 @@
-#include "colls_private.h"
-#ifndef REDUCE_STUFF
-#define REDUCE_STUFF
-/*****************************************************************************
-
-Copyright (c) 2006, Ahmad Faraj & Xin Yuan,
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-  * Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
-  * Redistributions in binary form must reproduce the above copyright notice,
-    this list of conditions and the following disclaimer in the documentation
-    and/or other materials provided with the distribution.
-
-  * Neither the name of the Florida State University nor the names of its
-    contributors may be used to endorse or promote products derived from this
-    software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-  *************************************************************************
-  *     Any results obtained from executing this software require the     *
-  *     acknowledgment and citation of the software and its owners.       *
-  *     The full citation is given below:                                 *
-  *                                                                       *
-  *     A. Faraj and X. Yuan. "Automatic Generation and Tuning of MPI     *
-  *     Collective Communication Routines." The 19th ACM International    *
-  *     Conference on Supercomputing (ICS), Cambridge, Massachusetts,     *
-  *     June 20-22, 2005.                                                 *
-  *************************************************************************
-
-*****************************************************************************/
-
-extern MPI_User_function *MPIR_Op_table[];
-
-
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*  $Id: mpich-stuff.h,v 1.1 2005/08/22 19:50:21 faraj Exp $
- *
- *  (C) 2001 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-#ifndef _MPICH_STUFF_H
-#define _MPICH_STUFF_H
-
-/*TOpaqOverview.tex
-  MPI Opaque Objects:
-
-  MPI Opaque objects such as 'MPI_Comm' or 'MPI_Datatype' are specified by 
-  integers (in the MPICH2 implementation); the MPI standard calls these
-  handles.  
-  Out of range values are invalid; the value 0 is reserved.
-  For most (with the possible exception of 
-  'MPI_Request' for performance reasons) MPI Opaque objects, the integer
-  encodes both the kind of object (allowing runtime tests to detect a datatype
-  passed where a communicator is expected) and important properties of the 
-  object.  Even the 'MPI_xxx_NULL' values should be encoded so that 
-  different null handles can be distinguished.  The details of the encoding
-  of the handles is covered in more detail in the MPICH2 Design Document.
-  For the most part, the ADI uses pointers to the underlying structures
-  rather than the handles themselves.  However, each structure contains an 
-  'handle' field that is the corresponding integer handle for the MPI object.
-
-  MPID objects (objects used within the implementation of MPI) are not opaque.
-
-  T*/
-
-/* Known MPI object types.  These are used for both the error handlers 
-   and for the handles.  This is a 4 bit value.  0 is reserved for so 
-   that all-zero handles can be flagged as an error. */
-/*E
-  MPID_Object_kind - Object kind (communicator, window, or file)
-
-  Notes:
-  This enum is used by keyvals and errhandlers to indicate the type of
-  object for which MPI opaque types the data is valid.  These are defined
-  as bits to allow future expansion to the case where an object is value for
-  multiple types (for example, we may want a universal error handler for 
-  errors return).  This is also used to indicate the type of MPI object a 
-  MPI handle represents.  It is an enum because only this applies only the
-  the MPI objects.
-
-  Module:
-  Attribute-DS
-  E*/
-typedef enum MPID_Object_kind {
-  MPID_COMM = 0x1,
-  MPID_GROUP = 0x2,
-  MPID_DATATYPE = 0x3,
-  MPID_FILE = 0x4,
-  MPID_ERRHANDLER = 0x5,
-  MPID_OP = 0x6,
-  MPID_INFO = 0x7,
-  MPID_WIN = 0x8,
-  MPID_KEYVAL = 0x9,
-  MPID_ATTR = 0xa,
-  MPID_REQUEST = 0xb
-} MPID_Object_kind;
-/* The above objects should correspond to MPI objects only. */
-#define HANDLE_MPI_KIND_SHIFT 26
-#define HANDLE_GET_MPI_KIND(a) ( ((a)&0x3c000000) >> HANDLE_MPI_KIND_SHIFT )
-
-/* Handle types.  These are really 2 bits */
-#define HANDLE_KIND_INVALID  0x0
-#define HANDLE_KIND_BUILTIN  0x1
-#define HANDLE_KIND_DIRECT   0x2
-#define HANDLE_KIND_INDIRECT 0x3
-/* Mask assumes that ints are at least 4 bytes */
-#define HANDLE_KIND_MASK 0xc0000000
-#define HANDLE_KIND_SHIFT 30
-#define HANDLE_GET_KIND(a) (((a)&HANDLE_KIND_MASK)>>HANDLE_KIND_SHIFT)
-#define HANDLE_SET_KIND(a,kind) ((a)|((kind)<<HANDLE_KIND_SHIFT))
-
-/* For indirect, the remainder of the handle has a block and index */
-#define HANDLE_INDIRECT_SHIFT 16
-#define HANDLE_BLOCK(a) (((a)& 0x03FF0000) >> HANDLE_INDIRECT_SHIFT)
-#define HANDLE_BLOCK_INDEX(a) ((a) & 0x0000FFFF)
-
-/* Handle block is between 1 and 1024 *elements* */
-#define HANDLE_BLOCK_SIZE 256
-/* Index size is bewtween 1 and 65536 *elements* */
-#define HANDLE_BLOCK_INDEX_SIZE 1024
-
-/* For direct, the remainder of the handle is the index into a predefined 
-   block */
-#define HANDLE_MASK 0x03FFFFFF
-#define HANDLE_INDEX(a) ((a)& HANDLE_MASK)
-
-/* ALL objects have the handle as the first value. */
-/* Inactive (unused and stored on the appropriate avail list) objects 
-   have MPIU_Handle_common as the head */
-typedef struct MPIU_Handle_common {
-  int handle;
-  volatile int ref_count;       /* This field is used to indicate that the
-                                   object is not in use (see, e.g., 
-                                   MPID_Comm_valid_ptr) */
-  void *next;                   /* Free handles use this field to point to the next
-                                   free object */
-} MPIU_Handle_common;
-
-/* All *active* (in use) objects have the handle as the first value; objects
-   with referene counts have the reference count as the second value.
-   See MPIU_Object_add_ref and MPIU_Object_release_ref. */
-typedef struct MPIU_Handle_head {
-  int handle;
-  volatile int ref_count;
-} MPIU_Handle_head;
-
-/* This type contains all of the data, except for the direct array,
-   used by the object allocators. */
-typedef struct MPIU_Object_alloc_t {
-  MPIU_Handle_common *avail;    /* Next available object */
-  int initialized;              /* */
-  void *(*indirect)[];          /* Pointer to indirect object blocks */
-  int indirect_size;            /* Number of allocated indirect blocks */
-  MPID_Object_kind kind;        /* Kind of object this is for */
-  int size;                     /* Size of an individual object */
-  void *direct;                 /* Pointer to direct block, used 
-                                   for allocation */
-  int direct_size;              /* Size of direct block */
-} MPIU_Object_alloc_t;
-extern void *MPIU_Handle_obj_alloc(MPIU_Object_alloc_t *);
-extern void MPIU_Handle_obj_alloc_start(MPIU_Object_alloc_t *);
-extern void MPIU_Handle_obj_alloc_complete(MPIU_Object_alloc_t *, int init);
-extern void MPIU_Handle_obj_free(MPIU_Object_alloc_t *, void *);
-void *MPIU_Handle_get_ptr_indirect(int, MPIU_Object_alloc_t *);
-extern void *MPIU_Handle_direct_init(void *direct, int direct_size,
-                                     int obj_size, int handle_type);
-#endif
-#define MPID_Getb_ptr(kind,a,bmsk,ptr)                                  \
-{                                                                       \
-   switch (HANDLE_GET_KIND(a)) {                                        \
-      case HANDLE_KIND_BUILTIN:                                         \
-          ptr=MPID_##kind##_builtin+((a)&(bmsk));                       \
-          break;                                                        \
-      case HANDLE_KIND_DIRECT:                                          \
-          ptr=MPID_##kind##_direct+HANDLE_INDEX(a);                     \
-          break;                                                        \
-      case HANDLE_KIND_INDIRECT:                                        \
-          ptr=((MPID_##kind*)                                           \
-               MPIU_Handle_get_ptr_indirect(a,&MPID_##kind##_mem));     \
-          break;                                                        \
-      case HANDLE_KIND_INVALID:                                         \
-      default:                                                         \
-          ptr=0;                                                       \
-          break;                                                       \
-    }                                                                   \
-}
-
-
-
-#define MPID_Op_get_ptr(a,ptr)         MPID_Getb_ptr(Op,a,0x000000ff,ptr)
-typedef enum MPID_Lang_t { MPID_LANG_C
-#ifdef HAVE_FORTRAN_BINDING
-      , MPID_LANG_FORTRAN, MPID_LANG_FORTRAN90
-#endif
-#ifdef HAVE_CXX_BINDING
-      , MPID_LANG_CXX
-#endif
-} MPID_Lang_t;
-/* Reduction and accumulate operations */
-/*E
-  MPID_Op_kind - Enumerates types of MPI_Op types
-
-  Notes:
-  These are needed for implementing 'MPI_Accumulate', since only predefined
-  operations are allowed for that operation.  
-
-  A gap in the enum values was made allow additional predefined operations
-  to be inserted.  This might include future additions to MPI or experimental
-  extensions (such as a Read-Modify-Write operation).
-
-  Module:
-  Collective-DS
-  E*/
-typedef enum MPID_Op_kind { MPID_OP_MAX = 1, MPID_OP_MIN = 2,
-  MPID_OP_SUM = 3, MPID_OP_PROD = 4,
-  MPID_OP_LAND = 5, MPID_OP_BAND = 6, MPID_OP_LOR = 7, MPID_OP_BOR = 8,
-  MPID_OP_LXOR = 9, MPID_OP_BXOR = 10, MPID_OP_MAXLOC = 11,
-  MPID_OP_MINLOC = 12, MPID_OP_REPLACE = 13,
-  MPID_OP_USER_NONCOMMUTE = 32, MPID_OP_USER = 33
-} MPID_Op_kind;
-
-/*S
-  MPID_User_function - Definition of a user function for MPI_Op types.
-
-  Notes:
-  This includes a 'const' to make clear which is the 'in' argument and 
-  which the 'inout' argument, and to indicate that the 'count' and 'datatype'
-  arguments are unchanged (they are addresses in an attempt to allow 
-  interoperation with Fortran).  It includes 'restrict' to emphasize that 
-  no overlapping operations are allowed.
-
-  We need to include a Fortran version, since those arguments will
-  have type 'MPI_Fint *' instead.  We also need to add a test to the
-  test suite for this case; in fact, we need tests for each of the handle
-  types to ensure that the transfered handle works correctly.
-
-  This is part of the collective module because user-defined operations
-  are valid only for the collective computation routines and not for 
-  RMA accumulate.
-
-  Yes, the 'restrict' is in the correct location.  C compilers that 
-  support 'restrict' should be able to generate code that is as good as a
-  Fortran compiler would for these functions.
-
-  We should note on the manual pages for user-defined operations that
-  'restrict' should be used when available, and that a cast may be 
-  required when passing such a function to 'MPI_Op_create'.
-
-  Question:
-  Should each of these function types have an associated typedef?
-
-  Should there be a C++ function here?
-
-  Module:
-  Collective-DS
-  S*/
-typedef union MPID_User_function {
-  void (*c_function) (const void *, void *, const int *, const MPI_Datatype *);
-  void (*f77_function) (const void *, void *,
-                        const MPI_Fint *, const MPI_Fint *);
-} MPID_User_function;
-/* FIXME: Should there be "restrict" in the definitions above, e.g., 
-   (*c_function)( const void restrict * , void restrict *, ... )? */
-
-/*S
-  MPID_Op - MPI_Op structure
-
-  Notes:
-  All of the predefined functions are commutative.  Only user functions may 
-  be noncummutative, so there are two separate op types for commutative and
-  non-commutative user-defined operations.
-
-  Operations do not require reference counts because there are no nonblocking
-  operations that accept user-defined operations.  Thus, there is no way that
-  a valid program can free an 'MPI_Op' while it is in use.
-
-  Module:
-  Collective-DS
-  S*/
-typedef struct MPID_Op {
-  int handle;                   /* value of MPI_Op for this structure */
-  volatile int ref_count;
-  MPID_Op_kind kind;
-  MPID_Lang_t language;
-  MPID_User_function function;
-} MPID_Op;
-#define MPID_OP_N_BUILTIN 14
-extern MPID_Op MPID_Op_builtin[MPID_OP_N_BUILTIN];
-extern MPID_Op MPID_Op_direct[];
-extern MPIU_Object_alloc_t MPID_Op_mem;
-
-/*****************************************************************************
-
-* Function: get_op_func
-
-* return: Pointer to MPI_User_function
-
-* inputs:
-   op: operator (max, min, etc)
-
-   * Descrp: Function returns the function associated with current operator
-   * op.
-
-   * Auther: AHMAD FARAJ
-
-****************************************************************************/
-MPI_User_function *get_op_func(MPI_Op op)
-{
-
-  if (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN)
-    return MPIR_Op_table[op % 16 - 1];
-  return NULL;
-}
-
-#endif
-
-
-int smpi_coll_tuned_allreduce_rab_reduce_scatter(void *sbuff, void *rbuff,
-                                                 int count, MPI_Datatype dtype,
-                                                 MPI_Op op, MPI_Comm comm)
-{
-  int nprocs, rank, type_size, tag = COLL_TAG_ALLREDUCE;
-  int mask, dst, pof2, newrank, rem, newdst, i,
-      send_idx, recv_idx, last_idx, send_cnt, recv_cnt, *cnts, *disps;
-  MPI_Aint lb, extent;
-  MPI_Status status;
-  void *tmp_buf = NULL;
-  MPI_User_function *func = get_op_func(op);
-  nprocs = smpi_comm_size(comm);
-  rank = smpi_comm_rank(comm);
-
-  extent = smpi_datatype_get_extent(dtype);
-  tmp_buf = (void *) xbt_malloc(count * extent);
-
-  MPIR_Localcopy(sbuff, count, dtype, rbuff, count, dtype);
-
-  type_size = smpi_datatype_size(dtype);
-
-  // find nearest power-of-two less than or equal to comm_size
-  pof2 = 1;
-  while (pof2 <= nprocs)
-    pof2 <<= 1;
-  pof2 >>= 1;
-
-  rem = nprocs - pof2;
-
-  // In the non-power-of-two case, all even-numbered
-  // processes of rank < 2*rem send their data to
-  // (rank+1). These even-numbered processes no longer
-  // participate in the algorithm until the very end. The
-  // remaining processes form a nice power-of-two. 
-
-  if (rank < 2 * rem) {
-    // even       
-    if (rank % 2 == 0) {
-
-      MPIC_Send(rbuff, count, dtype, rank + 1, tag, comm);
-
-      // temporarily set the rank to -1 so that this
-      // process does not pariticipate in recursive
-      // doubling
-      newrank = -1;
-    } else                      // odd
-    {
-      MPIC_Recv(tmp_buf, count, dtype, rank - 1, tag, comm, &status);
-      // do the reduction on received data. since the
-      // ordering is right, it doesn't matter whether
-      // the operation is commutative or not.
-      (*func) (tmp_buf, rbuff, &count, &dtype);
-
-      // change the rank 
-      newrank = rank / 2;
-    }
-  }
-
-  else                          // rank >= 2 * rem 
-    newrank = rank - rem;
-
-  // If op is user-defined or count is less than pof2, use
-  // recursive doubling algorithm. Otherwise do a reduce-scatter
-  // followed by allgather. (If op is user-defined,
-  // derived datatypes are allowed and the user could pass basic
-  // datatypes on one process and derived on another as long as
-  // the type maps are the same. Breaking up derived
-  // datatypes to do the reduce-scatter is tricky, therefore
-  // using recursive doubling in that case.) 
-
-  if (newrank != -1) {
-    // do a reduce-scatter followed by allgather. for the
-    // reduce-scatter, calculate the count that each process receives
-    // and the displacement within the buffer 
-
-    cnts = (int *) xbt_malloc(pof2 * sizeof(int));
-    disps = (int *) xbt_malloc(pof2 * sizeof(int));
-
-    for (i = 0; i < (pof2 - 1); i++)
-      cnts[i] = count / pof2;
-    cnts[pof2 - 1] = count - (count / pof2) * (pof2 - 1);
-
-    disps[0] = 0;
-    for (i = 1; i < pof2; i++)
-      disps[i] = disps[i - 1] + cnts[i - 1];
-
-    mask = 0x1;
-    send_idx = recv_idx = 0;
-    last_idx = pof2;
-    while (mask < pof2) {
-      newdst = newrank ^ mask;
-      // find real rank of dest 
-      dst = (newdst < rem) ? newdst * 2 + 1 : newdst + rem;
-
-      send_cnt = recv_cnt = 0;
-      if (newrank < newdst) {
-        send_idx = recv_idx + pof2 / (mask * 2);
-        for (i = send_idx; i < last_idx; i++)
-          send_cnt += cnts[i];
-        for (i = recv_idx; i < send_idx; i++)
-          recv_cnt += cnts[i];
-      } else {
-        recv_idx = send_idx + pof2 / (mask * 2);
-        for (i = send_idx; i < recv_idx; i++)
-          send_cnt += cnts[i];
-        for (i = recv_idx; i < last_idx; i++)
-          recv_cnt += cnts[i];
-      }
-
-      // Send data from recvbuf. Recv into tmp_buf 
-      MPIC_Sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt,
-                    dtype, dst, tag,
-                    (char *) tmp_buf + disps[recv_idx] * extent, recv_cnt,
-                    dtype, dst, tag, comm, &status);
-
-      // tmp_buf contains data received in this step.
-      // recvbuf contains data accumulated so far 
-
-      // This algorithm is used only for predefined ops
-      // and predefined ops are always commutative.
-      (*func) ((char *) tmp_buf + disps[recv_idx] * extent,
-               (char *) rbuff + disps[recv_idx] * extent, &recv_cnt, &dtype);
-
-      // update send_idx for next iteration 
-      send_idx = recv_idx;
-      mask <<= 1;
-
-      // update last_idx, but not in last iteration because the value
-      // is needed in the allgather step below. 
-      if (mask < pof2)
-        last_idx = recv_idx + pof2 / mask;
-    }
-
-    // now do the allgather 
-
-    mask >>= 1;
-    while (mask > 0) {
-      newdst = newrank ^ mask;
-      // find real rank of dest
-      dst = (newdst < rem) ? newdst * 2 + 1 : newdst + rem;
-
-      send_cnt = recv_cnt = 0;
-      if (newrank < newdst) {
-        // update last_idx except on first iteration 
-        if (mask != pof2 / 2)
-          last_idx = last_idx + pof2 / (mask * 2);
-
-        recv_idx = send_idx + pof2 / (mask * 2);
-        for (i = send_idx; i < recv_idx; i++)
-          send_cnt += cnts[i];
-        for (i = recv_idx; i < last_idx; i++)
-          recv_cnt += cnts[i];
-      } else {
-        recv_idx = send_idx - pof2 / (mask * 2);
-        for (i = send_idx; i < last_idx; i++)
-          send_cnt += cnts[i];
-        for (i = recv_idx; i < send_idx; i++)
-          recv_cnt += cnts[i];
-      }
-
-      MPIC_Sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt,
-                    dtype, dst, tag,
-                    (char *) rbuff + disps[recv_idx] * extent, recv_cnt,
-                    dtype, dst, tag, comm, &status);
-
-      if (newrank > newdst)
-        send_idx = recv_idx;
-
-      mask >>= 1;
-    }
-
-    free(cnts);
-    free(disps);
-
-  }
-  // In the non-power-of-two case, all odd-numbered processes of
-  // rank < 2 * rem send the result to (rank-1), the ranks who didn't
-  // participate above.
-
-  if (rank < 2 * rem) {
-    if (rank % 2)               // odd 
-      MPIC_Send(rbuff, count, dtype, rank - 1, tag, comm);
-    else                        // even 
-      MPIC_Recv(rbuff, count, dtype, rank + 1, tag, comm, &status);
-  }
-
-  free(tmp_buf);
-  return MPI_SUCCESS;
-}
diff --git a/src/smpi/colls/allreduce-rab-rsag.c b/src/smpi/colls/allreduce-rab-rsag.c

deleted file mode 100644 (file)

index c387a22..0000000
--- a/src/smpi/colls/allreduce-rab-rsag.c
+++ /dev/null
@@ -1,191 +0,0 @@
-#include "colls_private.h"
-//#include <star-reduction.c>
-
-int smpi_coll_tuned_allreduce_rab_rsag(void *sbuff, void *rbuff, int count,
-                                       MPI_Datatype dtype, MPI_Op op,
-                                       MPI_Comm comm)
-{
-  int nprocs, rank, tag = COLL_TAG_ALLREDUCE;
-  int mask, dst, pof2, newrank, rem, newdst, i,
-      send_idx, recv_idx, last_idx, send_cnt, recv_cnt, *cnts, *disps;
-  MPI_Aint extent;
-  MPI_Status status;
-  void *tmp_buf = NULL;
-  nprocs = smpi_comm_size(comm);
-  rank = smpi_comm_rank(comm);
-
-  extent = smpi_datatype_get_extent(dtype);
-  tmp_buf = (void *) xbt_malloc(count * extent);
-
-  smpi_mpi_sendrecv(sbuff, count, dtype, rank, tag, rbuff, count, dtype, rank, tag,
-               comm, &status);
-
-  // find nearest power-of-two less than or equal to comm_size
-  pof2 = 1;
-  while (pof2 <= nprocs)
-    pof2 <<= 1;
-  pof2 >>= 1;
-
-  rem = nprocs - pof2;
-
-  // In the non-power-of-two case, all even-numbered
-  // processes of rank < 2*rem send their data to
-  // (rank+1). These even-numbered processes no longer
-  // participate in the algorithm until the very end. The
-  // remaining processes form a nice power-of-two. 
-
-  if (rank < 2 * rem) {
-    // even       
-    if (rank % 2 == 0) {
-
-      smpi_mpi_send(rbuff, count, dtype, rank + 1, tag, comm);
-
-      // temporarily set the rank to -1 so that this
-      // process does not pariticipate in recursive
-      // doubling
-      newrank = -1;
-    } else                      // odd
-    {
-      smpi_mpi_recv(tmp_buf, count, dtype, rank - 1, tag, comm, &status);
-      // do the reduction on received data. since the
-      // ordering is right, it doesn't matter whether
-      // the operation is commutative or not.
-      smpi_op_apply(op, tmp_buf, rbuff, &count, &dtype);
-
-      // change the rank 
-      newrank = rank / 2;
-    }
-  }
-
-  else                          // rank >= 2 * rem 
-    newrank = rank - rem;
-
-  // If op is user-defined or count is less than pof2, use
-  // recursive doubling algorithm. Otherwise do a reduce-scatter
-  // followed by allgather. (If op is user-defined,
-  // derived datatypes are allowed and the user could pass basic
-  // datatypes on one process and derived on another as long as
-  // the type maps are the same. Breaking up derived
-  // datatypes to do the reduce-scatter is tricky, therefore
-  // using recursive doubling in that case.) 
-
-  if (newrank != -1) {
-    // do a reduce-scatter followed by allgather. for the
-    // reduce-scatter, calculate the count that each process receives
-    // and the displacement within the buffer 
-
-    cnts = (int *) xbt_malloc(pof2 * sizeof(int));
-    disps = (int *) xbt_malloc(pof2 * sizeof(int));
-
-    for (i = 0; i < (pof2 - 1); i++)
-      cnts[i] = count / pof2;
-    cnts[pof2 - 1] = count - (count / pof2) * (pof2 - 1);
-
-    disps[0] = 0;
-    for (i = 1; i < pof2; i++)
-      disps[i] = disps[i - 1] + cnts[i - 1];
-
-    mask = 0x1;
-    send_idx = recv_idx = 0;
-    last_idx = pof2;
-    while (mask < pof2) {
-      newdst = newrank ^ mask;
-      // find real rank of dest 
-      dst = (newdst < rem) ? newdst * 2 + 1 : newdst + rem;
-
-      send_cnt = recv_cnt = 0;
-      if (newrank < newdst) {
-        send_idx = recv_idx + pof2 / (mask * 2);
-        for (i = send_idx; i < last_idx; i++)
-          send_cnt += cnts[i];
-        for (i = recv_idx; i < send_idx; i++)
-          recv_cnt += cnts[i];
-      } else {
-        recv_idx = send_idx + pof2 / (mask * 2);
-        for (i = send_idx; i < recv_idx; i++)
-          send_cnt += cnts[i];
-        for (i = recv_idx; i < last_idx; i++)
-          recv_cnt += cnts[i];
-      }
-
-      // Send data from recvbuf. Recv into tmp_buf 
-      smpi_mpi_sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt,
-                   dtype, dst, tag,
-                   (char *) tmp_buf + disps[recv_idx] * extent, recv_cnt,
-                   dtype, dst, tag, comm, &status);
-
-      // tmp_buf contains data received in this step.
-      // recvbuf contains data accumulated so far 
-
-      // This algorithm is used only for predefined ops
-      // and predefined ops are always commutative.
-      smpi_op_apply(op, (char *) tmp_buf + disps[recv_idx] * extent,
-                     (char *) rbuff + disps[recv_idx] * extent,
-                     &recv_cnt, &dtype);
-
-      // update send_idx for next iteration 
-      send_idx = recv_idx;
-      mask <<= 1;
-
-      // update last_idx, but not in last iteration because the value
-      // is needed in the allgather step below. 
-      if (mask < pof2)
-        last_idx = recv_idx + pof2 / mask;
-    }
-
-    // now do the allgather 
-
-    mask >>= 1;
-    while (mask > 0) {
-      newdst = newrank ^ mask;
-      // find real rank of dest
-      dst = (newdst < rem) ? newdst * 2 + 1 : newdst + rem;
-
-      send_cnt = recv_cnt = 0;
-      if (newrank < newdst) {
-        // update last_idx except on first iteration 
-        if (mask != pof2 / 2)
-          last_idx = last_idx + pof2 / (mask * 2);
-
-        recv_idx = send_idx + pof2 / (mask * 2);
-        for (i = send_idx; i < recv_idx; i++)
-          send_cnt += cnts[i];
-        for (i = recv_idx; i < last_idx; i++)
-          recv_cnt += cnts[i];
-      } else {
-        recv_idx = send_idx - pof2 / (mask * 2);
-        for (i = send_idx; i < last_idx; i++)
-          send_cnt += cnts[i];
-        for (i = recv_idx; i < send_idx; i++)
-          recv_cnt += cnts[i];
-      }
-
-      smpi_mpi_sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt,
-                   dtype, dst, tag,
-                   (char *) rbuff + disps[recv_idx] * extent, recv_cnt,
-                   dtype, dst, tag, comm, &status);
-
-      if (newrank > newdst)
-        send_idx = recv_idx;
-
-      mask >>= 1;
-    }
-
-    free(cnts);
-    free(disps);
-
-  }
-  // In the non-power-of-two case, all odd-numbered processes of
-  // rank < 2 * rem send the result to (rank-1), the ranks who didn't
-  // participate above.
-
-  if (rank < 2 * rem) {
-    if (rank % 2)               // odd 
-      smpi_mpi_send(rbuff, count, dtype, rank - 1, tag, comm);
-    else                        // even 
-      smpi_mpi_recv(rbuff, count, dtype, rank + 1, tag, comm, &status);
-  }
-
-  free(tmp_buf);
-  return MPI_SUCCESS;
-}
diff --git a/src/smpi/colls/colls.h b/src/smpi/colls/colls.h

index 6461a5a..3e01954 100644 (file)
--- a/src/smpi/colls/colls.h
+++ b/src/smpi/colls/colls.h
@@ -105,8 +105,6 @@ COLL_APPLY(action, COLL_ALLREDUCE_SIG, lr) COLL_sep \
  COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab1) COLL_sep \
  COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab2) COLL_sep \
  COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab_rdb) COLL_sep \
-COLL_NOTHING(COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab_reduce_scatter) COLL_sep) \
-COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab_rsag) COLL_sep \
  COLL_APPLY(action, COLL_ALLREDUCE_SIG, rdb) COLL_sep \
  COLL_APPLY(action, COLL_ALLREDUCE_SIG, smp_binomial) COLL_sep \
  COLL_APPLY(action, COLL_ALLREDUCE_SIG, smp_binomial_pipeline) COLL_sep \
diff --git a/src/smpi/colls/smpi_mpich_selector.c b/src/smpi/colls/smpi_mpich_selector.c

index 8a2124a..d999646 100644 (file)
--- a/src/smpi/colls/smpi_mpich_selector.c
+++ b/src/smpi/colls/smpi_mpich_selector.c
@@ -74,7 +74,7 @@ int smpi_coll_tuned_allreduce_mpich(void *sbuf, void *rbuf, int count,
  
      if (block_dsize > large_message && count >= pof2 && smpi_op_is_commute(op)) {
        //for long messages
-       return (smpi_coll_tuned_allreduce_rab_rsag (sbuf, rbuf, 
+       return (smpi_coll_tuned_allreduce_rab_rdb (sbuf, rbuf, 
                                                                     count, dtype,
                                                                     op, comm));
      }else {
author	Augustin Degomme <degomme@idpann.imag.fr>
	Tue, 17 Sep 2013 16:35:22 +0000 (18:35 +0200)
committer	Augustin Degomme <degomme@idpann.imag.fr>
	Wed, 18 Sep 2013 14:12:36 +0000 (16:12 +0200)
buildtools/Cmake/AddTests.cmake		patch \| blob \| history
buildtools/Cmake/DefinePackages.cmake		patch \| blob \| history
src/smpi/colls/allreduce-rab-reduce-scatter.c	[deleted file]	patch \| blob \| history
src/smpi/colls/allreduce-rab-rsag.c	[deleted file]	patch \| blob \| history
src/smpi/colls/colls.h		patch \| blob \| history
src/smpi/colls/smpi_mpich_selector.c		patch \| blob \| history