*/
#include "colls_private.h"
+ #include "coll_tuned_topo.h"
#define MAXTREEFANOUT 32
- typedef struct ompi_coll_tree_t {
- int32_t tree_root;
- int32_t tree_fanout;
- int32_t tree_bmtree;
- int32_t tree_prev;
- int32_t tree_next[MAXTREEFANOUT];
- int32_t tree_nextsize;
- } ompi_coll_tree_t;
-
- ompi_coll_tree_t*
- ompi_coll_tuned_topo_build_tree( int fanout,
- MPI_Comm com,
- int root );
-
-
-/*
- * Some static helpers.
- */
-static int pown( int fanout, int num )
-{
- int j, p = 1;
- if( num < 0 ) return 0;
- if (1==num) return fanout;
- if (2==fanout) {
- return p<<num;
- }
- else {
- for( j = 0; j < num; j++ ) { p*= fanout; }
- }
- return p;
-}
-
-static int calculate_level( int fanout, int rank )
-{
- int level, num;
- if( rank < 0 ) return -1;
- for( level = 0, num = 0; num <= rank; level++ ) {
- num += pown(fanout, level);
- }
- return level-1;
-}
-
-static int calculate_num_nodes_up_to_level( int fanout, int level )
-{
- /* just use geometric progression formula for sum:
- a^0+a^1+...a^(n-1) = (a^n-1)/(a-1) */
- return ((pown(fanout,level) - 1)/(fanout - 1));
-}
-
-/*
- * And now the building functions.
- *
- * An example for fanout = 2, comm_size = 7
- *
- * 0 <-- delta = 1 (fanout^0)
- * / \
- * 1 2 <-- delta = 2 (fanout^1)
- * / \ / \
- * 3 5 4 6 <-- delta = 4 (fanout^2)
- */
-
-ompi_coll_tree_t*
-ompi_coll_tuned_topo_build_tree( int fanout,
- MPI_Comm comm,
- int root )
-{
- int rank, size;
- int schild, sparent;
- int level; /* location of my rank in the tree structure of size */
- int delta; /* number of nodes on my level */
- int slimit; /* total number of nodes on levels above me */
- int shiftedrank;
- int i;
- ompi_coll_tree_t* tree;
-
- XBT_DEBUG( "coll:tuned:topo_build_tree Building fo %d rt %d", fanout, root);
-
- if (fanout<1) {
- XBT_DEBUG( "coll:tuned:topo_build_tree invalid fanout %d", fanout);
- return NULL;
- }
- if (fanout>MAXTREEFANOUT) {
- XBT_DEBUG("coll:tuned:topo_build_tree invalid fanout %d bigger than max %d", fanout, MAXTREEFANOUT);
- return NULL;
- }
-
- /*
- * Get size and rank of the process in this communicator
- */
- size = smpi_comm_size(comm);
- rank = smpi_comm_rank(comm);
-
- tree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
- if (!tree) {
- XBT_DEBUG("coll:tuned:topo_build_tree PANIC::out of memory");
- return NULL;
- }
-
- tree->tree_root = MPI_UNDEFINED;
- tree->tree_nextsize = MPI_UNDEFINED;
-
- /*
- * Set root
- */
- tree->tree_root = root;
-
- /*
- * Initialize tree
- */
- tree->tree_fanout = fanout;
- tree->tree_bmtree = 0;
- tree->tree_root = root;
- tree->tree_prev = -1;
- tree->tree_nextsize = 0;
- for( i = 0; i < fanout; i++ ) {
- tree->tree_next[i] = -1;
- }
-
- /* return if we have less than 2 processes */
- if( size < 2 ) {
- return tree;
- }
-
- /*
- * Shift all ranks by root, so that the algorithm can be
- * designed as if root would be always 0
- * shiftedrank should be used in calculating distances
- * and position in tree
- */
- shiftedrank = rank - root;
- if( shiftedrank < 0 ) {
- shiftedrank += size;
- }
-
- /* calculate my level */
- level = calculate_level( fanout, shiftedrank );
- delta = pown( fanout, level );
-
- /* find my children */
- for( i = 0; i < fanout; i++ ) {
- schild = shiftedrank + delta * (i+1);
- if( schild < size ) {
- tree->tree_next[i] = (schild+root)%size;
- tree->tree_nextsize = tree->tree_nextsize + 1;
- } else {
- break;
- }
- }
-
- /* find my parent */
- slimit = calculate_num_nodes_up_to_level( fanout, level );
- sparent = shiftedrank;
- if( sparent < fanout ) {
- sparent = 0;
- } else {
- while( sparent >= slimit ) {
- sparent -= delta/fanout;
- }
- }
- tree->tree_prev = (sparent+root)%size;
-
- return tree;
-}
-
int
smpi_coll_tuned_bcast_ompi_split_bintree ( void* buffer,