#include "colls_private.h"
-
-// This selector is based on information gathered on the Stampede cluster, with Intel MPI 4.1.3.049, and from the intel reference manual. The data was gathered launching one process/node. With other settings, selection will be different (more SMP aware algorithms, for instance)
-
+// This selector is based on information gathered on the Stampede cluster, with Intel MPI 4.1.3.049, and from the Intel reference manual. The data was gathered by launching runs with 1, 2, 4, 8 and 16 processes per node.
#define INTEL_MAX_NB_THRESHOLDS 32
+#define INTEL_MAX_NB_NUMPROCS 12
+#define INTEL_MAX_NB_PPN 5 /* 1 2 4 8 16 ppn */
typedef struct {
int max_size;
int algo;
-} intel_tuning_table_element_element;
+} intel_tuning_table_size_element;
typedef struct {
int max_num_proc;
int num_elems;
- intel_tuning_table_element_element elems[INTEL_MAX_NB_THRESHOLDS];
+ intel_tuning_table_size_element elems[INTEL_MAX_NB_THRESHOLDS];
+} intel_tuning_table_numproc_element;
+
+typedef struct {
+ int ppn;
+ intel_tuning_table_numproc_element elems[INTEL_MAX_NB_NUMPROCS];
} intel_tuning_table_element;
/*
7 - Shumilin's ring algorithm
8 - Ring algorithm
-
-//as Shumilin's ring algorithm is unknown, default to ring'
+ As Shumilin's ring algorithm is unknown, default to ring.
*/
smpi_coll_tuned_allreduce_rdb,
smpi_coll_tuned_allreduce_rab1,
smpi_coll_tuned_allreduce_redbcast,
- smpi_coll_tuned_allreduce_redbcast,
- smpi_coll_tuned_allreduce_smp_binomial,
+ smpi_coll_tuned_allreduce_mvapich2_two_level,
smpi_coll_tuned_allreduce_smp_binomial,
+ smpi_coll_tuned_allreduce_mvapich2_two_level,
smpi_coll_tuned_allreduce_ompi_ring_segmented,
smpi_coll_tuned_allreduce_ompi_ring_segmented
};
intel_tuning_table_element intel_allreduce_table[] =
{
- { 2,9,{
- {6,7},
- {85,1},
- {192,7},
- {853,1},
- {1279,7},
- {16684,1},
- {34279,8},
- {1681224,3},
- {2147483647,7}
- }
- },
- { 4, 8,{
- {16,7},
- {47,1},
- {2062,7},
- {16699,1},
- {33627,7},
- {70732,8},
- {1300705,3},
- {2147483647,8}
- }
- },
- {8,8,{
- {118,1},
- {146,4},
- {16760,1},
- {36364,6},
- {136239,8},
- {315710,7},
- {3220366,3},
- {2147483647,8}
- }
- },
- {16,7,{
- {934,1},
- {1160,6},
- {15505,1},
- {52730,2},
- {300705,8},
- {563680,7},
- {2147483647,3}
- }
- },
- {2147483647,11,{
- {5,6},
- {11,4},
- {182,1},
- {700,6},
- {1450,4},
- {11146,1},
- {25539,6},
- {37634,4},
- {93784,6},
- {817658,2},
- {2147483647,3}
+ {1,{
+ { 2,9,{
+ {6,7},
+ {85,1},
+ {192,7},
+ {853,1},
+ {1279,7},
+ {16684,1},
+ {34279,8},
+ {1681224,3},
+ {2147483647,7}
+ }
+ },
+ { 4, 8,{
+ {16,7},
+ {47,1},
+ {2062,7},
+ {16699,1},
+ {33627,7},
+ {70732,8},
+ {1300705,3},
+ {2147483647,8}
+ }
+ },
+ {8,8,{
+ {118,1},
+ {146,4},
+ {16760,1},
+ {36364,6},
+ {136239,8},
+ {315710,7},
+ {3220366,3},
+ {2147483647,8}
+ }
+ },
+ {16,7,{
+ {934,1},
+ {1160,6},
+ {15505,1},
+ {52730,2},
+ {300705,8},
+ {563680,7},
+ {2147483647,3}
+ }
+ },
+ {2147483647,11,{
+ {5,6},
+ {11,4},
+ {182,1},
+ {700,6},
+ {1450,4},
+ {11146,1},
+ {25539,6},
+ {37634,4},
+ {93784,6},
+ {817658,2},
+ {2147483647,3}
+ }
+ }
+ }
+ },
+ {2,{
+ { 4,6,{
+ {2084,7},
+ {15216,1},
+ {99715,7},
+ {168666,3},
+ {363889,2},
+ {2147483647,7}
+ }
+ },
+ { 8,6,{
+ {14978,1},
+ {66879,2},
+ {179296,8},
+ {304801,3},
+ {704509,7},
+ {2147483647,2}
+ }
+ },
+ { 16,6,{
+ {16405,1},
+ {81784,2},
+ {346385,8},
+ {807546,7},
+ {1259854,2},
+ {2147483647,3}
+ }
+ },
+ { 32,4,{
+ {8913,1},
+ {103578,2},
+ {615876,8},
+ {2147483647,2}
+ }
+ },
+ { 64,7,{
+ {1000,1},
+ {2249,2},
+ {6029,1},
+ {325357,2},
+ {1470976,8},
+ {2556670,7},
+ {2147483647,3}
+ }
+ },
+ { 128,5,{
+ {664,1},
+ {754706,2},
+ {1663862,4},
+ {3269097,2},
+ {2147483647,7}
+ }
+ },
+ { 2147483647,3,{
+ {789,1},
+ {2247589,2},
+ {2147483647,8}
+ }
+ }
+ }
+ },
+ {4,{
+ { 4,4,{
+ {5738,1},
+ {197433,2},
+ {593742,7},
+ {2147483647,2}
+ }
+ },
+ { 8,7,{
+ {5655,1},
+ {75166,2},
+ {177639,8},
+ {988014,3},
+ {1643869,2},
+ {2494859,8},
+ {2147483647,2}
+ }
+ },
+ { 16,7,{
+ {587,1},
+ {3941,2},
+ {9003,1},
+ {101469,2},
+ {355768,8},
+ {3341814,3},
+ {2147483647,8}
+ }
+ },
+ { 32,4,{
+ {795,1},
+ {146567,2},
+ {732118,8},
+ {2147483647,3}
+ }
+ },
+ { 64,4,{
+ {528,1},
+ {221277,2},
+ {1440737,8},
+ {2147483647,3}
+ }
+ },
+ { 128,4,{
+ {481,1},
+ {593833,2},
+ {2962021,8},
+ {2147483647,7}
+ }
+ },
+ { 256,2,{
+ {584,1},
+ {2147483647,2}
+ }
+ },
+ { 2147483647,3,{
+ {604,1},
+ {2997006,2},
+ {2147483647,8}
+ }
+ }
+ }
+ },
+ {8,{
+ { 8,6,{
+ {2560,1},
+ {114230,6},
+ {288510,8},
+ {664038,2},
+ {1339913,6},
+ {2147483647,4}
+ }
+ },
+ { 16,5,{
+ {497,1},
+ {54201,2},
+ {356217,8},
+ {3413609,3},
+ {2147483647,8}
+ }
+ },
+ { 32,5,{
+ {377,1},
+ {109745,2},
+ {716514,8},
+ {3976768,3},
+ {2147483647,8}
+ }
+ },
+ { 64,6,{
+ {109,1},
+ {649,5},
+ {266080,2},
+ {1493331,8},
+ {2541403,7},
+ {2147483647,3}
+ }
+ },
+ { 128,4,{
+ {7,1},
+ {751,5},
+ {408808,2},
+ {2147483647,8}
+ }
+ },
+ { 256,3,{
+ {828,5},
+ {909676,2},
+ {2147483647,8}
+ }
+ },
+ { 512,5,{
+ {847,5},
+ {1007066,2},
+ {1068775,4},
+ {2803389,2},
+ {2147483647,8}
+ }
+ },
+ { 2147483647,3,{
+ {1974,5},
+ {4007876,2},
+ {2147483647,8}
+ }
+ }
+ }
+ },
+ {16,{
+ { 16,12,{
+ {409,1},
+ {768,6},
+ {1365,4},
+ {3071,6},
+ {11299,2},
+ {21746,6},
+ {55629,2},
+ {86065,4},
+ {153867,2},
+ {590560,6},
+ {1448760,2},
+ {2147483647,8},
+ }
+ },
+ { 32,8,{
+ {6,1},
+ {24,5},
+ {86,1},
+ {875,5},
+ {74528,2},
+ {813050,8},
+ {1725981,7},
+ {2147483647,8},
+ }
+ },
+ { 64,6,{
+ {1018,5},
+ {1217,6},
+ {2370,5},
+ {160654,2},
+ {1885487,8},
+ {2147483647,3},
+ }
+ },
+ { 128,4,{
+ {2291,5},
+ {434465,2},
+ {3525103,8},
+ {2147483647,7},
+ }
+ },
+ { 256,3,{
+ {2189,5},
+ {713154,2},
+ {2147483647,8},
+ }
+ },
+ { 512,3,{
+ {2140,5},
+ {1235056,2},
+ {2147483647,8},
+ }
+ },
+ { 2147483647,3,{
+ {2153,5},
+ {2629855,2},
+ {2147483647,8},
+ }
+ }
}
}
};
intel_tuning_table_element intel_alltoall_table[] =
{
+ {1,{
{ 2,1,
{
{2147483647,3}
{2147483647,2}
}
}
+ }
+ },
+ {2, {
+ { 4,4,{
+ {1,2},
+ {75,3},
+ {131072,2},
+ {2147483647,2}
+ }
+ },
+ { 8,3,{
+ {709,1},
+ {131072,2},
+ {2147483647,2}
+ }
+ },
+ { 16,4,{
+ {40048,2},
+ {131072,3},
+ {155927,3},
+ {2147483647,4}
+ }
+ },
+ { 32,7,{
+ {105,1},
+ {130,2},
+ {1030,1},
+ {58893,2},
+ {131072,2},
+ {271838,3},
+ {2147483647,2}
+ }
+ },
+ { 2147483647,8,{
+ {521,1},
+ {2032,4},
+ {2412,2},
+ {4112,4},
+ {61620,2},
+ {131072,3},
+ {427408,3},
+ {2147483647,4}
+ }
+ }
+ }
+ },
+ {4,{
+ { 8,3,{
+ {512,1},
+ {32768,2},
+ {2147483647,2}
+ }
+ },
+ { 64,8,{
+ {7,1},
+ {199,4},
+ {764,1},
+ {6409,4},
+ {20026,2},
+ {32768,3},
+ {221643,4},
+ {2147483647,3}
+ }
+ },
+ { 2147483647,7,{
+ {262,1},
+ {7592,4},
+ {22871,2},
+ {32768,3},
+ {47538,3},
+ {101559,4},
+ {2147483647,3}
+ }
+ }
+ }
+ },
+ {8,{
+ { 16,6,{
+ {973,1},
+ {5126,4},
+ {16898,2},
+ {32768,4},
+ {65456,4},
+ {2147483647,2}
+ }
+ },
+ { 32,7,{
+ {874,1},
+ {6727,4},
+ {17912,2},
+ {32768,3},
+ {41513,3},
+ {199604,4},
+ {2147483647,3}
+ }
+ },
+ { 64,8,{
+ {5,1},
+ {114,4},
+ {552,1},
+ {8130,4},
+ {32768,3},
+ {34486,3},
+ {160113,4},
+ {2147483647,3}
+ }
+ },
+ { 128,6,{
+ {270,1},
+ {3679,4},
+ {32768,3},
+ {64367,3},
+ {146595,4},
+ {2147483647,3}
+ }
+ },
+ { 2147483647,5,{ /* num_elems must equal the number of {max_size, algo} pairs listed below (5) */
+ {133,1},
+ {4017,4},
+ {32768,3},
+ {76351,4},
+ {2147483647,3}
+ }
+ }
+ }
+ },
+ {16,{
+ { 32,7,{
+ {963,1},
+ {1818,4},
+ {20007,2},
+ {32768,4},
+ {54296,4},
+ {169735,3},
+ {2147483647,2}
+ }
+ },
+ { 64,11,{
+ {17,1},
+ {42,4},
+ {592,1},
+ {2015,4},
+ {2753,2},
+ {6496,3},
+ {20402,4},
+ {32768,3},
+ {36246,3},
+ {93229,4},
+ {2147483647,3}
+ }
+ },
+ { 128,9,{
+ {18,1},
+ {40,4},
+ {287,1},
+ {1308,4},
+ {6842,1},
+ {32768,3},
+ {36986,3},
+ {129081,4},
+ {2147483647,3}
+ }
+ },
+ { 256,7,{
+ {135,1},
+ {1538,4},
+ {3267,1},
+ {4132,3},
+ {31469,4},
+ {32768,3},
+ {2147483647,3}
+ }
+ },
+ { 2147483647,8,{
+ {66,1},
+ {1637,4},
+ {2626,1},
+ {4842,4},
+ {32768,3},
+ {33963,3},
+ {72978,4},
+ {2147483647,3}
+ }
+ }
+ }
+ }
};
int (*intel_alltoall_functions_table[])(void *sbuf, int scount,
MPI_Datatype sdtype,
smpi_coll_tuned_alltoall_bruck,
smpi_coll_tuned_alltoall_mvapich2_scatter_dest,
smpi_coll_tuned_alltoall_pair,
- smpi_coll_tuned_alltoall_pair//Plum is proprietary ? (and super efficient)
+ smpi_coll_tuned_alltoall_mvapich2//Plum is proprietary ? (and super efficient)
};
/*I_MPI_ADJUST_BARRIER
intel_tuning_table_element intel_barrier_table[] =
{
+ {1,{
{2,1,
{
{2147483647,2}
{2147483647,6}
}
}
+ }
+ },
+ {2,{
+ { 2,1,{
+ {2147483647,1}
+ }
+ },
+ { 4,1,{
+ {2147483647,3}
+ }
+ },
+ { 8,1,{
+ {2147483647,5}
+ }
+ },
+ { 32,1,{
+ {2147483647,2}
+ }
+ },
+ { 128,1,{
+ {2147483647,3}
+ }
+ },
+ { 2147483647,1,{
+ {2147483647,4}
+ }
+ }
+ }
+ },
+ {4,{
+ { 4,1,{
+ {2147483647,2}
+ }
+ },
+ { 8,1,{
+ {2147483647,5}
+ }
+ },
+ { 32,1,{
+ {2147483647,2}
+ }
+ },
+ { 2147483647,1,{
+ {2147483647,4}
+ }
+ }
+ }
+ },
+ {8,{
+ { 8,1,{
+ {2147483647,1}
+ }
+ },
+ { 32,1,{
+ {2147483647,2}
+ }
+ },
+ { 2147483647,1,{
+ {2147483647,4}
+ }
+ }
+ }
+ },
+ {16,{
+ { 4,1,{
+ {2147483647,2}
+ }
+ },
+ { 8,1,{
+ {2147483647,5}
+ }
+ },
+ { 32,1,{
+ {2147483647,2}
+ }
+ },
+ { 2147483647,1,{
+ {2147483647,4}
+ }
+ }
+ }
+ }
};
smpi_coll_tuned_bcast_NTSL,
smpi_coll_tuned_bcast_SMP_binomial,
//smpi_coll_tuned_bcast_scatter_rdb_allgather,
- smpi_coll_tuned_bcast_NTSL,
+ smpi_coll_tuned_bcast_NTSL,
smpi_coll_tuned_bcast_SMP_linear,
smpi_coll_tuned_bcast_mvapich2,//we don't know shumilin's algo'
};
intel_tuning_table_element intel_bcast_table[] =
{
+ {1,{
{2,9,
{
{1,2},
{2147483647,7}
}
}
+ }
+ },
+ {2,{
+ { 4,6,{
+ {806,4},
+ {18093,7},
+ {51366,6},
+ {182526,4},
+ {618390,1},
+ {2147483647,7}
+ }
+ },
+ { 8,6,{
+ {24,1},
+ {74,4},
+ {18137,1},
+ {614661,7},
+ {1284626,1},
+ {2147483647,2}
+ }
+ },
+ { 16,4,{
+ {1,1},
+ {158,7},
+ {16955,1},
+ {2147483647,7}
+ }
+ },
+ { 32,3,{
+ {242,7},
+ {10345,1},
+ {2147483647,7}
+ }
+ },
+ { 2147483647,4,{
+ {1,1},
+ {737,7},
+ {5340,1},
+ {2147483647,7}
+ }
+ }
+ }
+ },
+ {4,{
+ { 8,4,{
+ {256,4},
+ {17181,1},
+ {1048576,7},
+ {2147483647,7}
+ }
+ },
+ { 2147483647,1,{
+ {2147483647,7}
+ }
+ }
+ }
+ },
+ {8,{
+ { 16,5,{
+ {3,1},
+ {318,7},
+ {1505,1},
+ {1048576,7},
+ {2147483647,7}
+ }
+ },
+ { 32,3,{
+ {422,7},
+ {851,1},
+ {2147483647,7}
+ }
+ },
+ { 64,3,{
+ {468,7},
+ {699,1},
+ {2147483647,7}
+ }
+ },
+ { 2147483647,1,{
+ {2147483647,7}
+ }
+ }
+ }
+ },
+ {16,{
+ { 8,4,{
+ {256,4},
+ {17181,1},
+ {1048576,7},
+ {2147483647,7}
+ }
+ },
+ { 2147483647,1,{
+ {2147483647,7}
+ }
+ }
+ }
+ }
};
smpi_coll_tuned_reduce_mvapich2,
smpi_coll_tuned_reduce_binomial,
smpi_coll_tuned_reduce_mvapich2,
- smpi_coll_tuned_reduce_binomial,
+ smpi_coll_tuned_reduce_mvapich2_two_level,
smpi_coll_tuned_reduce_rab,
smpi_coll_tuned_reduce_rab
};
intel_tuning_table_element intel_reduce_table[] =
{
+ {1,{
{2147483647,1,
- {
- {2147483647,1}
- }
+ {
+ {2147483647,1}
+ }
+ }
+ }
+ },
+ {2,{
+ { 2,1,{
+ {2147483647,1}
}
+ },
+ { 4,2,{
+ {10541,3},
+ {2147483647,1}
+ }
+ },
+ { 2147483647,1,{
+ {2147483647,1}
+ }
+ }
+ }
+ },
+ {4,{
+ { 256,1,{
+ {2147483647,1}
+ }
+ },
+ { 2147483647,2,{
+ {45,3},
+ {2147483647,1}
+ }
+ }
+ }
+ },
+ {8,{
+ { 512,1,{
+ {2147483647,1}
+ }
+ },
+ { 2147483647,3,{
+ {5,1},
+ {11882,3},
+ {2147483647,1}
+ }
+ }
+ }
+ },
+ {16,{
+ { 256,1,{
+ {2147483647,1}
+ }
+ },
+ { 2147483647,2,{
+ {45,3},
+ {2147483647,1}
+ }
+ }
+ }
+ }
};
/* I_MPI_ADJUST_REDUCE_SCATTER
intel_tuning_table_element intel_reduce_scatter_table[] =
{
+ {1,{
{2,5,
{
{5,4},
{2147483647,5}
}
}
+ }
+ },
+ {2,{
+ { 2,2,{
+ {6,1},
+ {2147483647,2}
+ }
+ },
+ { 4,7,{
+ {5,4},
+ {13,5},
+ {59,3},
+ {76,1},
+ {91488,3},
+ {680063,4},
+ {2147483647,2}
+ }
+ },
+ { 8,8,{
+ {4,4},
+ {11,5},
+ {31,1},
+ {69615,3},
+ {202632,2},
+ {396082,5},
+ {1495696,4},
+ {2147483647,2}
+ }
+ },
+ { 16,4,{ /* num_elems must equal the number of {max_size, algo} pairs listed below (4); with 1 the lookup never got past the second pair */
+ {4,4},
+ {345,1},
+ {79523,3},
+ {2147483647,2}
+ }
+ },
+ { 32,5,{
+ {0,3},
+ {4,4},
+ {992,1},
+ {71417,3},
+ {2147483647,2}
+ }
+ },
+ { 64,4,{
+ {4,4},
+ {1472,1},
+ {196592,3},
+ {2147483647,2}
+ }
+ },
+ { 128,5,{
+ {0,3},
+ {4,4},
+ {32892,1},
+ {381072,3},
+ {2147483647,2}
+ }
+ },
+ { 2147483647,6,{
+ {0,2},
+ {4,4},
+ {33262,1},
+ {1571397,3},
+ {2211398,5},
+ {2147483647,4}
+ }
+ }
+ }
+ },
+ {4,{
+ { 4,7,{
+ {12,4},
+ {27,5},
+ {49,3},
+ {187,1},
+ {405673,3},
+ {594687,4},
+ {2147483647,2}
+ }
+ },
+ { 8,5,{
+ {24,5},
+ {155,1},
+ {204501,3},
+ {274267,5},
+ {2147483647,4}
+ }
+ },
+ { 16,6,{
+ {63,1},
+ {72,3},
+ {264,1},
+ {168421,3},
+ {168421,4},
+ {2147483647,2}
+ }
+ },
+ { 32,10,{
+ {0,3},
+ {4,4},
+ {12,1},
+ {18,5},
+ {419,1},
+ {188739,3},
+ {716329,4},
+ {1365841,5},
+ {2430194,2},
+ {2147483647,4}
+ }
+ },
+ { 64,8,{
+ {0,3},
+ {4,4},
+ {17,5},
+ {635,1},
+ {202937,3},
+ {308253,5},
+ {1389874,4},
+ {2147483647,2}
+ }
+ },
+ { 128,8,{
+ {0,3},
+ {4,4},
+ {16,5},
+ {1238,1},
+ {280097,3},
+ {631434,5},
+ {2605072,4},
+ {2147483647,2}
+ }
+ },
+ { 256,5,{
+ {0,2},
+ {4,4},
+ {16,5},
+ {2418,1},
+ {2147483647,3}
+ }
+ },
+ { 2147483647,6,{
+ {0,2},
+ {4,4},
+ {16,5},
+ {33182,1},
+ {3763779,3},
+ {2147483647,4}
+ }
+ }
+ }
+ },
+ {8,{
+ { 8,6,{
+ {5,4},
+ {494,1},
+ {97739,3},
+ {522836,2},
+ {554174,5},
+ {2147483647,2}
+ }
+ },
+ { 16,8,{
+ {5,4},
+ {62,1},
+ {94,3},
+ {215,1},
+ {185095,3},
+ {454784,4},
+ {607911,5},
+ {2147483647,4}
+ }
+ },
+ { 32,7,{
+ {0,3},
+ {4,4},
+ {302,1},
+ {250841,3},
+ {665822,4},
+ {1760980,5},
+ {2147483647,4}
+ }
+ },
+ { 64,8,{
+ {0,3},
+ {4,4},
+ {41,5},
+ {306,1},
+ {332405,3},
+ {1269189,4},
+ {3712421,5},
+ {2147483647,4}
+ }
+ },
+ { 128,6,{
+ {0,3},
+ {4,4},
+ {39,5},
+ {526,1},
+ {487878,3},
+ {2147483647,4}
+ }
+ },
+ { 256,8,{
+ {0,2},
+ {4,4},
+ {36,5},
+ {1382,1},
+ {424162,3},
+ {632881,5},
+ {1127566,3},
+ {2147483647,4}
+ }
+ },
+ { 512,4,{
+ {4,4},
+ {34,5},
+ {5884,1},
+ {2147483647,3}
+ }
+ },
+ { 2147483647,4,{
+ {5,4},
+ {32,5},
+ {25105,1},
+ {2147483647,3}
+ }
+ }
+ }
+ },
+ {16,{
+ { 4,7,{
+ {12,4},
+ {27,5},
+ {49,3},
+ {187,1},
+ {405673,3},
+ {594687,4},
+ {2147483647,2}
+ }
+ },
+ { 8,5,{
+ {24,5},
+ {155,1},
+ {204501,3},
+ {274267,5},
+ {2147483647,4}
+ }
+ },
+ { 16,6,{
+ {63,1},
+ {72,3},
+ {264,1},
+ {168421,3},
+ {168421,4},
+ {2147483647,2}
+ }
+ },
+ { 32,10,{
+ {0,3},
+ {4,4},
+ {12,1},
+ {18,5},
+ {419,1},
+ {188739,3},
+ {716329,4},
+ {1365841,5},
+ {2430194,2},
+ {2147483647,4}
+ }
+ },
+ { 64,8,{
+ {0,3},
+ {4,4},
+ {17,5},
+ {635,1},
+ {202937,3},
+ {308253,5},
+ {1389874,4},
+ {2147483647,2}
+ }
+ },
+ { 128,8,{
+ {0,3},
+ {4,4},
+ {16,5},
+ {1238,1},
+ {280097,3},
+ {631434,5},
+ {2605072,4},
+ {2147483647,2}
+ }
+ },
+ { 256,5,{
+ {0,2},
+ {4,4},
+ {16,5},
+ {2418,1},
+ {2147483647,3}
+ }
+ },
+ { 2147483647,6,{
+ {0,2},
+ {4,4},
+ {16,5},
+ {33182,1},
+ {3763779,3},
+ {2147483647,4}
+ }
+ }
+ }
+ }
};
/* I_MPI_ADJUST_ALLGATHER
intel_tuning_table_element intel_allgather_table[] =
{
+ {1,{
{4,11,
{
{1,4},
{2147483647,4}
}
}
+ }
+ },
+ {2,{
+ { 8,6,{
+ {490,1},
+ {558,2},
+ {2319,1},
+ {46227,3},
+ {2215101,1},
+ {2147483647,3}
+ }
+ },
+ { 16,4,{
+ {1005,1},
+ {1042,2},
+ {2059,1},
+ {2147483647,3}
+ }
+ },
+ { 2147483647,2,{
+ {2454,1},
+ {2147483647,3}
+ }
+ }
+ }
+ },
+ {4,{
+ { 8,2,{
+ {2861,1},
+ {2147483647,3}
+ }
+ },
+ { 2147483647,2,{
+ {605,1},
+ {2147483647,3}
+ }
+ }
+ }
+ },
+ {8,{
+ { 16,4,{
+ {66,1},
+ {213,4},
+ {514,1},
+ {2147483647,3}
+ }
+ },
+ { 32,4,{
+ {91,1},
+ {213,4},
+ {514,1},
+ {2147483647,3}
+ }
+ },
+ { 64,4,{
+ {71,1},
+ {213,4},
+ {514,1},
+ {2147483647,3}
+ }
+ },
+ { 128,2,{
+ {305,1},
+ {2147483647,3}
+ }
+ },
+ { 2147483647,2,{
+ {213,1},
+ {2147483647,3}
+ }
+ }
+ }
+ },
+ {16,{
+ { 8,2,{
+ {2861,1},
+ {2147483647,3}
+ }
+ },
+ { 2147483647,2,{
+ {605,1},
+ {2147483647,3}
+ }
+ }
+ }
+ }
};
/* I_MPI_ADJUST_ALLGATHERV
intel_tuning_table_element intel_allgatherv_table[] =
{
+ {1,{
{2,3,
{
{259668,3},
{2147483647,3}
}
}
+ }
+ },
+ {2,{
+ { 4,3,{
+ {3147,1},
+ {5622,2},
+ {2147483647,3}
+ }
+ },
+ { 8,3,{
+ {975,1},
+ {4158,2},
+ {2147483647,3}
+ }
+ },
+ { 16,2,{
+ {2146,1},
+ {2147483647,3}
+ }
+ },
+ { 32,4,{
+ {81,1},
+ {414,2},
+ {1190,1},
+ {2147483647,3}
+ }
+ },
+ { 2147483647,5,{
+ {1,2},
+ {3,1},
+ {783,2},
+ {1782,4},
+ {2147483647,3}
+ }
+ }
+ }
+ },
+ {4,{
+ { 8,2,{
+ {2554,1},
+ {2147483647,3}
+ }
+ },
+ { 16,4,{
+ {272,1},
+ {657,2},
+ {2078,1},
+ {2147483647,3}
+ }
+ },
+ { 32,2,{
+ {1081,1},
+ {2147483647,3}
+ }
+ },
+ { 64,2,{
+ {547,1},
+ {2147483647,3}
+ }
+ },
+ { 2147483647,5,{
+ {19,1},
+ {239,2},
+ {327,1},
+ {821,4},
+ {2147483647,3}
+ }
+ }
+ }
+ },
+ {8,{
+ { 16,3,{
+ {55,1},
+ {514,2},
+ {2147483647,3}
+ }
+ },
+ { 32,4,{
+ {53,1},
+ {167,4},
+ {514,2},
+ {2147483647,3}
+ }
+ },
+ { 64,3,{
+ {13,1},
+ {319,4},
+ {2147483647,3}
+ }
+ },
+ { 128,7,{
+ {2,1},
+ {11,2},
+ {48,1},
+ {201,2},
+ {304,1},
+ {1048,4},
+ {2147483647,3}
+ }
+ },
+ { 2147483647,5,{
+ {5,1},
+ {115,4},
+ {129,1},
+ {451,4},
+ {2147483647,3}
+ }
+ }
+ }
+ },
+ {16,{
+ { 8,2,{
+ {2554,1},
+ {2147483647,3}
+ }
+ },
+ { 16,4,{
+ {272,1},
+ {657,2},
+ {2078,1},
+ {2147483647,3}
+ }
+ },
+ { 32,2,{
+ {1081,1},
+ {2147483647,3}
+ }
+ },
+ { 64,2,{
+ {547,1},
+ {2147483647,3}
+ }
+ },
+ { 2147483647,5,{
+ {19,1},
+ {239,2},
+ {327,1},
+ {821,4},
+ {2147483647,3}
+ }
+ }
+ }
+ }
};
intel_tuning_table_element intel_gather_table[] =
{
+ {1,{
{8,3,
{
{17561,3},
{2147483647,1}
}
}
+ }
+ },
+ {2,{
+ {2147483647,1,{
+ {2147483647,3}
+ }
+ }
+ }
+ },
+ {4,{
+ {2147483647,1,{
+ {2147483647,3}
+ }
+ }
+ }
+ },
+ {8,{
+ { 16,1,{
+ {2147483647,3}
+ }
+ },
+ { 32,2,{
+ {9,2},
+ {2147483647,3}
+ }
+ },
+ { 64,2,{
+ {784,2},
+ {2147483647,3}
+ }
+ },
+ { 128,3,{
+ {160,3},
+ {655,2},
+ {2147483647,3}
+ }
+ },
+ { 2147483647,1,{
+ {2147483647,3}
+ }
+ }
+ }
+ },
+ {16,{
+ {2147483647,1,{
+ {2147483647,3}
+ }
+ }
+ }
+ }
};
intel_tuning_table_element intel_scatter_table[] =
{
+ {1,{
{2,2,
{
{16391,1},
{2147483647,1}
}
}
+ }
+ },
+ {2,{
+ {2147483647,1,{
+ {2147483647,3}
+ }
+ }
+ }
+ },
+ {4,{
+ { 8,1,{
+ {2147483647,3}
+ }
+ },
+ { 16,3,{ /* num_elems must equal the number of {max_size, algo} pairs listed below (3), as in the ppn=16 copy of this entry */
+ {140,3},
+ {1302,1},
+ {2147483647,3}
+ }
+ },
+ { 32,3,{ /* 3 pairs listed, same as the ppn=16 copy of this entry */
+ {159,3},
+ {486,1},
+ {2147483647,3}
+ }
+ },
+ { 64,2,{
+ {149,1},
+ {2147483647,3}
+ }
+ },
+ { 2147483647,2,{
+ {139,1},
+ {2147483647,3}
+ }
+ }
+ }
+ },
+ {8,{
+ { 16,4,{
+ {587,1},
+ {1370,2},
+ {2102,1},
+ {2147483647,3}
+ }
+ },
+ { 32,3,{
+ {1038,1},
+ {2065,2},
+ {2147483647,3}
+ }
+ },
+ { 64,3,{
+ {515,1},
+ {2069,2},
+ {2147483647,3}
+ }
+ },
+ { 128,3,{
+ {284,1},
+ {796,2},
+ {2147483647,3}
+ }
+ },
+ { 2147483647,2,{
+ {139,1},
+ {2147483647,3}
+ }
+ }
+ }
+ },
+ {16,{
+ { 8,1,{
+ {2147483647,3}
+ }
+ },
+ { 16,3,{
+ {140,3},
+ {1302,1},
+ {2147483647,3}
+ }
+ },
+ { 32,3,{
+ {159,3},
+ {486,1},
+ {2147483647,3}
+ }
+ },
+ { 64,2,{
+ {149,1},
+ {2147483647,3}
+ }
+ },
+ { 2147483647,2,{
+ {139,1},
+ {2147483647,3}
+ }
+ }
+ }
+ }
};
intel_tuning_table_element intel_alltoallv_table[] =
{
+ {1,{
+ {2147483647,1,
+ {
+ {2147483647,1}
+ }
+ }
+ }
+ },
+ {2,{
{2147483647,1,
{
{2147483647,1}
}
}
+ }
+ },
+ {4,{
+ { 8,1,{
+ {2147483647,1}//weirdly, intel reports the use of algo 0 here
+ }
+ },
+ { 2147483647,2,{
+ {4,1},//0 again
+ {2147483647,2}
+ }
+ }
+ }
+ },
+ {8,{
+ { 16,1,{
+ {2147483647,1}
+ }
+ },
+ { 2147483647,2,{
+ {0,1},//weird again, only for 0-sized messages
+ {2147483647,2}
+ }
+ }
+ }
+ },
+ {16,{
+ { 8,1,{
+ {2147483647,1}//0
+ }
+ },
+ { 2147483647,2,{
+ {4,1},//0
+ {2147483647,2}
+ }
+ }
+ }
+ }
};
int i =0;\
SIZECOMP_ ## cat\
i=0;\
- int j =0;\
- while(comm_size>=intel_ ## cat ## _table[i].max_num_proc\
- && i < INTEL_MAX_NB_THRESHOLDS)\
+ int j =0, k=0;\
+ if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){\
+ smpi_comm_init_smp(comm);\
+ }\
+ int local_size=1;\
+ if (smpi_comm_is_uniform(comm)) {\
+ local_size = smpi_comm_size(smpi_comm_get_intra_comm(comm));\
+ }\
+ while(i < INTEL_MAX_NB_PPN &&\
+ local_size!=intel_ ## cat ## _table[i].ppn)\
i++;\
- while(block_dsize >=intel_ ## cat ## _table[i].elems[j].max_size\
- && j< intel_ ## cat ## _table[i].num_elems)\
+ if(i==INTEL_MAX_NB_PPN) i=0;\
+ while(comm_size>intel_ ## cat ## _table[i].elems[j].max_num_proc\
+ && j < INTEL_MAX_NB_THRESHOLDS)\
j++;\
- return (intel_ ## cat ## _functions_table[intel_ ## cat ## _table[i].elems[j].algo-1]\
+ while(block_dsize >=intel_ ## cat ## _table[i].elems[j].elems[k].max_size\
+ && k< intel_ ## cat ## _table[i].elems[j].num_elems)\
+ k++;\
+ return (intel_ ## cat ## _functions_table[intel_ ## cat ## _table[i].elems[j].elems[k].algo-1]\
args2);\
}