From: Augustin Degomme Date: Wed, 20 Aug 2014 14:52:01 +0000 (+0200) Subject: Add settings for Intel MPI with 2,4,8,16 processes/node on Stampede X-Git-Tag: v3_12~850^2~10 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/cd37de87d7609dfe0b73aee81f3dd8c571b6f2db Add settings for Intel MPI with 2,4,8,16 processes/node on Stampede --- diff --git a/src/smpi/colls/smpi_intel_mpi_selector.c b/src/smpi/colls/smpi_intel_mpi_selector.c index a1e94bf04b..e2fbbe9271 100644 --- a/src/smpi/colls/smpi_intel_mpi_selector.c +++ b/src/smpi/colls/smpi_intel_mpi_selector.c @@ -9,20 +9,27 @@ #include "colls_private.h" -// This selector is based on information gathered on the Stampede cluster, with Intel MPI 4.1.3.049, and from the intel reference manual. The data was gathered launching one process/node. With other settings, selection will be different (more SMP aware algorithms, for instance) +// This selector is based on information gathered on the Stampede cluster, with Intel MPI 4.1.3.049, and from the intel reference manual. The data was gathered launching runs with 1,2,4,8,16 processes per node. #define INTEL_MAX_NB_THRESHOLDS 32 +#define INTEL_MAX_NB_NUMPROCS 12 +#define INTEL_MAX_NB_PPN 5 /* 1 2 4 8 16 ppn */ typedef struct { int max_size; int algo; -} intel_tuning_table_element_element; +} intel_tuning_table_size_element; typedef struct { int max_num_proc; int num_elems; - intel_tuning_table_element_element elems[INTEL_MAX_NB_THRESHOLDS]; + intel_tuning_table_size_element elems[INTEL_MAX_NB_THRESHOLDS]; +} intel_tuning_table_numproc_element; + +typedef struct { + int ppn; + intel_tuning_table_numproc_element elems[INTEL_MAX_NB_NUMPROCS]; } intel_tuning_table_element; /* @@ -52,71 +59,324 @@ int (*intel_allreduce_functions_table[])(void *sendbuf, smpi_coll_tuned_allreduce_rdb, smpi_coll_tuned_allreduce_rab1, smpi_coll_tuned_allreduce_redbcast, - smpi_coll_tuned_allreduce_redbcast, - smpi_coll_tuned_allreduce_smp_binomial, + smpi_coll_tuned_allreduce_mvapich2_two_level, smpi_coll_tuned_allreduce_smp_binomial, + smpi_coll_tuned_allreduce_mvapich2_two_level, smpi_coll_tuned_allreduce_ompi_ring_segmented, smpi_coll_tuned_allreduce_ompi_ring_segmented }; intel_tuning_table_element intel_allreduce_table[] = { - { 2,9,{ - {6,7}, - {85,1}, - {192,7}, - {853,1}, - {1279,7}, - {16684,1}, - {34279,8}, - {1681224,3}, - {2147483647,7} - } - }, - { 4, 8,{ - {16,7}, - {47,1}, - {2062,7}, - {16699,1}, - {33627,7}, - {70732,8}, - {1300705,3}, - {2147483647,8} - } - }, - {8,8,{ - {118,1}, - {146,4}, - {16760,1}, - {36364,6}, - {136239,8}, - {315710,7}, - {3220366,3}, - {2147483647,8} - } - }, - {16,7,{ - {934,1}, - {1160,6}, - {15505,1}, - {52730,2}, - {300705,8}, - {563680,7}, - {2147483647,3} - } - }, - {2147483647,11,{ - {5,6}, - {11,4}, - {182,1}, - {700,6}, - {1450,4}, - {11146,1}, - {25539,6}, - {37634,4}, - {93784,6}, - {817658,2}, - {2147483647,3} + {1,{ + { 2,9,{ + {6,7}, + {85,1}, + {192,7}, + {853,1}, + {1279,7}, + {16684,1}, + {34279,8}, + {1681224,3}, + {2147483647,7} + } + }, + { 4, 8,{ + {16,7}, + {47,1}, + {2062,7}, + {16699,1}, + {33627,7}, + {70732,8}, + {1300705,3}, + {2147483647,8} + } + }, + {8,8,{ + {118,1}, + {146,4}, + {16760,1}, + {36364,6}, + {136239,8}, + {315710,7}, + {3220366,3}, + {2147483647,8} + } + }, + {16,7,{ + {934,1}, + {1160,6}, + {15505,1}, + {52730,2}, + {300705,8}, + {563680,7}, + {2147483647,3} + } + }, + {2147483647,11,{ + {5,6}, + {11,4}, + {182,1}, + {700,6}, + {1450,4}, + {11146,1}, + {25539,6}, + {37634,4}, + {93784,6}, + {817658,2}, + {2147483647,3} + } + } + } + }, + {2,{ + { 4,6,{ + {2084,7}, + {15216,1}, + {99715,7}, + {168666,3}, + {363889,2}, + {2147483647,7} + } + }, + { 8,6,{ + {14978,1}, + {66879,2}, + {179296,8}, + {304801,3}, + {704509,7}, + {2147483647,2} + } + }, + { 16,6,{ + {16405,1}, + {81784,2}, + {346385,8}, + {807546,7}, + {1259854,2}, + {2147483647,3} + } + }, + { 32,4,{ + {8913,1}, + {103578,2}, + {615876,8}, + {2147483647,2} + } + }, + { 64,7,{ + {1000,1}, + {2249,2}, + {6029,1}, + {325357,2}, + {1470976,8}, + {2556670,7}, + {2147483647,3} + } + }, + { 128,5,{ + {664,1}, + {754706,2}, + {1663862,4}, + {3269097,2}, + {2147483647,7} + } + }, + { 2147483647,3,{ + {789,1}, + {2247589,2}, + {2147483647,8} + } + } + } + }, + {4,{ + { 4,4,{ + {5738,1}, + {197433,2}, + {593742,7}, + {2147483647,2} + } + }, + { 8,7,{ + {5655,1}, + {75166,2}, + {177639,8}, + {988014,3}, + {1643869,2}, + {2494859,8}, + {2147483647,2} + } + }, + { 16,7,{ + {587,1}, + {3941,2}, + {9003,1}, + {101469,2}, + {355768,8}, + {3341814,3}, + {2147483647,8} + } + }, + { 32,4,{ + {795,1}, + {146567,2}, + {732118,8}, + {2147483647,3} + } + }, + { 64,4,{ + {528,1}, + {221277,2}, + {1440737,8}, + {2147483647,3} + } + }, + { 128,4,{ + {481,1}, + {593833,2}, + {2962021,8}, + {2147483647,7} + } + }, + { 256,2,{ + {584,1}, + {2147483647,2} + } + }, + { 2147483647,3,{ + {604,1}, + {2997006,2}, + {2147483647,8} + } + } + } + }, + {8,{ + { 8,6,{ + {2560,1}, + {114230,6}, + {288510,8}, + {664038,2}, + {1339913,6}, + {2147483647,4} + } + }, + { 16,5,{ + {497,1}, + {54201,2}, + {356217,8}, + {3413609,3}, + {2147483647,8} + } + }, + { 32,5,{ + {377,1}, + {109745,2}, + {716514,8}, + {3976768,3}, + {2147483647,8} + } + }, + { 64,6,{ + {109,1}, + {649,5}, + {266080,2}, + {1493331,8}, + {2541403,7}, + {2147483647,3} + } + }, + { 128,4,{ + {7,1}, + {751,5}, + {408808,2}, + {2147483647,8} + } + }, + { 256,3,{ + {828,5}, + {909676,2}, + {2147483647,8} + } + }, + { 512,5,{ + {847,5}, + {1007066,2}, + {1068775,4}, + {2803389,2}, + {2147483647,8} + } + }, + { 2147483647,3,{ + {1974,5}, + {4007876,2}, + {2147483647,8} + } + } + } + }, + {16,{ + { 16,12,{ + {409,1}, + {768,6}, + {1365,4}, + {3071,6}, + {11299,2}, + {21746,6}, + {55629,2}, + {86065,4}, + {153867,2}, + {590560,6}, + {1448760,2}, + {2147483647,8}, + } + }, + { 32,8,{ + {6,1}, + {24,5}, + {86,1}, + {875,5}, + {74528,2}, + {813050,8}, + {1725981,7}, + {2147483647,8}, + } + }, + { 64,6,{ + {1018,5}, + {1217,6}, + {2370,5}, + {160654,2}, + {1885487,8}, + {2147483647,3}, + } + }, + { 128,4,{ + {2291,5}, + {434465,2}, + {3525103,8}, + {2147483647,7}, + } + }, + { 256,3,{ + {2189,5}, + {713154,2}, + {2147483647,8}, + } + }, + { 512,3,{ + {2140,5}, + {1235056,2}, + {2147483647,8}, + } + }, + { 2147483647,3,{ + {2153,5}, + {2629855,2}, + {2147483647,8}, + } + } } } }; @@ -137,6 +397,7 @@ MPI_Alltoall intel_tuning_table_element intel_alltoall_table[] = { + {1,{ { 2,1, { {2147483647,3} @@ -187,6 +448,192 @@ intel_tuning_table_element intel_alltoall_table[] = {2147483647,2} } } + } + }, + {2, { + { 4,4,{ + {1,2}, + {75,3}, + {131072,2}, + {2147483647,2} + } + }, + { 8,3,{ + {709,1}, + {131072,2}, + {2147483647,2} + } + }, + { 16,4,{ + {40048,2}, + {131072,3}, + {155927,3}, + {2147483647,4} + } + }, + { 32,7,{ + {105,1}, + {130,2}, + {1030,1}, + {58893,2}, + {131072,2}, + {271838,3}, + {2147483647,2} + } + }, + { 2147483647,8,{ + {521,1}, + {2032,4}, + {2412,2}, + {4112,4}, + {61620,2}, + {131072,3}, + {427408,3}, + {2147483647,4} + } + } + } + }, + {4,{ + { 8,3,{ + {512,1}, + {32768,2}, + {2147483647,2} + } + }, + { 64,8,{ + {7,1}, + {199,4}, + {764,1}, + {6409,4}, + {20026,2}, + {32768,3}, + {221643,4}, + {2147483647,3} + } + }, + { 2147483647,7,{ + {262,1}, + {7592,4}, + {22871,2}, + {32768,3}, + {47538,3}, + {101559,4}, + {2147483647,3} + } + } + } + }, + {8,{ + { 16,6,{ + {973,1}, + {5126,4}, + {16898,2}, + {32768,4}, + {65456,4}, + {2147483647,2} + } + }, + { 32,7,{ + {874,1}, + {6727,4}, + {17912,2}, + {32768,3}, + {41513,3}, + {199604,4}, + {2147483647,3} + } + }, + { 64,8,{ + {5,1}, + {114,4}, + {552,1}, + {8130,4}, + {32768,3}, + {34486,3}, + {160113,4}, + {2147483647,3} + } + }, + { 128,6,{ + {270,1}, + {3679,4}, + {32768,3}, + {64367,3}, + {146595,4}, + {2147483647,3} + } + }, + { 2147483647,4,{ + {133,1}, + {4017,4}, + {32768,3}, + {76351,4}, + {2147483647,3} + } + } + } + }, + {16,{ + { 32,7,{ + {963,1}, + {1818,4}, + {20007,2}, + {32768,4}, + {54296,4}, + {169735,3}, + {2147483647,2} + } + }, + { 64,11,{ + {17,1}, + {42,4}, + {592,1}, + {2015,4}, + {2753,2}, + {6496,3}, + {20402,4}, + {32768,3}, + {36246,3}, + {93229,4}, + {2147483647,3} + } + }, + { 128,9,{ + {18,1}, + {40,4}, + {287,1}, + {1308,4}, + {6842,1}, + {32768,3}, + {36986,3}, + {129081,4}, + {2147483647,3} + } + }, + { 256,7,{ + {135,1}, + {1538,4}, + {3267,1}, + {4132,3}, + {31469,4}, + {32768,3}, + {2147483647,3} + } + }, + { 2147483647,8,{ + {66,1}, + {1637,4}, + {2626,1}, + {4842,4}, + {32768,3}, + {33963,3}, + {72978,4}, + {2147483647,3} + } + } + } + } }; int (*intel_alltoall_functions_table[])(void *sbuf, int scount, MPI_Datatype sdtype, @@ -196,7 +643,7 @@ int (*intel_alltoall_functions_table[])(void *sbuf, int scount, smpi_coll_tuned_alltoall_bruck, smpi_coll_tuned_alltoall_mvapich2_scatter_dest, smpi_coll_tuned_alltoall_pair, - smpi_coll_tuned_alltoall_pair//Plum is proprietary ? (and super efficient) + smpi_coll_tuned_alltoall_mvapich2//Plum is proprietary ? (and super efficient) }; /*I_MPI_ADJUST_BARRIER @@ -228,6 +675,7 @@ int (*intel_barrier_functions_table[])(MPI_Comm comm) ={ intel_tuning_table_element intel_barrier_table[] = { + {1,{ {2,1, { {2147483647,2} @@ -253,6 +701,88 @@ intel_tuning_table_element intel_barrier_table[] = {2147483647,6} } } + } + }, + {2,{ + { 2,1,{ + {2147483647,1} + } + }, + { 4,1,{ + {2147483647,3} + } + }, + { 8,1,{ + {2147483647,5} + } + }, + { 32,1,{ + {2147483647,2} + } + }, + { 128,1,{ + {2147483647,3} + } + }, + { 2147483647,1,{ + {2147483647,4} + } + } + } + }, + {4,{ + { 4,1,{ + {2147483647,2} + } + }, + { 8,1,{ + {2147483647,5} + } + }, + { 32,1,{ + {2147483647,2} + } + }, + { 2147483647,1,{ + {2147483647,4} + } + } + } + }, + {8,{ + { 8,1,{ + {2147483647,1} + } + }, + { 32,1,{ + {2147483647,2} + } + }, + { 2147483647,1,{ + {2147483647,4} + } + } + } + }, + {16,{ + { 4,1,{ + {2147483647,2} + } + }, + { 8,1,{ + {2147483647,5} + } + }, + { 32,1,{ + {2147483647,2} + } + }, + { 2147483647,1,{ + {2147483647,4} + } + } + } + } }; @@ -278,13 +808,14 @@ int (*intel_bcast_functions_table[])(void *buff, int count, smpi_coll_tuned_bcast_NTSL, smpi_coll_tuned_bcast_SMP_binomial, //smpi_coll_tuned_bcast_scatter_rdb_allgather, - smpi_coll_tuned_bcast_NTSL, + smpi_coll_tuned_bcast_NTSL, smpi_coll_tuned_bcast_SMP_linear, smpi_coll_tuned_bcast_mvapich2,//we don't know shumilin's algo' }; intel_tuning_table_element intel_bcast_table[] = { + {1,{ {2,9, { {1,2}, @@ -323,6 +854,104 @@ intel_tuning_table_element intel_bcast_table[] = {2147483647,7} } } + } + }, + {2,{ + { 4,6,{ + {806,4}, + {18093,7}, + {51366,6}, + {182526,4}, + {618390,1}, + {2147483647,7} + } + }, + { 8,6,{ + {24,1}, + {74,4}, + {18137,1}, + {614661,7}, + {1284626,1}, + {2147483647,2} + } + }, + { 16,4,{ + {1,1}, + {158,7}, + {16955,1}, + {2147483647,7} + } + }, + { 32,3,{ + {242,7}, + {10345,1}, + {2147483647,7} + } + }, + { 2147483647,4,{ + {1,1}, + {737,7}, + {5340,1}, + {2147483647,7} + } + } + } + }, + {4,{ + { 8,4,{ + {256,4}, + {17181,1}, + {1048576,7}, + {2147483647,7} + } + }, + { 2147483647,1,{ + {2147483647,7} + } + } + } + }, + {8,{ + { 16,5,{ + {3,1}, + {318,7}, + {1505,1}, + {1048576,7}, + {2147483647,7} + } + }, + { 32,3,{ + {422,7}, + {851,1}, + {2147483647,7} + } + }, + { 64,3,{ + {468,7}, + {699,1}, + {2147483647,7} + } + }, + { 2147483647,1,{ + {2147483647,7} + } + } + } + }, + {16,{ + { 8,4,{ + {256,4}, + {17181,1}, + {1048576,7}, + {2147483647,7} + } + }, + { 2147483647,1,{ + {2147483647,7} + } + } + } + } }; @@ -346,18 +975,74 @@ int (*intel_reduce_functions_table[])(void *sendbuf, void *recvbuf, smpi_coll_tuned_reduce_mvapich2, smpi_coll_tuned_reduce_binomial, smpi_coll_tuned_reduce_mvapich2, - smpi_coll_tuned_reduce_binomial, + smpi_coll_tuned_reduce_mvapich2_two_level, smpi_coll_tuned_reduce_rab, smpi_coll_tuned_reduce_rab }; intel_tuning_table_element intel_reduce_table[] = { + {1,{ {2147483647,1, - { - {2147483647,1} - } + { + {2147483647,1} + } } + } + }, + {2,{ + { 2,1,{ + {2147483647,1} + } + }, + { 4,2,{ + {10541,3}, + {2147483647,1} + } + }, + { 2147483647,1,{ + {2147483647,1} + } + } + } + }, + {4,{ + { 256,1,{ + {2147483647,1} + } + }, + { 2147483647,2,{ + {45,3}, + {2147483647,1} + } + } + } + }, + {8,{ + { 512,1,{ + {2147483647,1} + } + }, + { 2147483647,3,{ + {5,1}, + {11882,3}, + {2147483647,1} + } + } + } + }, + {16,{ + { 256,1,{ + {2147483647,1} + } + }, + { 2147483647,2,{ + {45,3}, + {2147483647,1} + } + } + } + } }; /* I_MPI_ADJUST_REDUCE_SCATTER @@ -408,6 +1093,7 @@ int (*intel_reduce_scatter_functions_table[])( void *sbuf, void *rbuf, intel_tuning_table_element intel_reduce_scatter_table[] = { + {1,{ {2,5, { {5,4}, @@ -477,6 +1163,318 @@ intel_tuning_table_element intel_reduce_scatter_table[] = {2147483647,5} } } + } + }, + {2,{ + { 2,2,{ + {6,1}, + {2147483647,2} + } + }, + { 4,7,{ + {5,4}, + {13,5}, + {59,3}, + {76,1}, + {91488,3}, + {680063,4}, + {2147483647,2} + } + }, + { 8,8,{ + {4,4}, + {11,5}, + {31,1}, + {69615,3}, + {202632,2}, + {396082,5}, + {1495696,4}, + {2147483647,2} + } + }, + { 16,1,{ + {4,4}, + {345,1}, + {79523,3}, + {2147483647,2} + } + }, + { 32,5,{ + {0,3}, + {4,4}, + {992,1}, + {71417,3}, + {2147483647,2} + } + }, + { 64,4,{ + {4,4}, + {1472,1}, + {196592,3}, + {2147483647,2} + } + }, + { 128,5,{ + {0,3}, + {4,4}, + {32892,1}, + {381072,3}, + {2147483647,2} + } + }, + { 2147483647,6,{ + {0,2}, + {4,4}, + {33262,1}, + {1571397,3}, + {2211398,5}, + {2147483647,4} + } + } + } + }, + {4,{ + { 4,7,{ + {12,4}, + {27,5}, + {49,3}, + {187,1}, + {405673,3}, + {594687,4}, + {2147483647,2} + } + }, + { 8,5,{ + {24,5}, + {155,1}, + {204501,3}, + {274267,5}, + {2147483647,4} + } + }, + { 16,6,{ + {63,1}, + {72,3}, + {264,1}, + {168421,3}, + {168421,4}, + {2147483647,2} + } + }, + { 32,10,{ + {0,3}, + {4,4}, + {12,1}, + {18,5}, + {419,1}, + {188739,3}, + {716329,4}, + {1365841,5}, + {2430194,2}, + {2147483647,4} + } + }, + { 64,8,{ + {0,3}, + {4,4}, + {17,5}, + {635,1}, + {202937,3}, + {308253,5}, + {1389874,4}, + {2147483647,2} + } + }, + { 128,8,{ + {0,3}, + {4,4}, + {16,5}, + {1238,1}, + {280097,3}, + {631434,5}, + {2605072,4}, + {2147483647,2} + } + }, + { 256,5,{ + {0,2}, + {4,4}, + {16,5}, + {2418,1}, + {2147483647,3} + } + }, + { 2147483647,6,{ + {0,2}, + {4,4}, + {16,5}, + {33182,1}, + {3763779,3}, + {2147483647,4} + } + } + } + }, + {8,{ + { 8,6,{ + {5,4}, + {494,1}, + {97739,3}, + {522836,2}, + {554174,5}, + {2147483647,2} + } + }, + { 16,8,{ + {5,4}, + {62,1}, + {94,3}, + {215,1}, + {185095,3}, + {454784,4}, + {607911,5}, + {2147483647,4} + } + }, + { 32,7,{ + {0,3}, + {4,4}, + {302,1}, + {250841,3}, + {665822,4}, + {1760980,5}, + {2147483647,4} + } + }, + { 64,8,{ + {0,3}, + {4,4}, + {41,5}, + {306,1}, + {332405,3}, + {1269189,4}, + {3712421,5}, + {2147483647,4} + } + }, + { 128,6,{ + {0,3}, + {4,4}, + {39,5}, + {526,1}, + {487878,3}, + {2147483647,4} + } + }, + { 256,8,{ + {0,2}, + {4,4}, + {36,5}, + {1382,1}, + {424162,3}, + {632881,5}, + {1127566,3}, + {2147483647,4} + } + }, + { 512,4,{ + {4,4}, + {34,5}, + {5884,1}, + {2147483647,3} + } + }, + { 2147483647,4,{ + {5,4}, + {32,5}, + {25105,1}, + {2147483647,3} + } + } + } + }, + {16,{ + { 4,7,{ + {12,4}, + {27,5}, + {49,3}, + {187,1}, + {405673,3}, + {594687,4}, + {2147483647,2} + } + }, + { 8,5,{ + {24,5}, + {155,1}, + {204501,3}, + {274267,5}, + {2147483647,4} + } + }, + { 16,6,{ + {63,1}, + {72,3}, + {264,1}, + {168421,3}, + {168421,4}, + {2147483647,2} + } + }, + { 32,10,{ + {0,3}, + {4,4}, + {12,1}, + {18,5}, + {419,1}, + {188739,3}, + {716329,4}, + {1365841,5}, + {2430194,2}, + {2147483647,4} + } + }, + { 64,8,{ + {0,3}, + {4,4}, + {17,5}, + {635,1}, + {202937,3}, + {308253,5}, + {1389874,4}, + {2147483647,2} + } + }, + { 128,8,{ + {0,3}, + {4,4}, + {16,5}, + {1238,1}, + {280097,3}, + {631434,5}, + {2605072,4}, + {2147483647,2} + } + }, + { 256,5,{ + {0,2}, + {4,4}, + {16,5}, + {2418,1}, + {2147483647,3} + } + }, + { 2147483647,6,{ + {0,2}, + {4,4}, + {16,5}, + {33182,1}, + {3763779,3}, + {2147483647,4} + } + } + } + } }; /* I_MPI_ADJUST_ALLGATHER @@ -504,6 +1502,7 @@ int (*intel_allgather_functions_table[])(void *sbuf, int scount, intel_tuning_table_element intel_allgather_table[] = { + {1,{ {4,11, { {1,4}, @@ -559,6 +1558,92 @@ intel_tuning_table_element intel_allgather_table[] = {2147483647,4} } } + } + }, + {2,{ + { 8,6,{ + {490,1}, + {558,2}, + {2319,1}, + {46227,3}, + {2215101,1}, + {2147483647,3} + } + }, + { 16,4,{ + {1005,1}, + {1042,2}, + {2059,1}, + {2147483647,3} + } + }, + { 2147483647,2,{ + {2454,1}, + {2147483647,3} + } + } + } + }, + {4,{ + { 8,2,{ + {2861,1}, + {2147483647,3} + } + }, + { 2147483647,2,{ + {605,1}, + {2147483647,3} + } + } + } + }, + {8,{ + { 16,4,{ + {66,1}, + {213,4}, + {514,1}, + {2147483647,3} + } + }, + { 32,4,{ + {91,1}, + {213,4}, + {514,1}, + {2147483647,3} + } + }, + { 64,4,{ + {71,1}, + {213,4}, + {514,1}, + {2147483647,3} + } + }, + { 128,2,{ + {305,1}, + {2147483647,3} + } + }, + { 2147483647,2,{ + {213,1}, + {2147483647,3} + } + } + } + }, + {16,{ + { 8,2,{ + {2861,1}, + {2147483647,3} + } + }, + { 2147483647,2,{ + {605,1}, + {2147483647,3} + } + } + } + } }; /* I_MPI_ADJUST_ALLGATHERV @@ -587,6 +1672,7 @@ int (*intel_allgatherv_functions_table[])(void *sbuf, int scount, intel_tuning_table_element intel_allgatherv_table[] = { + {1,{ {2,3, { {259668,3}, @@ -626,6 +1712,149 @@ intel_tuning_table_element intel_allgatherv_table[] = {2147483647,3} } } + } + }, + {2,{ + { 4,3,{ + {3147,1}, + {5622,2}, + {2147483647,3} + } + }, + { 8,3,{ + {975,1}, + {4158,2}, + {2147483647,3} + } + }, + { 16,2,{ + {2146,1}, + {2147483647,3} + } + }, + { 32,4,{ + {81,1}, + {414,2}, + {1190,1}, + {2147483647,3} + } + }, + { 2147483647,5,{ + {1,2}, + {3,1}, + {783,2}, + {1782,4}, + {2147483647,3} + } + } + } + }, + {4,{ + { 8,2,{ + {2554,1}, + {2147483647,3} + } + }, + { 16,4,{ + {272,1}, + {657,2}, + {2078,1}, + {2147483647,3} + } + }, + { 32,2,{ + {1081,1}, + {2147483647,3} + } + }, + { 64,2,{ + {547,1}, + {2147483647,3} + } + }, + { 2147483647,5,{ + {19,1}, + {239,2}, + {327,1}, + {821,4}, + {2147483647,3} + } + } + } + }, + {8,{ + { 16,3,{ + {55,1}, + {514,2}, + {2147483647,3} + } + }, + { 32,4,{ + {53,1}, + {167,4}, + {514,2}, + {2147483647,3} + } + }, + { 64,3,{ + {13,1}, + {319,4}, + {2147483647,3} + } + }, + { 128,7,{ + {2,1}, + {11,2}, + {48,1}, + {201,2}, + {304,1}, + {1048,4}, + {2147483647,3} + } + }, + { 2147483647,5,{ + {5,1}, + {115,4}, + {129,1}, + {451,4}, + {2147483647,3} + } + } + } + }, + {16,{ + { 8,2,{ + {2554,1}, + {2147483647,3} + } + }, + { 16,4,{ + {272,1}, + {657,2}, + {2078,1}, + {2147483647,3} + } + }, + { 32,2,{ + {1081,1}, + {2147483647,3} + } + }, + { 64,2,{ + {547,1}, + {2147483647,3} + } + }, + { 2147483647,5,{ + {19,1}, + {239,2}, + {327,1}, + {821,4}, + {2147483647,3} + } + } + } + } }; @@ -653,6 +1882,7 @@ int (*intel_gather_functions_table[])(void *sbuf, int scount, intel_tuning_table_element intel_gather_table[] = { + {1,{ {8,3, { {17561,3}, @@ -679,6 +1909,56 @@ intel_tuning_table_element intel_gather_table[] = {2147483647,1} } } + } + }, + {2,{ + {2147483647,1,{ + {2147483647,3} + } + } + } + }, + {4,{ + {2147483647,1,{ + {2147483647,3} + } + } + } + }, + {8,{ + { 16,1,{ + {2147483647,3} + } + }, + { 32,2,{ + {9,2}, + {2147483647,3} + } + }, + { 64,2,{ + {784,2}, + {2147483647,3} + } + }, + { 128,3,{ + {160,3}, + {655,2}, + {2147483647,3} + } + }, + { 2147483647,1,{ + {2147483647,3} + } + } + } + }, + {16,{ + {2147483647,1,{ + {2147483647,3} + } + } + } + } }; @@ -705,6 +1985,7 @@ int (*intel_scatter_functions_table[])(void *sbuf, int scount, intel_tuning_table_element intel_scatter_table[] = { + {1,{ {2,2, { {16391,1}, @@ -752,6 +2033,106 @@ intel_tuning_table_element intel_scatter_table[] = {2147483647,1} } } + } + }, + {2,{ + {2147483647,1,{ + {2147483647,3} + } + } + } + }, + {4,{ + { 8,1,{ + {2147483647,3} + } + }, + { 16,2,{ + {140,3}, + {1302,1}, + {2147483647,3} + } + }, + { 32,2,{ + {159,3}, + {486,1}, + {2147483647,3} + } + }, + { 64,2,{ + {149,1}, + {2147483647,3} + } + }, + { 2147483647,2,{ + {139,1}, + {2147483647,3} + } + } + } + }, + {8,{ + { 16,4,{ + {587,1}, + {1370,2}, + {2102,1}, + {2147483647,3} + } + }, + { 32,3,{ + {1038,1}, + {2065,2}, + {2147483647,3} + } + }, + { 64,3,{ + {515,1}, + {2069,2}, + {2147483647,3} + } + }, + { 128,3,{ + {284,1}, + {796,2}, + {2147483647,3} + } + }, + { 2147483647,2,{ + {139,1}, + {2147483647,3} + } + } + } + }, + {16,{ + { 8,1,{ + {2147483647,3} + } + }, + { 16,3,{ + {140,3}, + {1302,1}, + {2147483647,3} + } + }, + { 32,3,{ + {159,3}, + {486,1}, + {2147483647,3} + } + }, + { 64,2,{ + {149,1}, + {2147483647,3} + } + }, + { 2147483647,2,{ + {139,1}, + {2147483647,3} + } + } + } + } }; @@ -777,11 +2158,58 @@ int (*intel_alltoallv_functions_table[])(void *sbuf, int *scounts, int *sdisps, intel_tuning_table_element intel_alltoallv_table[] = { + {1,{ + {2147483647,1, + { + {2147483647,1} + } + } + } + }, + {2,{ {2147483647,1, { {2147483647,1} } } + } + }, + {4,{ + { 8,1,{ + {2147483647,1}//weirdly, intel reports the use of algo 0 here + } + }, + { 2147483647,2,{ + {4,1},//0 again + {2147483647,2} + } + } + } + }, + {8,{ + { 16,1,{ + {2147483647,1} + } + }, + { 2147483647,2,{ + {0,1},//weird again, only for 0-sized messages + {2147483647,2} + } + } + } + }, + {16,{ + { 8,1,{ + {2147483647,1}//0 + } + }, + { 2147483647,2,{ + {4,1},//0 + {2147483647,2} + } + } + } + } }; @@ -842,14 +2270,25 @@ ret smpi_coll_tuned_ ## cat ## _impi (COLL_UNPAREN args)\ int i =0;\ SIZECOMP_ ## cat\ i=0;\ - int j =0;\ - while(comm_size>=intel_ ## cat ## _table[i].max_num_proc\ - && i < INTEL_MAX_NB_THRESHOLDS)\ + int j =0, k=0;\ + if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){\ + smpi_comm_init_smp(comm);\ + }\ + int local_size=1;\ + if (smpi_comm_is_uniform(comm)) {\ + local_size = smpi_comm_size(smpi_comm_get_intra_comm(comm));\ + }\ + while(local_size!=intel_ ## cat ## _table[i].ppn\ + && i < INTEL_MAX_NB_PPN)\ i++;\ - while(block_dsize >=intel_ ## cat ## _table[i].elems[j].max_size\ - && j< intel_ ## cat ## _table[i].num_elems)\ + if(i==INTEL_MAX_NB_PPN) i=0;\ + while(comm_size>intel_ ## cat ## _table[i].elems[j].max_num_proc\ + && j < INTEL_MAX_NB_THRESHOLDS)\ j++;\ - return (intel_ ## cat ## _functions_table[intel_ ## cat ## _table[i].elems[j].algo-1]\ + while(block_dsize >=intel_ ## cat ## _table[i].elems[j].elems[k].max_size\ + && k< intel_ ## cat ## _table[i].elems[j].num_elems)\ + k++;\ + return (intel_ ## cat ## _functions_table[intel_ ## cat ## _table[i].elems[j].elems[k].algo-1]\ args2);\ }