+ // Local staging copy of the broadcast tuning table: 8 entries, keyed by the
+ // leading value 16, 32, ..., 2048 (presumably the process count each entry was
+ // tuned for -- TODO confirm against the declaration of mv2_bcast_tuning_table).
+ //
+ // Every entry below has the same shape:
+ //   key, 8192, 4, 4,               four scalars (8192/4/4 are identical in all entries)
+ //   {flag, ...},                   one 0/1 flag per row of the lists that follow
+ //   N, {{lo, hi, &fn, k}, ...},    first list, N rows
+ //   N, {{lo, hi, &fn, k}, ...}     second list, N rows
+ //
+ // Within a list each row's lower bound equals the previous row's upper bound,
+ // and the last row uses -1 as its upper bound (presumably "no upper limit").
+ // The second list repeats the bounds and the fourth value of the first list and
+ // always names MPIR_Shmem_Bcast_MV2, except where noted in the 128 entry.
+ // NOTE(review): lo/hi look like message-size thresholds and the two lists look
+ // like the inter-node / intra-node algorithm choices of upstream MVAPICH2 --
+ // confirm against the struct definition before relying on this.
+ mv2_bcast_tuning_table mv2_tmp_bcast_thresholds_table[] = {
+     // key 16: 11 rows per list
+     {16,
+      8192,
+      4,
+      4,
+      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+      11,
+      {{0, 8, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+       {8, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+       {16, 1024, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+       {1024, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+       {8192, 16384, &MPIR_Bcast_binomial_MV2, -1},
+       {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+       {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+       {65536, 131072, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
+       {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_MV2, -1},
+       {262144, 524288, &MPIR_Bcast_scatter_doubling_allgather_MV2, -1},
+       {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}},
+      11,
+      {{0, 8, &MPIR_Shmem_Bcast_MV2, 2},
+       {8, 16, &MPIR_Shmem_Bcast_MV2, 4},
+       {16, 1024, &MPIR_Shmem_Bcast_MV2, 2},
+       {1024, 8192, &MPIR_Shmem_Bcast_MV2, 4},
+       {8192, 16384, &MPIR_Shmem_Bcast_MV2, -1},
+       {16384, 32768, &MPIR_Shmem_Bcast_MV2, 4},
+       {32768, 65536, &MPIR_Shmem_Bcast_MV2, 2},
+       {65536, 131072, &MPIR_Shmem_Bcast_MV2, -1},
+       {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
+       {262144, 524288, &MPIR_Shmem_Bcast_MV2, -1},
+       {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
+     // key 32: 8 rows per list
+     {32,
+      8192,
+      4,
+      4,
+      {1, 1, 1, 1, 1, 1, 1, 1},
+      8,
+      {{0, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+       {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+       {256, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+       {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+       {65536, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+       {131072, 262144, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+       {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+       {524288, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8}},
+      8,
+      {{0, 128, &MPIR_Shmem_Bcast_MV2, 2},
+       {128, 256, &MPIR_Shmem_Bcast_MV2, 4},
+       {256, 32768, &MPIR_Shmem_Bcast_MV2, 2},
+       {32768, 65536, &MPIR_Shmem_Bcast_MV2, 4},
+       {65536, 131072, &MPIR_Shmem_Bcast_MV2, 2},
+       {131072, 262144, &MPIR_Shmem_Bcast_MV2, 8},
+       {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
+       {524288, -1, &MPIR_Shmem_Bcast_MV2, 8}}},
+     // key 64: 9 rows per list
+     {64,
+      8192,
+      4,
+      4,
+      {1, 1, 1, 1, 1, 1, 1, 1, 1},
+      9,
+      {{0, 2, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+       {2, 4, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+       {4, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+       {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+       {32, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+       {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+       {256, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+       {4096, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+       {32768, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2}},
+      9,
+      {{0, 2, &MPIR_Shmem_Bcast_MV2, 4},
+       {2, 4, &MPIR_Shmem_Bcast_MV2, 8},
+       {4, 16, &MPIR_Shmem_Bcast_MV2, 4},
+       {16, 32, &MPIR_Shmem_Bcast_MV2, 8},
+       {32, 128, &MPIR_Shmem_Bcast_MV2, 4},
+       {128, 256, &MPIR_Shmem_Bcast_MV2, 8},
+       {256, 4096, &MPIR_Shmem_Bcast_MV2, 4},
+       {4096, 32768, &MPIR_Shmem_Bcast_MV2, 8},
+       {32768, -1, &MPIR_Shmem_Bcast_MV2, 2}}},
+     // key 128: 4 rows per list; the only entry with a 0 flag
+     {128,
+      8192,
+      4,
+      4,
+      {1, 1, 1, 0},
+      4,
+      {{0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+       {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+       {16384, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+       {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}},
+      4,
+      {{0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
+       {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+       {16384, 524288, &MPIR_Shmem_Bcast_MV2, 2},
+       // No function for this range; it lines up with the 0 flag above.
+       {524288, -1, nullptr, -1}}},
+     // key 256: 5 rows per list
+     {256,
+      8192,
+      4,
+      4,
+      {1, 1, 1, 1, 1},
+      5,
+      {{0, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+       {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+       {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
+       {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+       {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
+      5,
+      {{0, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+       {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
+       {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
+       {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
+       {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
+     // key 512: 5 rows per list
+     {512,
+      8192,
+      4,
+      4,
+      {1, 1, 1, 1, 1},
+      5,
+      {{0, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+       {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+       {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+       {131072, 262144, &MPIR_Pipelined_Bcast_MV2, -1},
+       {262144, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
+      5,
+      {{0, 4096, &MPIR_Shmem_Bcast_MV2, 8},
+       {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+       {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
+       {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
+       {262144, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
+     // key 1024: 5 rows per list
+     {1024,
+      8192,
+      4,
+      4,
+      {1, 1, 1, 1, 1},
+      5,
+      {{0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+       {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+       {16384, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+       {65536, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
+       {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
+      5,
+      {{0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
+       {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+       {16384, 65536, &MPIR_Shmem_Bcast_MV2, 2},
+       {65536, 524288, &MPIR_Shmem_Bcast_MV2, -1},
+       {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
+     // key 2048: 7 rows per list
+     {2048,
+      8192,
+      4,
+      4,
+      {1, 1, 1, 1, 1, 1, 1},
+      7,
+      {{0, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+       {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+       {32, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+       {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+       {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+       {32768, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
+       {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
+      7,
+      {{0, 16, &MPIR_Shmem_Bcast_MV2, 8},
+       {16, 32, &MPIR_Shmem_Bcast_MV2, 4},
+       {32, 4096, &MPIR_Shmem_Bcast_MV2, 8},
+       {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+       {16384, 32768, &MPIR_Shmem_Bcast_MV2, 2},
+       {32768, 524288, &MPIR_Shmem_Bcast_MV2, -1},
+       {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}}};
+
+ std::copy_n(mv2_tmp_bcast_thresholds_table, mv2_size_bcast_tuning_table, mv2_bcast_thresholds_table); // copy the first mv2_size_bcast_tuning_table entries of the local table into mv2_bcast_thresholds_table; NOTE(review): the local table defines 8 entries -- confirm the count is <= 8 and the destination holds at least that many