node_sizes = smpi_comm_get_non_uniform_map(comm);
- displs = xbt_malloc(sizeof (int) * leader_comm_size);
- recvcnts = xbt_malloc(sizeof (int) * leader_comm_size);
+ displs = static_cast<int *>(xbt_malloc(sizeof (int) * leader_comm_size));
+ recvcnts = static_cast<int *>(xbt_malloc(sizeof (int) * leader_comm_size));
if (!displs || !recvcnts) {
return MPI_ERR_OTHER;
}
int dst_tree_root, rank_tree_root, last_recv_count = 0, num_procs_completed;
int offset, tmp_mask;
int tag = COLL_TAG_ALLGATHER;
- int mask = 1;
+ unsigned int mask = 1;
int success = 0;
int curr_count = recv_count;
int i, dst, send_base_offset, recv_base_offset, send_chunk, recv_chunk,
send_offset, recv_offset;
int tag = COLL_TAG_ALLGATHER;
- int mask;
+ unsigned int mask;
int curr_count;
// get size of the communicator, followed by rank
MPI_Datatype recvtype,
MPI_Comm comm)
{
- int j, i;
+ unsigned int j, i;
MPI_Status status;
MPI_Aint recvtype_extent, recvtype_true_extent, recvtype_true_lb;
- int curr_cnt, dst, total_count;
+ unsigned int curr_cnt, dst, total_count;
void *tmp_buf, *tmp_buf_rl;
unsigned int mask, dst_tree_root, my_tree_root, position,
send_offset, recv_offset, last_recv_cnt=0, nprocs_completed, k,
{
int mpi_errno = MPI_SUCCESS;
int newrank = 0;
- unsigned int mask, pof2;
- int dst, is_commutative, rem, newdst, i,
- send_idx, recv_idx, last_idx, send_cnt, recv_cnt, *cnts, *disps;
+ int mask, pof2, i, send_idx, recv_idx, last_idx, send_cnt;
+ int dst, is_commutative, rem, newdst,
+ recv_cnt, *cnts, *disps;
MPI_Aint true_lb, true_extent, extent;
void *tmp_buf, *tmp_buf_free;
/* homogeneous */
- unsigned int comm_size = smpi_comm_size(comm);
- unsigned int rank = smpi_comm_rank(comm);
+ int comm_size = smpi_comm_size(comm);
+ int rank = smpi_comm_rank(comm);
is_commutative = smpi_op_is_commute(op);
ptrdiff_t block_offset, max_real_segsize;
MPI_Request reqs[2] = {NULL, NULL};
const size_t segsize = 1 << 20; /* 1 MB */
- unsigned int size = smpi_comm_size(comm);
- unsigned int rank = smpi_comm_rank(comm);
+ int size = smpi_comm_size(comm);
+ int rank = smpi_comm_rank(comm);
XBT_DEBUG("coll:tuned:allreduce_intra_ring_segmented rank %d, count %d", rank, count);
MPI_Comm comm)
{
int tag = COLL_TAG_ALLREDUCE;
- unsigned int mask, pof2;
- int dst, newrank, rem, newdst, i,
- send_idx, recv_idx, last_idx, send_cnt, recv_cnt, *cnts, *disps;
+ unsigned int mask, pof2, i, recv_idx, last_idx, send_idx, send_cnt;
+ int dst, newrank, rem, newdst,
+ recv_cnt, *cnts, *disps;
MPI_Aint extent;
MPI_Status status;
void *tmp_buf = NULL;
unsigned int nprocs = smpi_comm_size(comm);
- unsigned int rank = smpi_comm_rank(comm);
+ int rank = smpi_comm_rank(comm);
extent = smpi_datatype_get_extent(dtype);
tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent);
void *recv, *tmp_buf;
- unsigned int rank = smpi_comm_rank(comm);
+ int rank = smpi_comm_rank(comm);
unsigned int nprocs = smpi_comm_size(comm);
if((nprocs&(nprocs-1)))
MPI_Request *preq;
size = smpi_comm_size(comm);
rank = smpi_comm_rank(comm);
- MPI_Request *ireqs= xbt_malloc(sizeof(MPI_Request) * size * 2);
+ MPI_Request *ireqs= static_cast<MPI_Request*>(xbt_malloc(sizeof(MPI_Request) * size * 2));
XBT_DEBUG(
"coll:tuned:alltoallv_intra_basic_linear rank %d", rank);
{
int count_by_segment = original_count;
size_t type_size;
- int segsize =1024 << 7;
+ size_t segsize =1024 << 7;
//mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
//mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, type_size, count_by_segment );
- XBT_DEBUG("coll:tuned:bcast_intra_pipeline rank %d ss %5d type_size %lu count_by_segment %d",
+ XBT_DEBUG("coll:tuned:bcast_intra_pipeline rank %d ss %5zu type_size %lu count_by_segment %d",
smpi_comm_rank(comm), segsize, (unsigned long)type_size, count_by_segment);
int root,
MPI_Comm comm)
{
- int segsize ;
+ unsigned int segsize ;
int rank, size;
int segindex, i, lr, pair;
int segcount[2]; /* Number of elements sent with each segment */
node_sizes = smpi_comm_get_non_uniform_map(comm);
if (leader_comm_rank == leader_root) {
- displs = xbt_malloc(sizeof (int) * leader_comm_size);
- recvcnts = xbt_malloc(sizeof (int) * leader_comm_size);
+ displs = static_cast<int *>(xbt_malloc(sizeof (int) * leader_comm_size));
+ recvcnts = static_cast<int *>(xbt_malloc(sizeof (int) * leader_comm_size));
if (!displs || !recvcnts) {
mpi_errno = MPI_ERR_OTHER;
return mpi_errno;
for (i = size - 2; i >= 0; --i) {
if (rank == i)
- inbuf = sbuf;
+ inbuf = static_cast<char*>(sbuf);
else {
smpi_mpi_recv(origin, count, dtype, i, tag, comm, &status);
inbuf = origin;
/* Finally, fill up the src array */
if(recv_iter > 0) {
- knomial_reduce_src_array = smpi_get_tmp_sendbuffer(sizeof(int)*recv_iter);
+ knomial_reduce_src_array = static_cast<int*>(smpi_get_tmp_sendbuffer(sizeof(int)*recv_iter));
}
mask = orig_mask;
&dst, &expected_send_count, &expected_recv_count, &src_array);
if(expected_recv_count > 0 ) {
- tmp_buf = xbt_malloc(sizeof(void *)*expected_recv_count);
- requests = xbt_malloc(sizeof(MPI_Request)*expected_recv_count);
+ tmp_buf = static_cast<void**>(xbt_malloc(sizeof(void *)*expected_recv_count));
+ requests = static_cast<MPI_Request*>(xbt_malloc(sizeof(MPI_Request)*expected_recv_count));
for(k=0; k < expected_recv_count; k++ ) {
tmp_buf[k] = smpi_get_tmp_sendbuffer(count*(MAX(extent,true_extent)));
tmp_buf[k] = (void *)((char*)tmp_buf[k] - true_lb);
MPI_Type_extent(mpi_datatype, &typelng);
scrlng = typelng * count;
#ifdef NO_CACHE_OPTIMIZATION
- scr1buf = malloc(scrlng);
- scr2buf = malloc(scrlng);
- scr3buf = malloc(scrlng);
+ scr1buf = static_cast<char*>(xbt_malloc(scrlng));
+ scr2buf = static_cast<char*>(xbt_malloc(scrlng));
+ scr3buf = static_cast<char*>(xbt_malloc(scrlng));
#else
# ifdef SCR_LNG_OPTIM
scrlng = SCR_LNG_OPTIM(scrlng);
# endif
- scr2buf = malloc(3*scrlng); /* To test cache problems. */
+ scr2buf = static_cast<char*>(xbt_malloc(3*scrlng)); /* To test cache problems. */
scr1buf = scr2buf + 1*scrlng; /* scr1buf and scr3buf must not*/
scr3buf = scr2buf + 2*scrlng; /* be used for malloc because */
/* they are interchanged below.*/
}
# ifdef NO_CACHE_TESTING
- free(scr1buf); free(scr2buf); free(scr3buf);
+ xbt_free(scr1buf); xbt_free(scr2buf); xbt_free(scr3buf);
# else
- free(scr2buf); /* scr1buf and scr3buf are part of scr2buf */
+ xbt_free(scr2buf); /* scr1buf and scr3buf are part of scr2buf */
# endif
return(MPI_SUCCESS);
} /* new_prot */
size = total_count;
for (k = 0; k < log2_comm_size; ++k) {
/* use a double-buffering scheme to avoid local copies */
- char *incoming_data = (buf0_was_inout ? tmp_buf1 : tmp_buf0);
- char *outgoing_data = (buf0_was_inout ? tmp_buf0 : tmp_buf1);
+ char *incoming_data = static_cast<char*>(buf0_was_inout ? tmp_buf1 : tmp_buf0);
+ char *outgoing_data = static_cast<char*>(buf0_was_inout ? tmp_buf0 : tmp_buf1);
int peer = rank ^ (0x1 << k);
size /= 2;
if (root != leader_of_root) {
if (leader_comm_rank == leader_root) {
- displs = xbt_malloc(sizeof (int) * leader_comm_size);
- sendcnts = xbt_malloc(sizeof (int) * leader_comm_size);
+ displs = static_cast<int*>(xbt_malloc(sizeof (int) * leader_comm_size));
+ sendcnts = static_cast<int*>(xbt_malloc(sizeof (int) * leader_comm_size));
sendcnts[0] = node_sizes[0] * nbytes;
displs[0] = 0;
MPI_BYTE, leader_root, leader_comm);
} else {
if (leader_comm_rank == leader_root) {
- displs = xbt_malloc(sizeof (int) * leader_comm_size);
- sendcnts = xbt_malloc(sizeof (int) * leader_comm_size);
+ displs = static_cast<int*>(xbt_malloc(sizeof (int) * leader_comm_size));
+ sendcnts = static_cast<int*>(xbt_malloc(sizeof (int) * leader_comm_size));
sendcnts[0] = node_sizes[0] * sendcnt;
displs[0] = 0;
if (root != leader_of_root) {
if (leader_comm_rank == leader_root) {
- displs = xbt_malloc(sizeof (int) * leader_comm_size);
- sendcnts = xbt_malloc(sizeof (int) * leader_comm_size);
+ displs = static_cast<int*>(xbt_malloc(sizeof (int) * leader_comm_size));
+ sendcnts = static_cast<int*>(xbt_malloc(sizeof (int) * leader_comm_size));
sendcnts[0] = node_sizes[0] * nbytes;
displs[0] = 0;
MPI_BYTE, leader_root, leader_comm);
} else {
if (leader_comm_rank == leader_root) {
- displs = xbt_malloc(sizeof (int) * leader_comm_size);
- sendcnts = xbt_malloc(sizeof (int) * leader_comm_size);
+ displs = static_cast<int*>(xbt_malloc(sizeof (int) * leader_comm_size));
+ sendcnts = static_cast<int*>(xbt_malloc(sizeof (int) * leader_comm_size));
sendcnts[0] = node_sizes[0] * sendcnt;
displs[0] = 0;
#define INTEL_MAX_NB_PPN 5 /* 1 2 4 8 16 ppn */
typedef struct {
- int max_size;
+ unsigned int max_size;
int algo;
} intel_tuning_table_size_element;
size_t dsize, block_dsize;
communicator_size = smpi_comm_size(comm);
- int short_size=256;
- int medium_size=32768;
+ unsigned int short_size=256;
+ unsigned int medium_size=32768;
//short size and comm_size >=8 -> bruck
// medium size messages and (short messages for comm_size < 8), we
if(mv2_alltoall_table_ppn_conf==NULL)
init_mv2_alltoall_tables_stampede();
- int sendtype_size, recvtype_size, nbytes, comm_size;
+ int sendtype_size, recvtype_size, comm_size;
char * tmp_buf = NULL;
int mpi_errno=MPI_SUCCESS;
int range = 0;
sendtype_size=smpi_datatype_size(sendtype);
recvtype_size=smpi_datatype_size(recvtype);
- nbytes = sendtype_size * sendcount;
+ long nbytes = sendtype_size * sendcount;
/* check if safe to use partial subscription mode */
{
int mpi_errno = MPI_SUCCESS;
- int nbytes = 0, comm_size, recvtype_size;
+ long nbytes = 0, comm_size, recvtype_size;
int range = 0;
int partial_sub_ok = 0;
int conf_index = 0;
int range = 0;
int range_threshold = 0;
int range_intra_threshold = 0;
- int nbytes = 0;
+ long nbytes = 0;
int comm_size = 0;
int recvtype_size, sendtype_size;
int rank = -1;
int mpi_errno = MPI_SUCCESS;
int range = 0, comm_size, total_count, recvtype_size, i;
int range_threshold = 0;
- int nbytes = 0;
+ long nbytes = 0;
if(mv2_allgatherv_thresholds_table==NULL)
init_mv2_allgatherv_tables_stampede();
/* check if multiple threads are calling this collective function */
MPI_Aint sendtype_size = 0;
- int nbytes = 0;
+ long nbytes = 0;
int range = 0, range_threshold = 0, range_threshold_intra = 0;
int is_two_level = 0;
int is_commutative = 0;
int mpi_errno = MPI_SUCCESS;
int comm_size/*, rank*/;
int two_level_bcast = 1;
- size_t nbytes = 0;
+ long nbytes = 0;
int range = 0;
int range_threshold = 0;
int range_threshold_intra = 0;
/* } else {
MPIR_Pack_size_impl(1, datatype, &type_size);
}*/
- nbytes = (size_t) (count) * (type_size);
+ nbytes = (count) * (type_size);
/* Search for the corresponding system size inside the tuning table */
while ((range < (mv2_size_bcast_tuning_table - 1)) &&
int range_intra_threshold = 0;
int is_commutative, pof2;
int comm_size = 0;
- int nbytes = 0;
+ long nbytes = 0;
int sendtype_size;
int is_two_level = 0;
int range = 0;
int range_threshold = 0;
int is_commutative = 0;
- int *disps = xbt_malloc(comm_size * sizeof (int));
+ int *disps = static_cast<int*>(xbt_malloc(comm_size * sizeof (int)));
if(mv2_red_scat_thresholds_table==NULL)
init_mv2_reduce_scatter_tables_stampede();
mv2_alltoall_num_ppn_conf = 3;
if(smpi_coll_cleanup_callback==NULL)
smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
- mv2_alltoall_thresholds_table = xbt_malloc(sizeof(mv2_alltoall_tuning_table *)
- * mv2_alltoall_num_ppn_conf);
- table_ptrs = xbt_malloc(sizeof(mv2_alltoall_tuning_table *)
- * mv2_alltoall_num_ppn_conf);
- mv2_size_alltoall_tuning_table = xbt_malloc(sizeof(int) *
- mv2_alltoall_num_ppn_conf);
- mv2_alltoall_table_ppn_conf = xbt_malloc(mv2_alltoall_num_ppn_conf * sizeof(int));
+ mv2_alltoall_thresholds_table = static_cast<mv2_alltoall_tuning_table**>(xbt_malloc(sizeof(mv2_alltoall_tuning_table *)
+ * mv2_alltoall_num_ppn_conf));
+ table_ptrs = static_cast<mv2_alltoall_tuning_table**>(xbt_malloc(sizeof(mv2_alltoall_tuning_table *)
+ * mv2_alltoall_num_ppn_conf));
+ mv2_size_alltoall_tuning_table = static_cast<int*>(xbt_malloc(sizeof(int) *
+ mv2_alltoall_num_ppn_conf));
+ mv2_alltoall_table_ppn_conf = static_cast<int*>(xbt_malloc(mv2_alltoall_num_ppn_conf * sizeof(int)));
mv2_alltoall_table_ppn_conf[0] = 1;
mv2_size_alltoall_tuning_table[0] = 6;
mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_1ppn[] = {
agg_table_sum += mv2_size_alltoall_tuning_table[i];
}
mv2_alltoall_thresholds_table[0] =
- xbt_malloc(agg_table_sum * sizeof (mv2_alltoall_tuning_table));
+ static_cast<mv2_alltoall_tuning_table*>(xbt_malloc(agg_table_sum * sizeof (mv2_alltoall_tuning_table)));
memcpy(mv2_alltoall_thresholds_table[0], table_ptrs[0],
(sizeof(mv2_alltoall_tuning_table)
* mv2_size_alltoall_tuning_table[0]));
mv2_allgather_tuning_table **table_ptrs = NULL;
mv2_allgather_num_ppn_conf = 3;
mv2_allgather_thresholds_table
- = xbt_malloc(sizeof(mv2_allgather_tuning_table *)
- * mv2_allgather_num_ppn_conf);
- table_ptrs = xbt_malloc(sizeof(mv2_allgather_tuning_table *)
- * mv2_allgather_num_ppn_conf);
- mv2_size_allgather_tuning_table = xbt_malloc(sizeof(int) *
- mv2_allgather_num_ppn_conf);
+ = static_cast<mv2_allgather_tuning_table**>(xbt_malloc(sizeof(mv2_allgather_tuning_table *)
+ * mv2_allgather_num_ppn_conf));
+ table_ptrs = static_cast<mv2_allgather_tuning_table**>(xbt_malloc(sizeof(mv2_allgather_tuning_table *)
+ * mv2_allgather_num_ppn_conf));
+ mv2_size_allgather_tuning_table = static_cast<int*>(xbt_malloc(sizeof(int) *
+ mv2_allgather_num_ppn_conf));
mv2_allgather_table_ppn_conf
- = xbt_malloc(mv2_allgather_num_ppn_conf * sizeof(int));
+ = static_cast<int*>(xbt_malloc(mv2_allgather_num_ppn_conf * sizeof(int)));
mv2_allgather_table_ppn_conf[0] = 1;
mv2_size_allgather_tuning_table[0] = 6;
mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_1ppn[] = {
agg_table_sum += mv2_size_allgather_tuning_table[i];
}
mv2_allgather_thresholds_table[0] =
- xbt_malloc(agg_table_sum * sizeof (mv2_allgather_tuning_table));
+ static_cast<mv2_allgather_tuning_table*>(xbt_malloc(agg_table_sum * sizeof (mv2_allgather_tuning_table)));
memcpy(mv2_allgather_thresholds_table[0], table_ptrs[0],
(sizeof(mv2_allgather_tuning_table)
* mv2_size_allgather_tuning_table[0]));
if(smpi_coll_cleanup_callback==NULL)
smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
mv2_size_gather_tuning_table=7;
- mv2_gather_thresholds_table = xbt_malloc(mv2_size_gather_tuning_table*
- sizeof (mv2_gather_tuning_table));
+ mv2_gather_thresholds_table = static_cast<mv2_gather_tuning_table*>(xbt_malloc(mv2_size_gather_tuning_table*
+ sizeof (mv2_gather_tuning_table)));
mv2_gather_tuning_table mv2_tmp_gather_thresholds_table[]={
{16,
2,{{0, 524288, &MPIR_Gather_MV2_Direct},
if(smpi_coll_cleanup_callback==NULL)
smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
mv2_size_allgatherv_tuning_table = 6;
- mv2_allgatherv_thresholds_table = xbt_malloc(mv2_size_allgatherv_tuning_table *
- sizeof (mv2_allgatherv_tuning_table));
+ mv2_allgatherv_thresholds_table = static_cast<mv2_allgatherv_tuning_table*>(xbt_malloc(mv2_size_allgatherv_tuning_table *
+ sizeof (mv2_allgatherv_tuning_table)));
mv2_allgatherv_tuning_table mv2_tmp_allgatherv_thresholds_table[] = {
{
16,
if(smpi_coll_cleanup_callback==NULL)
smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
mv2_size_allreduce_tuning_table = 8;
- mv2_allreduce_thresholds_table = xbt_malloc(mv2_size_allreduce_tuning_table *
- sizeof (mv2_allreduce_tuning_table));
+ mv2_allreduce_thresholds_table = static_cast<mv2_allreduce_tuning_table*>(xbt_malloc(mv2_size_allreduce_tuning_table *
+ sizeof (mv2_allreduce_tuning_table)));
mv2_allreduce_tuning_table mv2_tmp_allreduce_thresholds_table[] = {
{
16,
if(smpi_coll_cleanup_callback==NULL)
smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
mv2_size_bcast_tuning_table=8;
- mv2_bcast_thresholds_table = xbt_malloc(mv2_size_bcast_tuning_table *
- sizeof (mv2_bcast_tuning_table));
+ mv2_bcast_thresholds_table = static_cast<mv2_bcast_tuning_table*>(xbt_malloc(mv2_size_bcast_tuning_table *
+ sizeof (mv2_bcast_tuning_table)));
mv2_bcast_tuning_table mv2_tmp_bcast_thresholds_table[]={
{
smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
/*Stampede*/
mv2_size_reduce_tuning_table = 8;
- mv2_reduce_thresholds_table = xbt_malloc(mv2_size_reduce_tuning_table *
- sizeof (mv2_reduce_tuning_table));
+ mv2_reduce_thresholds_table = static_cast<mv2_reduce_tuning_table*>(xbt_malloc(mv2_size_reduce_tuning_table *
+ sizeof (mv2_reduce_tuning_table)));
mv2_reduce_tuning_table mv2_tmp_reduce_thresholds_table[] = {
{
16,
if(smpi_coll_cleanup_callback==NULL)
smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
mv2_size_red_scat_tuning_table = 6;
- mv2_red_scat_thresholds_table = xbt_malloc(mv2_size_red_scat_tuning_table *
- sizeof (mv2_red_scat_tuning_table));
+ mv2_red_scat_thresholds_table = static_cast<mv2_red_scat_tuning_table*>(xbt_malloc(mv2_size_red_scat_tuning_table *
+ sizeof (mv2_red_scat_tuning_table)));
mv2_red_scat_tuning_table mv2_tmp_red_scat_thresholds_table[] = {
{
16,
mv2_scatter_tuning_table **table_ptrs = NULL;
mv2_scatter_num_ppn_conf = 3;
mv2_scatter_thresholds_table
- = xbt_malloc(sizeof(mv2_scatter_tuning_table *)
- * mv2_scatter_num_ppn_conf);
- table_ptrs = xbt_malloc(sizeof(mv2_scatter_tuning_table *)
- * mv2_scatter_num_ppn_conf);
- mv2_size_scatter_tuning_table = xbt_malloc(sizeof(int) *
- mv2_scatter_num_ppn_conf);
+ = static_cast<mv2_scatter_tuning_table**>(xbt_malloc(sizeof(mv2_scatter_tuning_table *)
+ * mv2_scatter_num_ppn_conf));
+ table_ptrs = static_cast<mv2_scatter_tuning_table**>(xbt_malloc(sizeof(mv2_scatter_tuning_table *)
+ * mv2_scatter_num_ppn_conf));
+ mv2_size_scatter_tuning_table = static_cast<int*>(xbt_malloc(sizeof(int) *
+ mv2_scatter_num_ppn_conf));
mv2_scatter_table_ppn_conf
- = xbt_malloc(mv2_scatter_num_ppn_conf * sizeof(int));
+ = static_cast<int*>(xbt_malloc(mv2_scatter_num_ppn_conf * sizeof(int)));
mv2_scatter_table_ppn_conf[0] = 1;
mv2_size_scatter_tuning_table[0] = 6;
mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_1ppn[] = {
agg_table_sum += mv2_size_scatter_tuning_table[i];
}
mv2_scatter_thresholds_table[0] =
- xbt_malloc(agg_table_sum * sizeof (mv2_scatter_tuning_table));
+ static_cast<mv2_scatter_tuning_table*>(xbt_malloc(agg_table_sum * sizeof (mv2_scatter_tuning_table)));
memcpy(mv2_scatter_thresholds_table[0], table_ptrs[0],
(sizeof(mv2_scatter_tuning_table)
* mv2_size_scatter_tuning_table[0]));
)
set(SMPI_SRC
- src/smpi/colls/allgather-2dmesh.c
- src/smpi/colls/allgather-3dmesh.c
- src/smpi/colls/allgather-GB.c
- src/smpi/colls/allgather-NTSLR-NB.c
- src/smpi/colls/allgather-NTSLR.c
- src/smpi/colls/allgather-SMP-NTS.c
- src/smpi/colls/allgather-bruck.c
- src/smpi/colls/allgather-loosely-lr.c
- src/smpi/colls/allgather-ompi-neighborexchange.c
- src/smpi/colls/allgather-pair.c
- src/smpi/colls/allgather-mvapich-smp.c
- src/smpi/colls/allgather-rdb.c
- src/smpi/colls/allgather-rhv.c
- src/smpi/colls/allgather-ring.c
- src/smpi/colls/allgather-smp-simple.c
- src/smpi/colls/allgather-spreading-simple.c
- src/smpi/colls/allgatherv-GB.c
- src/smpi/colls/allgatherv-mpich-rdb.c
- src/smpi/colls/allgatherv-mpich-ring.c
- src/smpi/colls/allgatherv-ompi-bruck.c
- src/smpi/colls/allgatherv-ompi-neighborexchange.c
- src/smpi/colls/allgatherv-pair.c
- src/smpi/colls/allgatherv-ring.c
- src/smpi/colls/allreduce-lr.c
- src/smpi/colls/allreduce-ompi-ring-segmented.c
- src/smpi/colls/allreduce-rab-rdb.c
- src/smpi/colls/allreduce-rab1.c
- src/smpi/colls/allreduce-rab2.c
- src/smpi/colls/allreduce-rdb.c
- src/smpi/colls/allreduce-redbcast.c
- src/smpi/colls/allreduce-smp-binomial-pipeline.c
- src/smpi/colls/allreduce-smp-binomial.c
- src/smpi/colls/allreduce-smp-rdb.c
- src/smpi/colls/allreduce-smp-rsag-lr.c
- src/smpi/colls/allreduce-smp-rsag-rab.c
- src/smpi/colls/allreduce-smp-rsag.c
- src/smpi/colls/allreduce-mvapich-rs.c
- src/smpi/colls/allreduce-mvapich-two-level.c
- src/smpi/colls/alltoall-2dmesh.c
- src/smpi/colls/alltoall-3dmesh.c
-# src/smpi/colls/alltoall-bruck.c
- src/smpi/colls/alltoall-pair-light-barrier.c
- src/smpi/colls/alltoall-pair-mpi-barrier.c
- src/smpi/colls/alltoall-pair-one-barrier.c
- src/smpi/colls/alltoall-pair.c
- src/smpi/colls/alltoall-rdb.c
- src/smpi/colls/alltoall-ring-light-barrier.c
- src/smpi/colls/alltoall-ring-mpi-barrier.c
- src/smpi/colls/alltoall-ring-one-barrier.c
- src/smpi/colls/alltoall-ring.c
- src/smpi/colls/alltoall-mvapich-scatter-dest.c
- src/smpi/colls/alltoallv-bruck.c
- src/smpi/colls/alltoallv-ompi-basic-linear.c
- src/smpi/colls/alltoallv-pair-light-barrier.c
- src/smpi/colls/alltoallv-pair-mpi-barrier.c
- src/smpi/colls/alltoallv-pair-one-barrier.c
- src/smpi/colls/alltoallv-pair.c
- src/smpi/colls/alltoallv-ring-light-barrier.c
- src/smpi/colls/alltoallv-ring-mpi-barrier.c
- src/smpi/colls/alltoallv-ring-one-barrier.c
- src/smpi/colls/alltoallv-ring.c
- src/smpi/colls/barrier-ompi.c
- src/smpi/colls/barrier-mvapich2-pair.c
- src/smpi/colls/bcast-NTSB.c
- src/smpi/colls/bcast-NTSL-Isend.c
- src/smpi/colls/bcast-NTSL.c
- src/smpi/colls/bcast-SMP-binary.c
- src/smpi/colls/bcast-SMP-binomial.c
- src/smpi/colls/bcast-SMP-linear.c
- src/smpi/colls/bcast-arrival-pattern-aware-wait.c
- src/smpi/colls/bcast-arrival-pattern-aware.c
- src/smpi/colls/bcast-arrival-scatter.c
- src/smpi/colls/bcast-binomial-tree.c
- src/smpi/colls/bcast-flattree-pipeline.c
- src/smpi/colls/bcast-flattree.c
- src/smpi/colls/bcast-ompi-pipeline.c
- src/smpi/colls/bcast-ompi-split-bintree.c
- src/smpi/colls/bcast-mvapich-smp.c
- src/smpi/colls/bcast-scatter-LR-allgather.c
- src/smpi/colls/bcast-scatter-rdb-allgather.c
- src/smpi/colls/coll_tuned_topo.c
- src/smpi/colls/colls_global.c
- src/smpi/colls/gather-ompi.c
- src/smpi/colls/gather-mvapich.c
- src/smpi/colls/reduce-NTSL.c
- src/smpi/colls/reduce-arrival-pattern-aware.c
- src/smpi/colls/reduce-binomial.c
- src/smpi/colls/reduce-flat-tree.c
- src/smpi/colls/reduce-ompi.c
- src/smpi/colls/reduce-scatter-gather.c
- src/smpi/colls/reduce_scatter-mpich.c
- src/smpi/colls/reduce_scatter-ompi.c
- src/smpi/colls/reduce-mvapich-knomial.c
- src/smpi/colls/reduce-mvapich-two-level.c
- src/smpi/colls/reduce-rab.c
- src/smpi/colls/scatter-ompi.c
- src/smpi/colls/scatter-mvapich-two-level.c
+ src/smpi/colls/allgather-2dmesh.cpp
+ src/smpi/colls/allgather-3dmesh.cpp
+ src/smpi/colls/allgather-GB.cpp
+ src/smpi/colls/allgather-NTSLR-NB.cpp
+ src/smpi/colls/allgather-NTSLR.cpp
+ src/smpi/colls/allgather-SMP-NTS.cpp
+ src/smpi/colls/allgather-bruck.cpp
+ src/smpi/colls/allgather-loosely-lr.cpp
+ src/smpi/colls/allgather-ompi-neighborexchange.cpp
+ src/smpi/colls/allgather-pair.cpp
+ src/smpi/colls/allgather-mvapich-smp.cpp
+ src/smpi/colls/allgather-rdb.cpp
+ src/smpi/colls/allgather-rhv.cpp
+ src/smpi/colls/allgather-ring.cpp
+ src/smpi/colls/allgather-smp-simple.cpp
+ src/smpi/colls/allgather-spreading-simple.cpp
+ src/smpi/colls/allgatherv-GB.cpp
+ src/smpi/colls/allgatherv-mpich-rdb.cpp
+ src/smpi/colls/allgatherv-mpich-ring.cpp
+ src/smpi/colls/allgatherv-ompi-bruck.cpp
+ src/smpi/colls/allgatherv-ompi-neighborexchange.cpp
+ src/smpi/colls/allgatherv-pair.cpp
+ src/smpi/colls/allgatherv-ring.cpp
+ src/smpi/colls/allreduce-lr.cpp
+ src/smpi/colls/allreduce-ompi-ring-segmented.cpp
+ src/smpi/colls/allreduce-rab-rdb.cpp
+ src/smpi/colls/allreduce-rab1.cpp
+ src/smpi/colls/allreduce-rab2.cpp
+ src/smpi/colls/allreduce-rdb.cpp
+ src/smpi/colls/allreduce-redbcast.cpp
+ src/smpi/colls/allreduce-smp-binomial-pipeline.cpp
+ src/smpi/colls/allreduce-smp-binomial.cpp
+ src/smpi/colls/allreduce-smp-rdb.cpp
+ src/smpi/colls/allreduce-smp-rsag-lr.cpp
+ src/smpi/colls/allreduce-smp-rsag-rab.cpp
+ src/smpi/colls/allreduce-smp-rsag.cpp
+ src/smpi/colls/allreduce-mvapich-rs.cpp
+ src/smpi/colls/allreduce-mvapich-two-level.cpp
+ src/smpi/colls/alltoall-2dmesh.cpp
+ src/smpi/colls/alltoall-3dmesh.cpp
+# src/smpi/colls/alltoall-bruck.cpp
+ src/smpi/colls/alltoall-pair-light-barrier.cpp
+ src/smpi/colls/alltoall-pair-mpi-barrier.cpp
+ src/smpi/colls/alltoall-pair-one-barrier.cpp
+ src/smpi/colls/alltoall-pair.cpp
+ src/smpi/colls/alltoall-rdb.cpp
+ src/smpi/colls/alltoall-ring-light-barrier.cpp
+ src/smpi/colls/alltoall-ring-mpi-barrier.cpp
+ src/smpi/colls/alltoall-ring-one-barrier.cpp
+ src/smpi/colls/alltoall-ring.cpp
+ src/smpi/colls/alltoall-mvapich-scatter-dest.cpp
+ src/smpi/colls/alltoallv-bruck.cpp
+ src/smpi/colls/alltoallv-ompi-basic-linear.cpp
+ src/smpi/colls/alltoallv-pair-light-barrier.cpp
+ src/smpi/colls/alltoallv-pair-mpi-barrier.cpp
+ src/smpi/colls/alltoallv-pair-one-barrier.cpp
+ src/smpi/colls/alltoallv-pair.cpp
+ src/smpi/colls/alltoallv-ring-light-barrier.cpp
+ src/smpi/colls/alltoallv-ring-mpi-barrier.cpp
+ src/smpi/colls/alltoallv-ring-one-barrier.cpp
+ src/smpi/colls/alltoallv-ring.cpp
+ src/smpi/colls/barrier-ompi.cpp
+ src/smpi/colls/barrier-mvapich2-pair.cpp
+ src/smpi/colls/bcast-NTSB.cpp
+ src/smpi/colls/bcast-NTSL-Isend.cpp
+ src/smpi/colls/bcast-NTSL.cpp
+ src/smpi/colls/bcast-SMP-binary.cpp
+ src/smpi/colls/bcast-SMP-binomial.cpp
+ src/smpi/colls/bcast-SMP-linear.cpp
+ src/smpi/colls/bcast-arrival-pattern-aware-wait.cpp
+ src/smpi/colls/bcast-arrival-pattern-aware.cpp
+ src/smpi/colls/bcast-arrival-scatter.cpp
+ src/smpi/colls/bcast-binomial-tree.cpp
+ src/smpi/colls/bcast-flattree-pipeline.cpp
+ src/smpi/colls/bcast-flattree.cpp
+ src/smpi/colls/bcast-ompi-pipeline.cpp
+ src/smpi/colls/bcast-ompi-split-bintree.cpp
+ src/smpi/colls/bcast-mvapich-smp.cpp
+ src/smpi/colls/bcast-scatter-LR-allgather.cpp
+ src/smpi/colls/bcast-scatter-rdb-allgather.cpp
+ src/smpi/colls/coll_tuned_topo.cpp
+ src/smpi/colls/colls_global.cpp
+ src/smpi/colls/gather-ompi.cpp
+ src/smpi/colls/gather-mvapich.cpp
+ src/smpi/colls/reduce-NTSL.cpp
+ src/smpi/colls/reduce-arrival-pattern-aware.cpp
+ src/smpi/colls/reduce-binomial.cpp
+ src/smpi/colls/reduce-flat-tree.cpp
+ src/smpi/colls/reduce-ompi.cpp
+ src/smpi/colls/reduce-scatter-gather.cpp
+ src/smpi/colls/reduce_scatter-mpich.cpp
+ src/smpi/colls/reduce_scatter-ompi.cpp
+ src/smpi/colls/reduce-mvapich-knomial.cpp
+ src/smpi/colls/reduce-mvapich-two-level.cpp
+ src/smpi/colls/reduce-rab.cpp
+ src/smpi/colls/scatter-ompi.cpp
+ src/smpi/colls/scatter-mvapich-two-level.cpp
src/smpi/colls/smpi_automatic_selector.cpp
- src/smpi/colls/smpi_mpich_selector.c
- src/smpi/colls/smpi_intel_mpi_selector.c
- src/smpi/colls/smpi_openmpi_selector.c
- src/smpi/colls/smpi_mvapich2_selector.c
+ src/smpi/colls/smpi_mpich_selector.cpp
+ src/smpi/colls/smpi_intel_mpi_selector.cpp
+ src/smpi/colls/smpi_openmpi_selector.cpp
+ src/smpi/colls/smpi_mvapich2_selector.cpp
src/smpi/instr_smpi.cpp
src/smpi/smpi_base.cpp
src/smpi/smpi_bench.cpp