From 38ea8eebef9013b0666d21d82493a559b5a27ac6 Mon Sep 17 00:00:00 2001 From: Augustin Degomme Date: Wed, 20 Aug 2014 16:42:44 +0200 Subject: [PATCH] switch way old SMP aware algos work, to be closer to the ones from mvapich --- src/smpi/colls/allgather-SMP-NTS.c | 14 ++++++------- src/smpi/colls/allgather-loosely-lr.c | 15 +++++++------ src/smpi/colls/allgather-smp-simple.c | 14 ++++++------- .../colls/allreduce-smp-binomial-pipeline.c | 17 +++++++-------- src/smpi/colls/allreduce-smp-binomial.c | 17 +++++++-------- src/smpi/colls/allreduce-smp-rdb.c | 16 +++++++------- src/smpi/colls/allreduce-smp-rsag-lr.c | 17 +++++++-------- src/smpi/colls/allreduce-smp-rsag-rab.c | 16 +++++++------- src/smpi/colls/allreduce-smp-rsag.c | 17 +++++++-------- src/smpi/colls/bcast-SMP-binary.c | 19 ++++++++++------- src/smpi/colls/bcast-SMP-binomial.c | 21 ++++++++++--------- src/smpi/colls/bcast-SMP-linear.c | 18 +++++++++------- 12 files changed, 98 insertions(+), 103 deletions(-) diff --git a/src/smpi/colls/allgather-SMP-NTS.c b/src/smpi/colls/allgather-SMP-NTS.c index 81c1449a8c..d4838ed4a6 100644 --- a/src/smpi/colls/allgather-SMP-NTS.c +++ b/src/smpi/colls/allgather-SMP-NTS.c @@ -5,9 +5,6 @@ * under the terms of the license (GNU LGPL) which comes with this package. */ #include "colls_private.h" -#ifndef NUM_CORE -#define NUM_CORE 8 -#endif int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, MPI_Datatype stype, void *rbuf, @@ -25,10 +22,13 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, int i, send_offset, recv_offset; int intra_rank, inter_rank; - int num_core = simcall_host_get_core(SIMIX_host_self()); - // do we use the default one or the number of cores in the platform ? - // if the number of cores is one, the platform may be simulated with 1 node = 1 core - if (num_core == 1) num_core = NUM_CORE; + if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){ + smpi_comm_init_smp(comm); + } + int num_core=1; + if (smpi_comm_is_uniform(comm)){ + num_core = smpi_comm_size(smpi_comm_get_intra_comm(comm)); + } intra_rank = rank % num_core; diff --git a/src/smpi/colls/allgather-loosely-lr.c b/src/smpi/colls/allgather-loosely-lr.c index bed1abf68c..74df884b27 100644 --- a/src/smpi/colls/allgather-loosely-lr.c +++ b/src/smpi/colls/allgather-loosely-lr.c @@ -6,10 +6,6 @@ #include "colls_private.h" -#ifndef NUM_CORE -#define NUM_CORE 4 -#endif - int smpi_coll_tuned_allgather_loosely_lr(void *sbuf, int scount, MPI_Datatype stype, void *rbuf, int rcount, MPI_Datatype rtype, @@ -23,10 +19,13 @@ int smpi_coll_tuned_allgather_loosely_lr(void *sbuf, int scount, comm_size = smpi_comm_size(comm); - int num_core = simcall_host_get_core(SIMIX_host_self()); - // do we use the default one or the number of cores in the platform ? - // if the number of cores is one, the platform may be simulated with 1 node = 1 core - if (num_core == 1) num_core = NUM_CORE; +if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){ + smpi_comm_init_smp(comm); + } + int num_core=1; + if (smpi_comm_is_uniform(comm)){ + num_core = smpi_comm_size(smpi_comm_get_intra_comm(comm)); + } if(comm_size%num_core) THROWF(arg_error,0, "allgather loosely lr algorithm can't be used with non multiple of NUM_CORE=%d number of processes ! ",num_core); diff --git a/src/smpi/colls/allgather-smp-simple.c b/src/smpi/colls/allgather-smp-simple.c index 1a38857619..e54c75b5b9 100644 --- a/src/smpi/colls/allgather-smp-simple.c +++ b/src/smpi/colls/allgather-smp-simple.c @@ -5,9 +5,6 @@ * under the terms of the license (GNU LGPL) which comes with this package. */ #include "colls_private.h" -#ifndef NUM_CORE -#define NUM_CORE 8 -#endif int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount, MPI_Datatype stype, void *recv_buf, @@ -17,10 +14,13 @@ int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount, int src, dst, comm_size, rank; comm_size = smpi_comm_size(comm); - int num_core = simcall_host_get_core(SIMIX_host_self()); - // do we use the default one or the number of cores in the platform ? - // if the number of cores is one, the platform may be simulated with 1 node = 1 core - if (num_core == 1) num_core = NUM_CORE; + if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){ + smpi_comm_init_smp(comm); + } + int num_core=1; + if (smpi_comm_is_uniform(comm)){ + num_core = smpi_comm_size(smpi_comm_get_intra_comm(comm)); + } if(comm_size%num_core) THROWF(arg_error,0, "allgather SMP simple algorithm can't be used with non multiple of NUM_CORE=%d number of processes ! ", num_core); diff --git a/src/smpi/colls/allreduce-smp-binomial-pipeline.c b/src/smpi/colls/allreduce-smp-binomial-pipeline.c index 44dfcb77fe..e831be95fd 100644 --- a/src/smpi/colls/allreduce-smp-binomial-pipeline.c +++ b/src/smpi/colls/allreduce-smp-binomial-pipeline.c @@ -12,11 +12,7 @@ inter-communication The communication are done in a pipeline fashion */ -/* change number of core per smp-node - we assume that number of core per process will be the same for all implementations */ -#ifndef NUM_CORE -#define NUM_CORE 8 -#endif + /* this is a default segment size for pipelining, but it is typically passed as a command line argument */ @@ -52,10 +48,13 @@ int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf, int tag = COLL_TAG_ALLREDUCE; int mask, src, dst; MPI_Status status; - int num_core = simcall_host_get_core(SIMIX_host_self()); - // do we use the default one or the number of cores in the platform ? - // if the number of cores is one, the platform may be simulated with 1 node = 1 core - if (num_core == 1) num_core = NUM_CORE; + if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){ + smpi_comm_init_smp(comm); + } + int num_core=1; + if (smpi_comm_is_uniform(comm)){ + num_core = smpi_comm_size(smpi_comm_get_intra_comm(comm)); + } comm_size = smpi_comm_size(comm); rank = smpi_comm_rank(comm); diff --git a/src/smpi/colls/allreduce-smp-binomial.c b/src/smpi/colls/allreduce-smp-binomial.c index 97183fb5d4..25f9837321 100644 --- a/src/smpi/colls/allreduce-smp-binomial.c +++ b/src/smpi/colls/allreduce-smp-binomial.c @@ -11,11 +11,6 @@ It uses 2-layer communication: binomial for both intra-communication inter-communication*/ -/* change number of core per smp-node - we assume that number of core per process will be the same for all implementations */ -#ifndef NUM_CORE -#define NUM_CORE 8 -#endif /* ** NOTE ** Use -DMPICH2 if this code does not compile. @@ -41,11 +36,13 @@ int smpi_coll_tuned_allreduce_smp_binomial(void *send_buf, void *recv_buf, int tag = COLL_TAG_ALLREDUCE; int mask, src, dst; - - int num_core = simcall_host_get_core(SIMIX_host_self()); - // do we use the default one or the number of cores in the platform ? - // if the number of cores is one, the platform may be simulated with 1 node = 1 core - if (num_core == 1) num_core = NUM_CORE; + if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){ + smpi_comm_init_smp(comm); + } + int num_core=1; + if (smpi_comm_is_uniform(comm)){ + num_core = smpi_comm_size(smpi_comm_get_intra_comm(comm)); + } MPI_Status status; comm_size=smpi_comm_size(comm); diff --git a/src/smpi/colls/allreduce-smp-rdb.c b/src/smpi/colls/allreduce-smp-rdb.c index 5b67e5a975..635258c397 100644 --- a/src/smpi/colls/allreduce-smp-rdb.c +++ b/src/smpi/colls/allreduce-smp-rdb.c @@ -11,11 +11,6 @@ It uses 2-layer communication: binomial for intra-communication and rdb for inter-communication*/ -/* change number of core per smp-node - we assume that number of core per process will be the same for all implementations */ -#ifndef NUM_CORE -#define NUM_CORE 8 -#endif /* ** NOTE ** Use -DMPICH2 if this code does not compile. @@ -40,10 +35,13 @@ int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, int tag = COLL_TAG_ALLREDUCE; int mask, src, dst; MPI_Status status; - int num_core = simcall_host_get_core(SIMIX_host_self()); - // do we use the default one or the number of cores in the platform ? - // if the number of cores is one, the platform may be simulated with 1 node = 1 core - if (num_core == 1) num_core = NUM_CORE; + if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){ + smpi_comm_init_smp(comm); + } + int num_core=1; + if (smpi_comm_is_uniform(comm)){ + num_core = smpi_comm_size(smpi_comm_get_intra_comm(comm)); + } /* #ifdef MPICH2_REDUCTION MPI_User_function * uop = MPIR_Op_table[op % 16 - 1]; diff --git a/src/smpi/colls/allreduce-smp-rsag-lr.c b/src/smpi/colls/allreduce-smp-rsag-lr.c index 746eecb33e..daa2b416de 100644 --- a/src/smpi/colls/allreduce-smp-rsag-lr.c +++ b/src/smpi/colls/allreduce-smp-rsag-lr.c @@ -7,12 +7,6 @@ #include "colls_private.h" //#include -/* change number of core per smp-node - we assume that number of core per process will be the same for all implementations */ -#ifndef NUM_CORE -#define NUM_CORE 8 -#endif - /* This fucntion performs all-reduce operation as follow. 1) binomial_tree reduce inside each SMP node @@ -29,10 +23,13 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, int tag = COLL_TAG_ALLREDUCE; int mask, src, dst; MPI_Status status; - int num_core = simcall_host_get_core(SIMIX_host_self()); - // do we use the default one or the number of cores in the platform ? - // if the number of cores is one, the platform may be simulated with 1 node = 1 core - if (num_core == 1) num_core = NUM_CORE; + if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){ + smpi_comm_init_smp(comm); + } + int num_core=1; + if (smpi_comm_is_uniform(comm)){ + num_core = smpi_comm_size(smpi_comm_get_intra_comm(comm)); + } /* #ifdef MPICH2_REDUCTION MPI_User_function * uop = MPIR_Op_table[op % 16 - 1]; diff --git a/src/smpi/colls/allreduce-smp-rsag-rab.c b/src/smpi/colls/allreduce-smp-rsag-rab.c index ff921155ab..a3a11309b8 100644 --- a/src/smpi/colls/allreduce-smp-rsag-rab.c +++ b/src/smpi/colls/allreduce-smp-rsag-rab.c @@ -10,11 +10,6 @@ #include "colls_private.h" //#include -/* change number of core per smp-node - we assume that number of core per process will be the same for all implementations */ -#ifndef NUM_CORE -#define NUM_CORE 8 -#endif /* This fucntion performs all-reduce operation as follow. @@ -32,10 +27,13 @@ int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count, int tag = COLL_TAG_ALLREDUCE; int mask, src, dst; MPI_Status status; - int num_core = simcall_host_get_core(SIMIX_host_self()); - // do we use the default one or the number of cores in the platform ? - // if the number of cores is one, the platform may be simulated with 1 node = 1 core - if (num_core == 1) num_core = NUM_CORE; + if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){ + smpi_comm_init_smp(comm); + } + int num_core=1; + if (smpi_comm_is_uniform(comm)){ + num_core = smpi_comm_size(smpi_comm_get_intra_comm(comm)); + } comm_size = smpi_comm_size(comm); diff --git a/src/smpi/colls/allreduce-smp-rsag.c b/src/smpi/colls/allreduce-smp-rsag.c index 8d751c88da..1bc921ccc3 100644 --- a/src/smpi/colls/allreduce-smp-rsag.c +++ b/src/smpi/colls/allreduce-smp-rsag.c @@ -6,12 +6,6 @@ #include "colls_private.h" -/* change number of core per smp-node - we assume that number of core per process will be the same for all implementations */ -#ifndef NUM_CORE -#define NUM_CORE 8 -#endif - /* This fucntion performs all-reduce operation as follow. 1) binomial_tree reduce inside each SMP node @@ -28,10 +22,13 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, int tag = COLL_TAG_ALLREDUCE; int mask, src, dst; MPI_Status status; - int num_core = simcall_host_get_core(SIMIX_host_self()); - // do we use the default one or the number of cores in the platform ? - // if the number of cores is one, the platform may be simulated with 1 node = 1 core - if (num_core == 1) num_core = NUM_CORE; + if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){ + smpi_comm_init_smp(comm); + } + int num_core=1; + if (smpi_comm_is_uniform(comm)){ + num_core = smpi_comm_size(smpi_comm_get_intra_comm(comm)); + } /* #ifdef MPICH2_REDUCTION MPI_User_function * uop = MPIR_Op_table[op % 16 - 1]; diff --git a/src/smpi/colls/bcast-SMP-binary.c b/src/smpi/colls/bcast-SMP-binary.c index 04b40b2240..5264ca3648 100644 --- a/src/smpi/colls/bcast-SMP-binary.c +++ b/src/smpi/colls/bcast-SMP-binary.c @@ -5,9 +5,7 @@ * under the terms of the license (GNU LGPL) which comes with this package. */ #include "colls_private.h" -#ifndef NUM_CORE -#define NUM_CORE 8 -#endif + int bcast_SMP_binary_segment_byte = 8192; @@ -27,10 +25,17 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, rank = smpi_comm_rank(comm); size = smpi_comm_size(comm); - int host_num_core = simcall_host_get_core(SIMIX_host_self()); - // do we use the default one or the number of cores in the platform ? - // if the number of cores is one, the platform may be simulated with 1 node = 1 core - if (host_num_core == 1) host_num_core = NUM_CORE; + if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){ + smpi_comm_init_smp(comm); + } + int host_num_core=1; + if (smpi_comm_is_uniform(comm)){ + host_num_core = smpi_comm_size(smpi_comm_get_intra_comm(comm)); + }else{ + //implementation buggy in this case + return smpi_coll_tuned_bcast_mpich( buf , count, datatype, + root, comm); + } int segment = bcast_SMP_binary_segment_byte / extent; int pipe_length = count / segment; diff --git a/src/smpi/colls/bcast-SMP-binomial.c b/src/smpi/colls/bcast-SMP-binomial.c index a45dca5fe4..6053e49c7a 100644 --- a/src/smpi/colls/bcast-SMP-binomial.c +++ b/src/smpi/colls/bcast-SMP-binomial.c @@ -5,9 +5,6 @@ * under the terms of the license (GNU LGPL) which comes with this package. */ #include "colls_private.h" -#ifndef NUM_CORE -#define NUM_CORE 8 -#endif int smpi_coll_tuned_bcast_SMP_binomial(void *buf, int count, MPI_Datatype datatype, int root, @@ -22,13 +19,17 @@ int smpi_coll_tuned_bcast_SMP_binomial(void *buf, int count, size = smpi_comm_size(comm); rank = smpi_comm_rank(comm); - int num_core = simcall_host_get_core(SIMIX_host_self()); - // do we use the default one or the number of cores in the platform ? - // if the number of cores is one, the platform may be simulated with 1 node = 1 core - if (num_core == 1) num_core = NUM_CORE; - - if(size%num_core) - THROWF(arg_error,0, "bcast SMP binomial can't be used with non multiple of NUM_CORE=%d number of processes ! ",num_core); + if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){ + smpi_comm_init_smp(comm); + } + int num_core=1; + if (smpi_comm_is_uniform(comm)){ + num_core = smpi_comm_size(smpi_comm_get_intra_comm(comm)); + }else{ + //implementation buggy in this case + return smpi_coll_tuned_bcast_mpich( buf , count, datatype, + root, comm); + } int to_intra, to_inter; int from_intra, from_inter; diff --git a/src/smpi/colls/bcast-SMP-linear.c b/src/smpi/colls/bcast-SMP-linear.c index ee7e3d2a66..b3f9b6a630 100644 --- a/src/smpi/colls/bcast-SMP-linear.c +++ b/src/smpi/colls/bcast-SMP-linear.c @@ -5,9 +5,6 @@ * under the terms of the license (GNU LGPL) which comes with this package. */ #include "colls_private.h" -#ifndef NUM_CORE -#define NUM_CORE 8 -#endif int bcast_SMP_linear_segment_byte = 8192; @@ -27,10 +24,17 @@ int smpi_coll_tuned_bcast_SMP_linear(void *buf, int count, rank = smpi_comm_rank(comm); size = smpi_comm_size(comm); - int num_core = simcall_host_get_core(SIMIX_host_self()); - // do we use the default one or the number of cores in the platform ? - // if the number of cores is one, the platform may be simulated with 1 node = 1 core - if (num_core == 1) num_core = NUM_CORE; + if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){ + smpi_comm_init_smp(comm); + } + int num_core=1; + if (smpi_comm_is_uniform(comm)){ + num_core = smpi_comm_size(smpi_comm_get_intra_comm(comm)); + }else{ + //implementation buggy in this case + return smpi_coll_tuned_bcast_mpich( buf , count, datatype, + root, comm); + } int segment = bcast_SMP_linear_segment_byte / extent; segment = segment == 0 ? 1 :segment; -- 2.20.1