> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'network/TCP_gamma' to '4194304'
> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'smpi/cpu_threshold' to '-1'
> [Tremblay:0:(1) 0.000000] [smpi_replay/VERBOSE] 0 bcast 5e4 0.000000
-> [Jupiter:1:(2) 0.015036] [smpi_replay/VERBOSE] 1 bcast 5e4 0.015036
-> [Fafard:2:(3) 0.015676] [smpi_replay/VERBOSE] 2 bcast 5e4 0.015676
-> [Jupiter:1:(2) 2.636405] [smpi_replay/VERBOSE] 1 compute 2e8 2.621369
+> [Fafard:2:(3) 0.124241] [smpi_replay/VERBOSE] 2 bcast 5e4 0.124241
+> [Jupiter:1:(2) 0.124241] [smpi_replay/VERBOSE] 1 bcast 5e4 0.124241
+> [Jupiter:1:(2) 2.745611] [smpi_replay/VERBOSE] 1 compute 2e8 2.621369
> [Tremblay:0:(1) 5.097100] [smpi_replay/VERBOSE] 0 compute 5e8 5.097100
> [Tremblay:0:(1) 5.097100] [smpi_replay/VERBOSE] 0 bcast 5e4 0.000000
-> [Jupiter:1:(2) 5.112136] [smpi_replay/VERBOSE] 1 bcast 5e4 2.475730
-> [Fafard:2:(3) 6.569099] [smpi_replay/VERBOSE] 2 compute 5e8 6.553424
-> [Fafard:2:(3) 6.584775] [smpi_replay/VERBOSE] 2 bcast 5e4 0.015676
-> [Jupiter:1:(2) 7.733505] [smpi_replay/VERBOSE] 1 compute 2e8 2.621369
+> [Fafard:2:(3) 6.677665] [smpi_replay/VERBOSE] 2 compute 5e8 6.553424
+> [Fafard:2:(3) 6.801906] [smpi_replay/VERBOSE] 2 bcast 5e4 0.124241
+> [Jupiter:1:(2) 6.801906] [smpi_replay/VERBOSE] 1 bcast 5e4 4.056296
+> [Jupiter:1:(2) 9.423276] [smpi_replay/VERBOSE] 1 compute 2e8 2.621369
> [Tremblay:0:(1) 10.194200] [smpi_replay/VERBOSE] 0 compute 5e8 5.097100
-> [Fafard:2:(3) 13.138198] [smpi_replay/VERBOSE] 2 compute 5e8 6.553424
-> [Jupiter:1:(2) 14.286929] [smpi_replay/VERBOSE] 1 reduce 5e4 5e8 6.553424
-> [Tremblay:0:(1) 18.250974] [smpi_replay/VERBOSE] 0 reduce 5e4 5e8 8.056774
-> [Fafard:2:(3) 19.691622] [smpi_replay/VERBOSE] 2 reduce 5e4 5e8 6.553424
-> [Fafard:2:(3) 19.691622] [smpi_replay/INFO] Simulation time 19.691622
+> [Fafard:2:(3) 13.355330] [smpi_replay/VERBOSE] 2 compute 5e8 6.553424
+> [Jupiter:1:(2) 15.976699] [smpi_replay/VERBOSE] 1 reduce 5e4 5e8 6.553424
+> [Tremblay:0:(1) 18.468105] [smpi_replay/VERBOSE] 0 reduce 5e4 5e8 8.273906
+> [Fafard:2:(3) 19.908753] [smpi_replay/VERBOSE] 2 reduce 5e4 5e8 6.553424
+> [Fafard:2:(3) 19.908753] [smpi_replay/INFO] Simulation time 19.908753
+
$ rm -f replay/one_trace
> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'tracing/smpi' to 'yes'
> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'tracing/smpi/computing' to 'yes'
> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'smpi/cpu_threshold' to '-1'
-> [Fafard:2:(3) 19.691622] [smpi_replay/INFO] Simulation time 19.691622
+> [Fafard:2:(3) 19.908753] [smpi_replay/INFO] Simulation time 19.908753
+
$ rm -f replay/one_trace
> 12 0 2 2 6
> 13 0 2 1
> 12 0 2 1 4
-> 13 0.015036 2 2
-> 12 0.015036 2 2 4
-> 13 0.015676 2 3
-> 12 0.015676 2 3 4
-> 13 2.636405 2 2
-> 12 2.636405 2 2 6
+> 13 0.124241 2 3
+> 12 0.124241 2 3 4
+> 13 0.124241 2 2
+> 12 0.124241 2 2 4
+> 13 2.745611 2 2
+> 12 2.745611 2 2 6
> 13 5.097100 2 1
> 12 5.097100 2 1 6
> 13 5.097100 2 1
> 12 5.097100 2 1 4
> 5 7 2 action_reduce "0 1 0"
-> 13 5.112136 2 2
-> 12 5.112136 2 2 4
-> 13 6.569099 2 3
-> 12 6.569099 2 3 6
-> 13 6.584775 2 3
-> 12 6.584775 2 3 4
-> 13 7.733505 2 2
-> 12 7.733505 2 2 7
+> 13 6.677665 2 3
+> 12 6.677665 2 3 6
+> 13 6.801906 2 3
+> 12 6.801906 2 3 4
+> 13 6.801906 2 2
+> 12 6.801906 2 2 4
+> 13 9.423276 2 2
+> 12 9.423276 2 2 7
> 13 10.194200 2 1
> 12 10.194200 2 1 7
-> 13 13.138198 2 3
-> 12 13.138198 2 3 7
+> 13 13.355330 2 3
+> 12 13.355330 2 3 7
> 5 8 2 smpi_replay_run_finalize "0 1 0"
-> 13 14.286929 2 2
-> 12 14.286929 2 2 8
-> 13 18.250974 2 1
-> 12 18.250974 2 1 8
-> 13 19.691622 2 3
-> 12 19.691622 2 3 8
-> 13 19.691622 2 3
-> 7 19.691622 1 3
-> 13 19.691622 2 2
-> 7 19.691622 1 2
-> 13 19.691622 2 1
-> 7 19.691622 1 1
+> 13 15.976699 2 2
+> 12 15.976699 2 2 8
+> 13 18.468105 2 1
+> 12 18.468105 2 1 8
+> 13 19.908753 2 3
+> 12 19.908753 2 3 8
+> 13 19.908753 2 3
+> 7 19.908753 1 3
+> 13 19.908753 2 2
+> 7 19.908753 1 2
+> 13 19.908753 2 1
+> 7 19.908753 1 1
+
> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'smpi/cpu_threshold' to '-1'
> [Tremblay:0:(1) 1.491472] [smpi_replay/VERBOSE] 0 send 1 1e7 1.491472
> [Jupiter:1:(2) 1.491472] [smpi_replay/VERBOSE] 1 recv 0 1e7 1.491472
-> [Tremblay:0:(1) 1.495453] [smpi_replay/VERBOSE] 0 barrier 0.003981
-> [Jupiter:1:(2) 1.498398] [smpi_replay/VERBOSE] 1 barrier 0.006926
-> [Fafard:2:(3) 1.499434] [smpi_replay/VERBOSE] 2 barrier 1.499434
-> [Tremblay:0:(1) 2.495453] [smpi_replay/VERBOSE] 0 compute 98095000 1.000000
-> [Jupiter:1:(2) 2.498398] [smpi_replay/VERBOSE] 1 compute 76296000 1.000000
-> [Fafard:2:(3) 2.499434] [smpi_replay/VERBOSE] 2 compute 76296000 1.000000
-> [Fafard:2:(3) 2.499434] [smpi_replay/INFO] Simulation time 2.499434
+> [Tremblay:0:(1) 1.494910] [smpi_replay/VERBOSE] 0 barrier 0.003438
+> [Fafard:2:(3) 1.496886] [smpi_replay/VERBOSE] 2 barrier 1.496886
+> [Jupiter:1:(2) 1.498347] [smpi_replay/VERBOSE] 1 barrier 0.006875
+> [Tremblay:0:(1) 2.494910] [smpi_replay/VERBOSE] 0 compute 98095000 1.000000
+> [Fafard:2:(3) 2.496886] [smpi_replay/VERBOSE] 2 compute 76296000 1.000000
+> [Jupiter:1:(2) 2.498347] [smpi_replay/VERBOSE] 1 compute 76296000 1.000000
+> [Jupiter:1:(2) 2.498347] [smpi_replay/INFO] Simulation time 2.498347
$ rm -f replay/one_trace
> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'network/TCP_gamma' to '4194304'
> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'smpi/cpu_threshold' to '-1'
> [Tremblay:0:(1) 5.112775] [smpi_replay/VERBOSE] 0 allReduce 5e4 5e8 5.112775
-> [Jupiter:1:(2) 6.584135] [smpi_replay/VERBOSE] 1 allReduce 5e4 5e8 6.584135
-> [Fafard:2:(3) 6.584775] [smpi_replay/VERBOSE] 2 allReduce 5e4 5e8 6.584775
+> [Jupiter:1:(2) 6.693341] [smpi_replay/VERBOSE] 1 allReduce 5e4 5e8 6.693341
+> [Fafard:2:(3) 6.693341] [smpi_replay/VERBOSE] 2 allReduce 5e4 5e8 6.693341
> [Tremblay:0:(1) 10.209875] [smpi_replay/VERBOSE] 0 compute 5e8 5.097100
-> [Jupiter:1:(2) 13.137559] [smpi_replay/VERBOSE] 1 compute 5e8 6.553424
-> [Fafard:2:(3) 13.138198] [smpi_replay/VERBOSE] 2 compute 5e8 6.553424
-> [Fafard:2:(3) 13.138198] [smpi_replay/INFO] Simulation time 13.138198
+> [Fafard:2:(3) 13.246764] [smpi_replay/VERBOSE] 2 compute 5e8 6.553424
+> [Jupiter:1:(2) 13.246764] [smpi_replay/VERBOSE] 1 compute 5e8 6.553424
+> [Jupiter:1:(2) 13.246764] [smpi_replay/INFO] Simulation time 13.246764
$ rm -f replay/one_trace
s_mpi_coll_description_t mpi_coll_barrier_description[] = {
{"default",
- "barrier default collective",
- smpi_mpi_barrier},
+ "barrier default collective - ompi selector",
+ smpi_coll_tuned_barrier_ompi},
COLL_BARRIERS(COLL_DESCRIPTION, COLL_COMMA),
{NULL, NULL, NULL} /* this array must be NULL terminated */
};
s_mpi_coll_description_t mpi_coll_bcast_description[] = {
{"default",
- "bcast default collective",
- smpi_mpi_bcast},
+ "bcast default collective - ompi selector",
+ smpi_coll_tuned_bcast_ompi},
COLL_BCASTS(COLL_DESCRIPTION, COLL_COMMA),
{NULL, NULL, NULL} /* this array must be NULL terminated */
};
int (*mpi_coll_reduce_scatter_fun)(void *sbuf, void *rbuf, int *rcounts,MPI_Datatype dtype,MPI_Op op,MPI_Comm comm);
int (*mpi_coll_scatter_fun)(void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbuf, int recvcount, MPI_Datatype recvtype,int root, MPI_Comm comm);
int (*mpi_coll_barrier_fun)(MPI_Comm comm);
-struct s_proc_tree {
- int PROCTREE_A;
- int numChildren;
- int *child;
- int parent;
- int me;
- int root;
- int isRoot;
-};
-typedef struct s_proc_tree *proc_tree_t;
-
-/**
- * alloc and init
- **/
-static proc_tree_t alloc_tree(int arity)
-{
- proc_tree_t tree;
- int i;
-
- tree = xbt_new(struct s_proc_tree, 1);
- tree->PROCTREE_A = arity;
- tree->isRoot = 0;
- tree->numChildren = 0;
- tree->child = xbt_new(int, arity);
- for (i = 0; i < arity; i++) {
- tree->child[i] = -1;
- }
- tree->root = -1;
- tree->parent = -1;
- return tree;
-}
-
-/**
- * free
- **/
-static void free_tree(proc_tree_t tree)
-{
- xbt_free(tree->child);
- xbt_free(tree);
-}
-
-/**
- * Build the tree depending on a process rank (index) and the group size (extent)
- * @param root the rank of the tree root
- * @param rank the rank of the calling process
- * @param size the total number of processes
- **/
-static void build_tree(int root, int rank, int size, proc_tree_t * tree)
-{
- int index = (rank - root + size) % size;
- int firstChildIdx = index * (*tree)->PROCTREE_A + 1;
- int i;
-
- (*tree)->me = rank;
- (*tree)->root = root;
-
- for (i = 0; i < (*tree)->PROCTREE_A && firstChildIdx + i < size; i++) {
- (*tree)->child[i] = (firstChildIdx + i + root) % size;
- (*tree)->numChildren++;
- }
- if (rank == root) {
- (*tree)->isRoot = 1;
- } else {
- (*tree)->isRoot = 0;
- (*tree)->parent = (((index - 1) / (*tree)->PROCTREE_A) + root) % size;
- }
-}
-
-/**
- * bcast
- **/
-static void tree_bcast(void *buf, int count, MPI_Datatype datatype,
- MPI_Comm comm, proc_tree_t tree)
-{
- int system_tag = COLL_TAG_BCAST;
- int rank, i;
- MPI_Request *requests;
-
- rank = smpi_comm_rank(comm);
- /* wait for data from my parent in the tree */
- if (!tree->isRoot) {
- XBT_DEBUG("<%d> tree_bcast(): i am not root: recv from %d, tag=%d)",
- rank, tree->parent, system_tag + rank);
- smpi_mpi_recv(buf, count, datatype, tree->parent, system_tag + rank,
- comm, MPI_STATUS_IGNORE);
- }
- requests = xbt_new(MPI_Request, tree->numChildren);
- XBT_DEBUG("<%d> creates %d requests (1 per child)", rank,
- tree->numChildren);
- /* iniates sends to ranks lower in the tree */
- for (i = 0; i < tree->numChildren; i++) {
- if (tree->child[i] == -1) {
- requests[i] = MPI_REQUEST_NULL;
- } else {
- XBT_DEBUG("<%d> send to <%d>, tag=%d", rank, tree->child[i],
- system_tag + tree->child[i]);
- requests[i] =
- smpi_isend_init(buf, count, datatype, tree->child[i],
- system_tag + tree->child[i], comm);
- }
- }
- smpi_mpi_startall(tree->numChildren, requests);
- smpi_mpi_waitall(tree->numChildren, requests, MPI_STATUS_IGNORE);
- for(i = 0; i < tree->numChildren; i++) {
- if(requests[i]!=MPI_REQUEST_NULL) smpi_mpi_request_free(&requests[i]);
- }
- xbt_free(requests);
-}
-
-/**
- * anti-bcast
- **/
-static void tree_antibcast(void *buf, int count, MPI_Datatype datatype,
- MPI_Comm comm, proc_tree_t tree)
-{
- int system_tag = COLL_TAG_BCAST;
- int rank, i;
- MPI_Request *requests;
-
- rank = smpi_comm_rank(comm);
- // everyone sends to its parent, except root.
- if (!tree->isRoot) {
- XBT_DEBUG("<%d> tree_antibcast(): i am not root: send to %d, tag=%d)",
- rank, tree->parent, system_tag + rank);
- smpi_mpi_send(buf, count, datatype, tree->parent, system_tag + rank,
- comm);
- }
- //every one receives as many messages as it has children
- requests = xbt_new(MPI_Request, tree->numChildren);
- XBT_DEBUG("<%d> creates %d requests (1 per child)", rank,
- tree->numChildren);
- for (i = 0; i < tree->numChildren; i++) {
- if (tree->child[i] == -1) {
- requests[i] = MPI_REQUEST_NULL;
- } else {
- XBT_DEBUG("<%d> recv from <%d>, tag=%d", rank, tree->child[i],
- system_tag + tree->child[i]);
- requests[i] =
- smpi_irecv_init(buf, count, datatype, tree->child[i],
- system_tag + tree->child[i], comm);
- }
- }
- smpi_mpi_startall(tree->numChildren, requests);
- smpi_mpi_waitall(tree->numChildren, requests, MPI_STATUS_IGNORE);
- for(i = 0; i < tree->numChildren; i++) {
- if(requests[i]!=MPI_REQUEST_NULL) smpi_mpi_request_free(&requests[i]);
- }
- xbt_free(requests);
-}
-
-/**
- * bcast with a binary, ternary, or whatever tree ..
- **/
-void nary_tree_bcast(void *buf, int count, MPI_Datatype datatype, int root,
- MPI_Comm comm, int arity)
-{
- proc_tree_t tree = alloc_tree(arity);
- int rank, size;
-
- rank = smpi_comm_rank(comm);
- size = smpi_comm_size(comm);
- build_tree(root, rank, size, &tree);
- tree_bcast(buf, count, datatype, comm, tree);
- free_tree(tree);
-}
-
-/**
- * barrier with a binary, ternary, or whatever tree ..
- **/
-void nary_tree_barrier(MPI_Comm comm, int arity)
-{
- proc_tree_t tree = alloc_tree(arity);
- int rank, size;
- char dummy = '$';
- rank = smpi_comm_rank(comm);
- size = smpi_comm_size(comm);
- build_tree(0, rank, size, &tree);
- tree_antibcast(&dummy, 1, MPI_CHAR, comm, tree);
- tree_bcast(&dummy, 1, MPI_CHAR, comm, tree);
- free_tree(tree);
-}
int smpi_coll_tuned_alltoall_ompi2(void *sendbuf, int sendcount,
MPI_Datatype sendtype, void *recvbuf,