1 /* Copyright (c) 2013-2014. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
7 #include "colls_private.h"
10 int bcast_SMP_binary_segment_byte = 8192;
12 int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count,
13 MPI_Datatype datatype, int root,
16 int tag = COLL_TAG_BCAST;
19 MPI_Request *request_array;
20 MPI_Status *status_array;
24 extent = smpi_datatype_get_extent(datatype);
26 rank = smpi_comm_rank(comm);
27 size = smpi_comm_size(comm);
28 if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){
29 smpi_comm_init_smp(comm);
32 if (smpi_comm_is_uniform(comm)){
33 host_num_core = smpi_comm_size(smpi_comm_get_intra_comm(comm));
35 //implementation buggy in this case
36 return smpi_coll_tuned_bcast_mpich( buf , count, datatype,
40 int segment = bcast_SMP_binary_segment_byte / extent;
41 int pipe_length = count / segment;
42 int remainder = count % segment;
44 int to_intra_left = (rank / host_num_core) * host_num_core + (rank % host_num_core) * 2 + 1;
45 int to_intra_right = (rank / host_num_core) * host_num_core + (rank % host_num_core) * 2 + 2;
46 int to_inter_left = ((rank / host_num_core) * 2 + 1) * host_num_core;
47 int to_inter_right = ((rank / host_num_core) * 2 + 2) * host_num_core;
48 int from_inter = (((rank / host_num_core) - 1) / 2) * host_num_core;
49 int from_intra = (rank / host_num_core) * host_num_core + ((rank % host_num_core) - 1) / 2;
50 int increment = segment * extent;
52 int base = (rank / host_num_core) * host_num_core;
53 int num_core = host_num_core;
54 if (((rank / host_num_core) * host_num_core) == ((size / host_num_core) * host_num_core))
55 num_core = size - (rank / host_num_core) * host_num_core;
57 // if root is not zero send to rank zero first
60 smpi_mpi_send(buf, count, datatype, 0, tag, comm);
62 smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status);
64 // when a message is smaller than a block size => no pipeline
65 if (count <= segment) {
66 // case ROOT-of-each-SMP
67 if (rank % host_num_core == 0) {
70 //printf("node %d left %d right %d\n",rank,to_inter_left,to_inter_right);
71 if (to_inter_left < size)
72 smpi_mpi_send(buf, count, datatype, to_inter_left, tag, comm);
73 if (to_inter_right < size)
74 smpi_mpi_send(buf, count, datatype, to_inter_right, tag, comm);
75 if ((to_intra_left - base) < num_core)
76 smpi_mpi_send(buf, count, datatype, to_intra_left, tag, comm);
77 if ((to_intra_right - base) < num_core)
78 smpi_mpi_send(buf, count, datatype, to_intra_right, tag, comm);
80 // case LEAVES ROOT-of-eash-SMP
81 else if (to_inter_left >= size) {
82 //printf("node %d from %d\n",rank,from_inter);
83 request = smpi_mpi_irecv(buf, count, datatype, from_inter, tag, comm);
84 smpi_mpi_wait(&request, &status);
85 if ((to_intra_left - base) < num_core)
86 smpi_mpi_send(buf, count, datatype, to_intra_left, tag, comm);
87 if ((to_intra_right - base) < num_core)
88 smpi_mpi_send(buf, count, datatype, to_intra_right, tag, comm);
90 // case INTERMEDIAT ROOT-of-each-SMP
92 //printf("node %d left %d right %d from %d\n",rank,to_inter_left,to_inter_right,from_inter);
93 request = smpi_mpi_irecv(buf, count, datatype, from_inter, tag, comm);
94 smpi_mpi_wait(&request, &status);
95 smpi_mpi_send(buf, count, datatype, to_inter_left, tag, comm);
96 if (to_inter_right < size)
97 smpi_mpi_send(buf, count, datatype, to_inter_right, tag, comm);
98 if ((to_intra_left - base) < num_core)
99 smpi_mpi_send(buf, count, datatype, to_intra_left, tag, comm);
100 if ((to_intra_right - base) < num_core)
101 smpi_mpi_send(buf, count, datatype, to_intra_right, tag, comm);
104 // case non ROOT-of-each-SMP
107 if ((to_intra_left - base) >= num_core) {
108 request = smpi_mpi_irecv(buf, count, datatype, from_intra, tag, comm);
109 smpi_mpi_wait(&request, &status);
113 request = smpi_mpi_irecv(buf, count, datatype, from_intra, tag, comm);
114 smpi_mpi_wait(&request, &status);
115 smpi_mpi_send(buf, count, datatype, to_intra_left, tag, comm);
116 if ((to_intra_right - base) < num_core)
117 smpi_mpi_send(buf, count, datatype, to_intra_right, tag, comm);
127 (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
129 (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
131 // case ROOT-of-each-SMP
132 if (rank % host_num_core == 0) {
135 for (i = 0; i < pipe_length; i++) {
136 //printf("node %d left %d right %d\n",rank,to_inter_left,to_inter_right);
137 if (to_inter_left < size)
138 smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
139 to_inter_left, (tag + i), comm);
140 if (to_inter_right < size)
141 smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
142 to_inter_right, (tag + i), comm);
143 if ((to_intra_left - base) < num_core)
144 smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
145 to_intra_left, (tag + i), comm);
146 if ((to_intra_right - base) < num_core)
147 smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
148 to_intra_right, (tag + i), comm);
151 // case LEAVES ROOT-of-eash-SMP
152 else if (to_inter_left >= size) {
153 //printf("node %d from %d\n",rank,from_inter);
154 for (i = 0; i < pipe_length; i++) {
155 request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype,
156 from_inter, (tag + i), comm);
158 for (i = 0; i < pipe_length; i++) {
159 smpi_mpi_wait(&request_array[i], &status);
160 if ((to_intra_left - base) < num_core)
161 smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
162 to_intra_left, (tag + i), comm);
163 if ((to_intra_right - base) < num_core)
164 smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
165 to_intra_right, (tag + i), comm);
168 // case INTERMEDIAT ROOT-of-each-SMP
170 //printf("node %d left %d right %d from %d\n",rank,to_inter_left,to_inter_right,from_inter);
171 for (i = 0; i < pipe_length; i++) {
172 request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype,
173 from_inter, (tag + i), comm);
175 for (i = 0; i < pipe_length; i++) {
176 smpi_mpi_wait(&request_array[i], &status);
177 smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
178 to_inter_left, (tag + i), comm);
179 if (to_inter_right < size)
180 smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
181 to_inter_right, (tag + i), comm);
182 if ((to_intra_left - base) < num_core)
183 smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
184 to_intra_left, (tag + i), comm);
185 if ((to_intra_right - base) < num_core)
186 smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
187 to_intra_right, (tag + i), comm);
191 // case non-ROOT-of-each-SMP
194 if ((to_intra_left - base) >= num_core) {
195 for (i = 0; i < pipe_length; i++) {
196 request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype,
197 from_intra, (tag + i), comm);
199 smpi_mpi_waitall((pipe_length), request_array, status_array);
203 for (i = 0; i < pipe_length; i++) {
204 request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype,
205 from_intra, (tag + i), comm);
207 for (i = 0; i < pipe_length; i++) {
208 smpi_mpi_wait(&request_array[i], &status);
209 smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
210 to_intra_left, (tag + i), comm);
211 if ((to_intra_right - base) < num_core)
212 smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
213 to_intra_right, (tag + i), comm);
222 // when count is not divisible by block size, use default BCAST for the remainder
223 if ((remainder != 0) && (count > segment)) {
224 XBT_WARN("MPI_bcast_SMP_binary use default MPI_bcast.");
225 smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype,