/* Size in bytes of one pipeline segment used by the SMP-linear broadcast.
 * The broadcast divides this value by the datatype extent to obtain the
 * number of elements sent per segment. */
6 int bcast_SMP_linear_segment_byte = 8192;
8 int smpi_coll_tuned_bcast_SMP_linear(void *buf, int count,
9 MPI_Datatype datatype, int root,
15 MPI_Request *request_array;
16 MPI_Status *status_array;
20 MPI_Type_extent(datatype, &extent);
22 MPI_Comm_rank(comm, &rank);
23 MPI_Comm_size(comm, &size);
25 int segment = bcast_SMP_linear_segment_byte / extent;
26 int pipe_length = count / segment;
27 int remainder = count % segment;
28 int increment = segment * extent;
31 /* the leader of each SMP does inter-communication
32 and acts as a root for intra-communication */
33 int to_inter = (rank + NUM_CORE) % size;
34 int to_intra = (rank + 1) % size;
35 int from_inter = (rank - NUM_CORE + size) % size;
36 int from_intra = (rank + size - 1) % size;
38 // call the native MPI_Bcast when the communicator size is too small (size <= NUM_CORE)
39 if (size <= NUM_CORE) {
40 return MPI_Bcast(buf, count, datatype, root, comm);
42 // if root is not zero send to rank zero first
45 MPI_Send(buf, count, datatype, 0, tag, comm);
47 MPI_Recv(buf, count, datatype, root, tag, comm, &status);
49 // when a message is smaller than a block size => no pipeline
50 if (count <= segment) {
53 MPI_Send(buf, count, datatype, to_inter, tag, comm);
54 MPI_Send(buf, count, datatype, to_intra, tag, comm);
56 // case last ROOT of each SMP
57 else if (rank == (((size - 1) / NUM_CORE) * NUM_CORE)) {
58 MPI_Irecv(buf, count, datatype, from_inter, tag, comm, &request);
59 MPI_Wait(&request, &status);
60 MPI_Send(buf, count, datatype, to_intra, tag, comm);
62 // case intermediate ROOT of each SMP
63 else if (rank % NUM_CORE == 0) {
64 MPI_Irecv(buf, count, datatype, from_inter, tag, comm, &request);
65 MPI_Wait(&request, &status);
66 MPI_Send(buf, count, datatype, to_inter, tag, comm);
67 MPI_Send(buf, count, datatype, to_intra, tag, comm);
69 // case last non-ROOT of each SMP
70 else if (((rank + 1) % NUM_CORE == 0) || (rank == (size - 1))) {
71 MPI_Irecv(buf, count, datatype, from_intra, tag, comm, &request);
72 MPI_Wait(&request, &status);
74 // case intermediate non-ROOT of each SMP
76 MPI_Irecv(buf, count, datatype, from_intra, tag, comm, &request);
77 MPI_Wait(&request, &status);
78 MPI_Send(buf, count, datatype, to_intra, tag, comm);
85 (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request));
87 (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status));
89 // case ROOT of each SMP
90 if (rank % NUM_CORE == 0) {
93 for (i = 0; i < pipe_length; i++) {
94 MPI_Send((char *) buf + (i * increment), segment, datatype, to_inter,
96 MPI_Send((char *) buf + (i * increment), segment, datatype, to_intra,
100 // case last ROOT of each SMP
101 else if (rank == (((size - 1) / NUM_CORE) * NUM_CORE)) {
102 for (i = 0; i < pipe_length; i++) {
103 MPI_Irecv((char *) buf + (i * increment), segment, datatype,
104 from_inter, (tag + i), comm, &request_array[i]);
106 for (i = 0; i < pipe_length; i++) {
107 MPI_Wait(&request_array[i], &status);
108 MPI_Send((char *) buf + (i * increment), segment, datatype, to_intra,
112 // case intermediate ROOT of each SMP
114 for (i = 0; i < pipe_length; i++) {
115 MPI_Irecv((char *) buf + (i * increment), segment, datatype,
116 from_inter, (tag + i), comm, &request_array[i]);
118 for (i = 0; i < pipe_length; i++) {
119 MPI_Wait(&request_array[i], &status);
120 MPI_Send((char *) buf + (i * increment), segment, datatype, to_inter,
122 MPI_Send((char *) buf + (i * increment), segment, datatype, to_intra,
126 } else { // case last non-ROOT of each SMP
127 if (((rank + 1) % NUM_CORE == 0) || (rank == (size - 1))) {
128 for (i = 0; i < pipe_length; i++) {
129 MPI_Irecv((char *) buf + (i * increment), segment, datatype,
130 from_intra, (tag + i), comm, &request_array[i]);
132 for (i = 0; i < pipe_length; i++) {
133 MPI_Wait(&request_array[i], &status);
136 // case intermediate non-ROOT of each SMP
138 for (i = 0; i < pipe_length; i++) {
139 MPI_Irecv((char *) buf + (i * increment), segment, datatype,
140 from_intra, (tag + i), comm, &request_array[i]);
142 for (i = 0; i < pipe_length; i++) {
143 MPI_Wait(&request_array[i], &status);
144 MPI_Send((char *) buf + (i * increment), segment, datatype, to_intra,
153 // when count is not divisible by block size, use default BCAST for the remainder
154 if ((remainder != 0) && (count > segment)) {
155 MPI_Bcast((char *) buf + (pipe_length * increment), remainder, datatype,