/* Copyright (c) 2013-2014. The SimGrid Team.
 * All rights reserved. */

/* This program is free software; you can redistribute it and/or modify it
 * under the terms of the license (GNU LGPL) which comes with this package. */

#include "colls_private.h"

#ifndef NUM_CORE
#define NUM_CORE 8              /* default number of cores per SMP node */
#endif

int bcast_SMP_linear_segment_byte = 8192;

int smpi_coll_tuned_bcast_SMP_linear(void *buf, int count,
                                     MPI_Datatype datatype, int root,
                                     MPI_Comm comm)
{
  int tag = COLL_TAG_BCAST;
  MPI_Status status;
  MPI_Request request;
  MPI_Request *request_array;
  MPI_Status *status_array;
  int rank, size;
  int i;
  MPI_Aint extent;
  extent = smpi_datatype_get_extent(datatype);

  rank = smpi_comm_rank(comm);
  size = smpi_comm_size(comm);
  int num_core = simcall_host_get_core(SIMIX_host_self());
  // use the core count reported by the platform, or fall back to the default:
  // when each host reports a single core, the platform was probably described
  // with one simulated node per core
  if (num_core == 1)
    num_core = NUM_CORE;

  if (size % num_core)
    THROWF(arg_error, 0,
           "bcast SMP linear can't be used when the number of processes is not a multiple of num_core=%d!",
           num_core);
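  /* For example (illustrative numbers, not from the original source): with
   * num_core = 8 the communicator may have 8, 16, 24, ... processes, and a
   * 12-process run is rejected here. */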

  int segment = bcast_SMP_linear_segment_byte / extent;
  int pipe_length = count / segment;
  int remainder = count % segment;
  int increment = segment * extent;
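
  /* Worked example (illustrative numbers, not from the original source): with
   * the default 8192-byte segment and a 4-byte datatype such as MPI_INT,
   * segment = 8192 / 4 = 2048 elements; broadcasting count = 10000 elements
   * then gives pipe_length = 4 full blocks and remainder = 1808 elements,
   * each block starting increment = 8192 bytes after the previous one. */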

  /* the leader of each SMP node does the inter-node communication and
     acts as the root of the intra-node communication */
  int to_inter = (rank + num_core) % size;
  int to_intra = (rank + 1) % size;
  int from_inter = (rank - num_core + size) % size;
  int from_intra = (rank + size - 1) % size;
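  /* Illustrative layout (assuming size = 16 and num_core = 4): ranks 0, 4, 8
   * and 12 are the SMP leaders; leader 4 receives from leader 0 (from_inter)
   * and forwards to leader 8 (to_inter), while a non-leader such as rank 5
   * receives from rank 4 (from_intra) and forwards to rank 6 (to_intra). */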

  // fall back to the default bcast when the communicator does not span more
  // than one SMP node
  if (size <= num_core) {
    XBT_WARN("MPI_bcast_SMP_linear uses default MPI_bcast.");
    smpi_mpi_bcast(buf, count, datatype, root, comm);
    return MPI_SUCCESS;
  }

  // if root is not rank zero, move the data to rank zero first
  if (root != 0) {
    if (rank == root)
      smpi_mpi_send(buf, count, datatype, 0, tag, comm);
    else if (rank == 0)
      smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status);
  }
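  /* From this point on rank 0 holds the data, so the rest of the algorithm
   * treats rank 0 as the effective root. */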

  // when the message fits in a single block, no pipeline is needed
  if (count <= segment) {
    // case ROOT
    if (rank == 0) {
      smpi_mpi_send(buf, count, datatype, to_inter, tag, comm);
      smpi_mpi_send(buf, count, datatype, to_intra, tag, comm);
    }
    // case last ROOT of each SMP
    else if (rank == (((size - 1) / num_core) * num_core)) {
      request = smpi_mpi_irecv(buf, count, datatype, from_inter, tag, comm);
      smpi_mpi_wait(&request, &status);
      smpi_mpi_send(buf, count, datatype, to_intra, tag, comm);
    }
    // case intermediate ROOT of each SMP
    else if (rank % num_core == 0) {
      request = smpi_mpi_irecv(buf, count, datatype, from_inter, tag, comm);
      smpi_mpi_wait(&request, &status);
      smpi_mpi_send(buf, count, datatype, to_inter, tag, comm);
      smpi_mpi_send(buf, count, datatype, to_intra, tag, comm);
    }
    // case last non-ROOT of each SMP: receive only, nothing to forward
    else if (((rank + 1) % num_core == 0) || (rank == (size - 1))) {
      request = smpi_mpi_irecv(buf, count, datatype, from_intra, tag, comm);
      smpi_mpi_wait(&request, &status);
    }
    // case intermediate non-ROOT of each SMP
    else {
      request = smpi_mpi_irecv(buf, count, datatype, from_intra, tag, comm);
      smpi_mpi_wait(&request, &status);
      smpi_mpi_send(buf, count, datatype, to_intra, tag, comm);
    }
    return MPI_SUCCESS;
  }
  // pipelined bcast: stream the message block by block
  else {
    request_array =
        (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
    status_array =
        (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
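
    /* Each block travels under its own tag (tag + i) so that the receives
     * posted in the pipelined loops below match the blocks in order. */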
    // case ROOT of each SMP
    if (rank % num_core == 0) {
      // case real root: push every block to the next leader and to the first
      // intra-node neighbor
      if (rank == 0) {
        for (i = 0; i < pipe_length; i++) {
          smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
                        to_inter, (tag + i), comm);
          smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
                        to_intra, (tag + i), comm);
        }
      }
      // case last ROOT of each SMP: end of the leader chain, forward
      // intra-node only
      else if (rank == (((size - 1) / num_core) * num_core)) {
        for (i = 0; i < pipe_length; i++) {
          request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment),
                                            segment, datatype, from_inter,
                                            (tag + i), comm);
        }
        for (i = 0; i < pipe_length; i++) {
          smpi_mpi_wait(&request_array[i], &status);
          smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
                        to_intra, (tag + i), comm);
        }
      }
      // case intermediate ROOT of each SMP: forward both inter- and intra-node
      else {
        for (i = 0; i < pipe_length; i++) {
          request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment),
                                            segment, datatype, from_inter,
                                            (tag + i), comm);
        }
        for (i = 0; i < pipe_length; i++) {
          smpi_mpi_wait(&request_array[i], &status);
          smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
                        to_inter, (tag + i), comm);
          smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
                        to_intra, (tag + i), comm);
        }
      }
    } else {
      // case last non-ROOT of each SMP: receive every block, nothing to
      // forward
      if (((rank + 1) % num_core == 0) || (rank == (size - 1))) {
        for (i = 0; i < pipe_length; i++) {
          request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment),
                                            segment, datatype, from_intra,
                                            (tag + i), comm);
        }
        for (i = 0; i < pipe_length; i++) {
          smpi_mpi_wait(&request_array[i], &status);
        }
      }
      // case intermediate non-ROOT of each SMP: receive and forward intra-node
      else {
        for (i = 0; i < pipe_length; i++) {
          request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment),
                                            segment, datatype, from_intra,
                                            (tag + i), comm);
        }
        for (i = 0; i < pipe_length; i++) {
          smpi_mpi_wait(&request_array[i], &status);
          smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
                        to_intra, (tag + i), comm);
        }
      }
    }
    free(request_array);
    free(status_array);
  }

  // when count is not divisible by the block size, use the default bcast
  // for the remaining elements
  if ((remainder != 0) && (count > segment)) {
    XBT_WARN("MPI_bcast_SMP_linear uses default MPI_bcast.");
    smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder,
                   datatype, root, comm);
  }

  return MPI_SUCCESS;
}
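
/* Usage note (an assumption, not part of the original file): within SimGrid
 * this routine is normally reached through SMPI's collective selector, e.g.
 * --cfg=smpi/bcast:SMP_linear, rather than being called directly. */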