3 // NOTE: currently only works when the number of processes is a power of two
6 smpi_coll_tuned_allgather_rhv(void *sbuf, int send_count,
7 MPI_Datatype send_type, void *rbuf,
8 int recv_count, MPI_Datatype recv_type,
12 MPI_Aint s_extent, r_extent;
14 // local int variables
15 int i, dst, send_base_offset, recv_base_offset, send_chunk, recv_chunk,
16 send_offset, recv_offset;
22 // get size of the communicator, followed by rank
23 MPI_Comm_size(comm, &num_procs);
24 MPI_Comm_rank(comm, &rank);
26 // get size of single element's type for send buffer and recv buffer
27 MPI_Type_extent(send_type, &s_extent);
28 MPI_Type_extent(recv_type, &r_extent);
30 // multiply size of each element by number of elements to send or recv
31 send_chunk = s_extent * send_count;
32 recv_chunk = r_extent * recv_count;
34 if (send_chunk != recv_chunk)
35 return MPI_Allgather(sbuf, send_count, send_type, rbuf, recv_count,
38 // compute starting offset location to perform local copy
39 int size = num_procs / 2;
42 while (mask < num_procs) {
50 // printf("node %d base_offset %d\n",rank,base_offset);
52 //perform a remote copy
55 MPI_Sendrecv(sbuf, send_count, send_type, dst, tag,
56 (char *)rbuf + base_offset * recv_chunk, recv_count, recv_type, dst, tag,
63 curr_count = recv_count;
65 // destination pair for both send and recv
69 send_base_offset = base_offset;
71 recv_base_offset = base_offset - i;
74 recv_base_offset = base_offset + i;
76 send_offset = send_base_offset * recv_chunk;
77 recv_offset = recv_base_offset * recv_chunk;
79 // printf("node %d send to %d in phase %d s_offset = %d r_offset = %d count = %d\n",rank,dst,phase, send_base_offset, recv_base_offset, curr_count);
81 MPI_Sendrecv((char *)rbuf + send_offset, curr_count, recv_type, dst, tag,
82 (char *)rbuf + recv_offset, curr_count, recv_type, dst, tag,