--- /dev/null
+/* Copyright (c) 2013-2014. The SimGrid Team.
+ * All rights reserved. */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+#include "colls_private.h"
+
+/*****************************************************************************
+
+Copyright (c) 2006, Ahmad Faraj & Xin Yuan,
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name of the Florida State University nor the names of its
+ contributors may be used to endorse or promote products derived from this
+ software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ *************************************************************************
+ * Any results obtained from executing this software require the *
+ * acknowledgment and citation of the software and its owners. *
+ * The full citation is given below: *
+ * *
+ * A. Faraj and X. Yuan. "Automatic Generation and Tuning of MPI *
+ * Collective Communication Routines." The 19th ACM International *
+ * Conference on Supercomputing (ICS), Cambridge, Massachusetts, *
+ * June 20-22, 2005. *
+ *************************************************************************
+
+*****************************************************************************/
+
+/*****************************************************************************
+
+ * Function: smpi_coll_tuned_bcast_scatter_LR_allgather
+
+ * Return: int
+
+ * Inputs:
+ buff: buffer holding the data to broadcast on root; filled with that data on every other rank
+ count: number of elements to send
+ data_type: data type of elements being sent
+ root: source of data
+ comm: communicator
+
+ * Description: broadcasts by scattering the buffer in chunks from root, then reassembling it on every rank with a logical ring (LR) allgather.
+
+ * Author: MPICH / modified by Ahmad Faraj
+
+ ****************************************************************************/
+/*
+ * Broadcast `count` elements of `data_type` from `root` to every rank of
+ * `comm`, treating the buffer as a flat array of bytes.
+ *
+ * Phase 1 (scatter): the buffer is cut into num_procs chunks of
+ * ceil(nbytes / num_procs) bytes.  Ranks are renumbered relative to root;
+ * each non-root rank receives once from the rank obtained by clearing the
+ * lowest set bit of its relative rank, then forwards the upper part of what
+ * it holds to the ranks below it in that tree.
+ *
+ * Phase 2 (allgather): num_procs - 1 sendrecv steps along a ring, each rank
+ * sending to its right neighbour and receiving from its left one, until
+ * every rank holds every chunk.
+ *
+ * On root `buff` is the data source; on the other ranks it is overwritten
+ * with the broadcast data.  Always returns MPI_SUCCESS.
+ */
+int
+smpi_coll_tuned_bcast_scatter_LR_allgather(void *buff, int count,
+                                           MPI_Datatype data_type, int root,
+                                           MPI_Comm comm)
+{
+  const int tag = COLL_TAG_BCAST;
+  MPI_Status status;
+  char *bytes = (char *) buff;
+  int i, mask;
+
+  const int rank = smpi_comm_rank(comm);
+  const int num_procs = smpi_comm_size(comm);
+  const MPI_Aint extent = smpi_datatype_get_extent(data_type);
+
+  const int nbytes = extent * count;
+  // size of one scattered chunk, rounded up so num_procs chunks cover nbytes
+  const int chunk = (nbytes + num_procs - 1) / num_procs;
+
+  // number of bytes this rank currently holds; only root starts with data
+  int held = 0;
+  if (rank == root)
+    held = nbytes;
+
+  // rank renumbered so that root becomes 0
+  int relative_rank = rank - root;
+  if (relative_rank < 0)
+    relative_rank += num_procs;
+
+  // Scatter, receive side: stop at the lowest set bit of relative_rank and
+  // receive from the rank that is `mask` positions before us.
+  for (mask = 0x1; mask < num_procs; mask <<= 1) {
+    if ((relative_rank & mask) == 0)
+      continue;
+
+    int parent = rank - mask;
+    if (parent < 0)
+      parent += num_procs;
+
+    // recv_size may be larger than what the sender actually sends.  The
+    // exact value is not needed because MPI allows posting a larger
+    // receive; the real amount is read back from the status.
+    const int offset = relative_rank * chunk;
+    const int recv_size = nbytes - offset;
+    if (recv_size > 0) {
+      smpi_mpi_recv(bytes + offset, recv_size, MPI_BYTE, parent, tag, comm,
+                    &status);
+      held = smpi_mpi_get_count(&status, MPI_BYTE);
+    } else {
+      // uneven division left nothing for this process to receive
+      held = 0;
+    }
+    break;
+  }
+
+  // Scatter, send side: this process serves every relative rank obtained by
+  // setting one of the bits below `mask`, so start one bit lower and walk
+  // down to the least significant bit.
+  for (mask >>= 1; mask > 0; mask >>= 1) {
+    const int child_relative = relative_rank + mask;
+    if (child_relative >= num_procs)
+      continue;
+
+    // mask is also the number of chunks this process keeps for itself
+    const int send_size = held - chunk * mask;
+    if (send_size <= 0)
+      continue;
+
+    int child = rank + mask;
+    if (child >= num_procs)
+      child -= num_procs;
+
+    smpi_mpi_send(bytes + chunk * child_relative, send_size, MPI_BYTE, child,
+                  tag, comm);
+    held -= send_size;
+  }
+
+  // Scatter done, now the allgather.  Both tables are indexed by relative
+  // rank: the byte count of each chunk and its offset inside the buffer.
+  int *recv_counts = (int *) xbt_malloc(sizeof(int) * num_procs);
+  int *disps = (int *) xbt_malloc(sizeof(int) * num_procs);
+
+  int running_offset = 0;
+  for (i = 0; i < num_procs; i++) {
+    int len = nbytes - i * chunk;
+    if (len > chunk)
+      len = chunk;
+    if (len < 0)
+      len = 0;
+
+    recv_counts[i] = len;
+    disps[i] = running_offset;
+    running_offset += len;
+  }
+
+  const int left = (num_procs + rank - 1) % num_procs;
+  const int right = (rank + 1) % num_procs;
+
+  // send_owner / recv_owner are the ranks whose chunk is sent / received at
+  // the current step; both move one position to the left after every step.
+  int send_owner = rank;
+  int recv_owner = left;
+
+  for (i = 1; i < num_procs; i++) {
+    const int send_block = (send_owner - root + num_procs) % num_procs;
+    const int recv_block = (recv_owner - root + num_procs) % num_procs;
+
+    smpi_mpi_sendrecv(bytes + disps[send_block], recv_counts[send_block],
+                      MPI_BYTE, right, tag,
+                      bytes + disps[recv_block], recv_counts[recv_block],
+                      MPI_BYTE, left, tag, comm, &status);
+
+    send_owner = recv_owner;
+    recv_owner = (num_procs + recv_owner - 1) % num_procs;
+  }
+
+  free(recv_counts);
+  free(disps);
+
+  return MPI_SUCCESS;
+}