+++ /dev/null
-/* Copyright (c) 2013-2014. The SimGrid Team.
- * All rights reserved. */
-
-/* This program is free software; you can redistribute it and/or modify it
- * under the terms of the license (GNU LGPL) which comes with this package. */
-
-/*
- * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
- * University Research and Technology
- * Corporation. All rights reserved.
- * Copyright (c) 2004-2009 The University of Tennessee and The University
- * of Tennessee Research Foundation. All rights
- * reserved.
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
- * University of Stuttgart. All rights reserved.
- * Copyright (c) 2004-2005 The Regents of the University of California.
- * All rights reserved.
- *
- * Additional copyrights may follow
- */
- /* -*- Mode: C; c-basic-offset:4 ; -*- */
-/* Copyright (c) 2001-2014, The Ohio State University. All rights
- * reserved.
- *
- * This file is part of the MVAPICH2 software package developed by the
- * team members of The Ohio State University's Network-Based Computing
- * Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda.
- *
- * For detailed copyright and licensing information, please refer to the
- * copyright file COPYRIGHT in the top level MVAPICH2 directory.
- */
-/*
- *
- * (C) 2001 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-
-#include "colls_private.h"
-
-#define MPIR_Gather_MV2_Direct smpi_coll_tuned_gather_ompi_basic_linear
-#define MPIR_Gather_MV2_two_level_Direct smpi_coll_tuned_gather_ompi_basic_linear
-#define MPIR_Gather_intra smpi_coll_tuned_gather_mpich
-typedef int (*MV2_Gather_function_ptr) (void *sendbuf,
- int sendcnt,
- MPI_Datatype sendtype,
- void *recvbuf,
- int recvcnt,
- MPI_Datatype recvtype,
- int root, MPI_Comm comm);
-
-extern MV2_Gather_function_ptr MV2_Gather_inter_leader_function;
-extern MV2_Gather_function_ptr MV2_Gather_intra_node_function;
-
-#define TEMP_BUF_HAS_NO_DATA (0)
-#define TEMP_BUF_HAS_DATA (1)
-
-/* sendbuf - (in) sender's buffer
- * sendcnt - (in) sender's element count
- * sendtype - (in) sender's data type
- * recvbuf - (in) receiver's buffer
- * recvcnt - (in) receiver's element count
- * recvtype - (in) receiver's data type
- * root - (in)root for the gather operation
- * rank - (in) global rank(rank in the global comm)
- * tmp_buf - (out/in) tmp_buf into which intra node
- * data is gathered
- * is_data_avail - (in) based on this, tmp_buf acts
- * as in/out parameter.
- * 1 - tmp_buf acts as in parameter
- * 0 - tmp_buf acts as out parameter
- * comm_ptr - (in) pointer to the communicator
- * (shmem_comm or intra_sock_comm or
- * inter-sock_leader_comm)
- * intra_node_fn_ptr - (in) Function ptr to choose the
- * intra node gather function
- * errflag - (out) to record errors
- */
-static int MPIR_pt_pt_intra_gather( void *sendbuf, int sendcnt, MPI_Datatype sendtype,
- void *recvbuf, int recvcnt, MPI_Datatype recvtype,
- int root, int rank,
- void *tmp_buf, int nbytes,
- int is_data_avail,
- MPI_Comm comm,
- MV2_Gather_function_ptr intra_node_fn_ptr)
-{
- int mpi_errno = MPI_SUCCESS;
- MPI_Aint recvtype_extent = 0; /* Datatype extent */
- MPI_Aint true_lb, sendtype_true_extent, recvtype_true_extent;
-
-
- if (sendtype != MPI_DATATYPE_NULL) {
- sendtype->extent(&true_lb,
- &sendtype_true_extent);
- }
- if (recvtype != MPI_DATATYPE_NULL) {
- recvtype_extent=recvtype->get_extent();
- recvtype->extent(&true_lb,
- &recvtype_true_extent);
- }
-
- /* Special case, when tmp_buf itself has data */
- if (rank == root && sendbuf == MPI_IN_PLACE && is_data_avail) {
-
- mpi_errno = intra_node_fn_ptr(MPI_IN_PLACE,
- sendcnt, sendtype, tmp_buf, nbytes,
- MPI_BYTE, 0, comm);
-
- } else if (rank == root && sendbuf == MPI_IN_PLACE) {
- mpi_errno = intra_node_fn_ptr((char*)recvbuf +
- rank * recvcnt * recvtype_extent,
- recvcnt, recvtype, tmp_buf, nbytes,
- MPI_BYTE, 0, comm);
- } else {
- mpi_errno = intra_node_fn_ptr(sendbuf, sendcnt, sendtype,
- tmp_buf, nbytes, MPI_BYTE,
- 0, comm);
- }
-
- return mpi_errno;
-
-}
-
-
-int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf,
- int sendcnt,
- MPI_Datatype sendtype,
- void *recvbuf,
- int recvcnt,
- MPI_Datatype recvtype,
- int root,
- MPI_Comm comm)
-{
- void *leader_gather_buf = NULL;
- int comm_size, rank;
- int local_rank, local_size;
- int leader_comm_rank = -1, leader_comm_size = 0;
- int mpi_errno = MPI_SUCCESS;
- int recvtype_size = 0, sendtype_size = 0, nbytes=0;
- int leader_root, leader_of_root;
- MPI_Status status;
- MPI_Aint sendtype_extent = 0, recvtype_extent = 0; /* Datatype extent */
- MPI_Aint true_lb = 0, sendtype_true_extent = 0, recvtype_true_extent = 0;
- MPI_Comm shmem_comm, leader_comm;
- void* tmp_buf = NULL;
-
-
- //if not set (use of the algo directly, without mvapich2 selector)
- if(MV2_Gather_intra_node_function==NULL)
- MV2_Gather_intra_node_function=smpi_coll_tuned_gather_mpich;
-
- if(comm->get_leaders_comm()==MPI_COMM_NULL){
- comm->init_smp();
- }
- comm_size = comm->size();
- rank = comm->rank();
-
- if (((rank == root) && (recvcnt == 0)) ||
- ((rank != root) && (sendcnt == 0))) {
- return MPI_SUCCESS;
- }
-
- if (sendtype != MPI_DATATYPE_NULL) {
- sendtype_extent=sendtype->get_extent();
- sendtype_size=sendtype->size();
- sendtype->extent(&true_lb,
- &sendtype_true_extent);
- }
- if (recvtype != MPI_DATATYPE_NULL) {
- recvtype_extent=recvtype->get_extent();
- recvtype_size=recvtype->size();
- recvtype->extent(&true_lb,
- &recvtype_true_extent);
- }
-
- /* extract the rank,size information for the intra-node
- * communicator */
- shmem_comm = comm->get_intra_comm();
- local_rank = shmem_comm->rank();
- local_size = shmem_comm->size();
-
- if (local_rank == 0) {
- /* Node leader. Extract the rank, size information for the leader
- * communicator */
- leader_comm = comm->get_leaders_comm();
- if(leader_comm==MPI_COMM_NULL){
- leader_comm = MPI_COMM_WORLD;
- }
- leader_comm_size = leader_comm->size();
- leader_comm_rank = leader_comm->size();
- }
-
- if (rank == root) {
- nbytes = recvcnt * recvtype_size;
-
- } else {
- nbytes = sendcnt * sendtype_size;
- }
-
-#if defined(_SMP_LIMIC_)
- if((g_use_limic2_coll) && (shmem_commptr->ch.use_intra_sock_comm == 1)
- && (use_limic_gather)
- &&((num_scheme == USE_GATHER_PT_PT_BINOMIAL)
- || (num_scheme == USE_GATHER_PT_PT_DIRECT)
- ||(num_scheme == USE_GATHER_PT_LINEAR_BINOMIAL)
- || (num_scheme == USE_GATHER_PT_LINEAR_DIRECT)
- || (num_scheme == USE_GATHER_LINEAR_PT_BINOMIAL)
- || (num_scheme == USE_GATHER_LINEAR_PT_DIRECT)
- || (num_scheme == USE_GATHER_LINEAR_LINEAR)
- || (num_scheme == USE_GATHER_SINGLE_LEADER))) {
-
- mpi_errno = MV2_Gather_intra_node_function(sendbuf, sendcnt, sendtype,
- recvbuf, recvcnt,recvtype,
- root, comm);
- } else
-
-#endif/*#if defined(_SMP_LIMIC_)*/
- {
- if (local_rank == 0) {
- /* Node leader, allocate tmp_buffer */
- if (rank == root) {
- tmp_buf = smpi_get_tmp_recvbuffer(recvcnt * MAX(recvtype_extent,
- recvtype_true_extent) * local_size);
- } else {
- tmp_buf = smpi_get_tmp_sendbuffer(sendcnt * MAX(sendtype_extent,
- sendtype_true_extent) *
- local_size);
- }
- if (tmp_buf == NULL) {
- mpi_errno = MPI_ERR_OTHER;
- return mpi_errno;
- }
- }
- /*while testing mpich2 gather test, we see that
- * which basically splits the comm, and we come to
- * a point, where use_intra_sock_comm == 0, but if the
- * intra node function is MPIR_Intra_node_LIMIC_Gather_MV2,
- * it would use the intra sock comm. In such cases, we
- * fallback to binomial as a default case.*/
-#if defined(_SMP_LIMIC_)
- if(*MV2_Gather_intra_node_function == MPIR_Intra_node_LIMIC_Gather_MV2) {
-
- mpi_errno = MPIR_pt_pt_intra_gather(sendbuf,sendcnt, sendtype,
- recvbuf, recvcnt, recvtype,
- root, rank,
- tmp_buf, nbytes,
- TEMP_BUF_HAS_NO_DATA,
- shmem_commptr,
- MPIR_Gather_intra);
- } else
-#endif
- {
- /*We are gathering the data into tmp_buf and the output
- * will be of MPI_BYTE datatype. Since the tmp_buf has no
- * local data, we pass is_data_avail = TEMP_BUF_HAS_NO_DATA*/
- mpi_errno = MPIR_pt_pt_intra_gather(sendbuf,sendcnt, sendtype,
- recvbuf, recvcnt, recvtype,
- root, rank,
- tmp_buf, nbytes,
- TEMP_BUF_HAS_NO_DATA,
- shmem_comm,
- MV2_Gather_intra_node_function
- );
- }
- }
- leader_comm = comm->get_leaders_comm();
- int* leaders_map = comm->get_leaders_map();
- leader_of_root = comm->group()->rank(leaders_map[root]);
- leader_root = leader_comm->group()->rank(leaders_map[root]);
- /* leader_root is the rank of the leader of the root in leader_comm.
- * leader_root is to be used as the root of the inter-leader gather ops
- */
- if (!comm->is_uniform()) {
- if (local_rank == 0) {
- int *displs = NULL;
- int *recvcnts = NULL;
- int *node_sizes;
- int i = 0;
- /* Node leaders have all the data. But, different nodes can have
- * different number of processes. Do a Gather first to get the
- * buffer lengths at each leader, followed by a Gatherv to move
- * the actual data */
-
- if (leader_comm_rank == leader_root && root != leader_of_root) {
- /* The root of the Gather operation is not a node-level
- * leader and this process's rank in the leader_comm
- * is the same as leader_root */
- if(rank == root) {
- leader_gather_buf = smpi_get_tmp_recvbuffer(recvcnt *
- MAX(recvtype_extent,
- recvtype_true_extent) *
- comm_size);
- } else {
- leader_gather_buf = smpi_get_tmp_sendbuffer(sendcnt *
- MAX(sendtype_extent,
- sendtype_true_extent) *
- comm_size);
- }
- if (leader_gather_buf == NULL) {
- mpi_errno = MPI_ERR_OTHER;
- return mpi_errno;
- }
- }
-
- node_sizes = comm->get_non_uniform_map();
-
- if (leader_comm_rank == leader_root) {
- displs = static_cast<int *>(xbt_malloc(sizeof (int) * leader_comm_size));
- recvcnts = static_cast<int *>(xbt_malloc(sizeof (int) * leader_comm_size));
- if (!displs || !recvcnts) {
- mpi_errno = MPI_ERR_OTHER;
- return mpi_errno;
- }
- }
-
- if (root == leader_of_root) {
- /* The root of the gather operation is also the node
- * leader. Receive into recvbuf and we are done */
- if (leader_comm_rank == leader_root) {
- recvcnts[0] = node_sizes[0] * recvcnt;
- displs[0] = 0;
-
- for (i = 1; i < leader_comm_size; i++) {
- displs[i] = displs[i - 1] + node_sizes[i - 1] * recvcnt;
- recvcnts[i] = node_sizes[i] * recvcnt;
- }
- }
- smpi_mpi_gatherv(tmp_buf,
- local_size * nbytes,
- MPI_BYTE, recvbuf, recvcnts,
- displs, recvtype,
- leader_root, leader_comm);
- } else {
- /* The root of the gather operation is not the node leader.
- * Receive into leader_gather_buf and then send
- * to the root */
- if (leader_comm_rank == leader_root) {
- recvcnts[0] = node_sizes[0] * nbytes;
- displs[0] = 0;
-
- for (i = 1; i < leader_comm_size; i++) {
- displs[i] = displs[i - 1] + node_sizes[i - 1] * nbytes;
- recvcnts[i] = node_sizes[i] * nbytes;
- }
- }
- smpi_mpi_gatherv(tmp_buf, local_size * nbytes,
- MPI_BYTE, leader_gather_buf,
- recvcnts, displs, MPI_BYTE,
- leader_root, leader_comm);
- }
- if (leader_comm_rank == leader_root) {
- xbt_free(displs);
- xbt_free(recvcnts);
- }
- }
- } else {
- /* All nodes have the same number of processes.
- * Just do one Gather to get all
- * the data at the leader of the root process */
- if (local_rank == 0) {
- if (leader_comm_rank == leader_root && root != leader_of_root) {
- /* The root of the Gather operation is not a node-level leader
- */
- leader_gather_buf = smpi_get_tmp_sendbuffer(nbytes * comm_size);
- if (leader_gather_buf == NULL) {
- mpi_errno = MPI_ERR_OTHER;
- return mpi_errno;
- }
- }
- if (root == leader_of_root) {
- mpi_errno = MPIR_Gather_MV2_Direct(tmp_buf,
- nbytes * local_size,
- MPI_BYTE, recvbuf,
- recvcnt * local_size,
- recvtype, leader_root,
- leader_comm);
-
- } else {
- mpi_errno = MPIR_Gather_MV2_Direct(tmp_buf, nbytes * local_size,
- MPI_BYTE, leader_gather_buf,
- nbytes * local_size,
- MPI_BYTE, leader_root,
- leader_comm);
- }
- }
- }
- if ((local_rank == 0) && (root != rank)
- && (leader_of_root == rank)) {
- Request::send(leader_gather_buf,
- nbytes * comm_size, MPI_BYTE,
- root, COLL_TAG_GATHER, comm);
- }
-
- if (rank == root && local_rank != 0) {
- /* The root of the gather operation is not the node leader. Receive
- y* data from the node leader */
- Request::recv(recvbuf, recvcnt * comm_size, recvtype,
- leader_of_root, COLL_TAG_GATHER, comm,
- &status);
- }
-
- /* check if multiple threads are calling this collective function */
- if (local_rank == 0 ) {
- if (tmp_buf != NULL) {
- smpi_free_tmp_buffer(tmp_buf);
- }
- if (leader_gather_buf != NULL) {
- smpi_free_tmp_buffer(leader_gather_buf);
- }
- }
-
- return (mpi_errno);
-}
-