1 /* Copyright (c) 2013-2014. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
8 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
9 * University Research and Technology
10 * Corporation. All rights reserved.
11 * Copyright (c) 2004-2009 The University of Tennessee and The University
12 * of Tennessee Research Foundation. All rights
14 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
15 * University of Stuttgart. All rights reserved.
16 * Copyright (c) 2004-2005 The Regents of the University of California.
17 * All rights reserved.
19 * Additional copyrights may follow
21 /* -*- Mode: C; c-basic-offset:4 ; -*- */
22 /* Copyright (c) 2001-2014, The Ohio State University. All rights
25 * This file is part of the MVAPICH2 software package developed by the
26 * team members of The Ohio State University's Network-Based Computing
27 * Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda.
29 * For detailed copyright and licensing information, please refer to the
30 * copyright file COPYRIGHT in the top level MVAPICH2 directory.
34 * (C) 2001 by Argonne National Laboratory.
35 * See COPYRIGHT in top-level directory.
38 #include "colls_private.h"
41 #define MPIR_Gather_MV2_Direct smpi_coll_tuned_gather_ompi_basic_linear
42 #define MPIR_Gather_MV2_two_level_Direct smpi_coll_tuned_gather_ompi_basic_linear
43 #define MPIR_Gather_intra smpi_coll_tuned_gather_mpich
44 typedef int (*MV2_Gather_function_ptr) (void *sendbuf,
46 MPI_Datatype sendtype,
49 MPI_Datatype recvtype,
50 int root, MPI_Comm comm);
52 extern MV2_Gather_function_ptr MV2_Gather_inter_leader_function;
53 extern MV2_Gather_function_ptr MV2_Gather_intra_node_function;
55 #define TEMP_BUF_HAS_NO_DATA (0)
56 #define TEMP_BUF_HAS_DATA (1)
58 /* sendbuf - (in) sender's buffer
59 * sendcnt - (in) sender's element count
60 * sendtype - (in) sender's data type
61 * recvbuf - (in) receiver's buffer
62 * recvcnt - (in) receiver's element count
63 * recvtype - (in) receiver's data type
64 * root - (in) root for the gather operation
65 * rank - (in) global rank(rank in the global comm)
66 * tmp_buf - (out/in) tmp_buf into which intra node
68 * is_data_avail - (in) based on this, tmp_buf acts
69 * as in/out parameter.
70 * 1 - tmp_buf acts as in parameter
71 * 0 - tmp_buf acts as out parameter
72 * comm_ptr - (in) pointer to the communicator
73 * (shmem_comm or intra_sock_comm or
74 * inter-sock_leader_comm)
75 * intra_node_fn_ptr - (in) Function ptr to choose the
76 * intra node gather function
77 * errflag - (out) to record errors
79 static int MPIR_pt_pt_intra_gather( void *sendbuf, int sendcnt, MPI_Datatype sendtype,
80 void *recvbuf, int recvcnt, MPI_Datatype recvtype,
82 void *tmp_buf, int nbytes,
85 MV2_Gather_function_ptr intra_node_fn_ptr)
87 int mpi_errno = MPI_SUCCESS;
88 MPI_Aint recvtype_extent = 0; /* Datatype extent */
89 MPI_Aint true_lb, sendtype_true_extent, recvtype_true_extent;
92 if (sendtype != MPI_DATATYPE_NULL) {
93 smpi_datatype_extent(sendtype, &true_lb,
94 &sendtype_true_extent);
96 if (recvtype != MPI_DATATYPE_NULL) {
97 recvtype_extent=smpi_datatype_get_extent(recvtype);
98 smpi_datatype_extent(recvtype, &true_lb,
99 &recvtype_true_extent);
102 /* Special case, when tmp_buf itself has data */
103 if (rank == root && sendbuf == MPI_IN_PLACE && is_data_avail) {
105 mpi_errno = intra_node_fn_ptr(MPI_IN_PLACE,
106 sendcnt, sendtype, tmp_buf, nbytes,
109 } else if (rank == root && sendbuf == MPI_IN_PLACE) {
110 mpi_errno = intra_node_fn_ptr((char*)recvbuf +
111 rank * recvcnt * recvtype_extent,
112 recvcnt, recvtype, tmp_buf, nbytes,
115 mpi_errno = intra_node_fn_ptr(sendbuf, sendcnt, sendtype,
116 tmp_buf, nbytes, MPI_BYTE,
125 int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf,
127 MPI_Datatype sendtype,
130 MPI_Datatype recvtype,
134 void *leader_gather_buf = NULL;
136 int local_rank, local_size;
137 int leader_comm_rank = -1, leader_comm_size = 0;
138 int mpi_errno = MPI_SUCCESS;
139 int recvtype_size = 0, sendtype_size = 0, nbytes=0;
140 int leader_root, leader_of_root;
142 MPI_Aint sendtype_extent = 0, recvtype_extent = 0; /* Datatype extent */
143 MPI_Aint true_lb, sendtype_true_extent, recvtype_true_extent;
144 MPI_Comm shmem_comm, leader_comm;
148 //if not set (use of the algo directly, without mvapich2 selector)
149 if(MV2_Gather_intra_node_function==NULL)
150 MV2_Gather_intra_node_function=smpi_coll_tuned_gather_mpich;
152 if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){
153 smpi_comm_init_smp(comm);
155 comm_size = smpi_comm_size(comm);
156 rank = smpi_comm_rank(comm);
158 if (((rank == root) && (recvcnt == 0)) ||
159 ((rank != root) && (sendcnt == 0))) {
163 if (sendtype != MPI_DATATYPE_NULL) {
164 sendtype_extent=smpi_datatype_get_extent(sendtype);
165 sendtype_size=smpi_datatype_size(sendtype);
166 smpi_datatype_extent(sendtype, &true_lb,
167 &sendtype_true_extent);
169 if (recvtype != MPI_DATATYPE_NULL) {
170 recvtype_extent=smpi_datatype_get_extent(recvtype);
171 recvtype_size=smpi_datatype_size(recvtype);
172 smpi_datatype_extent(recvtype, &true_lb,
173 &recvtype_true_extent);
176 /* extract the rank,size information for the intra-node
178 shmem_comm = smpi_comm_get_intra_comm(comm);
179 local_rank = smpi_comm_rank(shmem_comm);
180 local_size = smpi_comm_size(shmem_comm);
182 if (local_rank == 0) {
183 /* Node leader. Extract the rank, size information for the leader
185 leader_comm = smpi_comm_get_leaders_comm(comm);
186 if(leader_comm==MPI_COMM_NULL){
187 leader_comm = MPI_COMM_WORLD;
189 leader_comm_size = smpi_comm_size(leader_comm);
190 leader_comm_rank = smpi_comm_rank(leader_comm);
194 nbytes = recvcnt * recvtype_size;
197 nbytes = sendcnt * sendtype_size;
200 #if defined(_SMP_LIMIC_)
201 if((g_use_limic2_coll) && (shmem_commptr->ch.use_intra_sock_comm == 1)
202 && (use_limic_gather)
203 &&((num_scheme == USE_GATHER_PT_PT_BINOMIAL)
204 || (num_scheme == USE_GATHER_PT_PT_DIRECT)
205 ||(num_scheme == USE_GATHER_PT_LINEAR_BINOMIAL)
206 || (num_scheme == USE_GATHER_PT_LINEAR_DIRECT)
207 || (num_scheme == USE_GATHER_LINEAR_PT_BINOMIAL)
208 || (num_scheme == USE_GATHER_LINEAR_PT_DIRECT)
209 || (num_scheme == USE_GATHER_LINEAR_LINEAR)
210 || (num_scheme == USE_GATHER_SINGLE_LEADER))) {
212 mpi_errno = MV2_Gather_intra_node_function(sendbuf, sendcnt, sendtype,
213 recvbuf, recvcnt,recvtype,
217 #endif/*#if defined(_SMP_LIMIC_)*/
219 if (local_rank == 0) {
220 /* Node leader, allocate tmp_buffer */
222 tmp_buf = xbt_malloc(recvcnt * MAX(recvtype_extent,
223 recvtype_true_extent) * local_size);
225 tmp_buf = xbt_malloc(sendcnt * MAX(sendtype_extent,
226 sendtype_true_extent) *
229 if (tmp_buf == NULL) {
230 mpi_errno = MPI_ERR_OTHER;
234 /* While testing the mpich2 gather test, which basically splits
235 * the comm, we reach a point where use_intra_sock_comm == 0.
236 * However, if the intra node function is
237 * MPIR_Intra_node_LIMIC_Gather_MV2, it would still use the
238 * intra sock comm. In such cases, we fall back to binomial
239 * as the default case. */
240 #if defined(_SMP_LIMIC_)
241 if(*MV2_Gather_intra_node_function == MPIR_Intra_node_LIMIC_Gather_MV2) {
243 mpi_errno = MPIR_pt_pt_intra_gather(sendbuf,sendcnt, sendtype,
244 recvbuf, recvcnt, recvtype,
247 TEMP_BUF_HAS_NO_DATA,
253 /*We are gathering the data into tmp_buf and the output
254 * will be of MPI_BYTE datatype. Since the tmp_buf has no
255 * local data, we pass is_data_avail = TEMP_BUF_HAS_NO_DATA*/
256 mpi_errno = MPIR_pt_pt_intra_gather(sendbuf,sendcnt, sendtype,
257 recvbuf, recvcnt, recvtype,
260 TEMP_BUF_HAS_NO_DATA,
262 MV2_Gather_intra_node_function
266 leader_comm = smpi_comm_get_leaders_comm(comm);
267 int* leaders_map = smpi_comm_get_leaders_map(comm);
268 leader_of_root = smpi_group_rank(smpi_comm_group(comm),leaders_map[root]);
269 leader_root = smpi_group_rank(smpi_comm_group(leader_comm),leaders_map[root]);
270 /* leader_root is the rank of the leader of the root in leader_comm.
271 * leader_root is to be used as the root of the inter-leader gather ops
273 if (!smpi_comm_is_uniform(comm)) {
274 if (local_rank == 0) {
276 int *recvcnts = NULL;
279 /* Node leaders have all the data. But, different nodes can have
280 * different number of processes. Do a Gather first to get the
281 * buffer lengths at each leader, followed by a Gatherv to move
284 if (leader_comm_rank == leader_root && root != leader_of_root) {
285 /* The root of the Gather operation is not a node-level
286 * leader and this process's rank in the leader_comm
287 * is the same as leader_root */
289 leader_gather_buf = xbt_malloc(recvcnt *
291 recvtype_true_extent) *
294 leader_gather_buf = xbt_malloc(sendcnt *
296 sendtype_true_extent) *
299 if (leader_gather_buf == NULL) {
300 mpi_errno = MPI_ERR_OTHER;
305 node_sizes = smpi_comm_get_non_uniform_map(comm);
307 if (leader_comm_rank == leader_root) {
308 displs = xbt_malloc(sizeof (int) * leader_comm_size);
309 recvcnts = xbt_malloc(sizeof (int) * leader_comm_size);
310 if (!displs || !recvcnts) {
311 mpi_errno = MPI_ERR_OTHER;
316 if (root == leader_of_root) {
317 /* The root of the gather operation is also the node
318 * leader. Receive into recvbuf and we are done */
319 if (leader_comm_rank == leader_root) {
320 recvcnts[0] = node_sizes[0] * recvcnt;
323 for (i = 1; i < leader_comm_size; i++) {
324 displs[i] = displs[i - 1] + node_sizes[i - 1] * recvcnt;
325 recvcnts[i] = node_sizes[i] * recvcnt;
328 smpi_mpi_gatherv(tmp_buf,
330 MPI_BYTE, recvbuf, recvcnts,
332 leader_root, leader_comm);
334 /* The root of the gather operation is not the node leader.
335 * Receive into leader_gather_buf and then send
337 if (leader_comm_rank == leader_root) {
338 recvcnts[0] = node_sizes[0] * nbytes;
341 for (i = 1; i < leader_comm_size; i++) {
342 displs[i] = displs[i - 1] + node_sizes[i - 1] * nbytes;
343 recvcnts[i] = node_sizes[i] * nbytes;
346 smpi_mpi_gatherv(tmp_buf, local_size * nbytes,
347 MPI_BYTE, leader_gather_buf,
348 recvcnts, displs, MPI_BYTE,
349 leader_root, leader_comm);
351 if (leader_comm_rank == leader_root) {
357 /* All nodes have the same number of processes.
358 * Just do one Gather to get all
359 * the data at the leader of the root process */
360 if (local_rank == 0) {
361 if (leader_comm_rank == leader_root && root != leader_of_root) {
362 /* The root of the Gather operation is not a node-level leader
364 leader_gather_buf = xbt_malloc(nbytes * comm_size);
365 if (leader_gather_buf == NULL) {
366 mpi_errno = MPI_ERR_OTHER;
370 if (root == leader_of_root) {
371 mpi_errno = MPIR_Gather_MV2_Direct(tmp_buf,
374 recvcnt * local_size,
375 recvtype, leader_root,
379 mpi_errno = MPIR_Gather_MV2_Direct(tmp_buf, nbytes * local_size,
380 MPI_BYTE, leader_gather_buf,
382 MPI_BYTE, leader_root,
387 if ((local_rank == 0) && (root != rank)
388 && (leader_of_root == rank)) {
389 smpi_mpi_send(leader_gather_buf,
390 nbytes * comm_size, MPI_BYTE,
391 root, COLL_TAG_GATHER, comm);
394 if (rank == root && local_rank != 0) {
395 /* The root of the gather operation is not the node leader. Receive
396 * data from the node leader */
397 smpi_mpi_recv(recvbuf, recvcnt * comm_size, recvtype,
398 leader_of_root, COLL_TAG_GATHER, comm,
402 /* check if multiple threads are calling this collective function */
403 if (local_rank == 0 ) {
404 if (tmp_buf != NULL) {
407 if (leader_gather_buf != NULL) {
408 xbt_free(leader_gather_buf);
415 #if defined(_SMP_LIMIC_)
417 static int MPIR_Limic_Gather_Scheme_PT_PT(
418 const void *sendbuf,int sendcnt, MPI_Datatype sendtype,
419 void *recvbuf, int recvcnt,MPI_Datatype recvtype,
420 int root, MPI_Comm comm,
421 MV2_Gather_function_ptr intra_node_fn_ptr,
425 void *intra_tmp_buf = NULL;
428 int mpi_errno = MPI_SUCCESS;
429 int recvtype_size = 0, sendtype_size = 0, nbytes=0;
430 int sendtype_iscontig;
431 int intra_sock_rank=0, intra_sock_comm_size=0;
432 int intra_node_leader_rank=0, intra_node_leader_comm_size=0;
433 MPI_Aint sendtype_extent = 0, recvtype_extent = 0; /* Datatype extent */
434 MPI_Aint true_lb, sendtype_true_extent, recvtype_true_extent;
436 MPID_Comm *shmem_commptr;
437 MPID_Comm *intra_sock_commptr = NULL, *intra_node_leader_commptr=NULL;
439 rank = smpi_comm_rank(comm);
441 if (((rank == root) && (recvcnt == 0)) ||
442 ((rank != root) && (sendcnt == 0))) {
446 if (sendtype != MPI_DATATYPE_NULL) {
447 MPIR_Datatype_iscontig(sendtype, &sendtype_iscontig);
448 sendtype_extent=smpi_datatype_extent(sendtype);
449 sendtype_size=smpi_datatype_size(sendtype);
450 smpi_datatype_extent(sendtype, &true_lb,
451 &sendtype_true_extent);
453 if (recvtype != MPI_DATATYPE_NULL) {
454 recvtype_extent=smpi_datatype_extent(recvtype);
455 recvtype_size=smpi_datatype_size(recvtype);
456 smpi_datatype_extent(recvtype, &true_lb,
457 &recvtype_true_extent);
460 /* extract the rank,size information for the intra-node
462 shmem_comm = comm_ptr->ch.shmem_comm;
463 MPID_Comm_get_ptr(shmem_comm, shmem_commptr);
464 local_size = shmem_commptr->local_size;
468 nbytes = recvcnt * recvtype_size;
471 nbytes = sendcnt * sendtype_size;
474 if(shmem_commptr->ch.use_intra_sock_comm == 1) {
475 MPID_Comm_get_ptr(shmem_commptr->ch.intra_sock_comm, intra_sock_commptr);
476 MPID_Comm_get_ptr(shmem_commptr->ch.intra_sock_leader_comm, intra_node_leader_commptr);
478 intra_sock_rank = intra_sock_commptr->rank;
479 intra_sock_comm_size = intra_sock_commptr->local_size;
480 if(intra_sock_rank == 0) {
481 intra_node_leader_rank = intra_node_leader_commptr->rank;
482 intra_node_leader_comm_size = intra_node_leader_commptr->local_size;
485 if (intra_sock_rank == 0) {
486 if (intra_node_leader_rank == 0) {
487 /* Node leaders, allocate large buffers which is used to gather
488 * data for the entire node. The same buffer is used for inter-node
489 * gather as well. This saves us a memcpy operation*/
491 intra_tmp_buf = malloc(recvcnt * MPIR_MAX(recvtype_extent,
492 recvtype_true_extent) * local_size);
494 intra_tmp_buf = malloc(sendcnt * MPIR_MAX(sendtype_extent,
495 sendtype_true_extent) * local_size);
499 /* Socket leader, allocate tmp_buffer */
501 intra_tmp_buf = malloc(recvcnt * MPIR_MAX(recvtype_extent,
502 recvtype_true_extent) * intra_sock_comm_size);
504 intra_tmp_buf = malloc(sendcnt * MPIR_MAX(sendtype_extent,
505 sendtype_true_extent) * intra_sock_comm_size);
508 if (intra_tmp_buf == NULL) {
509 mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
510 MPIR_ERR_RECOVERABLE,
511 FCNAME, __LINE__, MPI_ERR_OTHER,
518 /*Intra socket gather*/
519 /*We are gathering the data into intra_tmp_buf and the output
520 * will be of MPI_BYTE datatype. Since the tmp_buf has no
521 * local data, we pass is_data_avail = TEMP_BUF_HAS_NO_DATA*/
522 mpi_errno = MPIR_pt_pt_intra_gather(sendbuf, sendcnt, sendtype,
523 recvbuf, recvcnt, recvtype,
525 intra_tmp_buf, nbytes,
526 TEMP_BUF_HAS_NO_DATA,
531 MPIU_ERR_POP(mpi_errno);
534 /*Inter socket gather*/
535 if(intra_sock_rank == 0) {
536 /*When data in each socket is different*/
537 if (shmem_commptr->ch.is_socket_uniform != 1) {
540 int *recvcnts = NULL;
543 socket_sizes = shmem_commptr->ch.socket_size;
545 if (intra_node_leader_rank == 0) {
546 tmp_buf = intra_tmp_buf;
548 displs = malloc(sizeof (int) * intra_node_leader_comm_size);
549 recvcnts = malloc(sizeof (int) * intra_node_leader_comm_size);
550 if (!displs || !recvcnts) {
551 mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
552 MPIR_ERR_RECOVERABLE,
559 recvcnts[0] = socket_sizes[0] * nbytes;
562 for (i = 1; i < intra_node_leader_comm_size; i++) {
563 displs[i] = displs[i - 1] + socket_sizes[i - 1] * nbytes;
564 recvcnts[i] = socket_sizes[i] * nbytes;
567 mpi_errno = MPIR_Gatherv(MPI_IN_PLACE,
568 intra_sock_comm_size * nbytes,
569 MPI_BYTE, tmp_buf, recvcnts,
571 0, intra_node_leader_commptr,
574 /*Free the displacement and recvcnts buffer*/
578 mpi_errno = MPIR_Gatherv(intra_tmp_buf,
579 intra_sock_comm_size * nbytes,
580 MPI_BYTE, tmp_buf, recvcnts,
582 0, intra_node_leader_commptr,
589 if (intra_node_leader_rank == 0) {
590 tmp_buf = intra_tmp_buf;
592 /*We have now completed the intra_sock gather and all the
593 * socket level leaders have data in their tmp_buf. So we
594 * set sendbuf = MPI_IN_PLACE and also explicitly set the
595 * is_data_avail= TEMP_BUF_HAS_DATA*/
596 mpi_errno = MPIR_pt_pt_intra_gather(MPI_IN_PLACE,
597 (nbytes*intra_sock_comm_size),
599 recvbuf, recvcnt, recvtype,
602 (nbytes*intra_sock_comm_size),
604 intra_node_leader_commptr,
609 /*After the intra_sock gather, all the node level leaders
610 * have the data in intra_tmp_buf(sendbuf) and this is gathered into
611 * tmp_buf. Since the tmp_buf(in non-root processes) does not have
612 * the data in tmp_buf is_data_avail = TEMP_BUF_HAS_NO_DATA*/
613 mpi_errno = MPIR_pt_pt_intra_gather(intra_tmp_buf,
614 (nbytes*intra_sock_comm_size),
616 recvbuf, recvcnt, recvtype,
619 (nbytes*intra_sock_comm_size),
620 TEMP_BUF_HAS_NO_DATA,
621 intra_node_leader_commptr,
629 MPIU_ERR_POP(mpi_errno);
633 /*Free the intra socket leader buffers*/
634 if (intra_sock_rank == 0) {
635 if ((intra_node_leader_rank != 0) && (intra_tmp_buf != NULL)) {
643 static int MPIR_Limic_Gather_Scheme_PT_LINEAR(
644 const void *sendbuf,int sendcnt, MPI_Datatype sendtype,
645 void *recvbuf, int recvcnt,MPI_Datatype recvtype,
646 int root, MPI_Comm comm,
647 MV2_Gather_function_ptr intra_node_fn_ptr,
650 void *intra_tmp_buf = NULL;
651 void *local_sendbuf=NULL;
653 int local_rank, local_size;
654 int mpi_errno = MPI_SUCCESS;
655 int recvtype_size = 0, nbytes=0;
656 int sendtype_iscontig;
657 int intra_sock_rank=0, intra_sock_comm_size=0;
658 int intra_node_leader_rank=0, intra_node_leader_comm_size=0;
660 MPI_Aint recvtype_extent = 0; /* Datatype extent */
661 MPI_Aint true_lb, sendtype_true_extent, recvtype_true_extent;
663 MPID_Comm *shmem_commptr;
664 MPID_Comm *intra_sock_commptr = NULL, *intra_node_leader_commptr=NULL;
665 MPI_Aint position = 0;
666 MPI_Aint sendtype_size = 0;
668 rank = smpi_comm_rank(comm);
670 if (((rank == root) && (recvcnt == 0)) ||
671 ((rank != root) && (sendcnt == 0))) {
675 if (sendtype != MPI_DATATYPE_NULL) {
676 //MPIR_Datatype_iscontig(sendtype, &sendtype_iscontig);
677 sendtype_size=smpi_datatype_size(sendtype);
678 smpi_datatype_extent(sendtype, &true_lb,
679 &sendtype_true_extent);
681 if (recvtype != MPI_DATATYPE_NULL) {
682 recvtype_extent=smpi_datatype_extent(recvtype);
683 recvtype_size=smpi_datatype_size(recvtype);
684 smpi_datatype_extent(recvtype, &true_lb,
685 &recvtype_true_extent);
688 /* extract the rank,size information for the intra-node
690 shmem_comm = comm_ptr->ch.shmem_comm;
691 MPID_Comm_get_ptr(shmem_comm, shmem_commptr);
692 local_rank = shmem_commptr->rank;
693 local_size = shmem_commptr->local_size;
697 nbytes = recvcnt * recvtype_size;
700 nbytes = sendcnt * sendtype_size;
703 if(shmem_commptr->ch.use_intra_sock_comm == 1) {
704 MPID_Comm_get_ptr(shmem_commptr->ch.intra_sock_comm, intra_sock_commptr);
705 MPID_Comm_get_ptr(shmem_commptr->ch.intra_sock_leader_comm, intra_node_leader_commptr);
707 intra_sock_rank = intra_sock_commptr->rank;
708 intra_sock_comm_size = intra_sock_commptr->local_size;
709 if(intra_sock_rank == 0) {
710 intra_node_leader_rank = intra_node_leader_commptr->rank;
711 intra_node_leader_comm_size = intra_node_leader_commptr->local_size;
714 /*Pack data for non-contiguous buffer*/
715 /* if ((!sendtype_iscontig) && (sendbuf != MPI_IN_PLACE)) {
716 MPIR_Pack_size_impl(1, sendtype, &sendtype_size);
717 send_nbytes= sendcnt * sendtype_size;
718 MPIU_CHKLMEM_MALLOC(local_sendbuf, void *, send_nbytes, mpi_errno, "local_sendbuf");
719 MPIR_Pack_impl(sendbuf, sendcnt, sendtype, local_sendbuf, send_nbytes, &position);
721 local_sendbuf = (void *)sendbuf;
726 if (intra_sock_rank == 0) {
727 if (intra_node_leader_rank == 0) {
728 /* Node leaders, allocate large buffers which is used to gather
729 * data for the entire node. The same buffer is used for inter-node
730 * gather as well. This saves us a memcpy operation*/
732 intra_tmp_buf = malloc(recvcnt * MPIR_MAX(recvtype_extent,
733 recvtype_true_extent) * local_size);
735 intra_tmp_buf = malloc(send_nbytes * local_size);
740 /* Socket leader, allocate tmp_buffer */
742 intra_tmp_buf = malloc(recvcnt * MPIR_MAX(recvtype_extent,
743 recvtype_true_extent) * intra_sock_comm_size);
745 intra_tmp_buf = malloc(send_nbytes * intra_sock_comm_size);
750 if (intra_tmp_buf == NULL) {
751 mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
752 MPIR_ERR_RECOVERABLE,
753 FCNAME, __LINE__, MPI_ERR_OTHER,
758 /*Local copy of buffer*/
759 if(sendbuf != MPI_IN_PLACE) {
760 memcpy(intra_tmp_buf, local_sendbuf, send_nbytes);
762 MPIR_Localcopy(((char *) recvbuf +rank * recvcnt * recvtype_extent),
764 intra_tmp_buf, send_nbytes, MPI_BYTE);
768 if(local_rank !=0 && sendbuf == MPI_IN_PLACE) {
769 mpi_errno = MPIR_Limic_Gather_OSU(intra_tmp_buf,
770 (intra_sock_comm_size * send_nbytes),
771 (recvbuf + (rank*nbytes)), nbytes,
772 intra_sock_commptr );
774 mpi_errno = MPIR_Limic_Gather_OSU(intra_tmp_buf,
775 (intra_sock_comm_size * send_nbytes),
776 local_sendbuf, send_nbytes,
777 intra_sock_commptr );
780 MPIU_ERR_POP(mpi_errno);
783 /*Inter socket gather*/
784 if(intra_sock_rank == 0) {
785 /*When data in each socket is different*/
786 if (shmem_commptr->ch.is_socket_uniform != 1) {
789 int *recvcnts = NULL;
792 socket_sizes = shmem_commptr->ch.socket_size;
794 if (intra_node_leader_rank == 0) {
795 tmp_buf = intra_tmp_buf;
797 displs = malloc(sizeof (int) * intra_node_leader_comm_size);
798 recvcnts = malloc(sizeof (int) * intra_node_leader_comm_size);
799 if (!displs || !recvcnts) {
800 mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
801 MPIR_ERR_RECOVERABLE,
808 recvcnts[0] = socket_sizes[0] * nbytes;
811 for (i = 1; i < intra_node_leader_comm_size; i++) {
812 displs[i] = displs[i - 1] + socket_sizes[i - 1] * nbytes;
813 recvcnts[i] = socket_sizes[i] * nbytes;
817 mpi_errno = MPIR_Gatherv(MPI_IN_PLACE,
818 intra_sock_comm_size * nbytes,
819 MPI_BYTE, tmp_buf, recvcnts,
821 0, intra_node_leader_commptr,
824 /*Free the displacement and recvcnts buffer*/
829 mpi_errno = MPIR_Gatherv(intra_tmp_buf,
830 intra_sock_comm_size * nbytes,
831 MPI_BYTE, tmp_buf, recvcnts,
833 0, intra_node_leader_commptr,
839 if (intra_node_leader_rank == 0) {
840 tmp_buf = intra_tmp_buf;
842 /*We have now completed the intra_sock gather and all the
843 * socket level leaders have data in their tmp_buf. So we
844 * set sendbuf = MPI_IN_PLACE and also explicitly set the
845 * is_data_avail= TEMP_BUF_HAS_DATA*/
846 mpi_errno = MPIR_pt_pt_intra_gather(MPI_IN_PLACE,
847 (send_nbytes*intra_sock_comm_size),
849 recvbuf, recvcnt, recvtype,
852 (send_nbytes*intra_sock_comm_size),
854 intra_node_leader_commptr,
859 /*After the intra_sock gather, all the node level leaders
860 * have the data in intra_tmp_buf(sendbuf) and this is gathered into
861 * tmp_buf. Since the tmp_buf(in non-root processes) does not have
862 * the data in tmp_buf is_data_avail = TEMP_BUF_HAS_NO_DATA*/
863 mpi_errno = MPIR_pt_pt_intra_gather(intra_tmp_buf,
864 (send_nbytes*intra_sock_comm_size),
866 recvbuf, recvcnt, recvtype,
869 (send_nbytes*intra_sock_comm_size),
870 TEMP_BUF_HAS_NO_DATA,
871 intra_node_leader_commptr,
878 MPIU_ERR_POP(mpi_errno);
882 /*Free the intra socket leader buffers*/
883 if (intra_sock_rank == 0) {
884 if ((intra_node_leader_rank != 0) && (intra_tmp_buf != NULL)) {
888 MPIU_CHKLMEM_FREEALL();
892 static int MPIR_Limic_Gather_Scheme_LINEAR_PT(
893 const void *sendbuf,int sendcnt, MPI_Datatype sendtype,
894 void *recvbuf, int recvcnt,MPI_Datatype recvtype,
895 int root, MPI_Comm comm,
896 MV2_Gather_function_ptr intra_node_fn_ptr,
899 void *intra_tmp_buf = NULL;
902 int mpi_errno = MPI_SUCCESS;
903 int recvtype_size = 0, sendtype_size = 0, nbytes=0;
904 int sendtype_iscontig;
905 int intra_sock_rank=0, intra_sock_comm_size=0;
906 int intra_node_leader_rank=0;
907 MPI_Aint sendtype_extent = 0, recvtype_extent = 0; /* Datatype extent */
908 MPI_Aint true_lb, sendtype_true_extent, recvtype_true_extent;
910 MPID_Comm *shmem_commptr;
911 MPID_Comm *intra_sock_commptr = NULL, *intra_node_leader_commptr=NULL;
913 rank = smpi_comm_rank(comm);
915 if (((rank == root) && (recvcnt == 0)) ||
916 ((rank != root) && (sendcnt == 0))) {
920 if (sendtype != MPI_DATATYPE_NULL) {
921 //MPIR_Datatype_iscontig(sendtype, &sendtype_iscontig);
922 sendtype_extent=smpi_datatype_extent(sendtype);
923 sendtype_size=smpi_datatype_size(sendtype);
924 smpi_datatype_extent(sendtype, &true_lb,
925 &sendtype_true_extent);
927 if (recvtype != MPI_DATATYPE_NULL) {
928 recvtype_extent=smpi_datatype_extent(recvtype);
929 recvtype_size=smpi_datatype_size(recvtype);
930 smpi_datatype_extent(recvtype, &true_lb,
931 &recvtype_true_extent);
934 /* extract the rank,size information for the intra-node
936 shmem_comm = comm_ptr->ch.shmem_comm;
937 MPID_Comm_get_ptr(shmem_comm, shmem_commptr);
938 local_size = shmem_commptr->local_size;
942 nbytes = recvcnt * recvtype_size;
945 nbytes = sendcnt * sendtype_size;
948 if(shmem_commptr->ch.use_intra_sock_comm == 1) {
949 MPID_Comm_get_ptr(shmem_commptr->ch.intra_sock_comm, intra_sock_commptr);
950 MPID_Comm_get_ptr(shmem_commptr->ch.intra_sock_leader_comm, intra_node_leader_commptr);
952 intra_sock_rank = intra_sock_commptr->rank;
953 intra_sock_comm_size = intra_sock_commptr->local_size;
954 if(intra_sock_rank == 0) {
955 intra_node_leader_rank = intra_node_leader_commptr->rank;
959 if (intra_sock_rank == 0) {
960 if (intra_node_leader_rank == 0) {
961 /* Node leaders, allocate large buffers which is used to gather
962 * data for the entire node. The same buffer is used for inter-node
963 * gather as well. This saves us a memcpy operation*/
965 intra_tmp_buf = malloc(recvcnt * MPIR_MAX(recvtype_extent,
966 recvtype_true_extent) * local_size);
968 intra_tmp_buf = malloc(sendcnt * MPIR_MAX(sendtype_extent,
969 sendtype_true_extent) * local_size);
973 /* Socket leader, allocate tmp_buffer */
975 intra_tmp_buf = malloc(recvcnt * MPIR_MAX(recvtype_extent,
976 recvtype_true_extent) * intra_sock_comm_size);
978 intra_tmp_buf = malloc(sendcnt * MPIR_MAX(sendtype_extent,
979 sendtype_true_extent) * intra_sock_comm_size);
982 if (intra_tmp_buf == NULL) {
983 mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
984 MPIR_ERR_RECOVERABLE,
985 FCNAME, __LINE__, MPI_ERR_OTHER,
991 /*Intra socket gather*/
992 /*We are gathering the data into intra_tmp_buf and the output
993 * will be of MPI_BYTE datatype. Since the tmp_buf has no
994 * local data, we pass is_data_avail = TEMP_BUF_HAS_NO_DATA*/
995 mpi_errno = MPIR_pt_pt_intra_gather(sendbuf, sendcnt, sendtype,
996 recvbuf, recvcnt, recvtype,
998 intra_tmp_buf, nbytes,
999 TEMP_BUF_HAS_NO_DATA,
1005 MPIU_ERR_POP(mpi_errno);
1008 /*Inter socket gather*/
1009 if(intra_sock_rank == 0) {
1010 if (intra_node_leader_rank == 0) {
1011 tmp_buf = intra_tmp_buf;
1013 mpi_errno = MPIR_Limic_Gather_OSU(tmp_buf, (local_size * nbytes),
1015 (intra_sock_comm_size * nbytes),
1016 intra_node_leader_commptr);
1020 MPIU_ERR_POP(mpi_errno);
1023 /*Free the intra socket leader buffers*/
1024 if (intra_sock_rank == 0) {
1025 if ((intra_node_leader_rank != 0) && (intra_tmp_buf != NULL)) {
1026 free(intra_tmp_buf);
1034 #define FUNCNAME MPIR_Limic_Gather_Scheme_LINEAR_LINEAR
1036 #define FCNAME MPIU_QUOTE(FUNCNAME)
1037 static int MPIR_Limic_Gather_Scheme_LINEAR_LINEAR(
1038 const void *sendbuf,int sendcnt, MPI_Datatype sendtype,
1039 void *recvbuf, int recvcnt,MPI_Datatype recvtype,
1040 int root, MPI_Comm comm,
1043 void *intra_tmp_buf = NULL;
1044 void *local_sendbuf=NULL;
1046 int local_rank, local_size;
1047 int mpi_errno = MPI_SUCCESS;
1048 int recvtype_size = 0, nbytes=0;
1049 int sendtype_iscontig;
1050 int intra_sock_rank=0, intra_sock_comm_size=0;
1051 int intra_node_leader_rank=0;
1053 MPI_Aint recvtype_extent = 0; /* Datatype extent */
1054 MPI_Aint true_lb, sendtype_true_extent, recvtype_true_extent;
1055 MPI_Comm shmem_comm;
1056 MPID_Comm *shmem_commptr;
1057 MPID_Comm *intra_sock_commptr = NULL, *intra_node_leader_commptr=NULL;
1058 MPI_Aint sendtype_size = 0;
1059 MPI_Aint position = 0;
1060 MPIU_CHKLMEM_DECL(1);
1062 rank = smpi_comm_rank(comm);
1064 if (((rank == root) && (recvcnt == 0)) ||
1065 ((rank != root) && (sendcnt == 0))) {
1069 if (sendtype != MPI_DATATYPE_NULL) {
1070 MPIR_Datatype_iscontig(sendtype, &sendtype_iscontig);
1071 sendtype_size=smpi_datatype_size(sendtype);
1072 smpi_datatype_extent(sendtype, &true_lb,
1073 &sendtype_true_extent);
1075 if (recvtype != MPI_DATATYPE_NULL) {
1076 recvtype_extent=smpi_datatype_extent(recvtype);
1077 recvtype_size=smpi_datatype_size(recvtype);
1078 smpi_datatype_extent(recvtype, &true_lb,
1079 &recvtype_true_extent);
1082 /* extract the rank,size information for the intra-node
1084 shmem_comm = comm_ptr->ch.shmem_comm;
1085 MPID_Comm_get_ptr(shmem_comm, shmem_commptr);
1086 local_rank = shmem_commptr->rank;
1087 local_size = shmem_commptr->local_size;
1091 nbytes = recvcnt * recvtype_size;
1094 nbytes = sendcnt * sendtype_size;
1097 if(shmem_commptr->ch.use_intra_sock_comm == 1) {
1098 MPID_Comm_get_ptr(shmem_commptr->ch.intra_sock_comm, intra_sock_commptr);
1099 MPID_Comm_get_ptr(shmem_commptr->ch.intra_sock_leader_comm, intra_node_leader_commptr);
1101 intra_sock_rank = intra_sock_commptr->rank;
1102 intra_sock_comm_size = intra_sock_commptr->local_size;
1103 if(intra_sock_rank == 0) {
1104 intra_node_leader_rank = intra_node_leader_commptr->rank;
1108 /*Pack data for non-contiguous buffer*/
1109 /* if ((!sendtype_iscontig) && (sendbuf != MPI_IN_PLACE)) {
1111 MPIR_Pack_size_impl(1, sendtype, &sendtype_size);
1112 send_nbytes= sendcnt * sendtype_size;
1113 MPIU_CHKLMEM_MALLOC(local_sendbuf, void *, send_nbytes, mpi_errno, "local_sendbuf");
1114 MPIR_Pack_impl(sendbuf, sendcnt, sendtype, local_sendbuf, send_nbytes, &position);
1117 local_sendbuf = (void *)sendbuf;
1118 send_nbytes = nbytes;
1121 if (intra_sock_rank == 0) {
1122 if (intra_node_leader_rank == 0) {
1123 /* Node leaders, allocate large buffers which is used to gather
1124 * data for the entire node. The same buffer is used for inter-node
1125 * gather as well. This saves us a memcpy operation*/
1127 intra_tmp_buf = malloc(recvcnt * MPIR_MAX(recvtype_extent,
1128 recvtype_true_extent) * local_size);
1130 intra_tmp_buf = malloc(send_nbytes * local_size);
1135 /* Socket leader, allocate tmp_buffer */
1137 intra_tmp_buf = malloc(recvcnt * MPIR_MAX(recvtype_extent,
1138 recvtype_true_extent) * intra_sock_comm_size);
1140 intra_tmp_buf = malloc(send_nbytes * intra_sock_comm_size);
1144 if (intra_tmp_buf == NULL) {
1145 mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
1146 MPIR_ERR_RECOVERABLE,
1147 FCNAME, __LINE__, MPI_ERR_OTHER,
1152 /*Local copy of buffer*/
1153 if(sendbuf != MPI_IN_PLACE) {
1154 memcpy(intra_tmp_buf, local_sendbuf, send_nbytes);
1156 MPIR_Localcopy(((char *) recvbuf +rank * recvcnt * recvtype_extent),
1158 intra_tmp_buf, send_nbytes, MPI_BYTE);
1163 if(local_rank !=0 && sendbuf == MPI_IN_PLACE) {
1164 mpi_errno = MPIR_Limic_Gather_OSU(intra_tmp_buf,
1165 (intra_sock_comm_size * send_nbytes),
1166 (recvbuf + (rank*nbytes)), nbytes,
1167 intra_sock_commptr);
1169 mpi_errno = MPIR_Limic_Gather_OSU(intra_tmp_buf,
1170 (intra_sock_comm_size * send_nbytes),
1171 local_sendbuf, send_nbytes,
1172 intra_sock_commptr );
1175 MPIU_ERR_POP(mpi_errno);
1178 /*Inter socket gather*/
1179 if(intra_sock_rank == 0) {
1180 if (intra_node_leader_rank == 0) {
1181 tmp_buf = intra_tmp_buf;
1183 mpi_errno = MPIR_Limic_Gather_OSU(tmp_buf, (local_size * send_nbytes),
1185 (intra_sock_comm_size * send_nbytes),
1186 intra_node_leader_commptr );
1190 MPIU_ERR_POP(mpi_errno);
1193 /*Free the intra socket leader buffers*/
1194 if (intra_sock_rank == 0) {
1195 if ((intra_node_leader_rank != 0) && (intra_tmp_buf != NULL)) {
1196 free(intra_tmp_buf);
1200 MPIU_CHKLMEM_FREEALL();
/*
 * MPIR_Limic_Gather_Scheme_SINGLE_LEADER
 *
 * Intra-node gather step built on MPIR_Limic_Gather_OSU, run over the
 * shared-memory communicator obtained from comm_ptr->ch.shmem_comm.
 *
 * What the visible code does:
 *  - reads the caller's rank in comm and tests for the zero-count case
 *    (root with recvcnt == 0, or non-root with sendcnt == 0);
 *  - queries size/extent information for sendtype and recvtype when they
 *    are not MPI_DATATYPE_NULL;
 *  - for local rank 0 of the shared-memory communicator, allocates tmp_buf
 *    with room for local_size contributions and copies the caller's own
 *    contribution to the start of it (memcpy from local_sendbuf, or
 *    MPIR_Localcopy out of recvbuf when sendbuf is MPI_IN_PLACE);
 *  - calls MPIR_Limic_Gather_OSU with tmp_buf and either local_sendbuf or,
 *    for a non-zero local rank using MPI_IN_PLACE, the caller's slot
 *    inside recvbuf.
 *
 * sendbuf, sendcnt, sendtype - send buffer, element count and datatype
 * recvbuf, recvcnt, recvtype - receive buffer, element count and datatype
 * root                       - rank compared against the caller's rank
 * comm                       - communicator the rank is taken from
 *
 * mpi_errno starts as MPI_SUCCESS and receives the result of the LiMIC
 * gather call; presumably it is the return value, but the return statement
 * is not visible here.
 *
 * NOTE(review): in this view the end of the parameter list, the opening
 * brace, several branch conditions and the exit path are not visible, and
 * rank, tmp_buf, send_nbytes and comm_ptr are used without a visible
 * declaration. Confirm against the complete file before editing the code.
 */
1205 #define FUNCNAME MPIR_Limic_Gather_Scheme_SINGLE_LEADER
1207 #define FCNAME MPIU_QUOTE(FUNCNAME)
1208 static int MPIR_Limic_Gather_Scheme_SINGLE_LEADER(
1209 const void *sendbuf,int sendcnt, MPI_Datatype sendtype,
1210 void *recvbuf, int recvcnt,MPI_Datatype recvtype,
1211 int root, MPI_Comm comm,
1214 void *local_sendbuf=NULL;
1216 int local_rank, local_size;
1217 int mpi_errno = MPI_SUCCESS;
1218 int recvtype_size = 0, nbytes=0;
1219 int sendtype_iscontig;
1221 MPI_Aint recvtype_extent = 0; /* Datatype extent */
1222 MPI_Aint true_lb, sendtype_true_extent, recvtype_true_extent;
1223 MPI_Comm shmem_comm;
1224 MPID_Comm *shmem_commptr;
1225 MPI_Aint sendtype_size = 0;
1226 MPI_Aint position = 0;
1227 MPIU_CHKLMEM_DECL(1);
/* Rank of the caller within comm, as reported by smpi_comm_rank. */
1229 rank = smpi_comm_rank(comm);
/* Zero-count guard: root with nothing to receive, or a non-root rank with
 * nothing to send. The guarded statement is not visible in this view. */
1231 if (((rank == root) && (recvcnt == 0)) ||
1232 ((rank != root) && (sendcnt == 0))) {
/* Send-side datatype properties, only queried for a non-null datatype. */
1236 if (sendtype != MPI_DATATYPE_NULL) {
1237 MPIR_Datatype_iscontig(sendtype, &sendtype_iscontig);
1238 sendtype_size=smpi_datatype_size(sendtype);
1239 smpi_datatype_extent(sendtype, &true_lb,
1240 &sendtype_true_extent);
/* Receive-side datatype properties, only queried for a non-null datatype. */
1242 if (recvtype != MPI_DATATYPE_NULL) {
/* NOTE(review): smpi_datatype_extent is used here with one argument and a
 * return value, but with three arguments elsewhere in this function -
 * confirm which form matches its prototype. */
1243 recvtype_extent=smpi_datatype_extent(recvtype);
1244 recvtype_size=smpi_datatype_size(recvtype);
1245 smpi_datatype_extent(recvtype, &true_lb,
1246 &recvtype_true_extent);
1249 /* extract the rank,size information for the intra-node
1251 shmem_comm = comm_ptr->ch.shmem_comm;
1252 MPID_Comm_get_ptr(shmem_comm, shmem_commptr);
1253 local_rank = shmem_commptr->rank;
1254 local_size = shmem_commptr->local_size;
1258 nbytes = recvcnt * recvtype_size;
1261 nbytes = sendcnt * sendtype_size;
1264 /*Pack data for non-contiguous buffer*/
1265 /* if ((!sendtype_iscontig) && (sendbuf != MPI_IN_PLACE)) {
1267 MPIR_Pack_size_impl(1, sendtype, &sendtype_size);
1268 send_nbytes= sendcnt * sendtype_size;
1269 MPIU_CHKLMEM_MALLOC(local_sendbuf, void *, send_nbytes, mpi_errno, "local_sendbuf");
1270 MPIR_Pack_impl(sendbuf, sendcnt, sendtype, local_sendbuf, send_nbytes, &position);
1273 local_sendbuf = (void *)sendbuf;
1274 send_nbytes = nbytes;
1277 if (local_rank == 0) {
1278 /* Node leader, allocate tmp_buffer */
/* Two alternative sizes follow, both scaled by local_size: one derived from
 * the receive type's larger extent, one from send_nbytes. The condition
 * selecting between them is not visible in this view. */
1280 tmp_buf = malloc(recvcnt * MPIR_MAX(recvtype_extent,
1281 recvtype_true_extent) * local_size);
1283 tmp_buf = malloc( send_nbytes * local_size);
/* An allocation failure is converted into an MPI_ERR_OTHER error code. */
1285 if (tmp_buf == NULL) {
1286 mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
1287 MPIR_ERR_RECOVERABLE,
1288 FCNAME, __LINE__, MPI_ERR_OTHER,
1293 /*Local copy of buffer*/
/* With a regular send buffer the bytes come from local_sendbuf; the second
 * copy reads from the caller's slot in recvbuf (rank * recvcnt *
 * recvtype_extent bytes in) - presumably the MPI_IN_PLACE branch, whose
 * "else" is not visible in this view. */
1294 if(sendbuf != MPI_IN_PLACE) {
1295 memcpy(tmp_buf, local_sendbuf, send_nbytes);
1297 MPIR_Localcopy(((char *) recvbuf +rank * recvcnt * recvtype_extent),
1299 tmp_buf, send_nbytes, MPI_BYTE);
/* NOTE(review): recvbuf is a void pointer, so "recvbuf + (rank*nbytes)"
 * relies on a compiler extension for void-pointer arithmetic; the local
 * copy above casts to char pointer first - confirm whether the same cast
 * is wanted here. */
1303 if(local_rank !=0 && sendbuf == MPI_IN_PLACE) {
1304 mpi_errno = MPIR_Limic_Gather_OSU(tmp_buf, (local_size * send_nbytes),
1305 (recvbuf + (rank*nbytes)),
1306 nbytes, shmem_commptr );
1308 mpi_errno = MPIR_Limic_Gather_OSU(tmp_buf, (local_size * send_nbytes),
1309 local_sendbuf, nbytes,
/* Error propagation macro applied to mpi_errno after the gather call. */
1314 MPIU_ERR_POP(mpi_errno);
/* Releases memory tracked by the MPIU_CHKLMEM_DECL bookkeeping above. */
1318 MPIU_CHKLMEM_FREEALL();
/*
 * MPIR_Intra_node_LIMIC_Gather_MV2
 *
 * Entry point that selects one of the LiMIC intra-node gather schemes and
 * forwards the gather arguments to it.
 *
 * Selection, as visible in the if / else-if chain below:
 *  - shmem_commptr->ch.use_intra_sock_comm == 1 combined with num_scheme:
 *      USE_GATHER_PT_PT_BINOMIAL, USE_GATHER_PT_PT_DIRECT
 *          -> MPIR_Limic_Gather_Scheme_PT_PT
 *      USE_GATHER_PT_LINEAR_BINOMIAL, USE_GATHER_PT_LINEAR_DIRECT
 *          -> MPIR_Limic_Gather_Scheme_PT_LINEAR
 *      USE_GATHER_LINEAR_PT_BINOMIAL, USE_GATHER_LINEAR_PT_DIRECT
 *          -> MPIR_Limic_Gather_Scheme_LINEAR_PT
 *      USE_GATHER_LINEAR_LINEAR
 *          -> MPIR_Limic_Gather_Scheme_LINEAR_LINEAR
 *    The DIRECT variants pass MPIR_Gather_MV2_Direct to the scheme; the
 *    argument used by the BINOMIAL variants is not visible in this view.
 *  - USE_GATHER_SINGLE_LEADER, when either comm_ptr->ch.shmem_coll_ok == 1
 *    or use_intra_sock_comm == 1
 *          -> MPIR_Limic_Gather_Scheme_SINGLE_LEADER
 *  - anything else is treated as invalid and produces an MPI_ERR_OTHER
 *    error code through MPIR_Err_create_code.
 *
 * sendbuf, sendcnt, sendtype - send buffer, element count and datatype
 * recvbuf, recvcnt, recvtype - receive buffer, element count and datatype
 * root, comm                 - gather root and communicator
 * errflag                    - int pointer; its use is not visible here
 *
 * mpi_errno starts as MPI_SUCCESS and receives the chosen scheme's result;
 * presumably it is the return value, but the return statement is not
 * visible in this view.
 *
 * NOTE(review): num_scheme and comm_ptr are used without a visible
 * declaration, and the opening brace, call argument tails, closing braces
 * and exit path are missing from this view. Confirm against the complete
 * file before editing the code.
 */
1323 #define FUNCNAME MPIR_Intra_node_LIMIC_Gather_MV2
1325 #define FCNAME MPIU_QUOTE(FUNCNAME)
1326 int MPIR_Intra_node_LIMIC_Gather_MV2(
1327 const void *sendbuf,int sendcnt, MPI_Datatype sendtype,
1328 void *recvbuf, int recvcnt,MPI_Datatype recvtype,
1329 int root, MPI_Comm comm, int *errflag)
/* Result code; overwritten by whichever scheme function is invoked. */
1331 int mpi_errno = MPI_SUCCESS;
1332 MPI_Comm shmem_comm;
1333 MPID_Comm *shmem_commptr;
1335 /* extract the rank,size information for the intra-node
1337 shmem_comm = comm_ptr->ch.shmem_comm;
1338 MPID_Comm_get_ptr(shmem_comm, shmem_commptr);
1340 /*This case uses the PT-PT scheme with binomial
1342 if((shmem_commptr->ch.use_intra_sock_comm == 1)
1343 && (num_scheme == USE_GATHER_PT_PT_BINOMIAL)) {
1345 mpi_errno = MPIR_Limic_Gather_Scheme_PT_PT(sendbuf, sendcnt, sendtype,
1346 recvbuf, recvcnt, recvtype,
1351 MPIU_ERR_POP(mpi_errno);
1354 /*This case uses the PT-PT scheme with DIRECT
1356 else if((shmem_commptr->ch.use_intra_sock_comm == 1)
1357 && (num_scheme == USE_GATHER_PT_PT_DIRECT)) {
1359 mpi_errno = MPIR_Limic_Gather_Scheme_PT_PT(sendbuf, sendcnt, sendtype,
1360 recvbuf, recvcnt, recvtype,
1362 MPIR_Gather_MV2_Direct,
1365 MPIU_ERR_POP(mpi_errno);
1368 /*This case uses the PT-LINEAR scheme with binomial
1370 else if((shmem_commptr->ch.use_intra_sock_comm == 1)
1371 && (num_scheme == USE_GATHER_PT_LINEAR_BINOMIAL)) {
1373 mpi_errno = MPIR_Limic_Gather_Scheme_PT_LINEAR(sendbuf, sendcnt, sendtype,
1374 recvbuf, recvcnt, recvtype,
1379 MPIU_ERR_POP(mpi_errno);
1383 /*This case uses the PT-LINEAR scheme with DIRECT
1385 else if((shmem_commptr->ch.use_intra_sock_comm == 1)
1386 && (num_scheme == USE_GATHER_PT_LINEAR_DIRECT)) {
1388 mpi_errno = MPIR_Limic_Gather_Scheme_PT_LINEAR(sendbuf, sendcnt, sendtype,
1389 recvbuf, recvcnt, recvtype,
1391 MPIR_Gather_MV2_Direct,
1394 MPIU_ERR_POP(mpi_errno);
1398 /*This case uses the LINEAR-PT scheme with binomial
1400 else if((shmem_commptr->ch.use_intra_sock_comm == 1)
1401 && (num_scheme == USE_GATHER_LINEAR_PT_BINOMIAL)) {
1403 mpi_errno = MPIR_Limic_Gather_Scheme_LINEAR_PT(sendbuf, sendcnt, sendtype,
1404 recvbuf, recvcnt, recvtype,
1409 MPIU_ERR_POP(mpi_errno);
1413 /*This case uses the LINEAR-PT scheme with DIRECT
1415 else if((shmem_commptr->ch.use_intra_sock_comm == 1)
1416 && (num_scheme == USE_GATHER_LINEAR_PT_DIRECT)) {
1418 mpi_errno = MPIR_Limic_Gather_Scheme_LINEAR_PT(sendbuf, sendcnt, sendtype,
1419 recvbuf, recvcnt, recvtype,
1421 MPIR_Gather_MV2_Direct,
1424 MPIU_ERR_POP(mpi_errno);
1427 } else if((shmem_commptr->ch.use_intra_sock_comm == 1)
1428 && (num_scheme == USE_GATHER_LINEAR_LINEAR)) {
1430 mpi_errno = MPIR_Limic_Gather_Scheme_LINEAR_LINEAR(sendbuf, sendcnt, sendtype,
1431 recvbuf, recvcnt, recvtype,
1435 MPIU_ERR_POP(mpi_errno);
1438 } else if(((comm_ptr->ch.shmem_coll_ok == 1) ||
1439 (shmem_commptr->ch.use_intra_sock_comm == 1))
1440 && (num_scheme == USE_GATHER_SINGLE_LEADER)) {
1442 mpi_errno = MPIR_Limic_Gather_Scheme_SINGLE_LEADER(sendbuf, sendcnt, sendtype,
1443 recvbuf, recvcnt, recvtype,
1447 MPIU_ERR_POP(mpi_errno);
1450 /*This is a invalid case, if we are in LIMIC Gather
1451 * the code flow should be in one of the if-else case*/
1452 mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
1453 MPIR_ERR_RECOVERABLE,
1454 FCNAME, __LINE__, MPI_ERR_OTHER,
1463 #endif /*#if defined(_SMP_LIMIC_) */