src/smpi/smpi_mpi_dt.c
src/smpi/smpi_pmpi.c
src/smpi/smpi_replay.c
+ src/smpi/colls/colls_global.c
#src/smpi/colls/allgather-2dmesh.c
#src/smpi/colls/allgather-3dmesh.c
#src/smpi/colls/allgather-bruck.c
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
int failure = 1;
int tag = 1;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
- MPI_Type_extent(send_type, &extent);
+ extent = smpi_datatype_get_extent(send_type);
block_size = extent * send_count;
MPIC_Send(send_buff, send_count, send_type, dst, tag, comm);
}
- MPI_Waitall(Y - 1, req, MPI_STATUSES_IGNORE);
+ smpi_mpi_waitall(Y - 1, req, MPI_STATUSES_IGNORE);
req_ptr = req;
comm);
}
- MPI_Waitall(X - 1, req, MPI_STATUSES_IGNORE);
+ smpi_mpi_waitall(X - 1, req, MPI_STATUSES_IGNORE);
free(req);
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
int failure = 1;
int tag = 1;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
- MPI_Type_extent(send_type, &extent);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
+ extent = smpi_datatype_get_extent(send_type);
is_3dmesh(num_procs, &X, &Y, &Z);
block_size = extent * send_count;
- req = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request));
+ req = (MPI_Request *) xbt_malloc(num_reqs * sizeof(MPI_Request));
if (!req) {
printf("allgather-3dmesh-shoot.c:85: cannot allocate memory\n");
MPI_Finalize();
MPIC_Send(send_buff, send_count, send_type, dst, tag, comm);
}
- MPI_Waitall(Y - 1, req, MPI_STATUSES_IGNORE);
+ smpi_mpi_waitall(Y - 1, req, MPI_STATUSES_IGNORE);
req_ptr = req;
// do colwise comm, it does not matter here if i*X or i *Y since X == Y
comm);
}
- MPI_Waitall(X - 1, req, MPI_STATUSES_IGNORE);
+ smpi_mpi_waitall(X - 1, req, MPI_STATUSES_IGNORE);
req_ptr = req;
for (i = 1; i < Z; i++) {
MPIC_Send((char *)recv_buff + send_offset, send_count * two_dsize, send_type,
dst, tag, comm);
}
- MPI_Waitall(Z - 1, req, MPI_STATUSES_IGNORE);
+ smpi_mpi_waitall(Z - 1, req, MPI_STATUSES_IGNORE);
free(req);
-#include "colls.h"
+#include "colls_private.h"
// Allgather - gather/bcast algorithm
int smpi_coll_tuned_allgather_GB(void *send_buff, int send_count,
MPI_Comm comm)
{
int num_procs;
- MPI_Comm_size(comm, &num_procs);
- MPI_Gather(send_buff, send_count, send_type, recv_buff, recv_count, recv_type,
+ num_procs = smpi_comm_size(comm);
+ smpi_mpi_gather(send_buff, send_count, send_type, recv_buff, recv_count, recv_type,
0, comm);
- MPI_Bcast(recv_buff, (recv_count * num_procs), recv_type, 0, comm);
+ mpi_coll_bcast_fun(recv_buff, (recv_count * num_procs), recv_type, 0, comm);
return MPI_SUCCESS;
}
-#include "colls.h"
+#include "colls_private.h"
// Allgather-Non-Topology-Specific-Logical-Ring algorithm
int
int send_offset, recv_offset;
int tag = 500;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &size);
- MPI_Type_extent(rtype, &rextent);
- MPI_Type_extent(stype, &sextent);
+ rank = smpi_comm_rank(comm);
+ size = smpi_comm_size(comm);
+ rextent = smpi_datatype_get_extent(rtype);
+ sextent = smpi_datatype_get_extent(stype);
MPI_Request *rrequest_array;
MPI_Request *srequest_array;
- rrequest_array = (MPI_Request *) malloc(size * sizeof(MPI_Request));
- srequest_array = (MPI_Request *) malloc(size * sizeof(MPI_Request));
+ rrequest_array = (MPI_Request *) xbt_malloc(size * sizeof(MPI_Request));
+ srequest_array = (MPI_Request *) xbt_malloc(size * sizeof(MPI_Request));
// irregular case: use the default MPI functions
- if (scount * sextent != rcount * rextent)
- MPI_Allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
+ if (scount * sextent != rcount * rextent) {
+ XBT_WARN("MPI_allgather_NTSLR_NB use default MPI_allgather.");
+ smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
+ return MPI_SUCCESS;
+ }
// topo non-specific
to = (rank + 1) % size;
//copy a single segment from sbuf to rbuf
send_offset = rank * scount * sextent;
- MPI_Sendrecv(sbuf, scount, stype, rank, tag,
+ smpi_mpi_sendrecv(sbuf, scount, stype, rank, tag,
(char *)rbuf + send_offset, rcount, rtype, rank, tag, comm, &status);
//post all irecv first
for (i = 0; i < size - 1; i++) {
recv_offset = ((rank - i - 1 + size) % size) * increment;
- MPI_Irecv((char *)rbuf + recv_offset, rcount, rtype, from, tag + i, comm,
- &rrequest_array[i]);
+ rrequest_array[i] = smpi_mpi_irecv((char *)rbuf + recv_offset, rcount, rtype, from, tag + i, comm);
}
for (i = 0; i < size - 1; i++) {
send_offset = ((rank - i + size) % size) * increment;
- MPI_Isend((char *)rbuf + send_offset, scount, stype, to, tag + i, comm,
- &srequest_array[i]);
- MPI_Wait(&rrequest_array[i], &status);
- MPI_Wait(&srequest_array[i], &status2);
+ srequest_array[i] = smpi_mpi_isend((char *)rbuf + send_offset, scount, stype, to, tag + i, comm);
+ smpi_mpi_wait(&rrequest_array[i], &status);
+ smpi_mpi_wait(&srequest_array[i], &status2);
}
free(rrequest_array);
-#include "colls.h"
+#include "colls_private.h"
// Allgather-Non-Topology-Specific-Logical-Ring algorithm
int
int send_offset, recv_offset;
int tag = 500;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &size);
- MPI_Type_extent(rtype, &rextent);
- MPI_Type_extent(stype, &sextent);
+ rank = smpi_comm_rank(comm);
+ size = smpi_comm_size(comm);
+ rextent = smpi_datatype_get_extent(rtype);
+ sextent = smpi_datatype_get_extent(stype);
// irregular case: use the default MPI functions
- if (scount * sextent != rcount * rextent)
- MPI_Allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
+ if (scount * sextent != rcount * rextent) {
+ XBT_WARN("MPI_allgather_NTSLR use default MPI_allgather.");
+ smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
+ return MPI_SUCCESS;
+ }
// topo non-specific
to = (rank + 1) % size;
//copy a single segment from sbuf to rbuf
send_offset = rank * scount * sextent;
- MPI_Sendrecv(sbuf, scount, stype, rank, tag,
+ smpi_mpi_sendrecv(sbuf, scount, stype, rank, tag,
(char *)rbuf + send_offset, rcount, rtype, rank, tag,
comm, &status);
for (i = 0; i < size - 1; i++) {
send_offset = ((rank - i + size) % size) * increment;
recv_offset = ((rank - i - 1 + size) % size) * increment;
- MPI_Sendrecv((char *) rbuf + send_offset, scount, stype, to, tag + i,
+ smpi_mpi_sendrecv((char *) rbuf + send_offset, scount, stype, to, tag + i,
(char *) rbuf + recv_offset, rcount, rtype, from, tag + i,
comm, &status);
}
-#include "colls.h"
+#include "colls_private.h"
#ifndef NUM_CORE
#define NUM_CORE 8
#endif
MPI_Comm comm)
{
int src, dst, comm_size, rank;
- MPI_Comm_size(comm, &comm_size);
- MPI_Comm_rank(comm, &rank);
+ comm_size = smpi_comm_size(comm);
+ rank = smpi_comm_rank(comm);
MPI_Aint rextent, sextent;
- MPI_Type_extent(rtype, &rextent);
- MPI_Type_extent(stype, &sextent);
+ rextent = smpi_datatype_get_extent(rtype);
+ sextent = smpi_datatype_get_extent(stype);
int tag = 50;
MPI_Request request;
MPI_Request rrequest_array[128];
/* for too small number of processes, use default implementation */
if (comm_size <= NUM_CORE) {
- return MPI_Allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
+ XBT_WARN("MPI_allgather_SMP_NTS use default MPI_allgather.");
+ smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
+ return MPI_SUCCESS;
}
+
// the last SMP node may have fewer running processes than the others
if (inter_rank == (inter_comm_size - 1)) {
num_core_in_current_smp = comm_size - (inter_rank * NUM_CORE);
}
//copy corresponding message from sbuf to rbuf
recv_offset = rank * rextent * rcount;
- MPI_Sendrecv(sbuf, scount, stype, rank, tag,
+ smpi_mpi_sendrecv(sbuf, scount, stype, rank, tag,
((char *) rbuf + recv_offset), rcount, rtype, rank, tag, comm,
&status);
(num_core_in_current_smp);
recv_offset = src * rextent * rcount;
- MPI_Sendrecv(sbuf, scount, stype, dst, tag,
+ smpi_mpi_sendrecv(sbuf, scount, stype, dst, tag,
((char *) rbuf + recv_offset), rcount, rtype, src, tag, comm,
&status);
recv_offset =
((inter_rank - i - 1 +
inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount;
- MPI_Irecv((char *) rbuf + recv_offset, rcount * NUM_CORE, rtype, src,
- tag + i, comm, &rrequest_array[i]);
+ rrequest_array[i] = smpi_mpi_irecv((char *)rbuf+recv_offset, rcount * NUM_CORE, rtype, src, tag+i, comm);
}
// send first message
send_offset =
((inter_rank +
inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount;
- MPI_Isend((char *) rbuf + send_offset, scount * NUM_CORE, stype, dst, tag,
- comm, &srequest_array[0]);
+ srequest_array[0] = smpi_mpi_isend((char *) rbuf + send_offset, scount * NUM_CORE, stype, dst, tag,
+ comm);
// loop : recv-inter , send-inter, send-intra (linear-bcast)
for (i = 0; i < inter_comm_size - 2; i++) {
recv_offset =
((inter_rank - i - 1 +
inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount;
- MPI_Wait(&rrequest_array[i], &status);
- MPI_Isend((char *) rbuf + recv_offset, scount * NUM_CORE, stype, dst,
- tag + i + 1, comm, &srequest_array[i + 1]);
+ smpi_mpi_wait(&rrequest_array[i], &status);
+ srequest_array[i + 1] = smpi_mpi_isend((char *) rbuf + recv_offset, scount * NUM_CORE, stype, dst,
+ tag + i + 1, comm);
if (num_core_in_current_smp > 1) {
- MPI_Isend((char *) rbuf + recv_offset, scount * NUM_CORE, stype,
- (rank + 1), tag + i + 1, comm, &request);
+ request = smpi_mpi_isend((char *) rbuf + recv_offset, scount * NUM_CORE, stype,
+ (rank + 1), tag + i + 1, comm);
}
}
inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount;
//recv_offset = ((inter_rank + 1) % inter_comm_size) * NUM_CORE * sextent * scount;
//i=inter_comm_size-2;
- MPI_Wait(&rrequest_array[i], &status);
+ smpi_mpi_wait(&rrequest_array[i], &status);
if (num_core_in_current_smp > 1) {
- MPI_Isend((char *) rbuf + recv_offset, scount * NUM_CORE, stype,
- (rank + 1), tag + i + 1, comm, &request);
+ request = smpi_mpi_isend((char *) rbuf + recv_offset, scount * NUM_CORE, stype,
+ (rank + 1), tag + i + 1, comm);
}
}
// last rank of each SMP
recv_offset =
((inter_rank - i - 1 +
inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount;
- MPI_Irecv((char *) rbuf + recv_offset, (rcount * NUM_CORE), rtype,
- rank - 1, tag + i + 1, comm, &request);
- MPI_Wait(&request, &status);
+ request = smpi_mpi_irecv((char *) rbuf + recv_offset, (rcount * NUM_CORE), rtype,
+ rank - 1, tag + i + 1, comm);
+ smpi_mpi_wait(&request, &status);
}
}
// intermediate rank of each SMP
recv_offset =
((inter_rank - i - 1 +
inter_comm_size) % inter_comm_size) * NUM_CORE * sextent * scount;
- MPI_Irecv((char *) rbuf + recv_offset, (rcount * NUM_CORE), rtype,
- rank - 1, tag + i + 1, comm, &request);
- MPI_Wait(&request, &status);
- MPI_Isend((char *) rbuf + recv_offset, (scount * NUM_CORE), stype,
- (rank + 1), tag + i + 1, comm, &request);
+ request = smpi_mpi_irecv((char *) rbuf + recv_offset, (rcount * NUM_CORE), rtype,
+ rank - 1, tag + i + 1, comm);
+ smpi_mpi_wait(&request, &status);
+ request = smpi_mpi_isend((char *) rbuf + recv_offset, (scount * NUM_CORE), stype,
+ (rank + 1), tag + i + 1, comm);
}
}
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
char *recv_ptr = (char *) recv_buff;
// get size of the communicator, followed by rank
- MPI_Comm_size(comm, &num_procs);
- MPI_Comm_rank(comm, &rank);
+ num_procs = smpi_comm_size(comm);
+ rank = smpi_comm_rank(comm);
// get size of single element's type for recv buffer
- MPI_Type_extent(recv_type, &recv_extent);
+ recv_extent = smpi_datatype_get_extent(recv_type);
count = recv_count;
- tmp_buff = (char *) malloc(num_procs * recv_count * recv_extent);
+ tmp_buff = (char *) xbt_malloc(num_procs * recv_count * recv_extent);
if (!tmp_buff) {
printf("allgather-bruck:54: cannot allocate memory\n");
MPI_Finalize();
-#include "colls.h"
+#include "colls_private.h"
#ifndef NUM_CORE
#define NUM_CORE 4
int intra_rank, inter_rank, inter_comm_size, intra_comm_size;
int inter_dst, inter_src;
- MPI_Comm_size(comm, &comm_size);
- MPI_Comm_rank(comm, &rank);
+ comm_size = smpi_comm_size(comm);
+ rank = smpi_comm_rank(comm);
MPI_Aint rextent, sextent;
- MPI_Type_extent(rtype, &rextent);
- MPI_Type_extent(stype, &sextent);
+ rextent = smpi_datatype_get_extent(rtype);
+ sextent = smpi_datatype_get_extent(stype);
MPI_Request inter_rrequest;
MPI_Request rrequest_array[128];
MPI_Request srequest_array[128];
//copy corresponding message from sbuf to rbuf
recv_offset = rank * rextent * rcount;
- MPI_Sendrecv(sbuf, scount, stype, rank, tag,
+ smpi_mpi_sendrecv(sbuf, scount, stype, rank, tag,
(char *)rbuf + recv_offset, rcount, rtype, rank, tag, comm, &status);
int dst, src;
if (intra_rank == j) {
if (i != inter_comm_size - 1) {
- MPI_Irecv((char *)rbuf + inter_recv_offset, rcount, rtype, inter_src, tag,
- comm, &inter_rrequest);
- MPI_Isend((char *)rbuf + inter_send_offset, scount, stype, inter_dst, tag,
- comm, &inter_srequest_array[inter_srequest_count++]);
-
+ inter_rrequest = smpi_mpi_irecv((char *)rbuf + inter_recv_offset, rcount, rtype,
+ inter_src, tag, comm);
+ inter_srequest_array[inter_srequest_count++] = smpi_mpi_isend((char *)rbuf + inter_send_offset, scount, stype,
+ inter_dst, tag, comm);
}
}
//intra_communication
if (j != intra_rank) {
- MPI_Irecv((char *)rbuf + recv_offset, rcount, rtype, src, tag, comm,
- &rrequest_array[rrequest_count++]);
- MPI_Isend((char *)rbuf + send_offset, scount, stype, dst, tag, comm,
- &srequest_array[srequest_count++]);
+ rrequest_array[rrequest_count++] = smpi_mpi_irecv((char *)rbuf + recv_offset, rcount, rtype, src, tag, comm);
+ srequest_array[srequest_count++] = smpi_mpi_isend((char *)rbuf + send_offset, scount, stype, dst, tag, comm);
}
} // intra loop
// wait for inter communication to finish for these rounds (# of round equals NUM_CORE)
if (i != inter_comm_size - 1) {
- MPI_Wait(&inter_rrequest, &status);
+ smpi_mpi_wait(&inter_rrequest, &status);
}
} //inter loop
- MPI_Waitall(rrequest_count, rrequest_array, MPI_STATUSES_IGNORE);
- MPI_Waitall(srequest_count, srequest_array, MPI_STATUSES_IGNORE);
- MPI_Waitall(inter_srequest_count, inter_srequest_array, MPI_STATUSES_IGNORE);
+ smpi_mpi_waitall(rrequest_count, rrequest_array, MPI_STATUSES_IGNORE);
+ smpi_mpi_waitall(srequest_count, srequest_array, MPI_STATUSES_IGNORE);
+ smpi_mpi_waitall(inter_srequest_count, inter_srequest_array, MPI_STATUSES_IGNORE);
return MPI_SUCCESS;
}
-#include "colls.h"
+#include "colls_private.h"
// Allgather-Non-Topology-Specific-Logical-Ring algorithm
int
int send_offset, recv_offset;
int tag = 500;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &size);
- MPI_Type_extent(rtype, &rextent);
- MPI_Type_extent(stype, &sextent);
+ rank = smpi_comm_rank(comm);
+ size = smpi_comm_size(comm);
+ rextent = smpi_datatype_get_extent(rtype);
+ sextent = smpi_datatype_get_extent(stype);
// irregular case: use the default MPI functions
- if (scount * sextent != rcount * rextent)
- MPI_Allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
+ if (scount * sextent != rcount * rextent) {
+ XBT_WARN("MPI_allgather_lr use default MPI_allgather.");
+ smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
+ return MPI_SUCCESS;
+ }
// topo non-specific
to = (rank + 1) % size;
//copy a single segment from sbuf to rbuf
send_offset = rank * scount * sextent;
- MPI_Sendrecv(sbuf, scount, stype, rank, tag,
+ smpi_mpi_sendrecv(sbuf, scount, stype, rank, tag,
(char *) rbuf + send_offset, rcount, rtype, rank, tag,
comm, &status);
for (i = 0; i < size - 1; i++) {
send_offset = ((rank - i + size) % size) * increment;
recv_offset = ((rank - i - 1 + size) % size) * increment;
- MPI_Sendrecv((char *) rbuf + send_offset, scount, stype, to, tag + i,
+ smpi_mpi_sendrecv((char *) rbuf + send_offset, scount, stype, to, tag + i,
(char *) rbuf + recv_offset, rcount, rtype, from, tag + i,
comm, &status);
}
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
char *send_ptr = (char *) send_buff;
char *recv_ptr = (char *) recv_buff;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
- MPI_Type_extent(send_type, &extent);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
+ extent = smpi_datatype_get_extent(send_type);
// local send/recv
- MPI_Sendrecv(send_ptr, send_count, send_type, rank, tag,
+ smpi_mpi_sendrecv(send_ptr, send_count, send_type, rank, tag,
recv_ptr + rank * recv_count * extent,
recv_count, recv_type, rank, tag, comm, &status);
for (i = 1; i < num_procs; i++) {
src = dst = rank ^ i;
- MPI_Sendrecv(send_ptr, send_count, send_type, dst, tag,
+ smpi_mpi_sendrecv(send_ptr, send_count, send_type, dst, tag,
recv_ptr + src * recv_count * extent, recv_count, recv_type,
src, tag, comm, &status);
}
-#include "colls.h"
+#include "colls_private.h"
int
smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count,
// local int variables
int i, j, k, dst, rank, num_procs, send_offset, recv_offset, tree_root;
- int dst_tree_root, rank_tree_root, last_recv_count, num_procs_completed;
+ int dst_tree_root, rank_tree_root, last_recv_count = 0, num_procs_completed;
int offset, tmp_mask;
int tag = 1;
int mask = 1;
char *recv_ptr = (char *) rbuf;
// get size of the communicator, followed by rank
- MPI_Comm_size(comm, &num_procs);
- MPI_Comm_rank(comm, &rank);
+ num_procs = smpi_comm_size(comm);
+ rank = smpi_comm_rank(comm);
// get size of single element's type for send buffer and recv buffer
- MPI_Type_extent(send_type, &send_chunk);
- MPI_Type_extent(recv_type, &recv_chunk);
+ send_chunk = smpi_datatype_get_extent(send_type);
+ recv_chunk = smpi_datatype_get_extent(recv_type);
// multiply size of each element by number of elements to send or recv
send_chunk *= send_count;
recv_chunk *= recv_count;
// perform a local copy
- MPI_Sendrecv(send_ptr, send_count, send_type, rank, tag,
+ smpi_mpi_sendrecv(send_ptr, send_count, send_type, rank, tag,
recv_ptr + rank * recv_chunk, recv_count, recv_type, rank, tag,
comm, &status);
recv_offset = dst_tree_root * recv_chunk;
if (dst < num_procs) {
- MPI_Sendrecv(recv_ptr + send_offset, curr_count, send_type, dst,
+ smpi_mpi_sendrecv(recv_ptr + send_offset, curr_count, send_type, dst,
tag, recv_ptr + recv_offset, mask * recv_count,
recv_type, dst, tag, comm, &status);
- MPI_Get_count(&status, recv_type, &last_recv_count);
+ last_recv_count = smpi_mpi_get_count(&status, recv_type);
curr_count += last_recv_count;
}
if ((dst > rank)
&& (rank < tree_root + num_procs_completed)
&& (dst >= tree_root + num_procs_completed)) {
- MPI_Send(recv_ptr + offset, last_recv_count, recv_type, dst,
+ smpi_mpi_send(recv_ptr + offset, last_recv_count, recv_type, dst,
tag, comm);
/* last_recv_cnt was set in the previous
else if ((dst < rank)
&& (dst < tree_root + num_procs_completed)
&& (rank >= tree_root + num_procs_completed)) {
- MPI_Recv(recv_ptr + offset,
+ smpi_mpi_recv(recv_ptr + offset,
recv_count * num_procs_completed,
recv_type, dst, tag, comm, &status);
// num_procs_completed is also equal to the no. of processes
// whose data we don't have
- MPI_Get_count(&status, recv_type, &last_recv_count);
+ last_recv_count = smpi_mpi_get_count(&status, recv_type);
curr_count += last_recv_count;
}
tmp_mask >>= 1;
-#include "colls.h"
+#include "colls_private.h"
// currently only works with a power-of-two number of processes
int curr_count;
// get size of the communicator, followed by rank
- MPI_Comm_size(comm, &num_procs);
- MPI_Comm_rank(comm, &rank);
+ num_procs = smpi_comm_size(comm);
+ rank = smpi_comm_rank(comm);
// get size of single element's type for send buffer and recv buffer
- MPI_Type_extent(send_type, &s_extent);
- MPI_Type_extent(recv_type, &r_extent);
+ s_extent = smpi_datatype_get_extent(send_type);
+ r_extent = smpi_datatype_get_extent(recv_type);
// multiply size of each element by number of elements to send or recv
send_chunk = s_extent * send_count;
recv_chunk = r_extent * recv_count;
- if (send_chunk != recv_chunk)
- return MPI_Allgather(sbuf, send_count, send_type, rbuf, recv_count,
- recv_type, comm);
+ if (send_chunk != recv_chunk) {
+ XBT_WARN("MPI_allgather_rhv use default MPI_allgather.");
+ smpi_mpi_allgather(sbuf, send_count, send_type, rbuf, recv_count,
+ recv_type, comm);
+ return MPI_SUCCESS;
+ }
// compute starting offset location to perform local copy
int size = num_procs / 2;
//perform a remote copy
dst = base_offset;
- MPI_Sendrecv(sbuf, send_count, send_type, dst, tag,
+ smpi_mpi_sendrecv(sbuf, send_count, send_type, dst, tag,
(char *)rbuf + base_offset * recv_chunk, recv_count, recv_type, dst, tag,
comm, &status);
// printf("node %d send to %d in phase %d s_offset = %d r_offset = %d count = %d\n",rank,dst,phase, send_base_offset, recv_base_offset, curr_count);
- MPI_Sendrecv((char *)rbuf + send_offset, curr_count, recv_type, dst, tag,
+ smpi_mpi_sendrecv((char *)rbuf + send_offset, curr_count, recv_type, dst, tag,
(char *)rbuf + recv_offset, curr_count, recv_type, dst, tag,
comm, &status);
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
char *sendptr = (char *) send_buff;
char *recvptr = (char *) recv_buff;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
- MPI_Type_extent(send_type, &extent);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
+ extent = smpi_datatype_get_extent(send_type);
// local send/recv
- MPI_Sendrecv(sendptr, send_count, send_type, rank, tag,
+ smpi_mpi_sendrecv(sendptr, send_count, send_type, rank, tag,
recvptr + rank * recv_count * extent,
recv_count, recv_type, rank, tag, comm, &status);
for (i = 1; i < num_procs; i++) {
src = (rank - i + num_procs) % num_procs;
dst = (rank + i) % num_procs;
- MPI_Sendrecv(sendptr, send_count, send_type, dst, tag,
+ smpi_mpi_sendrecv(sendptr, send_count, send_type, dst, tag,
recvptr + src * recv_count * extent, recv_count, recv_type,
src, tag, comm, &status);
}
-#include "colls.h"
+#include "colls_private.h"
#ifndef NUM_CORE
#define NUM_CORE 8
#endif
MPI_Comm comm)
{
int src, dst, comm_size, rank;
- MPI_Comm_size(comm, &comm_size);
- MPI_Comm_rank(comm, &rank);
+ comm_size = smpi_comm_size(comm);
+ rank = smpi_comm_rank(comm);
MPI_Aint rextent, sextent;
- MPI_Type_extent(rtype, &rextent);
- MPI_Type_extent(stype, &sextent);
+ rextent = smpi_datatype_get_extent(rtype);
+ sextent = smpi_datatype_get_extent(stype);
int tag = 50;
MPI_Status status;
int i, send_offset, recv_offset;
}
//INTRA-SMP-ALLGATHER
recv_offset = rank * rextent * rcount;
- MPI_Sendrecv(send_buf, scount, stype, rank, tag,
+ smpi_mpi_sendrecv(send_buf, scount, stype, rank, tag,
((char *) recv_buf + recv_offset), rcount, rtype, rank, tag,
comm, &status);
for (i = 1; i < num_core_in_current_smp; i++) {
(num_core_in_current_smp);
recv_offset = src * rextent * rcount;
- MPI_Sendrecv(send_buf, scount, stype, dst, tag,
+ smpi_mpi_sendrecv(send_buf, scount, stype, dst, tag,
((char *) recv_buf + recv_offset), rcount, rtype, src, tag,
comm, &status);
if (intra_rank == 0) {
MPI_Request *reqs, *req_ptr;
int num_req = (inter_comm_size - 1) * 2;
- reqs = (MPI_Request *) malloc(num_req * sizeof(MPI_Request));
+ reqs = (MPI_Request *) xbt_malloc(num_req * sizeof(MPI_Request));
req_ptr = reqs;
MPI_Status *stat;
- stat = (MPI_Status *) malloc(num_req * sizeof(MPI_Status));
+ stat = (MPI_Status *) xbt_malloc(num_req * sizeof(MPI_Status));
for (i = 1; i < inter_comm_size; i++) {
src = ((inter_rank - i + inter_comm_size) % inter_comm_size) * num_core;
//send_offset = (rank * sextent * scount);
recv_offset = (src * sextent * scount);
- // MPI_Sendrecv((recv_buf+send_offset), (scount * num_core), stype, dst, tag,
+ // smpi_mpi_sendrecv((recv_buf+send_offset), (scount * num_core), stype, dst, tag,
// (recv_buf+recv_offset), (rcount * num_core), rtype, src, tag, comm, &status);
//MPIC_Isend((recv_buf+send_offset), (scount * num_core), stype, dst, tag, comm, req_ptr++);
- MPI_Irecv(((char *) recv_buf + recv_offset), (rcount * num_core), rtype,
- src, tag, comm, req_ptr++);
+ *(req_ptr++) = smpi_mpi_irecv(((char *) recv_buf + recv_offset), (rcount * num_core), rtype,
+ src, tag, comm);
}
for (i = 1; i < inter_comm_size; i++) {
//src = ((inter_rank-i+inter_comm_size)%inter_comm_size) * num_core;
send_offset = (rank * sextent * scount);
//recv_offset = (src * sextent * scount);
- // MPI_Sendrecv((recv_buf+send_offset), (scount * num_core), stype, dst, tag,
+ // smpi_mpi_sendrecv((recv_buf+send_offset), (scount * num_core), stype, dst, tag,
// (recv_buf+recv_offset), (rcount * num_core), rtype, src, tag, comm, &status);
- MPI_Isend(((char *) recv_buf + send_offset), (scount * num_core), stype,
- dst, tag, comm, req_ptr++);
+ *(req_ptr++) = smpi_mpi_isend(((char *) recv_buf + send_offset), (scount * num_core), stype,
+ dst, tag, comm);
//MPIC_Irecv((recv_buf+recv_offset), (rcount * num_core), rtype, src, tag, comm, req_ptr++);
}
- MPI_Waitall(num_req, reqs, stat);
+ smpi_mpi_waitall(num_req, reqs, stat);
free(reqs);
free(stat);
if (intra_rank == 0) {
for (i = 1; i < num_core_in_current_smp; i++) {
//printf("rank = %d, num = %d send to %d\n",rank, num_core_in_current_smp, (rank + i));
- MPI_Send(recv_buf, (scount * comm_size), stype, (rank + i), tag, comm);
+ smpi_mpi_send(recv_buf, (scount * comm_size), stype, (rank + i), tag, comm);
}
} else {
//printf("rank = %d recv from %d\n",rank, (inter_rank * num_core));
- MPI_Recv(recv_buf, (rcount * comm_size), rtype, (inter_rank * num_core),
+ smpi_mpi_recv(recv_buf, (rcount * comm_size), rtype, (inter_rank * num_core),
tag, comm, &status);
}
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
MPI_Status status;
char *recv_ptr = (char *) recv_buff;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
- MPI_Type_extent(send_type, &extent);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
+ extent = smpi_datatype_get_extent(send_type);
num_reqs = (2 * num_procs) - 2;
- reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request));
+ reqs = (MPI_Request *) xbt_malloc(num_reqs * sizeof(MPI_Request));
if (!reqs) {
printf("allgather-spreading-simple.c:40: cannot allocate memory\n");
MPI_Finalize();
}
req_ptr = reqs;
- MPI_Sendrecv(send_buff, send_count, send_type, rank, tag,
+ smpi_mpi_sendrecv(send_buff, send_count, send_type, rank, tag,
(char *) recv_buff + rank * recv_count * extent, recv_count,
recv_type, rank, tag, comm, &status);
src = (rank + i) % num_procs;
if (src == rank)
continue;
- MPI_Irecv(recv_ptr + src * recv_count * extent, recv_count, recv_type,
- src, tag, comm, req_ptr++);
+ *(req_ptr++) = smpi_mpi_irecv(recv_ptr + src * recv_count * extent, recv_count, recv_type,
+ src, tag, comm);
}
for (i = 0; i < num_procs; i++) {
dst = (rank + i) % num_procs;
if (dst == rank)
continue;
- MPI_Isend(send_buff, send_count, send_type, dst, tag, comm, req_ptr++);
+ *(req_ptr++) = smpi_mpi_isend(send_buff, send_count, send_type, dst, tag, comm);
}
- MPI_Waitall(num_reqs, reqs, MPI_STATUSES_IGNORE);
+ smpi_mpi_waitall(num_reqs, reqs, MPI_STATUSES_IGNORE);
free(reqs);
return MPI_SUCCESS;
-#include "colls.h"
+#include "colls_private.h"
/* IMPLEMENTED BY PITCH PATARASUK
Non-topology-specific all-reduce operation designed to be bandwidth-optimal */
int send_offset, recv_offset;
int remainder, remainder_flag, remainder_offset;
- MPI_Comm_rank(MPI_COMM_WORLD, &rank);
- MPI_Comm_size(MPI_COMM_WORLD, &size);
+ rank = smpi_comm_rank(MPI_COMM_WORLD);
+ size = smpi_comm_size(MPI_COMM_WORLD);
/* make it compatible with all data type */
MPI_Aint extent;
- MPI_Type_extent(dtype, &extent);
+ extent = smpi_datatype_get_extent(dtype);
/* when the communication size is smaller than the number of processes (not supported) */
if (rcount < size) {
- return MPI_Allreduce(sbuf, rbuf, rcount, dtype, op, comm);
+ return mpi_coll_allreduce_fun(sbuf, rbuf, rcount, dtype, op, comm);
}
/* when the communication size is not divisible by the number of processes:
// copy partial data
send_offset = ((rank - 1 + size) % size) * count * extent;
recv_offset = ((rank - 1 + size) % size) * count * extent;
- MPI_Sendrecv((char *) sbuf + send_offset, count, dtype, rank, tag - 1,
+ smpi_mpi_sendrecv((char *) sbuf + send_offset, count, dtype, rank, tag - 1,
(char *) rbuf + recv_offset, count, dtype, rank, tag - 1, comm,
&status);
for (i = 0; i < (size - 1); i++) {
send_offset = ((rank - 1 - i + size) % size) * count * extent;
recv_offset = ((rank - 2 - i + size) % size) * count * extent;
- MPI_Sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size),
+ smpi_mpi_sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size),
tag + i, (char *) rbuf + recv_offset, count, dtype,
((rank + size - 1) % size), tag + i, comm, &status);
for (i = 0; i < (size - 1); i++) {
send_offset = ((rank - i + size) % size) * count * extent;
recv_offset = ((rank - 1 - i + size) % size) * count * extent;
- MPI_Sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size),
+ smpi_mpi_sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size),
tag + i, (char *) rbuf + recv_offset, count, dtype,
((rank + size - 1) % size), tag + i, comm, &status);
}
/* when the communication size is not divisible by the number of processes:
call the native implementation for the remaining chunk at the end of the operation */
if (remainder_flag) {
- return MPI_Allreduce((char *) sbuf + remainder_offset,
+ XBT_WARN("MPI_allreduce_NTS use default MPI_allreduce.");
+ smpi_mpi_allreduce((char *) sbuf + remainder_offset,
(char *) rbuf + remainder_offset, remainder, dtype, op,
comm);
+ return MPI_SUCCESS;
}
return MPI_SUCCESS;
-#include "colls.h"
+#include "colls_private.h"
/* IMPLEMENTED BY PITCH PATARASUK
Non-topology-specific all-reduce operation designed to be bandwidth-optimal
int send_offset, recv_offset;
int remainder, remainder_flag, remainder_offset;
- MPI_Comm_rank(MPI_COMM_WORLD, &rank);
- MPI_Comm_size(MPI_COMM_WORLD, &size);
+ rank = smpi_comm_rank(MPI_COMM_WORLD);
+ size = smpi_comm_size(MPI_COMM_WORLD);
/* make it compatible with all data type */
MPI_Aint extent;
- MPI_Type_extent(dtype, &extent);
+ extent = smpi_datatype_get_extent(dtype);
/* when the communication size is smaller than the number of processes (not supported) */
if (rcount < size) {
- return MPI_Allreduce(sbuf, rbuf, rcount, dtype, op, comm);
+ XBT_WARN("MPI_allreduce_lr use default MPI_allreduce.");
+ smpi_mpi_allreduce(sbuf, rbuf, rcount, dtype, op, comm);
+ return MPI_SUCCESS;
}
/* when the communication size is not divisible by the number of processes:
// copy partial data
send_offset = ((rank - 1 + size) % size) * count * extent;
recv_offset = ((rank - 1 + size) % size) * count * extent;
- MPI_Sendrecv((char *) sbuf + send_offset, count, dtype, rank, tag - 1,
+ smpi_mpi_sendrecv((char *) sbuf + send_offset, count, dtype, rank, tag - 1,
(char *) rbuf + recv_offset, count, dtype, rank, tag - 1, comm,
&status);
send_offset = ((rank - 1 - i + 2 * size) % size) * count * extent;
recv_offset = ((rank - 2 - i + 2 * size) % size) * count * extent;
// recv_offset = ((rank-i+2*size)%size)*count*extent;
- MPI_Sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size),
+ smpi_mpi_sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size),
tag + i, (char *) rbuf + recv_offset, count, dtype,
((rank + size - 1) % size), tag + i, comm, &status);
for (i = 0; i < (size - 1); i++) {
send_offset = ((rank - i + 2 * size) % size) * count * extent;
recv_offset = ((rank - 1 - i + 2 * size) % size) * count * extent;
- MPI_Sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size),
+ smpi_mpi_sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size),
tag + i, (char *) rbuf + recv_offset, count, dtype,
((rank + size - 1) % size), tag + i, comm, &status);
}
/* when the communication size is not divisible by the number of processes:
call the native implementation for the remaining chunk at the end of the operation */
if (remainder_flag) {
- return MPI_Allreduce((char *) sbuf + remainder_offset,
+ return mpi_coll_allreduce_fun((char *) sbuf + remainder_offset,
(char *) rbuf + remainder_offset, remainder, dtype, op,
comm);
}
-#include "colls.h"
+#include "colls_private.h"
int smpi_coll_tuned_allreduce_rab_rdb(void *sbuff, void *rbuff, int count,
MPI_Datatype dtype, MPI_Op op,
uop = op_ptr->op;
#endif
- MPI_Comm_size(comm, &nprocs);
- MPI_Comm_rank(comm, &rank);
+ nprocs = smpi_comm_size(comm);
+ rank = smpi_comm_rank(comm);
- MPI_Type_extent(dtype, &extent);
+ extent = smpi_datatype_get_extent(dtype);
tmp_buf = (void *) xbt_malloc(count * extent);
MPIR_Localcopy(sbuff, count, dtype, rbuff, count, dtype);
- MPI_Type_size(dtype, &type_size);
+ type_size = smpi_datatype_size(dtype);
// find nearest power-of-two less than or equal to comm_size
pof2 = 1;
// even
if (rank % 2 == 0) {
- MPI_Send(rbuff, count, dtype, rank + 1, tag, comm);
+ smpi_mpi_send(rbuff, count, dtype, rank + 1, tag, comm);
// temporarily set the rank to -1 so that this
// process does not participate in recursive
newrank = -1;
} else // odd
{
- MPI_Recv(tmp_buf, count, dtype, rank - 1, tag, comm, &status);
+ smpi_mpi_recv(tmp_buf, count, dtype, rank - 1, tag, comm, &status);
// do the reduction on received data. since the
// ordering is right, it doesn't matter whether
// the operation is commutative or not.
// reduce-scatter, calculate the count that each process receives
// and the displacement within the buffer
- cnts = (int *) malloc(pof2 * sizeof(int));
- disps = (int *) malloc(pof2 * sizeof(int));
+ cnts = (int *) xbt_malloc(pof2 * sizeof(int));
+ disps = (int *) xbt_malloc(pof2 * sizeof(int));
for (i = 0; i < (pof2 - 1); i++)
cnts[i] = count / pof2;
}
// Send data from recvbuf. Recv into tmp_buf
- MPI_Sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt,
+ smpi_mpi_sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt,
dtype, dst, tag,
(char *) tmp_buf + disps[recv_idx] * extent, recv_cnt,
dtype, dst, tag, comm, &status);
recv_cnt += cnts[i];
}
- MPI_Sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt,
+ smpi_mpi_sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt,
dtype, dst, tag,
(char *) rbuff + disps[recv_idx] * extent, recv_cnt,
dtype, dst, tag, comm, &status);
if (rank < 2 * rem) {
if (rank % 2) // odd
- MPI_Send(rbuff, count, dtype, rank - 1, tag, comm);
+ smpi_mpi_send(rbuff, count, dtype, rank - 1, tag, comm);
else // even
- MPI_Recv(rbuff, count, dtype, rank + 1, tag, comm, &status);
+ smpi_mpi_recv(rbuff, count, dtype, rank + 1, tag, comm, &status);
}
free(tmp_buf);
-#include "colls.h"
+#include "colls_private.h"
#ifndef REDUCE_STUFF
#define REDUCE_STUFF
/*****************************************************************************
MPI_Status status;
void *tmp_buf = NULL;
MPI_User_function *func = get_op_func(op);
- MPI_Comm_size(comm, &nprocs);
- MPI_Comm_rank(comm, &rank);
+ nprocs = smpi_comm_size(comm);
+ rank = smpi_comm_rank(comm);
- MPI_Type_extent(dtype, &extent);
+ extent = smpi_datatype_get_extent(dtype);
tmp_buf = (void *) xbt_malloc(count * extent);
MPIR_Localcopy(sbuff, count, dtype, rbuff, count, dtype);
- MPI_Type_size(dtype, &type_size);
+ type_size = smpi_datatype_size(dtype);
// find nearest power-of-two less than or equal to comm_size
pof2 = 1;
// reduce-scatter, calculate the count that each process receives
// and the displacement within the buffer
- cnts = (int *) malloc(pof2 * sizeof(int));
- disps = (int *) malloc(pof2 * sizeof(int));
+ cnts = (int *) xbt_malloc(pof2 * sizeof(int));
+ disps = (int *) xbt_malloc(pof2 * sizeof(int));
for (i = 0; i < (pof2 - 1); i++)
cnts[i] = count / pof2;
-#include "colls.h"
+#include "colls_private.h"
//#include <star-reduction.c>
int smpi_coll_tuned_allreduce_rab_rsag(void *sbuff, void *rbuff, int count,
MPI_Aint extent;
MPI_Status status;
void *tmp_buf = NULL;
- MPI_Comm_size(comm, &nprocs);
- MPI_Comm_rank(comm, &rank);
+ nprocs = smpi_comm_size(comm);
+ rank = smpi_comm_rank(comm);
- MPI_Type_extent(dtype, &extent);
+ extent = smpi_datatype_get_extent(dtype);
tmp_buf = (void *) xbt_malloc(count * extent);
- MPI_Sendrecv(sbuff, count, dtype, rank, tag, rbuff, count, dtype, rank, tag,
+ smpi_mpi_sendrecv(sbuff, count, dtype, rank, tag, rbuff, count, dtype, rank, tag,
comm, &status);
- MPI_Type_size(dtype, &type_size);
+ type_size = smpi_datatype_size(dtype);
// find nearest power-of-two less than or equal to comm_size
pof2 = 1;
// even
if (rank % 2 == 0) {
- MPI_Send(rbuff, count, dtype, rank + 1, tag, comm);
+ smpi_mpi_send(rbuff, count, dtype, rank + 1, tag, comm);
// temporarily set the rank to -1 so that this
// process does not participate in recursive
newrank = -1;
} else // odd
{
- MPI_Recv(tmp_buf, count, dtype, rank - 1, tag, comm, &status);
+ smpi_mpi_recv(tmp_buf, count, dtype, rank - 1, tag, comm, &status);
// do the reduction on received data. since the
// ordering is right, it doesn't matter whether
// the operation is commutative or not.
// reduce-scatter, calculate the count that each process receives
// and the displacement within the buffer
- cnts = (int *) malloc(pof2 * sizeof(int));
- disps = (int *) malloc(pof2 * sizeof(int));
+ cnts = (int *) xbt_malloc(pof2 * sizeof(int));
+ disps = (int *) xbt_malloc(pof2 * sizeof(int));
for (i = 0; i < (pof2 - 1); i++)
cnts[i] = count / pof2;
}
// Send data from recvbuf. Recv into tmp_buf
- MPI_Sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt,
+ smpi_mpi_sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt,
dtype, dst, tag,
(char *) tmp_buf + disps[recv_idx] * extent, recv_cnt,
dtype, dst, tag, comm, &status);
recv_cnt += cnts[i];
}
- MPI_Sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt,
+ smpi_mpi_sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt,
dtype, dst, tag,
(char *) rbuff + disps[recv_idx] * extent, recv_cnt,
dtype, dst, tag, comm, &status);
if (rank < 2 * rem) {
if (rank % 2) // odd
- MPI_Send(rbuff, count, dtype, rank - 1, tag, comm);
+ smpi_mpi_send(rbuff, count, dtype, rank - 1, tag, comm);
else // even
- MPI_Recv(rbuff, count, dtype, rank + 1, tag, comm, &status);
+ smpi_mpi_recv(rbuff, count, dtype, rank + 1, tag, comm, &status);
}
free(tmp_buf);
-#include "colls.h"
+#include "colls_private.h"
//#include <star-reduction.c>
// NP pow of 2 for now
void *recv, *tmp_buf;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &nprocs);
+ rank = smpi_comm_rank(comm);
+ nprocs = smpi_comm_size(comm);
- MPI_Type_extent(dtype, &extent);
+ extent = smpi_datatype_get_extent(dtype);
pof2 = 1;
while (pof2 <= nprocs)
send_size = (count + nprocs) / nprocs;
newcnt = send_size * nprocs;
- recv = (void *) malloc(extent * newcnt);
- tmp_buf = (void *) malloc(extent * newcnt);
+ recv = (void *) xbt_malloc(extent * newcnt);
+ tmp_buf = (void *) xbt_malloc(extent * newcnt);
memcpy(recv, sbuff, extent * count);
else
recv_idx = send_idx + (mask * share);
- MPI_Sendrecv((char *) recv + send_idx * extent, send_cnt, dtype, dst, tag,
+ smpi_mpi_sendrecv((char *) recv + send_idx * extent, send_cnt, dtype, dst, tag,
tmp_buf, recv_cnt, dtype, dst, tag, comm, &status);
star_reduction(op, tmp_buf, (char *) recv + recv_idx * extent, &recv_cnt,
}
memcpy(tmp_buf, (char *) recv + recv_idx * extent, recv_cnt * extent);
- MPI_Allgather(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm);
+ mpi_coll_allgather_fun(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm);
memcpy(rbuff, recv, count * extent);
free(recv);
}
else {
- tmp_buf = (void *) malloc(extent * count);
+ tmp_buf = (void *) xbt_malloc(extent * count);
memcpy(rbuff, sbuff, count * extent);
mask = pof2 / 2;
share = count / pof2;
else
recv_idx = send_idx + (mask * share);
- MPI_Sendrecv((char *) rbuff + send_idx * extent, send_cnt, dtype, dst,
+ smpi_mpi_sendrecv((char *) rbuff + send_idx * extent, send_cnt, dtype, dst,
tag, tmp_buf, recv_cnt, dtype, dst, tag, comm, &status);
star_reduction(op, tmp_buf, (char *) rbuff + recv_idx * extent, &recv_cnt,
}
memcpy(tmp_buf, (char *) rbuff + recv_idx * extent, recv_cnt * extent);
- MPI_Allgather(tmp_buf, recv_cnt, dtype, rbuff, recv_cnt, dtype, comm);
+ mpi_coll_allgather_fun(tmp_buf, recv_cnt, dtype, rbuff, recv_cnt, dtype, comm);
free(tmp_buf);
}
-#include "colls.h"
+#include "colls_private.h"
//#include <star-reduction.c>
// this requires that count >= NP
uop = op_ptr->op;
#endif
*/
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &nprocs);
+ rank = smpi_comm_rank(comm);
+ nprocs = smpi_comm_size(comm);
- MPI_Type_extent(dtype, &s_extent);
+ s_extent = smpi_datatype_get_extent(dtype);
// uneven count
if (count % nprocs) {
send_size = (count + nprocs) / nprocs;
nbytes = send_size * s_extent;
- send = (void *) malloc(s_extent * send_size * nprocs);
- recv = (void *) malloc(s_extent * send_size * nprocs);
- tmp = (void *) malloc(nbytes);
+ send = (void *) xbt_malloc(s_extent * send_size * nprocs);
+ recv = (void *) xbt_malloc(s_extent * send_size * nprocs);
+ tmp = (void *) xbt_malloc(nbytes);
memcpy(send, sbuff, s_extent * count);
- MPI_Alltoall(send, send_size, dtype, recv, send_size, dtype, comm);
+ mpi_coll_alltoall_fun(send, send_size, dtype, recv, send_size, dtype, comm);
memcpy(tmp, recv, nbytes);
for (i = 1, s_offset = nbytes; i < nprocs; i++, s_offset = i * nbytes)
star_reduction(op, (char *) recv + s_offset, tmp, &send_size, &dtype);
- MPI_Allgather(tmp, send_size, dtype, recv, send_size, dtype, comm);
+ mpi_coll_allgather_fun(tmp, send_size, dtype, recv, send_size, dtype, comm);
memcpy(rbuff, recv, count * s_extent);
free(recv);
nbytes = send_size * s_extent;
r_offset = rank * nbytes;
- recv = (void *) malloc(s_extent * send_size * nprocs);
+ recv = (void *) xbt_malloc(s_extent * send_size * nprocs);
- MPI_Alltoall(send, send_size, dtype, recv, send_size, dtype, comm);
+ mpi_coll_alltoall_fun(send, send_size, dtype, recv, send_size, dtype, comm);
memcpy((char *) rbuff + r_offset, recv, nbytes);
star_reduction(op, (char *) recv + s_offset, (char *) rbuff + r_offset,
&send_size, &dtype);
- MPI_Allgather((char *) rbuff + r_offset, send_size, dtype, rbuff, send_size,
+ mpi_coll_allgather_fun((char *) rbuff + r_offset, send_size, dtype, rbuff, send_size,
dtype, comm);
free(recv);
}
-#include "colls.h"
+#include "colls_private.h"
//#include <star-reduction.c>
int smpi_coll_tuned_allreduce_rdb(void *sbuff, void *rbuff, int count,
-#include "colls.h"
+#include "colls_private.h"
int smpi_coll_tuned_allreduce_redbcast(void *buf, void *buf2, int count,
MPI_Datatype datatype, MPI_Op op,
-#include "colls.h"
+#include "colls_private.h"
/* IMPLEMENTED BY PITCH PATARASUK
Non-topology-specific (however, the number of cores/node needs to be changed)
all-reduce operation designed for smp clusters
uop = MPIR_Op_table[op % 16 - 1];
#endif
- MPI_Comm_size(comm, &comm_size);
- MPI_Comm_rank(comm, &rank);
+ comm_size = smpi_comm_size(comm);
+ rank = smpi_comm_rank(comm);
MPI_Aint extent;
- MPI_Type_extent(dtype, &extent);
- tmp_buf = (void *) malloc(count * extent);
+ extent = smpi_datatype_get_extent(dtype);
+ tmp_buf = (void *) xbt_malloc(count * extent);
int intra_rank, inter_rank;
intra_rank = rank % num_core;
int inter_comm_size = (comm_size + num_core - 1) / num_core;
/* copy input buffer to output buffer */
- MPI_Sendrecv(send_buf, count, dtype, rank, tag,
+ smpi_mpi_sendrecv(send_buf, count, dtype, rank, tag,
recv_buf, count, dtype, rank, tag, comm, &status);
/* compute pipe length */
src = (inter_rank * num_core) + (intra_rank | mask);
if (src < comm_size) {
recv_offset = phase * pcount * extent;
- MPI_Recv(tmp_buf, pcount, dtype, src, tag, comm, &status);
+ smpi_mpi_recv(tmp_buf, pcount, dtype, src, tag, comm, &status);
(*uop) (tmp_buf, (char *)recv_buf + recv_offset, &pcount, &dtype);
}
} else {
send_offset = phase * pcount * extent;
dst = (inter_rank * num_core) + (intra_rank & (~mask));
- MPI_Send((char *)recv_buf + send_offset, pcount, dtype, dst, tag, comm);
+ smpi_mpi_send((char *)recv_buf + send_offset, pcount, dtype, dst, tag, comm);
break;
}
mask <<= 1;
src = (inter_rank | mask) * num_core;
if (src < comm_size) {
recv_offset = (phase - 1) * pcount * extent;
- MPI_Recv(tmp_buf, pcount, dtype, src, tag, comm, &status);
+ smpi_mpi_recv(tmp_buf, pcount, dtype, src, tag, comm, &status);
(*uop) (tmp_buf, (char *)recv_buf + recv_offset, &pcount, &dtype);
}
} else {
dst = (inter_rank & (~mask)) * num_core;
send_offset = (phase - 1) * pcount * extent;
- MPI_Send((char *)recv_buf + send_offset, pcount, dtype, dst, tag, comm);
+ smpi_mpi_send((char *)recv_buf + send_offset, pcount, dtype, dst, tag, comm);
break;
}
mask <<= 1;
if (inter_rank & mask) {
src = (inter_rank - mask) * num_core;
recv_offset = (phase - 2) * pcount * extent;
- MPI_Recv((char *)recv_buf + recv_offset, pcount, dtype, src, tag, comm,
+ smpi_mpi_recv((char *)recv_buf + recv_offset, pcount, dtype, src, tag, comm,
&status);
break;
}
if (dst < comm_size) {
//printf("Node %d send to node %d when mask is %d\n", rank, dst, mask);
send_offset = (phase - 2) * pcount * extent;
- MPI_Send((char *)recv_buf + send_offset, pcount, dtype, dst, tag, comm);
+ smpi_mpi_send((char *)recv_buf + send_offset, pcount, dtype, dst, tag, comm);
}
}
mask >>= 1;
if (intra_rank & mask) {
src = (inter_rank * num_core) + (intra_rank - mask);
recv_offset = (phase - 3) * pcount * extent;
- MPI_Recv((char *)recv_buf + recv_offset, pcount, dtype, src, tag, comm,
+ smpi_mpi_recv((char *)recv_buf + recv_offset, pcount, dtype, src, tag, comm,
&status);
break;
}
dst = (inter_rank * num_core) + (intra_rank + mask);
if (dst < comm_size) {
send_offset = (phase - 3) * pcount * extent;
- MPI_Send((char *)recv_buf + send_offset, pcount, dtype, dst, tag, comm);
+ smpi_mpi_send((char *)recv_buf + send_offset, pcount, dtype, dst, tag, comm);
}
mask >>= 1;
}
-#include "colls.h"
+#include "colls_private.h"
/* IMPLEMENTED BY PITCH PATARASUK
Non-topology-specific (however, the number of cores/node needs to be changed)
all-reduce operation designed for smp clusters
rank=smpi_comm_rank(comm);
MPI_Aint extent, lb;
smpi_datatype_extent(dtype, &lb, &extent);
- tmp_buf = (void *) malloc(count * extent);
+ tmp_buf = (void *) xbt_malloc(count * extent);
/* compute intra and inter ranking */
int intra_rank, inter_rank;
-#include "colls.h"
+#include "colls_private.h"
/* IMPLEMENTED BY PITCH PATARASUK
Non-topology-specific (however, the number of cores/node needs to be changed)
all-reduce operation designed for smp clusters
uop = op_ptr->op;
#endif
*/
- MPI_Comm_size(comm, &comm_size);
- MPI_Comm_rank(comm, &rank);
+ comm_size = smpi_comm_size(comm);
+ rank = smpi_comm_rank(comm);
MPI_Aint extent;
- MPI_Type_extent(dtype, &extent);
- tmp_buf = (void *) malloc(count * extent);
+ extent = smpi_datatype_get_extent(dtype);
+ tmp_buf = (void *) xbt_malloc(count * extent);
/* compute intra and inter ranking */
int intra_rank, inter_rank;
int inter_comm_size = (comm_size + num_core - 1) / num_core;
/* copy input buffer to output buffer */
- MPI_Sendrecv(send_buf, count, dtype, rank, tag,
+ smpi_mpi_sendrecv(send_buf, count, dtype, rank, tag,
recv_buf, count, dtype, rank, tag, comm, &status);
/* start binomial reduce intra communication inside each SMP node */
if ((mask & intra_rank) == 0) {
src = (inter_rank * num_core) + (intra_rank | mask);
if (src < comm_size) {
- MPI_Recv(tmp_buf, count, dtype, src, tag, comm, &status);
+ smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status);
star_reduction(op, tmp_buf, recv_buf, &count, &dtype);
}
} else {
dst = (inter_rank * num_core) + (intra_rank & (~mask));
- MPI_Send(recv_buf, count, dtype, dst, tag, comm);
+ smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm);
break;
}
mask <<= 1;
if (inter_rank < 2 * rem) {
if (inter_rank % 2 == 0) {
dst = rank + num_core;
- MPI_Send(recv_buf, count, dtype, dst, tag, comm);
+ smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm);
newrank = -1;
} else {
src = rank - num_core;
- MPI_Recv(tmp_buf, count, dtype, src, tag, comm, &status);
+ smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status);
star_reduction(op, tmp_buf, recv_buf, &count, &dtype);
newrank = inter_rank / 2;
}
dst *= num_core;
/* exchange data in rdb manner */
- MPI_Sendrecv(recv_buf, count, dtype, dst, tag, tmp_buf, count, dtype,
+ smpi_mpi_sendrecv(recv_buf, count, dtype, dst, tag, tmp_buf, count, dtype,
dst, tag, comm, &status);
star_reduction(op, tmp_buf, recv_buf, &count, &dtype);
mask <<= 1;
*/
if (inter_rank < 2 * rem) {
if (inter_rank % 2) {
- MPI_Send(recv_buf, count, dtype, rank - num_core, tag, comm);
+ smpi_mpi_send(recv_buf, count, dtype, rank - num_core, tag, comm);
} else {
- MPI_Recv(recv_buf, count, dtype, rank + num_core, tag, comm, &status);
+ smpi_mpi_recv(recv_buf, count, dtype, rank + num_core, tag, comm, &status);
}
}
}
while (mask < num_core_in_current_smp) {
if (intra_rank & mask) {
src = (inter_rank * num_core) + (intra_rank - mask);
- MPI_Recv(recv_buf, count, dtype, src, tag, comm, &status);
+ smpi_mpi_recv(recv_buf, count, dtype, src, tag, comm, &status);
break;
}
mask <<= 1;
while (mask > 0) {
dst = (inter_rank * num_core) + (intra_rank + mask);
if (dst < comm_size) {
- MPI_Send(recv_buf, count, dtype, dst, tag, comm);
+ smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm);
}
mask >>= 1;
}
-#include "colls.h"
+#include "colls_private.h"
//#include <star-reduction.c>
/* change number of core per smp-node
uop = op_ptr->op;
#endif
*/
- MPI_Comm_size(comm, &comm_size);
- MPI_Comm_rank(comm, &rank);
+ comm_size = smpi_comm_size(comm);
+ rank = smpi_comm_rank(comm);
MPI_Aint extent;
- MPI_Type_extent(dtype, &extent);
- tmp_buf = (void *) malloc(count * extent);
+ extent = smpi_datatype_get_extent(dtype);
+ tmp_buf = (void *) xbt_malloc(count * extent);
int intra_rank, inter_rank;
intra_rank = rank % num_core;
}
- MPI_Sendrecv(send_buf, count, dtype, rank, tag,
+ smpi_mpi_sendrecv(send_buf, count, dtype, rank, tag,
recv_buf, count, dtype, rank, tag, comm, &status);
src = (inter_rank * num_core) + (intra_rank | mask);
// if (src < ((inter_rank + 1) * num_core)) {
if (src < comm_size) {
- MPI_Recv(tmp_buf, count, dtype, src, tag, comm, &status);
+ smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status);
star_reduction(op, tmp_buf, recv_buf, &count, &dtype);
//printf("Node %d recv from node %d when mask is %d\n", rank, src, mask);
}
} else {
dst = (inter_rank * num_core) + (intra_rank & (~mask));
- MPI_Send(recv_buf, count, dtype, dst, tag, comm);
+ smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm);
//printf("Node %d send to node %d when mask is %d\n", rank, dst, mask);
break;
}
else
recv_count = curr_size + curr_remainder;
- MPI_Sendrecv((char *) recv_buf + send_offset, send_count, dtype, to,
+ smpi_mpi_sendrecv((char *) recv_buf + send_offset, send_count, dtype, to,
tag + i, tmp_buf, recv_count, dtype, from, tag + i, comm,
&status);
else
recv_count = curr_size + curr_remainder;
- MPI_Sendrecv((char *) recv_buf + send_offset, send_count, dtype, to,
+ smpi_mpi_sendrecv((char *) recv_buf + send_offset, send_count, dtype, to,
tag + i, (char *) recv_buf + recv_offset, recv_count, dtype,
from, tag + i, comm, &status);
if ((mask & inter_rank) == 0) {
src = (inter_rank | mask) * num_core;
if (src < comm_size) {
- MPI_Recv(tmp_buf, count, dtype, src, tag, comm, &status);
+ smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status);
(* uop) (tmp_buf, recv_buf, &count, &dtype);
//printf("Node %d recv from node %d when mask is %d\n", rank, src, mask);
}
}
else {
dst = (inter_rank & (~mask)) * num_core;
- MPI_Send(recv_buf, count, dtype, dst, tag, comm);
+ smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm);
//printf("Node %d send to node %d when mask is %d\n", rank, dst, mask);
break;
}
if (inter_rank & mask) {
src = (inter_rank - mask) * num_core;
//printf("Node %d recv from node %d when mask is %d\n", rank, src, mask);
- MPI_Recv(recv_buf, count, dtype, src, tag, comm, &status);
+ smpi_mpi_recv(recv_buf, count, dtype, src, tag, comm, &status);
break;
}
mask <<= 1;
dst = (inter_rank + mask) * num_core;
if (dst < comm_size) {
//printf("Node %d send to node %d when mask is %d\n", rank, dst, mask);
- MPI_Send(recv_buf, count, dtype, dst, tag, comm);
+ smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm);
}
}
mask >>= 1;
if (intra_rank & mask) {
src = (inter_rank * num_core) + (intra_rank - mask);
//printf("Node %d recv from node %d when mask is %d\n", rank, src, mask);
- MPI_Recv(recv_buf, count, dtype, src, tag, comm, &status);
+ smpi_mpi_recv(recv_buf, count, dtype, src, tag, comm, &status);
break;
}
mask <<= 1;
dst = (inter_rank * num_core) + (intra_rank + mask);
if (dst < comm_size) {
//printf("Node %d send to node %d when mask is %d\n", rank, dst, mask);
- MPI_Send(recv_buf, count, dtype, dst, tag, comm);
+ smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm);
}
mask >>= 1;
}
-#include "colls.h"
+#include "colls_private.h"
/*
* implemented by Pitch Patarasuk, 07/01/2007
*/
MPI_Status status;
int num_core = NUM_CORE;
- MPI_Comm_size(comm, &comm_size);
- MPI_Comm_rank(comm, &rank);
+ comm_size = smpi_comm_size(comm);
+ rank = smpi_comm_rank(comm);
MPI_Aint extent;
- MPI_Type_extent(dtype, &extent);
- tmp_buf = (void *) malloc(count * extent);
+ extent = smpi_datatype_get_extent(dtype);
+ tmp_buf = (void *) xbt_malloc(count * extent);
int intra_rank, inter_rank;
intra_rank = rank % num_core;
int inter_comm_size = (comm_size + num_core - 1) / num_core;
- MPI_Sendrecv(sbuf, count, dtype, rank, tag,
+ smpi_mpi_sendrecv(sbuf, count, dtype, rank, tag,
rbuf, count, dtype, rank, tag, comm, &status);
// SMP_binomial_reduce
src = (inter_rank * num_core) + (intra_rank | mask);
// if (src < ((inter_rank + 1) * num_core)) {
if (src < comm_size) {
- MPI_Recv(tmp_buf, count, dtype, src, tag, comm, &status);
+ smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status);
star_reduction(op, tmp_buf, rbuf, &count, &dtype);
//printf("Node %d recv from node %d when mask is %d\n", rank, src, mask);
}
} else {
dst = (inter_rank * num_core) + (intra_rank & (~mask));
- MPI_Send(rbuf, count, dtype, dst, tag, comm);
+ smpi_mpi_send(rbuf, count, dtype, dst, tag, comm);
//printf("Node %d send to node %d when mask is %d\n", rank, dst, mask);
break;
}
// if (rank==7)
// printf("node %d send to %d in phase %d s_offset = %d r_offset = %d count = %d\n",rank,dst,phase, send_offset, recv_offset, curr_count);
- MPI_Sendrecv((char *)rbuf + send_offset, curr_count, dtype, (dst * num_core), tag,
+ smpi_mpi_sendrecv((char *)rbuf + send_offset, curr_count, dtype, (dst * num_core), tag,
tmp_buf, curr_count, dtype, (dst * num_core), tag,
comm, &status);
// if (rank==7)
//printf("node %d send to %d in phase %d s_offset = %d r_offset = %d count = %d\n",rank,dst,phase, send_offset, recv_offset, curr_count);
- MPI_Sendrecv((char *)rbuf + send_offset, curr_count, dtype, (dst * num_core), tag,
+ smpi_mpi_sendrecv((char *)rbuf + send_offset, curr_count, dtype, (dst * num_core), tag,
(char *)rbuf + recv_offset, curr_count, dtype, (dst * num_core), tag,
comm, &status);
if (intra_rank & mask) {
src = (inter_rank * num_core) + (intra_rank - mask);
//printf("Node %d recv from node %d when mask is %d\n", rank, src, mask);
- MPI_Recv(rbuf, count, dtype, src, tag, comm, &status);
+ smpi_mpi_recv(rbuf, count, dtype, src, tag, comm, &status);
break;
}
mask <<= 1;
dst = (inter_rank * num_core) + (intra_rank + mask);
if (dst < comm_size) {
//printf("Node %d send to node %d when mask is %d\n", rank, dst, mask);
- MPI_Send(rbuf, count, dtype, dst, tag, comm);
+ smpi_mpi_send(rbuf, count, dtype, dst, tag, comm);
}
mask >>= 1;
}
-#include "colls.h"
+#include "colls_private.h"
/* change the number of cores per smp-node
we assume that the number of cores per process will be the same for all implementations */
uop = op_ptr->op;
#endif
*/
- MPI_Comm_size(comm, &comm_size);
- MPI_Comm_rank(comm, &rank);
+ comm_size = smpi_comm_size(comm);
+ rank = smpi_comm_rank(comm);
MPI_Aint extent;
- MPI_Type_extent(dtype, &extent);
- tmp_buf = (void *) malloc(count * extent);
+ extent = smpi_datatype_get_extent(dtype);
+ tmp_buf = (void *) xbt_malloc(count * extent);
int intra_rank, inter_rank;
intra_rank = rank % num_core;
}
- MPI_Sendrecv(send_buf, count, dtype, rank, tag,
+ smpi_mpi_sendrecv(send_buf, count, dtype, rank, tag,
recv_buf, count, dtype, rank, tag, comm, &status);
src = (inter_rank * num_core) + (intra_rank | mask);
// if (src < ((inter_rank + 1) * num_core)) {
if (src < comm_size) {
- MPI_Recv(tmp_buf, count, dtype, src, tag, comm, &status);
+ smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status);
star_reduction(op, tmp_buf, recv_buf, &count, &dtype);
//printf("Node %d recv from node %d when mask is %d\n", rank, src, mask);
}
} else {
dst = (inter_rank * num_core) + (intra_rank & (~mask));
- MPI_Send(recv_buf, count, dtype, dst, tag, comm);
+ smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm);
//printf("Node %d send to node %d when mask is %d\n", rank, dst, mask);
break;
}
((inter_rank - 2 - i +
inter_comm_size) % inter_comm_size) * seg_count * extent;
- MPI_Sendrecv((char *) recv_buf + send_offset, seg_count, dtype, to,
+ smpi_mpi_sendrecv((char *) recv_buf + send_offset, seg_count, dtype, to,
tag + i, tmp_buf, seg_count, dtype, from, tag + i, comm,
&status);
((inter_rank - 1 - i +
inter_comm_size) % inter_comm_size) * seg_count * extent;
- MPI_Sendrecv((char *) recv_buf + send_offset, seg_count, dtype, to,
+ smpi_mpi_sendrecv((char *) recv_buf + send_offset, seg_count, dtype, to,
tag + i, (char *) recv_buf + recv_offset, seg_count, dtype,
from, tag + i, comm, &status);
if ((mask & inter_rank) == 0) {
src = (inter_rank | mask) * num_core;
if (src < comm_size) {
- MPI_Recv(tmp_buf, count, dtype, src, tag, comm, &status);
+ smpi_mpi_recv(tmp_buf, count, dtype, src, tag, comm, &status);
(* uop) (tmp_buf, recv_buf, &count, &dtype);
//printf("Node %d recv from node %d when mask is %d\n", rank, src, mask);
}
}
else {
dst = (inter_rank & (~mask)) * num_core;
- MPI_Send(recv_buf, count, dtype, dst, tag, comm);
+ smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm);
//printf("Node %d send to node %d when mask is %d\n", rank, dst, mask);
break;
}
if (inter_rank & mask) {
src = (inter_rank - mask) * num_core;
//printf("Node %d recv from node %d when mask is %d\n", rank, src, mask);
- MPI_Recv(recv_buf, count, dtype, src, tag, comm, &status);
+ smpi_mpi_recv(recv_buf, count, dtype, src, tag, comm, &status);
break;
}
mask <<= 1;
dst = (inter_rank + mask) * num_core;
if (dst < comm_size) {
//printf("Node %d send to node %d when mask is %d\n", rank, dst, mask);
- MPI_Send(recv_buf, count, dtype, dst, tag, comm);
+ smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm);
}
}
mask >>= 1;
if (intra_rank & mask) {
src = (inter_rank * num_core) + (intra_rank - mask);
//printf("Node %d recv from node %d when mask is %d\n", rank, src, mask);
- MPI_Recv(recv_buf, count, dtype, src, tag, comm, &status);
+ smpi_mpi_recv(recv_buf, count, dtype, src, tag, comm, &status);
break;
}
mask <<= 1;
dst = (inter_rank * num_core) + (intra_rank + mask);
if (dst < comm_size) {
//printf("Node %d send to node %d when mask is %d\n", rank, dst, mask);
- MPI_Send(recv_buf, count, dtype, dst, tag, comm);
+ smpi_mpi_send(recv_buf, count, dtype, dst, tag, comm);
}
mask >>= 1;
}
-#include "colls.h"
+#include "colls_private.h"
#include <math.h>
-XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_colls, smpi,
- "Logging specific to SMPI collectives");
-
/*****************************************************************************
* Function: alltoall_2dmesh_shoot
int my_row_base, my_col_base, src_row_base, block_size;
int tag = 1;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
- MPI_Type_extent(send_type, &extent);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
+ extent = smpi_datatype_get_extent(send_type);
if (!alltoall_check_is_2dmesh(num_procs, &X, &Y))
return MPI_ERR_OTHER;
block_size = extent * send_count;
- tmp_buff1 = (char *) malloc(block_size * num_procs * Y);
- if (!tmp_buff1) {
- XBT_DEBUG("alltoall-2dmesh_shoot.c:88: cannot allocate memory");
- MPI_Finalize();
- exit(MPI_ERR_OTHER);
- }
-
- tmp_buff2 = (char *) malloc(block_size * Y);
- if (!tmp_buff2) {
- XBT_WARN("alltoall-2dmesh_shoot.c:88: cannot allocate memory");
- MPI_Finalize();
- exit(MPI_ERR_OTHER);
- }
-
-
+ tmp_buff1 = (char *) xbt_malloc(block_size * num_procs * Y);
+ tmp_buff2 = (char *) xbt_malloc(block_size * Y);
num_reqs = X;
if (Y > X)
num_reqs = Y;
- statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status));
- reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request));
- if (!reqs) {
- XBT_WARN("alltoall-2dmesh_shoot.c:88: cannot allocate memory");
- MPI_Finalize();
- exit(MPI_ERR_OTHER);
- }
+ statuses = (MPI_Status *) xbt_malloc(num_reqs * sizeof(MPI_Status));
+ reqs = (MPI_Request *) xbt_malloc(num_reqs * sizeof(MPI_Request));
req_ptr = reqs;
continue;
recv_offset = (src % Y) * block_size * num_procs;
- MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm,
- req_ptr++);
+ *(req_ptr++) = smpi_mpi_irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm);
}
for (i = 0; i < Y; i++) {
dst = i + my_row_base;
if (dst == rank)
continue;
- MPI_Send(send_buff, count, send_type, dst, tag, comm);
+ smpi_mpi_send(send_buff, count, send_type, dst, tag, comm);
}
- MPI_Waitall(Y - 1, reqs, statuses);
+ smpi_mpi_waitall(Y - 1, reqs, statuses);
req_ptr = reqs;
for (i = 0; i < Y; i++) {
recv_offset = (my_row_base * block_size) + (i * block_size);
if (i + my_row_base == rank)
- MPI_Sendrecv((char *) send_buff + recv_offset, send_count, send_type,
+ smpi_mpi_sendrecv((char *) send_buff + recv_offset, send_count, send_type,
rank, tag,
(char *) recv_buff + recv_offset, recv_count, recv_type,
rank, tag, comm, &s);
else
- MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
+ smpi_mpi_sendrecv(tmp_buff1 + send_offset, send_count, send_type,
rank, tag,
(char *) recv_buff + recv_offset, recv_count, recv_type,
rank, tag, comm, &s);
continue;
src_row_base = (src / Y) * Y;
- MPI_Irecv((char *) recv_buff + src_row_base * block_size, recv_count * Y,
- recv_type, src, tag, comm, req_ptr++);
+ *(req_ptr++) = smpi_mpi_irecv((char *) recv_buff + src_row_base * block_size, recv_count * Y,
+ recv_type, src, tag, comm);
}
for (i = 0; i < X; i++) {
send_offset = (dst + j * num_procs) * block_size;
if (j + my_row_base == rank)
- MPI_Sendrecv((char *) send_buff + dst * block_size, send_count,
+ smpi_mpi_sendrecv((char *) send_buff + dst * block_size, send_count,
send_type, rank, tag, tmp_buff2 + recv_offset, recv_count,
recv_type, rank, tag, comm, &s);
else
- MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
+ smpi_mpi_sendrecv(tmp_buff1 + send_offset, send_count, send_type,
rank, tag,
tmp_buff2 + recv_offset, recv_count, recv_type,
rank, tag, comm, &s);
recv_offset += block_size;
}
- MPI_Send(tmp_buff2, send_count * Y, send_type, dst, tag, comm);
+ smpi_mpi_send(tmp_buff2, send_count * Y, send_type, dst, tag, comm);
}
- MPI_Waitall(X - 1, reqs, statuses);
+ smpi_mpi_waitall(X - 1, reqs, statuses);
free(reqs);
free(statuses);
free(tmp_buff1);
-#include "colls.h"
+#include "colls_private.h"
#include <math.h>
/*****************************************************************************
char *tmp_buff1, *tmp_buff2;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
- MPI_Type_extent(send_type, &extent);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
+ extent = smpi_datatype_get_extent(send_type);
if (!alltoall_check_is_3dmesh(num_procs, &X, &Y, &Z))
return MPI_ERR_OTHER;
send_offset = recv_offset = (rank % two_dsize) * block_size * num_procs;
- MPI_Sendrecv(send_buff, send_count * num_procs, send_type, rank, tag,
+ smpi_mpi_sendrecv(send_buff, send_count * num_procs, send_type, rank, tag,
tmp_buff1 + recv_offset, num_procs * recv_count,
recv_type, rank, tag, comm, &status);
if (src == rank)
continue;
recv_offset = (src % two_dsize) * block_size * num_procs;
- MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm,
- req_ptr++);
+ *(req_ptr++) = smpi_mpi_irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm);
}
for (i = 0; i < Y; i++) {
dst = i + my_row_base;
if (dst == rank)
continue;
- MPI_Send(send_buff, count, send_type, dst, tag, comm);
+ smpi_mpi_send(send_buff, count, send_type, dst, tag, comm);
}
- MPI_Waitall(Y - 1, reqs, statuses);
+ smpi_mpi_waitall(Y - 1, reqs, statuses);
req_ptr = reqs;
src_row_base = (src / X) * X;
recv_offset = (src_row_base % two_dsize) * block_size * num_procs;
- MPI_Irecv(tmp_buff1 + recv_offset, recv_count * num_procs * Y,
- recv_type, src, tag, comm, req_ptr++);
+ *(req_ptr++) = smpi_mpi_irecv(tmp_buff1 + recv_offset, recv_count * num_procs * Y,
+ recv_type, src, tag, comm);
}
send_offset = (my_row_base % two_dsize) * block_size * num_procs;
dst = (i * Y + my_col_base);
if (dst == rank)
continue;
- MPI_Send(tmp_buff1 + send_offset, send_count * num_procs * Y, send_type,
+ smpi_mpi_send(tmp_buff1 + send_offset, send_count * num_procs * Y, send_type,
dst, tag, comm);
}
- MPI_Waitall(X - 1, reqs, statuses);
+ smpi_mpi_waitall(X - 1, reqs, statuses);
req_ptr = reqs;
for (i = 0; i < two_dsize; i++) {
send_offset = (rank * block_size) + (i * block_size * num_procs);
recv_offset = (my_z_base * block_size) + (i * block_size);
- MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag,
+ smpi_mpi_sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag,
(char *) recv_buff + recv_offset, recv_count, recv_type,
rank, tag, comm, &status);
}
recv_offset = (src_z_base * block_size);
- MPI_Irecv((char *) recv_buff + recv_offset, recv_count * two_dsize,
- recv_type, src, tag, comm, req_ptr++);
+ *(req_ptr++) = smpi_mpi_irecv((char *) recv_buff + recv_offset, recv_count * two_dsize,
+ recv_type, src, tag, comm);
}
for (i = 1; i < Z; i++) {
recv_offset = 0;
for (j = 0; j < two_dsize; j++) {
send_offset = (dst + j * num_procs) * block_size;
- MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
+ smpi_mpi_sendrecv(tmp_buff1 + send_offset, send_count, send_type,
rank, tag, tmp_buff2 + recv_offset, recv_count,
recv_type, rank, tag, comm, &status);
recv_offset += block_size;
}
- MPI_Send(tmp_buff2, send_count * two_dsize, send_type, dst, tag, comm);
+ smpi_mpi_send(tmp_buff2, send_count * two_dsize, send_type, dst, tag, comm);
}
- MPI_Waitall(Z - 1, reqs, statuses);
+ smpi_mpi_waitall(Z - 1, reqs, statuses);
free(reqs);
free(statuses);
char *send_ptr = (char *) send_buff;
char *recv_ptr = (char *) recv_buff;
- MPI_Comm_size(comm, &num_procs);
- MPI_Comm_rank(comm, &rank);
+ num_procs = smpi_comm_size(comm);
+ rank = smpi_comm_rank(comm);
- MPI_Type_extent(recv_type, &extent);
+ extent = smpi_datatype_get_extent(recv_type);
tmp_buff = (char *) xbt_malloc(num_procs * recv_count * extent);
disps = (int *) xbt_malloc(sizeof(int) * num_procs);
blocks_length = (int *) xbt_malloc(sizeof(int) * num_procs);
- MPI_Sendrecv(send_ptr + rank * send_count * extent,
+ smpi_mpi_sendrecv(send_ptr + rank * send_count * extent,
(num_procs - rank) * send_count, send_type, rank, tag,
recv_ptr, (num_procs - rank) * recv_count, recv_type, rank,
tag, comm, &status);
- MPI_Sendrecv(send_ptr, rank * send_count, send_type, rank, tag,
+ smpi_mpi_sendrecv(send_ptr, rank * send_count, send_type, rank, tag,
recv_ptr + (num_procs - rank) * recv_count * extent,
rank * recv_count, recv_type, rank, tag, comm, &status);
}
MPI_Type_indexed(count, blocks_length, disps, recv_type, &new_type);
- MPI_Type_commit(&new_type);
+ smpi_datatype_commit(&new_type);
position = 0;
MPI_Pack(recv_buff, 1, new_type, tmp_buff, pack_size, &position, comm);
- MPI_Sendrecv(tmp_buff, position, MPI_PACKED, dst, tag, recv_buff, 1,
+ smpi_mpi_sendrecv(tmp_buff, position, MPI_PACKED, dst, tag, recv_buff, 1,
new_type, src, tag, comm, &status);
- MPI_Type_free(&new_type);
+ smpi_datatype_free(&new_type);
pof2 *= 2;
}
free(disps);
free(blocks_length);
- MPI_Sendrecv(recv_ptr + (rank + 1) * recv_count * extent,
+ smpi_mpi_sendrecv(recv_ptr + (rank + 1) * recv_count * extent,
(num_procs - rank - 1) * recv_count, send_type,
rank, tag, tmp_buff, (num_procs - rank - 1) * recv_count,
recv_type, rank, tag, comm, &status);
- MPI_Sendrecv(recv_ptr, (rank + 1) * recv_count, send_type, rank, tag,
+ smpi_mpi_sendrecv(recv_ptr, (rank + 1) * recv_count, send_type, rank, tag,
tmp_buff + (num_procs - rank - 1) * recv_count * extent,
(rank + 1) * recv_count, recv_type, rank, tag, comm, &status);
for (i = 0; i < num_procs; i++)
- MPI_Sendrecv(tmp_buff + i * recv_count * extent, recv_count, send_type,
+ smpi_mpi_sendrecv(tmp_buff + i * recv_count * extent, recv_count, send_type,
rank, tag,
recv_ptr + (num_procs - i - 1) * recv_count * extent,
recv_count, recv_type, rank, tag, comm, &status);
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
* Function: alltoall_pair_light_barrier
char *send_ptr = (char *) send_buff;
char *recv_ptr = (char *) recv_buff;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
- MPI_Type_extent(send_type, &send_chunk);
- MPI_Type_extent(recv_type, &recv_chunk);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
+ send_chunk = smpi_datatype_get_extent(send_type);
+ recv_chunk = smpi_datatype_get_extent(recv_type);
send_chunk *= send_count;
recv_chunk *= recv_count;
- MPI_Sendrecv(send_ptr + rank * send_chunk, send_count, send_type, rank, tag,
+ smpi_mpi_sendrecv(send_ptr + rank * send_chunk, send_count, send_type, rank, tag,
recv_ptr + rank * recv_chunk, recv_count, recv_type, rank, tag,
comm, &s);
for (i = 1; i < num_procs; i++) {
src = dst = rank ^ i;
- MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type,
+ smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type,
dst, tag, recv_ptr + src * recv_chunk, recv_count,
recv_type, src, tag, comm, &s);
if ((i + 1) < num_procs) {
next_partner = rank ^ (i + 1);
- MPI_Sendrecv(&send_sync, 1, MPI_CHAR, next_partner, tag,
+ smpi_mpi_sendrecv(&send_sync, 1, MPI_CHAR, next_partner, tag,
&recv_sync, 1, MPI_CHAR, next_partner, tag, comm, &s);
}
}
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
* Function: alltoall_pair_mpi_barrier
char *send_ptr = (char *) send_buff;
char *recv_ptr = (char *) recv_buff;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
- MPI_Type_extent(send_type, &send_chunk);
- MPI_Type_extent(recv_type, &recv_chunk);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
+ send_chunk = smpi_datatype_get_extent(send_type);
+ recv_chunk = smpi_datatype_get_extent(recv_type);
send_chunk *= send_count;
recv_chunk *= recv_count;
for (i = 0; i < num_procs; i++) {
src = dst = rank ^ i;
- MPI_Barrier(comm);
- MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
+ smpi_mpi_barrier(comm);
+ smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
tag, recv_ptr + src * recv_chunk, recv_count, recv_type,
src, tag, comm, &s);
}
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
* Function: alltoall_pair
char *send_ptr = (char *) send_buff;
char *recv_ptr = (char *) recv_buff;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
- MPI_Type_extent(send_type, &send_chunk);
- MPI_Type_extent(recv_type, &recv_chunk);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
+ send_chunk = smpi_datatype_get_extent(send_type);
+ recv_chunk = smpi_datatype_get_extent(recv_type);
send_chunk *= send_count;
recv_chunk *= recv_count;
- MPI_Barrier(comm);
+ smpi_mpi_barrier(comm);
for (i = 0; i < num_procs; i++) {
src = dst = rank ^ i;
- MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
+ smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
tag, recv_ptr + src * recv_chunk, recv_count, recv_type,
src, tag, comm, &s);
}
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
char *send_ptr = (char *) send_buff;
char *recv_ptr = (char *) recv_buff;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
- MPI_Type_extent(send_type, &send_chunk);
- MPI_Type_extent(recv_type, &recv_chunk);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
+ send_chunk = smpi_datatype_get_extent(send_type);
+ recv_chunk = smpi_datatype_get_extent(recv_type);
MPI_Win_create(recv_buff, num_procs * recv_chunk * send_count, recv_chunk, 0,
comm, &win);
char *send_ptr = (char *) send_buff;
char *recv_ptr = (char *) recv_buff;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
- MPI_Type_extent(send_type, &send_chunk);
- MPI_Type_extent(recv_type, &recv_chunk);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
+ send_chunk = smpi_datatype_get_extent(send_type);
+ recv_chunk = smpi_datatype_get_extent(recv_type);
send_chunk *= send_count;
recv_chunk *= recv_count;
for (i = 0; i < num_procs; i++) {
src = dst = rank ^ i;
- MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag,
+ smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag,
recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag,
comm, &s);
}
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
int dst_tree_root, rank_tree_root, send_offset, recv_offset;
int rank, num_procs, j, k, dst, curr_size, max_size;
- int last_recv_count, tmp_mask, tree_root, num_procs_completed;
+ int last_recv_count = 0, tmp_mask, tree_root, num_procs_completed;
int tag = 1, mask = 1, i = 0;
char *tmp_buff;
char *send_ptr = (char *) send_buff;
char *recv_ptr = (char *) recv_buff;
- MPI_Comm_size(comm, &num_procs);
- MPI_Comm_rank(comm, &rank);
- MPI_Type_extent(send_type, &send_increment);
- MPI_Type_extent(recv_type, &recv_increment);
- MPI_Type_extent(recv_type, &extent);
+ num_procs = smpi_comm_size(comm);
+ rank = smpi_comm_rank(comm);
+ send_increment = smpi_datatype_get_extent(send_type);
+ recv_increment = smpi_datatype_get_extent(recv_type);
+ extent = smpi_datatype_get_extent(recv_type);
send_increment *= (send_count * num_procs);
recv_increment *= (recv_count * num_procs);
curr_size = send_count * num_procs;
- MPI_Sendrecv(send_ptr, curr_size, send_type, rank, tag,
+ smpi_mpi_sendrecv(send_ptr, curr_size, send_type, rank, tag,
tmp_buff + (rank * recv_increment),
curr_size, recv_type, rank, tag, comm, &status);
recv_offset = dst_tree_root * recv_increment;
if (dst < num_procs) {
- MPI_Sendrecv(tmp_buff + send_offset, curr_size, send_type, dst, tag,
+ smpi_mpi_sendrecv(tmp_buff + send_offset, curr_size, send_type, dst, tag,
tmp_buff + recv_offset, mask * recv_count * num_procs,
recv_type, dst, tag, comm, &status);
- MPI_Get_count(&status, recv_type, &last_recv_count);
+ last_recv_count = smpi_mpi_get_count(&status, recv_type);
curr_size += last_recv_count;
}
if ((dst > rank)
&& (rank < tree_root + num_procs_completed)
&& (dst >= tree_root + num_procs_completed)) {
- MPI_Send(tmp_buff + dst_tree_root * send_increment,
+ smpi_mpi_send(tmp_buff + dst_tree_root * send_increment,
last_recv_count, send_type, dst, tag, comm);
}
else if ((dst < rank)
&& (dst < tree_root + num_procs_completed)
&& (rank >= tree_root + num_procs_completed)) {
- MPI_Recv(tmp_buff + dst_tree_root * send_increment,
+ smpi_mpi_recv(tmp_buff + dst_tree_root * send_increment,
mask * num_procs * send_count, send_type, dst,
tag, comm, &status);
- MPI_Get_count(&status, send_type, &last_recv_count);
+ last_recv_count = smpi_mpi_get_count(&status, send_type);
curr_size += last_recv_count;
}
}
for (i = 0; i < num_procs; i++)
- MPI_Sendrecv(tmp_buff + (rank + i * num_procs) * send_count * extent,
+ smpi_mpi_sendrecv(tmp_buff + (rank + i * num_procs) * send_count * extent,
send_count, send_type, rank, tag,
recv_ptr + (i * recv_count * extent),
recv_count, recv_type, rank, tag, comm, &status);
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
* Function: alltoall_ring_light_barrier
char *send_ptr = (char *) send_buff;
char *recv_ptr = (char *) recv_buff;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
- MPI_Type_extent(send_type, &send_chunk);
- MPI_Type_extent(recv_type, &recv_chunk);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
+ send_chunk = smpi_datatype_get_extent(send_type);
+ recv_chunk = smpi_datatype_get_extent(recv_type);
send_chunk *= send_count;
recv_chunk *= recv_count;
- MPI_Sendrecv(send_ptr + rank * send_chunk, send_count, send_type, rank, tag,
+ smpi_mpi_sendrecv(send_ptr + rank * send_chunk, send_count, send_type, rank, tag,
recv_ptr + rank * recv_chunk, recv_count, recv_type, rank, tag,
comm, &s);
src = (rank - i + num_procs) % num_procs;
dst = (rank + i) % num_procs;
- MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type,
+ smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type,
dst, tag, recv_ptr + src * recv_chunk, recv_count,
recv_type, src, tag, comm, &s);
if ((i + 1) < num_procs) {
next_src = (rank - (i + 1) + num_procs) % num_procs;
next_dst = (rank + (i + 1) + num_procs) % num_procs;
- MPI_Sendrecv(&send_sync, 1, MPI_CHAR, next_src, tag,
+ smpi_mpi_sendrecv(&send_sync, 1, MPI_CHAR, next_src, tag,
&recv_sync, 1, MPI_CHAR, next_dst, tag, comm, &s);
}
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
* Function: alltoall_ring_mpi_barrier
char *send_ptr = (char *) send_buff;
char *recv_ptr = (char *) recv_buff;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
- MPI_Type_extent(send_type, &send_chunk);
- MPI_Type_extent(recv_type, &recv_chunk);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
+ send_chunk = smpi_datatype_get_extent(send_type);
+ recv_chunk = smpi_datatype_get_extent(recv_type);
send_chunk *= send_count;
recv_chunk *= recv_count;
src = (rank - i + num_procs) % num_procs;
dst = (rank + i) % num_procs;
- MPI_Barrier(comm);
- MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
+ smpi_mpi_barrier(comm);
+ smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
tag, recv_ptr + src * recv_chunk, recv_count, recv_type,
src, tag, comm, &s);
}
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
* Function: alltoall_ring
char *send_ptr = (char *) send_buff;
char *recv_ptr = (char *) recv_buff;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
- MPI_Type_extent(send_type, &send_chunk);
- MPI_Type_extent(recv_type, &recv_chunk);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
+ send_chunk = smpi_datatype_get_extent(send_type);
+ recv_chunk = smpi_datatype_get_extent(recv_type);
send_chunk *= send_count;
recv_chunk *= recv_count;
- MPI_Barrier(comm);
+ smpi_mpi_barrier(comm);
for (i = 0; i < num_procs; i++) {
src = (rank - i + num_procs) % num_procs;
dst = (rank + i) % num_procs;
- MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
+ smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
tag, recv_ptr + src * recv_chunk, recv_count, recv_type,
src, tag, comm, &s);
}
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
* Function: alltoall_ring
char *send_ptr = (char *) send_buff;
char *recv_ptr = (char *) recv_buff;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
- MPI_Type_extent(send_type, &send_chunk);
- MPI_Type_extent(recv_type, &recv_chunk);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
+ send_chunk = smpi_datatype_get_extent(send_type);
+ recv_chunk = smpi_datatype_get_extent(recv_type);
send_chunk *= send_count;
recv_chunk *= recv_count;
src = (rank - i + num_procs) % num_procs;
dst = (rank + i) % num_procs;
- MPI_Sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
+ smpi_mpi_sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
tag, recv_ptr + src * recv_chunk, recv_count, recv_type,
src, tag, comm, &s);
}
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
void *recv_buff, int recv_count,
MPI_Datatype recv_type, MPI_Comm comm)
{
- int i, rank, size, nreqs, err, src, dst, tag = 101;
+ int i, rank, size, nreqs, src, dst, tag = 101;
char *psnd;
char *prcv;
MPI_Aint sndinc;
MPI_Status s, *statuses;
- MPI_Comm_size(comm, &size);
- MPI_Comm_rank(comm, &rank);
- MPI_Type_extent(send_type, &sndinc);
- MPI_Type_extent(recv_type, &rcvinc);
+ size = smpi_comm_size(comm);
+ rank = smpi_comm_rank(comm);
+ sndinc = smpi_datatype_get_extent(send_type);
+ rcvinc = smpi_datatype_get_extent(recv_type);
sndinc *= send_count;
rcvinc *= recv_count;
psnd = ((char *) send_buff) + (rank * sndinc);
prcv = ((char *) recv_buff) + (rank * rcvinc);
- MPI_Sendrecv(psnd, send_count, send_type, rank, tag,
+ smpi_mpi_sendrecv(psnd, send_count, send_type, rank, tag,
prcv, recv_count, recv_type, rank, tag, comm, &s);
continue;
if (dst == rank)
continue;
- MPI_Recv_init(prcv + (src * rcvinc), recv_count, recv_type, src,
- tag, comm, preq++);
- MPI_Send_init(psnd + (dst * sndinc), send_count, send_type, dst,
- tag, comm, qreq++);
+ *(preq++) = smpi_mpi_recv_init(prcv + (src * rcvinc), recv_count, recv_type, src,
+ tag, comm);
+ *(qreq++) = smpi_mpi_send_init(psnd + (dst * sndinc), send_count, send_type, dst,
+ tag, comm);
}
/* Start all the requests. */
- err = MPI_Startall(nreqs, req);
+ smpi_mpi_startall(nreqs, req);
/* Wait for them all. */
- err = MPI_Waitall(nreqs, req, statuses);
-
- if (err != MPI_SUCCESS) {
- if (req)
- free((char *) req);
- return err;
- }
+ smpi_mpi_waitall(nreqs, req, statuses);
for (i = 0, preq = req; i < nreqs; ++i, ++preq) {
- err = MPI_Request_free(preq);
- if (err != MPI_SUCCESS) {
- if (req)
- free((char *) req);
- if (statuses)
- free(statuses);
- return err;
- }
+ smpi_mpi_request_free(preq);
}
/* All done */
-#include "colls.h"
+#include "colls_private.h"
int bcast_NTSB_segment_size_in_byte = 8192;
MPI_Status *recv_status_array;
MPI_Aint extent;
- MPI_Type_extent(datatype, &extent);
+ extent = smpi_datatype_get_extent(datatype);
- MPI_Comm_rank(MPI_COMM_WORLD, &rank);
- MPI_Comm_size(MPI_COMM_WORLD, &size);
+ /* Query rank and size on comm, the communicator every send/receive below
+  * uses. MPI_COMM_WORLD values only match when comm is the world
+  * communicator; on a sub-communicator they would yield wrong tree
+  * neighbours (from / to_left / to_right are derived from rank and size). */
+ rank = smpi_comm_rank(comm);
+ size = smpi_comm_size(comm);
/* source node and destination nodes (same through out the functions) */
int from = (rank - 1) / 2;
/* if root is not zero send to rank zero first */
if (root != 0) {
if (rank == root) {
- MPI_Send(buf, count, datatype, 0, tag, comm);
+ smpi_mpi_send(buf, count, datatype, 0, tag, comm);
} else if (rank == 0) {
- MPI_Recv(buf, count, datatype, root, tag, comm, &status);
+ smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status);
}
}
if (rank == 0) {
/* case root has only a left child */
if (to_right == -1) {
- MPI_Send(buf, count, datatype, to_left, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_left, tag, comm);
}
/* case root has both left and right children */
else {
- MPI_Send(buf, count, datatype, to_left, tag, comm);
- MPI_Send(buf, count, datatype, to_right, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_left, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_right, tag, comm);
}
}
/* case: leaf ==> receive only */
else if (to_left == -1) {
- MPI_Recv(buf, count, datatype, from, tag, comm, &status);
+ smpi_mpi_recv(buf, count, datatype, from, tag, comm, &status);
}
/* case: intermidiate node with only left child ==> relay message */
else if (to_right == -1) {
- MPI_Recv(buf, count, datatype, from, tag, comm, &status);
- MPI_Send(buf, count, datatype, to_left, tag, comm);
+ smpi_mpi_recv(buf, count, datatype, from, tag, comm, &status);
+ smpi_mpi_send(buf, count, datatype, to_left, tag, comm);
}
/* case: intermidiate node with both left and right children ==> relay message */
else {
- MPI_Recv(buf, count, datatype, from, tag, comm, &status);
- MPI_Send(buf, count, datatype, to_left, tag, comm);
- MPI_Send(buf, count, datatype, to_right, tag, comm);
+ smpi_mpi_recv(buf, count, datatype, from, tag, comm, &status);
+ smpi_mpi_send(buf, count, datatype, to_left, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_right, tag, comm);
}
return MPI_SUCCESS;
}
else {
send_request_array =
- (MPI_Request *) malloc(2 * (size + pipe_length) * sizeof(MPI_Request));
+ (MPI_Request *) xbt_malloc(2 * (size + pipe_length) * sizeof(MPI_Request));
recv_request_array =
- (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request));
+ (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
send_status_array =
- (MPI_Status *) malloc(2 * (size + pipe_length) * sizeof(MPI_Status));
+ (MPI_Status *) xbt_malloc(2 * (size + pipe_length) * sizeof(MPI_Status));
recv_status_array =
- (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status));
+ (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
/* case root has only a left child */
if (to_right == -1) {
for (i = 0; i < pipe_length; i++) {
- MPI_Isend((char *) buf + (i * increment), segment, datatype, to_left,
- tag + i, comm, &send_request_array[i]);
+ send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to_left,
+ tag + i, comm);
}
- MPI_Waitall((pipe_length), send_request_array, send_status_array);
+ smpi_mpi_waitall((pipe_length), send_request_array, send_status_array);
}
/* case root has both left and right children */
else {
for (i = 0; i < pipe_length; i++) {
- MPI_Isend((char *) buf + (i * increment), segment, datatype, to_left,
- tag + i, comm, &send_request_array[i]);
- MPI_Isend((char *) buf + (i * increment), segment, datatype, to_right,
- tag + i, comm, &send_request_array[i + pipe_length]);
+ send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to_left,
+ tag + i, comm);
+ send_request_array[i + pipe_length] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to_right,
+ tag + i, comm);
}
- MPI_Waitall((2 * pipe_length), send_request_array, send_status_array);
+ smpi_mpi_waitall((2 * pipe_length), send_request_array, send_status_array);
}
}
/* case: leaf ==> receive only */
else if (to_left == -1) {
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *) buf + (i * increment), segment, datatype, from,
- tag + i, comm, &recv_request_array[i]);
+ recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from,
+ tag + i, comm);
}
- MPI_Waitall((pipe_length), recv_request_array, recv_status_array);
+ smpi_mpi_waitall((pipe_length), recv_request_array, recv_status_array);
}
/* case: intermidiate node with only left child ==> relay message */
else if (to_right == -1) {
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *) buf + (i * increment), segment, datatype, from,
- tag + i, comm, &recv_request_array[i]);
+ recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from,
+ tag + i, comm);
}
for (i = 0; i < pipe_length; i++) {
- MPI_Wait(&recv_request_array[i], &status);
- MPI_Isend((char *) buf + (i * increment), segment, datatype, to_left,
- tag + i, comm, &send_request_array[i]);
+ smpi_mpi_wait(&recv_request_array[i], &status);
+ send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to_left,
+ tag + i, comm);
}
- MPI_Waitall(pipe_length, send_request_array, send_status_array);
+ smpi_mpi_waitall(pipe_length, send_request_array, send_status_array);
}
/* case: intermidiate node with both left and right children ==> relay message */
else {
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *) buf + (i * increment), segment, datatype, from,
- tag + i, comm, &recv_request_array[i]);
+ recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from,
+ tag + i, comm);
}
for (i = 0; i < pipe_length; i++) {
- MPI_Wait(&recv_request_array[i], &status);
- MPI_Isend((char *) buf + (i * increment), segment, datatype, to_left,
- tag + i, comm, &send_request_array[i]);
- MPI_Isend((char *) buf + (i * increment), segment, datatype, to_right,
- tag + i, comm, &send_request_array[i + pipe_length]);
+ smpi_mpi_wait(&recv_request_array[i], &status);
+ send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to_left,
+ tag + i, comm);
+ send_request_array[i + pipe_length] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to_right,
+ tag + i, comm);
}
- MPI_Waitall((2 * pipe_length), send_request_array, send_status_array);
+ smpi_mpi_waitall((2 * pipe_length), send_request_array, send_status_array);
}
free(send_request_array);
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
- MPI_Bcast((char *) buf + (pipe_length * increment), remainder, datatype,
+ XBT_WARN("MPI_bcast_NTSB use default MPI_bcast.");
+ smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype,
root, comm);
}
-#include "colls.h"
+#include "colls_private.h"
static int bcast_NTSL_segment_size_in_byte = 8192;
int rank, size;
int i;
MPI_Aint extent;
- MPI_Type_extent(datatype, &extent);
+ extent = smpi_datatype_get_extent(datatype);
- MPI_Comm_rank(MPI_COMM_WORLD, &rank);
- MPI_Comm_size(MPI_COMM_WORLD, &size);
+ /* Query rank and size on comm, the communicator every send/receive below
+  * uses. MPI_COMM_WORLD values only match when comm is the world
+  * communicator; on a sub-communicator the neighbour computed as
+  * (rank + 1) % size would be wrong. */
+ rank = smpi_comm_rank(comm);
+ size = smpi_comm_size(comm);
/* source node and destination nodes (same through out the functions) */
int to = (rank + 1) % size;
*/
if (root != 0) {
if (rank == root) {
- MPI_Send(buf, count, datatype, 0, tag, comm);
+ smpi_mpi_send(buf, count, datatype, 0, tag, comm);
} else if (rank == 0) {
- MPI_Recv(buf, count, datatype, root, tag, comm, &status);
+ smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status);
}
}
/* when a message is smaller than a block size => no pipeline */
if (count <= segment) {
if (rank == 0) {
- MPI_Send(buf, count, datatype, to, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to, tag, comm);
} else if (rank == (size - 1)) {
- MPI_Irecv(buf, count, datatype, from, tag, comm, &request);
- MPI_Wait(&request, &status);
+ request = smpi_mpi_irecv(buf, count, datatype, from, tag, comm);
+ smpi_mpi_wait(&request, &status);
} else {
- MPI_Irecv(buf, count, datatype, from, tag, comm, &request);
- MPI_Wait(&request, &status);
- MPI_Send(buf, count, datatype, to, tag, comm);
+ request = smpi_mpi_irecv(buf, count, datatype, from, tag, comm);
+ smpi_mpi_wait(&request, &status);
+ smpi_mpi_send(buf, count, datatype, to, tag, comm);
}
return MPI_SUCCESS;
}
/* pipeline bcast */
else {
send_request_array =
- (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request));
+ (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
recv_request_array =
- (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request));
+ (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
send_status_array =
- (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status));
+ (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
recv_status_array =
- (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status));
+ (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
/* root send data */
if (rank == 0) {
for (i = 0; i < pipe_length; i++) {
- MPI_Isend((char *) buf + (i * increment), segment, datatype, to,
- (tag + i), comm, &send_request_array[i]);
+ send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to,
+ (tag + i), comm);
}
- MPI_Waitall((pipe_length), send_request_array, send_status_array);
+ smpi_mpi_waitall((pipe_length), send_request_array, send_status_array);
}
/* last node only receive data */
else if (rank == (size - 1)) {
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *) buf + (i * increment), segment, datatype, from,
- (tag + i), comm, &recv_request_array[i]);
+ recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from,
+ (tag + i), comm);
}
- MPI_Waitall((pipe_length), recv_request_array, recv_status_array);
+ smpi_mpi_waitall((pipe_length), recv_request_array, recv_status_array);
}
/* intermediate nodes relay (receive, then send) data */
else {
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *) buf + (i * increment), segment, datatype, from,
- (tag + i), comm, &recv_request_array[i]);
+ recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from,
+ (tag + i), comm);
}
for (i = 0; i < pipe_length; i++) {
- MPI_Wait(&recv_request_array[i], &status);
- MPI_Isend((char *) buf + (i * increment), segment, datatype, to,
- (tag + i), comm, &send_request_array[i]);
+ smpi_mpi_wait(&recv_request_array[i], &status);
+ send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to,
+ (tag + i), comm);
}
- MPI_Waitall((pipe_length), send_request_array, send_status_array);
+ smpi_mpi_waitall((pipe_length), send_request_array, send_status_array);
}
free(send_request_array);
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
- MPI_Bcast((char *) buf + (pipe_length * increment), remainder, datatype,
+ XBT_WARN("MPI_bcast_NTSL_Isend_nb use default MPI_bcast.");
+ smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype,
root, comm);
}
-#include "colls.h"
+#include "colls_private.h"
static int bcast_NTSL_segment_size_in_byte = 8192;
int rank, size;
int i;
MPI_Aint extent;
- MPI_Type_extent(datatype, &extent);
+ extent = smpi_datatype_get_extent(datatype);
- MPI_Comm_rank(MPI_COMM_WORLD, &rank);
- MPI_Comm_size(MPI_COMM_WORLD, &size);
+ /* Query rank and size on comm, the communicator every send/receive below
+  * uses. MPI_COMM_WORLD values only match when comm is the world
+  * communicator; on a sub-communicator the neighbour computed as
+  * (rank + 1) % size would be wrong. */
+ rank = smpi_comm_rank(comm);
+ size = smpi_comm_size(comm);
/* source node and destination nodes (same through out the functions) */
int to = (rank + 1) % size;
*/
if (root != 0) {
if (rank == root) {
- MPI_Send(buf, count, datatype, 0, tag, comm);
+ smpi_mpi_send(buf, count, datatype, 0, tag, comm);
} else if (rank == 0) {
- MPI_Recv(buf, count, datatype, root, tag, comm, &status);
+ smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status);
}
}
/* when a message is smaller than a block size => no pipeline */
if (count <= segment) {
if (rank == 0) {
- MPI_Send(buf, count, datatype, to, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to, tag, comm);
} else if (rank == (size - 1)) {
- MPI_Irecv(buf, count, datatype, from, tag, comm, &request);
- MPI_Wait(&request, &status);
+ request = smpi_mpi_irecv(buf, count, datatype, from, tag, comm);
+ smpi_mpi_wait(&request, &status);
} else {
- MPI_Irecv(buf, count, datatype, from, tag, comm, &request);
- MPI_Wait(&request, &status);
- MPI_Send(buf, count, datatype, to, tag, comm);
+ request = smpi_mpi_irecv(buf, count, datatype, from, tag, comm);
+ smpi_mpi_wait(&request, &status);
+ smpi_mpi_send(buf, count, datatype, to, tag, comm);
}
return MPI_SUCCESS;
}
/* pipeline bcast */
else {
send_request_array =
- (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request));
+ (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
recv_request_array =
- (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request));
+ (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
send_status_array =
- (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status));
+ (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
recv_status_array =
- (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status));
+ (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
/* root send data */
if (rank == 0) {
for (i = 0; i < pipe_length; i++) {
- MPI_Isend((char *) buf + (i * increment), segment, datatype, to,
- (tag + i), comm, &send_request_array[i]);
+ send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to,
+ (tag + i), comm);
}
- MPI_Waitall((pipe_length), send_request_array, send_status_array);
+ smpi_mpi_waitall((pipe_length), send_request_array, send_status_array);
}
/* last node only receive data */
else if (rank == (size - 1)) {
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *) buf + (i * increment), segment, datatype, from,
- (tag + i), comm, &recv_request_array[i]);
+ recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from,
+ (tag + i), comm);
}
- MPI_Waitall((pipe_length), recv_request_array, recv_status_array);
+ smpi_mpi_waitall((pipe_length), recv_request_array, recv_status_array);
}
/* intermediate nodes relay (receive, then send) data */
else {
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *) buf + (i * increment), segment, datatype, from,
- (tag + i), comm, &recv_request_array[i]);
+ recv_request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype, from,
+ (tag + i), comm);
}
for (i = 0; i < pipe_length; i++) {
- MPI_Wait(&recv_request_array[i], &status);
- MPI_Isend((char *) buf + (i * increment), segment, datatype, to,
- (tag + i), comm, &send_request_array[i]);
+ smpi_mpi_wait(&recv_request_array[i], &status);
+ send_request_array[i] = smpi_mpi_isend((char *) buf + (i * increment), segment, datatype, to,
+ (tag + i), comm);
}
- MPI_Waitall((pipe_length), send_request_array, send_status_array);
+ smpi_mpi_waitall((pipe_length), send_request_array, send_status_array);
}
free(send_request_array);
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
- MPI_Bcast((char *) buf + (pipe_length * increment), remainder, datatype,
+ XBT_WARN("MPI_bcast_arrival_NTSL use default MPI_bcast.");
+ smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype,
root, comm);
}
-#include "colls.h"
+#include "colls_private.h"
#ifndef NUM_CORE
#define NUM_CORE 8
#endif
int rank, size;
int i;
MPI_Aint extent;
- MPI_Type_extent(datatype, &extent);
+ extent = smpi_datatype_get_extent(datatype);
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &size);
+ rank = smpi_comm_rank(comm);
+ size = smpi_comm_size(comm);
int segment = bcast_SMP_binary_segment_byte / extent;
int pipe_length = count / segment;
// if root is not zero send to rank zero first
if (root != 0) {
if (rank == root)
- MPI_Send(buf, count, datatype, 0, tag, comm);
+ smpi_mpi_send(buf, count, datatype, 0, tag, comm);
else if (rank == 0)
- MPI_Recv(buf, count, datatype, root, tag, comm, &status);
+ smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status);
}
// when a message is smaller than a block size => no pipeline
if (count <= segment) {
if (rank == 0) {
//printf("node %d left %d right %d\n",rank,to_inter_left,to_inter_right);
if (to_inter_left < size)
- MPI_Send(buf, count, datatype, to_inter_left, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_inter_left, tag, comm);
if (to_inter_right < size)
- MPI_Send(buf, count, datatype, to_inter_right, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_inter_right, tag, comm);
if ((to_intra_left - base) < num_core)
- MPI_Send(buf, count, datatype, to_intra_left, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_intra_left, tag, comm);
if ((to_intra_right - base) < num_core)
- MPI_Send(buf, count, datatype, to_intra_right, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_intra_right, tag, comm);
}
// case LEAVES ROOT-of-each-SMP
else if (to_inter_left >= size) {
//printf("node %d from %d\n",rank,from_inter);
- MPI_Irecv(buf, count, datatype, from_inter, tag, comm, &request);
- MPI_Wait(&request, &status);
+ request = smpi_mpi_irecv(buf, count, datatype, from_inter, tag, comm);
+ smpi_mpi_wait(&request, &status);
if ((to_intra_left - base) < num_core)
- MPI_Send(buf, count, datatype, to_intra_left, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_intra_left, tag, comm);
if ((to_intra_right - base) < num_core)
- MPI_Send(buf, count, datatype, to_intra_right, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_intra_right, tag, comm);
}
// case INTERMEDIATE ROOT-of-each-SMP
else {
//printf("node %d left %d right %d from %d\n",rank,to_inter_left,to_inter_right,from_inter);
- MPI_Irecv(buf, count, datatype, from_inter, tag, comm, &request);
- MPI_Wait(&request, &status);
- MPI_Send(buf, count, datatype, to_inter_left, tag, comm);
+ request = smpi_mpi_irecv(buf, count, datatype, from_inter, tag, comm);
+ smpi_mpi_wait(&request, &status);
+ smpi_mpi_send(buf, count, datatype, to_inter_left, tag, comm);
if (to_inter_right < size)
- MPI_Send(buf, count, datatype, to_inter_right, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_inter_right, tag, comm);
if ((to_intra_left - base) < num_core)
- MPI_Send(buf, count, datatype, to_intra_left, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_intra_left, tag, comm);
if ((to_intra_right - base) < num_core)
- MPI_Send(buf, count, datatype, to_intra_right, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_intra_right, tag, comm);
}
}
// case non ROOT-of-each-SMP
else {
// case leaves
if ((to_intra_left - base) >= num_core) {
- MPI_Irecv(buf, count, datatype, from_intra, tag, comm, &request);
- MPI_Wait(&request, &status);
+ request = smpi_mpi_irecv(buf, count, datatype, from_intra, tag, comm);
+ smpi_mpi_wait(&request, &status);
}
// case intermediate
else {
- MPI_Irecv(buf, count, datatype, from_intra, tag, comm, &request);
- MPI_Wait(&request, &status);
- MPI_Send(buf, count, datatype, to_intra_left, tag, comm);
+ request = smpi_mpi_irecv(buf, count, datatype, from_intra, tag, comm);
+ smpi_mpi_wait(&request, &status);
+ smpi_mpi_send(buf, count, datatype, to_intra_left, tag, comm);
if ((to_intra_right - base) < num_core)
- MPI_Send(buf, count, datatype, to_intra_right, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_intra_right, tag, comm);
}
}
// pipeline bcast
else {
request_array =
- (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request));
+ (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
status_array =
- (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status));
+ (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
// case ROOT-of-each-SMP
if (rank % NUM_CORE == 0) {
for (i = 0; i < pipe_length; i++) {
//printf("node %d left %d right %d\n",rank,to_inter_left,to_inter_right);
if (to_inter_left < size)
- MPI_Send((char *) buf + (i * increment), segment, datatype,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
to_inter_left, (tag + i), comm);
if (to_inter_right < size)
- MPI_Send((char *) buf + (i * increment), segment, datatype,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
to_inter_right, (tag + i), comm);
if ((to_intra_left - base) < num_core)
- MPI_Send((char *) buf + (i * increment), segment, datatype,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
to_intra_left, (tag + i), comm);
if ((to_intra_right - base) < num_core)
- MPI_Send((char *) buf + (i * increment), segment, datatype,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
to_intra_right, (tag + i), comm);
}
}
else if (to_inter_left >= size) {
//printf("node %d from %d\n",rank,from_inter);
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *) buf + (i * increment), segment, datatype,
- from_inter, (tag + i), comm, &request_array[i]);
+ request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype,
+ from_inter, (tag + i), comm);
}
for (i = 0; i < pipe_length; i++) {
- MPI_Wait(&request_array[i], &status);
+ smpi_mpi_wait(&request_array[i], &status);
if ((to_intra_left - base) < num_core)
- MPI_Send((char *) buf + (i * increment), segment, datatype,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
to_intra_left, (tag + i), comm);
if ((to_intra_right - base) < num_core)
- MPI_Send((char *) buf + (i * increment), segment, datatype,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
to_intra_right, (tag + i), comm);
}
}
else {
//printf("node %d left %d right %d from %d\n",rank,to_inter_left,to_inter_right,from_inter);
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *) buf + (i * increment), segment, datatype,
- from_inter, (tag + i), comm, &request_array[i]);
+ request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype,
+ from_inter, (tag + i), comm);
}
for (i = 0; i < pipe_length; i++) {
- MPI_Wait(&request_array[i], &status);
- MPI_Send((char *) buf + (i * increment), segment, datatype,
+ smpi_mpi_wait(&request_array[i], &status);
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
to_inter_left, (tag + i), comm);
if (to_inter_right < size)
- MPI_Send((char *) buf + (i * increment), segment, datatype,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
to_inter_right, (tag + i), comm);
if ((to_intra_left - base) < num_core)
- MPI_Send((char *) buf + (i * increment), segment, datatype,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
to_intra_left, (tag + i), comm);
if ((to_intra_right - base) < num_core)
- MPI_Send((char *) buf + (i * increment), segment, datatype,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
to_intra_right, (tag + i), comm);
}
}
// case leaves
if ((to_intra_left - base) >= num_core) {
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *) buf + (i * increment), segment, datatype,
- from_intra, (tag + i), comm, &request_array[i]);
+ request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype,
+ from_intra, (tag + i), comm);
}
- MPI_Waitall((pipe_length), request_array, status_array);
+ smpi_mpi_waitall((pipe_length), request_array, status_array);
}
// case intermediate
else {
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *) buf + (i * increment), segment, datatype,
- from_intra, (tag + i), comm, &request_array[i]);
+ request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype,
+ from_intra, (tag + i), comm);
}
for (i = 0; i < pipe_length; i++) {
- MPI_Wait(&request_array[i], &status);
- MPI_Send((char *) buf + (i * increment), segment, datatype,
+ smpi_mpi_wait(&request_array[i], &status);
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
to_intra_left, (tag + i), comm);
if ((to_intra_right - base) < num_core)
- MPI_Send((char *) buf + (i * increment), segment, datatype,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype,
to_intra_right, (tag + i), comm);
}
}
// when count is not divisible by block size, use default BCAST for the remainder
if ((remainder != 0) && (count > segment)) {
- MPI_Bcast((char *) buf + (pipe_length * increment), remainder, datatype,
+ XBT_WARN("MPI_bcast_SMP_binary use default MPI_bcast.");
+ smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype,
root, comm);
}
-#include "colls.h"
+#include "colls_private.h"
#ifndef NUM_CORE
#define NUM_CORE 8
#endif
MPI_Status status;
int tag = 50;
- MPI_Comm_size(comm, &size);
- MPI_Comm_rank(comm, &rank);
+ size = smpi_comm_size(comm);
+ rank = smpi_comm_rank(comm);
int to_intra, to_inter;
int from_intra, from_inter;
// if root is not zero send to rank zero first
if (root != 0) {
if (rank == root)
- MPI_Send(buf, count, datatype, 0, tag, comm);
+ smpi_mpi_send(buf, count, datatype, 0, tag, comm);
else if (rank == 0)
- MPI_Recv(buf, count, datatype, root, tag, comm, &status);
+ smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status);
}
//FIRST STEP node 0 send to every root-of-each-SMP with binomial tree
if (inter_rank & mask) {
from_inter = (inter_rank - mask) * NUM_CORE;
//printf("Node %d recv from node %d when mask is %d\n", rank, from_inter, mask);
- MPI_Recv(buf, count, datatype, from_inter, tag, comm, &status);
+ smpi_mpi_recv(buf, count, datatype, from_inter, tag, comm, &status);
break;
}
mask <<= 1;
to_inter = (inter_rank + mask) * NUM_CORE;
if (to_inter < size) {
//printf("Node %d send to node %d when mask is %d\n", rank, to_inter, mask);
- MPI_Send(buf, count, datatype, to_inter, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_inter, tag, comm);
}
}
mask >>= 1;
if (intra_rank & mask) {
from_intra = base + (intra_rank - mask);
//printf("Node %d recv from node %d when mask is %d\n", rank, from_inter, mask);
- MPI_Recv(buf, count, datatype, from_intra, tag, comm, &status);
+ smpi_mpi_recv(buf, count, datatype, from_intra, tag, comm, &status);
break;
}
mask <<= 1;
to_intra = base + (intra_rank + mask);
if (to_intra < size) {
//printf("Node %d send to node %d when mask is %d\n", rank, to_inter, mask);
- MPI_Send(buf, count, datatype, to_intra, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_intra, tag, comm);
}
}
mask >>= 1;
-#include "colls.h"
+#include "colls_private.h"
#ifndef NUM_CORE
#define NUM_CORE 8
#endif
int rank, size;
int i;
MPI_Aint extent;
- MPI_Type_extent(datatype, &extent);
+ extent = smpi_datatype_get_extent(datatype);
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &size);
+ rank = smpi_comm_rank(comm);
+ size = smpi_comm_size(comm);
int segment = bcast_SMP_linear_segment_byte / extent;
int pipe_length = count / segment;
// call native when MPI communication size is too small
if (size <= NUM_CORE) {
- return MPI_Bcast(buf, count, datatype, root, comm);
+ XBT_WARN("MPI_bcast_SMP_linear use default MPI_bcast.");
+ smpi_mpi_bcast(buf, count, datatype, root, comm);
+ return MPI_SUCCESS;
}
// if root is not zero send to rank zero first
if (root != 0) {
if (rank == root)
- MPI_Send(buf, count, datatype, 0, tag, comm);
+ smpi_mpi_send(buf, count, datatype, 0, tag, comm);
else if (rank == 0)
- MPI_Recv(buf, count, datatype, root, tag, comm, &status);
+ smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status);
}
// when a message is smaller than a block size => no pipeline
if (count <= segment) {
// case ROOT
if (rank == 0) {
- MPI_Send(buf, count, datatype, to_inter, tag, comm);
- MPI_Send(buf, count, datatype, to_intra, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_inter, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_intra, tag, comm);
}
// case last ROOT of each SMP
else if (rank == (((size - 1) / NUM_CORE) * NUM_CORE)) {
- MPI_Irecv(buf, count, datatype, from_inter, tag, comm, &request);
- MPI_Wait(&request, &status);
- MPI_Send(buf, count, datatype, to_intra, tag, comm);
+ request = smpi_mpi_irecv(buf, count, datatype, from_inter, tag, comm);
+ smpi_mpi_wait(&request, &status);
+ smpi_mpi_send(buf, count, datatype, to_intra, tag, comm);
}
// case intermediate ROOT of each SMP
else if (rank % NUM_CORE == 0) {
- MPI_Irecv(buf, count, datatype, from_inter, tag, comm, &request);
- MPI_Wait(&request, &status);
- MPI_Send(buf, count, datatype, to_inter, tag, comm);
- MPI_Send(buf, count, datatype, to_intra, tag, comm);
+ request = smpi_mpi_irecv(buf, count, datatype, from_inter, tag, comm);
+ smpi_mpi_wait(&request, &status);
+ smpi_mpi_send(buf, count, datatype, to_inter, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_intra, tag, comm);
}
// case last non-ROOT of each SMP
else if (((rank + 1) % NUM_CORE == 0) || (rank == (size - 1))) {
- MPI_Irecv(buf, count, datatype, from_intra, tag, comm, &request);
- MPI_Wait(&request, &status);
+ request = smpi_mpi_irecv(buf, count, datatype, from_intra, tag, comm);
+ smpi_mpi_wait(&request, &status);
}
// case intermediate non-ROOT of each SMP
else {
- MPI_Irecv(buf, count, datatype, from_intra, tag, comm, &request);
- MPI_Wait(&request, &status);
- MPI_Send(buf, count, datatype, to_intra, tag, comm);
+ request = smpi_mpi_irecv(buf, count, datatype, from_intra, tag, comm);
+ smpi_mpi_wait(&request, &status);
+ smpi_mpi_send(buf, count, datatype, to_intra, tag, comm);
}
return MPI_SUCCESS;
}
// pipeline bcast
else {
request_array =
- (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request));
+ (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
status_array =
- (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status));
+ (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
// case ROOT of each SMP
if (rank % NUM_CORE == 0) {
// case real root
if (rank == 0) {
for (i = 0; i < pipe_length; i++) {
- MPI_Send((char *) buf + (i * increment), segment, datatype, to_inter,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_inter,
(tag + i), comm);
- MPI_Send((char *) buf + (i * increment), segment, datatype, to_intra,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra,
(tag + i), comm);
}
}
// case last ROOT of each SMP
else if (rank == (((size - 1) / NUM_CORE) * NUM_CORE)) {
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *) buf + (i * increment), segment, datatype,
- from_inter, (tag + i), comm, &request_array[i]);
+ request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype,
+ from_inter, (tag + i), comm);
}
for (i = 0; i < pipe_length; i++) {
- MPI_Wait(&request_array[i], &status);
- MPI_Send((char *) buf + (i * increment), segment, datatype, to_intra,
+ smpi_mpi_wait(&request_array[i], &status);
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra,
(tag + i), comm);
}
}
// case intermediate ROOT of each SMP
else {
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *) buf + (i * increment), segment, datatype,
- from_inter, (tag + i), comm, &request_array[i]);
+ request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype,
+ from_inter, (tag + i), comm);
}
for (i = 0; i < pipe_length; i++) {
- MPI_Wait(&request_array[i], &status);
- MPI_Send((char *) buf + (i * increment), segment, datatype, to_inter,
+ smpi_mpi_wait(&request_array[i], &status);
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_inter,
(tag + i), comm);
- MPI_Send((char *) buf + (i * increment), segment, datatype, to_intra,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra,
(tag + i), comm);
}
}
} else { // case last non-ROOT of each SMP
if (((rank + 1) % NUM_CORE == 0) || (rank == (size - 1))) {
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *) buf + (i * increment), segment, datatype,
- from_intra, (tag + i), comm, &request_array[i]);
+ request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype,
+ from_intra, (tag + i), comm);
}
for (i = 0; i < pipe_length; i++) {
- MPI_Wait(&request_array[i], &status);
+ smpi_mpi_wait(&request_array[i], &status);
}
}
// case intermediate non-ROOT of each SMP
else {
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *) buf + (i * increment), segment, datatype,
- from_intra, (tag + i), comm, &request_array[i]);
+ request_array[i] = smpi_mpi_irecv((char *) buf + (i * increment), segment, datatype,
+ from_intra, (tag + i), comm);
}
for (i = 0; i < pipe_length; i++) {
- MPI_Wait(&request_array[i], &status);
- MPI_Send((char *) buf + (i * increment), segment, datatype, to_intra,
+ smpi_mpi_wait(&request_array[i], &status);
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_intra,
(tag + i), comm);
}
}
// when count is not divisible by block size, use default BCAST for the remainder
if ((remainder != 0) && (count > segment)) {
- MPI_Bcast((char *) buf + (pipe_length * increment), remainder, datatype,
+ XBT_WARN("MPI_bcast_SMP_linear use default MPI_bcast.");
+ smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype,
root, comm);
}
- return 1;
+ return MPI_SUCCESS;
}
-#include "colls.h"
+#include "colls_private.h"
int binary_pipeline_bcast_tree_height = 10;
int binary_pipeline_bcast_send_to[2][128] = {
int i;
MPI_Aint extent;
- MPI_Type_extent(datatype, &extent);
+ extent = smpi_datatype_get_extent(datatype);
- MPI_Comm_rank(MPI_COMM_WORLD, &rank);
- MPI_Comm_size(MPI_COMM_WORLD, &size);
+ rank = smpi_comm_rank(MPI_COMM_WORLD);
+ size = smpi_comm_size(MPI_COMM_WORLD);
/* source node and destination nodes (same throughout the function) */
int to_left = binary_pipeline_bcast_send_to[0][rank];
/* if root is not zero send to rank zero first */
if (root != 0) {
if (rank == root) {
- MPI_Send(buf, count, datatype, 0, tag, comm);
+ smpi_mpi_send(buf, count, datatype, 0, tag, comm);
} else if (rank == 0) {
- MPI_Recv(buf, count, datatype, root, tag, comm, &status);
+ smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status);
}
}
if (rank == 0) {
/* case root has only a left child */
if (to_right == -1) {
- MPI_Send(buf, count, datatype, to_left, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_left, tag, comm);
}
/* case root has both left and right children */
else {
- MPI_Send(buf, count, datatype, to_left, tag, comm);
- MPI_Send(buf, count, datatype, to_right, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_left, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_right, tag, comm);
}
}
/* case: leaf ==> receive only */
else if (to_left == -1) {
- MPI_Recv(buf, count, datatype, from, tag, comm, &status);
+ smpi_mpi_recv(buf, count, datatype, from, tag, comm, &status);
}
/* case: intermediate node with only left child ==> relay message */
else if (to_right == -1) {
- MPI_Recv(buf, count, datatype, from, tag, comm, &status);
- MPI_Send(buf, count, datatype, to_left, tag, comm);
+ smpi_mpi_recv(buf, count, datatype, from, tag, comm, &status);
+ smpi_mpi_send(buf, count, datatype, to_left, tag, comm);
}
/* case: intermediate node with both left and right children ==> relay message */
else {
- MPI_Recv(buf, count, datatype, from, tag, comm, &status);
- MPI_Send(buf, count, datatype, to_left, tag, comm);
- MPI_Send(buf, count, datatype, to_right, tag, comm);
+ smpi_mpi_recv(buf, count, datatype, from, tag, comm, &status);
+ smpi_mpi_send(buf, count, datatype, to_left, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to_right, tag, comm);
}
return MPI_SUCCESS;
}
/* case root has only a left child */
if (to_right == -1) {
for (i = 0; i < pipe_length; i++) {
- MPI_Send((char *) buf + (i * increment), segment, datatype, to_left,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_left,
tag + i, comm);
}
}
/* case root has both left and right children */
else {
for (i = 0; i < pipe_length; i++) {
- MPI_Send((char *) buf + (i * increment), segment, datatype, to_left,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_left,
tag + i, comm);
- MPI_Send((char *) buf + (i * increment), segment, datatype, to_right,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_right,
tag + i, comm);
}
}
/* case: leaf ==> receive only */
else if (to_left == -1) {
for (i = 0; i < pipe_length; i++) {
- MPI_Recv((char *) buf + (i * increment), segment, datatype, from,
+ smpi_mpi_recv((char *) buf + (i * increment), segment, datatype, from,
tag + i, comm, &status);
}
}
/* case: intermediate node with only left child ==> relay message */
else if (to_right == -1) {
for (i = 0; i < pipe_length; i++) {
- MPI_Recv((char *) buf + (i * increment), segment, datatype, from,
+ smpi_mpi_recv((char *) buf + (i * increment), segment, datatype, from,
tag + i, comm, &status);
- MPI_Send((char *) buf + (i * increment), segment, datatype, to_left,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_left,
tag + i, comm);
}
}
/* case: intermediate node with both left and right children ==> relay message */
else {
for (i = 0; i < pipe_length; i++) {
- MPI_Recv((char *) buf + (i * increment), segment, datatype, from,
+ smpi_mpi_recv((char *) buf + (i * increment), segment, datatype, from,
tag + i, comm, &status);
- MPI_Send((char *) buf + (i * increment), segment, datatype, to_left,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_left,
tag + i, comm);
- MPI_Send((char *) buf + (i * increment), segment, datatype, to_right,
+ smpi_mpi_send((char *) buf + (i * increment), segment, datatype, to_right,
tag + i, comm);
}
}
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
- MPI_Bcast((char *) buf + (pipe_length * increment), remainder, datatype,
+ XBT_WARN("MPI_bcast_TSB use default MPI_bcast.");
+ smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype,
root, comm);
}
-#include "colls.h"
+#include "colls_private.h"
static int bcast_NTSL_segment_size_in_byte = 8192;
char temp_buf[MAX_NODE];
MPI_Aint extent;
- MPI_Type_extent(datatype, &extent);
+ extent = smpi_datatype_get_extent(datatype);
/* destination */
int to;
- MPI_Comm_rank(MPI_COMM_WORLD, &rank);
- MPI_Comm_size(MPI_COMM_WORLD, &size);
+ rank = smpi_comm_rank(MPI_COMM_WORLD);
+ size = smpi_comm_size(MPI_COMM_WORLD);
/* segment is segment size in number of elements (not bytes) */
*/
if (root != 0) {
if (rank == root) {
- MPI_Send(buf, count, datatype, 0, tag, comm);
+ smpi_mpi_send(buf, count, datatype, 0, tag, comm);
} else if (rank == 0) {
- MPI_Recv(buf, count, datatype, root, tag, comm, &status);
+ smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status);
}
}
// for (j=0;j<1000;j++) {
for (i = 1; i < size; i++) {
if (already_sent[i] == 0)
- MPI_Iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i],
+ smpi_mpi_iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i],
MPI_STATUSES_IGNORE);
}
//}
/* message arrive */
if ((flag_array[i] == 1) && (already_sent[i] == 0)) {
- MPI_Recv(temp_buf, 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
+ smpi_mpi_recv(temp_buf, 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
header_buf[header_index] = i;
header_index++;
sent_count++;
if (header_index != 0) {
header_buf[header_index] = -1;
to = header_buf[0];
- MPI_Send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm);
- MPI_Send(buf, count, datatype, to, tag, comm);
+ smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to, tag, comm);
}
/* randomly MPI_Send to one */
if (already_sent[i] == 0) {
header_buf[0] = i;
header_buf[1] = -1;
- MPI_Send(header_buf, HEADER_SIZE, MPI_INT, i, tag, comm);
- MPI_Send(buf, count, datatype, i, tag, comm);
+ smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, i, tag, comm);
+ smpi_mpi_send(buf, count, datatype, i, tag, comm);
already_sent[i] = 1;
sent_count++;
break;
else {
/* send 1-byte message to root */
- MPI_Send(temp_buf, 1, MPI_CHAR, 0, tag, comm);
+ smpi_mpi_send(temp_buf, 1, MPI_CHAR, 0, tag, comm);
/* wait for header and data, forward when required */
- MPI_Recv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm,
+ smpi_mpi_recv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm,
&status);
- MPI_Recv(buf, count, datatype, MPI_ANY_SOURCE, tag, comm, &status);
+ smpi_mpi_recv(buf, count, datatype, MPI_ANY_SOURCE, tag, comm, &status);
/* search for where it is */
int myordering = 0;
/* send header followed by data */
if (header_buf[myordering + 1] != -1) {
- MPI_Send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1],
+ smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1],
tag, comm);
- MPI_Send(buf, count, datatype, header_buf[myordering + 1], tag, comm);
+ smpi_mpi_send(buf, count, datatype, header_buf[myordering + 1], tag, comm);
}
}
}
/* pipeline bcast */
else {
send_request_array =
- (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request));
+ (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
recv_request_array =
- (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request));
+ (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
send_status_array =
- (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status));
+ (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
recv_status_array =
- (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status));
+ (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
if (rank == 0) {
sent_count = 0;
for (k = 0; k < 3; k++) {
for (i = 1; i < size; i++) {
if ((already_sent[i] == 0) && (will_send[i] == 0)) {
- MPI_Iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i],
+ smpi_mpi_iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i],
&temp_status_array[i]);
if (flag_array[i] == 1) {
will_send[i] = 1;
- MPI_Recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD,
+ smpi_mpi_recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD,
&status);
i = 1;
}
//start = MPI_Wtime();
/* send header */
- MPI_Send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm);
+ smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm);
//total = MPI_Wtime() - start;
//total *= 1000;
if (0 == 1) {
//if (header_index == 1) {
- MPI_Send(buf, count, datatype, to, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to, tag, comm);
}
/* send data - pipeline */
else {
for (i = 0; i < pipe_length; i++) {
- MPI_Send((char *)buf + (i * increment), segment, datatype, to, tag, comm);
+ smpi_mpi_send((char *)buf + (i * increment), segment, datatype, to, tag, comm);
}
- //MPI_Waitall((pipe_length), send_request_array, send_status_array);
+ //smpi_mpi_waitall((pipe_length), send_request_array, send_status_array);
}
//total = MPI_Wtime() - start;
//total *= 1000;
to = i;
//start = MPI_Wtime();
- MPI_Send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm);
+ smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm);
/* still need to chop data so that we can use the same non-root code */
for (j = 0; j < pipe_length; j++) {
- MPI_Send((char *)buf + (j * increment), segment, datatype, to, tag,
+ smpi_mpi_send((char *)buf + (j * increment), segment, datatype, to, tag,
comm);
}
- //MPI_Send(buf,count,datatype,to,tag,comm);
- //MPI_Wait(&request,MPI_STATUS_IGNORE);
+ //smpi_mpi_send(buf,count,datatype,to,tag,comm);
+ //smpi_mpi_wait(&request,MPI_STATUS_IGNORE);
//total = MPI_Wtime() - start;
//total *= 1000;
/* probe before exit in case there are messages to recv */
for (i = 1; i < size; i++) {
- MPI_Iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i],
+ smpi_mpi_iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i],
&temp_status_array[i]);
if (flag_array[i] == 1)
- MPI_Recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
+ smpi_mpi_recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
}
}
else {
/* if root has already sent a message to this node, don't send the one-byte message */
- MPI_Iprobe(0, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[0], &status);
+ smpi_mpi_iprobe(0, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[0], &status);
/* send 1-byte message to root */
if (flag_array[0] == 0)
- MPI_Send(temp_buf, 1, MPI_CHAR, 0, tag, comm);
+ smpi_mpi_send(temp_buf, 1, MPI_CHAR, 0, tag, comm);
/* wait for header forward when required */
- MPI_Irecv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm,
- &request);
- MPI_Wait(&request, MPI_STATUS_IGNORE);
+ request = smpi_mpi_irecv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm);
+ smpi_mpi_wait(&request, MPI_STATUS_IGNORE);
/* search for where it is */
int myordering = 0;
/* send header when required */
if (header_buf[myordering + 1] != -1) {
- MPI_Send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1],
+ smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1],
tag, comm);
}
if (0 == -1) {
//if (header_buf[1] == -1) {
- MPI_Irecv(buf, count, datatype, 0, tag, comm, &request);
- MPI_Wait(&request, MPI_STATUS_IGNORE);
+ request = smpi_mpi_irecv(buf, count, datatype, 0, tag, comm);
+ smpi_mpi_wait(&request, MPI_STATUS_IGNORE);
//printf("\t\tnode %d ordering = %d receive data from root\n",rank,myordering);
} else {
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *)buf + (i * increment), segment, datatype, MPI_ANY_SOURCE,
- tag, comm, &recv_request_array[i]);
+ recv_request_array[i] = smpi_mpi_irecv((char *)buf + (i * increment), segment, datatype, MPI_ANY_SOURCE,
+ tag, comm);
}
}
/* send data */
if (header_buf[myordering + 1] != -1) {
for (i = 0; i < pipe_length; i++) {
- MPI_Wait(&recv_request_array[i], MPI_STATUS_IGNORE);
- MPI_Isend((char *)buf + (i * increment), segment, datatype,
- header_buf[myordering + 1], tag, comm,
- &send_request_array[i]);
+ smpi_mpi_wait(&recv_request_array[i], MPI_STATUS_IGNORE);
+ send_request_array[i] = smpi_mpi_isend((char *)buf + (i * increment), segment, datatype,
+ header_buf[myordering + 1], tag, comm);
}
- MPI_Waitall((pipe_length), send_request_array, send_status_array);
+ smpi_mpi_waitall((pipe_length), send_request_array, send_status_array);
}
}
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
- MPI_Bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm);
+ XBT_WARN("MPI_bcast_arrival_nb use default MPI_bcast.");
+ smpi_mpi_bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm);
}
return MPI_SUCCESS;
-#include "colls.h"
+#include "colls_private.h"
int bcast_arrival_pattern_aware_wait_segment_size_in_byte = 8192;
int header_size = BCAST_ARRIVAL_PATTERN_AWARE_HEADER_SIZE;
MPI_Aint extent;
- MPI_Type_extent(datatype, &extent);
+ extent = smpi_datatype_get_extent(datatype);
/* source and destination */
int to, from;
- MPI_Comm_rank(MPI_COMM_WORLD, &rank);
- MPI_Comm_size(MPI_COMM_WORLD, &size);
+ rank = smpi_comm_rank(MPI_COMM_WORLD);
+ size = smpi_comm_size(MPI_COMM_WORLD);
/* segment is segment size in number of elements (not bytes) */
*/
if (root != 0) {
if (rank == root) {
- MPI_Send(buf, count, datatype, 0, tag, comm);
+ smpi_mpi_send(buf, count, datatype, 0, tag, comm);
} else if (rank == 0) {
- MPI_Recv(buf, count, datatype, root, tag, comm, &status);
+ smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status);
}
}
/* start pipeline bcast */
send_request_array =
- (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request));
+ (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
recv_request_array =
- (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request));
+ (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
send_status_array =
- (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status));
+ (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
recv_status_array =
- (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status));
+ (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
/* root */
if (rank == 0) {
for (k = 0; k < 3; k++) {
for (i = 1; i < size; i++) {
if ((already_sent[i] == 0) && (will_send[i] == 0)) {
- MPI_Iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i],
+ smpi_mpi_iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i],
&temp_status_array[i]);
if (flag_array[i] == 1) {
will_send[i] = 1;
- MPI_Recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD,
+ smpi_mpi_recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD,
&status);
i = 0;
}
to = header_buf[0];
/* send header */
- MPI_Send(header_buf, header_size, MPI_INT, to, tag, comm);
+ smpi_mpi_send(header_buf, header_size, MPI_INT, to, tag, comm);
/* send data - pipeline */
for (i = 0; i < pipe_length; i++) {
- MPI_Isend((char *)buf + (i * increment), segment, datatype, to, tag, comm,
- &send_request_array[i]);
+ send_request_array[i] = smpi_mpi_isend((char *)buf + (i * increment), segment, datatype, to, tag, comm);
}
- MPI_Waitall((pipe_length), send_request_array, send_status_array);
+ smpi_mpi_waitall((pipe_length), send_request_array, send_status_array);
}
header_buf[1] = -1;
to = i;
- MPI_Send(header_buf, header_size, MPI_INT, to, tag, comm);
+ smpi_mpi_send(header_buf, header_size, MPI_INT, to, tag, comm);
/* still need to chop data so that we can use the same non-root code */
for (j = 0; j < pipe_length; j++) {
- MPI_Send((char *)buf + (j * increment), segment, datatype, to, tag, comm);
+ smpi_mpi_send((char *)buf + (j * increment), segment, datatype, to, tag, comm);
}
}
}
else {
/* send 1-byte message to root */
- MPI_Send(temp_buf, 1, MPI_CHAR, 0, tag, comm);
+ smpi_mpi_send(temp_buf, 1, MPI_CHAR, 0, tag, comm);
/* wait for header forward when required */
- MPI_Irecv(header_buf, header_size, MPI_INT, MPI_ANY_SOURCE, tag, comm,
- &request);
- MPI_Wait(&request, MPI_STATUS_IGNORE);
+ request = smpi_mpi_irecv(header_buf, header_size, MPI_INT, MPI_ANY_SOURCE, tag, comm);
+ smpi_mpi_wait(&request, MPI_STATUS_IGNORE);
/* search for where it is */
int myordering = 0;
/* send header when required */
if (to != -1) {
- MPI_Send(header_buf, header_size, MPI_INT, to, tag, comm);
+ smpi_mpi_send(header_buf, header_size, MPI_INT, to, tag, comm);
}
/* receive data */
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *)buf + (i * increment), segment, datatype, from, tag, comm,
- &recv_request_array[i]);
+ recv_request_array[i] = smpi_mpi_irecv((char *)buf + (i * increment), segment, datatype, from, tag, comm);
}
/* forward data */
if (to != -1) {
for (i = 0; i < pipe_length; i++) {
- MPI_Wait(&recv_request_array[i], MPI_STATUS_IGNORE);
- MPI_Isend((char *)buf + (i * increment), segment, datatype, to, tag, comm,
- &send_request_array[i]);
+ smpi_mpi_wait(&recv_request_array[i], MPI_STATUS_IGNORE);
+ send_request_array[i] = smpi_mpi_isend((char *)buf + (i * increment), segment, datatype, to, tag, comm);
}
- MPI_Waitall((pipe_length), send_request_array, send_status_array);
+ smpi_mpi_waitall((pipe_length), send_request_array, send_status_array);
}
/* recv only */
else {
- MPI_Waitall((pipe_length), recv_request_array, recv_status_array);
+ smpi_mpi_waitall((pipe_length), recv_request_array, recv_status_array);
}
}
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
- MPI_Bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm);
+ XBT_WARN("MPI_bcast_arrival_pattern_aware_wait use default MPI_bcast.");
+ smpi_mpi_bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm);
}
return MPI_SUCCESS;
-#include "colls.h"
+#include "colls_private.h"
static int bcast_NTSL_segment_size_in_byte = 8192;
char temp_buf[MAX_NODE];
MPI_Aint extent;
- MPI_Type_extent(datatype, &extent);
+ extent = smpi_datatype_get_extent(datatype);
/* destination */
int to;
- MPI_Comm_rank(MPI_COMM_WORLD, &rank);
- MPI_Comm_size(MPI_COMM_WORLD, &size);
+ rank = smpi_comm_rank(MPI_COMM_WORLD);
+ size = smpi_comm_size(MPI_COMM_WORLD);
/* segment is segment size in number of elements (not bytes) */
*/
if (root != 0) {
if (rank == root) {
- MPI_Send(buf, count, datatype, 0, tag, comm);
+ smpi_mpi_send(buf, count, datatype, 0, tag, comm);
} else if (rank == 0) {
- MPI_Recv(buf, count, datatype, root, tag, comm, &status);
+ smpi_mpi_recv(buf, count, datatype, root, tag, comm, &status);
}
}
while (sent_count < (size - 1)) {
for (i = 1; i < size; i++) {
- MPI_Iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i],
+ smpi_mpi_iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i],
MPI_STATUSES_IGNORE);
}
/* message arrive */
if ((flag_array[i] == 1) && (already_sent[i] == 0)) {
- MPI_Recv(temp_buf, 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
+ smpi_mpi_recv(temp_buf, 1, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
header_buf[header_index] = i;
header_index++;
sent_count++;
if (header_index != 0) {
header_buf[header_index] = -1;
to = header_buf[0];
- MPI_Send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm);
- MPI_Send(buf, count, datatype, to, tag, comm);
+ smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to, tag, comm);
}
/* randomly MPI_Send to one */
if (already_sent[i] == 0) {
header_buf[0] = i;
header_buf[1] = -1;
- MPI_Send(header_buf, HEADER_SIZE, MPI_INT, i, tag, comm);
- MPI_Send(buf, count, datatype, i, tag, comm);
+ smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, i, tag, comm);
+ smpi_mpi_send(buf, count, datatype, i, tag, comm);
already_sent[i] = 1;
sent_count++;
break;
else {
/* send 1-byte message to root */
- MPI_Send(temp_buf, 1, MPI_CHAR, 0, tag, comm);
+ smpi_mpi_send(temp_buf, 1, MPI_CHAR, 0, tag, comm);
/* wait for header and data, forward when required */
- MPI_Recv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm,
+ smpi_mpi_recv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm,
&status);
- MPI_Recv(buf, count, datatype, MPI_ANY_SOURCE, tag, comm, &status);
+ smpi_mpi_recv(buf, count, datatype, MPI_ANY_SOURCE, tag, comm, &status);
/* search for where it is */
int myordering = 0;
/* send header followed by data */
if (header_buf[myordering + 1] != -1) {
- MPI_Send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1],
+ smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1],
tag, comm);
- MPI_Send(buf, count, datatype, header_buf[myordering + 1], tag, comm);
+ smpi_mpi_send(buf, count, datatype, header_buf[myordering + 1], tag, comm);
}
}
}
/* pipeline bcast */
else {
send_request_array =
- (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request));
+ (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
recv_request_array =
- (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request));
+ (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
send_status_array =
- (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status));
+ (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
recv_status_array =
- (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status));
+ (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
if (rank == 0) {
//double start2 = MPI_Wtime();
//iteration++;
//start = MPI_Wtime();
for (i = 1; i < size; i++) {
- MPI_Iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i],
+ smpi_mpi_iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i],
&temp_status_array[i]);
}
//total = MPI_Wtime() - start;
for (i = 1; i < size; i++) {
/* message arrive */
if ((flag_array[i] == 1) && (already_sent[i] == 0)) {
- MPI_Recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD,
+ smpi_mpi_recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD,
&status);
header_buf[header_index] = i;
header_index++;
//start = MPI_Wtime();
/* send header */
- MPI_Send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm);
+ smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm);
//total = MPI_Wtime() - start;
//total *= 1000;
if (0 == 1) {
//if (header_index == 1) {
- MPI_Send(buf, count, datatype, to, tag, comm);
+ smpi_mpi_send(buf, count, datatype, to, tag, comm);
}
/* send data - pipeline */
else {
for (i = 0; i < pipe_length; i++) {
- MPI_Send((char *)buf + (i * increment), segment, datatype, to, tag, comm);
+ smpi_mpi_send((char *)buf + (i * increment), segment, datatype, to, tag, comm);
}
- //MPI_Waitall((pipe_length), send_request_array, send_status_array);
+ //smpi_mpi_waitall((pipe_length), send_request_array, send_status_array);
}
//total = MPI_Wtime() - start;
//total *= 1000;
to = i;
//start = MPI_Wtime();
- MPI_Send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm);
+ smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm);
/* still need to chop data so that we can use the same non-root code */
for (j = 0; j < pipe_length; j++) {
- MPI_Send((char *)buf + (j * increment), segment, datatype, to, tag,
+ smpi_mpi_send((char *)buf + (j * increment), segment, datatype, to, tag,
comm);
}
- //MPI_Send(buf,count,datatype,to,tag,comm);
- //MPI_Wait(&request,MPI_STATUS_IGNORE);
+ //smpi_mpi_send(buf,count,datatype,to,tag,comm);
+ //smpi_mpi_wait(&request,MPI_STATUS_IGNORE);
//total = MPI_Wtime() - start;
//total *= 1000;
/* non-root */
else {
/* send 1-byte message to root */
- MPI_Send(temp_buf, 1, MPI_CHAR, 0, tag, comm);
+ smpi_mpi_send(temp_buf, 1, MPI_CHAR, 0, tag, comm);
/* wait for header forward when required */
- MPI_Irecv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm,
- &request);
- MPI_Wait(&request, MPI_STATUS_IGNORE);
+ request = smpi_mpi_irecv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm);
+ smpi_mpi_wait(&request, MPI_STATUS_IGNORE);
/* search for where it is */
int myordering = 0;
/* send header when required */
if (header_buf[myordering + 1] != -1) {
- MPI_Send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1],
+ smpi_mpi_send(header_buf, HEADER_SIZE, MPI_INT, header_buf[myordering + 1],
tag, comm);
}
if (0 == -1) {
//if (header_buf[1] == -1) {
- MPI_Irecv(buf, count, datatype, 0, tag, comm, &request);
- MPI_Wait(&request, MPI_STATUS_IGNORE);
+ request = smpi_mpi_irecv(buf, count, datatype, 0, tag, comm);
+ smpi_mpi_wait(&request, MPI_STATUS_IGNORE);
//printf("\t\tnode %d ordering = %d receive data from root\n",rank,myordering);
} else {
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *)buf + (i * increment), segment, datatype, MPI_ANY_SOURCE,
- tag, comm, &recv_request_array[i]);
+ recv_request_array[i] = smpi_mpi_irecv((char *)buf + (i * increment), segment, datatype, MPI_ANY_SOURCE,
+ tag, comm);
}
}
/* send data */
if (header_buf[myordering + 1] != -1) {
for (i = 0; i < pipe_length; i++) {
- MPI_Wait(&recv_request_array[i], MPI_STATUS_IGNORE);
- MPI_Isend((char *)buf + (i * increment), segment, datatype,
- header_buf[myordering + 1], tag, comm,
- &send_request_array[i]);
+ smpi_mpi_wait(&recv_request_array[i], MPI_STATUS_IGNORE);
+ send_request_array[i] = smpi_mpi_isend((char *)buf + (i * increment), segment, datatype,
+ header_buf[myordering + 1], tag, comm);
}
- MPI_Waitall((pipe_length), send_request_array, send_status_array);
+ smpi_mpi_waitall((pipe_length), send_request_array, send_status_array);
}
}
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
- MPI_Bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm);
+ XBT_WARN("MPI_bcast_arrival_pattern_aware use default MPI_bcast.");
+ smpi_mpi_bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm);
}
return MPI_SUCCESS;
-#include "colls.h"
+#include "colls_private.h"
#ifndef BCAST_ARRIVAL_PATTERN_AWARE_HEADER_SIZE
#define BCAST_ARRIVAL_PATTERN_AWARE_HEADER_SIZE 128
int header_size = BCAST_ARRIVAL_PATTERN_AWARE_HEADER_SIZE;
MPI_Aint extent;
- MPI_Type_extent(datatype, &extent);
+ extent = smpi_datatype_get_extent(datatype);
/* source and destination */
int to, from;
- MPI_Comm_rank(MPI_COMM_WORLD, &rank);
- MPI_Comm_size(MPI_COMM_WORLD, &size);
+ rank = smpi_comm_rank(MPI_COMM_WORLD);
+ size = smpi_comm_size(MPI_COMM_WORLD);
/* message too small */
if (count < size) {
- return MPI_Bcast(buf, count, datatype, root, comm);
+ XBT_WARN("MPI_bcast_arrival_scatter use default MPI_bcast.");
+ smpi_mpi_bcast(buf, count, datatype, root, comm);
+ return MPI_SUCCESS;
}
*/
if (root != 0) {
if (rank == root) {
- MPI_Send(buf, count, datatype, 0, tag - 1, comm);
+ smpi_mpi_send(buf, count, datatype, 0, tag - 1, comm);
} else if (rank == 0) {
- MPI_Recv(buf, count, datatype, root, tag - 1, comm, &status);
+ smpi_mpi_recv(buf, count, datatype, root, tag - 1, comm, &status);
}
}
for (k = 0; k < 3; k++) {
for (i = 1; i < size; i++) {
if ((already_sent[i] == 0) && (will_send[i] == 0)) {
- MPI_Iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i],
+ smpi_mpi_iprobe(i, MPI_ANY_TAG, MPI_COMM_WORLD, &flag_array[i],
&temp_status_array[i]);
if (flag_array[i] == 1) {
will_send[i] = 1;
- MPI_Recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD,
+ smpi_mpi_recv(&temp_buf[i], 1, MPI_CHAR, i, tag, MPI_COMM_WORLD,
&status);
i = 0;
}
/* send header */
for (i = 0; i < header_index; i++) {
to = header_buf[i];
- MPI_Send(header_buf, header_size, MPI_INT, to, header_tag, comm);
+ smpi_mpi_send(header_buf, header_size, MPI_INT, to, header_tag, comm);
}
curr_remainder = count % header_index;
if ((i == (header_index - 1)) || (curr_size == 0))
curr_size += curr_remainder;
//printf("Root send to %d index %d\n",to,(i*curr_increment));
- MPI_Send((char *) buf + (i * curr_increment), curr_size, datatype, to,
+ smpi_mpi_send((char *) buf + (i * curr_increment), curr_size, datatype, to,
tag, comm);
}
}
/* non-root */
else {
/* send 1-byte message to root */
- MPI_Send(temp_buf, 1, MPI_CHAR, 0, tag, comm);
+ smpi_mpi_send(temp_buf, 1, MPI_CHAR, 0, tag, comm);
/* wait for header forward when required */
- MPI_Recv(header_buf, header_size, MPI_INT, 0, header_tag, comm, &status);
+ smpi_mpi_recv(header_buf, header_size, MPI_INT, 0, header_tag, comm, &status);
/* search for where it is */
int myordering = 0;
/* receive data */
if (myordering == (total_nodes - 1))
recv_size += curr_remainder;
- MPI_Recv((char *) buf + (myordering * curr_increment), recv_size, datatype,
+ smpi_mpi_recv((char *) buf + (myordering * curr_increment), recv_size, datatype,
0, tag, comm, &status);
/* at this point all nodes in this set perform all-gather operation */
//printf("\tnode %d sent_offset %d send_count %d\n",rank,send_offset,send_count);
- MPI_Sendrecv((char *) buf + send_offset, send_count, datatype, to,
+ smpi_mpi_sendrecv((char *) buf + send_offset, send_count, datatype, to,
tag + i, (char *) buf + recv_offset, recv_count, datatype,
from, tag + i, comm, &status);
}
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
int src, dst, rank, num_procs, mask, relative_rank;
int tag = 1;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
relative_rank = (rank >= root) ? rank - root : rank - root + num_procs;
src = rank - mask;
if (src < 0)
src += num_procs;
- MPI_Recv(buff, count, data_type, src, tag, comm, MPI_STATUS_IGNORE);
+ smpi_mpi_recv(buff, count, data_type, src, tag, comm, MPI_STATUS_IGNORE);
break;
}
mask <<= 1;
dst = rank + mask;
if (dst >= num_procs)
dst -= num_procs;
- MPI_Send(buff, count, data_type, dst, tag, comm);
+ smpi_mpi_send(buff, count, data_type, dst, tag, comm);
}
mask >>= 1;
}
-#include "colls.h"
+#include "colls_private.h"
int flattree_segment_in_byte = 8192;
int tag = 1;
MPI_Aint extent;
- MPI_Type_extent(data_type, &extent);
+ extent = smpi_datatype_get_extent(data_type);
int segment = flattree_segment_in_byte / extent;
int pipe_length = count / segment;
int increment = segment * extent;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
MPI_Request *request_array;
MPI_Status *status_array;
- request_array = (MPI_Request *) malloc(pipe_length * sizeof(MPI_Request));
- status_array = (MPI_Status *) malloc(pipe_length * sizeof(MPI_Status));
+ request_array = (MPI_Request *) xbt_malloc(pipe_length * sizeof(MPI_Request));
+ status_array = (MPI_Status *) xbt_malloc(pipe_length * sizeof(MPI_Status));
if (rank != root) {
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *)buff + (i * increment), segment, data_type, root, tag, comm,
- &request_array[i]);
+ request_array[i] = smpi_mpi_irecv((char *)buff + (i * increment), segment, data_type, root, tag, comm);
}
- MPI_Waitall(pipe_length, request_array, status_array);
+ smpi_mpi_waitall(pipe_length, request_array, status_array);
}
else {
continue;
else {
for (i = 0; i < pipe_length; i++) {
- MPI_Send((char *)buff + (i * increment), segment, data_type, j, tag, comm);
+ smpi_mpi_send((char *)buff + (i * increment), segment, data_type, j, tag, comm);
}
}
}
-#include "colls.h"
+#include "colls_private.h"
int
smpi_coll_tuned_bcast_flattree(void *buff, int count, MPI_Datatype data_type,
int i, rank, num_procs;
int tag = 1;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
if (rank != root) {
- MPI_Recv(buff, count, data_type, root, tag, comm, MPI_STATUS_IGNORE);
+ smpi_mpi_recv(buff, count, data_type, root, tag, comm, MPI_STATUS_IGNORE);
}
else {
- reqs = (MPI_Request *) malloc((num_procs - 1) * sizeof(MPI_Request));
+ reqs = (MPI_Request *) xbt_malloc((num_procs - 1) * sizeof(MPI_Request));
req_ptr = reqs;
// Root sends data to all others
for (i = 0; i < num_procs; i++) {
if (i == rank)
continue;
- MPI_Isend(buff, count, data_type, i, tag, comm, req_ptr++);
+ *(req_ptr++) = smpi_mpi_isend(buff, count, data_type, i, tag, comm);
}
// wait on all requests
- MPI_Waitall(num_procs - 1, reqs, MPI_STATUSES_IGNORE);
+ smpi_mpi_waitall(num_procs - 1, reqs, MPI_STATUSES_IGNORE);
free(reqs);
}
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
int scatter_size, left, right, next_src, *recv_counts, *disps;
int tag = 1;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
- MPI_Type_extent(data_type, &extent);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
+ extent = smpi_datatype_get_extent(data_type);
nbytes = extent * count;
curr_size = 0; // this process doesn't receive any data
// because of uneven division
else {
- MPI_Recv((char *) buff + relative_rank * scatter_size, recv_size,
+ smpi_mpi_recv((char *) buff + relative_rank * scatter_size, recv_size,
MPI_BYTE, src, tag, comm, &status);
- MPI_Get_count(&status, MPI_BYTE, &curr_size);
+ curr_size = smpi_mpi_get_count(&status, MPI_BYTE);
}
break;
}
dst = rank + mask;
if (dst >= num_procs)
dst -= num_procs;
- MPI_Send((char *) buff + scatter_size * (relative_rank + mask),
+ smpi_mpi_send((char *) buff + scatter_size * (relative_rank + mask),
send_size, MPI_BYTE, dst, tag, comm);
curr_size -= send_size;
next_src = left;
for (i = 1; i < num_procs; i++) {
- MPI_Sendrecv((char *) buff + disps[(src - root + num_procs) % num_procs],
+ smpi_mpi_sendrecv((char *) buff + disps[(src - root + num_procs) % num_procs],
recv_counts[(src - root + num_procs) % num_procs],
MPI_BYTE, right, tag,
(char *) buff +
-#include "colls.h"
+#include "colls_private.h"
/*****************************************************************************
MPI_Status status;
int i, j, k, src, dst, rank, num_procs, send_offset, recv_offset;
- int mask, relative_rank, curr_size, recv_size, send_size, nbytes;
+ int mask, relative_rank, curr_size, recv_size = 0, send_size, nbytes;
int scatter_size, tree_root, relative_dst, dst_tree_root;
int my_tree_root, offset, tmp_mask, num_procs_completed;
int tag = 1;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &num_procs);
- MPI_Type_extent(data_type, &extent);
+ rank = smpi_comm_rank(comm);
+ num_procs = smpi_comm_size(comm);
+ extent = smpi_datatype_get_extent(data_type);
nbytes = extent * count;
scatter_size = (nbytes + num_procs - 1) / num_procs; // ceiling division
curr_size = 0; // this process doesn't receive any data
// because of uneven division
else {
- MPI_Recv((char *)buff + relative_rank * scatter_size, recv_size,
+ smpi_mpi_recv((char *)buff + relative_rank * scatter_size, recv_size,
MPI_BYTE, src, tag, comm, &status);
- MPI_Get_count(&status, MPI_BYTE, &curr_size);
+ curr_size = smpi_mpi_get_count(&status, MPI_BYTE);
}
break;
}
dst = rank + mask;
if (dst >= num_procs)
dst -= num_procs;
- MPI_Send((char *)buff + scatter_size * (relative_rank + mask),
+ smpi_mpi_send((char *)buff + scatter_size * (relative_rank + mask),
send_size, MPI_BYTE, dst, tag, comm);
curr_size -= send_size;
recv_offset = dst_tree_root * scatter_size;
if (relative_dst < num_procs) {
- MPI_Sendrecv((char *)buff + send_offset, curr_size, MPI_BYTE, dst, tag,
+ smpi_mpi_sendrecv((char *)buff + send_offset, curr_size, MPI_BYTE, dst, tag,
(char *)buff + recv_offset, scatter_size * mask, MPI_BYTE, dst,
tag, comm, &status);
- MPI_Get_count(&status, MPI_BYTE, &recv_size);
+ recv_size = smpi_mpi_get_count(&status, MPI_BYTE);
curr_size += recv_size;
}
if ((relative_dst > relative_rank)
&& (relative_rank < tree_root + num_procs_completed)
&& (relative_dst >= tree_root + num_procs_completed)) {
- MPI_Send((char *)buff + offset, recv_size, MPI_BYTE, dst, tag, comm);
+ smpi_mpi_send((char *)buff + offset, recv_size, MPI_BYTE, dst, tag, comm);
/* recv_size was set in the previous
receive. that's the amount of data to be
&& (relative_dst < tree_root + num_procs_completed)
&& (relative_rank >= tree_root + num_procs_completed)) {
- MPI_Recv((char *)buff + offset, scatter_size * num_procs_completed,
+ smpi_mpi_recv((char *)buff + offset, scatter_size * num_procs_completed,
MPI_BYTE, dst, tag, comm, &status);
/* num_procs_completed is also equal to the no. of processes
whose data we don't have */
- MPI_Get_count(&status, MPI_BYTE, &recv_size);
+ recv_size = smpi_mpi_get_count(&status, MPI_BYTE);
curr_size += recv_size;
}
tmp_mask >>= 1;
#include "smpi/private.h"
#include "xbt.h"
-void star_reduction(MPI_Op op, void *src, void *target, int *count, MPI_Datatype *dtype);
-
#define COLL_DESCRIPTION(cat, ret, args, name) \
{# name,\
# cat " " # name " collective",\
--- /dev/null
+#include "xbt.h"
+
+XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_colls, smpi,
+ "Logging specific to SMPI collectives"); /* creates the smpi_colls log category under smpi; colls_private.h refers to it via XBT_LOG_EXTERNAL_DEFAULT_CATEGORY */
--- /dev/null
+#ifndef SMPI_COLLS_PRIVATE_H /* include guard for the collectives' private header */
+#define SMPI_COLLS_PRIVATE_H
+
+#include "colls.h" /* collective sources touched by this patch now include this header instead of colls.h directly */
+
+XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(smpi_colls); /* category is created elsewhere in this patch by XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_colls, smpi, ...); presumably what lets XBT_WARN in the collectives log under smpi_colls */
+void star_reduction(MPI_Op op, void *src, void *target, int *count, MPI_Datatype *dtype); /* same prototype this patch removes from its previous header; called by the reduce implementations */
+
+#endif /* SMPI_COLLS_PRIVATE_H */
-#include "colls.h"
+#include "colls_private.h"
//#include <star-reduction.c>
int reduce_NTSL_segment_size_in_byte = 8192;
int rank, size;
int i;
MPI_Aint extent;
- MPI_Type_extent(datatype, &extent);
+ extent = smpi_datatype_get_extent(datatype);
- MPI_Comm_rank(MPI_COMM_WORLD, &rank);
- MPI_Comm_size(MPI_COMM_WORLD, &size);
+ rank = smpi_comm_rank(MPI_COMM_WORLD);
+ size = smpi_comm_size(MPI_COMM_WORLD);
/* source node and destination nodes (same through out the functions) */
int to = (rank - 1 + size) % size;
/*
if (root != 0) {
if (rank == root){
- MPI_Send(buf,count,datatype,0,tag,comm);
+ smpi_mpi_send(buf,count,datatype,0,tag,comm);
}
else if (rank == 0) {
- MPI_Recv(buf,count,datatype,root,tag,comm,&status);
+ smpi_mpi_recv(buf,count,datatype,root,tag,comm,&status);
}
}
*/
char *tmp_buf;
- tmp_buf = (char *) malloc(count * extent);
+ tmp_buf = (char *) xbt_malloc(count * extent);
- MPI_Sendrecv(buf, count, datatype, rank, tag, rbuf, count, datatype, rank,
+ smpi_mpi_sendrecv(buf, count, datatype, rank, tag, rbuf, count, datatype, rank,
tag, comm, &status);
/* when a message is smaller than a block size => no pipeline */
if (count <= segment) {
if (rank == root) {
- MPI_Recv(tmp_buf, count, datatype, from, tag, comm, &status);
+ smpi_mpi_recv(tmp_buf, count, datatype, from, tag, comm, &status);
star_reduction(op, tmp_buf, rbuf, &count, &datatype);
} else if (rank == ((root - 1 + size) % size)) {
- MPI_Send(rbuf, count, datatype, to, tag, comm);
+ smpi_mpi_send(rbuf, count, datatype, to, tag, comm);
} else {
- MPI_Recv(tmp_buf, count, datatype, from, tag, comm, &status);
+ smpi_mpi_recv(tmp_buf, count, datatype, from, tag, comm, &status);
star_reduction(op, tmp_buf, rbuf, &count, &datatype);
- MPI_Send(rbuf, count, datatype, to, tag, comm);
+ smpi_mpi_send(rbuf, count, datatype, to, tag, comm);
}
free(tmp_buf);
return MPI_SUCCESS;
/* pipeline */
else {
send_request_array =
- (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request));
+ (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
recv_request_array =
- (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request));
+ (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
send_status_array =
- (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status));
+ (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
recv_status_array =
- (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status));
+ (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
/* root recv data */
if (rank == root) {
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *) tmp_buf + (i * increment), segment, datatype, from,
- (tag + i), comm, &recv_request_array[i]);
+ recv_request_array[i] = smpi_mpi_irecv((char *) tmp_buf + (i * increment), segment, datatype, from,
+ (tag + i), comm);
}
for (i = 0; i < pipe_length; i++) {
- MPI_Wait(&recv_request_array[i], &status);
+ smpi_mpi_wait(&recv_request_array[i], &status);
star_reduction(op, tmp_buf + (i * increment), (char *)rbuf + (i * increment),
&segment, &datatype);
}
/* last node only sends data */
else if (rank == ((root - 1 + size) % size)) {
for (i = 0; i < pipe_length; i++) {
- MPI_Isend((char *)rbuf + (i * increment), segment, datatype, to, (tag + i),
- comm, &send_request_array[i]);
+ send_request_array[i] = smpi_mpi_isend((char *)rbuf + (i * increment), segment, datatype, to, (tag + i),
+ comm);
}
- MPI_Waitall((pipe_length), send_request_array, send_status_array);
+ smpi_mpi_waitall((pipe_length), send_request_array, send_status_array);
}
/* intermediate nodes relay (receive, reduce, then send) data */
else {
for (i = 0; i < pipe_length; i++) {
- MPI_Irecv((char *) tmp_buf + (i * increment), segment, datatype, from,
- (tag + i), comm, &recv_request_array[i]);
+ recv_request_array[i] = smpi_mpi_irecv((char *) tmp_buf + (i * increment), segment, datatype, from,
+ (tag + i), comm);
}
for (i = 0; i < pipe_length; i++) {
- MPI_Wait(&recv_request_array[i], &status);
+ smpi_mpi_wait(&recv_request_array[i], &status);
star_reduction(op, tmp_buf + (i * increment), (char *)rbuf + (i * increment),
&segment, &datatype);
- MPI_Isend((char *) rbuf + (i * increment), segment, datatype, to,
- (tag + i), comm, &send_request_array[i]);
+ send_request_array[i] = smpi_mpi_isend((char *) rbuf + (i * increment), segment, datatype, to,
+ (tag + i), comm);
}
- MPI_Waitall((pipe_length), send_request_array, send_status_array);
+ smpi_mpi_waitall((pipe_length), send_request_array, send_status_array);
}
free(send_request_array);
/* when count is not divisible by block size, use default REDUCE for the remainder */
if ((remainder != 0) && (count > segment)) {
- MPI_Reduce((char *)buf + (pipe_length * increment),
+ XBT_WARN("MPI_reduce_NTSL use default MPI_reduce.");
+ smpi_mpi_reduce((char *)buf + (pipe_length * increment),
(char *)rbuf + (pipe_length * increment), remainder, datatype, op, root,
comm);
}
-#include "colls.h"
+#include "colls_private.h"
//#include <star-reduction.c>
int reduce_arrival_pattern_aware_segment_size_in_byte = 8192;
MPI_Comm comm)
{
int rank;
- MPI_Comm_rank(comm, &rank);
+ rank = smpi_comm_rank(comm);
int tag = 50;
MPI_Status status;
}
char *tmp_buf;
- tmp_buf = (char *) malloc(count * extent);
+ tmp_buf = (char *) xbt_malloc(count * extent);
smpi_mpi_sendrecv(buf, count, datatype, rank, tag, rbuf, count, datatype, rank,
tag, comm, &status);
/* wait for header and data, forward when required */
smpi_mpi_recv(header_buf, HEADER_SIZE, MPI_INT, MPI_ANY_SOURCE, tag, comm,
&status);
- // MPI_Recv(buf,count,datatype,MPI_ANY_SOURCE,tag,comm,&status);
+ // smpi_mpi_recv(buf,count,datatype,MPI_ANY_SOURCE,tag,comm,&status);
/* search for where it is */
int myordering = 0;
// printf("node %d start\n",rank);
send_request_array =
- (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request));
+ (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
recv_request_array =
- (MPI_Request *) malloc((size + pipe_length) * sizeof(MPI_Request));
+ (MPI_Request *) xbt_malloc((size + pipe_length) * sizeof(MPI_Request));
send_status_array =
- (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status));
+ (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
recv_status_array =
- (MPI_Status *) malloc((size + pipe_length) * sizeof(MPI_Status));
+ (MPI_Status *) xbt_malloc((size + pipe_length) * sizeof(MPI_Status));
if (rank == 0) {
sent_count = 0;
-#include "colls.h"
+#include "colls_private.h"
//#include <star-reduction.c>
if (count == 0)
return 0;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &comm_size);
+ rank = smpi_comm_rank(comm);
+ comm_size = smpi_comm_size(comm);
- MPI_Type_extent(datatype, &extent);
+ extent = smpi_datatype_get_extent(datatype);
- tmp_buf = (void *) malloc(count * extent);
+ tmp_buf = (void *) xbt_malloc(count * extent);
- MPI_Sendrecv(sendbuf, count, datatype, rank, tag,
+ smpi_mpi_sendrecv(sendbuf, count, datatype, rank, tag,
recvbuf, count, datatype, rank, tag, comm, &status);
mask = 1;
relrank = (rank - root + comm_size) % comm_size;
source = (relrank | mask);
if (source < comm_size) {
source = (source + root) % comm_size;
- MPI_Recv(tmp_buf, count, datatype, source, tag, comm, &status);
+ smpi_mpi_recv(tmp_buf, count, datatype, source, tag, comm, &status);
star_reduction(op, tmp_buf, recvbuf, &count, &datatype);
}
} else {
dst = ((relrank & (~mask)) + root) % comm_size;
- MPI_Send(recvbuf, count, datatype, dst, tag, comm);
+ smpi_mpi_send(recvbuf, count, datatype, dst, tag, comm);
break;
}
mask <<= 1;
-#include "colls.h"
+#include "colls_private.h"
//#include <star-reduction.c>
int
char *inbuf;
MPI_Status status;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &size);
+ rank = smpi_comm_rank(comm);
+ size = smpi_comm_size(comm);
/* If not root, send data to the root. */
- MPI_Type_extent(dtype, &extent);
+ extent = smpi_datatype_get_extent(dtype);
if (rank != root) {
- MPI_Send(sbuf, count, dtype, root, tag, comm);
+ smpi_mpi_send(sbuf, count, dtype, root, tag, comm);
return 0;
}
messages. */
if (size > 1)
- origin = (char *) malloc(count * extent);
+ origin = (char *) xbt_malloc(count * extent);
/* Initialize the receive buffer. */
if (rank == (size - 1))
- MPI_Sendrecv(sbuf, count, dtype, rank, tag,
+ smpi_mpi_sendrecv(sbuf, count, dtype, rank, tag,
rbuf, count, dtype, rank, tag, comm, &status);
else
- MPI_Recv(rbuf, count, dtype, size - 1, tag, comm, &status);
+ smpi_mpi_recv(rbuf, count, dtype, size - 1, tag, comm, &status);
/* Loop receiving and calling reduction function (C or Fortran). */
if (rank == i)
inbuf = sbuf;
else {
- MPI_Recv(origin, count, dtype, i, tag, comm, &status);
+ smpi_mpi_recv(origin, count, dtype, i, tag, comm, &status);
inbuf = origin;
}
-#include "colls.h"
+#include "colls_private.h"
/*
reduce
if (count == 0)
return 0;
- MPI_Comm_rank(comm, &rank);
- MPI_Comm_size(comm, &comm_size);
+ rank = smpi_comm_rank(comm);
+ comm_size = smpi_comm_size(comm);
- MPI_Type_extent(datatype, &extent);
- MPI_Type_size(datatype, &type_size);
+ extent = smpi_datatype_get_extent(datatype);
+ type_size = smpi_datatype_size(datatype);
/* find nearest power-of-two less than or equal to comm_size */
pof2 = 1;
if (count < comm_size) {
new_count = comm_size;
- send_ptr = (void *) malloc(new_count * extent);
- recv_ptr = (void *) malloc(new_count * extent);
- tmp_buf = (void *) malloc(new_count * extent);
+ send_ptr = (void *) xbt_malloc(new_count * extent);
+ recv_ptr = (void *) xbt_malloc(new_count * extent);
+ tmp_buf = (void *) xbt_malloc(new_count * extent);
memcpy(send_ptr, sendbuf, extent * new_count);
//if ((rank != root))
- MPI_Sendrecv(send_ptr, new_count, datatype, rank, tag,
+ smpi_mpi_sendrecv(send_ptr, new_count, datatype, rank, tag,
recv_ptr, new_count, datatype, rank, tag, comm, &status);
rem = comm_size - pof2;
if (rank < 2 * rem) {
if (rank % 2 != 0) {
/* odd */
- MPI_Send(recv_ptr, new_count, datatype, rank - 1, tag, comm);
+ smpi_mpi_send(recv_ptr, new_count, datatype, rank - 1, tag, comm);
newrank = -1;
} else {
- MPI_Recv(tmp_buf, count, datatype, rank + 1, tag, comm, &status);
+ smpi_mpi_recv(tmp_buf, count, datatype, rank + 1, tag, comm, &status);
star_reduction(op, tmp_buf, recv_ptr, &new_count, &datatype);
newrank = rank / 2;
}
} else /* rank >= 2*rem */
newrank = rank - rem;
- cnts = (int *) malloc(pof2 * sizeof(int));
- disps = (int *) malloc(pof2 * sizeof(int));
+ cnts = (int *) xbt_malloc(pof2 * sizeof(int));
+ disps = (int *) xbt_malloc(pof2 * sizeof(int));
if (newrank != -1) {
for (i = 0; i < (pof2 - 1); i++)
}
/* Send data from recvbuf. Recv into tmp_buf */
- MPI_Sendrecv((char *) recv_ptr +
+ smpi_mpi_sendrecv((char *) recv_ptr +
disps[send_idx] * extent,
send_cnt, datatype,
dst, tag,
for (i = 1; i < pof2; i++)
disps[i] = disps[i - 1] + cnts[i - 1];
- MPI_Recv(recv_ptr, cnts[0], datatype, 0, tag, comm, &status);
+ smpi_mpi_recv(recv_ptr, cnts[0], datatype, 0, tag, comm, &status);
newrank = 0;
send_idx = 0;
last_idx = 2;
} else if (newrank == 0) {
- MPI_Send(recv_ptr, cnts[0], datatype, root, tag, comm);
+ smpi_mpi_send(recv_ptr, cnts[0], datatype, root, tag, comm);
newrank = -1;
}
newroot = 0;
}
if (newdst_tree_root == newroot_tree_root) {
- MPI_Send((char *) recv_ptr +
+ smpi_mpi_send((char *) recv_ptr +
disps[send_idx] * extent,
send_cnt, datatype, dst, tag, comm);
break;
} else {
- MPI_Recv((char *) recv_ptr +
+ smpi_mpi_recv((char *) recv_ptr +
disps[recv_idx] * extent,
recv_cnt, datatype, dst, tag, comm, &status);
}
else if (count >= comm_size) {
- tmp_buf = (void *) malloc(count * extent);
+ tmp_buf = (void *) xbt_malloc(count * extent);
//if ((rank != root))
- MPI_Sendrecv(sendbuf, count, datatype, rank, tag,
+ smpi_mpi_sendrecv(sendbuf, count, datatype, rank, tag,
recvbuf, count, datatype, rank, tag, comm, &status);
rem = comm_size - pof2;
if (rank < 2 * rem) {
if (rank % 2 != 0) { /* odd */
- MPI_Send(recvbuf, count, datatype, rank - 1, tag, comm);
+ smpi_mpi_send(recvbuf, count, datatype, rank - 1, tag, comm);
newrank = -1;
}
else {
- MPI_Recv(tmp_buf, count, datatype, rank + 1, tag, comm, &status);
+ smpi_mpi_recv(tmp_buf, count, datatype, rank + 1, tag, comm, &status);
star_reduction(op, tmp_buf, recvbuf, &count, &datatype);
newrank = rank / 2;
}
} else /* rank >= 2*rem */
newrank = rank - rem;
- cnts = (int *) malloc(pof2 * sizeof(int));
- disps = (int *) malloc(pof2 * sizeof(int));
+ cnts = (int *) xbt_malloc(pof2 * sizeof(int));
+ disps = (int *) xbt_malloc(pof2 * sizeof(int));
if (newrank != -1) {
for (i = 0; i < (pof2 - 1); i++)
}
/* Send data from recvbuf. Recv into tmp_buf */
- MPI_Sendrecv((char *) recvbuf +
+ smpi_mpi_sendrecv((char *) recvbuf +
disps[send_idx] * extent,
send_cnt, datatype,
dst, tag,
for (i = 1; i < pof2; i++)
disps[i] = disps[i - 1] + cnts[i - 1];
- MPI_Recv(recvbuf, cnts[0], datatype, 0, tag, comm, &status);
+ smpi_mpi_recv(recvbuf, cnts[0], datatype, 0, tag, comm, &status);
newrank = 0;
send_idx = 0;
last_idx = 2;
} else if (newrank == 0) {
- MPI_Send(recvbuf, cnts[0], datatype, root, tag, comm);
+ smpi_mpi_send(recvbuf, cnts[0], datatype, root, tag, comm);
newrank = -1;
}
newroot = 0;
}
if (newdst_tree_root == newroot_tree_root) {
- MPI_Send((char *) recvbuf +
+ smpi_mpi_send((char *) recvbuf +
disps[send_idx] * extent,
send_cnt, datatype, dst, tag, comm);
break;
} else {
- MPI_Recv((char *) recvbuf +
+ smpi_mpi_recv((char *) recvbuf +
disps[recv_idx] * extent,
recv_cnt, datatype, dst, tag, comm, &status);
}
-#include "colls.h"
+#include "colls_private.h"
/*
* created by Pitch Patarasuk
int smpi_main(int (*realmain) (int argc, char *argv[]),int argc, char *argv[])
{
srand(SMPI_RAND_SEED);
-
+
if(getenv("SMPI_PRETEND_CC") != NULL) {
/* Hack to ensure that smpicc can pretend to be a simple compiler. Particularly handy to pass it to the configuration tools */
return 0;
SIMIX_function_register_default(realmain);
SIMIX_launch_application(argv[2]);
+ int allgather_id = find_coll_description(mpi_coll_allgather_description,
+ sg_cfg_get_string("smpi/allgather"));
+ mpi_coll_allgather_fun = (int (*)(void *, int, MPI_Datatype,
+ void*, int, MPI_Datatype, MPI_Comm))
+ mpi_coll_allgather_description[allgather_id].coll;
+
+ int allreduce_id = find_coll_description(mpi_coll_allreduce_description,
+ sg_cfg_get_string("smpi/allreduce"));
+ mpi_coll_allreduce_fun = (int (*)(void *sbuf, void *rbuf, int rcount, \
+ MPI_Datatype dtype, MPI_Op op, MPI_Comm comm))
+ mpi_coll_allreduce_description[allreduce_id].coll;
+
+ int alltoall_id = find_coll_description(mpi_coll_alltoall_description,
+ sg_cfg_get_string("smpi/alltoall"));
+ mpi_coll_alltoall_fun = (int (*)(void *, int, MPI_Datatype,
+ void*, int, MPI_Datatype, MPI_Comm))
+ mpi_coll_alltoall_description[alltoall_id].coll;
+
+ int bcast_id = find_coll_description(mpi_coll_bcast_description,
+ sg_cfg_get_string("smpi/bcast"));
+ mpi_coll_bcast_fun = (int (*)(void *buf, int count, MPI_Datatype datatype, \
+ int root, MPI_Comm com))
+ mpi_coll_bcast_description[bcast_id].coll;
+
+ int reduce_id = find_coll_description(mpi_coll_reduce_description,
+ sg_cfg_get_string("smpi/reduce"));
+ mpi_coll_reduce_fun = (int (*)(void *buf, void *rbuf, int count, MPI_Datatype datatype, \
+ MPI_Op op, int root, MPI_Comm comm))
+ mpi_coll_reduce_description[reduce_id].coll;
+
smpi_global_init();
/* Clean IO before the run */
if (comm == MPI_COMM_NULL) {
retval = MPI_ERR_COMM;
} else {
- smpi_mpi_bcast(buf, count, datatype, root, comm);
+ mpi_coll_bcast_fun(buf, count, datatype, root, comm);
retval = MPI_SUCCESS;
}
#ifdef HAVE_TRACING
|| recvtype == MPI_DATATYPE_NULL) {
retval = MPI_ERR_TYPE;
} else {
- smpi_mpi_allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount,
- recvtype, comm);
+ mpi_coll_allgather_fun(sendbuf, sendcount, sendtype, recvbuf, recvcount,
+ recvtype, comm);
retval = MPI_SUCCESS;
}
#ifdef HAVE_TRACING
} else if (datatype == MPI_DATATYPE_NULL || op == MPI_OP_NULL) {
retval = MPI_ERR_ARG;
} else {
- smpi_mpi_reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
+ mpi_coll_reduce_fun(sendbuf, recvbuf, count, datatype, op, root, comm);
retval = MPI_SUCCESS;
}
#ifdef HAVE_TRACING
count += recvcounts[i];
displs[i] = 0;
}
- smpi_mpi_reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
+ mpi_coll_reduce_fun(sendbuf, recvbuf, count, datatype, op, 0, comm);
smpi_mpi_scatterv(recvbuf, recvcounts, displs, datatype, recvbuf,
recvcounts[rank], datatype, 0, comm);
xbt_free(displs);