} \
}
-XBT_PRIVATE void* smpi_get_tmp_sendbuffer(size_t size);
-XBT_PRIVATE void* smpi_get_tmp_recvbuffer(size_t size);
+XBT_PRIVATE unsigned char* smpi_get_tmp_sendbuffer(size_t size);
+XBT_PRIVATE unsigned char* smpi_get_tmp_recvbuffer(size_t size);
XBT_PRIVATE void log_timed_action(simgrid::xbt::ReplayAction& action, double clock);
}
virtual void kernel(simgrid::xbt::ReplayAction& action) = 0;
- void* send_buffer(int size) { return smpi_get_tmp_sendbuffer(size); }
- void* recv_buffer(int size) { return smpi_get_tmp_recvbuffer(size); }
+ unsigned char* send_buffer(int size) { return smpi_get_tmp_sendbuffer(size); }
+ unsigned char* recv_buffer(int size) { return smpi_get_tmp_recvbuffer(size); }
};
class WaitAction : public ReplayAction<WaitTestParser> {
int pof2 = 1;
// local string variables
- char *tmp_buff;
char *send_ptr = (char *) send_buff;
char *recv_ptr = (char *) recv_buff;
count = recv_count;
- tmp_buff = (char *) smpi_get_tmp_sendbuffer(num_procs * recv_count * recv_extent);
+ unsigned char* tmp_buff = smpi_get_tmp_sendbuffer(num_procs * recv_count * recv_extent);
// perform a local copy
Datatype::copy(send_ptr, send_count, send_type, tmp_buff, recv_count, recv_type);
MPI_Status status;
MPI_Aint recvtype_extent, recvtype_true_extent, recvtype_true_lb;
unsigned int curr_cnt, dst, total_count;
- void *tmp_buf, *tmp_buf_rl;
unsigned int mask, dst_tree_root, my_tree_root, position,
send_offset, recv_offset, last_recv_cnt=0, nprocs_completed, k,
offset, tmp_mask, tree_root;
recvtype->extent(&recvtype_true_lb, &recvtype_true_extent);
- tmp_buf_rl = (void*)smpi_get_tmp_sendbuffer(total_count * std::max(recvtype_true_extent, recvtype_extent));
+ unsigned char* tmp_buf_rl = smpi_get_tmp_sendbuffer(total_count * std::max(recvtype_true_extent, recvtype_extent));
/* adjust for potential negative lower bound in datatype */
- tmp_buf = (void *)((char*)tmp_buf_rl - recvtype_true_lb);
+ unsigned char* tmp_buf = tmp_buf_rl - recvtype_true_lb;
/* copy local data into right location in tmp_buf */
position = 0;
position += recvcounts[i];
if (sendbuf != MPI_IN_PLACE)
{
- Datatype::copy(sendbuf, sendcount, sendtype,
- ((char *)tmp_buf + position*
- recvtype_extent),
- recvcounts[rank], recvtype);
+ Datatype::copy(sendbuf, sendcount, sendtype, tmp_buf + position * recvtype_extent, recvcounts[rank], recvtype);
}
else
{
/* if in_place specified, local data is found in recvbuf */
- Datatype::copy(((char *)recvbuf +
- displs[rank]*recvtype_extent),
- recvcounts[rank], recvtype,
- ((char *)tmp_buf + position*
- recvtype_extent),
- recvcounts[rank], recvtype);
+ Datatype::copy(static_cast<char*>(recvbuf) + displs[rank] * recvtype_extent, recvcounts[rank], recvtype,
+ tmp_buf + position * recvtype_extent, recvcounts[rank], recvtype);
}
curr_cnt = recvcounts[rank];
for (j=0; j<dst_tree_root; j++)
recv_offset += recvcounts[j];
- Request::sendrecv(((char *)tmp_buf + send_offset * recvtype_extent),
- curr_cnt, recvtype, dst,
- COLL_TAG_ALLGATHERV,
- ((char *)tmp_buf + recv_offset * recvtype_extent),
- total_count - recv_offset, recvtype, dst,
- COLL_TAG_ALLGATHERV,
- comm, &status);
+ Request::sendrecv(tmp_buf + send_offset * recvtype_extent, curr_cnt, recvtype, dst, COLL_TAG_ALLGATHERV,
+ tmp_buf + recv_offset * recvtype_extent, total_count - recv_offset, recvtype, dst,
+ COLL_TAG_ALLGATHERV, comm, &status);
/* for convenience, recv is posted for a bigger amount
than will be sent */
last_recv_cnt=Status::get_count(&status, recvtype);
offset += recvcounts[j];
offset *= recvtype_extent;
- Request::send(((char *)tmp_buf + offset),
- last_recv_cnt,
- recvtype, dst,
- COLL_TAG_ALLGATHERV, comm);
+ Request::send(tmp_buf + offset, last_recv_cnt, recvtype, dst, COLL_TAG_ALLGATHERV, comm);
/* last_recv_cnt was set in the previous
receive. that's the amount of data to be
sent now. */
for (j=0; j<(my_tree_root+mask); j++)
offset += recvcounts[j];
- Request::recv(((char *)tmp_buf + offset * recvtype_extent),
- total_count - offset, recvtype,
- dst, COLL_TAG_ALLGATHERV,
+ Request::recv(tmp_buf + offset * recvtype_extent, total_count - offset, recvtype, dst, COLL_TAG_ALLGATHERV,
comm, &status);
/* for convenience, recv is posted for a
bigger amount than will be sent */
if ((sendbuf != MPI_IN_PLACE) || (j != rank)) {
/* not necessary to copy if in_place and
j==rank. otherwise copy. */
- Datatype::copy(((char *)tmp_buf + position*recvtype_extent),
- recvcounts[j], recvtype,
- ((char *)recvbuf + displs[j]*recvtype_extent),
- recvcounts[j], recvtype);
+ Datatype::copy(tmp_buf + position * recvtype_extent, recvcounts[j], recvtype,
+ static_cast<char*>(recvbuf) + displs[j] * recvtype_extent, recvcounts[j], recvtype);
}
position += recvcounts[j];
}
int mask, pof2, i, send_idx, recv_idx, last_idx, send_cnt;
int dst, is_commutative, rem, newdst, recv_cnt;
MPI_Aint true_lb, true_extent, extent;
- void *tmp_buf, *tmp_buf_free;
if (count == 0) {
return MPI_SUCCESS;
datatype->extent(&true_lb, &true_extent);
extent = datatype->get_extent();
- tmp_buf_free = smpi_get_tmp_recvbuffer(count * std::max(extent, true_extent));
+ unsigned char* tmp_buf_free = smpi_get_tmp_recvbuffer(count * std::max(extent, true_extent));
/* adjust for potential negative lower bound in datatype */
- tmp_buf = (void *) ((char *) tmp_buf_free - true_lb);
+ unsigned char* tmp_buf = tmp_buf_free - true_lb;
/* copy local data into recvbuf */
if (sendbuf != MPI_IN_PLACE) {
}
/* Send data from recvbuf. Recv into tmp_buf */
- Request::sendrecv((char *) recvbuf +
- disps[send_idx] * extent,
- send_cnt, datatype,
- dst, COLL_TAG_ALLREDUCE,
- (char *) tmp_buf +
- disps[recv_idx] * extent,
- recv_cnt, datatype, dst,
- COLL_TAG_ALLREDUCE, comm,
- MPI_STATUS_IGNORE);
+ Request::sendrecv(static_cast<char*>(recvbuf) + disps[send_idx] * extent, send_cnt, datatype, dst,
+ COLL_TAG_ALLREDUCE, tmp_buf + disps[recv_idx] * extent, recv_cnt, datatype, dst,
+ COLL_TAG_ALLREDUCE, comm, MPI_STATUS_IGNORE);
/* tmp_buf contains data received in this step.
recvbuf contains data accumulated so far */
/* This algorithm is used only for predefined ops
and predefined ops are always commutative. */
- if(op!=MPI_OP_NULL) op->apply( (char *) tmp_buf + disps[recv_idx] * extent,
- (char *) recvbuf + disps[recv_idx] * extent,
- &recv_cnt, datatype);
+ if (op != MPI_OP_NULL)
+ op->apply(tmp_buf + disps[recv_idx] * extent, static_cast<char*>(recvbuf) + disps[recv_idx] * extent,
+ &recv_cnt, datatype);
/* update send_idx for next iteration */
send_idx = recv_idx;
}
if (local_size != total_size) {
- void* sendtmpbuf = (char *)smpi_get_tmp_sendbuffer(count*datatype->get_extent());
+ unsigned char* sendtmpbuf = smpi_get_tmp_sendbuffer(count * datatype->get_extent());
Datatype::copy(recvbuf, count, datatype,sendtmpbuf, count, datatype);
/* inter-node allreduce */
if(MV2_Allreducection == &MPIR_Allreduce_pt2pt_rd_MV2){
unsigned int inbi;
size_t typelng;
char *tmpsend = NULL, *tmprecv = NULL;
- char *inbuf[2] = {NULL, NULL};
+ unsigned char* inbuf[2] = {nullptr, nullptr};
ptrdiff_t true_extent, extent;
ptrdiff_t block_offset, max_real_segsize;
MPI_Request reqs[2] = {NULL, NULL};
max_real_segsize = true_extent + (max_segcount - 1) * extent;
/* Allocate and initialize temporary buffers */
- inbuf[0] = (char*)smpi_get_tmp_sendbuffer(max_real_segsize);
+ inbuf[0] = smpi_get_tmp_sendbuffer(max_real_segsize);
if (NULL == inbuf[0]) { ret = -1; line = __LINE__; goto error_hndl; }
if (size > 2) {
- inbuf[1] = (char*)smpi_get_tmp_recvbuffer(max_real_segsize);
- if (NULL == inbuf[1]) { ret = -1; line = __LINE__; goto error_hndl; }
+ inbuf[1] = smpi_get_tmp_recvbuffer(max_real_segsize);
+ if (nullptr == inbuf[1]) {
+ ret = -1;
+ line = __LINE__;
+ goto error_hndl;
+ }
}
/* Handle MPI_IN_PLACE */
}
- if (NULL != inbuf[0]) smpi_free_tmp_buffer(inbuf[0]);
- if (NULL != inbuf[1]) smpi_free_tmp_buffer(inbuf[1]);
+ smpi_free_tmp_buffer(inbuf[0]);
+ smpi_free_tmp_buffer(inbuf[1]);
return MPI_SUCCESS;
error_hndl:
XBT_DEBUG("%s:%4d\tRank %d Error occurred %d\n",
__FILE__, line, rank, ret);
- if (NULL != inbuf[0]) smpi_free_tmp_buffer(inbuf[0]);
- if (NULL != inbuf[1]) smpi_free_tmp_buffer(inbuf[1]);
+ smpi_free_tmp_buffer(inbuf[0]);
+ smpi_free_tmp_buffer(inbuf[1]);
return ret;
}
}
int dst, newrank, rem, newdst, recv_cnt;
MPI_Aint extent;
MPI_Status status;
- void *tmp_buf = NULL;
unsigned int nprocs = comm->size();
int rank = comm->rank();
extent = dtype->get_extent();
- tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent);
+ unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent);
Datatype::copy(sbuff, count, dtype, rbuff, count, dtype);
}
// Send data from recvbuf. Recv into tmp_buf
- Request::sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt,
- dtype, dst, tag,
- (char *) tmp_buf + disps[recv_idx] * extent, recv_cnt,
- dtype, dst, tag, comm, &status);
+ Request::sendrecv(static_cast<char*>(rbuff) + disps[send_idx] * extent, send_cnt, dtype, dst, tag,
+ tmp_buf + disps[recv_idx] * extent, recv_cnt, dtype, dst, tag, comm, &status);
// tmp_buf contains data received in this step.
// recvbuf contains data accumulated so far
// This algorithm is used only for predefined ops
// and predefined ops are always commutative.
- if(op!=MPI_OP_NULL) op->apply( (char *) tmp_buf + disps[recv_idx] * extent,
- (char *) rbuff + disps[recv_idx] * extent, &recv_cnt, dtype);
+ if (op != MPI_OP_NULL)
+ op->apply(tmp_buf + disps[recv_idx] * extent, static_cast<char*>(rbuff) + disps[recv_idx] * extent, &recv_cnt,
+ dtype);
// update send_idx for next iteration
send_idx = recv_idx;
unsigned int pof2 = 1, mask;
int send_idx, recv_idx, dst, send_cnt, recv_cnt;
- void *recv, *tmp_buf;
-
int rank = comm->rank();
unsigned int nprocs = comm->size();
send_size = (count + nprocs) / nprocs;
newcnt = send_size * nprocs;
- recv = (void *) smpi_get_tmp_recvbuffer(extent * newcnt);
- tmp_buf = (void *) smpi_get_tmp_sendbuffer(extent * newcnt);
+ unsigned char* recv = smpi_get_tmp_recvbuffer(extent * newcnt);
+ unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(extent * newcnt);
memcpy(recv, sbuff, extent * count);
else
recv_idx = send_idx + (mask * share);
- Request::sendrecv((char *) recv + send_idx * extent, send_cnt, dtype, dst, tag,
- tmp_buf, recv_cnt, dtype, dst, tag, comm, &status);
+ Request::sendrecv(recv + send_idx * extent, send_cnt, dtype, dst, tag, tmp_buf, recv_cnt, dtype, dst, tag, comm,
+ &status);
- if(op!=MPI_OP_NULL) op->apply( tmp_buf, (char *) recv + recv_idx * extent, &recv_cnt,
- dtype);
+ if (op != MPI_OP_NULL)
+ op->apply(tmp_buf, recv + recv_idx * extent, &recv_cnt, dtype);
// update send_idx for next iteration
send_idx = recv_idx;
mask >>= 1;
}
- memcpy(tmp_buf, (char *) recv + recv_idx * extent, recv_cnt * extent);
+ memcpy(tmp_buf, recv + recv_idx * extent, recv_cnt * extent);
Colls::allgather(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm);
memcpy(rbuff, recv, count * extent);
}
else {
- tmp_buf = (void *) smpi_get_tmp_sendbuffer(extent * count);
+ unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(extent * count);
memcpy(rbuff, sbuff, count * extent);
mask = pof2 / 2;
share = count / pof2;
send_size = (count + nprocs) / nprocs;
nbytes = send_size * s_extent;
- void* send = smpi_get_tmp_sendbuffer(s_extent * send_size * nprocs);
- void* recv = smpi_get_tmp_recvbuffer(s_extent * send_size * nprocs);
- void* tmp = smpi_get_tmp_sendbuffer(nbytes);
+ unsigned char* send = smpi_get_tmp_sendbuffer(s_extent * send_size * nprocs);
+ unsigned char* recv = smpi_get_tmp_recvbuffer(s_extent * send_size * nprocs);
+ unsigned char* tmp = smpi_get_tmp_sendbuffer(nbytes);
memcpy(send, sbuff, s_extent * count);
memcpy(tmp, recv, nbytes);
for (i = 1, s_offset = nbytes; i < nprocs; i++, s_offset = i * nbytes)
- if(op!=MPI_OP_NULL) op->apply( (char *) recv + s_offset, tmp, &send_size, dtype);
+ if (op != MPI_OP_NULL)
+ op->apply(recv + s_offset, tmp, &send_size, dtype);
Colls::allgather(tmp, send_size, dtype, recv, send_size, dtype, comm);
memcpy(rbuff, recv, count * s_extent);
nbytes = send_size * s_extent;
r_offset = rank * nbytes;
- void* recv = smpi_get_tmp_recvbuffer(s_extent * send_size * nprocs);
+ unsigned char* recv = smpi_get_tmp_recvbuffer(s_extent * send_size * nprocs);
Colls::alltoall(send, send_size, dtype, recv, send_size, dtype, comm);
memcpy((char *) rbuff + r_offset, recv, nbytes);
for (i = 1, s_offset = nbytes; i < nprocs; i++, s_offset = i * nbytes)
- if(op!=MPI_OP_NULL) op->apply( (char *) recv + s_offset, (char *) rbuff + r_offset,
- &send_size, dtype);
+ if (op != MPI_OP_NULL)
+ op->apply(recv + s_offset, static_cast<char*>(rbuff) + r_offset, &send_size, dtype);
Colls::allgather((char *) rbuff + r_offset, send_size, dtype, rbuff, send_size,
dtype, comm);
int mask, dst, pof2, newrank, rem, newdst;
MPI_Aint extent, lb;
MPI_Status status;
- void *tmp_buf = NULL;
/*
#ifdef MPICH2_REDUCTION
MPI_User_function * uop = MPIR_Op_table[op % 16 - 1];
rank=comm->rank();
dtype->extent(&lb, &extent);
- tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent);
+ unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent);
Request::sendrecv(sbuff, count, dtype, rank, 500,
rbuff, count, dtype, rank, 500, comm, &status);
MPI_Op op, MPI_Comm comm)
{
int comm_size, rank;
- void *tmp_buf;
int tag = COLL_TAG_ALLREDUCE;
int mask, src, dst;
MPI_Status status;
rank = comm->rank();
MPI_Aint extent;
extent = dtype->get_extent();
- tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent);
+ unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent);
int intra_rank, inter_rank;
intra_rank = rank % num_core;
MPI_Op op, MPI_Comm comm)
{
int comm_size, rank;
- void *tmp_buf;
int tag = COLL_TAG_ALLREDUCE;
int mask, src, dst;
rank=comm->rank();
MPI_Aint extent, lb;
dtype->extent(&lb, &extent);
- tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent);
+ unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent);
/* compute intra and inter ranking */
int intra_rank, inter_rank;
MPI_Comm comm)
{
int comm_size, rank;
- void *tmp_buf;
int tag = COLL_TAG_ALLREDUCE;
int mask, src, dst;
MPI_Status status;
rank = comm->rank();
MPI_Aint extent;
extent = dtype->get_extent();
- tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent);
+ unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent);
/* compute intra and inter ranking */
int intra_rank, inter_rank;
MPI_Op op, MPI_Comm comm)
{
int comm_size, rank;
- void *tmp_buf;
int tag = COLL_TAG_ALLREDUCE;
int mask, src, dst;
MPI_Status status;
rank = comm->rank();
MPI_Aint extent;
extent = dtype->get_extent();
- tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent);
+ unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent);
int intra_rank, inter_rank;
intra_rank = rank % num_core;
MPI_Comm comm)
{
int comm_size, rank;
- void *tmp_buf;
int tag = COLL_TAG_ALLREDUCE;
int mask, src, dst;
MPI_Status status;
rank = comm->rank();
MPI_Aint extent;
extent = dtype->get_extent();
- tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent);
+ unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent);
int intra_rank, inter_rank;
intra_rank = rank % num_core;
MPI_Comm comm)
{
int comm_size, rank;
- void *tmp_buf;
int tag = COLL_TAG_ALLREDUCE;
int mask, src, dst;
MPI_Status status;
rank = comm->rank();
MPI_Aint extent;
extent = dtype->get_extent();
- tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent);
+ unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent);
int intra_rank, inter_rank;
intra_rank = rank % num_core;
MPI_Status s;
MPI_Aint extent;
- char *tmp_buff1, *tmp_buff2;
int i, j, src, dst, rank, num_procs, count, num_reqs;
int X, Y, send_offset, recv_offset;
int my_row_base, my_col_base, src_row_base, block_size;
block_size = extent * send_count;
- tmp_buff1 = (char *) smpi_get_tmp_sendbuffer(block_size * num_procs * Y);
- tmp_buff2 = (char *) smpi_get_tmp_recvbuffer(block_size * Y);
+ unsigned char* tmp_buff1 = smpi_get_tmp_sendbuffer(block_size * num_procs * Y);
+ unsigned char* tmp_buff2 = smpi_get_tmp_recvbuffer(block_size * Y);
num_reqs = X;
if (Y > X)
int my_z, two_dsize, my_row_base, my_col_base, my_z_base, src_row_base;
int src_z_base, send_offset, recv_offset, tag = COLL_TAG_ALLTOALL;
- char *tmp_buff1, *tmp_buff2;
-
rank = comm->rank();
num_procs = comm->size();
extent = send_type->get_extent();
block_size = extent * send_count;
- tmp_buff1 = (char *) smpi_get_tmp_sendbuffer(block_size * num_procs * two_dsize);
- tmp_buff2 = (char *) smpi_get_tmp_recvbuffer(block_size * two_dsize);
+ unsigned char* tmp_buff1 = smpi_get_tmp_sendbuffer(block_size * num_procs * two_dsize);
+ unsigned char* tmp_buff2 = smpi_get_tmp_recvbuffer(block_size * two_dsize);
MPI_Status* statuses = new MPI_Status[num_reqs];
MPI_Request* reqs = new MPI_Request[num_reqs];
int i, src, dst, rank, num_procs, count, block, position;
int pack_size, tag = COLL_TAG_ALLTOALL, pof2 = 1;
-
- char *tmp_buff;
char *send_ptr = (char *) send_buff;
char *recv_ptr = (char *) recv_buff;
extent = recv_type->get_extent();
- tmp_buff = (char *) smpi_get_tmp_sendbuffer(num_procs * recv_count * extent);
+ unsigned char* tmp_buff = smpi_get_tmp_sendbuffer(num_procs * recv_count * extent);
int* disps = new int[num_procs];
int* blocks_length = new int[num_procs];
int last_recv_count = 0, tmp_mask, tree_root, num_procs_completed;
int tag = COLL_TAG_ALLTOALL, mask = 1, i = 0;
- char *tmp_buff;
char *send_ptr = (char *) send_buff;
char *recv_ptr = (char *) recv_buff;
max_size = num_procs * recv_increment;
- tmp_buff = (char *) smpi_get_tmp_sendbuffer(max_size);
+ unsigned char* tmp_buff = smpi_get_tmp_sendbuffer(max_size);
curr_size = send_count * num_procs;
size_t nbytes = 0;
int is_homogeneous, is_contig;
MPI_Aint type_size;
- void *tmp_buf = NULL;
+ unsigned char* tmp_buf = nullptr;
MPI_Comm shmem_comm;
if (count == 0)
) {
if (not is_contig || not is_homogeneous) {
- tmp_buf = (void*)smpi_get_tmp_sendbuffer(nbytes);
+ tmp_buf = smpi_get_tmp_sendbuffer(nbytes);
/* TODO: Pipeline the packing and communication */
// position = 0;
int root,
MPI_Comm comm)
{
- void *leader_gather_buf = NULL;
- int comm_size, rank;
- int local_rank, local_size;
- int leader_comm_rank = -1, leader_comm_size = 0;
- int mpi_errno = MPI_SUCCESS;
- int recvtype_size = 0, sendtype_size = 0, nbytes=0;
- int leader_root, leader_of_root;
- MPI_Status status;
- MPI_Aint sendtype_extent = 0, recvtype_extent = 0; /* Datatype extent */
- MPI_Aint true_lb = 0, sendtype_true_extent = 0, recvtype_true_extent = 0;
- MPI_Comm shmem_comm, leader_comm;
- void* tmp_buf = NULL;
-
-
- //if not set (use of the algo directly, without mvapich2 selector)
- if(MV2_Gather_intra_node_function==NULL)
- MV2_Gather_intra_node_function= Coll_gather_mpich::gather;
-
- if(comm->get_leaders_comm()==MPI_COMM_NULL){
- comm->init_smp();
+ unsigned char* leader_gather_buf = NULL;
+ int comm_size, rank;
+ int local_rank, local_size;
+ int leader_comm_rank = -1, leader_comm_size = 0;
+ int mpi_errno = MPI_SUCCESS;
+ int recvtype_size = 0, sendtype_size = 0, nbytes = 0;
+ int leader_root, leader_of_root;
+ MPI_Status status;
+ MPI_Aint sendtype_extent = 0, recvtype_extent = 0; /* Datatype extent */
+ MPI_Aint true_lb = 0, sendtype_true_extent = 0, recvtype_true_extent = 0;
+ MPI_Comm shmem_comm, leader_comm;
+ unsigned char* tmp_buf = NULL;
+
+ // if not set (use of the algo directly, without mvapich2 selector)
+ if (MV2_Gather_intra_node_function == NULL)
+ MV2_Gather_intra_node_function = Coll_gather_mpich::gather;
+
+ if (comm->get_leaders_comm() == MPI_COMM_NULL) {
+ comm->init_smp();
}
comm_size = comm->size();
rank = comm->rank();
int vrank;
int size;
int total_recv = 0;
- char *ptmp = NULL;
- char *tempbuf = NULL;
+ unsigned char* ptmp = nullptr;
+ unsigned char* tempbuf = nullptr;
+ const unsigned char* src_buf;
int err;
ompi_coll_tree_t* bmtree;
MPI_Status status;
rdtype->extent(&rtrue_lb, &rtrue_extent);
if (0 == root) {
/* root on 0, just use the recv buffer */
- ptmp = (char*)rbuf;
+ ptmp = static_cast<unsigned char*>(rbuf);
if (sbuf != MPI_IN_PLACE) {
err = Datatype::copy(sbuf, scount, sdtype, ptmp, rcount, rdtype);
if (MPI_SUCCESS != err) {
} else {
/* root is not on 0, allocate temp buffer for recv,
* rotate data at the end */
- tempbuf = (char*)smpi_get_tmp_recvbuffer(rtrue_extent + (rcount * size - 1) * rextent);
+ tempbuf = smpi_get_tmp_recvbuffer(rtrue_extent + (rcount * size - 1) * rextent);
if (NULL == tempbuf) {
err = MPI_ERR_OTHER;
line = __LINE__;
}
}
total_recv = rcount;
+ src_buf = ptmp;
} else if (!(vrank % 2)) {
/* other non-leaf nodes, allocate temp buffer for data received from
* children, the most we need is half of the total data elements due
* to the property of the binomial tree */
- tempbuf = (char*)smpi_get_tmp_sendbuffer(strue_extent + (scount * size - 1) * sextent);
+ tempbuf = smpi_get_tmp_sendbuffer(strue_extent + (scount * size - 1) * sextent);
if (NULL == tempbuf) {
err = MPI_ERR_OTHER;
line = __LINE__;
rcount = scount;
rextent = sextent;
total_recv = rcount;
+ src_buf = ptmp;
} else {
/* leaf nodes, no temp buffer needed, use sdtype,scount as
* rdtype,rdcount since they are ignored on non-root procs */
- ptmp = (char*)sbuf;
total_recv = scount;
+ src_buf = static_cast<const unsigned char*>(sbuf);
}
if (!(vrank % 2)) {
/* all nodes except root send to parents */
XBT_DEBUG("smpi_coll_tuned_gather_ompi_binomial rank %d send %d count %d\n", rank, bmtree->tree_prev, total_recv);
- Request::send(ptmp, total_recv, sdtype, bmtree->tree_prev, COLL_TAG_GATHER, comm);
+ Request::send(src_buf, total_recv, sdtype, bmtree->tree_prev, COLL_TAG_GATHER, comm);
}
if (rank == root) {
if (root != 0) {
}
*/
- char *tmp_buf;
- tmp_buf = (char *) smpi_get_tmp_sendbuffer(count * extent);
+ unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent);
Request::sendrecv(buf, count, datatype, rank, tag, rbuf, count, datatype, rank,
tag, comm, &status);
/* root recv data */
if (rank == root) {
for (i = 0; i < pipe_length; i++) {
- recv_request_array[i] = Request::irecv((char *) tmp_buf + (i * increment), segment, datatype, from,
- (tag + i), comm);
+ recv_request_array[i] = Request::irecv(tmp_buf + (i * increment), segment, datatype, from, (tag + i), comm);
}
for (i = 0; i < pipe_length; i++) {
Request::wait(&recv_request_array[i], &status);
/* intermediate nodes relay (receive, reduce, then send) data */
else {
for (i = 0; i < pipe_length; i++) {
- recv_request_array[i] = Request::irecv((char *) tmp_buf + (i * increment), segment, datatype, from,
- (tag + i), comm);
+ recv_request_array[i] = Request::irecv(tmp_buf + (i * increment), segment, datatype, from, (tag + i), comm);
}
for (i = 0; i < pipe_length; i++) {
Request::wait(&recv_request_array[i], &status);
already_received[i] = 0;
}
- char *tmp_buf;
- tmp_buf = (char *) smpi_get_tmp_sendbuffer(count * extent);
+ unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent);
Request::sendrecv(buf, count, datatype, rank, tag, rbuf, count, datatype, rank,
tag, comm, &status);
int dst;
int tag = COLL_TAG_REDUCE;
MPI_Aint extent;
- void *tmp_buf;
MPI_Aint true_lb, true_extent;
if (count == 0)
return 0;
extent = datatype->get_extent();
- tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent);
+ unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent);
int is_commutative = (op==MPI_OP_NULL || op->is_commutative());
mask = 1;
datatype->extent(&true_lb, &true_extent);
/* adjust for potential negative lower bound in datatype */
- tmp_buf = (void *)((char*)tmp_buf - true_lb);
+ tmp_buf = tmp_buf - true_lb;
/* If I'm not the root, then my recvbuf may not be valid, therefore
I have to allocate a temporary one */
}
if (rank != root) {
- smpi_free_tmp_buffer(recvbuf);
+ smpi_free_tmp_buffer(static_cast<unsigned char*>(recvbuf));
}
smpi_free_tmp_buffer(tmp_buf);
int size;
int rank;
MPI_Aint extent;
- char *origin = 0;
- const char *inbuf;
+ unsigned char* origin = nullptr;
+ const unsigned char* inbuf;
MPI_Status status;
rank = comm->rank();
messages. */
if (size > 1)
- origin = (char *) smpi_get_tmp_recvbuffer(count * extent);
-
+ origin = smpi_get_tmp_recvbuffer(count * extent);
/* Initialize the receive buffer. */
if (rank == (size - 1))
for (i = size - 2; i >= 0; --i) {
if (rank == i)
- inbuf = static_cast<const char*>(sbuf);
+ inbuf = static_cast<const unsigned char*>(sbuf);
else {
Request::recv(origin, count, dtype, i, tag, comm, &status);
inbuf = origin;
}
- if (origin)
- smpi_free_tmp_buffer(origin);
+ smpi_free_tmp_buffer(origin);
/* All done */
return 0;
&dst, &expected_send_count, &expected_recv_count, &src_array);
if(expected_recv_count > 0 ) {
- void** tmp_buf = new void*[expected_recv_count];
+ unsigned char** tmp_buf = new unsigned char*[expected_recv_count];
MPI_Request* requests = new MPI_Request[expected_recv_count];
for (k = 0; k < expected_recv_count; k++) {
tmp_buf[k] = smpi_get_tmp_sendbuffer(count * std::max(extent, true_extent));
- tmp_buf[k] = (void*)((char*)tmp_buf[k] - true_lb);
+ tmp_buf[k] = tmp_buf[k] - true_lb;
}
while(recv_iter < expected_recv_count) {
Request::waitall(1, &send_request, &status);
- smpi_free_tmp_buffer((void *)((char*)recvbuf + true_lb));
+ smpi_free_tmp_buffer(static_cast<unsigned char*>(recvbuf) + true_lb);
}
/* --END ERROR HANDLING-- */
int leader_comm_rank = -1, leader_comm_size = 0;
MPI_Comm shmem_comm, leader_comm;
int leader_root, leader_of_root;
- void *in_buf = NULL, *out_buf = NULL, *tmp_buf = NULL;
+ const unsigned char* in_buf = nullptr;
+ unsigned char *out_buf = nullptr, *tmp_buf = nullptr;
MPI_Aint true_lb, true_extent, extent;
int is_commutative = 0, stride = 0;
int intra_node_root=0;
if (stride <= MV2_INTRA_SHMEM_REDUCE_MSG &&
is_commutative == 1) {
if (local_rank == 0 ) {
- tmp_buf = (void*)smpi_get_tmp_sendbuffer(count * std::max(extent, true_extent));
- tmp_buf = (void *) ((char *) tmp_buf - true_lb);
+ tmp_buf = smpi_get_tmp_sendbuffer(count * std::max(extent, true_extent));
+ tmp_buf = tmp_buf - true_lb;
}
if (sendbuf != MPI_IN_PLACE) {
- in_buf = (void *)sendbuf;
+ in_buf = static_cast<const unsigned char*>(sendbuf);
} else {
- in_buf = recvbuf;
+ in_buf = static_cast<const unsigned char*>(recvbuf);
}
if (local_rank == 0) {
if( my_rank != root) {
out_buf = tmp_buf;
} else {
- out_buf = recvbuf;
- if(in_buf == out_buf) {
- in_buf = MPI_IN_PLACE;
- out_buf = recvbuf;
+ out_buf = static_cast<unsigned char*>(recvbuf);
+ if (in_buf == out_buf) {
+ in_buf = static_cast<const unsigned char*>(MPI_IN_PLACE);
+ out_buf = static_cast<unsigned char*>(recvbuf);
}
}
} else {
- in_buf = (void *)sendbuf;
- out_buf = NULL;
+ in_buf = static_cast<const unsigned char*>(sendbuf);
+ out_buf = nullptr;
}
if (count * (std::max(extent, true_extent)) < SHMEM_COLL_BLOCK_SIZE) {
root, comm);
}
/* We are done */
- if(tmp_buf!=NULL)
- smpi_free_tmp_buffer((void *) ((char *) tmp_buf + true_lb));
+ if (tmp_buf != nullptr)
+ smpi_free_tmp_buffer(tmp_buf + true_lb);
goto fn_exit;
}
}
leader_comm_size = leader_comm->size();
leader_comm_rank = leader_comm->rank();
- tmp_buf = (void*)smpi_get_tmp_sendbuffer(count * std::max(extent, true_extent));
- tmp_buf = (void *) ((char *) tmp_buf - true_lb);
+ tmp_buf = smpi_get_tmp_sendbuffer(count * std::max(extent, true_extent));
+ tmp_buf = tmp_buf - true_lb;
}
if (sendbuf != MPI_IN_PLACE) {
- in_buf = (void *)sendbuf;
+ in_buf = static_cast<const unsigned char*>(sendbuf);
} else {
- in_buf = recvbuf;
+ in_buf = static_cast<const unsigned char*>(recvbuf);
}
if (local_rank == 0) {
- out_buf = tmp_buf;
+ out_buf = static_cast<unsigned char*>(tmp_buf);
} else {
- out_buf = NULL;
+ out_buf = nullptr;
}
intra_node_root, shmem_comm);
}
} else {
- smpi_free_tmp_buffer((void *) ((char *) tmp_buf + true_lb));
- tmp_buf = in_buf;
+ smpi_free_tmp_buffer(tmp_buf + true_lb);
+ tmp_buf = (unsigned char*)in_buf; // xxx
}
/* Now work on the inter-leader phase. Data is in tmp_buf */
* root of the reduce op. So, I will write the
* final result directly into my recvbuf */
if(tmp_buf != recvbuf) {
- in_buf = tmp_buf;
- out_buf = recvbuf;
+ in_buf = tmp_buf;
+ out_buf = static_cast<unsigned char*>(recvbuf);
} else {
- in_buf = (char *)smpi_get_tmp_sendbuffer(count*
- datatype->get_extent());
- Datatype::copy(tmp_buf, count, datatype,
- in_buf, count, datatype);
- //in_buf = MPI_IN_PLACE;
- out_buf = recvbuf;
+ unsigned char* buf = smpi_get_tmp_sendbuffer(count * datatype->get_extent());
+ Datatype::copy(tmp_buf, count, datatype, buf, count, datatype);
+ // in_buf = MPI_IN_PLACE;
+ in_buf = buf;
+ out_buf = static_cast<unsigned char*>(recvbuf);
}
} else {
- in_buf = (char *)smpi_get_tmp_sendbuffer(count*
- datatype->get_extent());
- Datatype::copy(tmp_buf, count, datatype,
- in_buf, count, datatype);
- //in_buf = MPI_IN_PLACE;
- out_buf = tmp_buf;
+ unsigned char* buf = smpi_get_tmp_sendbuffer(count * datatype->get_extent());
+ Datatype::copy(tmp_buf, count, datatype, buf, count, datatype);
+ // in_buf = MPI_IN_PLACE;
+ in_buf = buf;
+ out_buf = tmp_buf;
}
} else {
in_buf = tmp_buf;
- out_buf = NULL;
+ out_buf = nullptr;
}
/* inter-leader communication */
}
if (local_size > 1) {
- /* Send the message to the root if the leader is not the
- * root of the reduce operation. The reduced data is in tmp_buf */
- if ((local_rank == 0) && (root != my_rank)
- && (leader_root == leader_comm_rank)) {
- Request::send(tmp_buf, count, datatype, root,
- COLL_TAG_REDUCE+1, comm);
- }
- if ((local_rank != 0) && (root == my_rank)) {
- Request::recv(recvbuf, count, datatype,
- leader_of_root,
- COLL_TAG_REDUCE+1, comm,
- MPI_STATUS_IGNORE);
- }
- smpi_free_tmp_buffer((void *) ((char *) tmp_buf + true_lb));
+ /* Send the message to the root if the leader is not the
+ * root of the reduce operation. The reduced data is in tmp_buf */
+ if ((local_rank == 0) && (root != my_rank) && (leader_root == leader_comm_rank)) {
+ Request::send(tmp_buf, count, datatype, root, COLL_TAG_REDUCE + 1, comm);
+ }
+ if ((local_rank != 0) && (root == my_rank)) {
+ Request::recv(recvbuf, count, datatype, leader_of_root, COLL_TAG_REDUCE + 1, comm, MPI_STATUS_IGNORE);
+ }
+ smpi_free_tmp_buffer(tmp_buf + true_lb);
if (leader_comm_rank == leader_root) {
if (my_rank != root || (my_rank == root && tmp_buf == recvbuf)) {
ompi_coll_tree_t* tree, int count_by_segment,
int max_outstanding_reqs )
{
- char *inbuf[2] = {NULL, NULL}, *inbuf_free[2] = {NULL, NULL};
- char *accumbuf = NULL, *accumbuf_free = NULL;
- char *local_op_buffer = NULL, *sendtmpbuf = NULL;
- ptrdiff_t extent, lower_bound, segment_increment;
- MPI_Request reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL};
- int num_segments, line, ret, segindex, i, rank;
- int recvcount, prevcount, inbi;
-
- /**
- * Determine number of segments and number of elements
- * sent per operation
- */
- datatype->extent(&lower_bound, &extent);
- num_segments = (original_count + count_by_segment - 1) / count_by_segment;
- segment_increment = count_by_segment * extent;
-
- sendtmpbuf = (char*) sendbuf;
- if( sendbuf == MPI_IN_PLACE ) {
- sendtmpbuf = (char *)recvbuf;
+ unsigned char *inbuf[2] = {nullptr, nullptr}, *inbuf_free[2] = {nullptr, nullptr};
+ unsigned char *accumbuf = nullptr, *accumbuf_free = nullptr;
+ const unsigned char *local_op_buffer = nullptr, *sendtmpbuf = nullptr;
+ ptrdiff_t extent, lower_bound, segment_increment;
+ MPI_Request reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL};
+ int num_segments, line, ret, segindex, i, rank;
+ int recvcount, prevcount, inbi;
+
+ /**
+ * Determine number of segments and number of elements
+ * sent per operation
+ */
+ datatype->extent(&lower_bound, &extent);
+ num_segments = (original_count + count_by_segment - 1) / count_by_segment;
+ segment_increment = count_by_segment * extent;
+
+ sendtmpbuf = static_cast<const unsigned char*>(sendbuf);
+ if (sendbuf == MPI_IN_PLACE) {
+ sendtmpbuf = static_cast<const unsigned char*>(recvbuf);
}
XBT_DEBUG("coll:tuned:reduce_generic count %d, msg size %lu, segsize %lu, max_requests %d", original_count,
/* handle non existant recv buffer (i.e. its NULL) and
protect the recv buffer on non-root nodes */
- accumbuf = (char*)recvbuf;
- if( (NULL == accumbuf) || (root != rank) ) {
- /* Allocate temporary accumulator buffer. */
- accumbuf_free = (char*)smpi_get_tmp_sendbuffer(true_extent +
- (original_count - 1) * extent);
- if (accumbuf_free == NULL) {
- line = __LINE__; ret = -1; goto error_hndl;
- }
- accumbuf = accumbuf_free - lower_bound;
+ accumbuf = static_cast<unsigned char*>(recvbuf);
+ if (nullptr == accumbuf || root != rank) {
+ /* Allocate temporary accumulator buffer. */
+ accumbuf_free = smpi_get_tmp_sendbuffer(true_extent + (original_count - 1) * extent);
+ if (accumbuf_free == nullptr) {
+ line = __LINE__;
+ ret = -1;
+ goto error_hndl;
+ }
+ accumbuf = accumbuf_free - lower_bound;
}
/* If this is a non-commutative operation we must copy
sendbuf to the accumbuf, in order to simplfy the loops */
if ((op != MPI_OP_NULL && not op->is_commutative())) {
- Datatype::copy((char*)sendtmpbuf, original_count, datatype, (char*)accumbuf, original_count, datatype);
+ Datatype::copy(sendtmpbuf, original_count, datatype, accumbuf, original_count, datatype);
}
/* Allocate two buffers for incoming segments */
real_segment_size = true_extent + (count_by_segment - 1) * extent;
- inbuf_free[0] = (char*) smpi_get_tmp_recvbuffer(real_segment_size);
- if( inbuf_free[0] == NULL ) {
- line = __LINE__; ret = -1; goto error_hndl;
+ inbuf_free[0] = smpi_get_tmp_recvbuffer(real_segment_size);
+ if (inbuf_free[0] == nullptr) {
+ line = __LINE__;
+ ret = -1;
+ goto error_hndl;
}
inbuf[0] = inbuf_free[0] - lower_bound;
/* if there is chance to overlap communication -
allocate second buffer */
if( (num_segments > 1) || (tree->tree_nextsize > 1) ) {
- inbuf_free[1] = (char*) smpi_get_tmp_recvbuffer(real_segment_size);
- if( inbuf_free[1] == NULL ) {
- line = __LINE__; ret = -1; goto error_hndl;
+ inbuf_free[1] = smpi_get_tmp_recvbuffer(real_segment_size);
+ if (inbuf_free[1] == nullptr) {
+ line = __LINE__;
+ ret = -1;
+ goto error_hndl;
}
inbuf[1] = inbuf_free[1] - lower_bound;
}
io_root = size - 1;
const void* use_this_sendbuf = sendbuf;
void* use_this_recvbuf = recvbuf;
- void* tmp_sendbuf = nullptr;
- void* tmp_recvbuf = nullptr;
+ unsigned char* tmp_sendbuf = nullptr;
+ unsigned char* tmp_recvbuf = nullptr;
if (io_root != root) {
ptrdiff_t text, ext;
{
int i, rank, size;
ptrdiff_t true_extent, lb, extent;
- char *free_buffer = NULL;
- char *pml_buffer = NULL;
- char *inplace_temp = NULL;
- char *inbuf;
+ unsigned char* free_buffer = nullptr;
+ unsigned char* pml_buffer = nullptr;
+ unsigned char* inplace_temp = nullptr;
+ const unsigned char* inbuf;
/* Initialize */
if (MPI_IN_PLACE == sbuf) {
sbuf = rbuf;
- inplace_temp = (char*)smpi_get_tmp_recvbuffer(true_extent + (count - 1) * extent);
- if (NULL == inplace_temp) {
- return -1;
+ inplace_temp = smpi_get_tmp_recvbuffer(true_extent + (count - 1) * extent);
+ if (nullptr == inplace_temp) {
+ return -1;
}
rbuf = inplace_temp - lb;
}
if (size > 1) {
- free_buffer = (char*)smpi_get_tmp_recvbuffer(true_extent + (count - 1) * extent);
- pml_buffer = free_buffer - lb;
+ free_buffer = smpi_get_tmp_recvbuffer(true_extent + (count - 1) * extent);
+ pml_buffer = free_buffer - lb;
}
/* Initialize the receive buffer. */
for (i = size - 2; i >= 0; --i) {
if (rank == i) {
- inbuf = (char*)sbuf;
+ inbuf = static_cast<const unsigned char*>(sbuf);
} else {
Request::recv(pml_buffer, count, dtype, i,
COLL_TAG_REDUCE, comm,
if(op!=MPI_OP_NULL) op->apply( inbuf, rbuf, &count, dtype);
}
- if (NULL != inplace_temp) {
- Datatype::copy(inplace_temp, count, dtype,(char*)sbuf
- ,count , dtype);
- smpi_free_tmp_buffer(inplace_temp);
+ if (nullptr != inplace_temp) {
+ Datatype::copy(inplace_temp, count, dtype, (char*)sbuf, count, dtype);
+ smpi_free_tmp_buffer(inplace_temp);
}
- if (NULL != free_buffer) {
- smpi_free_tmp_buffer(free_buffer);
+ if (nullptr != free_buffer) {
+ smpi_free_tmp_buffer(free_buffer);
}
/* All done */
int dst, send_cnt, recv_cnt, newroot, newdst_tree_root;
int newroot_tree_root, new_count;
int tag = COLL_TAG_REDUCE,temporary_buffer=0;
- void *send_ptr, *recv_ptr, *tmp_buf;
+ unsigned char *send_ptr, *recv_ptr, *tmp_buf;
cnts = NULL;
disps = NULL;
if (count < comm_size) {
new_count = comm_size;
- send_ptr = (void *) smpi_get_tmp_sendbuffer(new_count * extent);
- recv_ptr = (void *) smpi_get_tmp_recvbuffer(new_count * extent);
- tmp_buf = (void *) smpi_get_tmp_sendbuffer(new_count * extent);
+ send_ptr = smpi_get_tmp_sendbuffer(new_count * extent);
+ recv_ptr = smpi_get_tmp_recvbuffer(new_count * extent);
+ tmp_buf = smpi_get_tmp_sendbuffer(new_count * extent);
memcpy(send_ptr, sendbuf != MPI_IN_PLACE ? sendbuf : recvbuf, extent * count);
//if ((rank != root))
}
/* Send data from recvbuf. Recv into tmp_buf */
- Request::sendrecv((char *) recv_ptr +
- disps[send_idx] * extent,
- send_cnt, datatype,
- dst, tag,
- (char *) tmp_buf +
- disps[recv_idx] * extent,
- recv_cnt, datatype, dst, tag, comm, &status);
+ Request::sendrecv(recv_ptr + disps[send_idx] * extent, send_cnt, datatype, dst, tag,
+ tmp_buf + disps[recv_idx] * extent, recv_cnt, datatype, dst, tag, comm, &status);
/* tmp_buf contains data received in this step.
recvbuf contains data accumulated so far */
- if(op!=MPI_OP_NULL) op->apply( (char *) tmp_buf + disps[recv_idx] * extent,
- (char *) recv_ptr + disps[recv_idx] * extent,
- &recv_cnt, datatype);
+ if (op != MPI_OP_NULL)
+ op->apply(tmp_buf + disps[recv_idx] * extent, recv_ptr + disps[recv_idx] * extent, &recv_cnt, datatype);
/* update send_idx for next iteration */
send_idx = recv_idx;
}
if (newdst_tree_root == newroot_tree_root) {
- Request::send((char *) recv_ptr +
- disps[send_idx] * extent,
- send_cnt, datatype, dst, tag, comm);
+ Request::send(recv_ptr + disps[send_idx] * extent, send_cnt, datatype, dst, tag, comm);
break;
} else {
- Request::recv((char *) recv_ptr +
- disps[recv_idx] * extent,
- recv_cnt, datatype, dst, tag, comm, &status);
+ Request::recv(recv_ptr + disps[recv_idx] * extent, recv_cnt, datatype, dst, tag, comm, &status);
}
if (newrank > newdst)
else /* (count >= comm_size) */ {
- tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent);
+ tmp_buf = smpi_get_tmp_sendbuffer(count * extent);
//if ((rank != root))
Request::sendrecv(sendbuf != MPI_IN_PLACE ? sendbuf : recvbuf, count, datatype, rank, tag,
}
/* Send data from recvbuf. Recv into tmp_buf */
- Request::sendrecv((char *) recvbuf +
- disps[send_idx] * extent,
- send_cnt, datatype,
- dst, tag,
- (char *) tmp_buf +
- disps[recv_idx] * extent,
- recv_cnt, datatype, dst, tag, comm, &status);
+ Request::sendrecv(static_cast<char*>(recvbuf) + disps[send_idx] * extent, send_cnt, datatype, dst, tag,
+ tmp_buf + disps[recv_idx] * extent, recv_cnt, datatype, dst, tag, comm, &status);
/* tmp_buf contains data received in this step.
recvbuf contains data accumulated so far */
- if(op!=MPI_OP_NULL) op->apply( (char *) tmp_buf + disps[recv_idx] * extent,
- (char *) recvbuf + disps[recv_idx] * extent,
- &recv_cnt, datatype);
+ if (op != MPI_OP_NULL)
+ op->apply(tmp_buf + disps[recv_idx] * extent, static_cast<char*>(recvbuf) + disps[recv_idx] * extent,
+ &recv_cnt, datatype);
/* update send_idx for next iteration */
send_idx = recv_idx;
}
if (tmp_buf)
smpi_free_tmp_buffer(tmp_buf);
- if(temporary_buffer==1) smpi_free_tmp_buffer(recvbuf);
+ if (temporary_buffer == 1)
+ smpi_free_tmp_buffer(static_cast<unsigned char*>(recvbuf));
delete[] cnts;
delete[] disps;
{
int rank, comm_size, i;
MPI_Aint extent, true_extent, true_lb;
- void *tmp_recvbuf;
+ unsigned char* tmp_recvbuf;
int mpi_errno = MPI_SUCCESS;
int total_count, dst, src;
int is_commutative;
}
/* allocate temporary buffer to store incoming data */
- tmp_recvbuf = (void*)smpi_get_tmp_recvbuffer(recvcounts[rank] * std::max(true_extent, extent) + 1);
+ tmp_recvbuf = smpi_get_tmp_recvbuffer(recvcounts[rank] * std::max(true_extent, extent) + 1);
/* adjust for potential negative lower bound in datatype */
- tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb);
+ tmp_recvbuf = tmp_recvbuf - true_lb;
for (i=1; i<comm_size; i++) {
src = (rank - i + comm_size) % comm_size;
int block_size, total_count, size;
MPI_Aint true_extent, true_lb;
int buf0_was_inout;
- void *tmp_buf0;
- void *tmp_buf1;
- void *result_ptr;
+ unsigned char* tmp_buf0;
+ unsigned char* tmp_buf1;
+ unsigned char* result_ptr;
datatype->extent(&true_lb, &true_extent);
block_size = recvcounts[0];
total_count = block_size * comm_size;
- tmp_buf0=( void *)smpi_get_tmp_sendbuffer( true_extent * total_count);
- tmp_buf1=( void *)smpi_get_tmp_recvbuffer( true_extent * total_count);
- void *tmp_buf0_save=tmp_buf0;
- void *tmp_buf1_save=tmp_buf1;
+ tmp_buf0 = smpi_get_tmp_sendbuffer(true_extent * total_count);
+ tmp_buf1 = smpi_get_tmp_recvbuffer(true_extent * total_count);
+ unsigned char* tmp_buf0_save = tmp_buf0;
+ unsigned char* tmp_buf1_save = tmp_buf1;
/* adjust for potential negative lower bound in datatype */
- tmp_buf0 = (void *)((char*)tmp_buf0 - true_lb);
- tmp_buf1 = (void *)((char*)tmp_buf1 - true_lb);
+ tmp_buf0 = tmp_buf0 - true_lb;
+ tmp_buf1 = tmp_buf1 - true_lb;
/* Copy our send data to tmp_buf0. We do this one block at a time and
permute the blocks as we go according to the mirror permutation. */
for (i = 0; i < comm_size; ++i) {
- mpi_errno = Datatype::copy((char *)(sendbuf == MPI_IN_PLACE ? recvbuf : sendbuf) + (i * true_extent * block_size), block_size, datatype,
- (char *)tmp_buf0 + (MPIU_Mirror_permutation(i, log2_comm_size) * true_extent * block_size), block_size, datatype);
- if (mpi_errno) return(mpi_errno);
+ mpi_errno = Datatype::copy(
+ static_cast<const char*>(sendbuf == MPI_IN_PLACE ? recvbuf : sendbuf) + (i * true_extent * block_size), block_size,
+ datatype, tmp_buf0 + (MPIU_Mirror_permutation(i, log2_comm_size) * true_extent * block_size), block_size,
+ datatype);
+ if (mpi_errno)
+ return mpi_errno;
}
buf0_was_inout = 1;
size = total_count;
for (k = 0; k < log2_comm_size; ++k) {
/* use a double-buffering scheme to avoid local copies */
- char *incoming_data = static_cast<char*>(buf0_was_inout ? tmp_buf1 : tmp_buf0);
- char *outgoing_data = static_cast<char*>(buf0_was_inout ? tmp_buf0 : tmp_buf1);
+ unsigned char* incoming_data = buf0_was_inout ? tmp_buf1 : tmp_buf0;
+ unsigned char* outgoing_data = buf0_was_inout ? tmp_buf0 : tmp_buf1;
int peer = rank ^ (0x1 << k);
size /= 2;
xbt_assert(size == recvcounts[rank]);
/* copy the reduced data to the recvbuf */
- result_ptr = (char *)(buf0_was_inout ? tmp_buf0 : tmp_buf1) + recv_offset * true_extent;
+ result_ptr = (buf0_was_inout ? tmp_buf0 : tmp_buf1) + recv_offset * true_extent;
mpi_errno = Datatype::copy(result_ptr, size, datatype,
recvbuf, size, datatype);
smpi_free_tmp_buffer(tmp_buf0_save);
{
int rank, comm_size, i;
MPI_Aint extent, true_extent, true_lb;
- void *tmp_recvbuf, *tmp_results;
int mpi_errno = MPI_SUCCESS;
int dis[2], blklens[2], total_count, dst;
int mask, dst_tree_root, my_tree_root, j, k;
/* noncommutative and (non-pof2 or block irregular), use recursive doubling. */
/* need to allocate temporary buffer to receive incoming data*/
- tmp_recvbuf= (void*)smpi_get_tmp_recvbuffer(total_count * std::max(true_extent, extent));
- /* adjust for potential negative lower bound in datatype */
- tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb);
-
- /* need to allocate another temporary buffer to accumulate
- results */
- tmp_results = (void*)smpi_get_tmp_sendbuffer(total_count * std::max(true_extent, extent));
- /* adjust for potential negative lower bound in datatype */
- tmp_results = (void *)((char*)tmp_results - true_lb);
-
- /* copy sendbuf into tmp_results */
- if (sendbuf != MPI_IN_PLACE)
- mpi_errno = Datatype::copy(sendbuf, total_count, datatype,
- tmp_results, total_count, datatype);
- else
- mpi_errno = Datatype::copy(recvbuf, total_count, datatype,
- tmp_results, total_count, datatype);
-
- if (mpi_errno) return(mpi_errno);
-
- mask = 0x1;
- i = 0;
- while (mask < comm_size) {
- dst = rank ^ mask;
-
- dst_tree_root = dst >> i;
- dst_tree_root <<= i;
-
- my_tree_root = rank >> i;
- my_tree_root <<= i;
-
- /* At step 1, processes exchange (n-n/p) amount of
- data; at step 2, (n-2n/p) amount of data; at step 3, (n-4n/p)
- amount of data, and so forth. We use derived datatypes for this.
-
- At each step, a process does not need to send data
- indexed from my_tree_root to
- my_tree_root+mask-1. Similarly, a process won't receive
- data indexed from dst_tree_root to dst_tree_root+mask-1. */
-
- /* calculate sendtype */
- blklens[0] = blklens[1] = 0;
- for (j=0; j<my_tree_root; j++)
- blklens[0] += recvcounts[j];
- for (j=my_tree_root+mask; j<comm_size; j++)
- blklens[1] += recvcounts[j];
-
- dis[0] = 0;
- dis[1] = blklens[0];
- for (j=my_tree_root; (j<my_tree_root+mask) && (j<comm_size); j++)
- dis[1] += recvcounts[j];
-
- mpi_errno = Datatype::create_indexed(2, blklens, dis, datatype, &sendtype);
- if (mpi_errno) return(mpi_errno);
-
- sendtype->commit();
-
- /* calculate recvtype */
- blklens[0] = blklens[1] = 0;
- for (j=0; j<dst_tree_root && j<comm_size; j++)
- blklens[0] += recvcounts[j];
- for (j=dst_tree_root+mask; j<comm_size; j++)
- blklens[1] += recvcounts[j];
-
- dis[0] = 0;
- dis[1] = blklens[0];
- for (j=dst_tree_root; (j<dst_tree_root+mask) && (j<comm_size); j++)
- dis[1] += recvcounts[j];
-
- mpi_errno = Datatype::create_indexed(2, blklens, dis, datatype, &recvtype);
- if (mpi_errno) return(mpi_errno);
-
- recvtype->commit();
-
- received = 0;
- if (dst < comm_size) {
- /* tmp_results contains data to be sent in each step. Data is
- received in tmp_recvbuf and then accumulated into
- tmp_results. accumulation is done later below. */
-
- Request::sendrecv(tmp_results, 1, sendtype, dst,
- COLL_TAG_SCATTER,
- tmp_recvbuf, 1, recvtype, dst,
- COLL_TAG_SCATTER, comm,
- MPI_STATUS_IGNORE);
- received = 1;
- }
-
- /* if some processes in this process's subtree in this step
- did not have any destination process to communicate with
- because of non-power-of-two, we need to send them the
- result. We use a logarithmic recursive-halfing algorithm
- for this. */
-
- if (dst_tree_root + mask > comm_size) {
- nprocs_completed = comm_size - my_tree_root - mask;
- /* nprocs_completed is the number of processes in this
- subtree that have all the data. Send data to others
- in a tree fashion. First find root of current tree
- that is being divided into two. k is the number of
- least-significant bits in this process's rank that
- must be zeroed out to find the rank of the root */
- j = mask;
- k = 0;
- while (j) {
- j >>= 1;
- k++;
- }
- k--;
-
- tmp_mask = mask >> 1;
- while (tmp_mask) {
- dst = rank ^ tmp_mask;
-
- tree_root = rank >> k;
- tree_root <<= k;
-
- /* send only if this proc has data and destination
- doesn't have data. at any step, multiple processes
- can send if they have the data */
- if ((dst > rank) &&
- (rank < tree_root + nprocs_completed)
- && (dst >= tree_root + nprocs_completed)) {
- /* send the current result */
- Request::send(tmp_recvbuf, 1, recvtype,
- dst, COLL_TAG_SCATTER,
- comm);
- }
- /* recv only if this proc. doesn't have data and sender
- has data */
- else if ((dst < rank) &&
- (dst < tree_root + nprocs_completed) &&
- (rank >= tree_root + nprocs_completed)) {
- Request::recv(tmp_recvbuf, 1, recvtype, dst,
- COLL_TAG_SCATTER,
- comm, MPI_STATUS_IGNORE);
- received = 1;
- }
- tmp_mask >>= 1;
- k--;
- }
- }
+ unsigned char* tmp_recvbuf = smpi_get_tmp_recvbuffer(total_count * std::max(true_extent, extent));
+ /* adjust for potential negative lower bound in datatype */
+ tmp_recvbuf = tmp_recvbuf - true_lb;
- /* The following reduction is done here instead of after
- the MPIC_Sendrecv_ft or MPIC_Recv_ft above. This is
- because to do it above, in the noncommutative
- case, we would need an extra temp buffer so as not to
- overwrite temp_recvbuf, because temp_recvbuf may have
- to be communicated to other processes in the
- non-power-of-two case. To avoid that extra allocation,
- we do the reduce here. */
- if (received) {
- if (is_commutative || (dst_tree_root < my_tree_root)) {
- {
- if (op != MPI_OP_NULL)
- op->apply(tmp_recvbuf, tmp_results, &blklens[0], datatype);
- if (op != MPI_OP_NULL)
- op->apply(((char*)tmp_recvbuf + dis[1] * extent), ((char*)tmp_results + dis[1] * extent),
- &blklens[1], datatype);
- }
- }
- else {
- {
- if (op != MPI_OP_NULL)
- op->apply(tmp_results, tmp_recvbuf, &blklens[0], datatype);
- if (op != MPI_OP_NULL)
- op->apply(((char*)tmp_results + dis[1] * extent), ((char*)tmp_recvbuf + dis[1] * extent),
- &blklens[1], datatype);
- }
- /* copy result back into tmp_results */
- mpi_errno = Datatype::copy(tmp_recvbuf, 1, recvtype,
- tmp_results, 1, recvtype);
- if (mpi_errno) return(mpi_errno);
- }
- }
+ /* need to allocate another temporary buffer to accumulate
+ results */
+ unsigned char* tmp_results = smpi_get_tmp_sendbuffer(total_count * std::max(true_extent, extent));
+ /* adjust for potential negative lower bound in datatype */
+ tmp_results = tmp_results - true_lb;
+
+ /* copy sendbuf into tmp_results */
+ if (sendbuf != MPI_IN_PLACE)
+ mpi_errno = Datatype::copy(sendbuf, total_count, datatype, tmp_results, total_count, datatype);
+ else
+ mpi_errno = Datatype::copy(recvbuf, total_count, datatype, tmp_results, total_count, datatype);
+
+ if (mpi_errno)
+ return (mpi_errno);
+
+ mask = 0x1;
+ i = 0;
+ while (mask < comm_size) {
+ dst = rank ^ mask;
+
+ dst_tree_root = dst >> i;
+ dst_tree_root <<= i;
+
+ my_tree_root = rank >> i;
+ my_tree_root <<= i;
+
+ /* At step 1, processes exchange (n-n/p) amount of
+ data; at step 2, (n-2n/p) amount of data; at step 3, (n-4n/p)
+ amount of data, and so forth. We use derived datatypes for this.
+
+ At each step, a process does not need to send data
+ indexed from my_tree_root to
+ my_tree_root+mask-1. Similarly, a process won't receive
+ data indexed from dst_tree_root to dst_tree_root+mask-1. */
+
+ /* calculate sendtype */
+ blklens[0] = blklens[1] = 0;
+ for (j = 0; j < my_tree_root; j++)
+ blklens[0] += recvcounts[j];
+ for (j = my_tree_root + mask; j < comm_size; j++)
+ blklens[1] += recvcounts[j];
+
+ dis[0] = 0;
+ dis[1] = blklens[0];
+ for (j = my_tree_root; (j < my_tree_root + mask) && (j < comm_size); j++)
+ dis[1] += recvcounts[j];
+
+ mpi_errno = Datatype::create_indexed(2, blklens, dis, datatype, &sendtype);
+ if (mpi_errno)
+ return (mpi_errno);
+
+ sendtype->commit();
+
+ /* calculate recvtype */
+ blklens[0] = blklens[1] = 0;
+ for (j = 0; j < dst_tree_root && j < comm_size; j++)
+ blklens[0] += recvcounts[j];
+ for (j = dst_tree_root + mask; j < comm_size; j++)
+ blklens[1] += recvcounts[j];
+
+ dis[0] = 0;
+ dis[1] = blklens[0];
+ for (j = dst_tree_root; (j < dst_tree_root + mask) && (j < comm_size); j++)
+ dis[1] += recvcounts[j];
+
+ mpi_errno = Datatype::create_indexed(2, blklens, dis, datatype, &recvtype);
+ if (mpi_errno)
+ return (mpi_errno);
+
+ recvtype->commit();
+
+ received = 0;
+ if (dst < comm_size) {
+ /* tmp_results contains data to be sent in each step. Data is
+ received in tmp_recvbuf and then accumulated into
+ tmp_results. accumulation is done later below. */
+
+ Request::sendrecv(tmp_results, 1, sendtype, dst, COLL_TAG_SCATTER, tmp_recvbuf, 1, recvtype, dst,
+ COLL_TAG_SCATTER, comm, MPI_STATUS_IGNORE);
+ received = 1;
+ }
+
+ /* if some processes in this process's subtree in this step
+ did not have any destination process to communicate with
+ because of non-power-of-two, we need to send them the
+ result. We use a logarithmic recursive-halving algorithm
+ for this. */
+
+ if (dst_tree_root + mask > comm_size) {
+ nprocs_completed = comm_size - my_tree_root - mask;
+ /* nprocs_completed is the number of processes in this
+ subtree that have all the data. Send data to others
+ in a tree fashion. First find root of current tree
+ that is being divided into two. k is the number of
+ least-significant bits in this process's rank that
+ must be zeroed out to find the rank of the root */
+ j = mask;
+ k = 0;
+ while (j) {
+ j >>= 1;
+ k++;
+ }
+ k--;
+
+ tmp_mask = mask >> 1;
+ while (tmp_mask) {
+ dst = rank ^ tmp_mask;
+
+ tree_root = rank >> k;
+ tree_root <<= k;
+
+ /* send only if this proc has data and destination
+ doesn't have data. at any step, multiple processes
+ can send if they have the data */
+ if ((dst > rank) && (rank < tree_root + nprocs_completed) && (dst >= tree_root + nprocs_completed)) {
+ /* send the current result */
+ Request::send(tmp_recvbuf, 1, recvtype, dst, COLL_TAG_SCATTER, comm);
+ }
+ /* recv only if this proc. doesn't have data and sender
+ has data */
+ else if ((dst < rank) && (dst < tree_root + nprocs_completed) && (rank >= tree_root + nprocs_completed)) {
+ Request::recv(tmp_recvbuf, 1, recvtype, dst, COLL_TAG_SCATTER, comm, MPI_STATUS_IGNORE);
+ received = 1;
+ }
+ tmp_mask >>= 1;
+ k--;
+ }
+ }
+
+ /* The following reduction is done here instead of after
+ the Request::sendrecv or Request::recv above. This is
+ because to do it above, in the noncommutative
+ case, we would need an extra temp buffer so as not to
+ overwrite tmp_recvbuf, because tmp_recvbuf may have
+ to be communicated to other processes in the
+ non-power-of-two case. To avoid that extra allocation,
+ we do the reduce here. */
+ if (received) {
+ if (is_commutative || (dst_tree_root < my_tree_root)) {
+ {
+ if (op != MPI_OP_NULL)
+ op->apply(tmp_recvbuf, tmp_results, &blklens[0], datatype);
+ if (op != MPI_OP_NULL)
+ op->apply(tmp_recvbuf + dis[1] * extent, tmp_results + dis[1] * extent, &blklens[1], datatype);
+ }
+ } else {
+ {
+ if (op != MPI_OP_NULL)
+ op->apply(tmp_results, tmp_recvbuf, &blklens[0], datatype);
+ if (op != MPI_OP_NULL)
+ op->apply(tmp_results + dis[1] * extent, tmp_recvbuf + dis[1] * extent, &blklens[1], datatype);
+ }
+ /* copy result back into tmp_results */
+ mpi_errno = Datatype::copy(tmp_recvbuf, 1, recvtype, tmp_results, 1, recvtype);
+ if (mpi_errno)
+ return (mpi_errno);
+ }
+ }
- Datatype::unref(sendtype);
- Datatype::unref(recvtype);
+ Datatype::unref(sendtype);
+ Datatype::unref(recvtype);
- mask <<= 1;
- i++;
+ mask <<= 1;
+ i++;
}
/* now copy final results from tmp_results to recvbuf */
- mpi_errno = Datatype::copy(((char *)tmp_results+disps[rank]*extent),
- recvcounts[rank], datatype, recvbuf,
+ mpi_errno = Datatype::copy(tmp_results + disps[rank] * extent, recvcounts[rank], datatype, recvbuf,
recvcounts[rank], datatype);
if (mpi_errno) return(mpi_errno);
int i, rank, size, count, err = MPI_SUCCESS;
int tmp_size = 1, remain = 0, tmp_rank;
ptrdiff_t true_lb, true_extent, lb, extent, buf_size;
- char *recv_buf = NULL, *recv_buf_free = NULL;
- char *result_buf = NULL, *result_buf_free = NULL;
+ unsigned char *result_buf = nullptr, *result_buf_free = nullptr;
/* Initialize */
rank = comm->rank();
}
/* Allocate temporary receive buffer. */
- recv_buf_free = (char*) smpi_get_tmp_recvbuffer(buf_size);
-
- recv_buf = recv_buf_free - lb;
+ unsigned char* recv_buf_free = smpi_get_tmp_recvbuffer(buf_size);
+ unsigned char* recv_buf = recv_buf_free - lb;
if (NULL == recv_buf_free) {
err = MPI_ERR_OTHER;
goto cleanup;
}
/* allocate temporary buffer for results */
- result_buf_free = (char*) smpi_get_tmp_sendbuffer(buf_size);
-
+ result_buf_free = smpi_get_tmp_sendbuffer(buf_size);
result_buf = result_buf_free - lb;
/* copy local buffer into the temporary results */
{
int ret, line, rank, size, i, k, recv_from, send_to, total_count, max_block_count;
int inbi;
- char *tmpsend = NULL, *tmprecv = NULL, *accumbuf = NULL, *accumbuf_free = NULL;
- char *inbuf_free[2] = {NULL, NULL}, *inbuf[2] = {NULL, NULL};
+ unsigned char *tmpsend = NULL, *tmprecv = NULL, *accumbuf = NULL, *accumbuf_free = NULL;
+ unsigned char *inbuf_free[2] = {NULL, NULL}, *inbuf[2] = {NULL, NULL};
ptrdiff_t true_lb, true_extent, lb, extent, max_real_segsize;
MPI_Request reqs[2] = {NULL, NULL};
max_real_segsize = true_extent + (ptrdiff_t)(max_block_count - 1) * extent;
- accumbuf_free = (char*)smpi_get_tmp_recvbuffer(true_extent + (ptrdiff_t)(total_count - 1) * extent);
+ accumbuf_free = smpi_get_tmp_recvbuffer(true_extent + (ptrdiff_t)(total_count - 1) * extent);
if (NULL == accumbuf_free) { ret = -1; line = __LINE__; goto error_hndl; }
accumbuf = accumbuf_free - lb;
- inbuf_free[0] = (char*)smpi_get_tmp_sendbuffer(max_real_segsize);
+ inbuf_free[0] = smpi_get_tmp_sendbuffer(max_real_segsize);
if (NULL == inbuf_free[0]) { ret = -1; line = __LINE__; goto error_hndl; }
inbuf[0] = inbuf_free[0] - lb;
if (size > 2) {
- inbuf_free[1] = (char*)smpi_get_tmp_sendbuffer(max_real_segsize);
- if (NULL == inbuf_free[1]) { ret = -1; line = __LINE__; goto error_hndl; }
- inbuf[1] = inbuf_free[1] - lb;
+ inbuf_free[1] = smpi_get_tmp_sendbuffer(max_real_segsize);
+ if (NULL == inbuf_free[1]) {
+ ret = -1;
+ line = __LINE__;
+ goto error_hndl;
+ }
+ inbuf[1] = inbuf_free[1] - lb;
}
/* Handle MPI_IN_PLACE for size > 1 */
int leader_comm_rank = -1, leader_comm_size = -1;
int mpi_errno = MPI_SUCCESS;
int recvtype_size, sendtype_size, nbytes;
- void *tmp_buf = NULL;
- void *leader_scatter_buf = NULL;
+ unsigned char* tmp_buf = nullptr;
+ unsigned char* leader_scatter_buf = nullptr;
MPI_Status status;
int leader_root, leader_of_root = -1;
MPI_Comm shmem_comm, leader_comm;
int leader_comm_rank = -1, leader_comm_size = -1;
int mpi_errno = MPI_SUCCESS;
int recvtype_size, sendtype_size, nbytes;
- void *tmp_buf = NULL;
- void *leader_scatter_buf = NULL;
+ unsigned char* tmp_buf = nullptr;
+ unsigned char* leader_scatter_buf = nullptr;
MPI_Status status;
int leader_root = -1, leader_of_root = -1;
MPI_Comm shmem_comm, leader_comm;
int vrank;
int size;
int total_send = 0;
- char *ptmp = NULL;
- char *tempbuf = NULL;
+ unsigned char* ptmp = nullptr;
+ unsigned char* tempbuf = nullptr;
+ const unsigned char* cptmp; // const ptmp
int err;
ompi_coll_tree_t* bmtree;
MPI_Status status;
if (rank == root) {
if (0 == root) {
/* root on 0, just use the send buffer */
- ptmp = (char*)sbuf;
+ ptmp = nullptr; // unused
+ cptmp = static_cast<const unsigned char*>(sbuf);
if (rbuf != MPI_IN_PLACE) {
/* local copy to rbuf */
err = Datatype::copy(sbuf, scount, sdtype, rbuf, rcount, rdtype);
}
} else {
/* root is not on 0, allocate temp buffer for send */
- tempbuf = (char*)smpi_get_tmp_sendbuffer(strue_extent + (scount * size - 1) * sextent);
- if (NULL == tempbuf) {
+ tempbuf = smpi_get_tmp_sendbuffer(strue_extent + (scount * size - 1) * sextent);
+ if (nullptr == tempbuf) {
err = MPI_ERR_OTHER;
line = __LINE__;
goto err_hndl;
}
- ptmp = tempbuf - slb;
+ ptmp = tempbuf - slb;
+ cptmp = ptmp;
/* and rotate data so they will eventually in the right place */
err = Datatype::copy((char*)sbuf + sextent * root * scount, scount * (size - root), sdtype, ptmp,
} else if (not(vrank % 2)) {
/* non-root, non-leaf nodes, allocate temp buffer for recv
* the most we need is rcount*size/2 */
- tempbuf = (char*)smpi_get_tmp_recvbuffer(rtrue_extent + (rcount * size - 1) * rextent);
- if (NULL == tempbuf) {
+ tempbuf = smpi_get_tmp_recvbuffer(rtrue_extent + (rcount * size - 1) * rextent);
+ if (nullptr == tempbuf) {
err = MPI_ERR_OTHER;
line = __LINE__;
goto err_hndl;
}
- ptmp = tempbuf - rlb;
+ ptmp = tempbuf - rlb;
+ cptmp = ptmp;
sdtype = rdtype;
scount = rcount;
total_send = scount;
} else {
/* leaf nodes, just use rbuf */
- ptmp = (char*)rbuf;
+ ptmp = static_cast<unsigned char*>(rbuf);
+ cptmp = ptmp;
}
if (not(vrank % 2)) {
mycount = size - vkid;
mycount *= scount;
- Request::send(ptmp + total_send * sextent, mycount, sdtype, bmtree->tree_next[i], COLL_TAG_SCATTER, comm);
+ Request::send(cptmp + total_send * sextent, mycount, sdtype, bmtree->tree_next[i], COLL_TAG_SCATTER, comm);
total_send += mycount;
}
Request::recv(ptmp, rcount, rdtype, bmtree->tree_prev, COLL_TAG_SCATTER, comm, &status);
}
- if (NULL != tempbuf)
- smpi_free_tmp_buffer(tempbuf);
+ smpi_free_tmp_buffer(tempbuf);
// not FIXME : store the tree, as done in ompi, instead of calculating it each time ?
ompi_coll_tuned_topo_destroy_tree(&bmtree);
return MPI_SUCCESS;
err_hndl:
- if (NULL != tempbuf)
- smpi_free_tmp_buffer(tempbuf);
+ smpi_free_tmp_buffer(tempbuf);
XBT_DEBUG("%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank);
return err;
// Send/Recv buffers to/from others
MPI_Request* requests = new MPI_Request[size - 1];
- void** tmpbufs = new void*[rank];
+ unsigned char** tmpbufs = new unsigned char*[rank];
int index = 0;
for (int other = 0; other < rank; other++) {
tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext);
// Send/Recv buffers to/from others
MPI_Request* requests = new MPI_Request[size - 1];
- void** tmpbufs = new void*[rank];
+ unsigned char** tmpbufs = new unsigned char*[rank];
int index = 0;
for (int other = 0; other < rank; other++) {
tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext);
displs[i] = count;
count += recvcounts[i];
}
- void *tmpbuf = static_cast<void*>(smpi_get_tmp_sendbuffer(count*datatype->get_extent()));
+ unsigned char* tmpbuf = smpi_get_tmp_sendbuffer(count * datatype->get_extent());
int ret = Coll_reduce_default::reduce(sendbuf, tmpbuf, count, datatype, op, 0, comm);
if(ret==MPI_SUCCESS)
init_mv2_alltoall_tables_stampede();
int sendtype_size, recvtype_size, comm_size;
- char * tmp_buf = NULL;
int mpi_errno=MPI_SUCCESS;
int range = 0;
int range_threshold = 0;
mv2_alltoall_thresholds_table[conf_index][range].in_place_algo_table[range_threshold].min
||nbytes > mv2_alltoall_thresholds_table[conf_index][range].in_place_algo_table[range_threshold].max
) {
- tmp_buf = (char *)smpi_get_tmp_sendbuffer( comm_size * recvcount * recvtype_size );
- Datatype::copy((char *)recvbuf,
- comm_size*recvcount, recvtype,
- (char *)tmp_buf,
- comm_size*recvcount, recvtype);
+ unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(comm_size * recvcount * recvtype_size);
+ Datatype::copy(recvbuf, comm_size * recvcount, recvtype, tmp_buf, comm_size * recvcount, recvtype);
- mpi_errno = MV2_Alltoall_function(tmp_buf, recvcount, recvtype,
- recvbuf, recvcount, recvtype,
- comm );
- smpi_free_tmp_buffer(tmp_buf);
+ mpi_errno = MV2_Alltoall_function(tmp_buf, recvcount, recvtype, recvbuf, recvcount, recvtype, comm);
+ smpi_free_tmp_buffer(tmp_buf);
} else {
mpi_errno = MPIR_Alltoall_inplace_MV2(sendbuf, sendcount, sendtype,
recvbuf, recvcount, recvtype,
// int is_homogeneous, is_contig;
MPI_Aint type_size;
//, position;
- // void *tmp_buf = NULL;
+ // unsigned char *tmp_buf = NULL;
MPI_Comm shmem_comm;
//MPID_Datatype *dtp;
#endif
if (two_level_bcast == 1) {
// if (not is_contig || not is_homogeneous) {
- // tmp_buf = (void*)smpi_get_tmp_sendbuffer(nbytes);
-
- /* position = 0;*/
- /* if (rank == root) {*/
- /* mpi_errno =*/
- /* MPIR_Pack_impl(buffer, count, datatype, tmp_buf, nbytes, &position);*/
- /* if (mpi_errno)*/
- /* MPIU_ERR_POP(mpi_errno);*/
- /* }*/
- // }
+// tmp_buf = smpi_get_tmp_sendbuffer(nbytes);
+
+/* position = 0;*/
+/* if (rank == root) {*/
+/* mpi_errno =*/
+/* MPIR_Pack_impl(buffer, count, datatype, tmp_buf, nbytes, &position);*/
+/* if (mpi_errno)*/
+/* MPIU_ERR_POP(mpi_errno);*/
+/* }*/
+// }
#ifdef CHANNEL_MRAIL_GEN2
if ((mv2_enable_zcpy_bcast == 1) &&
(&MPIR_Pipelined_Bcast_Zcpy_MV2 == MV2_Bcast_function)) {
if (size <= 0)
return MPI_ERR_COMM;
- void* tmp_sendbuf = nullptr;
+ unsigned char* tmp_sendbuf = nullptr;
if( sendbuf == MPI_IN_PLACE ) {
tmp_sendbuf = smpi_get_tmp_sendbuffer(count * datatype->get_extent());
Datatype::copy(recvbuf, count, datatype, tmp_sendbuf, count, datatype);
XBT_PRIVATE void smpi_bench_end();
XBT_PRIVATE void smpi_shared_destroy();
// Temporary-buffer helpers. The two getters take a byte count and hand back a
// byte-typed (unsigned char) buffer; smpi_free_tmp_buffer() takes such a buffer
// back once the caller is done with it.
-XBT_PRIVATE void* smpi_get_tmp_sendbuffer(size_t size);
-XBT_PRIVATE void* smpi_get_tmp_recvbuffer(size_t size);
-XBT_PRIVATE void smpi_free_tmp_buffer(const void* buf);
+XBT_PRIVATE unsigned char* smpi_get_tmp_sendbuffer(size_t size);
+XBT_PRIVATE unsigned char* smpi_get_tmp_recvbuffer(size_t size);
+XBT_PRIVATE void smpi_free_tmp_buffer(const unsigned char* buf);
XBT_PRIVATE void smpi_free_replay_tmp_buffers();
extern "C" {
}
XBT_CDEBUG(smpi_pmpi, "will have to access %lld from my chunk", totreads);
- char* sendbuf= static_cast<char *>(smpi_get_tmp_sendbuffer(total_sent));
+ unsigned char* sendbuf = smpi_get_tmp_sendbuffer(total_sent);
if(totreads>0){
seek(min_offset, MPI_SEEK_SET);
static std::vector<unsigned char> recvbuffer;
//allocate a single buffer for all sends, growing it if needed
// Returns a temporary send buffer of `size` bytes.
// When the calling process is not replaying, a fresh array is allocated with
// new[]; smpi_free_tmp_buffer() performs the matching delete[].
// NOTE(review): the replay-mode path is not visible in this excerpt. Per the
// comment above it presumably hands out one shared, growable buffer -- confirm
// against the full function body.
-void* smpi_get_tmp_sendbuffer(size_t size)
+unsigned char* smpi_get_tmp_sendbuffer(size_t size)
{
if (not smpi_process()->replaying())
return new unsigned char[size];
}
//allocate a single buffer for all recv
// Returns a temporary receive buffer.
// When the calling process is not replaying, a fresh array of `size` bytes is
// allocated with new[]; smpi_free_tmp_buffer() performs the matching delete[].
// When replaying, the storage of the file-level `recvbuffer` vector is handed
// out instead, so the returned pointer is not a separate allocation.
// NOTE(review): no step growing `recvbuffer` to at least `size` bytes is
// visible in this excerpt -- confirm one exists before data() is returned.
-void* smpi_get_tmp_recvbuffer(size_t size)
+unsigned char* smpi_get_tmp_recvbuffer(size_t size)
{
if (not smpi_process()->replaying())
return new unsigned char[size];
return recvbuffer.data();
}
// Releases a buffer obtained from smpi_get_tmp_sendbuffer() or
// smpi_get_tmp_recvbuffer().
// Outside of replay mode those getters allocate with new unsigned char[], so
// the buffer is released here with delete[]. In replay mode this function does
// nothing.
// NOTE(review): replay-mode buffers are presumably released by
// smpi_free_replay_tmp_buffers() instead -- confirm.
-void smpi_free_tmp_buffer(const void* buf)
+void smpi_free_tmp_buffer(const unsigned char* buf)
{
if (not smpi_process()->replaying())
// With a byte-typed parameter the pointer can be passed to delete[] directly;
// the void* version had to cast back to the allocated element type first.
- delete[] static_cast<const unsigned char*>(buf);
+ delete[] buf;
}
void smpi_free_replay_tmp_buffers()
int count=(*request)->size_/ (*request)->old_type_->size();
(*request)->op_->apply(buf, (*request)->buf_, &count, (*request)->old_type_);
}
- smpi_free_tmp_buffer(buf);
+ smpi_free_tmp_buffer(static_cast<unsigned char*>(buf));
}
}
if((*request)->nbc_requests_[i]!=MPI_REQUEST_NULL)