From: Arnaud Giersch Date: Mon, 29 Apr 2019 19:36:48 +0000 (+0200) Subject: [sonar] Use unsigned char* for smpi buffers. X-Git-Tag: v3.22.2~19 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/6633f5c4d1b7d52fa54306ee3fb7eb573daeb274?hp=d5cc61332edae35867a41bb38ad9401faaab2716 [sonar] Use unsigned char* for smpi buffers. --- diff --git a/include/simgrid/smpi/replay.hpp b/include/simgrid/smpi/replay.hpp index f1f432d440..27aceab575 100644 --- a/include/simgrid/smpi/replay.hpp +++ b/include/simgrid/smpi/replay.hpp @@ -31,8 +31,8 @@ } \ } -XBT_PRIVATE void* smpi_get_tmp_sendbuffer(size_t size); -XBT_PRIVATE void* smpi_get_tmp_recvbuffer(size_t size); +XBT_PRIVATE unsigned char* smpi_get_tmp_sendbuffer(size_t size); +XBT_PRIVATE unsigned char* smpi_get_tmp_recvbuffer(size_t size); XBT_PRIVATE void log_timed_action(simgrid::xbt::ReplayAction& action, double clock); @@ -187,8 +187,8 @@ public: } virtual void kernel(simgrid::xbt::ReplayAction& action) = 0; - void* send_buffer(int size) { return smpi_get_tmp_sendbuffer(size); } - void* recv_buffer(int size) { return smpi_get_tmp_recvbuffer(size); } + unsigned char* send_buffer(int size) { return smpi_get_tmp_sendbuffer(size); } + unsigned char* recv_buffer(int size) { return smpi_get_tmp_recvbuffer(size); } }; class WaitAction : public ReplayAction { diff --git a/src/smpi/colls/allgather/allgather-bruck.cpp b/src/smpi/colls/allgather/allgather-bruck.cpp index 1e7cf374b8..0aa067c6b6 100644 --- a/src/smpi/colls/allgather/allgather-bruck.cpp +++ b/src/smpi/colls/allgather/allgather-bruck.cpp @@ -88,7 +88,6 @@ int Coll_allgather_bruck::allgather(const void *send_buff, int send_count, int pof2 = 1; // local string variables - char *tmp_buff; char *send_ptr = (char *) send_buff; char *recv_ptr = (char *) recv_buff; @@ -101,7 +100,7 @@ int Coll_allgather_bruck::allgather(const void *send_buff, int send_count, count = recv_count; - tmp_buff = (char *) smpi_get_tmp_sendbuffer(num_procs * 
recv_count * recv_extent); + unsigned char* tmp_buff = smpi_get_tmp_sendbuffer(num_procs * recv_count * recv_extent); // perform a local copy Datatype::copy(send_ptr, send_count, send_type, tmp_buff, recv_count, recv_type); diff --git a/src/smpi/colls/allgatherv/allgatherv-mpich-rdb.cpp b/src/smpi/colls/allgatherv/allgatherv-mpich-rdb.cpp index a359f44517..1434b0eef2 100644 --- a/src/smpi/colls/allgatherv/allgatherv-mpich-rdb.cpp +++ b/src/smpi/colls/allgatherv/allgatherv-mpich-rdb.cpp @@ -27,7 +27,6 @@ int Coll_allgatherv_mpich_rdb::allgatherv ( MPI_Status status; MPI_Aint recvtype_extent, recvtype_true_extent, recvtype_true_lb; unsigned int curr_cnt, dst, total_count; - void *tmp_buf, *tmp_buf_rl; unsigned int mask, dst_tree_root, my_tree_root, position, send_offset, recv_offset, last_recv_cnt=0, nprocs_completed, k, offset, tmp_mask, tree_root; @@ -49,10 +48,10 @@ int Coll_allgatherv_mpich_rdb::allgatherv ( recvtype->extent(&recvtype_true_lb, &recvtype_true_extent); - tmp_buf_rl = (void*)smpi_get_tmp_sendbuffer(total_count * std::max(recvtype_true_extent, recvtype_extent)); + unsigned char* tmp_buf_rl = smpi_get_tmp_sendbuffer(total_count * std::max(recvtype_true_extent, recvtype_extent)); /* adjust for potential negative lower bound in datatype */ - tmp_buf = (void *)((char*)tmp_buf_rl - recvtype_true_lb); + unsigned char* tmp_buf = tmp_buf_rl - recvtype_true_lb; /* copy local data into right location in tmp_buf */ position = 0; @@ -60,20 +59,13 @@ int Coll_allgatherv_mpich_rdb::allgatherv ( position += recvcounts[i]; if (sendbuf != MPI_IN_PLACE) { - Datatype::copy(sendbuf, sendcount, sendtype, - ((char *)tmp_buf + position* - recvtype_extent), - recvcounts[rank], recvtype); + Datatype::copy(sendbuf, sendcount, sendtype, tmp_buf + position * recvtype_extent, recvcounts[rank], recvtype); } else { /* if in_place specified, local data is found in recvbuf */ - Datatype::copy(((char *)recvbuf + - displs[rank]*recvtype_extent), - recvcounts[rank], recvtype, - ((char 
*)tmp_buf + position* - recvtype_extent), - recvcounts[rank], recvtype); + Datatype::copy(static_cast(recvbuf) + displs[rank] * recvtype_extent, recvcounts[rank], recvtype, + tmp_buf + position * recvtype_extent, recvcounts[rank], recvtype); } curr_cnt = recvcounts[rank]; @@ -102,13 +94,9 @@ int Coll_allgatherv_mpich_rdb::allgatherv ( for (j=0; j(recvbuf) + displs[j] * recvtype_extent, recvcounts[j], recvtype); } position += recvcounts[j]; } diff --git a/src/smpi/colls/allreduce/allreduce-mvapich-rs.cpp b/src/smpi/colls/allreduce/allreduce-mvapich-rs.cpp index 1a74de8434..4fc72fe6cc 100644 --- a/src/smpi/colls/allreduce/allreduce-mvapich-rs.cpp +++ b/src/smpi/colls/allreduce/allreduce-mvapich-rs.cpp @@ -37,7 +37,6 @@ int Coll_allreduce_mvapich2_rs::allreduce(const void *sendbuf, int mask, pof2, i, send_idx, recv_idx, last_idx, send_cnt; int dst, is_commutative, rem, newdst, recv_cnt; MPI_Aint true_lb, true_extent, extent; - void *tmp_buf, *tmp_buf_free; if (count == 0) { return MPI_SUCCESS; @@ -54,10 +53,10 @@ int Coll_allreduce_mvapich2_rs::allreduce(const void *sendbuf, datatype->extent(&true_lb, &true_extent); extent = datatype->get_extent(); - tmp_buf_free = smpi_get_tmp_recvbuffer(count * std::max(extent, true_extent)); + unsigned char* tmp_buf_free = smpi_get_tmp_recvbuffer(count * std::max(extent, true_extent)); /* adjust for potential negative lower bound in datatype */ - tmp_buf = (void *) ((char *) tmp_buf_free - true_lb); + unsigned char* tmp_buf = tmp_buf_free - true_lb; /* copy local data into recvbuf */ if (sendbuf != MPI_IN_PLACE) { @@ -188,15 +187,9 @@ int Coll_allreduce_mvapich2_rs::allreduce(const void *sendbuf, } /* Send data from recvbuf. 
Recv into tmp_buf */ - Request::sendrecv((char *) recvbuf + - disps[send_idx] * extent, - send_cnt, datatype, - dst, COLL_TAG_ALLREDUCE, - (char *) tmp_buf + - disps[recv_idx] * extent, - recv_cnt, datatype, dst, - COLL_TAG_ALLREDUCE, comm, - MPI_STATUS_IGNORE); + Request::sendrecv(static_cast<unsigned char*>(recvbuf) + disps[send_idx] * extent, send_cnt, datatype, dst, + COLL_TAG_ALLREDUCE, tmp_buf + disps[recv_idx] * extent, recv_cnt, datatype, dst, + COLL_TAG_ALLREDUCE, comm, MPI_STATUS_IGNORE); /* tmp_buf contains data received in this step. recvbuf contains data accumulated so far */ @@ -204,9 +197,9 @@ int Coll_allreduce_mvapich2_rs::allreduce(const void *sendbuf, /* This algorithm is used only for predefined ops and predefined ops are always commutative. */ - if(op!=MPI_OP_NULL) op->apply( (char *) tmp_buf + disps[recv_idx] * extent, - (char *) recvbuf + disps[recv_idx] * extent, - &recv_cnt, datatype); + if (op != MPI_OP_NULL) + op->apply(tmp_buf + disps[recv_idx] * extent, static_cast<unsigned char*>(recvbuf) + disps[recv_idx] * extent, + &recv_cnt, datatype); /* update send_idx for next iteration */ send_idx = recv_idx; diff --git a/src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp b/src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp index 8a89d286ab..325016839c 100644 --- a/src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp +++ b/src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp @@ -135,7 +135,7 @@ int Coll_allreduce_mvapich2_two_level::allreduce(const void *sendbuf, } if (local_size != total_size) { - void* sendtmpbuf = (char *)smpi_get_tmp_sendbuffer(count*datatype->get_extent()); + unsigned char* sendtmpbuf = smpi_get_tmp_sendbuffer(count * datatype->get_extent()); Datatype::copy(recvbuf, count, datatype,sendtmpbuf, count, datatype); /* inter-node allreduce */ if(MV2_Allreducection == &MPIR_Allreduce_pt2pt_rd_MV2){ diff --git a/src/smpi/colls/allreduce/allreduce-ompi-ring-segmented.cpp b/src/smpi/colls/allreduce/allreduce-ompi-ring-segmented.cpp index 
e20328f9b0..946a409b8a 100644 --- a/src/smpi/colls/allreduce/allreduce-ompi-ring-segmented.cpp +++ b/src/smpi/colls/allreduce/allreduce-ompi-ring-segmented.cpp @@ -173,7 +173,7 @@ Coll_allreduce_ompi_ring_segmented::allreduce(const void *sbuf, void *rbuf, int unsigned int inbi; size_t typelng; char *tmpsend = NULL, *tmprecv = NULL; - char *inbuf[2] = {NULL, NULL}; + unsigned char* inbuf[2] = {nullptr, nullptr}; ptrdiff_t true_extent, extent; ptrdiff_t block_offset, max_real_segsize; MPI_Request reqs[2] = {NULL, NULL}; @@ -232,11 +232,15 @@ Coll_allreduce_ompi_ring_segmented::allreduce(const void *sbuf, void *rbuf, int max_real_segsize = true_extent + (max_segcount - 1) * extent; /* Allocate and initialize temporary buffers */ - inbuf[0] = (char*)smpi_get_tmp_sendbuffer(max_real_segsize); + inbuf[0] = smpi_get_tmp_sendbuffer(max_real_segsize); if (NULL == inbuf[0]) { ret = -1; line = __LINE__; goto error_hndl; } if (size > 2) { - inbuf[1] = (char*)smpi_get_tmp_recvbuffer(max_real_segsize); - if (NULL == inbuf[1]) { ret = -1; line = __LINE__; goto error_hndl; } + inbuf[1] = smpi_get_tmp_recvbuffer(max_real_segsize); + if (nullptr == inbuf[1]) { + ret = -1; + line = __LINE__; + goto error_hndl; + } } /* Handle MPI_IN_PLACE */ @@ -378,16 +382,16 @@ Coll_allreduce_ompi_ring_segmented::allreduce(const void *sbuf, void *rbuf, int } - if (NULL != inbuf[0]) smpi_free_tmp_buffer(inbuf[0]); - if (NULL != inbuf[1]) smpi_free_tmp_buffer(inbuf[1]); + smpi_free_tmp_buffer(inbuf[0]); + smpi_free_tmp_buffer(inbuf[1]); return MPI_SUCCESS; error_hndl: XBT_DEBUG("%s:%4d\tRank %d Error occurred %d\n", __FILE__, line, rank, ret); - if (NULL != inbuf[0]) smpi_free_tmp_buffer(inbuf[0]); - if (NULL != inbuf[1]) smpi_free_tmp_buffer(inbuf[1]); + smpi_free_tmp_buffer(inbuf[0]); + smpi_free_tmp_buffer(inbuf[1]); return ret; } } diff --git a/src/smpi/colls/allreduce/allreduce-rab-rdb.cpp b/src/smpi/colls/allreduce/allreduce-rab-rdb.cpp index 90a44d2680..48b583c84a 100644 --- 
a/src/smpi/colls/allreduce/allreduce-rab-rdb.cpp +++ b/src/smpi/colls/allreduce/allreduce-rab-rdb.cpp @@ -16,13 +16,12 @@ int Coll_allreduce_rab_rdb::allreduce(const void *sbuff, void *rbuff, int count, int dst, newrank, rem, newdst, recv_cnt; MPI_Aint extent; MPI_Status status; - void *tmp_buf = NULL; unsigned int nprocs = comm->size(); int rank = comm->rank(); extent = dtype->get_extent(); - tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); + unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent); Datatype::copy(sbuff, count, dtype, rbuff, count, dtype); @@ -115,18 +114,17 @@ int Coll_allreduce_rab_rdb::allreduce(const void *sbuff, void *rbuff, int count, } // Send data from recvbuf. Recv into tmp_buf - Request::sendrecv((char *) rbuff + disps[send_idx] * extent, send_cnt, - dtype, dst, tag, - (char *) tmp_buf + disps[recv_idx] * extent, recv_cnt, - dtype, dst, tag, comm, &status); + Request::sendrecv(static_cast<unsigned char*>(rbuff) + disps[send_idx] * extent, send_cnt, dtype, dst, tag, + tmp_buf + disps[recv_idx] * extent, recv_cnt, dtype, dst, tag, comm, &status); // tmp_buf contains data received in this step. // recvbuf contains data accumulated so far // This algorithm is used only for predefined ops // and predefined ops are always commutative. 
- if(op!=MPI_OP_NULL) op->apply( (char *) tmp_buf + disps[recv_idx] * extent, - (char *) rbuff + disps[recv_idx] * extent, &recv_cnt, dtype); + if (op != MPI_OP_NULL) + op->apply(tmp_buf + disps[recv_idx] * extent, static_cast<unsigned char*>(rbuff) + disps[recv_idx] * extent, &recv_cnt, + dtype); // update send_idx for next iteration send_idx = recv_idx; diff --git a/src/smpi/colls/allreduce/allreduce-rab1.cpp b/src/smpi/colls/allreduce/allreduce-rab1.cpp index 9910b14048..e5ff57aaca 100644 --- a/src/smpi/colls/allreduce/allreduce-rab1.cpp +++ b/src/smpi/colls/allreduce/allreduce-rab1.cpp @@ -19,8 +19,6 @@ int Coll_allreduce_rab1::allreduce(const void *sbuff, void *rbuff, unsigned int pof2 = 1, mask; int send_idx, recv_idx, dst, send_cnt, recv_cnt; - void *recv, *tmp_buf; - int rank = comm->rank(); unsigned int nprocs = comm->size(); @@ -41,8 +39,8 @@ int Coll_allreduce_rab1::allreduce(const void *sbuff, void *rbuff, send_size = (count + nprocs) / nprocs; newcnt = send_size * nprocs; - recv = (void *) smpi_get_tmp_recvbuffer(extent * newcnt); - tmp_buf = (void *) smpi_get_tmp_sendbuffer(extent * newcnt); + unsigned char* recv = smpi_get_tmp_recvbuffer(extent * newcnt); + unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(extent * newcnt); memcpy(recv, sbuff, extent * count); @@ -57,18 +55,18 @@ int Coll_allreduce_rab1::allreduce(const void *sbuff, void *rbuff, else recv_idx = send_idx + (mask * share); - Request::sendrecv((char *) recv + send_idx * extent, send_cnt, dtype, dst, tag, - tmp_buf, recv_cnt, dtype, dst, tag, comm, &status); + Request::sendrecv(recv + send_idx * extent, send_cnt, dtype, dst, tag, tmp_buf, recv_cnt, dtype, dst, tag, comm, + &status); - if(op!=MPI_OP_NULL) op->apply( tmp_buf, (char *) recv + recv_idx * extent, &recv_cnt, - dtype); + if (op != MPI_OP_NULL) + op->apply(tmp_buf, recv + recv_idx * extent, &recv_cnt, dtype); // update send_idx for next iteration send_idx = recv_idx; mask >>= 1; } - memcpy(tmp_buf, (char *) recv + recv_idx * extent, recv_cnt * 
extent); + memcpy(tmp_buf, recv + recv_idx * extent, recv_cnt * extent); Colls::allgather(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm); memcpy(rbuff, recv, count * extent); @@ -78,7 +76,7 @@ int Coll_allreduce_rab1::allreduce(const void *sbuff, void *rbuff, } else { - tmp_buf = (void *) smpi_get_tmp_sendbuffer(extent * count); + unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(extent * count); memcpy(rbuff, sbuff, count * extent); mask = pof2 / 2; share = count / pof2; diff --git a/src/smpi/colls/allreduce/allreduce-rab2.cpp b/src/smpi/colls/allreduce/allreduce-rab2.cpp index 1381fa55ed..ae9f3a20ac 100644 --- a/src/smpi/colls/allreduce/allreduce-rab2.cpp +++ b/src/smpi/colls/allreduce/allreduce-rab2.cpp @@ -41,9 +41,9 @@ int Coll_allreduce_rab2::allreduce(const void *sbuff, void *rbuff, send_size = (count + nprocs) / nprocs; nbytes = send_size * s_extent; - void* send = smpi_get_tmp_sendbuffer(s_extent * send_size * nprocs); - void* recv = smpi_get_tmp_recvbuffer(s_extent * send_size * nprocs); - void* tmp = smpi_get_tmp_sendbuffer(nbytes); + unsigned char* send = smpi_get_tmp_sendbuffer(s_extent * send_size * nprocs); + unsigned char* recv = smpi_get_tmp_recvbuffer(s_extent * send_size * nprocs); + unsigned char* tmp = smpi_get_tmp_sendbuffer(nbytes); memcpy(send, sbuff, s_extent * count); @@ -52,7 +52,8 @@ int Coll_allreduce_rab2::allreduce(const void *sbuff, void *rbuff, memcpy(tmp, recv, nbytes); for (i = 1, s_offset = nbytes; i < nprocs; i++, s_offset = i * nbytes) - if(op!=MPI_OP_NULL) op->apply( (char *) recv + s_offset, tmp, &send_size, dtype); + if (op != MPI_OP_NULL) + op->apply(recv + s_offset, tmp, &send_size, dtype); Colls::allgather(tmp, send_size, dtype, recv, send_size, dtype, comm); memcpy(rbuff, recv, count * s_extent); @@ -66,15 +67,15 @@ int Coll_allreduce_rab2::allreduce(const void *sbuff, void *rbuff, nbytes = send_size * s_extent; r_offset = rank * nbytes; - void* recv = smpi_get_tmp_recvbuffer(s_extent * send_size * nprocs); + 
unsigned char* recv = smpi_get_tmp_recvbuffer(s_extent * send_size * nprocs); Colls::alltoall(send, send_size, dtype, recv, send_size, dtype, comm); memcpy((char *) rbuff + r_offset, recv, nbytes); for (i = 1, s_offset = nbytes; i < nprocs; i++, s_offset = i * nbytes) - if(op!=MPI_OP_NULL) op->apply( (char *) recv + s_offset, (char *) rbuff + r_offset, - &send_size, dtype); + if (op != MPI_OP_NULL) + op->apply(recv + s_offset, static_cast<unsigned char*>(rbuff) + r_offset, &send_size, dtype); Colls::allgather((char *) rbuff + r_offset, send_size, dtype, rbuff, send_size, dtype, comm); diff --git a/src/smpi/colls/allreduce/allreduce-rdb.cpp b/src/smpi/colls/allreduce/allreduce-rdb.cpp index 4afddb4381..6041a42946 100644 --- a/src/smpi/colls/allreduce/allreduce-rdb.cpp +++ b/src/smpi/colls/allreduce/allreduce-rdb.cpp @@ -15,7 +15,6 @@ int Coll_allreduce_rdb::allreduce(const void *sbuff, void *rbuff, int count, int mask, dst, pof2, newrank, rem, newdst; MPI_Aint extent, lb; MPI_Status status; - void *tmp_buf = NULL; /* #ifdef MPICH2_REDUCTION MPI_User_function * uop = MPIR_Op_table[op % 16 - 1]; @@ -30,7 +29,7 @@ int Coll_allreduce_rdb::allreduce(const void *sbuff, void *rbuff, int count, rank=comm->rank(); dtype->extent(&lb, &extent); - tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); + unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent); Request::sendrecv(sbuff, count, dtype, rank, 500, rbuff, count, dtype, rank, 500, comm, &status); diff --git a/src/smpi/colls/allreduce/allreduce-smp-binomial-pipeline.cpp b/src/smpi/colls/allreduce/allreduce-smp-binomial-pipeline.cpp index 7de6db78fb..d9156b3674 100644 --- a/src/smpi/colls/allreduce/allreduce-smp-binomial-pipeline.cpp +++ b/src/smpi/colls/allreduce/allreduce-smp-binomial-pipeline.cpp @@ -46,7 +46,6 @@ int Coll_allreduce_smp_binomial_pipeline::allreduce(const void *send_buf, MPI_Op op, MPI_Comm comm) { int comm_size, rank; - void *tmp_buf; int tag = COLL_TAG_ALLREDUCE; int mask, src, dst; MPI_Status status; 
@@ -62,7 +61,7 @@ int Coll_allreduce_smp_binomial_pipeline::allreduce(const void *send_buf, rank = comm->rank(); MPI_Aint extent; extent = dtype->get_extent(); - tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); + unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent); int intra_rank, inter_rank; intra_rank = rank % num_core; diff --git a/src/smpi/colls/allreduce/allreduce-smp-binomial.cpp b/src/smpi/colls/allreduce/allreduce-smp-binomial.cpp index 3b1c401687..16f60d6eec 100644 --- a/src/smpi/colls/allreduce/allreduce-smp-binomial.cpp +++ b/src/smpi/colls/allreduce/allreduce-smp-binomial.cpp @@ -34,7 +34,6 @@ int Coll_allreduce_smp_binomial::allreduce(const void *send_buf, void *recv_buf, MPI_Op op, MPI_Comm comm) { int comm_size, rank; - void *tmp_buf; int tag = COLL_TAG_ALLREDUCE; int mask, src, dst; @@ -51,7 +50,7 @@ int Coll_allreduce_smp_binomial::allreduce(const void *send_buf, void *recv_buf, rank=comm->rank(); MPI_Aint extent, lb; dtype->extent(&lb, &extent); - tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); + unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent); /* compute intra and inter ranking */ int intra_rank, inter_rank; diff --git a/src/smpi/colls/allreduce/allreduce-smp-rdb.cpp b/src/smpi/colls/allreduce/allreduce-smp-rdb.cpp index a9a573941b..1e7504c957 100644 --- a/src/smpi/colls/allreduce/allreduce-smp-rdb.cpp +++ b/src/smpi/colls/allreduce/allreduce-smp-rdb.cpp @@ -33,7 +33,6 @@ int Coll_allreduce_smp_rdb::allreduce(const void *send_buf, void *recv_buf, int MPI_Comm comm) { int comm_size, rank; - void *tmp_buf; int tag = COLL_TAG_ALLREDUCE; int mask, src, dst; MPI_Status status; @@ -58,7 +57,7 @@ int Coll_allreduce_smp_rdb::allreduce(const void *send_buf, void *recv_buf, int rank = comm->rank(); MPI_Aint extent; extent = dtype->get_extent(); - tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); + unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent); /* compute intra and inter 
ranking */ int intra_rank, inter_rank; diff --git a/src/smpi/colls/allreduce/allreduce-smp-rsag-lr.cpp b/src/smpi/colls/allreduce/allreduce-smp-rsag-lr.cpp index 9babeb9e94..fd5718f7b9 100644 --- a/src/smpi/colls/allreduce/allreduce-smp-rsag-lr.cpp +++ b/src/smpi/colls/allreduce/allreduce-smp-rsag-lr.cpp @@ -21,7 +21,6 @@ int Coll_allreduce_smp_rsag_lr::allreduce(const void *send_buf, void *recv_buf, MPI_Op op, MPI_Comm comm) { int comm_size, rank; - void *tmp_buf; int tag = COLL_TAG_ALLREDUCE; int mask, src, dst; MPI_Status status; @@ -46,7 +45,7 @@ int Coll_allreduce_smp_rsag_lr::allreduce(const void *send_buf, void *recv_buf, rank = comm->rank(); MPI_Aint extent; extent = dtype->get_extent(); - tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); + unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent); int intra_rank, inter_rank; intra_rank = rank % num_core; diff --git a/src/smpi/colls/allreduce/allreduce-smp-rsag-rab.cpp b/src/smpi/colls/allreduce/allreduce-smp-rsag-rab.cpp index 0b20bf2ca0..ad9c9bdfc9 100644 --- a/src/smpi/colls/allreduce/allreduce-smp-rsag-rab.cpp +++ b/src/smpi/colls/allreduce/allreduce-smp-rsag-rab.cpp @@ -25,7 +25,6 @@ int Coll_allreduce_smp_rsag_rab::allreduce(const void *sbuf, void *rbuf, int cou MPI_Comm comm) { int comm_size, rank; - void *tmp_buf; int tag = COLL_TAG_ALLREDUCE; int mask, src, dst; MPI_Status status; @@ -45,7 +44,7 @@ int Coll_allreduce_smp_rsag_rab::allreduce(const void *sbuf, void *rbuf, int cou rank = comm->rank(); MPI_Aint extent; extent = dtype->get_extent(); - tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); + unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent); int intra_rank, inter_rank; intra_rank = rank % num_core; diff --git a/src/smpi/colls/allreduce/allreduce-smp-rsag.cpp b/src/smpi/colls/allreduce/allreduce-smp-rsag.cpp index 1be58c92c8..06d5e4176a 100644 --- a/src/smpi/colls/allreduce/allreduce-smp-rsag.cpp +++ b/src/smpi/colls/allreduce/allreduce-smp-rsag.cpp @@ 
-20,7 +20,6 @@ int Coll_allreduce_smp_rsag::allreduce(const void *send_buf, void *recv_buf, MPI_Comm comm) { int comm_size, rank; - void *tmp_buf; int tag = COLL_TAG_ALLREDUCE; int mask, src, dst; MPI_Status status; @@ -45,7 +44,7 @@ int Coll_allreduce_smp_rsag::allreduce(const void *send_buf, void *recv_buf, rank = comm->rank(); MPI_Aint extent; extent = dtype->get_extent(); - tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); + unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent); int intra_rank, inter_rank; intra_rank = rank % num_core; diff --git a/src/smpi/colls/alltoall/alltoall-2dmesh.cpp b/src/smpi/colls/alltoall/alltoall-2dmesh.cpp index 41c2cef9d5..f9c7f09b36 100644 --- a/src/smpi/colls/alltoall/alltoall-2dmesh.cpp +++ b/src/smpi/colls/alltoall/alltoall-2dmesh.cpp @@ -63,7 +63,6 @@ int Coll_alltoall_2dmesh::alltoall(const void *send_buff, int send_count, MPI_Status s; MPI_Aint extent; - char *tmp_buff1, *tmp_buff2; int i, j, src, dst, rank, num_procs, count, num_reqs; int X, Y, send_offset, recv_offset; int my_row_base, my_col_base, src_row_base, block_size; @@ -81,8 +80,8 @@ int Coll_alltoall_2dmesh::alltoall(const void *send_buff, int send_count, block_size = extent * send_count; - tmp_buff1 = (char *) smpi_get_tmp_sendbuffer(block_size * num_procs * Y); - tmp_buff2 = (char *) smpi_get_tmp_recvbuffer(block_size * Y); + unsigned char* tmp_buff1 = smpi_get_tmp_sendbuffer(block_size * num_procs * Y); + unsigned char* tmp_buff2 = smpi_get_tmp_recvbuffer(block_size * Y); num_reqs = X; if (Y > X) diff --git a/src/smpi/colls/alltoall/alltoall-3dmesh.cpp b/src/smpi/colls/alltoall/alltoall-3dmesh.cpp index 58f8afea5b..24f9def3b9 100644 --- a/src/smpi/colls/alltoall/alltoall-3dmesh.cpp +++ b/src/smpi/colls/alltoall/alltoall-3dmesh.cpp @@ -58,8 +58,6 @@ int Coll_alltoall_3dmesh::alltoall(const void *send_buff, int send_count, int my_z, two_dsize, my_row_base, my_col_base, my_z_base, src_row_base; int src_z_base, send_offset, recv_offset, tag = 
COLL_TAG_ALLTOALL; - char *tmp_buff1, *tmp_buff2; - rank = comm->rank(); num_procs = comm->size(); extent = send_type->get_extent(); @@ -82,8 +80,8 @@ int Coll_alltoall_3dmesh::alltoall(const void *send_buff, int send_count, block_size = extent * send_count; - tmp_buff1 = (char *) smpi_get_tmp_sendbuffer(block_size * num_procs * two_dsize); - tmp_buff2 = (char *) smpi_get_tmp_recvbuffer(block_size * two_dsize); + unsigned char* tmp_buff1 = smpi_get_tmp_sendbuffer(block_size * num_procs * two_dsize); + unsigned char* tmp_buff2 = smpi_get_tmp_recvbuffer(block_size * two_dsize); MPI_Status* statuses = new MPI_Status[num_reqs]; MPI_Request* reqs = new MPI_Request[num_reqs]; diff --git a/src/smpi/colls/alltoall/alltoall-bruck.cpp b/src/smpi/colls/alltoall/alltoall-bruck.cpp index 9c2f5bcd46..cd524443e1 100644 --- a/src/smpi/colls/alltoall/alltoall-bruck.cpp +++ b/src/smpi/colls/alltoall/alltoall-bruck.cpp @@ -44,8 +44,6 @@ Coll_alltoall_bruck::alltoall(const void *send_buff, int send_count, int i, src, dst, rank, num_procs, count, block, position; int pack_size, tag = COLL_TAG_ALLTOALL, pof2 = 1; - - char *tmp_buff; char *send_ptr = (char *) send_buff; char *recv_ptr = (char *) recv_buff; @@ -54,7 +52,7 @@ Coll_alltoall_bruck::alltoall(const void *send_buff, int send_count, extent = recv_type->get_extent(); - tmp_buff = (char *) smpi_get_tmp_sendbuffer(num_procs * recv_count * extent); + unsigned char* tmp_buff = smpi_get_tmp_sendbuffer(num_procs * recv_count * extent); int* disps = new int[num_procs]; int* blocks_length = new int[num_procs]; diff --git a/src/smpi/colls/alltoall/alltoall-rdb.cpp b/src/smpi/colls/alltoall/alltoall-rdb.cpp index 24618457fd..f2a0c32e89 100644 --- a/src/smpi/colls/alltoall/alltoall-rdb.cpp +++ b/src/smpi/colls/alltoall/alltoall-rdb.cpp @@ -44,7 +44,6 @@ int Coll_alltoall_rdb::alltoall(const void *send_buff, int send_count, int last_recv_count = 0, tmp_mask, tree_root, num_procs_completed; int tag = COLL_TAG_ALLTOALL, mask = 1, i = 0; - char 
*tmp_buff; char *send_ptr = (char *) send_buff; char *recv_ptr = (char *) recv_buff; @@ -59,7 +58,7 @@ int Coll_alltoall_rdb::alltoall(const void *send_buff, int send_count, max_size = num_procs * recv_increment; - tmp_buff = (char *) smpi_get_tmp_sendbuffer(max_size); + unsigned char* tmp_buff = smpi_get_tmp_sendbuffer(max_size); curr_size = send_count * num_procs; diff --git a/src/smpi/colls/bcast/bcast-mvapich-smp.cpp b/src/smpi/colls/bcast/bcast-mvapich-smp.cpp index d7f6a3bf53..8abda57aa6 100644 --- a/src/smpi/colls/bcast/bcast-mvapich-smp.cpp +++ b/src/smpi/colls/bcast/bcast-mvapich-smp.cpp @@ -254,7 +254,7 @@ int Coll_bcast_mvapich2_intra_node::bcast(void *buffer, size_t nbytes = 0; int is_homogeneous, is_contig; MPI_Aint type_size; - void *tmp_buf = NULL; + unsigned char* tmp_buf = nullptr; MPI_Comm shmem_comm; if (count == 0) @@ -316,7 +316,7 @@ int Coll_bcast_mvapich2_intra_node::bcast(void *buffer, ) { if (not is_contig || not is_homogeneous) { - tmp_buf = (void*)smpi_get_tmp_sendbuffer(nbytes); + tmp_buf = smpi_get_tmp_sendbuffer(nbytes); /* TODO: Pipeline the packing and communication */ // position = 0; diff --git a/src/smpi/colls/gather/gather-mvapich.cpp b/src/smpi/colls/gather/gather-mvapich.cpp index 48946ed8c3..adeb3ceb73 100644 --- a/src/smpi/colls/gather/gather-mvapich.cpp +++ b/src/smpi/colls/gather/gather-mvapich.cpp @@ -136,26 +136,25 @@ int Coll_gather_mvapich2_two_level::gather(const void *sendbuf, int root, MPI_Comm comm) { - void *leader_gather_buf = NULL; - int comm_size, rank; - int local_rank, local_size; - int leader_comm_rank = -1, leader_comm_size = 0; - int mpi_errno = MPI_SUCCESS; - int recvtype_size = 0, sendtype_size = 0, nbytes=0; - int leader_root, leader_of_root; - MPI_Status status; - MPI_Aint sendtype_extent = 0, recvtype_extent = 0; /* Datatype extent */ - MPI_Aint true_lb = 0, sendtype_true_extent = 0, recvtype_true_extent = 0; - MPI_Comm shmem_comm, leader_comm; - void* tmp_buf = NULL; - - - //if not set (use of the 
algo directly, without mvapich2 selector) - if(MV2_Gather_intra_node_function==NULL) - MV2_Gather_intra_node_function= Coll_gather_mpich::gather; - - if(comm->get_leaders_comm()==MPI_COMM_NULL){ - comm->init_smp(); + unsigned char* leader_gather_buf = NULL; + int comm_size, rank; + int local_rank, local_size; + int leader_comm_rank = -1, leader_comm_size = 0; + int mpi_errno = MPI_SUCCESS; + int recvtype_size = 0, sendtype_size = 0, nbytes = 0; + int leader_root, leader_of_root; + MPI_Status status; + MPI_Aint sendtype_extent = 0, recvtype_extent = 0; /* Datatype extent */ + MPI_Aint true_lb = 0, sendtype_true_extent = 0, recvtype_true_extent = 0; + MPI_Comm shmem_comm, leader_comm; + unsigned char* tmp_buf = NULL; + + // if not set (use of the algo directly, without mvapich2 selector) + if (MV2_Gather_intra_node_function == NULL) + MV2_Gather_intra_node_function = Coll_gather_mpich::gather; + + if (comm->get_leaders_comm() == MPI_COMM_NULL) { + comm->init_smp(); } comm_size = comm->size(); rank = comm->rank(); diff --git a/src/smpi/colls/gather/gather-ompi.cpp b/src/smpi/colls/gather/gather-ompi.cpp index 1546bd306c..812ec45d94 100644 --- a/src/smpi/colls/gather/gather-ompi.cpp +++ b/src/smpi/colls/gather/gather-ompi.cpp @@ -34,8 +34,9 @@ int Coll_gather_ompi_binomial::gather(const void* sbuf, int scount, MPI_Datatype int vrank; int size; int total_recv = 0; - char *ptmp = NULL; - char *tempbuf = NULL; + unsigned char* ptmp = nullptr; + unsigned char* tempbuf = nullptr; + const unsigned char* src_buf; int err; ompi_coll_tree_t* bmtree; MPI_Status status; @@ -63,7 +64,7 @@ int Coll_gather_ompi_binomial::gather(const void* sbuf, int scount, MPI_Datatype rdtype->extent(&rtrue_lb, &rtrue_extent); if (0 == root) { /* root on 0, just use the recv buffer */ - ptmp = (char*)rbuf; + ptmp = static_cast<unsigned char*>(rbuf); if (sbuf != MPI_IN_PLACE) { err = Datatype::copy(sbuf, scount, sdtype, ptmp, rcount, rdtype); if (MPI_SUCCESS != err) { @@ -74,7 +75,7 @@ int 
Coll_gather_ompi_binomial::gather(const void* sbuf, int scount, MPI_Datatype } else { /* root is not on 0, allocate temp buffer for recv, * rotate data at the end */ - tempbuf = (char*)smpi_get_tmp_recvbuffer(rtrue_extent + (rcount * size - 1) * rextent); + tempbuf = smpi_get_tmp_recvbuffer(rtrue_extent + (rcount * size - 1) * rextent); if (NULL == tempbuf) { err = MPI_ERR_OTHER; line = __LINE__; @@ -99,11 +100,12 @@ int Coll_gather_ompi_binomial::gather(const void* sbuf, int scount, MPI_Datatype } } total_recv = rcount; + src_buf = ptmp; } else if (!(vrank % 2)) { /* other non-leaf nodes, allocate temp buffer for data received from * children, the most we need is half of the total data elements due * to the property of binimoal tree */ - tempbuf = (char*)smpi_get_tmp_sendbuffer(strue_extent + (scount * size - 1) * sextent); + tempbuf = smpi_get_tmp_sendbuffer(strue_extent + (scount * size - 1) * sextent); if (NULL == tempbuf) { err = MPI_ERR_OTHER; line = __LINE__; @@ -124,11 +126,12 @@ int Coll_gather_ompi_binomial::gather(const void* sbuf, int scount, MPI_Datatype rcount = scount; rextent = sextent; total_recv = rcount; + src_buf = ptmp; } else { /* leaf nodes, no temp buffer needed, use sdtype,scount as * rdtype,rdcount since they are ignored on non-root procs */ - ptmp = (char*)sbuf; total_recv = scount; + src_buf = static_cast<const unsigned char*>(sbuf); } if (!(vrank % 2)) { @@ -156,7 +159,7 @@ int Coll_gather_ompi_binomial::gather(const void* sbuf, int scount, MPI_Datatype /* all nodes except root send to parents */ XBT_DEBUG("smpi_coll_tuned_gather_ompi_binomial rank %d send %d count %d\n", rank, bmtree->tree_prev, total_recv); - Request::send(ptmp, total_recv, sdtype, bmtree->tree_prev, COLL_TAG_GATHER, comm); + Request::send(src_buf, total_recv, sdtype, bmtree->tree_prev, COLL_TAG_GATHER, comm); } if (rank == root) { if (root != 0) { diff --git a/src/smpi/colls/reduce/reduce-NTSL.cpp b/src/smpi/colls/reduce/reduce-NTSL.cpp index e8f6dec702..b341762d15 100644 --- 
a/src/smpi/colls/reduce/reduce-NTSL.cpp +++ b/src/smpi/colls/reduce/reduce-NTSL.cpp @@ -60,8 +60,7 @@ int Coll_reduce_NTSL::reduce(const void *buf, void *rbuf, int count, } */ - char *tmp_buf; - tmp_buf = (char *) smpi_get_tmp_sendbuffer(count * extent); + unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent); Request::sendrecv(buf, count, datatype, rank, tag, rbuf, count, datatype, rank, tag, comm, &status); @@ -92,8 +91,7 @@ int Coll_reduce_NTSL::reduce(const void *buf, void *rbuf, int count, /* root recv data */ if (rank == root) { for (i = 0; i < pipe_length; i++) { - recv_request_array[i] = Request::irecv((char *) tmp_buf + (i * increment), segment, datatype, from, - (tag + i), comm); + recv_request_array[i] = Request::irecv(tmp_buf + (i * increment), segment, datatype, from, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { Request::wait(&recv_request_array[i], &status); @@ -114,8 +112,7 @@ int Coll_reduce_NTSL::reduce(const void *buf, void *rbuf, int count, /* intermediate nodes relay (receive, reduce, then send) data */ else { for (i = 0; i < pipe_length; i++) { - recv_request_array[i] = Request::irecv((char *) tmp_buf + (i * increment), segment, datatype, from, - (tag + i), comm); + recv_request_array[i] = Request::irecv(tmp_buf + (i * increment), segment, datatype, from, (tag + i), comm); } for (i = 0; i < pipe_length; i++) { Request::wait(&recv_request_array[i], &status); diff --git a/src/smpi/colls/reduce/reduce-arrival-pattern-aware.cpp b/src/smpi/colls/reduce/reduce-arrival-pattern-aware.cpp index 8f0b20260e..bd9906b1f2 100644 --- a/src/smpi/colls/reduce/reduce-arrival-pattern-aware.cpp +++ b/src/smpi/colls/reduce/reduce-arrival-pattern-aware.cpp @@ -68,8 +68,7 @@ int Coll_reduce_arrival_pattern_aware::reduce(const void *buf, void *rbuf, already_received[i] = 0; } - char *tmp_buf; - tmp_buf = (char *) smpi_get_tmp_sendbuffer(count * extent); + unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent); Request::sendrecv(buf, 
count, datatype, rank, tag, rbuf, count, datatype, rank, tag, comm, &status); diff --git a/src/smpi/colls/reduce/reduce-binomial.cpp b/src/smpi/colls/reduce/reduce-binomial.cpp index df3bd42087..352523146e 100644 --- a/src/smpi/colls/reduce/reduce-binomial.cpp +++ b/src/smpi/colls/reduce/reduce-binomial.cpp @@ -20,7 +20,6 @@ int Coll_reduce_binomial::reduce(const void *sendbuf, void *recvbuf, int count, int dst; int tag = COLL_TAG_REDUCE; MPI_Aint extent; - void *tmp_buf; MPI_Aint true_lb, true_extent; if (count == 0) return 0; @@ -29,7 +28,7 @@ int Coll_reduce_binomial::reduce(const void *sendbuf, void *recvbuf, int count, extent = datatype->get_extent(); - tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); + unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(count * extent); int is_commutative = (op==MPI_OP_NULL || op->is_commutative()); mask = 1; @@ -43,7 +42,7 @@ int Coll_reduce_binomial::reduce(const void *sendbuf, void *recvbuf, int count, datatype->extent(&true_lb, &true_extent); /* adjust for potential negative lower bound in datatype */ - tmp_buf = (void *)((char*)tmp_buf - true_lb); + tmp_buf = tmp_buf - true_lb; /* If I'm not the root, then my recvbuf may not be valid, therefore I have to allocate a temporary one */ @@ -87,7 +86,7 @@ int Coll_reduce_binomial::reduce(const void *sendbuf, void *recvbuf, int count, } if (rank != root) { - smpi_free_tmp_buffer(recvbuf); + smpi_free_tmp_buffer(static_cast(recvbuf)); } smpi_free_tmp_buffer(tmp_buf); diff --git a/src/smpi/colls/reduce/reduce-flat-tree.cpp b/src/smpi/colls/reduce/reduce-flat-tree.cpp index ef3ae9c8cf..e9ffc8ab8d 100644 --- a/src/smpi/colls/reduce/reduce-flat-tree.cpp +++ b/src/smpi/colls/reduce/reduce-flat-tree.cpp @@ -17,8 +17,8 @@ Coll_reduce_flat_tree::reduce(const void *sbuf, void *rbuf, int count, int size; int rank; MPI_Aint extent; - char *origin = 0; - const char *inbuf; + unsigned char* origin = nullptr; + const unsigned char* inbuf; MPI_Status status; rank = comm->rank(); @@ 
-36,8 +36,7 @@ Coll_reduce_flat_tree::reduce(const void *sbuf, void *rbuf, int count, messages. */ if (size > 1) - origin = (char *) smpi_get_tmp_recvbuffer(count * extent); - + origin = smpi_get_tmp_recvbuffer(count * extent); /* Initialize the receive buffer. */ if (rank == (size - 1)) @@ -50,7 +49,7 @@ Coll_reduce_flat_tree::reduce(const void *sbuf, void *rbuf, int count, for (i = size - 2; i >= 0; --i) { if (rank == i) - inbuf = static_cast(sbuf); + inbuf = static_cast(sbuf); else { Request::recv(origin, count, dtype, i, tag, comm, &status); inbuf = origin; @@ -61,8 +60,7 @@ Coll_reduce_flat_tree::reduce(const void *sbuf, void *rbuf, int count, } - if (origin) - smpi_free_tmp_buffer(origin); + smpi_free_tmp_buffer(origin); /* All done */ return 0; diff --git a/src/smpi/colls/reduce/reduce-mvapich-knomial.cpp b/src/smpi/colls/reduce/reduce-mvapich-knomial.cpp index 02328fbef6..57069b03e0 100644 --- a/src/smpi/colls/reduce/reduce-mvapich-knomial.cpp +++ b/src/smpi/colls/reduce/reduce-mvapich-knomial.cpp @@ -173,11 +173,11 @@ int Coll_reduce_mvapich2_knomial::reduce ( &dst, &expected_send_count, &expected_recv_count, &src_array); if(expected_recv_count > 0 ) { - void** tmp_buf = new void*[expected_recv_count]; + unsigned char** tmp_buf = new unsigned char*[expected_recv_count]; MPI_Request* requests = new MPI_Request[expected_recv_count]; for (k = 0; k < expected_recv_count; k++) { tmp_buf[k] = smpi_get_tmp_sendbuffer(count * std::max(extent, true_extent)); - tmp_buf[k] = (void*)((char*)tmp_buf[k] - true_lb); + tmp_buf[k] = tmp_buf[k] - true_lb; } while(recv_iter < expected_recv_count) { @@ -217,7 +217,7 @@ int Coll_reduce_mvapich2_knomial::reduce ( Request::waitall(1, &send_request, &status); - smpi_free_tmp_buffer((void *)((char*)recvbuf + true_lb)); + smpi_free_tmp_buffer(static_cast(recvbuf) + true_lb); } /* --END ERROR HANDLING-- */ diff --git a/src/smpi/colls/reduce/reduce-mvapich-two-level.cpp b/src/smpi/colls/reduce/reduce-mvapich-two-level.cpp index 
e24faf618f..4d47d23432 100644 --- a/src/smpi/colls/reduce/reduce-mvapich-two-level.cpp +++ b/src/smpi/colls/reduce/reduce-mvapich-two-level.cpp @@ -88,7 +88,8 @@ int Coll_reduce_mvapich2_two_level::reduce( const void *sendbuf, int leader_comm_rank = -1, leader_comm_size = 0; MPI_Comm shmem_comm, leader_comm; int leader_root, leader_of_root; - void *in_buf = NULL, *out_buf = NULL, *tmp_buf = NULL; + const unsigned char* in_buf = nullptr; + unsigned char *out_buf = nullptr, *tmp_buf = nullptr; MPI_Aint true_lb, true_extent, extent; int is_commutative = 0, stride = 0; int intra_node_root=0; @@ -126,29 +127,29 @@ int Coll_reduce_mvapich2_two_level::reduce( const void *sendbuf, if (stride <= MV2_INTRA_SHMEM_REDUCE_MSG && is_commutative == 1) { if (local_rank == 0 ) { - tmp_buf = (void*)smpi_get_tmp_sendbuffer(count * std::max(extent, true_extent)); - tmp_buf = (void *) ((char *) tmp_buf - true_lb); + tmp_buf = smpi_get_tmp_sendbuffer(count * std::max(extent, true_extent)); + tmp_buf = tmp_buf - true_lb; } if (sendbuf != MPI_IN_PLACE) { - in_buf = (void *)sendbuf; + in_buf = static_cast(sendbuf); } else { - in_buf = recvbuf; + in_buf = static_cast(recvbuf); } if (local_rank == 0) { if( my_rank != root) { out_buf = tmp_buf; } else { - out_buf = recvbuf; - if(in_buf == out_buf) { - in_buf = MPI_IN_PLACE; - out_buf = recvbuf; + out_buf = static_cast(recvbuf); + if (in_buf == out_buf) { + in_buf = static_cast(MPI_IN_PLACE); + out_buf = static_cast(recvbuf); } } } else { - in_buf = (void *)sendbuf; - out_buf = NULL; + in_buf = static_cast(sendbuf); + out_buf = nullptr; } if (count * (std::max(extent, true_extent)) < SHMEM_COLL_BLOCK_SIZE) { @@ -177,8 +178,8 @@ int Coll_reduce_mvapich2_two_level::reduce( const void *sendbuf, root, comm); } /* We are done */ - if(tmp_buf!=NULL) - smpi_free_tmp_buffer((void *) ((char *) tmp_buf + true_lb)); + if (tmp_buf != nullptr) + smpi_free_tmp_buffer(tmp_buf + true_lb); goto fn_exit; } @@ -190,18 +191,18 @@ int 
Coll_reduce_mvapich2_two_level::reduce( const void *sendbuf, } leader_comm_size = leader_comm->size(); leader_comm_rank = leader_comm->rank(); - tmp_buf = (void*)smpi_get_tmp_sendbuffer(count * std::max(extent, true_extent)); - tmp_buf = (void *) ((char *) tmp_buf - true_lb); + tmp_buf = smpi_get_tmp_sendbuffer(count * std::max(extent, true_extent)); + tmp_buf = tmp_buf - true_lb; } if (sendbuf != MPI_IN_PLACE) { - in_buf = (void *)sendbuf; + in_buf = static_cast(sendbuf); } else { - in_buf = recvbuf; + in_buf = static_cast(recvbuf); } if (local_rank == 0) { - out_buf = tmp_buf; + out_buf = static_cast(tmp_buf); } else { - out_buf = NULL; + out_buf = nullptr; } @@ -228,8 +229,8 @@ int Coll_reduce_mvapich2_two_level::reduce( const void *sendbuf, intra_node_root, shmem_comm); } } else { - smpi_free_tmp_buffer((void *) ((char *) tmp_buf + true_lb)); - tmp_buf = in_buf; + smpi_free_tmp_buffer(tmp_buf + true_lb); + tmp_buf = (unsigned char*)in_buf; // xxx } /* Now work on the inter-leader phase. Data is in tmp_buf */ @@ -243,28 +244,26 @@ int Coll_reduce_mvapich2_two_level::reduce( const void *sendbuf, * root of the reduce op. 
So, I will write the * final result directly into my recvbuf */ if(tmp_buf != recvbuf) { - in_buf = tmp_buf; - out_buf = recvbuf; + in_buf = tmp_buf; + out_buf = static_cast(recvbuf); } else { - in_buf = (char *)smpi_get_tmp_sendbuffer(count* - datatype->get_extent()); - Datatype::copy(tmp_buf, count, datatype, - in_buf, count, datatype); - //in_buf = MPI_IN_PLACE; - out_buf = recvbuf; + unsigned char* buf = smpi_get_tmp_sendbuffer(count * datatype->get_extent()); + Datatype::copy(tmp_buf, count, datatype, buf, count, datatype); + // in_buf = MPI_IN_PLACE; + in_buf = buf; + out_buf = static_cast(recvbuf); } } else { - in_buf = (char *)smpi_get_tmp_sendbuffer(count* - datatype->get_extent()); - Datatype::copy(tmp_buf, count, datatype, - in_buf, count, datatype); - //in_buf = MPI_IN_PLACE; - out_buf = tmp_buf; + unsigned char* buf = smpi_get_tmp_sendbuffer(count * datatype->get_extent()); + Datatype::copy(tmp_buf, count, datatype, buf, count, datatype); + // in_buf = MPI_IN_PLACE; + in_buf = buf; + out_buf = tmp_buf; } } else { in_buf = tmp_buf; - out_buf = NULL; + out_buf = nullptr; } /* inter-leader communication */ @@ -275,20 +274,15 @@ int Coll_reduce_mvapich2_two_level::reduce( const void *sendbuf, } if (local_size > 1) { - /* Send the message to the root if the leader is not the - * root of the reduce operation. The reduced data is in tmp_buf */ - if ((local_rank == 0) && (root != my_rank) - && (leader_root == leader_comm_rank)) { - Request::send(tmp_buf, count, datatype, root, - COLL_TAG_REDUCE+1, comm); - } - if ((local_rank != 0) && (root == my_rank)) { - Request::recv(recvbuf, count, datatype, - leader_of_root, - COLL_TAG_REDUCE+1, comm, - MPI_STATUS_IGNORE); - } - smpi_free_tmp_buffer((void *) ((char *) tmp_buf + true_lb)); + /* Send the message to the root if the leader is not the + * root of the reduce operation. 
The reduced data is in tmp_buf */ + if ((local_rank == 0) && (root != my_rank) && (leader_root == leader_comm_rank)) { + Request::send(tmp_buf, count, datatype, root, COLL_TAG_REDUCE + 1, comm); + } + if ((local_rank != 0) && (root == my_rank)) { + Request::recv(recvbuf, count, datatype, leader_of_root, COLL_TAG_REDUCE + 1, comm, MPI_STATUS_IGNORE); + } + smpi_free_tmp_buffer(tmp_buf + true_lb); if (leader_comm_rank == leader_root) { if (my_rank != root || (my_rank == root && tmp_buf == recvbuf)) { diff --git a/src/smpi/colls/reduce/reduce-ompi.cpp b/src/smpi/colls/reduce/reduce-ompi.cpp index 98f6047afe..ce4d8d5f68 100644 --- a/src/smpi/colls/reduce/reduce-ompi.cpp +++ b/src/smpi/colls/reduce/reduce-ompi.cpp @@ -47,25 +47,25 @@ int smpi_coll_tuned_ompi_reduce_generic(const void* sendbuf, void* recvbuf, int ompi_coll_tree_t* tree, int count_by_segment, int max_outstanding_reqs ) { - char *inbuf[2] = {NULL, NULL}, *inbuf_free[2] = {NULL, NULL}; - char *accumbuf = NULL, *accumbuf_free = NULL; - char *local_op_buffer = NULL, *sendtmpbuf = NULL; - ptrdiff_t extent, lower_bound, segment_increment; - MPI_Request reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL}; - int num_segments, line, ret, segindex, i, rank; - int recvcount, prevcount, inbi; - - /** - * Determine number of segments and number of elements - * sent per operation - */ - datatype->extent(&lower_bound, &extent); - num_segments = (original_count + count_by_segment - 1) / count_by_segment; - segment_increment = count_by_segment * extent; - - sendtmpbuf = (char*) sendbuf; - if( sendbuf == MPI_IN_PLACE ) { - sendtmpbuf = (char *)recvbuf; + unsigned char *inbuf[2] = {nullptr, nullptr}, *inbuf_free[2] = {nullptr, nullptr}; + unsigned char *accumbuf = nullptr, *accumbuf_free = nullptr; + const unsigned char *local_op_buffer = nullptr, *sendtmpbuf = nullptr; + ptrdiff_t extent, lower_bound, segment_increment; + MPI_Request reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL}; + int num_segments, line, ret, segindex, i, 
rank; + int recvcount, prevcount, inbi; + + /** + * Determine number of segments and number of elements + * sent per operation + */ + datatype->extent(&lower_bound, &extent); + num_segments = (original_count + count_by_segment - 1) / count_by_segment; + segment_increment = count_by_segment * extent; + + sendtmpbuf = static_cast(sendbuf); + if (sendbuf == MPI_IN_PLACE) { + sendtmpbuf = static_cast(recvbuf); } XBT_DEBUG("coll:tuned:reduce_generic count %d, msg size %lu, segsize %lu, max_requests %d", original_count, @@ -82,35 +82,40 @@ int smpi_coll_tuned_ompi_reduce_generic(const void* sendbuf, void* recvbuf, int /* handle non existant recv buffer (i.e. its NULL) and protect the recv buffer on non-root nodes */ - accumbuf = (char*)recvbuf; - if( (NULL == accumbuf) || (root != rank) ) { - /* Allocate temporary accumulator buffer. */ - accumbuf_free = (char*)smpi_get_tmp_sendbuffer(true_extent + - (original_count - 1) * extent); - if (accumbuf_free == NULL) { - line = __LINE__; ret = -1; goto error_hndl; - } - accumbuf = accumbuf_free - lower_bound; + accumbuf = static_cast(recvbuf); + if (nullptr == accumbuf || root != rank) { + /* Allocate temporary accumulator buffer. 
*/ + accumbuf_free = smpi_get_tmp_sendbuffer(true_extent + (original_count - 1) * extent); + if (accumbuf_free == nullptr) { + line = __LINE__; + ret = -1; + goto error_hndl; + } + accumbuf = accumbuf_free - lower_bound; } /* If this is a non-commutative operation we must copy sendbuf to the accumbuf, in order to simplfy the loops */ if ((op != MPI_OP_NULL && not op->is_commutative())) { - Datatype::copy((char*)sendtmpbuf, original_count, datatype, (char*)accumbuf, original_count, datatype); + Datatype::copy(sendtmpbuf, original_count, datatype, accumbuf, original_count, datatype); } /* Allocate two buffers for incoming segments */ real_segment_size = true_extent + (count_by_segment - 1) * extent; - inbuf_free[0] = (char*) smpi_get_tmp_recvbuffer(real_segment_size); - if( inbuf_free[0] == NULL ) { - line = __LINE__; ret = -1; goto error_hndl; + inbuf_free[0] = smpi_get_tmp_recvbuffer(real_segment_size); + if (inbuf_free[0] == nullptr) { + line = __LINE__; + ret = -1; + goto error_hndl; } inbuf[0] = inbuf_free[0] - lower_bound; /* if there is chance to overlap communication - allocate second buffer */ if( (num_segments > 1) || (tree->tree_nextsize > 1) ) { - inbuf_free[1] = (char*) smpi_get_tmp_recvbuffer(real_segment_size); - if( inbuf_free[1] == NULL ) { - line = __LINE__; ret = -1; goto error_hndl; + inbuf_free[1] = smpi_get_tmp_recvbuffer(real_segment_size); + if (inbuf_free[1] == nullptr) { + line = __LINE__; + ret = -1; + goto error_hndl; } inbuf[1] = inbuf_free[1] - lower_bound; } @@ -505,8 +510,8 @@ int Coll_reduce_ompi_in_order_binary::reduce(const void *sendbuf, void *recvbuf, io_root = size - 1; const void* use_this_sendbuf = sendbuf; void* use_this_recvbuf = recvbuf; - void* tmp_sendbuf = nullptr; - void* tmp_recvbuf = nullptr; + unsigned char* tmp_sendbuf = nullptr; + unsigned char* tmp_recvbuf = nullptr; if (io_root != root) { ptrdiff_t text, ext; @@ -590,10 +595,10 @@ Coll_reduce_ompi_basic_linear::reduce(const void *sbuf, void *rbuf, int count, { int 
i, rank, size; ptrdiff_t true_extent, lb, extent; - char *free_buffer = NULL; - char *pml_buffer = NULL; - char *inplace_temp = NULL; - char *inbuf; + unsigned char* free_buffer = nullptr; + unsigned char* pml_buffer = nullptr; + unsigned char* inplace_temp = nullptr; + const unsigned char* inbuf; /* Initialize */ @@ -620,16 +625,16 @@ Coll_reduce_ompi_basic_linear::reduce(const void *sbuf, void *rbuf, int count, if (MPI_IN_PLACE == sbuf) { sbuf = rbuf; - inplace_temp = (char*)smpi_get_tmp_recvbuffer(true_extent + (count - 1) * extent); - if (NULL == inplace_temp) { - return -1; + inplace_temp = smpi_get_tmp_recvbuffer(true_extent + (count - 1) * extent); + if (nullptr == inplace_temp) { + return -1; } rbuf = inplace_temp - lb; } if (size > 1) { - free_buffer = (char*)smpi_get_tmp_recvbuffer(true_extent + (count - 1) * extent); - pml_buffer = free_buffer - lb; + free_buffer = smpi_get_tmp_recvbuffer(true_extent + (count - 1) * extent); + pml_buffer = free_buffer - lb; } /* Initialize the receive buffer. 
*/ @@ -646,7 +651,7 @@ Coll_reduce_ompi_basic_linear::reduce(const void *sbuf, void *rbuf, int count, for (i = size - 2; i >= 0; --i) { if (rank == i) { - inbuf = (char*)sbuf; + inbuf = static_cast(sbuf); } else { Request::recv(pml_buffer, count, dtype, i, COLL_TAG_REDUCE, comm, @@ -658,13 +663,12 @@ Coll_reduce_ompi_basic_linear::reduce(const void *sbuf, void *rbuf, int count, if(op!=MPI_OP_NULL) op->apply( inbuf, rbuf, &count, dtype); } - if (NULL != inplace_temp) { - Datatype::copy(inplace_temp, count, dtype,(char*)sbuf - ,count , dtype); - smpi_free_tmp_buffer(inplace_temp); + if (nullptr != inplace_temp) { + Datatype::copy(inplace_temp, count, dtype, (char*)sbuf, count, dtype); + smpi_free_tmp_buffer(inplace_temp); } - if (NULL != free_buffer) { - smpi_free_tmp_buffer(free_buffer); + if (nullptr != free_buffer) { + smpi_free_tmp_buffer(free_buffer); } /* All done */ diff --git a/src/smpi/colls/reduce/reduce-scatter-gather.cpp b/src/smpi/colls/reduce/reduce-scatter-gather.cpp index 07eb4bdd30..2c2eeada1c 100644 --- a/src/smpi/colls/reduce/reduce-scatter-gather.cpp +++ b/src/smpi/colls/reduce/reduce-scatter-gather.cpp @@ -23,7 +23,7 @@ int Coll_reduce_scatter_gather::reduce(const void *sendbuf, void *recvbuf, int dst, send_cnt, recv_cnt, newroot, newdst_tree_root; int newroot_tree_root, new_count; int tag = COLL_TAG_REDUCE,temporary_buffer=0; - void *send_ptr, *recv_ptr, *tmp_buf; + unsigned char *send_ptr, *recv_ptr, *tmp_buf; cnts = NULL; disps = NULL; @@ -52,9 +52,9 @@ int Coll_reduce_scatter_gather::reduce(const void *sendbuf, void *recvbuf, if (count < comm_size) { new_count = comm_size; - send_ptr = (void *) smpi_get_tmp_sendbuffer(new_count * extent); - recv_ptr = (void *) smpi_get_tmp_recvbuffer(new_count * extent); - tmp_buf = (void *) smpi_get_tmp_sendbuffer(new_count * extent); + send_ptr = smpi_get_tmp_sendbuffer(new_count * extent); + recv_ptr = smpi_get_tmp_recvbuffer(new_count * extent); + tmp_buf = smpi_get_tmp_sendbuffer(new_count * extent); 
memcpy(send_ptr, sendbuf != MPI_IN_PLACE ? sendbuf : recvbuf, extent * count); //if ((rank != root)) @@ -111,20 +111,14 @@ int Coll_reduce_scatter_gather::reduce(const void *sendbuf, void *recvbuf, } /* Send data from recvbuf. Recv into tmp_buf */ - Request::sendrecv((char *) recv_ptr + - disps[send_idx] * extent, - send_cnt, datatype, - dst, tag, - (char *) tmp_buf + - disps[recv_idx] * extent, - recv_cnt, datatype, dst, tag, comm, &status); + Request::sendrecv(recv_ptr + disps[send_idx] * extent, send_cnt, datatype, dst, tag, + tmp_buf + disps[recv_idx] * extent, recv_cnt, datatype, dst, tag, comm, &status); /* tmp_buf contains data received in this step. recvbuf contains data accumulated so far */ - if(op!=MPI_OP_NULL) op->apply( (char *) tmp_buf + disps[recv_idx] * extent, - (char *) recv_ptr + disps[recv_idx] * extent, - &recv_cnt, datatype); + if (op != MPI_OP_NULL) + op->apply(tmp_buf + disps[recv_idx] * extent, recv_ptr + disps[recv_idx] * extent, &recv_cnt, datatype); /* update send_idx for next iteration */ send_idx = recv_idx; @@ -207,14 +201,10 @@ int Coll_reduce_scatter_gather::reduce(const void *sendbuf, void *recvbuf, } if (newdst_tree_root == newroot_tree_root) { - Request::send((char *) recv_ptr + - disps[send_idx] * extent, - send_cnt, datatype, dst, tag, comm); + Request::send(recv_ptr + disps[send_idx] * extent, send_cnt, datatype, dst, tag, comm); break; } else { - Request::recv((char *) recv_ptr + - disps[recv_idx] * extent, - recv_cnt, datatype, dst, tag, comm, &status); + Request::recv(recv_ptr + disps[recv_idx] * extent, recv_cnt, datatype, dst, tag, comm, &status); } if (newrank > newdst) @@ -231,7 +221,7 @@ int Coll_reduce_scatter_gather::reduce(const void *sendbuf, void *recvbuf, else /* (count >= comm_size) */ { - tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); + tmp_buf = smpi_get_tmp_sendbuffer(count * extent); //if ((rank != root)) Request::sendrecv(sendbuf != MPI_IN_PLACE ? 
sendbuf : recvbuf, count, datatype, rank, tag, @@ -288,20 +278,15 @@ int Coll_reduce_scatter_gather::reduce(const void *sendbuf, void *recvbuf, } /* Send data from recvbuf. Recv into tmp_buf */ - Request::sendrecv((char *) recvbuf + - disps[send_idx] * extent, - send_cnt, datatype, - dst, tag, - (char *) tmp_buf + - disps[recv_idx] * extent, - recv_cnt, datatype, dst, tag, comm, &status); + Request::sendrecv(static_cast(recvbuf) + disps[send_idx] * extent, send_cnt, datatype, dst, tag, + tmp_buf + disps[recv_idx] * extent, recv_cnt, datatype, dst, tag, comm, &status); /* tmp_buf contains data received in this step. recvbuf contains data accumulated so far */ - if(op!=MPI_OP_NULL) op->apply( (char *) tmp_buf + disps[recv_idx] * extent, - (char *) recvbuf + disps[recv_idx] * extent, - &recv_cnt, datatype); + if (op != MPI_OP_NULL) + op->apply(tmp_buf + disps[recv_idx] * extent, static_cast(recvbuf) + disps[recv_idx] * extent, + &recv_cnt, datatype); /* update send_idx for next iteration */ send_idx = recv_idx; @@ -403,7 +388,8 @@ int Coll_reduce_scatter_gather::reduce(const void *sendbuf, void *recvbuf, } if (tmp_buf) smpi_free_tmp_buffer(tmp_buf); - if(temporary_buffer==1) smpi_free_tmp_buffer(recvbuf); + if (temporary_buffer == 1) + smpi_free_tmp_buffer(static_cast(recvbuf)); delete[] cnts; delete[] disps; diff --git a/src/smpi/colls/reduce_scatter/reduce_scatter-mpich.cpp b/src/smpi/colls/reduce_scatter/reduce_scatter-mpich.cpp index 74bc22566a..3679390852 100644 --- a/src/smpi/colls/reduce_scatter/reduce_scatter-mpich.cpp +++ b/src/smpi/colls/reduce_scatter/reduce_scatter-mpich.cpp @@ -29,7 +29,7 @@ int Coll_reduce_scatter_mpich_pair::reduce_scatter(const void *sendbuf, void *re { int rank, comm_size, i; MPI_Aint extent, true_extent, true_lb; - void *tmp_recvbuf; + unsigned char* tmp_recvbuf; int mpi_errno = MPI_SUCCESS; int total_count, dst, src; int is_commutative; @@ -64,9 +64,9 @@ int Coll_reduce_scatter_mpich_pair::reduce_scatter(const void *sendbuf, void 
*re } /* allocate temporary buffer to store incoming data */ - tmp_recvbuf = (void*)smpi_get_tmp_recvbuffer(recvcounts[rank] * std::max(true_extent, extent) + 1); + tmp_recvbuf = smpi_get_tmp_recvbuffer(recvcounts[rank] * std::max(true_extent, extent) + 1); /* adjust for potential negative lower bound in datatype */ - tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb); + tmp_recvbuf = tmp_recvbuf - true_lb; for (i=1; iextent(&true_lb, &true_extent); @@ -182,21 +182,24 @@ int Coll_reduce_scatter_mpich_noncomm::reduce_scatter(const void *sendbuf, void block_size = recvcounts[0]; total_count = block_size * comm_size; - tmp_buf0=( void *)smpi_get_tmp_sendbuffer( true_extent * total_count); - tmp_buf1=( void *)smpi_get_tmp_recvbuffer( true_extent * total_count); - void *tmp_buf0_save=tmp_buf0; - void *tmp_buf1_save=tmp_buf1; + tmp_buf0 = smpi_get_tmp_sendbuffer(true_extent * total_count); + tmp_buf1 = smpi_get_tmp_recvbuffer(true_extent * total_count); + unsigned char* tmp_buf0_save = tmp_buf0; + unsigned char* tmp_buf1_save = tmp_buf1; /* adjust for potential negative lower bound in datatype */ - tmp_buf0 = (void *)((char*)tmp_buf0 - true_lb); - tmp_buf1 = (void *)((char*)tmp_buf1 - true_lb); + tmp_buf0 = tmp_buf0 - true_lb; + tmp_buf1 = tmp_buf1 - true_lb; /* Copy our send data to tmp_buf0. We do this one block at a time and permute the blocks as we go according to the mirror permutation. */ for (i = 0; i < comm_size; ++i) { - mpi_errno = Datatype::copy((char *)(sendbuf == MPI_IN_PLACE ? recvbuf : sendbuf) + (i * true_extent * block_size), block_size, datatype, - (char *)tmp_buf0 + (MPIU_Mirror_permutation(i, log2_comm_size) * true_extent * block_size), block_size, datatype); - if (mpi_errno) return(mpi_errno); + mpi_errno = Datatype::copy( + static_cast(sendbuf == MPI_IN_PLACE ? 
recvbuf : sendbuf) + (i * true_extent * block_size), block_size, + datatype, tmp_buf0 + (MPIU_Mirror_permutation(i, log2_comm_size) * true_extent * block_size), block_size, + datatype); + if (mpi_errno) + return mpi_errno; } buf0_was_inout = 1; @@ -205,8 +208,8 @@ int Coll_reduce_scatter_mpich_noncomm::reduce_scatter(const void *sendbuf, void size = total_count; for (k = 0; k < log2_comm_size; ++k) { /* use a double-buffering scheme to avoid local copies */ - char *incoming_data = static_cast(buf0_was_inout ? tmp_buf1 : tmp_buf0); - char *outgoing_data = static_cast(buf0_was_inout ? tmp_buf0 : tmp_buf1); + unsigned char* incoming_data = buf0_was_inout ? tmp_buf1 : tmp_buf0; + unsigned char* outgoing_data = buf0_was_inout ? tmp_buf0 : tmp_buf1; int peer = rank ^ (0x1 << k); size /= 2; @@ -250,7 +253,7 @@ int Coll_reduce_scatter_mpich_noncomm::reduce_scatter(const void *sendbuf, void xbt_assert(size == recvcounts[rank]); /* copy the reduced data to the recvbuf */ - result_ptr = (char *)(buf0_was_inout ? tmp_buf0 : tmp_buf1) + recv_offset * true_extent; + result_ptr = (buf0_was_inout ? tmp_buf0 : tmp_buf1) + recv_offset * true_extent; mpi_errno = Datatype::copy(result_ptr, size, datatype, recvbuf, size, datatype); smpi_free_tmp_buffer(tmp_buf0_save); @@ -266,7 +269,6 @@ int Coll_reduce_scatter_mpich_rdb::reduce_scatter(const void *sendbuf, void *rec { int rank, comm_size, i; MPI_Aint extent, true_extent, true_lb; - void *tmp_recvbuf, *tmp_results; int mpi_errno = MPI_SUCCESS; int dis[2], blklens[2], total_count, dst; int mask, dst_tree_root, my_tree_root, j, k; @@ -294,192 +296,178 @@ int Coll_reduce_scatter_mpich_rdb::reduce_scatter(const void *sendbuf, void *rec /* noncommutative and (non-pof2 or block irregular), use recursive doubling. 
*/ /* need to allocate temporary buffer to receive incoming data*/ - tmp_recvbuf= (void*)smpi_get_tmp_recvbuffer(total_count * std::max(true_extent, extent)); - /* adjust for potential negative lower bound in datatype */ - tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb); - - /* need to allocate another temporary buffer to accumulate - results */ - tmp_results = (void*)smpi_get_tmp_sendbuffer(total_count * std::max(true_extent, extent)); - /* adjust for potential negative lower bound in datatype */ - tmp_results = (void *)((char*)tmp_results - true_lb); - - /* copy sendbuf into tmp_results */ - if (sendbuf != MPI_IN_PLACE) - mpi_errno = Datatype::copy(sendbuf, total_count, datatype, - tmp_results, total_count, datatype); - else - mpi_errno = Datatype::copy(recvbuf, total_count, datatype, - tmp_results, total_count, datatype); - - if (mpi_errno) return(mpi_errno); - - mask = 0x1; - i = 0; - while (mask < comm_size) { - dst = rank ^ mask; - - dst_tree_root = dst >> i; - dst_tree_root <<= i; - - my_tree_root = rank >> i; - my_tree_root <<= i; - - /* At step 1, processes exchange (n-n/p) amount of - data; at step 2, (n-2n/p) amount of data; at step 3, (n-4n/p) - amount of data, and so forth. We use derived datatypes for this. - - At each step, a process does not need to send data - indexed from my_tree_root to - my_tree_root+mask-1. Similarly, a process won't receive - data indexed from dst_tree_root to dst_tree_root+mask-1. */ - - /* calculate sendtype */ - blklens[0] = blklens[1] = 0; - for (j=0; jcommit(); - - /* calculate recvtype */ - blklens[0] = blklens[1] = 0; - for (j=0; jcommit(); - - received = 0; - if (dst < comm_size) { - /* tmp_results contains data to be sent in each step. Data is - received in tmp_recvbuf and then accumulated into - tmp_results. accumulation is done later below. 
*/ - - Request::sendrecv(tmp_results, 1, sendtype, dst, - COLL_TAG_SCATTER, - tmp_recvbuf, 1, recvtype, dst, - COLL_TAG_SCATTER, comm, - MPI_STATUS_IGNORE); - received = 1; - } - - /* if some processes in this process's subtree in this step - did not have any destination process to communicate with - because of non-power-of-two, we need to send them the - result. We use a logarithmic recursive-halfing algorithm - for this. */ - - if (dst_tree_root + mask > comm_size) { - nprocs_completed = comm_size - my_tree_root - mask; - /* nprocs_completed is the number of processes in this - subtree that have all the data. Send data to others - in a tree fashion. First find root of current tree - that is being divided into two. k is the number of - least-significant bits in this process's rank that - must be zeroed out to find the rank of the root */ - j = mask; - k = 0; - while (j) { - j >>= 1; - k++; - } - k--; - - tmp_mask = mask >> 1; - while (tmp_mask) { - dst = rank ^ tmp_mask; - - tree_root = rank >> k; - tree_root <<= k; - - /* send only if this proc has data and destination - doesn't have data. at any step, multiple processes - can send if they have the data */ - if ((dst > rank) && - (rank < tree_root + nprocs_completed) - && (dst >= tree_root + nprocs_completed)) { - /* send the current result */ - Request::send(tmp_recvbuf, 1, recvtype, - dst, COLL_TAG_SCATTER, - comm); - } - /* recv only if this proc. 
doesn't have data and sender - has data */ - else if ((dst < rank) && - (dst < tree_root + nprocs_completed) && - (rank >= tree_root + nprocs_completed)) { - Request::recv(tmp_recvbuf, 1, recvtype, dst, - COLL_TAG_SCATTER, - comm, MPI_STATUS_IGNORE); - received = 1; - } - tmp_mask >>= 1; - k--; - } - } + unsigned char* tmp_recvbuf = smpi_get_tmp_recvbuffer(total_count * std::max(true_extent, extent)); + /* adjust for potential negative lower bound in datatype */ + tmp_recvbuf = tmp_recvbuf - true_lb; - /* The following reduction is done here instead of after - the MPIC_Sendrecv_ft or MPIC_Recv_ft above. This is - because to do it above, in the noncommutative - case, we would need an extra temp buffer so as not to - overwrite temp_recvbuf, because temp_recvbuf may have - to be communicated to other processes in the - non-power-of-two case. To avoid that extra allocation, - we do the reduce here. */ - if (received) { - if (is_commutative || (dst_tree_root < my_tree_root)) { - { - if (op != MPI_OP_NULL) - op->apply(tmp_recvbuf, tmp_results, &blklens[0], datatype); - if (op != MPI_OP_NULL) - op->apply(((char*)tmp_recvbuf + dis[1] * extent), ((char*)tmp_results + dis[1] * extent), - &blklens[1], datatype); - } - } - else { - { - if (op != MPI_OP_NULL) - op->apply(tmp_results, tmp_recvbuf, &blklens[0], datatype); - if (op != MPI_OP_NULL) - op->apply(((char*)tmp_results + dis[1] * extent), ((char*)tmp_recvbuf + dis[1] * extent), - &blklens[1], datatype); - } - /* copy result back into tmp_results */ - mpi_errno = Datatype::copy(tmp_recvbuf, 1, recvtype, - tmp_results, 1, recvtype); - if (mpi_errno) return(mpi_errno); - } - } + /* need to allocate another temporary buffer to accumulate + results */ + unsigned char* tmp_results = smpi_get_tmp_sendbuffer(total_count * std::max(true_extent, extent)); + /* adjust for potential negative lower bound in datatype */ + tmp_results = tmp_results - true_lb; + + /* copy sendbuf into tmp_results */ + if (sendbuf != MPI_IN_PLACE) + 
mpi_errno = Datatype::copy(sendbuf, total_count, datatype, tmp_results, total_count, datatype); + else + mpi_errno = Datatype::copy(recvbuf, total_count, datatype, tmp_results, total_count, datatype); + + if (mpi_errno) + return (mpi_errno); + + mask = 0x1; + i = 0; + while (mask < comm_size) { + dst = rank ^ mask; + + dst_tree_root = dst >> i; + dst_tree_root <<= i; + + my_tree_root = rank >> i; + my_tree_root <<= i; + + /* At step 1, processes exchange (n-n/p) amount of + data; at step 2, (n-2n/p) amount of data; at step 3, (n-4n/p) + amount of data, and so forth. We use derived datatypes for this. + + At each step, a process does not need to send data + indexed from my_tree_root to + my_tree_root+mask-1. Similarly, a process won't receive + data indexed from dst_tree_root to dst_tree_root+mask-1. */ + + /* calculate sendtype */ + blklens[0] = blklens[1] = 0; + for (j = 0; j < my_tree_root; j++) + blklens[0] += recvcounts[j]; + for (j = my_tree_root + mask; j < comm_size; j++) + blklens[1] += recvcounts[j]; + + dis[0] = 0; + dis[1] = blklens[0]; + for (j = my_tree_root; (j < my_tree_root + mask) && (j < comm_size); j++) + dis[1] += recvcounts[j]; + + mpi_errno = Datatype::create_indexed(2, blklens, dis, datatype, &sendtype); + if (mpi_errno) + return (mpi_errno); + + sendtype->commit(); + + /* calculate recvtype */ + blklens[0] = blklens[1] = 0; + for (j = 0; j < dst_tree_root && j < comm_size; j++) + blklens[0] += recvcounts[j]; + for (j = dst_tree_root + mask; j < comm_size; j++) + blklens[1] += recvcounts[j]; + + dis[0] = 0; + dis[1] = blklens[0]; + for (j = dst_tree_root; (j < dst_tree_root + mask) && (j < comm_size); j++) + dis[1] += recvcounts[j]; + + mpi_errno = Datatype::create_indexed(2, blklens, dis, datatype, &recvtype); + if (mpi_errno) + return (mpi_errno); + + recvtype->commit(); + + received = 0; + if (dst < comm_size) { + /* tmp_results contains data to be sent in each step. 
Data is + received in tmp_recvbuf and then accumulated into + tmp_results. accumulation is done later below. */ + + Request::sendrecv(tmp_results, 1, sendtype, dst, COLL_TAG_SCATTER, tmp_recvbuf, 1, recvtype, dst, + COLL_TAG_SCATTER, comm, MPI_STATUS_IGNORE); + received = 1; + } + + /* if some processes in this process's subtree in this step + did not have any destination process to communicate with + because of non-power-of-two, we need to send them the + result. We use a logarithmic recursive-halfing algorithm + for this. */ + + if (dst_tree_root + mask > comm_size) { + nprocs_completed = comm_size - my_tree_root - mask; + /* nprocs_completed is the number of processes in this + subtree that have all the data. Send data to others + in a tree fashion. First find root of current tree + that is being divided into two. k is the number of + least-significant bits in this process's rank that + must be zeroed out to find the rank of the root */ + j = mask; + k = 0; + while (j) { + j >>= 1; + k++; + } + k--; + + tmp_mask = mask >> 1; + while (tmp_mask) { + dst = rank ^ tmp_mask; + + tree_root = rank >> k; + tree_root <<= k; + + /* send only if this proc has data and destination + doesn't have data. at any step, multiple processes + can send if they have the data */ + if ((dst > rank) && (rank < tree_root + nprocs_completed) && (dst >= tree_root + nprocs_completed)) { + /* send the current result */ + Request::send(tmp_recvbuf, 1, recvtype, dst, COLL_TAG_SCATTER, comm); + } + /* recv only if this proc. doesn't have data and sender + has data */ + else if ((dst < rank) && (dst < tree_root + nprocs_completed) && (rank >= tree_root + nprocs_completed)) { + Request::recv(tmp_recvbuf, 1, recvtype, dst, COLL_TAG_SCATTER, comm, MPI_STATUS_IGNORE); + received = 1; + } + tmp_mask >>= 1; + k--; + } + } + + /* The following reduction is done here instead of after + the MPIC_Sendrecv_ft or MPIC_Recv_ft above. 
This is + because to do it above, in the noncommutative + case, we would need an extra temp buffer so as not to + overwrite temp_recvbuf, because temp_recvbuf may have + to be communicated to other processes in the + non-power-of-two case. To avoid that extra allocation, + we do the reduce here. */ + if (received) { + if (is_commutative || (dst_tree_root < my_tree_root)) { + { + if (op != MPI_OP_NULL) + op->apply(tmp_recvbuf, tmp_results, &blklens[0], datatype); + if (op != MPI_OP_NULL) + op->apply(tmp_recvbuf + dis[1] * extent, tmp_results + dis[1] * extent, &blklens[1], datatype); + } + } else { + { + if (op != MPI_OP_NULL) + op->apply(tmp_results, tmp_recvbuf, &blklens[0], datatype); + if (op != MPI_OP_NULL) + op->apply(tmp_results + dis[1] * extent, tmp_recvbuf + dis[1] * extent, &blklens[1], datatype); + } + /* copy result back into tmp_results */ + mpi_errno = Datatype::copy(tmp_recvbuf, 1, recvtype, tmp_results, 1, recvtype); + if (mpi_errno) + return (mpi_errno); + } + } - Datatype::unref(sendtype); - Datatype::unref(recvtype); + Datatype::unref(sendtype); + Datatype::unref(recvtype); - mask <<= 1; - i++; + mask <<= 1; + i++; } /* now copy final results from tmp_results to recvbuf */ - mpi_errno = Datatype::copy(((char *)tmp_results+disps[rank]*extent), - recvcounts[rank], datatype, recvbuf, + mpi_errno = Datatype::copy(tmp_results + disps[rank] * extent, recvcounts[rank], datatype, recvbuf, recvcounts[rank], datatype); if (mpi_errno) return(mpi_errno); diff --git a/src/smpi/colls/reduce_scatter/reduce_scatter-ompi.cpp b/src/smpi/colls/reduce_scatter/reduce_scatter-ompi.cpp index 4b3a479a28..6594111219 100644 --- a/src/smpi/colls/reduce_scatter/reduce_scatter-ompi.cpp +++ b/src/smpi/colls/reduce_scatter/reduce_scatter-ompi.cpp @@ -55,8 +55,7 @@ Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(const void *sbuf int i, rank, size, count, err = MPI_SUCCESS; int tmp_size = 1, remain = 0, tmp_rank; ptrdiff_t true_lb, true_extent, lb, extent, 
buf_size; - char *recv_buf = NULL, *recv_buf_free = NULL; - char *result_buf = NULL, *result_buf_free = NULL; + unsigned char *result_buf = nullptr, *result_buf_free = nullptr; /* Initialize */ rank = comm->rank(); @@ -92,17 +91,15 @@ Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(const void *sbuf } /* Allocate temporary receive buffer. */ - recv_buf_free = (char*) smpi_get_tmp_recvbuffer(buf_size); - - recv_buf = recv_buf_free - lb; + unsigned char* recv_buf_free = smpi_get_tmp_recvbuffer(buf_size); + unsigned char* recv_buf = recv_buf_free - lb; if (NULL == recv_buf_free) { err = MPI_ERR_OTHER; goto cleanup; } /* allocate temporary buffer for results */ - result_buf_free = (char*) smpi_get_tmp_sendbuffer(buf_size); - + result_buf_free = smpi_get_tmp_sendbuffer(buf_size); result_buf = result_buf_free - lb; /* copy local buffer into the temporary results */ @@ -359,8 +356,8 @@ Coll_reduce_scatter_ompi_ring::reduce_scatter(const void *sbuf, void *rbuf, cons { int ret, line, rank, size, i, k, recv_from, send_to, total_count, max_block_count; int inbi; - char *tmpsend = NULL, *tmprecv = NULL, *accumbuf = NULL, *accumbuf_free = NULL; - char *inbuf_free[2] = {NULL, NULL}, *inbuf[2] = {NULL, NULL}; + unsigned char *tmpsend = NULL, *tmprecv = NULL, *accumbuf = NULL, *accumbuf_free = NULL; + unsigned char *inbuf_free[2] = {NULL, NULL}, *inbuf[2] = {NULL, NULL}; ptrdiff_t true_lb, true_extent, lb, extent, max_real_segsize; MPI_Request reqs[2] = {NULL, NULL}; @@ -404,17 +401,21 @@ Coll_reduce_scatter_ompi_ring::reduce_scatter(const void *sbuf, void *rbuf, cons max_real_segsize = true_extent + (ptrdiff_t)(max_block_count - 1) * extent; - accumbuf_free = (char*)smpi_get_tmp_recvbuffer(true_extent + (ptrdiff_t)(total_count - 1) * extent); + accumbuf_free = smpi_get_tmp_recvbuffer(true_extent + (ptrdiff_t)(total_count - 1) * extent); if (NULL == accumbuf_free) { ret = -1; line = __LINE__; goto error_hndl; } accumbuf = accumbuf_free - lb; - inbuf_free[0] = 
(char*)smpi_get_tmp_sendbuffer(max_real_segsize); + inbuf_free[0] = smpi_get_tmp_sendbuffer(max_real_segsize); if (NULL == inbuf_free[0]) { ret = -1; line = __LINE__; goto error_hndl; } inbuf[0] = inbuf_free[0] - lb; if (size > 2) { - inbuf_free[1] = (char*)smpi_get_tmp_sendbuffer(max_real_segsize); - if (NULL == inbuf_free[1]) { ret = -1; line = __LINE__; goto error_hndl; } - inbuf[1] = inbuf_free[1] - lb; + inbuf_free[1] = smpi_get_tmp_sendbuffer(max_real_segsize); + if (NULL == inbuf_free[1]) { + ret = -1; + line = __LINE__; + goto error_hndl; + } + inbuf[1] = inbuf_free[1] - lb; } /* Handle MPI_IN_PLACE for size > 1 */ diff --git a/src/smpi/colls/scatter/scatter-mvapich-two-level.cpp b/src/smpi/colls/scatter/scatter-mvapich-two-level.cpp index 613a3068d8..35e57e49f4 100644 --- a/src/smpi/colls/scatter/scatter-mvapich-two-level.cpp +++ b/src/smpi/colls/scatter/scatter-mvapich-two-level.cpp @@ -59,8 +59,8 @@ int Coll_scatter_mvapich2_two_level_direct::scatter(const void *sendbuf, int leader_comm_rank = -1, leader_comm_size = -1; int mpi_errno = MPI_SUCCESS; int recvtype_size, sendtype_size, nbytes; - void *tmp_buf = NULL; - void *leader_scatter_buf = NULL; + unsigned char* tmp_buf = nullptr; + unsigned char* leader_scatter_buf = nullptr; MPI_Status status; int leader_root, leader_of_root = -1; MPI_Comm shmem_comm, leader_comm; @@ -236,8 +236,8 @@ int Coll_scatter_mvapich2_two_level_binomial::scatter(const void *sendbuf, int leader_comm_rank = -1, leader_comm_size = -1; int mpi_errno = MPI_SUCCESS; int recvtype_size, sendtype_size, nbytes; - void *tmp_buf = NULL; - void *leader_scatter_buf = NULL; + unsigned char* tmp_buf = nullptr; + unsigned char* leader_scatter_buf = nullptr; MPI_Status status; int leader_root = -1, leader_of_root = -1; MPI_Comm shmem_comm, leader_comm; diff --git a/src/smpi/colls/scatter/scatter-ompi.cpp b/src/smpi/colls/scatter/scatter-ompi.cpp index 5163124a96..31385693ed 100644 --- a/src/smpi/colls/scatter/scatter-ompi.cpp +++ 
b/src/smpi/colls/scatter/scatter-ompi.cpp @@ -34,8 +34,9 @@ int Coll_scatter_ompi_binomial::scatter(const void* sbuf, int scount, MPI_Dataty int vrank; int size; int total_send = 0; - char *ptmp = NULL; - char *tempbuf = NULL; + unsigned char* ptmp = nullptr; + unsigned char* tempbuf = nullptr; + const unsigned char* cptmp; // const ptmp int err; ompi_coll_tree_t* bmtree; MPI_Status status; @@ -63,7 +64,8 @@ int Coll_scatter_ompi_binomial::scatter(const void* sbuf, int scount, MPI_Dataty if (rank == root) { if (0 == root) { /* root on 0, just use the send buffer */ - ptmp = (char*)sbuf; + ptmp = nullptr; // unused + cptmp = static_cast<const unsigned char*>(sbuf); if (rbuf != MPI_IN_PLACE) { /* local copy to rbuf */ err = Datatype::copy(sbuf, scount, sdtype, rbuf, rcount, rdtype); @@ -74,14 +76,15 @@ int Coll_scatter_ompi_binomial::scatter(const void* sbuf, int scount, MPI_Dataty } } else { /* root is not on 0, allocate temp buffer for send */ - tempbuf = (char*)smpi_get_tmp_sendbuffer(strue_extent + (scount * size - 1) * sextent); - if (NULL == tempbuf) { + tempbuf = smpi_get_tmp_sendbuffer(strue_extent + (scount * size - 1) * sextent); + if (nullptr == tempbuf) { err = MPI_ERR_OTHER; line = __LINE__; goto err_hndl; } - ptmp = tempbuf - slb; + ptmp = tempbuf - slb; + cptmp = ptmp; /* and rotate data so they will eventually in the right place */ err = Datatype::copy((char*)sbuf + sextent * root * scount, scount * (size - root), sdtype, ptmp, @@ -111,14 +114,15 @@ int Coll_scatter_ompi_binomial::scatter(const void* sbuf, int scount, MPI_Dataty } else if (not(vrank % 2)) { /* non-root, non-leaf nodes, allocate temp buffer for recv * the most we need is rcount*size/2 */ - tempbuf = (char*)smpi_get_tmp_recvbuffer(rtrue_extent + (rcount * size - 1) * rextent); - if (NULL == tempbuf) { + tempbuf = smpi_get_tmp_recvbuffer(rtrue_extent + (rcount * size - 1) * rextent); + if (nullptr == tempbuf) { err = MPI_ERR_OTHER; line = __LINE__; goto err_hndl; } - ptmp = tempbuf - rlb; + ptmp = tempbuf - 
rlb; + cptmp = ptmp; sdtype = rdtype; scount = rcount; @@ -126,7 +130,8 @@ int Coll_scatter_ompi_binomial::scatter(const void* sbuf, int scount, MPI_Dataty total_send = scount; } else { /* leaf nodes, just use rbuf */ - ptmp = (char*)rbuf; + ptmp = static_cast<unsigned char*>(rbuf); + cptmp = ptmp; } if (not(vrank % 2)) { @@ -146,7 +151,7 @@ int Coll_scatter_ompi_binomial::scatter(const void* sbuf, int scount, MPI_Dataty mycount = size - vkid; mycount *= scount; - Request::send(ptmp + total_send * sextent, mycount, sdtype, bmtree->tree_next[i], COLL_TAG_SCATTER, comm); + Request::send(cptmp + total_send * sextent, mycount, sdtype, bmtree->tree_next[i], COLL_TAG_SCATTER, comm); total_send += mycount; } @@ -156,16 +161,14 @@ int Coll_scatter_ompi_binomial::scatter(const void* sbuf, int scount, MPI_Dataty Request::recv(ptmp, rcount, rdtype, bmtree->tree_prev, COLL_TAG_SCATTER, comm, &status); } - if (NULL != tempbuf) - smpi_free_tmp_buffer(tempbuf); + smpi_free_tmp_buffer(tempbuf); // not FIXME : store the tree, as done in ompi, instead of calculating it each time ? 
ompi_coll_tuned_topo_destroy_tree(&bmtree); return MPI_SUCCESS; err_hndl: - if (NULL != tempbuf) - smpi_free_tmp_buffer(tempbuf); + smpi_free_tmp_buffer(tempbuf); XBT_DEBUG("%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank); return err; diff --git a/src/smpi/colls/smpi_coll.cpp b/src/smpi/colls/smpi_coll.cpp index 4cd2155046..56830ad7fa 100644 --- a/src/smpi/colls/smpi_coll.cpp +++ b/src/smpi/colls/smpi_coll.cpp @@ -151,7 +151,7 @@ int Colls::scan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype data // Send/Recv buffers to/from others MPI_Request* requests = new MPI_Request[size - 1]; - void** tmpbufs = new void*[rank]; + unsigned char** tmpbufs = new unsigned char*[rank]; int index = 0; for (int other = 0; other < rank; other++) { tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); @@ -209,7 +209,7 @@ int Colls::exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype da // Send/Recv buffers to/from others MPI_Request* requests = new MPI_Request[size - 1]; - void** tmpbufs = new void*[rank]; + unsigned char** tmpbufs = new unsigned char*[rank]; int index = 0; for (int other = 0; other < rank; other++) { tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); diff --git a/src/smpi/colls/smpi_default_selector.cpp b/src/smpi/colls/smpi_default_selector.cpp index 5b1febbe1e..e43e0119ec 100644 --- a/src/smpi/colls/smpi_default_selector.cpp +++ b/src/smpi/colls/smpi_default_selector.cpp @@ -43,7 +43,7 @@ int Coll_reduce_scatter_default::reduce_scatter(const void *sendbuf, void *recvb displs[i] = count; count += recvcounts[i]; } - void *tmpbuf = static_cast<void*>(smpi_get_tmp_sendbuffer(count*datatype->get_extent())); + unsigned char* tmpbuf = smpi_get_tmp_sendbuffer(count * datatype->get_extent()); int ret = Coll_reduce_default::reduce(sendbuf, tmpbuf, count, datatype, op, 0, comm); if(ret==MPI_SUCCESS) diff --git a/src/smpi/colls/smpi_mvapich2_selector.cpp b/src/smpi/colls/smpi_mvapich2_selector.cpp index 6812846699..bc6c2c596d 
100644 --- a/src/smpi/colls/smpi_mvapich2_selector.cpp +++ b/src/smpi/colls/smpi_mvapich2_selector.cpp @@ -25,7 +25,6 @@ int Coll_alltoall_mvapich2::alltoall( const void *sendbuf, int sendcount, init_mv2_alltoall_tables_stampede(); int sendtype_size, recvtype_size, comm_size; - char * tmp_buf = NULL; int mpi_errno=MPI_SUCCESS; int range = 0; int range_threshold = 0; @@ -63,16 +62,11 @@ int Coll_alltoall_mvapich2::alltoall( const void *sendbuf, int sendcount, mv2_alltoall_thresholds_table[conf_index][range].in_place_algo_table[range_threshold].min ||nbytes > mv2_alltoall_thresholds_table[conf_index][range].in_place_algo_table[range_threshold].max ) { - tmp_buf = (char *)smpi_get_tmp_sendbuffer( comm_size * recvcount * recvtype_size ); - Datatype::copy((char *)recvbuf, - comm_size*recvcount, recvtype, - (char *)tmp_buf, - comm_size*recvcount, recvtype); + unsigned char* tmp_buf = smpi_get_tmp_sendbuffer(comm_size * recvcount * recvtype_size); + Datatype::copy(recvbuf, comm_size * recvcount, recvtype, tmp_buf, comm_size * recvcount, recvtype); - mpi_errno = MV2_Alltoall_function(tmp_buf, recvcount, recvtype, - recvbuf, recvcount, recvtype, - comm ); - smpi_free_tmp_buffer(tmp_buf); + mpi_errno = MV2_Alltoall_function(tmp_buf, recvcount, recvtype, recvbuf, recvcount, recvtype, comm); + smpi_free_tmp_buffer(tmp_buf); } else { mpi_errno = MPIR_Alltoall_inplace_MV2(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, @@ -482,7 +476,7 @@ int Coll_bcast_mvapich2::bcast(void *buffer, // int is_homogeneous, is_contig; MPI_Aint type_size; //, position; - // void *tmp_buf = NULL; + // unsigned char *tmp_buf = NULL; MPI_Comm shmem_comm; //MPID_Datatype *dtp; @@ -593,16 +587,16 @@ int Coll_bcast_mvapich2::bcast(void *buffer, #endif if (two_level_bcast == 1) { // if (not is_contig || not is_homogeneous) { - // tmp_buf = (void*)smpi_get_tmp_sendbuffer(nbytes); - - /* position = 0;*/ - /* if (rank == root) {*/ - /* mpi_errno =*/ - /* MPIR_Pack_impl(buffer, count, datatype, 
tmp_buf, nbytes, &position);*/ - /* if (mpi_errno)*/ - /* MPIU_ERR_POP(mpi_errno);*/ - /* }*/ - // } +// tmp_buf = smpi_get_tmp_sendbuffer(nbytes); + +/* position = 0;*/ +/* if (rank == root) {*/ +/* mpi_errno =*/ +/* MPIR_Pack_impl(buffer, count, datatype, tmp_buf, nbytes, &position);*/ +/* if (mpi_errno)*/ +/* MPIU_ERR_POP(mpi_errno);*/ +/* }*/ +// } #ifdef CHANNEL_MRAIL_GEN2 if ((mv2_enable_zcpy_bcast == 1) && (&MPIR_Pipelined_Bcast_Zcpy_MV2 == MV2_Bcast_function)) { diff --git a/src/smpi/colls/smpi_nbc_impl.cpp b/src/smpi/colls/smpi_nbc_impl.cpp index eed0380625..51af4b1705 100644 --- a/src/smpi/colls/smpi_nbc_impl.cpp +++ b/src/smpi/colls/smpi_nbc_impl.cpp @@ -441,7 +441,7 @@ int Colls::ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype d if (size <= 0) return MPI_ERR_COMM; - void* tmp_sendbuf = nullptr; + unsigned char* tmp_sendbuf = nullptr; if( sendbuf == MPI_IN_PLACE ) { tmp_sendbuf = smpi_get_tmp_sendbuffer(count * datatype->get_extent()); Datatype::copy(recvbuf, count, datatype, tmp_sendbuf, count, datatype); diff --git a/src/smpi/include/private.hpp b/src/smpi/include/private.hpp index 39946e0ab7..5a212c1516 100644 --- a/src/smpi/include/private.hpp +++ b/src/smpi/include/private.hpp @@ -112,9 +112,9 @@ XBT_PRIVATE void smpi_bench_begin(); XBT_PRIVATE void smpi_bench_end(); XBT_PRIVATE void smpi_shared_destroy(); -XBT_PRIVATE void* smpi_get_tmp_sendbuffer(size_t size); -XBT_PRIVATE void* smpi_get_tmp_recvbuffer(size_t size); -XBT_PRIVATE void smpi_free_tmp_buffer(const void* buf); +XBT_PRIVATE unsigned char* smpi_get_tmp_sendbuffer(size_t size); +XBT_PRIVATE unsigned char* smpi_get_tmp_recvbuffer(size_t size); +XBT_PRIVATE void smpi_free_tmp_buffer(const unsigned char* buf); XBT_PRIVATE void smpi_free_replay_tmp_buffers(); extern "C" { diff --git a/src/smpi/include/smpi_file.hpp b/src/smpi/include/smpi_file.hpp index 9ad75374ae..97ef812ab9 100644 --- a/src/smpi/include/smpi_file.hpp +++ b/src/smpi/include/smpi_file.hpp @@ -160,7 +160,7 
@@ class File{ } XBT_CDEBUG(smpi_pmpi, "will have to access %lld from my chunk", totreads); - char* sendbuf= static_cast<char*>(smpi_get_tmp_sendbuffer(total_sent)); + unsigned char* sendbuf = smpi_get_tmp_sendbuffer(total_sent); if(totreads>0){ seek(min_offset, MPI_SEEK_SET); diff --git a/src/smpi/internals/smpi_memory.cpp b/src/smpi/internals/smpi_memory.cpp index 2544ad6d0b..44dad4447b 100644 --- a/src/smpi/internals/smpi_memory.cpp +++ b/src/smpi/internals/smpi_memory.cpp @@ -254,7 +254,7 @@ static std::vector<unsigned char> sendbuffer; static std::vector<unsigned char> recvbuffer; //allocate a single buffer for all sends, growing it if needed -void* smpi_get_tmp_sendbuffer(size_t size) +unsigned char* smpi_get_tmp_sendbuffer(size_t size) { if (not smpi_process()->replaying()) return new unsigned char[size]; @@ -266,7 +266,7 @@ void* smpi_get_tmp_sendbuffer(size_t size) } //allocate a single buffer for all recv -void* smpi_get_tmp_recvbuffer(size_t size) +unsigned char* smpi_get_tmp_recvbuffer(size_t size) { if (not smpi_process()->replaying()) return new unsigned char[size]; @@ -275,10 +275,10 @@ void* smpi_get_tmp_recvbuffer(size_t size) return recvbuffer.data(); } -void smpi_free_tmp_buffer(const void* buf) +void smpi_free_tmp_buffer(const unsigned char* buf) { if (not smpi_process()->replaying()) - delete[] static_cast<const unsigned char*>(buf); + delete[] buf; } void smpi_free_replay_tmp_buffers() diff --git a/src/smpi/mpi/smpi_request.cpp b/src/smpi/mpi/smpi_request.cpp index 5bf11228c6..4a54f6a6cf 100644 --- a/src/smpi/mpi/smpi_request.cpp +++ b/src/smpi/mpi/smpi_request.cpp @@ -878,7 +878,7 @@ int Request::wait(MPI_Request * request, MPI_Status * status) int count=(*request)->size_/ (*request)->old_type_->size(); (*request)->op_->apply(buf, (*request)->buf_, &count, (*request)->old_type_); } - smpi_free_tmp_buffer(buf); + smpi_free_tmp_buffer(static_cast<unsigned char*>(buf)); } } if((*request)->nbc_requests_[i]!=MPI_REQUEST_NULL)