From 70a1c67dc21179a44b0b317a1ea4823b63b7c666 Mon Sep 17 00:00:00 2001 From: degomme Date: Sat, 27 Sep 2014 02:11:17 +0200 Subject: [PATCH] Have replay always use shared buffers instead of allocating new ones, even inside collective ops. --- src/smpi/colls/allgather-bruck.c | 4 +- src/smpi/colls/allgatherv-mpich-rdb.c | 4 +- src/smpi/colls/allreduce-mvapich-rs.c | 4 +- src/smpi/colls/allreduce-mvapich-two-level.c | 6 +-- src/smpi/colls/allreduce-rab-rdb.c | 4 +- src/smpi/colls/allreduce-rab1.c | 12 ++--- src/smpi/colls/allreduce-rab2.c | 16 +++---- src/smpi/colls/allreduce-rdb.c | 4 +- .../colls/allreduce-smp-binomial-pipeline.c | 4 +- src/smpi/colls/allreduce-smp-binomial.c | 4 +- src/smpi/colls/allreduce-smp-rdb.c | 4 +- src/smpi/colls/allreduce-smp-rsag-lr.c | 4 +- src/smpi/colls/allreduce-smp-rsag-rab.c | 4 +- src/smpi/colls/allreduce-smp-rsag.c | 4 +- src/smpi/colls/alltoall-2dmesh.c | 8 ++-- src/smpi/colls/alltoall-3dmesh.c | 8 ++-- src/smpi/colls/alltoall-bruck.c | 4 +- src/smpi/colls/alltoall-rdb.c | 4 +- src/smpi/colls/bcast-mvapich-smp.c | 2 +- src/smpi/colls/gather-mvapich.c | 14 +++--- src/smpi/colls/reduce-NTSL.c | 4 +- src/smpi/colls/reduce-arrival-pattern-aware.c | 4 +- src/smpi/colls/reduce-binomial.c | 8 ++-- src/smpi/colls/reduce-mvapich-knomial.c | 14 +++--- src/smpi/colls/reduce-mvapich-two-level.c | 8 ++-- src/smpi/colls/reduce-ompi.c | 28 ++++++------ src/smpi/colls/reduce-scatter-gather.c | 16 +++---- src/smpi/colls/reduce_scatter-mpich.c | 20 ++++----- src/smpi/colls/reduce_scatter-ompi.c | 45 ++++++------------- src/smpi/colls/scatter-mvapich-two-level.c | 16 +++---- src/smpi/colls/smpi_mvapich2_selector.c | 8 ++-- src/smpi/private.h | 5 +++ src/smpi/smpi_base.c | 16 ++++--- src/smpi/smpi_replay.c | 33 +++++++++----- 34 files changed, 172 insertions(+), 171 deletions(-) diff --git a/src/smpi/colls/allgather-bruck.c b/src/smpi/colls/allgather-bruck.c index eeaab1f436..e0424e15e1 100644 --- a/src/smpi/colls/allgather-bruck.c +++ b/src/smpi/colls/allgather-bruck.c @@ -95,7 +95,7 @@ int smpi_coll_tuned_allgather_bruck(void *send_buff, int send_count, count = recv_count; - tmp_buff = (char *) xbt_malloc(num_procs * recv_count * recv_extent); + tmp_buff = (char *) smpi_get_tmp_sendbuffer(num_procs * recv_count * recv_extent); // perform a local copy smpi_datatype_copy(send_ptr, send_count, send_type, @@ -130,6 +130,6 @@ int smpi_coll_tuned_allgather_bruck(void *send_buff, int send_count, smpi_mpi_sendrecv(tmp_buff + (num_procs - rank) * recv_count * recv_extent, rank * recv_count, recv_type, rank, tag, recv_ptr, rank * recv_count, recv_type, rank, tag, comm, &status); - free(tmp_buff); + smpi_free_tmp_buffer(tmp_buff); return MPI_SUCCESS; } diff --git a/src/smpi/colls/allgatherv-mpich-rdb.c b/src/smpi/colls/allgatherv-mpich-rdb.c index 8e92b6f3c5..58195e814d 100644 --- a/src/smpi/colls/allgatherv-mpich-rdb.c +++ b/src/smpi/colls/allgatherv-mpich-rdb.c @@ -42,7 +42,7 @@ int smpi_coll_tuned_allgatherv_mpich_rdb ( smpi_datatype_extent(recvtype, &recvtype_true_lb, &recvtype_true_extent); - tmp_buf= (void*)xbt_malloc(total_count*(max(recvtype_true_extent,recvtype_extent))); + tmp_buf= (void*)smpi_get_tmp_sendbuffer(total_count*(max(recvtype_true_extent,recvtype_extent))); /* adjust for potential negative lower bound in datatype */ tmp_buf = (void *)((char*)tmp_buf - recvtype_true_lb); @@ -209,6 +209,6 @@ int smpi_coll_tuned_allgatherv_mpich_rdb ( position += recvcounts[j]; } - free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); return MPI_SUCCESS; } diff --git a/src/smpi/colls/allreduce-mvapich-rs.c b/src/smpi/colls/allreduce-mvapich-rs.c index ba3fa498cb..370052dc26 100644 --- a/src/smpi/colls/allreduce-mvapich-rs.c +++ b/src/smpi/colls/allreduce-mvapich-rs.c @@ -51,7 +51,7 @@ smpi_datatype_extent(datatype, &true_lb, &true_extent); extent = smpi_datatype_get_extent(datatype); - tmp_buf_free= xbt_malloc(count * (MAX(extent, true_extent))); + tmp_buf_free= smpi_get_tmp_recvbuffer(count * (MAX(extent, true_extent))); /* adjust for potential negative lower bound in datatype */ tmp_buf = (void *) ((char *) tmp_buf_free - true_lb); @@ -281,7 +281,7 @@ MPI_STATUS_IGNORE); } } - xbt_free(tmp_buf_free); + smpi_free_tmp_buffer(tmp_buf_free); return (mpi_errno); } diff --git a/src/smpi/colls/allreduce-mvapich-two-level.c b/src/smpi/colls/allreduce-mvapich-two-level.c index 7ebb0b5294..90ced9a362 100644 --- a/src/smpi/colls/allreduce-mvapich-two-level.c +++ b/src/smpi/colls/allreduce-mvapich-two-level.c @@ -132,8 +132,8 @@ int smpi_coll_tuned_allreduce_mvapich2_two_level(void *sendbuf, } if (local_size != total_size) { - void* sendtmpbuf = (char *)xbt_malloc(count*smpi_datatype_get_extent(datatype)); - smpi_datatype_copy(recvbuf, count, datatype,sendtmpbuf, count, datatype); + void* sendtmpbuf = (char *)smpi_get_tmp_sendbuffer(count*smpi_datatype_get_extent(datatype)); + smpi_datatype_copy(recvbuf, count, datatype,sendtmpbuf, count, datatype); /* inter-node allreduce */ if(MV2_Allreduce_function == &MPIR_Allreduce_pt2pt_rd_MV2){ mpi_errno = @@ -144,7 +144,7 @@ int smpi_coll_tuned_allreduce_mvapich2_two_level(void *sendbuf, MPIR_Allreduce_pt2pt_rs_MV2(sendtmpbuf, recvbuf, count, datatype, op, leader_comm); } - xbt_free(sendtmpbuf); + smpi_free_tmp_buffer(sendtmpbuf); } } else { /* insert the first reduce here */ diff --git a/src/smpi/colls/allreduce-rab-rdb.c b/src/smpi/colls/allreduce-rab-rdb.c index ce690e2b0a..5dce91e93b 100644 --- a/src/smpi/colls/allreduce-rab-rdb.c +++ b/src/smpi/colls/allreduce-rab-rdb.c @@ -21,7 +21,7 @@ int smpi_coll_tuned_allreduce_rab_rdb(void *sbuff, void *rbuff, int count, rank = smpi_comm_rank(comm); extent = smpi_datatype_get_extent(dtype); - tmp_buf = (void *) xbt_malloc(count * extent); + tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); smpi_datatype_copy(sbuff, count, dtype, rbuff, count, dtype); @@ -190,6 +190,6 @@ int smpi_coll_tuned_allreduce_rab_rdb(void *sbuff, void *rbuff, int count, smpi_mpi_recv(rbuff, count, dtype, rank + 1, tag, comm, &status); } - free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); return MPI_SUCCESS; } diff --git a/src/smpi/colls/allreduce-rab1.c b/src/smpi/colls/allreduce-rab1.c index 2cfab0a9fb..0a04e54b38 100644 --- a/src/smpi/colls/allreduce-rab1.c +++ b/src/smpi/colls/allreduce-rab1.c @@ -39,8 +39,8 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, send_size = (count + nprocs) / nprocs; newcnt = send_size * nprocs; - recv = (void *) xbt_malloc(extent * newcnt); - tmp_buf = (void *) xbt_malloc(extent * newcnt); + recv = (void *) smpi_get_tmp_recvbuffer(extent * newcnt); + tmp_buf = (void *) smpi_get_tmp_sendbuffer(extent * newcnt); memcpy(recv, sbuff, extent * count); @@ -70,13 +70,13 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, mpi_coll_allgather_fun(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm); memcpy(rbuff, recv, count * extent); - free(recv); - free(tmp_buf); + smpi_free_tmp_buffer(recv); + smpi_free_tmp_buffer(tmp_buf); } else { - tmp_buf = (void *) xbt_malloc(extent * count); + tmp_buf = (void *) smpi_get_tmp_sendbuffer(extent * count); memcpy(rbuff, sbuff, count * extent); mask = pof2 / 2; share = count / pof2; @@ -102,7 +102,7 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, memcpy(tmp_buf, (char *) rbuff + recv_idx * extent, recv_cnt * extent); mpi_coll_allgather_fun(tmp_buf, recv_cnt, dtype, rbuff, recv_cnt, dtype, comm); - free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); } return MPI_SUCCESS; diff --git a/src/smpi/colls/allreduce-rab2.c b/src/smpi/colls/allreduce-rab2.c index 3643321239..372d5e84cc 100644 --- a/src/smpi/colls/allreduce-rab2.c +++ b/src/smpi/colls/allreduce-rab2.c @@ -40,9 +40,9 @@ int smpi_coll_tuned_allreduce_rab2(void *sbuff, void *rbuff, send_size = (count + nprocs) / nprocs; nbytes = send_size * s_extent; - send = (void *) xbt_malloc(s_extent * send_size * nprocs); - recv = (void *) xbt_malloc(s_extent * send_size * nprocs); - tmp = (void *) xbt_malloc(nbytes); + send = (void *) smpi_get_tmp_sendbuffer(s_extent * send_size * nprocs); + recv = (void *) smpi_get_tmp_recvbuffer(s_extent * send_size * nprocs); + tmp = (void *) smpi_get_tmp_sendbuffer(nbytes); memcpy(send, sbuff, s_extent * count); @@ -56,16 +56,16 @@ int smpi_coll_tuned_allreduce_rab2(void *sbuff, void *rbuff, mpi_coll_allgather_fun(tmp, send_size, dtype, recv, send_size, dtype, comm); memcpy(rbuff, recv, count * s_extent); - free(recv); - free(tmp); - free(send); + smpi_free_tmp_buffer(recv); + smpi_free_tmp_buffer(tmp); + smpi_free_tmp_buffer(send); } else { send = sbuff; send_size = count / nprocs; nbytes = send_size * s_extent; r_offset = rank * nbytes; - recv = (void *) xbt_malloc(s_extent * send_size * nprocs); + recv = (void *) smpi_get_tmp_recvbuffer(s_extent * send_size * nprocs); mpi_coll_alltoall_fun(send, send_size, dtype, recv, send_size, dtype, comm); @@ -77,7 +77,7 @@ int smpi_coll_tuned_allreduce_rab2(void *sbuff, void *rbuff, mpi_coll_allgather_fun((char *) rbuff + r_offset, send_size, dtype, rbuff, send_size, dtype, comm); - free(recv); + smpi_free_tmp_buffer(recv); } return MPI_SUCCESS; diff --git a/src/smpi/colls/allreduce-rdb.c b/src/smpi/colls/allreduce-rdb.c index cc56d4a873..85a31efd54 100644 --- a/src/smpi/colls/allreduce-rdb.c +++ b/src/smpi/colls/allreduce-rdb.c @@ -29,7 +29,7 @@ int smpi_coll_tuned_allreduce_rdb(void *sbuff, void *rbuff, int count, rank=smpi_comm_rank(comm); smpi_datatype_extent(dtype, &lb, &extent); - tmp_buf = (void *) xbt_malloc(count * extent); + tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); smpi_mpi_sendrecv(sbuff, count, dtype, rank, 500, rbuff, count, dtype, rank, 500, comm, &status); @@ -125,6 +125,6 @@ int smpi_coll_tuned_allreduce_rdb(void *sbuff, void *rbuff, int count, smpi_mpi_recv(rbuff, count, dtype, rank + 1, tag, comm, &status); } - free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); return MPI_SUCCESS; } diff --git a/src/smpi/colls/allreduce-smp-binomial-pipeline.c b/src/smpi/colls/allreduce-smp-binomial-pipeline.c index e831be95fd..06a80a7a0c 100644 --- a/src/smpi/colls/allreduce-smp-binomial-pipeline.c +++ b/src/smpi/colls/allreduce-smp-binomial-pipeline.c @@ -60,7 +60,7 @@ int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf, rank = smpi_comm_rank(comm); MPI_Aint extent; extent = smpi_datatype_get_extent(dtype); - tmp_buf = (void *) xbt_malloc(count * extent); + tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); int intra_rank, inter_rank; intra_rank = rank % num_core; @@ -197,6 +197,6 @@ int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf, } } // for phase - free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); return MPI_SUCCESS; } diff --git a/src/smpi/colls/allreduce-smp-binomial.c b/src/smpi/colls/allreduce-smp-binomial.c index 25f9837321..3d159be30f 100644 --- a/src/smpi/colls/allreduce-smp-binomial.c +++ b/src/smpi/colls/allreduce-smp-binomial.c @@ -49,7 +49,7 @@ int smpi_coll_tuned_allreduce_smp_binomial(void *send_buf, void *recv_buf, rank=smpi_comm_rank(comm); MPI_Aint extent, lb; smpi_datatype_extent(dtype, &lb, &extent); - tmp_buf = (void *) xbt_malloc(count * extent); + tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); /* compute intra and inter ranking */ int intra_rank, inter_rank; @@ -150,6 +150,6 @@ int smpi_coll_tuned_allreduce_smp_binomial(void *send_buf, void *recv_buf, mask >>= 1; } - free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); return MPI_SUCCESS; } diff --git a/src/smpi/colls/allreduce-smp-rdb.c b/src/smpi/colls/allreduce-smp-rdb.c index 635258c397..9770d2993f 100644 --- a/src/smpi/colls/allreduce-smp-rdb.c +++ b/src/smpi/colls/allreduce-smp-rdb.c @@ -56,7 +56,7 @@ int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, rank = smpi_comm_rank(comm); MPI_Aint extent; extent = smpi_datatype_get_extent(dtype); - tmp_buf = (void *) xbt_malloc(count * extent); + tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); /* compute intra and inter ranking */ int intra_rank, inter_rank; @@ -182,6 +182,6 @@ int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, mask >>= 1; } - free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); return MPI_SUCCESS; } diff --git a/src/smpi/colls/allreduce-smp-rsag-lr.c b/src/smpi/colls/allreduce-smp-rsag-lr.c index daa2b416de..6a928bc946 100644 --- a/src/smpi/colls/allreduce-smp-rsag-lr.c +++ b/src/smpi/colls/allreduce-smp-rsag-lr.c @@ -44,7 +44,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, rank = smpi_comm_rank(comm); MPI_Aint extent; extent = smpi_datatype_get_extent(dtype); - tmp_buf = (void *) xbt_malloc(count * extent); + tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); int intra_rank, inter_rank; intra_rank = rank % num_core; @@ -255,6 +255,6 @@ int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, } - free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); return MPI_SUCCESS; } diff --git a/src/smpi/colls/allreduce-smp-rsag-rab.c b/src/smpi/colls/allreduce-smp-rsag-rab.c index a3a11309b8..1ac827e5ed 100644 --- a/src/smpi/colls/allreduce-smp-rsag-rab.c +++ b/src/smpi/colls/allreduce-smp-rsag-rab.c @@ -43,7 +43,7 @@ int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count, rank = smpi_comm_rank(comm); MPI_Aint extent; extent = smpi_datatype_get_extent(dtype); - tmp_buf = (void *) xbt_malloc(count * extent); + tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); int intra_rank, inter_rank; intra_rank = rank % num_core; @@ -206,6 +206,6 @@ int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count, } - free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); return MPI_SUCCESS; } diff --git a/src/smpi/colls/allreduce-smp-rsag.c b/src/smpi/colls/allreduce-smp-rsag.c index 1bc921ccc3..28ac6bbe33 100644 --- a/src/smpi/colls/allreduce-smp-rsag.c +++ b/src/smpi/colls/allreduce-smp-rsag.c @@ -43,7 +43,7 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, rank = smpi_comm_rank(comm); MPI_Aint extent; extent = smpi_datatype_get_extent(dtype); - tmp_buf = (void *) xbt_malloc(count * extent); + tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); int intra_rank, inter_rank; intra_rank = rank % num_core; @@ -225,6 +225,6 @@ int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, } - free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); return MPI_SUCCESS; } diff --git a/src/smpi/colls/alltoall-2dmesh.c b/src/smpi/colls/alltoall-2dmesh.c index 287014f875..dc9971627e 100644 --- a/src/smpi/colls/alltoall-2dmesh.c +++ b/src/smpi/colls/alltoall-2dmesh.c @@ -80,8 +80,8 @@ int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count, block_size = extent * send_count; - tmp_buff1 = (char *) xbt_malloc(block_size * num_procs * Y); - tmp_buff2 = (char *) xbt_malloc(block_size * Y); + tmp_buff1 = (char *) smpi_get_tmp_sendbuffer(block_size * num_procs * Y); + tmp_buff2 = (char *) smpi_get_tmp_recvbuffer(block_size * Y); num_reqs = X; if (Y > X) @@ -168,7 +168,7 @@ int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count, smpi_mpi_waitall(X - 1, reqs, statuses); free(reqs); free(statuses); - free(tmp_buff1); - free(tmp_buff2); + smpi_free_tmp_buffer(tmp_buff1); + smpi_free_tmp_buffer(tmp_buff2); return MPI_SUCCESS; } diff --git a/src/smpi/colls/alltoall-3dmesh.c b/src/smpi/colls/alltoall-3dmesh.c index 6f43a425a4..81b1ecdd7c 100644 --- a/src/smpi/colls/alltoall-3dmesh.c +++ b/src/smpi/colls/alltoall-3dmesh.c @@ -82,8 +82,8 @@ int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, block_size = extent * send_count; - tmp_buff1 = (char *) xbt_malloc(block_size * num_procs * two_dsize); - tmp_buff2 = (char *) xbt_malloc(block_size * two_dsize); + tmp_buff1 = (char *) smpi_get_tmp_sendbuffer(block_size * num_procs * two_dsize); + tmp_buff2 = (char *) smpi_get_tmp_recvbuffer(block_size * two_dsize); statuses = (MPI_Status *) xbt_malloc(num_reqs * sizeof(MPI_Status)); reqs = (MPI_Request *) xbt_malloc(num_reqs * sizeof(MPI_Request)); @@ -180,7 +180,7 @@ int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, free(reqs); free(statuses); - free(tmp_buff1); - free(tmp_buff2); + smpi_free_tmp_buffer(tmp_buff1); + smpi_free_tmp_buffer(tmp_buff2); return MPI_SUCCESS; } diff --git a/src/smpi/colls/alltoall-bruck.c b/src/smpi/colls/alltoall-bruck.c index 8dd710cf98..99a93b5cfa 100644 --- a/src/smpi/colls/alltoall-bruck.c +++ b/src/smpi/colls/alltoall-bruck.c @@ -48,7 +48,7 @@ smpi_coll_tuned_alltoall_bruck(void *send_buff, int send_count, extent = smpi_datatype_get_extent(recv_type); - tmp_buff = (char *) xbt_malloc(num_procs * recv_count * extent); + tmp_buff = (char *) smpi_get_tmp_sendbuffer(num_procs * recv_count * extent); disps = (int *) xbt_malloc(sizeof(int) * num_procs); blocks_length = (int *) xbt_malloc(sizeof(int) * num_procs); @@ -110,6 +110,6 @@ smpi_coll_tuned_alltoall_bruck(void *send_buff, int send_count, recv_ptr + (num_procs - i - 1) * recv_count * extent, recv_count, recv_type, rank, tag, comm, &status); - free(tmp_buff); + smpi_free_tmp_buffer(tmp_buff); return MPI_SUCCESS; } diff --git a/src/smpi/colls/alltoall-rdb.c b/src/smpi/colls/alltoall-rdb.c index 41511f8ca5..cb49bfce2a 100644 --- a/src/smpi/colls/alltoall-rdb.c +++ b/src/smpi/colls/alltoall-rdb.c @@ -56,7 +56,7 @@ int smpi_coll_tuned_alltoall_rdb(void *send_buff, int send_count, max_size = num_procs * recv_increment; - tmp_buff = (char *) xbt_malloc(max_size); + tmp_buff = (char *) smpi_get_tmp_sendbuffer(max_size); curr_size = send_count * num_procs; @@ -149,6 +149,6 @@ int smpi_coll_tuned_alltoall_rdb(void *send_buff, int send_count, send_count, send_type, rank, tag, recv_ptr + (i * recv_count * extent), recv_count, recv_type, rank, tag, comm, &status); - free(tmp_buff); + smpi_free_tmp_buffer(tmp_buff); return MPI_SUCCESS; } diff --git a/src/smpi/colls/bcast-mvapich-smp.c b/src/smpi/colls/bcast-mvapich-smp.c index 8fb512c95a..8dd8594063 100644 --- a/src/smpi/colls/bcast-mvapich-smp.c +++ b/src/smpi/colls/bcast-mvapich-smp.c @@ -315,7 +315,7 @@ int smpi_coll_tuned_bcast_mvapich2_intra_node(void *buffer, ) { if (!is_contig || !is_homogeneous) { - tmp_buf=(void *)xbt_malloc(nbytes); + tmp_buf=(void *)smpi_get_tmp_sendbuffer(nbytes); /* TODO: Pipeline the packing and communication */ // position = 0; diff --git a/src/smpi/colls/gather-mvapich.c b/src/smpi/colls/gather-mvapich.c index eeee4695ad..561e757e18 100644 --- a/src/smpi/colls/gather-mvapich.c +++ b/src/smpi/colls/gather-mvapich.c @@ -219,10 +219,10 @@ int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf, if (local_rank == 0) { /* Node leader, allocate tmp_buffer */ if (rank == root) { - tmp_buf = xbt_malloc(recvcnt * MAX(recvtype_extent, + tmp_buf = smpi_get_tmp_recvbuffer(recvcnt * MAX(recvtype_extent, recvtype_true_extent) * local_size); } else { - tmp_buf = xbt_malloc(sendcnt * MAX(sendtype_extent, + tmp_buf = smpi_get_tmp_sendbuffer(sendcnt * MAX(sendtype_extent, sendtype_true_extent) * local_size); } @@ -286,12 +286,12 @@ int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf, * leader and this process's rank in the leader_comm * is the same as leader_root */ if(rank == root) { - leader_gather_buf = xbt_malloc(recvcnt * + leader_gather_buf = smpi_get_tmp_recvbuffer(recvcnt * MAX(recvtype_extent, recvtype_true_extent) * comm_size); } else { - leader_gather_buf = xbt_malloc(sendcnt * + leader_gather_buf = smpi_get_tmp_sendbuffer(sendcnt * MAX(sendtype_extent, sendtype_true_extent) * comm_size); @@ -361,7 +361,7 @@ int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf, if (leader_comm_rank == leader_root && root != leader_of_root) { /* The root of the Gather operation is not a node-level leader */ - leader_gather_buf = xbt_malloc(nbytes * comm_size); + leader_gather_buf = smpi_get_tmp_sendbuffer(nbytes * comm_size); if (leader_gather_buf == NULL) { mpi_errno = MPI_ERR_OTHER; return mpi_errno; @@ -402,10 +402,10 @@ int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf, /* check if multiple threads are calling this collective function */ if (local_rank == 0 ) { if (tmp_buf != NULL) { - xbt_free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); } if (leader_gather_buf != NULL) { - xbt_free(leader_gather_buf); + smpi_free_tmp_buffer(leader_gather_buf); } } diff --git a/src/smpi/colls/reduce-NTSL.c b/src/smpi/colls/reduce-NTSL.c index 4f1dc0db52..60aa06ca08 100644 --- a/src/smpi/colls/reduce-NTSL.c +++ b/src/smpi/colls/reduce-NTSL.c @@ -63,7 +63,7 @@ int smpi_coll_tuned_reduce_NTSL(void *buf, void *rbuf, int count, */ char *tmp_buf; - tmp_buf = (char *) xbt_malloc(count * extent); + tmp_buf = (char *) smpi_get_tmp_sendbuffer(count * extent); smpi_mpi_sendrecv(buf, count, datatype, rank, tag, rbuf, count, datatype, rank, tag, comm, &status); @@ -80,7 +80,7 @@ int smpi_coll_tuned_reduce_NTSL(void *buf, void *rbuf, int count, smpi_op_apply(op, tmp_buf, rbuf, &count, &datatype); smpi_mpi_send(rbuf, count, datatype, to, tag, comm); } - free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); return MPI_SUCCESS; } diff --git a/src/smpi/colls/reduce-arrival-pattern-aware.c b/src/smpi/colls/reduce-arrival-pattern-aware.c index 920a15c836..e528e9a787 100644 --- a/src/smpi/colls/reduce-arrival-pattern-aware.c +++ b/src/smpi/colls/reduce-arrival-pattern-aware.c @@ -72,7 +72,7 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, } char *tmp_buf; - tmp_buf = (char *) xbt_malloc(count * extent); + tmp_buf = (char *) smpi_get_tmp_sendbuffer(count * extent); smpi_mpi_sendrecv(buf, count, datatype, rank, tag, rbuf, count, datatype, rank, tag, comm, &status); @@ -349,7 +349,7 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, comm); } - free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); return MPI_SUCCESS; } diff --git a/src/smpi/colls/reduce-binomial.c b/src/smpi/colls/reduce-binomial.c index 597c2e469f..607bb6fc42 100644 --- a/src/smpi/colls/reduce-binomial.c +++ b/src/smpi/colls/reduce-binomial.c @@ -27,7 +27,7 @@ int smpi_coll_tuned_reduce_binomial(void *sendbuf, void *recvbuf, int count, extent = smpi_datatype_get_extent(datatype); - tmp_buf = (void *) xbt_malloc(count * extent); + tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); int is_commutative = smpi_op_is_commute(op); mask = 1; @@ -46,7 +46,7 @@ int smpi_coll_tuned_reduce_binomial(void *sendbuf, void *recvbuf, int count, /* If I'm not the root, then my recvbuf may not be valid, therefore I have to allocate a temporary one */ if (rank != root) { - recvbuf = (void *) malloc(count*(max(extent,true_extent))); + recvbuf = (void *) smpi_get_tmp_recvbuffer(count*(max(extent,true_extent))); recvbuf = (void *)((char*)recvbuf - true_lb); } if ((rank != root) || (sendbuf != MPI_IN_PLACE)) { @@ -85,9 +85,9 @@ int smpi_coll_tuned_reduce_binomial(void *sendbuf, void *recvbuf, int count, } if (rank != root) { - xbt_free(recvbuf); + smpi_free_tmp_buffer(recvbuf); } - free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); return 0; } diff --git a/src/smpi/colls/reduce-mvapich-knomial.c b/src/smpi/colls/reduce-mvapich-knomial.c index be7f303a34..e2df112d88 100644 --- a/src/smpi/colls/reduce-mvapich-knomial.c +++ b/src/smpi/colls/reduce-mvapich-knomial.c @@ -90,7 +90,7 @@ static int MPIR_Reduce_knomial_trace(int root, int reduce_knomial_factor, /* Finally, fill up the src array */ if(recv_iter > 0) { - knomial_reduce_src_array = xbt_malloc(sizeof(int)*recv_iter); + knomial_reduce_src_array = smpi_get_tmp_sendbuffer(sizeof(int)*recv_iter); } mask = orig_mask; @@ -148,7 +148,7 @@ int smpi_coll_tuned_reduce_mvapich2_knomial ( is_commutative = smpi_op_is_commute(op); if (rank != root) { - recvbuf=(void *)xbt_malloc(count*(MAX(extent,true_extent))); + recvbuf=(void *)smpi_get_tmp_recvbuffer(count*(MAX(extent,true_extent))); recvbuf = (void *)((char*)recvbuf - true_lb); } @@ -172,10 +172,10 @@ int smpi_coll_tuned_reduce_mvapich2_knomial ( &dst, &expected_send_count, &expected_recv_count, &src_array); if(expected_recv_count > 0 ) { - tmp_buf = xbt_malloc(sizeof(void *)*expected_recv_count); + tmp_buf = smpi_get_tmp_recvbuffer(sizeof(void *)*expected_recv_count); requests = xbt_malloc(sizeof(MPI_Request)*expected_recv_count); for(k=0; k < expected_recv_count; k++ ) { - tmp_buf[k] = xbt_malloc(count*(MAX(extent,true_extent))); + tmp_buf[k] = smpi_get_tmp_sendbuffer(count*(MAX(extent,true_extent))); tmp_buf[k] = (void *)((char*)tmp_buf[k] - true_lb); } @@ -200,14 +200,14 @@ int smpi_coll_tuned_reduce_mvapich2_knomial ( } for(k=0; k < expected_recv_count; k++ ) { - xbt_free(tmp_buf[k]); + smpi_free_tmp_buffer(tmp_buf[k]); } - xbt_free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); xbt_free(requests); } if(src_array != NULL) { - xbt_free(src_array); + smpi_free_tmp_buffer(src_array); } if(rank != root) { diff --git a/src/smpi/colls/reduce-mvapich-two-level.c b/src/smpi/colls/reduce-mvapich-two-level.c index 8014a14481..306bec9a9c 100644 --- a/src/smpi/colls/reduce-mvapich-two-level.c +++ b/src/smpi/colls/reduce-mvapich-two-level.c @@ -123,7 +123,7 @@ int smpi_coll_tuned_reduce_mvapich2_two_level( void *sendbuf, if (stride <= MV2_INTRA_SHMEM_REDUCE_MSG && is_commutative == 1) { if (local_rank == 0 ) { - tmp_buf=(void *)xbt_malloc( count * + tmp_buf=(void *)smpi_get_tmp_sendbuffer( count * (MAX(extent, true_extent))); tmp_buf = (void *) ((char *) tmp_buf - true_lb); } @@ -191,7 +191,7 @@ int smpi_coll_tuned_reduce_mvapich2_two_level( void *sendbuf, } leader_comm_size = smpi_comm_size(leader_comm); leader_comm_rank = smpi_comm_rank(leader_comm); - tmp_buf=(void *)xbt_malloc(count * + tmp_buf=(void *)smpi_get_tmp_sendbuffer(count * (MAX(extent, true_extent))); tmp_buf = (void *) ((char *) tmp_buf - true_lb); } @@ -251,7 +251,7 @@ int smpi_coll_tuned_reduce_mvapich2_two_level( void *sendbuf, out_buf = recvbuf; } else { - in_buf = (char *)xbt_malloc(count* + in_buf = (char *)smpi_get_tmp_sendbuffer(count* smpi_datatype_get_extent(datatype)); smpi_datatype_copy(tmp_buf, count, datatype, in_buf, count, datatype); @@ -259,7 +259,7 @@ int smpi_coll_tuned_reduce_mvapich2_two_level( void *sendbuf, out_buf = recvbuf; } } else { - in_buf = (char *)xbt_malloc(count* + in_buf = (char *)smpi_get_tmp_sendbuffer(count* smpi_datatype_get_extent(datatype)); smpi_datatype_copy(tmp_buf, count, datatype, in_buf, count, datatype); diff --git a/src/smpi/colls/reduce-ompi.c b/src/smpi/colls/reduce-ompi.c index e9e3803b23..3c51282749 100644 --- a/src/smpi/colls/reduce-ompi.c +++ b/src/smpi/colls/reduce-ompi.c @@ -82,7 +82,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi accumbuf = (char*)recvbuf; if( (NULL == accumbuf) || (root != rank) ) { /* Allocate temporary accumulator buffer. */ - accumbuf_free = (char*)malloc(true_extent + + accumbuf_free = (char*)smpi_get_tmp_sendbuffer(true_extent + (original_count - 1) * extent); if (accumbuf_free == NULL) { line = __LINE__; ret = -1; goto error_hndl; @@ -99,7 +99,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi } /* Allocate two buffers for incoming segments */ real_segment_size = true_extent + (count_by_segment - 1) * extent; - inbuf_free[0] = (char*) malloc(real_segment_size); + inbuf_free[0] = (char*) smpi_get_tmp_recvbuffer(real_segment_size); if( inbuf_free[0] == NULL ) { line = __LINE__; ret = -1; goto error_hndl; } @@ -107,7 +107,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi /* if there is chance to overlap communication - allocate second buffer */ if( (num_segments > 1) || (tree->tree_nextsize > 1) ) { - inbuf_free[1] = (char*) malloc(real_segment_size); + inbuf_free[1] = (char*) smpi_get_tmp_recvbuffer(real_segment_size); if( inbuf_free[1] == NULL ) { line = __LINE__; ret = -1; goto error_hndl; } @@ -212,9 +212,9 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi } /* end of for each segment */ /* clean up */ - if( inbuf_free[0] != NULL) free(inbuf_free[0]); - if( inbuf_free[1] != NULL) free(inbuf_free[1]); - if( accumbuf_free != NULL ) free(accumbuf_free); + smpi_free_tmp_buffer(inbuf_free[0]); + smpi_free_tmp_buffer(inbuf_free[1]); + smpi_free_tmp_buffer(accumbuf_free); } /* leaf nodes @@ -519,7 +519,7 @@ int smpi_coll_tuned_reduce_ompi_in_order_binary( void *sendbuf, void *recvbuf, text=smpi_datatype_get_extent(datatype); if ((root == rank) && (MPI_IN_PLACE == sendbuf)) { - tmpbuf = (char *) malloc(text + (count - 1) * ext); + tmpbuf = (char *) smpi_get_tmp_sendbuffer(text + (count - 1) * ext); if (NULL == tmpbuf) { return MPI_ERR_INTERN; } @@ -528,7 +528,7 @@ int smpi_coll_tuned_reduce_ompi_in_order_binary( void *sendbuf, void *recvbuf, (char*)tmpbuf, count, datatype); use_this_sendbuf = tmpbuf; } else if (io_root == rank) { - tmpbuf = (char *) malloc(text + (count - 1) * ext); + tmpbuf = (char *) smpi_get_tmp_recvbuffer(text + (count - 1) * ext); if (NULL == tmpbuf) { return MPI_ERR_INTERN; } @@ -551,7 +551,7 @@ int smpi_coll_tuned_reduce_ompi_in_order_binary( void *sendbuf, void *recvbuf, COLL_TAG_REDUCE, comm, MPI_STATUS_IGNORE); if (MPI_IN_PLACE == sendbuf) { - free(use_this_sendbuf); + smpi_free_tmp_buffer(use_this_sendbuf); } } else if (io_root == rank) { @@ -559,7 +559,7 @@ int smpi_coll_tuned_reduce_ompi_in_order_binary( void *sendbuf, void *recvbuf, smpi_mpi_send(use_this_recvbuf, count, datatype, root, COLL_TAG_REDUCE, comm); - free(use_this_recvbuf); + smpi_free_tmp_buffer(use_this_recvbuf); } } @@ -627,7 +627,7 @@ smpi_coll_tuned_reduce_ompi_basic_linear(void *sbuf, void *rbuf, int count, if (MPI_IN_PLACE == sbuf) { sbuf = rbuf; - inplace_temp = (char*)malloc(true_extent + (count - 1) * extent); + inplace_temp = (char*)smpi_get_tmp_recvbuffer(true_extent + (count - 1) * extent); if (NULL == inplace_temp) { return -1; } @@ -635,7 +635,7 @@ smpi_coll_tuned_reduce_ompi_basic_linear(void *sbuf, void *rbuf, int count, } if (size > 1) { - free_buffer = (char*)malloc(true_extent + (count - 1) * extent); + free_buffer = (char*)smpi_get_tmp_recvbuffer(true_extent + (count - 1) * extent); pml_buffer = free_buffer - lb; } @@ -668,10 +668,10 @@ smpi_coll_tuned_reduce_ompi_basic_linear(void *sbuf, void *rbuf, int count, if (NULL != inplace_temp) { smpi_datatype_copy(inplace_temp, count, dtype,(char*)sbuf ,count , dtype); - free(inplace_temp); + smpi_free_tmp_buffer(inplace_temp); } if (NULL != free_buffer) { - free(free_buffer); + smpi_free_tmp_buffer(free_buffer); } /* All done */ diff --git a/src/smpi/colls/reduce-scatter-gather.c b/src/smpi/colls/reduce-scatter-gather.c index 30d5e6e9b4..57161ff817 100644 --- a/src/smpi/colls/reduce-scatter-gather.c +++ b/src/smpi/colls/reduce-scatter-gather.c @@ -40,7 +40,7 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, /* If I'm not the root, then my recvbuf may not be valid, therefore I have to allocate a temporary one */ if (rank != root && !recvbuf) { - recvbuf = (void *)xbt_malloc(count * extent); + recvbuf = (void *)smpi_get_tmp_recvbuffer(count * extent); } /* find nearest power-of-two less than or equal to comm_size */ pof2 = 1; @@ -50,9 +50,9 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, if (count < comm_size) { new_count = comm_size; - send_ptr = (void *) xbt_malloc(new_count * extent); - recv_ptr = (void *) xbt_malloc(new_count * extent); - tmp_buf = (void *) xbt_malloc(new_count * extent); + send_ptr = (void *) smpi_get_tmp_sendbuffer(new_count * extent); + recv_ptr = (void *) smpi_get_tmp_recvbuffer(new_count * extent); + tmp_buf = (void *) smpi_get_tmp_sendbuffer(new_count * extent); memcpy(send_ptr, sendbuf, extent * count); //if ((rank != root)) @@ -223,13 +223,13 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, } } memcpy(recvbuf, recv_ptr, extent * count); - free(send_ptr); - free(recv_ptr); + smpi_free_tmp_buffer(send_ptr); + smpi_free_tmp_buffer(recv_ptr); } else /* (count >= comm_size) */ { - tmp_buf = (void *) xbt_malloc(count * extent); + tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); //if ((rank != root)) smpi_mpi_sendrecv(sendbuf, count, datatype, rank, tag, @@ -400,7 +400,7 @@ int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, } } if (tmp_buf) - free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); if (cnts) free(cnts); if (disps) diff --git a/src/smpi/colls/reduce_scatter-mpich.c b/src/smpi/colls/reduce_scatter-mpich.c index 50c4d58fac..83a802bebf 100644 --- a/src/smpi/colls/reduce_scatter-mpich.c +++ b/src/smpi/colls/reduce_scatter-mpich.c @@ -63,7 +63,7 @@ int smpi_coll_tuned_reduce_scatter_mpich_pair(void *sendbuf, void *recvbuf, int } /* allocate temporary buffer to store incoming data */ - tmp_recvbuf = (void*)xbt_malloc(recvcounts[rank]*(max(true_extent,extent))+1); + tmp_recvbuf = (void*)smpi_get_tmp_recvbuffer(recvcounts[rank]*(max(true_extent,extent))+1); /* adjust for potential negative lower bound in datatype */ tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb); @@ -142,7 +142,7 @@ int smpi_coll_tuned_reduce_scatter_mpich_pair(void *sendbuf, void *recvbuf, int } xbt_free(disps); - xbt_free(tmp_recvbuf); + smpi_free_tmp_buffer(tmp_recvbuf); return MPI_SUCCESS; } @@ -186,8 +186,8 @@ int smpi_coll_tuned_reduce_scatter_mpich_noncomm(void *sendbuf, void *recvbuf, i block_size = recvcounts[0]; total_count = block_size * comm_size; - tmp_buf0=( void *)xbt_malloc( true_extent * total_count); - tmp_buf1=( void *)xbt_malloc( true_extent * total_count); + tmp_buf0=( void *)smpi_get_tmp_sendbuffer( true_extent * total_count); + tmp_buf1=( void *)smpi_get_tmp_recvbuffer( true_extent * total_count); void *tmp_buf0_save=tmp_buf0; void *tmp_buf1_save=tmp_buf1; @@ -258,8 +258,8 @@ int smpi_coll_tuned_reduce_scatter_mpich_noncomm(void *sendbuf, void *recvbuf, i result_ptr = (char *)(buf0_was_inout ? tmp_buf0 : tmp_buf1) + recv_offset * true_extent; mpi_errno = smpi_datatype_copy(result_ptr, size, datatype, recvbuf, size, datatype); - xbt_free(tmp_buf0_save); - xbt_free(tmp_buf1_save); + smpi_free_tmp_buffer(tmp_buf0_save); + smpi_free_tmp_buffer(tmp_buf1_save); if (mpi_errno) return(mpi_errno); return MPI_SUCCESS; } @@ -300,13 +300,13 @@ int smpi_coll_tuned_reduce_scatter_mpich_rdb(void *sendbuf, void *recvbuf, int r /* noncommutative and (non-pof2 or block irregular), use recursive doubling. */ /* need to allocate temporary buffer to receive incoming data*/ - tmp_recvbuf= (void *) xbt_malloc( total_count*(max(true_extent,extent))); + tmp_recvbuf= (void *) smpi_get_tmp_recvbuffer( total_count*(max(true_extent,extent))); /* adjust for potential negative lower bound in datatype */ tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb); /* need to allocate another temporary buffer to accumulate results */ - tmp_results = (void *)xbt_malloc( total_count*(max(true_extent,extent))); + tmp_results = (void *)smpi_get_tmp_sendbuffer( total_count*(max(true_extent,extent))); /* adjust for potential negative lower bound in datatype */ tmp_results = (void *)((char*)tmp_results - true_lb); @@ -494,8 +494,8 @@ int smpi_coll_tuned_reduce_scatter_mpich_rdb(void *sendbuf, void *recvbuf, int r if (mpi_errno) return(mpi_errno); xbt_free(disps); - xbt_free(tmp_recvbuf); - xbt_free(tmp_results); + smpi_free_tmp_buffer(tmp_recvbuf); + smpi_free_tmp_buffer(tmp_results); return MPI_SUCCESS; } diff --git a/src/smpi/colls/reduce_scatter-ompi.c b/src/smpi/colls/reduce_scatter-ompi.c index e188c00370..e303d208f3 100644 --- a/src/smpi/colls/reduce_scatter-ompi.c +++ b/src/smpi/colls/reduce_scatter-ompi.c @@ -92,12 +92,7 @@ smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(void *sbuf, } /* Allocate temporary receive buffer. */ -#ifndef WIN32 - if(_xbt_replay_is_active()){ - recv_buf_free = (char*) SMPI_SHARED_MALLOC(buf_size); - }else -#endif - recv_buf_free = (char*) xbt_malloc(buf_size); + recv_buf_free = (char*) smpi_get_tmp_recvbuffer(buf_size); recv_buf = recv_buf_free - lb; if (NULL == recv_buf_free) { @@ -106,12 +101,7 @@ smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(void *sbuf, } /* allocate temporary buffer for results */ -#ifndef WIN32 - if(_xbt_replay_is_active()){ - result_buf_free = (char*) SMPI_SHARED_MALLOC(buf_size); - }else -#endif - result_buf_free = (char*) xbt_malloc(buf_size); + result_buf_free = (char*) smpi_get_tmp_sendbuffer(buf_size); result_buf = result_buf_free - lb; @@ -302,16 +292,9 @@ smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(void *sbuf, cleanup: if (NULL != disps) xbt_free(disps); - if (!_xbt_replay_is_active()){ - if (NULL != recv_buf_free) xbt_free(recv_buf_free); - if (NULL != result_buf_free) xbt_free(result_buf_free); - } -#ifndef WIN32 - else{ - if (NULL != recv_buf_free) SMPI_SHARED_FREE(recv_buf_free); - if (NULL != result_buf_free) SMPI_SHARED_FREE(result_buf_free); - } -#endif + if (NULL != recv_buf_free) smpi_free_tmp_buffer(recv_buf_free); + if (NULL != result_buf_free) smpi_free_tmp_buffer(result_buf_free); + return err; } @@ -431,15 +414,15 @@ smpi_coll_tuned_reduce_scatter_ompi_ring(void *sbuf, void *rbuf, int *rcounts, max_real_segsize = true_extent + (ptrdiff_t)(max_block_count - 1) * extent; - accumbuf_free = (char*)xbt_malloc(true_extent + (ptrdiff_t)(total_count - 1) * extent); + accumbuf_free = (char*)smpi_get_tmp_recvbuffer(true_extent + (ptrdiff_t)(total_count - 1) * extent); if (NULL == accumbuf_free) { ret = -1; line = __LINE__; goto error_hndl; } accumbuf = accumbuf_free - lb; - inbuf_free[0] = (char*)xbt_malloc(max_real_segsize); + inbuf_free[0] = (char*)smpi_get_tmp_sendbuffer(max_real_segsize); if (NULL == inbuf_free[0]) { ret = -1; line = __LINE__; goto error_hndl; } inbuf[0] = inbuf_free[0] - lb; if (size > 2) { - inbuf_free[1] = (char*)xbt_malloc(max_real_segsize); + inbuf_free[1] = (char*)smpi_get_tmp_sendbuffer(max_real_segsize); if (NULL == inbuf_free[1]) { ret = -1; line = __LINE__; goto error_hndl; } inbuf[1] = inbuf_free[1] - lb; } @@ -520,9 +503,9 @@ smpi_coll_tuned_reduce_scatter_ompi_ring(void *sbuf, void *rbuf, int *rcounts, if (ret < 0) { line = __LINE__; goto error_hndl; } if (NULL != displs) xbt_free(displs); - if (NULL != accumbuf_free) xbt_free(accumbuf_free); - if (NULL != inbuf_free[0]) xbt_free(inbuf_free[0]); - if (NULL != inbuf_free[1]) xbt_free(inbuf_free[1]); + if (NULL != accumbuf_free) smpi_free_tmp_buffer(accumbuf_free); + if (NULL != inbuf_free[0]) smpi_free_tmp_buffer(inbuf_free[0]); + if (NULL != inbuf_free[1]) smpi_free_tmp_buffer(inbuf_free[1]); return MPI_SUCCESS; @@ -530,9 +513,9 @@ smpi_coll_tuned_reduce_scatter_ompi_ring(void *sbuf, void *rbuf, int *rcounts, XBT_DEBUG( "%s:%4d\tRank %d Error occurred %d\n", __FILE__, line, rank, ret); if (NULL != displs) xbt_free(displs); - if (NULL != accumbuf_free) xbt_free(accumbuf_free); - if (NULL != inbuf_free[0]) xbt_free(inbuf_free[0]); - if (NULL != inbuf_free[1]) xbt_free(inbuf_free[1]); + if (NULL != accumbuf_free) smpi_free_tmp_buffer(accumbuf_free); + if (NULL != inbuf_free[0]) smpi_free_tmp_buffer(inbuf_free[0]); + if (NULL != inbuf_free[1]) smpi_free_tmp_buffer(inbuf_free[1]); return ret; } diff --git a/src/smpi/colls/scatter-mvapich-two-level.c b/src/smpi/colls/scatter-mvapich-two-level.c index 7dcf2e5358..4cbc64a97c 100644 --- a/src/smpi/colls/scatter-mvapich-two-level.c +++ b/src/smpi/colls/scatter-mvapich-two-level.c @@ -108,7 +108,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_direct(void *sendbuf, if (local_rank == 0) { /* Node leader, allocate tmp_buffer */ - tmp_buf = xbt_malloc(nbytes * local_size); + tmp_buf = smpi_get_tmp_sendbuffer(nbytes * local_size); } leader_comm = smpi_comm_get_leaders_comm(comm); @@ -123,7 +123,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_direct(void *sendbuf, && (leader_of_root == rank)) { /* The root of the scatter operation is not the node leader. Recv * data from the node leader */ - leader_scatter_buf = xbt_malloc(nbytes * comm_size); + leader_scatter_buf = smpi_get_tmp_sendbuffer(nbytes * comm_size); smpi_mpi_recv(leader_scatter_buf, nbytes * comm_size, MPI_BYTE, root, COLL_TAG_SCATTER, comm, &status); @@ -216,9 +216,9 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_direct(void *sendbuf, /* check if multiple threads are calling this collective function */ if (comm_size != local_size && local_rank == 0) { - xbt_free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); if (leader_of_root == rank && root != rank) { - xbt_free(leader_scatter_buf); + smpi_free_tmp_buffer(leader_scatter_buf); } } return (mpi_errno); @@ -292,7 +292,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_binomial(void *sendbuf, if (local_rank == 0) { /* Node leader, allocate tmp_buffer */ - tmp_buf = xbt_malloc(nbytes * local_size); + tmp_buf = smpi_get_tmp_sendbuffer(nbytes * local_size); } leader_comm = smpi_comm_get_leaders_comm(comm); int* leaders_map = smpi_comm_get_leaders_map(comm); @@ -306,7 +306,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_binomial(void *sendbuf, && (leader_of_root == rank)) { /* The root of the scatter operation is not the node leader. Recv * data from the node leader */ - leader_scatter_buf = xbt_malloc(nbytes * comm_size); + leader_scatter_buf = smpi_get_tmp_sendbuffer(nbytes * comm_size); smpi_mpi_recv(leader_scatter_buf, nbytes * comm_size, MPI_BYTE, root, COLL_TAG_SCATTER, comm, &status); } @@ -399,9 +399,9 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_binomial(void *sendbuf, /* check if multiple threads are calling this collective function */ if (comm_size != local_size && local_rank == 0) { - xbt_free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); if (leader_of_root == rank && root != rank) { - xbt_free(leader_scatter_buf); + smpi_free_tmp_buffer(leader_scatter_buf); } } diff --git a/src/smpi/colls/smpi_mvapich2_selector.c b/src/smpi/colls/smpi_mvapich2_selector.c index c3a6022f6d..91e40d3dad 100644 --- a/src/smpi/colls/smpi_mvapich2_selector.c +++ b/src/smpi/colls/smpi_mvapich2_selector.c @@ -3,7 +3,7 @@ /* Copyright (c) 2009-2010, 2013-2014. The SimGrid Team. * All rights reserved. */ -/* This program is xbt_free software; you can redistribute it and/or modify it +/* This program is free software; you can redistribute it and/or modify it * under the terms of the license (GNU LGPL) which comes with this package. */ #include "colls_private.h" @@ -61,7 +61,7 @@ int smpi_coll_tuned_alltoall_mvapich2( void *sendbuf, int sendcount, mv2_alltoall_thresholds_table[conf_index][range].in_place_algo_table[range_threshold].min ||nbytes > mv2_alltoall_thresholds_table[conf_index][range].in_place_algo_table[range_threshold].max ) { - tmp_buf = (char *)xbt_malloc( comm_size * recvcount * recvtype_size ); + tmp_buf = (char *)smpi_get_tmp_sendbuffer( comm_size * recvcount * recvtype_size ); mpi_errno = smpi_datatype_copy((char *)recvbuf, comm_size*recvcount, recvtype, (char *)tmp_buf, @@ -70,7 +70,7 @@ int smpi_coll_tuned_alltoall_mvapich2( void *sendbuf, int sendcount, mpi_errno = MV2_Alltoall_function(tmp_buf, recvcount, recvtype, recvbuf, recvcount, recvtype, comm ); - xbt_free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); } else { mpi_errno = MPIR_Alltoall_inplace_MV2(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, @@ -604,7 +604,7 @@ int smpi_coll_tuned_bcast_mvapich2(void *buffer, #endif if (two_level_bcast == 1) { if (!is_contig || !is_homogeneous) { - tmp_buf=(void *)xbt_malloc(nbytes); + tmp_buf=(void *)smpi_get_tmp_sendbuffer(nbytes); /* position = 0;*/ /* if (rank == root) {*/ diff --git a/src/smpi/private.h b/src/smpi/private.h index a4a62f985a..29f0d48f1e 100644 --- a/src/smpi/private.h +++ b/src/smpi/private.h @@ -414,6 +414,11 @@ void smpi_bench_destroy(void); void smpi_bench_begin(void); void smpi_bench_end(void); +void* smpi_get_tmp_sendbuffer(int size); +void* smpi_get_tmp_recvbuffer(int size); +void smpi_free_tmp_buffer(void* buf); + + // f77 wrappers void mpi_init_(int*); diff --git a/src/smpi/smpi_base.c b/src/smpi/smpi_base.c index 6ef6af245e..035c5eface 100644 --- a/src/smpi/smpi_base.c +++ b/src/smpi/smpi_base.c @@ -1136,12 +1136,13 @@ void smpi_mpi_reduce_scatter(void *sendbuf, void *recvbuf, int *recvcounts, displs[i] = count; count += recvcounts[i]; } - tmpbuf=(void*)xbt_malloc(count*smpi_datatype_get_extent(datatype)); + tmpbuf=(void*)smpi_get_tmp_sendbuffer(count*smpi_datatype_get_extent(datatype)); + mpi_coll_reduce_fun(sendbuf, tmpbuf, count, datatype, op, 0, comm); smpi_mpi_scatterv(tmpbuf, recvcounts, displs, datatype, recvbuf, recvcounts[rank], datatype, 0, comm); xbt_free(displs); - xbt_free(tmpbuf); + smpi_free_tmp_buffer(tmpbuf); } void smpi_mpi_gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, @@ -1402,7 +1403,10 @@ void smpi_mpi_reduce(void *sendbuf, void *recvbuf, int count, if(src != root) { // FIXME: possibly overkill we we have contiguous/noncontiguous data // mapping... - tmpbufs[index] = xbt_malloc(count * dataext); + if (!_xbt_replay_is_active()) + tmpbufs[index] = xbt_malloc(count * dataext); + else + tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); requests[index] = smpi_irecv_init(tmpbufs[index], count, datatype, src, system_tag, comm); @@ -1422,9 +1426,9 @@ void smpi_mpi_reduce(void *sendbuf, void *recvbuf, int count, if(op) /* op can be MPI_OP_NULL that does nothing */ smpi_op_apply(op, tmpbufs[index], recvbuf, &count, &datatype); } - for(index = 0; index < size - 1; index++) { - xbt_free(tmpbufs[index]); - } + for(index = 0; index < size - 1; index++) { + smpi_free_tmp_buffer(tmpbufs[index]); + } xbt_free(tmpbufs); xbt_free(requests); diff --git a/src/smpi/smpi_replay.c b/src/smpi/smpi_replay.c index b95afb3844..9b5ff76cc8 100644 --- a/src/smpi/smpi_replay.c +++ b/src/smpi/smpi_replay.c @@ -32,7 +32,9 @@ static void log_timed_action (const char *const *action, double clock){ } //allocate a single buffer for all sends, growing it if needed -static void* get_sendbuffer(int size){ +void* smpi_get_tmp_sendbuffer(int size){ + if (!_xbt_replay_is_active()) + return xbt_malloc(size); if (sendbuffer_size