TRACE_smpi_comm_in(rank, request == MPI_REQUEST_IGNORED ? "PMPI_Barrier" : "PMPI_Ibarrier",
new simgrid::instr::NoOpTIData(request == MPI_REQUEST_IGNORED ? "barrier" : "ibarrier"));
if (request == MPI_REQUEST_IGNORED) {
- simgrid::smpi::Colls::barrier(comm);
+ simgrid::smpi::colls::barrier(comm);
// Barrier can be used to synchronize RMA calls. Finish all requests from comm before.
comm->finish_rma_calls();
} else
- simgrid::smpi::Colls::ibarrier(comm, request);
+ simgrid::smpi::colls::ibarrier(comm, request);
TRACE_smpi_comm_out(rank);
smpi_bench_begin();
simgrid::smpi::Datatype::encode(datatype), ""));
if (comm->size() > 1) {
if (request == MPI_REQUEST_IGNORED)
- simgrid::smpi::Colls::bcast(buf, count, datatype, root, comm);
+ simgrid::smpi::colls::bcast(buf, count, datatype, root, comm);
else
- simgrid::smpi::Colls::ibcast(buf, count, datatype, root, comm, request);
+ simgrid::smpi::colls::ibcast(buf, count, datatype, root, comm, request);
} else {
if (request != MPI_REQUEST_IGNORED)
*request = MPI_REQUEST_NULL;
(comm->rank() != root || recvtype->is_replayable()) ? recvcount : recvcount * recvtype->size(),
simgrid::smpi::Datatype::encode(real_sendtype), simgrid::smpi::Datatype::encode(recvtype)));
if (request == MPI_REQUEST_IGNORED)
- simgrid::smpi::Colls::gather(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcount, recvtype, root, comm);
+ simgrid::smpi::colls::gather(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcount, recvtype, root, comm);
else
- simgrid::smpi::Colls::igather(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcount, recvtype, root, comm,
+ simgrid::smpi::colls::igather(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcount, recvtype, root, comm,
request);
TRACE_smpi_comm_out(rank);
nullptr, dt_size_recv, trace_recvcounts, simgrid::smpi::Datatype::encode(real_sendtype),
simgrid::smpi::Datatype::encode(recvtype)));
if (request == MPI_REQUEST_IGNORED)
- simgrid::smpi::Colls::gatherv(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcounts, displs, recvtype,
+ simgrid::smpi::colls::gatherv(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcounts, displs, recvtype,
root, comm);
else
- simgrid::smpi::Colls::igatherv(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcounts, displs, recvtype,
+ simgrid::smpi::colls::igatherv(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcounts, displs, recvtype,
root, comm, request);
TRACE_smpi_comm_out(rank);
recvtype->is_replayable() ? recvcount : recvcount * recvtype->size(),
simgrid::smpi::Datatype::encode(sendtype), simgrid::smpi::Datatype::encode(recvtype)));
if (request == MPI_REQUEST_IGNORED)
- simgrid::smpi::Colls::allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
+ simgrid::smpi::colls::allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
else
- simgrid::smpi::Colls::iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request);
+ simgrid::smpi::colls::iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request);
TRACE_smpi_comm_out(rank);
smpi_bench_begin();
dt_size_recv, trace_recvcounts, simgrid::smpi::Datatype::encode(sendtype),
simgrid::smpi::Datatype::encode(recvtype)));
if (request == MPI_REQUEST_IGNORED)
- simgrid::smpi::Colls::allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm);
+ simgrid::smpi::colls::allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm);
else
- simgrid::smpi::Colls::iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm,
+ simgrid::smpi::colls::iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm,
request);
TRACE_smpi_comm_out(rank);
recvtype->is_replayable() ? recvcount : recvcount * recvtype->size(),
simgrid::smpi::Datatype::encode(sendtype), simgrid::smpi::Datatype::encode(recvtype)));
if (request == MPI_REQUEST_IGNORED)
- simgrid::smpi::Colls::scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm);
+ simgrid::smpi::colls::scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm);
else
- simgrid::smpi::Colls::iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request);
+ simgrid::smpi::colls::iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request);
TRACE_smpi_comm_out(rank);
smpi_bench_begin();
nullptr, simgrid::smpi::Datatype::encode(sendtype),
simgrid::smpi::Datatype::encode(recvtype)));
if (request == MPI_REQUEST_IGNORED)
- simgrid::smpi::Colls::scatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm);
+ simgrid::smpi::colls::scatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm);
else
- simgrid::smpi::Colls::iscatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm,
+ simgrid::smpi::colls::iscatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm,
request);
TRACE_smpi_comm_out(rank);
datatype->is_replayable() ? count : count * datatype->size(), -1,
simgrid::smpi::Datatype::encode(datatype), ""));
if (request == MPI_REQUEST_IGNORED)
- simgrid::smpi::Colls::reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
+ simgrid::smpi::colls::reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
else
- simgrid::smpi::Colls::ireduce(sendbuf, recvbuf, count, datatype, op, root, comm, request);
+ simgrid::smpi::colls::ireduce(sendbuf, recvbuf, count, datatype, op, root, comm, request);
TRACE_smpi_comm_out(rank);
smpi_bench_begin();
simgrid::smpi::Datatype::encode(datatype), ""));
if (request == MPI_REQUEST_IGNORED)
- simgrid::smpi::Colls::allreduce(real_sendbuf, recvbuf, count, datatype, op, comm);
+ simgrid::smpi::colls::allreduce(real_sendbuf, recvbuf, count, datatype, op, comm);
else
- simgrid::smpi::Colls::iallreduce(real_sendbuf, recvbuf, count, datatype, op, comm, request);
+ simgrid::smpi::colls::iallreduce(real_sendbuf, recvbuf, count, datatype, op, comm, request);
TRACE_smpi_comm_out(rank);
smpi_bench_begin();
int retval;
if (request == MPI_REQUEST_IGNORED)
- retval = simgrid::smpi::Colls::scan(real_sendbuf, recvbuf, count, datatype, op, comm);
+ retval = simgrid::smpi::colls::scan(real_sendbuf, recvbuf, count, datatype, op, comm);
else
- retval = simgrid::smpi::Colls::iscan(real_sendbuf, recvbuf, count, datatype, op, comm, request);
+ retval = simgrid::smpi::colls::iscan(real_sendbuf, recvbuf, count, datatype, op, comm, request);
TRACE_smpi_comm_out(rank);
smpi_bench_begin();
int retval;
if (request == MPI_REQUEST_IGNORED)
- retval = simgrid::smpi::Colls::exscan(real_sendbuf, recvbuf, count, datatype, op, comm);
+ retval = simgrid::smpi::colls::exscan(real_sendbuf, recvbuf, count, datatype, op, comm);
else
- retval = simgrid::smpi::Colls::iexscan(real_sendbuf, recvbuf, count, datatype, op, comm, request);
+ retval = simgrid::smpi::colls::iexscan(real_sendbuf, recvbuf, count, datatype, op, comm, request);
TRACE_smpi_comm_out(rank);
smpi_bench_begin();
-1, trace_recvcounts, simgrid::smpi::Datatype::encode(datatype), ""));
if (request == MPI_REQUEST_IGNORED)
- simgrid::smpi::Colls::reduce_scatter(real_sendbuf, recvbuf, recvcounts, datatype, op, comm);
+ simgrid::smpi::colls::reduce_scatter(real_sendbuf, recvbuf, recvcounts, datatype, op, comm);
else
- simgrid::smpi::Colls::ireduce_scatter(real_sendbuf, recvbuf, recvcounts, datatype, op, comm, request);
+ simgrid::smpi::colls::ireduce_scatter(real_sendbuf, recvbuf, recvcounts, datatype, op, comm, request);
TRACE_smpi_comm_out(rank);
smpi_bench_begin();
for (int i = 0; i < count; i++)
recvcounts[i] = recvcount;
if (request == MPI_REQUEST_IGNORED)
- simgrid::smpi::Colls::reduce_scatter(real_sendbuf, recvbuf, recvcounts, datatype, op, comm);
+ simgrid::smpi::colls::reduce_scatter(real_sendbuf, recvbuf, recvcounts, datatype, op, comm);
else
- simgrid::smpi::Colls::ireduce_scatter(real_sendbuf, recvbuf, recvcounts, datatype, op, comm, request);
+ simgrid::smpi::colls::ireduce_scatter(real_sendbuf, recvbuf, recvcounts, datatype, op, comm, request);
delete[] recvcounts;
TRACE_smpi_comm_out(rank);
int retval;
if (request == MPI_REQUEST_IGNORED)
retval =
- simgrid::smpi::Colls::alltoall(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcount, recvtype, comm);
+ simgrid::smpi::colls::alltoall(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcount, recvtype, comm);
else
- retval = simgrid::smpi::Colls::ialltoall(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcount, recvtype,
+ retval = simgrid::smpi::colls::ialltoall(real_sendbuf, real_sendcount, real_sendtype, recvbuf, recvcount, recvtype,
comm, request);
TRACE_smpi_comm_out(rank);
int retval;
if (request == MPI_REQUEST_IGNORED)
- retval = simgrid::smpi::Colls::alltoallv(real_sendbuf, real_sendcounts, real_senddisps, real_sendtype, recvbuf,
+ retval = simgrid::smpi::colls::alltoallv(real_sendbuf, real_sendcounts, real_senddisps, real_sendtype, recvbuf,
recvcounts, recvdisps, recvtype, comm);
else
- retval = simgrid::smpi::Colls::ialltoallv(real_sendbuf, real_sendcounts, real_senddisps, real_sendtype, recvbuf,
+ retval = simgrid::smpi::colls::ialltoallv(real_sendbuf, real_sendcounts, real_senddisps, real_sendtype, recvbuf,
recvcounts, recvdisps, recvtype, comm, request);
TRACE_smpi_comm_out(rank);
int retval;
if (request == MPI_REQUEST_IGNORED)
- retval = simgrid::smpi::Colls::alltoallw(real_sendbuf, real_sendcounts, real_senddisps, real_sendtypes, recvbuf,
+ retval = simgrid::smpi::colls::alltoallw(real_sendbuf, real_sendcounts, real_senddisps, real_sendtypes, recvbuf,
recvcounts, recvdisps, recvtypes, comm);
else
- retval = simgrid::smpi::Colls::ialltoallw(real_sendbuf, real_sendcounts, real_senddisps, real_sendtypes, recvbuf,
+ retval = simgrid::smpi::colls::ialltoallw(real_sendbuf, real_sendcounts, real_senddisps, real_sendtypes, recvbuf,
recvcounts, recvdisps, recvtypes, comm, request);
TRACE_smpi_comm_out(rank);
if(ptr==nullptr)
return MPI_ERR_NO_MEM;
}
-
- simgrid::smpi::Colls::bcast(&ptr, sizeof(void*), MPI_BYTE, 0, comm);
- simgrid::smpi::Colls::barrier(comm);
-
+
+ simgrid::smpi::colls::bcast(&ptr, sizeof(void*), MPI_BYTE, 0, comm);
+ simgrid::smpi::colls::barrier(comm);
+
*static_cast<void**>(base) = (char*)ptr+rank*size;
*win = new simgrid::smpi::Win( ptr, size, disp_unit, info, comm,rank==0);
retval = MPI_SUCCESS;
{
int num_procs;
num_procs = comm->size();
- Colls::gather(send_buff, send_count, send_type, recv_buff, recv_count, recv_type,
- 0, comm);
- Colls::bcast(recv_buff, (recv_count * num_procs), recv_type, 0, comm);
+ colls::gather(send_buff, send_count, send_type, recv_buff, recv_count, recv_type, 0, comm);
+ colls::bcast(recv_buff, (recv_count * num_procs), recv_type, 0, comm);
return MPI_SUCCESS;
}
/*If there is just one node, after gather itself,
* root has all the data and it can do bcast*/
if(local_rank == 0) {
- mpi_errno = Colls::gather(sendbuf, sendcnt,sendtype,
- (void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)),
- recvcnt, recvtype,
- 0, shmem_comm);
+ mpi_errno =
+ colls::gather(sendbuf, sendcnt, sendtype, (void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)),
+ recvcnt, recvtype, 0, shmem_comm);
} else {
/*Since in allgather all the processes could have
* its own data in place*/
if(sendbuf == MPI_IN_PLACE) {
- mpi_errno = Colls::gather((void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)),
- recvcnt , recvtype,
- recvbuf, recvcnt, recvtype,
- 0, shmem_comm);
+ mpi_errno = colls::gather((void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)), recvcnt, recvtype,
+ recvbuf, recvcnt, recvtype, 0, shmem_comm);
} else {
- mpi_errno = Colls::gather(sendbuf, sendcnt,sendtype,
- recvbuf, recvcnt, recvtype,
- 0, shmem_comm);
+ mpi_errno = colls::gather(sendbuf, sendcnt, sendtype, recvbuf, recvcnt, recvtype, 0, shmem_comm);
}
}
/* Exchange the data between the node leaders*/
void* sendbuf=((char*)recvbuf)+recvtype->get_extent()*displs[leader_comm->rank()];
- mpi_errno = Colls::allgatherv(sendbuf,
- (recvcnt*local_size),
- recvtype,
- recvbuf, recvcnts,
- displs, recvtype,
- leader_comm);
+ mpi_errno = colls::allgatherv(sendbuf, (recvcnt * local_size), recvtype, recvbuf, recvcnts, displs,
+ recvtype, leader_comm);
delete[] displs;
delete[] recvcnts;
} else {
}
/*Bcast the entire data from node leaders to all other cores*/
- mpi_errno = Colls::bcast (recvbuf, recvcnt * size, recvtype, 0, shmem_comm);
+ mpi_errno = colls::bcast(recvbuf, recvcnt * size, recvtype, 0, shmem_comm);
return mpi_errno;
}
const int *recv_counts, const int *recv_disps, MPI_Datatype recv_type,
MPI_Comm comm)
{
- Colls::gatherv(send_buff, send_count, send_type, recv_buff, recv_counts, recv_disps, recv_type, 0, comm);
+ colls::gatherv(send_buff, send_count, send_type, recv_buff, recv_counts, recv_disps, recv_type, 0, comm);
int num_procs, i, current, max = 0;
num_procs = comm->size();
for (i = 0; i < num_procs; i++) {
if (current > max)
max = current;
}
- Colls::bcast(recv_buff, max, recv_type, 0, comm);
+ colls::bcast(recv_buff, max, recv_type, 0, comm);
return MPI_SUCCESS;
}
/* when communication size is not divisible by number of process:
call the native implementation for the remain chunk at the end of the operation */
if (remainder_flag) {
- return Colls::allreduce((char *) sbuf + remainder_offset,
- (char *) rbuf + remainder_offset, remainder, dtype, op,
- comm);
+ return colls::allreduce((char*)sbuf + remainder_offset, (char*)rbuf + remainder_offset, remainder, dtype, op, comm);
}
return 0;
MPI_Datatype datatype,
MPI_Op op, MPI_Comm comm)
{
- Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm);
+ colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
return MPI_SUCCESS;
}
MPI_Datatype datatype,
MPI_Op op, MPI_Comm comm)
{
- Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm);
+ colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
return MPI_SUCCESS;
}
/* Broadcasting the mesage from leader to the rest */
/* Note: shared memory broadcast could improve the performance */
- mpi_errno = Colls::bcast(recvbuf, count, datatype, 0, shmem_comm);
+ mpi_errno = colls::bcast(recvbuf, count, datatype, 0, shmem_comm);
return (mpi_errno);
}
memcpy(tmp_buf, recv + recv_idx * extent, recv_cnt * extent);
- Colls::allgather(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm);
+ colls::allgather(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm);
memcpy(rbuff, recv, count * extent);
smpi_free_tmp_buffer(recv);
}
memcpy(tmp_buf, (char *) rbuff + recv_idx * extent, recv_cnt * extent);
- Colls::allgather(tmp_buf, recv_cnt, dtype, rbuff, recv_cnt, dtype, comm);
+ colls::allgather(tmp_buf, recv_cnt, dtype, rbuff, recv_cnt, dtype, comm);
smpi_free_tmp_buffer(tmp_buf);
}
memcpy(send, sbuff, s_extent * count);
- Colls::alltoall(send, send_size, dtype, recv, send_size, dtype, comm);
+ colls::alltoall(send, send_size, dtype, recv, send_size, dtype, comm);
memcpy(tmp, recv, nbytes);
if (op != MPI_OP_NULL)
op->apply(recv + s_offset, tmp, &send_size, dtype);
- Colls::allgather(tmp, send_size, dtype, recv, send_size, dtype, comm);
+ colls::allgather(tmp, send_size, dtype, recv, send_size, dtype, comm);
memcpy(rbuff, recv, count * s_extent);
smpi_free_tmp_buffer(recv);
unsigned char* recv = smpi_get_tmp_recvbuffer(s_extent * send_size * nprocs);
- Colls::alltoall(send, send_size, dtype, recv, send_size, dtype, comm);
+ colls::alltoall(send, send_size, dtype, recv, send_size, dtype, comm);
memcpy((char *) rbuff + r_offset, recv, nbytes);
if (op != MPI_OP_NULL)
op->apply(recv + s_offset, static_cast<char*>(rbuff) + r_offset, &send_size, dtype);
- Colls::allgather((char *) rbuff + r_offset, send_size, dtype, rbuff, send_size,
- dtype, comm);
+ colls::allgather((char*)rbuff + r_offset, send_size, dtype, rbuff, send_size, dtype, comm);
smpi_free_tmp_buffer(recv);
}
MPI_Datatype datatype, MPI_Op op,
MPI_Comm comm)
{
- Colls::reduce(buf, buf2, count, datatype, op, 0, comm);
- Colls::bcast(buf2, count, datatype, 0, comm);
+ colls::reduce(buf, buf2, count, datatype, op, 0, comm);
+ colls::bcast(buf2, count, datatype, 0, comm);
return MPI_SUCCESS;
}
}
for (i = 0; i < num_procs; i++) {
src = dst = rank ^ i;
- Colls::barrier(comm);
+ colls::barrier(comm);
Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
tag, recv_ptr + src * recv_chunk, recv_count, recv_type,
src, tag, comm, &s);
send_chunk *= send_count;
recv_chunk *= recv_count;
- Colls::barrier(comm);
+ colls::barrier(comm);
for (i = 0; i < num_procs; i++) {
src = dst = rank ^ i;
Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
src = (rank - i + num_procs) % num_procs;
dst = (rank + i) % num_procs;
- Colls::barrier(comm);
+ colls::barrier(comm);
Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
tag, recv_ptr + src * recv_chunk, recv_count, recv_type,
src, tag, comm, &s);
send_chunk *= send_count;
recv_chunk *= recv_count;
- Colls::barrier(comm);
+ colls::barrier(comm);
for (i = 0; i < num_procs; i++) {
src = (rank - i + num_procs) % num_procs;
dst = (rank + i) % num_procs;
count++;
}
/* Wait for them all. */
- // Colls::startall(count, requests);
+ // colls::startall(count, requests);
XBT_DEBUG("<%d> wait for %d requests", rank, count);
Request::waitall(count, requests, MPI_STATUSES_IGNORE);
delete[] requests;
for (i = 0; i < num_procs; i++) {
src = dst = rank ^ i;
- Colls::barrier(comm);
+ colls::barrier(comm);
Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst,
tag, recv_ptr + recv_disps[src] * recv_chunk, recv_counts[src], recv_type,
src, tag, comm, &s);
send_chunk = send_type->get_extent();
recv_chunk = recv_type->get_extent();
- Colls::barrier(comm);
+ colls::barrier(comm);
for (i = 0; i < num_procs; i++) {
src = dst = rank ^ i;
Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst,
src = (rank - i + num_procs) % num_procs;
dst = (rank + i) % num_procs;
- Colls::barrier(comm);
+ colls::barrier(comm);
Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst,
tag, recv_ptr + recv_disps[src] * recv_chunk, recv_counts[src], recv_type,
src, tag, comm, &s);
send_chunk = send_type->get_extent();
recv_chunk = recv_type->get_extent();
- Colls::barrier(comm);
+ colls::barrier(comm);
for (i = 0; i < num_procs; i++) {
src = (rank - i + num_procs) % num_procs;
dst = (rank + i) % num_procs;
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
XBT_WARN("MPI_bcast_NTSB use default MPI_bcast.");
- Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype,
- root, comm);
+ colls::bcast((char*)buf + (pipe_length * increment), remainder, datatype, root, comm);
}
return MPI_SUCCESS;
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
XBT_WARN("MPI_bcast_NTSL_Isend_nb use default MPI_bcast.");
- Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype,
- root, comm);
+ colls::bcast((char*)buf + (pipe_length * increment), remainder, datatype, root, comm);
}
return MPI_SUCCESS;
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
XBT_WARN("MPI_bcast_arrival_NTSL use default MPI_bcast.");
- Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype,
- root, comm);
+ colls::bcast((char*)buf + (pipe_length * increment), remainder, datatype, root, comm);
}
return MPI_SUCCESS;
// when count is not divisible by block size, use default BCAST for the remainder
if ((remainder != 0) && (count > segment)) {
XBT_WARN("MPI_bcast_SMP_binary use default MPI_bcast.");
- Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype,
- root, comm);
+ colls::bcast((char*)buf + (pipe_length * increment), remainder, datatype, root, comm);
}
return 1;
// when count is not divisible by block size, use default BCAST for the remainder
if ((remainder != 0) && (count > segment)) {
XBT_WARN("MPI_bcast_SMP_linear use default MPI_bcast.");
- Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype,
- root, comm);
+ colls::bcast((char*)buf + (pipe_length * increment), remainder, datatype, root, comm);
}
return MPI_SUCCESS;
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
XBT_WARN("MPI_bcast_arrival_pattern_aware_wait use default MPI_bcast.");
- Colls::bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm);
+ colls::bcast((char*)buf + (pipe_length * increment), remainder, datatype, root, comm);
}
return MPI_SUCCESS;
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
XBT_WARN("MPI_bcast_arrival_pattern_aware use default MPI_bcast.");
- Colls::bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm);
+ colls::bcast((char*)buf + (pipe_length * increment), remainder, datatype, root, comm);
}
return MPI_SUCCESS;
/* message too small */
if (count < size) {
XBT_WARN("MPI_bcast_arrival_scatter use default MPI_bcast.");
- Colls::bcast(buf, count, datatype, root, comm);
+ colls::bcast(buf, count, datatype, root, comm);
return MPI_SUCCESS;
}
recvcnts[i] = node_sizes[i] * recvcnt;
}
}
- Colls::gatherv(tmp_buf, local_size * nbytes, MPI_BYTE, recvbuf, recvcnts, displs, recvtype, leader_root,
+ colls::gatherv(tmp_buf, local_size * nbytes, MPI_BYTE, recvbuf, recvcnts, displs, recvtype, leader_root,
leader_comm);
} else {
/* The root of the gather operation is not the node leader.
recvcnts[i] = node_sizes[i] * nbytes;
}
}
- Colls::gatherv(tmp_buf, local_size * nbytes, MPI_BYTE, leader_gather_buf, recvcnts, displs, MPI_BYTE,
+ colls::gatherv(tmp_buf, local_size * nbytes, MPI_BYTE, leader_gather_buf, recvcnts, displs, MPI_BYTE,
leader_root, leader_comm);
}
if (leader_comm_rank == leader_root) {
Exa.: size=13 ==> n=3, r=5 (i.e. size == 13 == 2**n+r == 2**3 + 5)
- The algorithm needs for the execution of one Colls::reduce
+ The algorithm needs for the execution of one colls::reduce
- for r==0
exec_time = n*(L1+L2) + buf_lng * (1-1/2**n) * (T1 + T2 + O/d)
7: { [(a+b)+(c+d)] + [(e+f)+(g+h)] } + { [(i+j)+k] + [l+m] } for H
-For Colls::allreduce:
+For colls::allreduce:
------------------
Step 6.1)
on all nodes 0..12
-For Colls::reduce:
+For colls::reduce:
---------------
Step 6.0)
} /* new_prot */
/*otherwise:*/
if (is_all)
- return( Colls::allreduce(Sendbuf, Recvbuf, count, mpi_datatype, mpi_op, comm) );
+ return (colls::allreduce(Sendbuf, Recvbuf, count, mpi_datatype, mpi_op, comm));
else
- return( Colls::reduce(Sendbuf,Recvbuf, count,mpi_datatype,mpi_op, root, comm) );
+ return (colls::reduce(Sendbuf, Recvbuf, count, mpi_datatype, mpi_op, root, comm));
}
#endif /*REDUCE_LIMITS*/
sendcnts[i] = node_sizes[i] * nbytes;
}
}
- Colls::scatterv(leader_scatter_buf, sendcnts, displs, MPI_BYTE, tmp_buf, nbytes * local_size, MPI_BYTE,
+ colls::scatterv(leader_scatter_buf, sendcnts, displs, MPI_BYTE, tmp_buf, nbytes * local_size, MPI_BYTE,
leader_root, leader_comm);
} else {
if (leader_comm_rank == leader_root) {
sendcnts[i] = node_sizes[i] * sendcnt;
}
}
- Colls::scatterv(sendbuf, sendcnts, displs, sendtype, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root,
+ colls::scatterv(sendbuf, sendcnts, displs, sendtype, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root,
leader_comm);
}
if (leader_comm_rank == leader_root) {
sendcnts[i] = node_sizes[i] * nbytes;
}
}
- Colls::scatterv(leader_scatter_buf, sendcnts, displs, MPI_BYTE, tmp_buf, nbytes * local_size, MPI_BYTE,
+ colls::scatterv(leader_scatter_buf, sendcnts, displs, MPI_BYTE, tmp_buf, nbytes * local_size, MPI_BYTE,
leader_root, leader_comm);
} else {
if (leader_comm_rank == leader_root) {
sendcnts[i] = node_sizes[i] * sendcnt;
}
}
- Colls::scatterv(sendbuf, sendcnts, displs, sendtype, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root,
+ colls::scatterv(sendbuf, sendcnts, displs, sendtype, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root,
leader_comm);
}
if (leader_comm_rank == leader_root) {
#include "src/smpi/include/smpi_actor.hpp"
//attempt to do a quick autotuning version of the collective,
-#define TRACE_AUTO_COLL(cat) \
- if (TRACE_is_enabled()) { \
- simgrid::instr::EventType* type = \
- simgrid::instr::Container::get_root()->type_->by_name_or_create<simgrid::instr::EventType>( \
- _XBT_STRINGIFY(cat)); \
- \
- std::string cont_name = std::string("rank-" + std::to_string(simgrid::s4u::this_actor::get_pid())); \
- type->add_entity_value(Colls::_XBT_CONCAT3(mpi_coll_, cat, _description)[i].name, "1.0 1.0 1.0"); \
- new simgrid::instr::NewEvent(SIMIX_get_clock(), simgrid::instr::Container::by_name(cont_name), type, \
- type->get_entity_value(Colls::_XBT_CONCAT3(mpi_coll_, cat, _description)[i].name)); \
- }
-
#define AUTOMATIC_COLL_BENCH(cat, ret, args, args2) \
ret _XBT_CONCAT2(cat, __automatic)(COLL_UNPAREN args) \
{ \
double time1, time2, time_min = DBL_MAX; \
int min_coll = -1, global_coll = -1; \
- int i; \
+ int i = 0; \
double buf_in, buf_out, max_min = DBL_MAX; \
- for (i = 0; not Colls::_XBT_CONCAT3(mpi_coll_, cat, _description)[i].name.empty(); i++) { \
- if (Colls::_XBT_CONCAT3(mpi_coll_, cat, _description)[i].name == "automatic") \
- continue; \
- if (Colls::_XBT_CONCAT3(mpi_coll_, cat, _description)[i].name == "default") \
- continue; \
+ auto desc = simgrid::smpi::colls::get_smpi_coll_description(_XBT_STRINGIFY(cat), i); \
+ while (not desc->name.empty()) { \
+ if (desc->name == "automatic") \
+ goto next_iteration; \
+ if (desc->name == "default") \
+ goto next_iteration; \
barrier__default(comm); \
- TRACE_AUTO_COLL(cat) \
+ if (TRACE_is_enabled()) { \
+ simgrid::instr::EventType* type = \
+ simgrid::instr::Container::get_root()->type_->by_name_or_create<simgrid::instr::EventType>( \
+ _XBT_STRINGIFY(cat)); \
+ \
+ std::string cont_name = std::string("rank-" + std::to_string(simgrid::s4u::this_actor::get_pid())); \
+ type->add_entity_value(desc->name, "1.0 1.0 1.0"); \
+ new simgrid::instr::NewEvent(SIMIX_get_clock(), simgrid::instr::Container::by_name(cont_name), type, \
+ type->get_entity_value(desc->name)); \
+ } \
time1 = SIMIX_get_clock(); \
try { \
- ((int(*) args)Colls::_XBT_CONCAT3(mpi_coll_, cat, _description)[i].coll) args2; \
+ ((int(*) args)desc->coll) args2; \
} catch (std::exception & ex) { \
continue; \
} \
global_coll = i; \
} \
} \
+ next_iteration: \
+ i++; \
+ desc = simgrid::smpi::colls::get_smpi_coll_description(_XBT_STRINGIFY(cat), i); \
} \
if (comm->rank() == 0) { \
XBT_WARN("For rank 0, the quickest was %s : %f , but global was %s : %f at max", \
- Colls::_XBT_CONCAT3(mpi_coll_, cat, _description)[min_coll].name.c_str(), time_min, \
- Colls::_XBT_CONCAT3(mpi_coll_, cat, _description)[global_coll].name.c_str(), max_min); \
+ simgrid::smpi::colls::get_smpi_coll_description(_XBT_STRINGIFY(cat), min_coll)->name.c_str(), time_min, \
+ simgrid::smpi::colls::get_smpi_coll_description(_XBT_STRINGIFY(cat), global_coll)->name.c_str(), \
+ max_min); \
} else \
XBT_WARN("The quickest " _XBT_STRINGIFY(cat) " was %s on rank %d and took %f", \
- Colls::_XBT_CONCAT3(mpi_coll_, cat, _description)[min_coll].name.c_str(), comm->rank(), time_min); \
+ simgrid::smpi::colls::get_smpi_coll_description(_XBT_STRINGIFY(cat), min_coll)->name.c_str(), \
+ comm->rank(), time_min); \
return (min_coll != -1) ? MPI_SUCCESS : MPI_ERR_INTERN; \
}
XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_coll, smpi, "Logging specific to SMPI collectives.");
#define COLL_SETTER(cat, ret, args, args2) \
- int(*Colls::cat) args; \
- void Colls::_XBT_CONCAT(set_, cat)(const std::string& name) \
+ void colls::_XBT_CONCAT(set_, cat)(const std::string& name) \
{ \
int id = find_coll_description(_XBT_CONCAT3(mpi_coll_, cat, _description), name, _XBT_STRINGIFY(cat)); \
cat = reinterpret_cast<ret(*) args>(_XBT_CONCAT3(mpi_coll_, cat, _description)[id].coll); \
namespace simgrid{
namespace smpi{
-void (*Colls::smpi_coll_cleanup_callback)();
-
/* these arrays must be nullptr terminated */
-s_mpi_coll_description_t Colls::mpi_coll_gather_description[] = {
- COLL_GATHERS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} };
-s_mpi_coll_description_t Colls::mpi_coll_allgather_description[] = {
- COLL_ALLGATHERS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} };
-s_mpi_coll_description_t Colls::mpi_coll_allgatherv_description[] = {
- COLL_ALLGATHERVS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} };
-s_mpi_coll_description_t Colls::mpi_coll_allreduce_description[] ={
- COLL_ALLREDUCES(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} };
-s_mpi_coll_description_t Colls::mpi_coll_reduce_scatter_description[] = {
- COLL_REDUCE_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} };
-s_mpi_coll_description_t Colls::mpi_coll_scatter_description[] ={
- COLL_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} };
-s_mpi_coll_description_t Colls::mpi_coll_barrier_description[] ={
- COLL_BARRIERS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} };
-s_mpi_coll_description_t Colls::mpi_coll_alltoall_description[] = {
- COLL_ALLTOALLS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} };
-s_mpi_coll_description_t Colls::mpi_coll_alltoallv_description[] = {
- COLL_ALLTOALLVS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} };
-s_mpi_coll_description_t Colls::mpi_coll_bcast_description[] = {
- COLL_BCASTS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} };
-s_mpi_coll_description_t Colls::mpi_coll_reduce_description[] = {
- COLL_REDUCES(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr} };
+s_mpi_coll_description_t mpi_coll_gather_description[] = {COLL_GATHERS(COLL_DESCRIPTION, COLL_COMMA),
+ {"", "", nullptr}};
+s_mpi_coll_description_t mpi_coll_allgather_description[] = {COLL_ALLGATHERS(COLL_DESCRIPTION, COLL_COMMA),
+ {"", "", nullptr}};
+s_mpi_coll_description_t mpi_coll_allgatherv_description[] = {COLL_ALLGATHERVS(COLL_DESCRIPTION, COLL_COMMA),
+ {"", "", nullptr}};
+s_mpi_coll_description_t mpi_coll_allreduce_description[] = {COLL_ALLREDUCES(COLL_DESCRIPTION, COLL_COMMA),
+ {"", "", nullptr}};
+s_mpi_coll_description_t mpi_coll_reduce_scatter_description[] = {COLL_REDUCE_SCATTERS(COLL_DESCRIPTION, COLL_COMMA),
+ {"", "", nullptr}};
+s_mpi_coll_description_t mpi_coll_scatter_description[] = {COLL_SCATTERS(COLL_DESCRIPTION, COLL_COMMA),
+ {"", "", nullptr}};
+s_mpi_coll_description_t mpi_coll_barrier_description[] = {COLL_BARRIERS(COLL_DESCRIPTION, COLL_COMMA),
+ {"", "", nullptr}};
+s_mpi_coll_description_t mpi_coll_alltoall_description[] = {COLL_ALLTOALLS(COLL_DESCRIPTION, COLL_COMMA),
+ {"", "", nullptr}};
+s_mpi_coll_description_t mpi_coll_alltoallv_description[] = {COLL_ALLTOALLVS(COLL_DESCRIPTION, COLL_COMMA),
+ {"", "", nullptr}};
+s_mpi_coll_description_t mpi_coll_bcast_description[] = {COLL_BCASTS(COLL_DESCRIPTION, COLL_COMMA), {"", "", nullptr}};
+s_mpi_coll_description_t mpi_coll_reduce_description[] = {COLL_REDUCES(COLL_DESCRIPTION, COLL_COMMA),
+ {"", "", nullptr}};
+
+// Needed by the automatic selector weird implementation
+s_mpi_coll_description_t* colls::get_smpi_coll_description(const char* name, int rank)
+{
+ if (strcmp(name, "gather") == 0)
+ return &mpi_coll_gather_description[rank];
+ if (strcmp(name, "allgather") == 0)
+ return &mpi_coll_allgather_description[rank];
+ if (strcmp(name, "allgatherv") == 0)
+ return &mpi_coll_allgatherv_description[rank];
+ if (strcmp(name, "allreduce") == 0)
+ return &mpi_coll_allreduce_description[rank];
+ if (strcmp(name, "reduce_scatter") == 0)
+ return &mpi_coll_reduce_scatter_description[rank];
+ if (strcmp(name, "scatter") == 0)
+ return &mpi_coll_scatter_description[rank];
+ if (strcmp(name, "barrier") == 0)
+ return &mpi_coll_barrier_description[rank];
+ if (strcmp(name, "alltoall") == 0)
+ return &mpi_coll_alltoall_description[rank];
+ if (strcmp(name, "alltoallv") == 0)
+ return &mpi_coll_alltoallv_description[rank];
+ if (strcmp(name, "bcast") == 0)
+ return &mpi_coll_bcast_description[rank];
+ if (strcmp(name, "reduce") == 0)
+ return &mpi_coll_reduce_description[rank];
+ XBT_INFO("You requested an unknown collective: %s", name);
+ return nullptr;
+}
/** Displays the long description of all registered models, and quit */
-void Colls::coll_help(const char *category, s_mpi_coll_description_t * table)
+void colls::coll_help(const char* category, s_mpi_coll_description_t* table)
{
XBT_WARN("Long description of the %s models accepted by this simulator:\n", category);
for (int i = 0; not table[i].name.empty(); i++)
XBT_WARN(" %s: %s\n", table[i].name.c_str(), table[i].description.c_str());
}
-int Colls::find_coll_description(s_mpi_coll_description_t* table, const std::string& name, const char* desc)
+int colls::find_coll_description(s_mpi_coll_description_t* table, const std::string& name, const char* desc)
{
for (int i = 0; not table[i].name.empty(); i++)
if (name == table[i].name) {
return -1;
}
-int(*Colls::gather) (const void *send_buff, int send_count, MPI_Datatype send_type,
- void *recv_buff, int recv_count, MPI_Datatype recv_type,
- int root, MPI_Comm comm);
-void Colls::set_gather(const std::string& name)
+int (*colls::gather)(const void* send_buff, int send_count, MPI_Datatype send_type, void* recv_buff, int recv_count,
+ MPI_Datatype recv_type, int root, MPI_Comm comm);
+int (*colls::allgather)(const void* send_buff, int send_count, MPI_Datatype send_type, void* recv_buff, int recv_count,
+ MPI_Datatype recv_type, MPI_Comm comm);
+int (*colls::allgatherv)(const void* send_buff, int send_count, MPI_Datatype send_type, void* recv_buff,
+ const int* recv_count, const int* recv_disps, MPI_Datatype recv_type, MPI_Comm comm);
+int (*colls::alltoall)(const void* send_buff, int send_count, MPI_Datatype send_type, void* recv_buff, int recv_count,
+ MPI_Datatype recv_type, MPI_Comm comm);
+int (*colls::alltoallv)(const void* send_buff, const int* send_counts, const int* send_disps, MPI_Datatype send_type,
+ void* recv_buff, const int* recv_counts, const int* recv_disps, MPI_Datatype recv_type,
+ MPI_Comm comm);
+int (*colls::bcast)(void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm);
+int (*colls::reduce)(const void* buf, void* rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm);
+int (*colls::allreduce)(const void* sbuf, void* rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm);
+int (*colls::reduce_scatter)(const void* sbuf, void* rbuf, const int* rcounts, MPI_Datatype dtype, MPI_Op op,
+ MPI_Comm comm);
+int (*colls::scatter)(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
+ MPI_Datatype recvtype, int root, MPI_Comm comm);
+int (*colls::barrier)(MPI_Comm comm);
+
+void (*colls::smpi_coll_cleanup_callback)();
+
+void colls::set_gather(const std::string& name)
{
int id = find_coll_description(mpi_coll_gather_description, name, "gather");
gather = reinterpret_cast<int(*)(const void *send_buff, int send_count, MPI_Datatype send_type,
COLL_APPLY(COLL_SETTER,COLL_ALLTOALL_SIG,"");
COLL_APPLY(COLL_SETTER,COLL_ALLTOALLV_SIG,"");
-void Colls::set_collectives(){
+void colls::set_collectives()
+{
std::string selector_name = simgrid::config::get_value<std::string>("smpi/coll-selector");
if (selector_name.empty())
selector_name = "default";
std::pair<std::string, std::function<void(std::string)>> setter_callbacks[] = {
- {"gather", &Colls::set_gather}, {"allgather", &Colls::set_allgather},
- {"allgatherv", &Colls::set_allgatherv}, {"allreduce", &Colls::set_allreduce},
- {"alltoall", &Colls::set_alltoall}, {"alltoallv", &Colls::set_alltoallv},
- {"reduce", &Colls::set_reduce}, {"reduce_scatter", &Colls::set_reduce_scatter},
- {"scatter", &Colls::set_scatter}, {"bcast", &Colls::set_bcast},
- {"barrier", &Colls::set_barrier}};
+ {"gather", &colls::set_gather}, {"allgather", &colls::set_allgather},
+ {"allgatherv", &colls::set_allgatherv}, {"allreduce", &colls::set_allreduce},
+ {"alltoall", &colls::set_alltoall}, {"alltoallv", &colls::set_alltoallv},
+ {"reduce", &colls::set_reduce}, {"reduce_scatter", &colls::set_reduce_scatter},
+ {"scatter", &colls::set_scatter}, {"bcast", &colls::set_bcast},
+ {"barrier", &colls::set_barrier}};
for (auto& elem : setter_callbacks) {
std::string name = simgrid::config::get_value<std::string>(("smpi/" + elem.first).c_str());
//Implementations of the single algorithm collectives
-int Colls::gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs,
-                MPI_Datatype recvtype, int root, MPI_Comm comm)
+// Blocking MPI_Gatherv: every rank sends 'sendcount' elements; 'root' receives
+// per-rank amounts 'recvcounts' at offsets 'displs'. SMPI has a single
+// implementation for this collective: post the nonblocking igatherv and wait
+// for the request. The trailing 0 is igatherv's 'external' flag, which offsets
+// the internal system tag (COLL_TAG_GATHERV - external).
+int colls::gatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts,
+                   const int* displs, MPI_Datatype recvtype, int root, MPI_Comm comm)
{
  MPI_Request request;
-  Colls::igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm, &request, 0);
+  colls::igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm, &request, 0);
  return Request::wait(&request, MPI_STATUS_IGNORE);
}
-
-int Colls::scatterv(const void *sendbuf, const int *sendcounts, const int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount,
-                MPI_Datatype recvtype, int root, MPI_Comm comm)
+// Blocking MPI_Scatterv: 'root' distributes 'sendcounts[i]' elements starting
+// at offset 'displs[i]' to each rank i. SMPI has a single implementation for
+// this collective: delegate to the nonblocking iscatterv and block until the
+// request completes. The trailing 0 is the 'external' flag used to derive the
+// internal system tag.
+int colls::scatterv(const void* sendbuf, const int* sendcounts, const int* displs, MPI_Datatype sendtype, void* recvbuf,
+                    int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm)
{
  MPI_Request request;
-  Colls::iscatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, &request, 0);
+  colls::iscatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, &request, 0);
  return Request::wait(&request, MPI_STATUS_IGNORE);
}
-
-int Colls::scan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
+int colls::scan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
int system_tag = -888;
MPI_Aint lb = 0;
return MPI_SUCCESS;
}
-int Colls::exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
+int colls::exscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
int system_tag = -888;
MPI_Aint lb = 0;
return MPI_SUCCESS;
}
-int Colls::alltoallw(const void *sendbuf, const int *sendcounts, const int *senddisps, const MPI_Datatype* sendtypes,
-                     void *recvbuf, const int *recvcounts, const int *recvdisps, const MPI_Datatype* recvtypes, MPI_Comm comm)
+// Blocking MPI_Alltoallw (per-peer counts, displacements and datatypes on both
+// the send and receive sides). SMPI has a single implementation for this
+// collective: issue the nonblocking ialltoallw and wait on the returned
+// request ('external' flag 0 selects the internal system tag).
+int colls::alltoallw(const void* sendbuf, const int* sendcounts, const int* senddisps, const MPI_Datatype* sendtypes,
+                     void* recvbuf, const int* recvcounts, const int* recvdisps, const MPI_Datatype* recvtypes,
+                     MPI_Comm comm)
{
  MPI_Request request;
-  Colls::ialltoallw(sendbuf, sendcounts, senddisps, sendtypes, recvbuf, recvcounts, recvdisps, recvtypes, comm, &request, 0);
+  colls::ialltoallw(sendbuf, sendcounts, senddisps, sendtypes, recvbuf, recvcounts, recvdisps, recvtypes, comm,
+                    &request, 0);
  return Request::wait(&request, MPI_STATUS_IGNORE);
}
void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm)
{
MPI_Request request;
- Colls::igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, &request, 0);
+ colls::igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, &request, 0);
return Request::wait(&request, MPI_STATUS_IGNORE);
}
int ret = reduce__default(sendbuf, tmpbuf, count, datatype, op, 0, comm);
if(ret==MPI_SUCCESS)
- ret = Colls::scatterv(tmpbuf, recvcounts, displs, datatype, recvbuf, recvcounts[rank], datatype, 0, comm);
+ ret = colls::scatterv(tmpbuf, recvcounts, displs, datatype, recvbuf, recvcounts[rank], datatype, 0, comm);
delete[] displs;
smpi_free_tmp_buffer(tmpbuf);
return ret;
void *recvbuf,int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
{
MPI_Request request;
- Colls::iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, &request);
+ colls::iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, &request);
return Request::wait(&request, MPI_STATUS_IGNORE);
}
const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPI_Comm comm)
{
MPI_Request request;
- Colls::iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, &request, 0);
+ colls::iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm, &request, 0);
MPI_Request* requests = request->get_nbc_requests();
int count = request->get_nbc_requests_size();
Request::waitall(count, requests, MPI_STATUS_IGNORE);
void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm)
{
MPI_Request request;
- Colls::iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, &request, 0);
+ colls::iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, &request, 0);
return Request::wait(&request, MPI_STATUS_IGNORE);
}
return reduce__ompi_basic_linear(sendbuf, recvbuf, count, datatype, op, root, comm);
}
MPI_Request request;
- Colls::ireduce(sendbuf, recvbuf, count, datatype, op, root, comm, &request, 0);
+ colls::ireduce(sendbuf, recvbuf, count, datatype, op, root, comm, &request, 0);
return Request::wait(&request, MPI_STATUS_IGNORE);
}
void *recvbuf, const int *recvcounts, const int *recvdisps, MPI_Datatype recvtype, MPI_Comm comm)
{
MPI_Request request;
- Colls::ialltoallv(sendbuf, sendcounts, senddisps, sendtype, recvbuf, recvcounts, recvdisps, recvtype, comm, &request, 0);
+ colls::ialltoallv(sendbuf, sendcounts, senddisps, sendtype, recvbuf, recvcounts, recvdisps, recvtype, comm, &request,
+ 0);
return Request::wait(&request, MPI_STATUS_IGNORE);
}
int agg_table_sum = 0;
mv2_alltoall_tuning_table** table_ptrs = NULL;
mv2_alltoall_num_ppn_conf = 3;
- if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
- simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
+ if (simgrid::smpi::colls::smpi_coll_cleanup_callback == NULL)
+ simgrid::smpi::colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
mv2_alltoall_thresholds_table = new mv2_alltoall_tuning_table*[mv2_alltoall_num_ppn_conf];
table_ptrs = new mv2_alltoall_tuning_table*[mv2_alltoall_num_ppn_conf];
mv2_size_alltoall_tuning_table = new int[mv2_alltoall_num_ppn_conf];
{
int agg_table_sum = 0;
- if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
- simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
+ if (simgrid::smpi::colls::smpi_coll_cleanup_callback == NULL)
+ simgrid::smpi::colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
mv2_allgather_num_ppn_conf = 3;
mv2_allgather_thresholds_table = new mv2_allgather_tuning_table*[mv2_allgather_num_ppn_conf];
mv2_allgather_tuning_table** table_ptrs = new mv2_allgather_tuning_table*[mv2_allgather_num_ppn_conf];
static void init_mv2_gather_tables_stampede()
{
- if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
- simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
+ if (simgrid::smpi::colls::smpi_coll_cleanup_callback == NULL)
+ simgrid::smpi::colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
mv2_size_gather_tuning_table = 7;
mv2_gather_thresholds_table = new mv2_gather_tuning_table[mv2_size_gather_tuning_table];
mv2_gather_tuning_table mv2_tmp_gather_thresholds_table[] = {
static void init_mv2_allgatherv_tables_stampede()
{
- if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
- simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
+ if (simgrid::smpi::colls::smpi_coll_cleanup_callback == NULL)
+ simgrid::smpi::colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
mv2_size_allgatherv_tuning_table = 6;
mv2_allgatherv_thresholds_table = new mv2_allgatherv_tuning_table[mv2_size_allgatherv_tuning_table];
mv2_allgatherv_tuning_table mv2_tmp_allgatherv_thresholds_table[] = {
+// MVAPICH2 selector helper: performs only the reduce-to-rank-0 step of an
+// allreduce via the generic colls::reduce entry point. The reduce's return
+// code is ignored and MPI_SUCCESS is always returned.
static int MPIR_Allreduce_reduce_p2p_MV2(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
MPI_Comm comm)
{
- simgrid::smpi::Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
+ simgrid::smpi::colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
return MPI_SUCCESS;
}
+// MVAPICH2 selector helper: despite the _shmem_ name, SMPI implements it as a
+// plain reduce to rank 0 through colls::reduce; the reduce's return code is
+// ignored and MPI_SUCCESS is always returned.
static int MPIR_Allreduce_reduce_shmem_MV2(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
MPI_Comm comm)
{
- simgrid::smpi::Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
+ simgrid::smpi::colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
return MPI_SUCCESS;
}
static void init_mv2_allreduce_tables_stampede()
{
- if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
- simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
+ if (simgrid::smpi::colls::smpi_coll_cleanup_callback == NULL)
+ simgrid::smpi::colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
mv2_size_allreduce_tuning_table = 8;
mv2_allreduce_thresholds_table = new mv2_allreduce_tuning_table[mv2_size_allreduce_tuning_table];
mv2_allreduce_tuning_table mv2_tmp_allreduce_thresholds_table[] = {
static void init_mv2_bcast_tables_stampede()
{
// Stampede,
- if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
- simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
+ if (simgrid::smpi::colls::smpi_coll_cleanup_callback == NULL)
+ simgrid::smpi::colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
mv2_size_bcast_tuning_table = 8;
mv2_bcast_thresholds_table = new mv2_bcast_tuning_table[mv2_size_bcast_tuning_table];
static void init_mv2_reduce_tables_stampede()
{
- if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
- simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
+ if (simgrid::smpi::colls::smpi_coll_cleanup_callback == NULL)
+ simgrid::smpi::colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
/*Stampede*/
mv2_size_reduce_tuning_table = 8;
mv2_reduce_thresholds_table = new mv2_reduce_tuning_table[mv2_size_reduce_tuning_table];
static void init_mv2_reduce_scatter_tables_stampede()
{
- if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
- simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
+ if (simgrid::smpi::colls::smpi_coll_cleanup_callback == NULL)
+ simgrid::smpi::colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
mv2_size_red_scat_tuning_table = 6;
mv2_red_scat_thresholds_table = new mv2_red_scat_tuning_table[mv2_size_red_scat_tuning_table];
mv2_red_scat_tuning_table mv2_tmp_red_scat_thresholds_table[] = {
static void init_mv2_scatter_tables_stampede()
{
- if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
- simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
+ if (simgrid::smpi::colls::smpi_coll_cleanup_callback == NULL)
+ simgrid::smpi::colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
int agg_table_sum = 0;
mv2_scatter_num_ppn_conf = 3;
namespace simgrid{
namespace smpi{
-
-int Colls::ibarrier(MPI_Comm comm, MPI_Request* request, int external)
+int colls::ibarrier(MPI_Comm comm, MPI_Request* request, int external)
{
int size = comm->size();
int rank = comm->rank();
return MPI_SUCCESS;
}
-int Colls::ibcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm, MPI_Request* request, int external)
+int colls::ibcast(void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm, MPI_Request* request,
+ int external)
{
int size = comm->size();
int rank = comm->rank();
return MPI_SUCCESS;
}
-int Colls::iallgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
- void *recvbuf,int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, int external)
+int colls::iallgather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
+ MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, int external)
{
const int system_tag = COLL_TAG_ALLGATHER-external;
return MPI_SUCCESS;
}
-int Colls::iscatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
- void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request, int external)
+int colls::iscatter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
+ MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request, int external)
{
const int system_tag = COLL_TAG_SCATTER-external;
MPI_Aint lb = 0;
return MPI_SUCCESS;
}
-int Colls::iallgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
- const int *recvcounts, const int *displs, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, int external)
+int colls::iallgatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts,
+ const int* displs, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, int external)
{
const int system_tag = COLL_TAG_ALLGATHERV-external;
MPI_Aint lb = 0;
return MPI_SUCCESS;
}
-int Colls::ialltoall( const void *sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, int external){
+int colls::ialltoall(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
+ MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, int external)
+{
int system_tag = COLL_TAG_ALLTOALL-external;
MPI_Aint lb = 0;
MPI_Aint sendext = 0;
return MPI_SUCCESS;
}
-int Colls::ialltoallv(const void *sendbuf, const int *sendcounts, const int *senddisps, MPI_Datatype sendtype,
- void *recvbuf, const int *recvcounts, const int *recvdisps, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request, int external){
+int colls::ialltoallv(const void* sendbuf, const int* sendcounts, const int* senddisps, MPI_Datatype sendtype,
+ void* recvbuf, const int* recvcounts, const int* recvdisps, MPI_Datatype recvtype, MPI_Comm comm,
+ MPI_Request* request, int external)
+{
const int system_tag = COLL_TAG_ALLTOALLV-external;
MPI_Aint lb = 0;
MPI_Aint sendext = 0;
return err;
}
-int Colls::ialltoallw(const void *sendbuf, const int *sendcounts, const int *senddisps, const MPI_Datatype* sendtypes,
- void *recvbuf, const int *recvcounts, const int *recvdisps, const MPI_Datatype* recvtypes, MPI_Comm comm, MPI_Request *request, int external){
+int colls::ialltoallw(const void* sendbuf, const int* sendcounts, const int* senddisps, const MPI_Datatype* sendtypes,
+ void* recvbuf, const int* recvcounts, const int* recvdisps, const MPI_Datatype* recvtypes,
+ MPI_Comm comm, MPI_Request* request, int external)
+{
const int system_tag = COLL_TAG_ALLTOALLW-external;
/* Initialize. */
return err;
}
-int Colls::igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
- void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request, int external)
+int colls::igather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
+ MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request, int external)
{
const int system_tag = COLL_TAG_GATHER-external;
MPI_Aint lb = 0;
return MPI_SUCCESS;
}
-int Colls::igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *displs,
- MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request, int external)
+int colls::igatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts,
+ const int* displs, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request,
+ int external)
{
int system_tag = COLL_TAG_GATHERV-external;
MPI_Aint lb = 0;
}
return MPI_SUCCESS;
}
-int Colls::iscatterv(const void *sendbuf, const int *sendcounts, const int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount,
- MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request, int external)
+int colls::iscatterv(const void* sendbuf, const int* sendcounts, const int* displs, MPI_Datatype sendtype,
+ void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request,
+ int external)
{
int system_tag = COLL_TAG_SCATTERV-external;
MPI_Aint lb = 0;
return MPI_SUCCESS;
}
-int Colls::ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root,
- MPI_Comm comm, MPI_Request* request, int external)
+int colls::ireduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root,
+ MPI_Comm comm, MPI_Request* request, int external)
{
const int system_tag = COLL_TAG_REDUCE-external;
MPI_Aint lb = 0;
return MPI_SUCCESS;
}
-int Colls::iallreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
- MPI_Op op, MPI_Comm comm, MPI_Request* request, int external)
+int colls::iallreduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm,
+ MPI_Request* request, int external)
{
const int system_tag = COLL_TAG_ALLREDUCE-external;
return MPI_SUCCESS;
}
-int Colls::iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request* request, int external)
+int colls::iscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm,
+ MPI_Request* request, int external)
{
int system_tag = -888-external;
MPI_Aint lb = 0;
return MPI_SUCCESS;
}
-int Colls::iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request* request, int external)
+int colls::iexscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm,
+ MPI_Request* request, int external)
{
int system_tag = -888-external;
MPI_Aint lb = 0;
return MPI_SUCCESS;
}
-int Colls::ireduce_scatter(const void *sendbuf, void *recvbuf, const int *recvcounts, MPI_Datatype datatype, MPI_Op op,
- MPI_Comm comm, MPI_Request* request, int external){
-//Version where each process performs the reduce for its own part. Alltoall pattern for comms.
+int colls::ireduce_scatter(const void* sendbuf, void* recvbuf, const int* recvcounts, MPI_Datatype datatype, MPI_Op op,
+ MPI_Comm comm, MPI_Request* request, int external)
+{
+ // Version where each process performs the reduce for its own part. Alltoall pattern for comms.
const int system_tag = COLL_TAG_REDUCE_SCATTER-external;
MPI_Aint lb = 0;
MPI_Aint dataext = 0;
/** @brief MPI collective description */
#define COLL_DEFS(cat, ret, args, args2) \
- static void _XBT_CONCAT(set_, cat)(const std::string& name); \
- static s_mpi_coll_description_t _XBT_CONCAT3(mpi_coll_, cat, _description)[]; \
- static int(*cat) args;
+ void _XBT_CONCAT(set_, cat)(const std::string& name); \
+ extern int(*cat) args;
-#define COLL_SIG(cat, ret, args, args2)\
- static int cat args;
+#define COLL_SIG(cat, ret, args, args2) int cat args;
#define COLL_DESCRIPTION(cat, ret, args, name) \
{ \
void *coll;
};
-class Colls{
-public:
- static XBT_PUBLIC void coll_help(const char* category, s_mpi_coll_description_t* table);
- static XBT_PUBLIC int find_coll_description(s_mpi_coll_description_t* table, const std::string& name,
- const char* desc);
- static void set_collectives();
-
- // for each collective type, create the set_* prototype, the description array and the function pointer
-// static void set_gather(const std::string& name);
-// static s_mpi_coll_description_t mpi_coll_gather_description[];
-// static int(*gather)(const void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type,
+namespace colls {
+XBT_PUBLIC void coll_help(const char* category, s_mpi_coll_description_t* table);
+XBT_PUBLIC int find_coll_description(s_mpi_coll_description_t* table, const std::string& name, const char* desc);
+void set_collectives();
+XBT_PUBLIC s_mpi_coll_description_t* get_smpi_coll_description(const char* name, int rank);
+
+// for each collective type, create the set_* prototype, the description array and the function pointer
+// void set_gather(const std::string& name);
+// extern int(*gather)(const void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count,
+// MPI_Datatype recv_type,
// int root, MPI_Comm comm);
- COLL_APPLY(COLL_DEFS, COLL_GATHER_SIG, "")
- COLL_APPLY(COLL_DEFS, COLL_ALLGATHER_SIG, "")
- COLL_APPLY(COLL_DEFS, COLL_ALLGATHERV_SIG, "")
- COLL_APPLY(COLL_DEFS, COLL_REDUCE_SIG, "")
- COLL_APPLY(COLL_DEFS, COLL_ALLREDUCE_SIG, "")
- COLL_APPLY(COLL_DEFS, COLL_REDUCE_SCATTER_SIG, "")
- COLL_APPLY(COLL_DEFS, COLL_SCATTER_SIG, "")
- COLL_APPLY(COLL_DEFS, COLL_BARRIER_SIG, "")
- COLL_APPLY(COLL_DEFS, COLL_BCAST_SIG, "")
- COLL_APPLY(COLL_DEFS, COLL_ALLTOALL_SIG, "")
- COLL_APPLY(COLL_DEFS, COLL_ALLTOALLV_SIG, "")
-
- // These fairly unused collectives only have one implementation in SMPI
- static int gatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts, const int* displs,
- MPI_Datatype recvtype, int root, MPI_Comm comm);
- static int scatterv(const void* sendbuf, const int* sendcounts, const int* displs, MPI_Datatype sendtype, void* recvbuf, int recvcount,
- MPI_Datatype recvtype, int root, MPI_Comm comm);
- static int scan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
- static int exscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
- static int alltoallw
- (const void* sendbuf, const int* sendcounts, const int* senddisps, const MPI_Datatype* sendtypes, void* recvbuf, const int* recvcounts,
- const int* recvdisps, const MPI_Datatype* recvtypes, MPI_Comm comm);
-
- //async collectives
- static int ibarrier(MPI_Comm comm, MPI_Request* request, int external=1);
- static int ibcast(void *buf, int count, MPI_Datatype datatype,
- int root, MPI_Comm comm, MPI_Request* request, int external=1);
- static int igather (const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
- MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request, int external=1);
- static int igatherv (const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf,
- const int* recvcounts, const int* displs, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request, int external=1);
- static int iallgather (const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf,
- int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request, int external=1);
- static int iallgatherv (const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf,
- const int* recvcounts, const int* displs, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request, int external=1);
- static int iscatter (const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf,
- int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request, int external=1);
- static int iscatterv (const void* sendbuf, const int* sendcounts, const int* displs, MPI_Datatype sendtype,
- void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request, int external=1);
- static int ireduce
- (const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm, MPI_Request *request, int external=1);
- static int iallreduce
- (const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request, int external=1);
- static int iscan
- (const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request, int external=1);
- static int iexscan
- (const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request, int external=1);
- static int ireduce_scatter
- (const void* sendbuf, void* recvbuf, const int* recvcounts, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request, int external=1);
- static int ireduce_scatter_block
- (const void* sendbuf, void* recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request, int external=1);
- static int ialltoall (const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf,
- int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request, int external=1);
- static int ialltoallv
- (const void* sendbuf, const int* sendcounts, const int* senddisps, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts,
- const int* recvdisps, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request, int external=1);
- static int ialltoallw
- (const void* sendbuf, const int* sendcounts, const int* senddisps, const MPI_Datatype* sendtypes, void* recvbuf, const int* recvcounts,
- const int* recvdisps, const MPI_Datatype* recvtypes, MPI_Comm comm, MPI_Request *request, int external=1);
-
-
- static void (*smpi_coll_cleanup_callback)();
+COLL_APPLY(COLL_DEFS, COLL_GATHER_SIG, "")
+COLL_APPLY(COLL_DEFS, COLL_ALLGATHER_SIG, "")
+COLL_APPLY(COLL_DEFS, COLL_ALLGATHERV_SIG, "")
+COLL_APPLY(COLL_DEFS, COLL_REDUCE_SIG, "")
+COLL_APPLY(COLL_DEFS, COLL_ALLREDUCE_SIG, "")
+COLL_APPLY(COLL_DEFS, COLL_REDUCE_SCATTER_SIG, "")
+COLL_APPLY(COLL_DEFS, COLL_SCATTER_SIG, "")
+COLL_APPLY(COLL_DEFS, COLL_BARRIER_SIG, "")
+COLL_APPLY(COLL_DEFS, COLL_BCAST_SIG, "")
+COLL_APPLY(COLL_DEFS, COLL_ALLTOALL_SIG, "")
+COLL_APPLY(COLL_DEFS, COLL_ALLTOALLV_SIG, "")
+
+// These fairly unused collectives only have one implementation in SMPI
+int gatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts,
+ const int* displs, MPI_Datatype recvtype, int root, MPI_Comm comm);
+int scatterv(const void* sendbuf, const int* sendcounts, const int* displs, MPI_Datatype sendtype, void* recvbuf,
+ int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
+int scan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
+int exscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
+int alltoallw(const void* sendbuf, const int* sendcounts, const int* senddisps, const MPI_Datatype* sendtypes,
+ void* recvbuf, const int* recvcounts, const int* recvdisps, const MPI_Datatype* recvtypes, MPI_Comm comm);
+
+// async collectives
+int ibarrier(MPI_Comm comm, MPI_Request* request, int external = 1);
+int ibcast(void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm, MPI_Request* request,
+ int external = 1);
+int igather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
+ MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request, int external = 1);
+int igatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts,
+ const int* displs, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request, int external = 1);
+int iallgather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
+ MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, int external = 1);
+int iallgatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts,
+ const int* displs, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, int external = 1);
+int iscatter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
+ MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request, int external = 1);
+int iscatterv(const void* sendbuf, const int* sendcounts, const int* displs, MPI_Datatype sendtype, void* recvbuf,
+ int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request, int external = 1);
+int ireduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm,
+ MPI_Request* request, int external = 1);
+int iallreduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm,
+ MPI_Request* request, int external = 1);
+int iscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm,
+ MPI_Request* request, int external = 1);
+int iexscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm,
+ MPI_Request* request, int external = 1);
+int ireduce_scatter(const void* sendbuf, void* recvbuf, const int* recvcounts, MPI_Datatype datatype, MPI_Op op,
+ MPI_Comm comm, MPI_Request* request, int external = 1);
+int ireduce_scatter_block(const void* sendbuf, void* recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op,
+ MPI_Comm comm, MPI_Request* request, int external = 1);
+int ialltoall(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
+ MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request, int external = 1);
+int ialltoallv(const void* sendbuf, const int* sendcounts, const int* senddisps, MPI_Datatype sendtype, void* recvbuf,
+ const int* recvcounts, const int* recvdisps, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request,
+ int external = 1);
+int ialltoallw(const void* sendbuf, const int* sendcounts, const int* senddisps, const MPI_Datatype* sendtypes,
+ void* recvbuf, const int* recvcounts, const int* recvdisps, const MPI_Datatype* recvtypes, MPI_Comm comm,
+ MPI_Request* request, int external = 1);
+
+extern void (*smpi_coll_cleanup_callback)();
};
/*************
MPI_Offset max_offset = (min_offset + count * datatype->size());//cheating, as we don't care about exact data location, we can skip extent
MPI_Offset* min_offsets = new MPI_Offset[size];
MPI_Offset* max_offsets = new MPI_Offset[size];
- simgrid::smpi::Colls::allgather(&min_offset, 1, MPI_OFFSET, min_offsets, 1, MPI_OFFSET, comm_);
- simgrid::smpi::Colls::allgather(&max_offset, 1, MPI_OFFSET, max_offsets, 1, MPI_OFFSET, comm_);
+ simgrid::smpi::colls::allgather(&min_offset, 1, MPI_OFFSET, min_offsets, 1, MPI_OFFSET, comm_);
+ simgrid::smpi::colls::allgather(&max_offset, 1, MPI_OFFSET, max_offsets, 1, MPI_OFFSET, comm_);
MPI_Offset min=min_offset;
MPI_Offset max=max_offset;
MPI_Offset tot= 0;
seek(min_offset, MPI_SEEK_SET);
T(this,sendbuf,totreads/datatype->size(),datatype, status);
}
- simgrid::smpi::Colls::alltoall(send_sizes, 1, MPI_INT, recv_sizes, 1, MPI_INT, comm_);
+ simgrid::smpi::colls::alltoall(send_sizes, 1, MPI_INT, recv_sizes, 1, MPI_INT, comm_);
int total_recv=0;
for(int i=0;i<size;i++){
recv_disps[i]=total_recv;
total_recv+=recv_sizes[i];
}
//Set buf value to avoid copying dumb data
- simgrid::smpi::Colls::alltoallv(sendbuf, send_sizes, send_disps, MPI_BYTE,
- buf, recv_sizes, recv_disps, MPI_BYTE, comm_);
+ simgrid::smpi::colls::alltoallv(sendbuf, send_sizes, send_disps, MPI_BYTE, buf, recv_sizes, recv_disps, MPI_BYTE,
+ comm_);
status->count=count * datatype->size();
smpi_free_tmp_buffer(sendbuf);
delete[] send_sizes;
// return if already called
if (smpi_cpu_threshold > -1)
return;
- simgrid::smpi::Colls::set_collectives();
- simgrid::smpi::Colls::smpi_coll_cleanup_callback = nullptr;
+ simgrid::smpi::colls::set_collectives();
+ simgrid::smpi::colls::smpi_coll_cleanup_callback = nullptr;
smpi_cpu_threshold = simgrid::config::get_value<double>("smpi/cpu-threshold");
if (smpi_cpu_threshold < 0)
smpi_cpu_threshold = DBL_MAX;
smpi_shared_destroy();
smpi_deployment_cleanup_instances();
- if (simgrid::smpi::Colls::smpi_coll_cleanup_callback != nullptr)
- simgrid::smpi::Colls::smpi_coll_cleanup_callback();
+ if (simgrid::smpi::colls::smpi_coll_cleanup_callback != nullptr)
+ simgrid::smpi::colls::smpi_coll_cleanup_callback();
MPI_COMM_WORLD = MPI_COMM_NULL;
void BarrierAction::kernel(simgrid::xbt::ReplayAction&)
{
TRACE_smpi_comm_in(my_proc_id, __func__, new simgrid::instr::NoOpTIData("barrier"));
- Colls::barrier(MPI_COMM_WORLD);
+ colls::barrier(MPI_COMM_WORLD);
TRACE_smpi_comm_out(my_proc_id);
}
new simgrid::instr::CollTIData("bcast", MPI_COMM_WORLD->group()->actor(args.root)->get_pid(),
-1.0, args.size, -1, Datatype::encode(args.datatype1), ""));
- Colls::bcast(send_buffer(args.size * args.datatype1->size()), args.size, args.datatype1, args.root, MPI_COMM_WORLD);
+ colls::bcast(send_buffer(args.size * args.datatype1->size()), args.size, args.datatype1, args.root, MPI_COMM_WORLD);
TRACE_smpi_comm_out(my_proc_id);
}
args.comp_size, args.comm_size, -1,
Datatype::encode(args.datatype1), ""));
- Colls::reduce(send_buffer(args.comm_size * args.datatype1->size()),
- recv_buffer(args.comm_size * args.datatype1->size()), args.comm_size, args.datatype1, MPI_OP_NULL, args.root, MPI_COMM_WORLD);
+ colls::reduce(send_buffer(args.comm_size * args.datatype1->size()),
+ recv_buffer(args.comm_size * args.datatype1->size()), args.comm_size, args.datatype1, MPI_OP_NULL,
+ args.root, MPI_COMM_WORLD);
private_execute_flops(args.comp_size);
TRACE_smpi_comm_out(my_proc_id);
TRACE_smpi_comm_in(my_proc_id, "action_allreduce", new simgrid::instr::CollTIData("allreduce", -1, args.comp_size, args.comm_size, -1,
Datatype::encode(args.datatype1), ""));
- Colls::allreduce(send_buffer(args.comm_size * args.datatype1->size()),
- recv_buffer(args.comm_size * args.datatype1->size()), args.comm_size, args.datatype1, MPI_OP_NULL, MPI_COMM_WORLD);
+ colls::allreduce(send_buffer(args.comm_size * args.datatype1->size()),
+ recv_buffer(args.comm_size * args.datatype1->size()), args.comm_size, args.datatype1, MPI_OP_NULL,
+ MPI_COMM_WORLD);
private_execute_flops(args.comp_size);
TRACE_smpi_comm_out(my_proc_id);
Datatype::encode(args.datatype1),
Datatype::encode(args.datatype2)));
- Colls::alltoall(send_buffer(args.send_size * args.comm_size * args.datatype1->size()), args.send_size,
- args.datatype1, recv_buffer(args.recv_size * args.comm_size * args.datatype2->size()),
- args.recv_size, args.datatype2, MPI_COMM_WORLD);
+ colls::alltoall(send_buffer(args.send_size * args.comm_size * args.datatype1->size()), args.send_size, args.datatype1,
+ recv_buffer(args.recv_size * args.comm_size * args.datatype2->size()), args.recv_size, args.datatype2,
+ MPI_COMM_WORLD);
TRACE_smpi_comm_out(my_proc_id);
}
if (name == "gather") {
int rank = MPI_COMM_WORLD->rank();
- Colls::gather(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1,
- (rank == args.root) ? recv_buffer(args.recv_size * args.comm_size * args.datatype2->size()) : nullptr, args.recv_size, args.datatype2, args.root, MPI_COMM_WORLD);
+ colls::gather(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1,
+ (rank == args.root) ? recv_buffer(args.recv_size * args.comm_size * args.datatype2->size()) : nullptr,
+ args.recv_size, args.datatype2, args.root, MPI_COMM_WORLD);
}
else
- Colls::allgather(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1,
- recv_buffer(args.recv_size * args.datatype2->size()), args.recv_size, args.datatype2, MPI_COMM_WORLD);
+ colls::allgather(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1,
+ recv_buffer(args.recv_size * args.datatype2->size()), args.recv_size, args.datatype2,
+ MPI_COMM_WORLD);
TRACE_smpi_comm_out(my_proc_id);
}
Datatype::encode(args.datatype1), Datatype::encode(args.datatype2)));
if (name == "gatherv") {
- Colls::gatherv(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1,
- (rank == args.root) ? recv_buffer(args.recv_size_sum * args.datatype2->size()) : nullptr,
- args.recvcounts->data(), args.disps.data(), args.datatype2, args.root, MPI_COMM_WORLD);
+ colls::gatherv(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1,
+ (rank == args.root) ? recv_buffer(args.recv_size_sum * args.datatype2->size()) : nullptr,
+ args.recvcounts->data(), args.disps.data(), args.datatype2, args.root, MPI_COMM_WORLD);
}
else {
- Colls::allgatherv(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1,
- recv_buffer(args.recv_size_sum * args.datatype2->size()), args.recvcounts->data(),
- args.disps.data(), args.datatype2, MPI_COMM_WORLD);
+ colls::allgatherv(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1,
+ recv_buffer(args.recv_size_sum * args.datatype2->size()), args.recvcounts->data(),
+ args.disps.data(), args.datatype2, MPI_COMM_WORLD);
}
TRACE_smpi_comm_out(my_proc_id);
Datatype::encode(args.datatype1),
Datatype::encode(args.datatype2)));
- Colls::scatter(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1,
- (rank == args.root) ? recv_buffer(args.recv_size * args.datatype2->size()) : nullptr, args.recv_size, args.datatype2, args.root, MPI_COMM_WORLD);
+ colls::scatter(send_buffer(args.send_size * args.datatype1->size()), args.send_size, args.datatype1,
+ (rank == args.root) ? recv_buffer(args.recv_size * args.datatype2->size()) : nullptr, args.recv_size,
+ args.datatype2, args.root, MPI_COMM_WORLD);
TRACE_smpi_comm_out(my_proc_id);
}
nullptr, Datatype::encode(args.datatype1),
Datatype::encode(args.datatype2)));
- Colls::scatterv((rank == args.root) ? send_buffer(args.send_size_sum * args.datatype1->size()) : nullptr,
- args.sendcounts->data(), args.disps.data(), args.datatype1,
- recv_buffer(args.recv_size * args.datatype2->size()), args.recv_size, args.datatype2, args.root,
- MPI_COMM_WORLD);
+ colls::scatterv((rank == args.root) ? send_buffer(args.send_size_sum * args.datatype1->size()) : nullptr,
+ args.sendcounts->data(), args.disps.data(), args.datatype1,
+ recv_buffer(args.recv_size * args.datatype2->size()), args.recv_size, args.datatype2, args.root,
+ MPI_COMM_WORLD);
TRACE_smpi_comm_out(my_proc_id);
}
std::to_string(args.comp_size), /* ugly hack to print comp_size */
Datatype::encode(args.datatype1)));
- Colls::reduce_scatter(send_buffer(args.recv_size_sum * args.datatype1->size()),
- recv_buffer(args.recv_size_sum * args.datatype1->size()), args.recvcounts->data(),
- args.datatype1, MPI_OP_NULL, MPI_COMM_WORLD);
+ colls::reduce_scatter(send_buffer(args.recv_size_sum * args.datatype1->size()),
+ recv_buffer(args.recv_size_sum * args.datatype1->size()), args.recvcounts->data(),
+ args.datatype1, MPI_OP_NULL, MPI_COMM_WORLD);
private_execute_flops(args.comp_size);
TRACE_smpi_comm_out(my_proc_id);
"alltoallv", -1, args.send_size_sum, args.sendcounts, args.recv_size_sum, args.recvcounts,
Datatype::encode(args.datatype1), Datatype::encode(args.datatype2)));
- Colls::alltoallv(send_buffer(args.send_buf_size * args.datatype1->size()), args.sendcounts->data(), args.senddisps.data(), args.datatype1,
- recv_buffer(args.recv_buf_size * args.datatype2->size()), args.recvcounts->data(), args.recvdisps.data(), args.datatype2, MPI_COMM_WORLD);
+ colls::alltoallv(send_buffer(args.send_buf_size * args.datatype1->size()), args.sendcounts->data(),
+ args.senddisps.data(), args.datatype1, recv_buffer(args.recv_buf_size * args.datatype2->size()),
+ args.recvcounts->data(), args.recvdisps.data(), args.datatype2, MPI_COMM_WORLD);
TRACE_smpi_comm_out(my_proc_id);
}
id=global_id_;
global_id_++;
}
- Colls::bcast(&id, 1, MPI_INT, 0, this);
+ colls::bcast(&id, 1, MPI_INT, 0, this);
XBT_DEBUG("Communicator %p has id %d", this, id);
id_=id;//only set here, as we don't want to change it in the middle of the bcast
- Colls::barrier(this);
+ colls::barrier(this);
}
}
}else{
win_=new Win(list_, 0, 1, MPI_INFO_NULL, comm_);
}
- simgrid::smpi::Colls::bcast(&shared_file_pointer_, 1, MPI_AINT, 0, comm);
- simgrid::smpi::Colls::bcast(&shared_mutex_, 1, MPI_AINT, 0, comm);
+ simgrid::smpi::colls::bcast(&shared_file_pointer_, 1, MPI_AINT, 0, comm);
+ simgrid::smpi::colls::bcast(&shared_mutex_, 1, MPI_AINT, 0, comm);
if(comm_->rank() != 0)
intrusive_ptr_add_ref(&*shared_mutex_);
}
}
MPI_Offset result;
- simgrid::smpi::Colls::scan(&val, &result, 1, MPI_OFFSET, MPI_SUM, fh->comm_);
+ simgrid::smpi::colls::scan(&val, &result, 1, MPI_OFFSET, MPI_SUM, fh->comm_);
fh->seek(result, MPI_SEEK_SET);
int ret = fh->op_all<simgrid::smpi::File::read>(buf, count, datatype, status);
if(fh->comm_->rank()==fh->comm_->size()-1){
fh->shared_mutex_->unlock();
}
char c;
- simgrid::smpi::Colls::bcast(&c, 1, MPI_BYTE, fh->comm_->size()-1, fh->comm_);
+ simgrid::smpi::colls::bcast(&c, 1, MPI_BYTE, fh->comm_->size() - 1, fh->comm_);
return ret;
}
val=count*datatype->size();
}
MPI_Offset result;
- simgrid::smpi::Colls::scan(&val, &result, 1, MPI_OFFSET, MPI_SUM, fh->comm_);
+ simgrid::smpi::colls::scan(&val, &result, 1, MPI_OFFSET, MPI_SUM, fh->comm_);
fh->seek(result, MPI_SEEK_SET);
int ret = fh->op_all<simgrid::smpi::File::write>(const_cast<void*>(buf), count, datatype, status);
if(fh->comm_->rank()==fh->comm_->size()-1){
fh->shared_mutex_->unlock();
}
char c;
- simgrid::smpi::Colls::bcast(&c, 1, MPI_BYTE, fh->comm_->size()-1, fh->comm_);
+ simgrid::smpi::colls::bcast(&c, 1, MPI_BYTE, fh->comm_->size() - 1, fh->comm_);
return ret;
}
int File::sync(){
//no idea
- return simgrid::smpi::Colls::barrier(comm_);
+ return simgrid::smpi::colls::barrier(comm_);
}
MPI_Info File::info(){
comm->add_rma_win(this);
comm->ref();
- Colls::allgather(&(connected_wins_[rank_]), sizeof(MPI_Win), MPI_BYTE, connected_wins_, sizeof(MPI_Win),
- MPI_BYTE, comm);
+ colls::allgather(&(connected_wins_[rank_]), sizeof(MPI_Win), MPI_BYTE, connected_wins_, sizeof(MPI_Win), MPI_BYTE,
+ comm);
- Colls::bcast(&(bar_), sizeof(s4u::Barrier*), MPI_BYTE, 0, comm);
+ colls::bcast(&(bar_), sizeof(s4u::Barrier*), MPI_BYTE, 0, comm);
- Colls::barrier(comm);
+ colls::barrier(comm);
}
Win::~Win(){
comm_->remove_rma_win(this);
- Colls::barrier(comm_);
+ colls::barrier(comm_);
Comm::unref(comm_);
if (rank_ == 0)