+/**
+ * MPI_Send user entry point
+ *
+ * Hands every argument to smpi_mpi_send(), bracketed by
+ * smpi_bench_end() / smpi_bench_begin() on comm. No argument validation is
+ * done at this level and the result is always MPI_SUCCESS.
+ **/
+int MPI_Send(void* buf, int count, MPI_Datatype datatype, int dst, int tag, MPI_Comm comm) {
+  const int retval = MPI_SUCCESS;
+
+  smpi_bench_end(comm, "Send");
+  smpi_mpi_send(buf, count, datatype, dst, tag, comm);
+  smpi_bench_begin(comm, "Send");
+
+  return retval;
+}
+
+/**
+ * MPI_Sendrecv user entry point
+ *
+ * Hands every argument to smpi_mpi_sendrecv(), bracketed by
+ * smpi_bench_end() / smpi_bench_begin() on comm. No argument validation is
+ * done at this level and the result is always MPI_SUCCESS.
+ **/
+int MPI_Sendrecv(void* sendbuf, int sendcount, MPI_Datatype sendtype, int dst, int sendtag, void* recvbuf, int recvcount, MPI_Datatype recvtype, int src, int recvtag, MPI_Comm comm, MPI_Status* status) {
+  const int retval = MPI_SUCCESS;
+
+  smpi_bench_end(comm, "Sendrecv");
+  smpi_mpi_sendrecv(sendbuf, sendcount, sendtype, dst, sendtag,
+                    recvbuf, recvcount, recvtype, src, recvtag,
+                    comm, status);
+  smpi_bench_begin(comm, "Sendrecv");
+
+  return retval;
+}
+
+/**
+ * MPI_Sendrecv_replace user entry point
+ *
+ * Built on top of MPI_Sendrecv(): the incoming message is received into a
+ * temporary buffer of smpi_datatype_size(datatype) * count bytes, which is
+ * then copied over buf and released.
+ *
+ * Returns whatever MPI_Sendrecv() returned.
+ **/
+int MPI_Sendrecv_replace(void* buf, int count, MPI_Datatype datatype, int dst, int sendtag, int src, int recvtag, MPI_Comm comm, MPI_Status* status) {
+  //TODO: suboptimal implementation
+  int nbytes = smpi_datatype_size(datatype) * count;
+  void* tmp = xbt_new(char, nbytes);
+  int retval = MPI_Sendrecv(buf, count, datatype, dst, sendtag, tmp, count, datatype, src, recvtag, comm, status);
+
+  /* The copy-back is unconditional: retval is not inspected before it. */
+  memcpy(buf, tmp, nbytes * sizeof(char));
+  xbt_free(tmp);
+
+  return retval;
+}
+
+/**
+ * MPI_Test user entry point
+ *
+ * Returns MPI_ERR_ARG when request or flag is NULL, MPI_ERR_REQUEST when
+ * *request is MPI_REQUEST_NULL, and otherwise stores the result of
+ * smpi_mpi_test() in *flag and returns MPI_SUCCESS.
+ *
+ * The communicator used for the smpi_bench_end() / smpi_bench_begin()
+ * bracket is read from the request once, up front, and only when the request
+ * is usable; MPI_COMM_NULL is used otherwise.
+ **/
+int MPI_Test(MPI_Request* request, int* flag, MPI_Status* status) {
+  int retval;
+  MPI_Comm comm = MPI_COMM_NULL;
+
+  /* Never dereference request before it has been validated. */
+  if(request != NULL && *request != MPI_REQUEST_NULL) {
+    comm = (*request)->comm;
+  }
+  smpi_bench_end(comm, "Test");
+  if(request == NULL || flag == NULL) {
+    retval = MPI_ERR_ARG;
+  } else if(*request == MPI_REQUEST_NULL) {
+    retval = MPI_ERR_REQUEST;
+  } else {
+    *flag = smpi_mpi_test(request, status);
+    retval = MPI_SUCCESS;
+  }
+  /* smpi_mpi_test() receives the request by address, so it may have changed
+   * *request (assumption: its body is not visible here). Use the saved
+   * communicator instead of reading the request again. */
+  smpi_bench_begin(comm, "Test");
+  return retval;
+}
+
+/**
+ * MPI_Testany user entry point
+ *
+ * Returns MPI_ERR_ARG when index or flag is NULL; otherwise stores the
+ * result of smpi_mpi_testany() in *flag and returns MPI_SUCCESS.
+ *
+ * smpi_bench_begin() is given the communicator of the request designated by
+ * *index when the test succeeded and that slot is usable, and MPI_COMM_NULL
+ * in every other case.
+ **/
+int MPI_Testany(int count, MPI_Request requests[], int* index, int* flag, MPI_Status* status) {
+  int retval;
+  MPI_Comm comm = MPI_COMM_NULL;
+
+  smpi_bench_end(MPI_COMM_NULL, "Testany"); //FIXME
+  if(index == NULL || flag == NULL) {
+    retval = MPI_ERR_ARG;
+  } else {
+    *flag = smpi_mpi_testany(count, requests, index, status);
+    /* Only look at the request when flag/index are valid pointers, the index
+     * lies inside the array and the slot still holds a request. */
+    if(*flag && *index >= 0 && *index < count && requests[*index] != MPI_REQUEST_NULL) {
+      comm = requests[*index]->comm;
+    }
+    retval = MPI_SUCCESS;
+  }
+  smpi_bench_begin(comm, "Testany");
+  return retval;
+}
+
+/**
+ * MPI_Wait user entry point
+ *
+ * Returns MPI_ERR_ARG when request is NULL, MPI_ERR_REQUEST when *request
+ * is MPI_REQUEST_NULL, and otherwise calls smpi_mpi_wait() and returns
+ * MPI_SUCCESS.
+ *
+ * The communicator used for the smpi_bench_end() / smpi_bench_begin()
+ * bracket is read from the request once, up front, and only when the request
+ * is usable; MPI_COMM_NULL is used otherwise.
+ **/
+int MPI_Wait(MPI_Request* request, MPI_Status* status) {
+  int retval;
+  MPI_Comm comm = MPI_COMM_NULL;
+
+  /* Never dereference request before it has been validated. */
+  if(request != NULL && *request != MPI_REQUEST_NULL) {
+    comm = (*request)->comm;
+  }
+  smpi_bench_end(comm, "Wait");
+  if(request == NULL) {
+    retval = MPI_ERR_ARG;
+  } else if(*request == MPI_REQUEST_NULL) {
+    retval = MPI_ERR_REQUEST;
+  } else {
+    smpi_mpi_wait(request, status);
+    retval = MPI_SUCCESS;
+  }
+  /* smpi_mpi_wait() receives the request by address, so it may have changed
+   * *request (assumption: its body is not visible here). Use the saved
+   * communicator instead of reading the request again. */
+  smpi_bench_begin(comm, "Wait");
+  return retval;
+}
+
+/**
+ * MPI_Waitany user entry point
+ *
+ * Returns MPI_ERR_ARG when index is NULL; otherwise stores the result of
+ * smpi_mpi_waitany() in *index and returns MPI_SUCCESS.
+ *
+ * smpi_bench_begin() is given the communicator of requests[*index] when
+ * *index designates a usable slot of the array (index 0 included), and
+ * MPI_COMM_NULL in every other case.
+ **/
+int MPI_Waitany(int count, MPI_Request requests[], int* index, MPI_Status* status) {
+  int retval;
+  MPI_Comm comm = MPI_COMM_NULL;
+
+  smpi_bench_end(MPI_COMM_NULL, "Waitany"); //FIXME
+  if(index == NULL) {
+    retval = MPI_ERR_ARG;
+  } else {
+    *index = smpi_mpi_waitany(count, requests, status);
+    /* 0 is a valid position; anything outside [0, count) is treated as
+     * "no request" (presumably a negative sentinel such as MPI_UNDEFINED;
+     * confirm against smpi_mpi_waitany()). */
+    if(*index >= 0 && *index < count && requests[*index] != MPI_REQUEST_NULL) {
+      comm = requests[*index]->comm;
+    }
+    retval = MPI_SUCCESS;
+  }
+  smpi_bench_begin(comm, "Waitany");
+  return retval;
+}
+
+/**
+ * MPI_Waitall user entry point
+ *
+ * Hands its arguments to smpi_mpi_waitall(), bracketed by
+ * smpi_bench_end() / smpi_bench_begin() on MPI_COMM_NULL. No argument
+ * validation is done at this level and the result is always MPI_SUCCESS.
+ **/
+int MPI_Waitall(int count, MPI_Request requests[], MPI_Status status[]) {
+  const int retval = MPI_SUCCESS;
+
+  smpi_bench_end(MPI_COMM_NULL, "Waitall"); //FIXME
+  smpi_mpi_waitall(count, requests, status);
+  smpi_bench_begin(MPI_COMM_NULL, "Waitall"); //FIXME
+
+  return retval;
+}
+
+/**
+ * MPI_Waitsome user entry point
+ *
+ * Returns MPI_ERR_ARG when outcount or indices is NULL; otherwise stores
+ * the result of smpi_mpi_waitsome() in *outcount and returns MPI_SUCCESS.
+ * The work is bracketed by smpi_bench_end() / smpi_bench_begin() on
+ * MPI_COMM_NULL.
+ **/
+int MPI_Waitsome(int incount, MPI_Request requests[], int* outcount, int* indices, MPI_Status status[]) {
+  /* Pessimistic default, overwritten once both output pointers check out. */
+  int retval = MPI_ERR_ARG;
+
+  smpi_bench_end(MPI_COMM_NULL, "Waitsome"); //FIXME
+  if(outcount != NULL && indices != NULL) {
+    *outcount = smpi_mpi_waitsome(incount, requests, indices, status);
+    retval = MPI_SUCCESS;
+  }
+  smpi_bench_begin(MPI_COMM_NULL, "Waitsome"); //FIXME
+
+  return retval;
+}
+
+/**
+ * MPI_Bcast user entry point
+ *
+ * Returns MPI_ERR_COMM when comm is MPI_COMM_NULL; otherwise calls
+ * smpi_mpi_bcast() and returns MPI_SUCCESS. Both outcomes are bracketed by
+ * smpi_bench_end() / smpi_bench_begin() on comm.
+ **/
+int MPI_Bcast(void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) {
+  /* Pessimistic default, overwritten once the communicator checks out. */
+  int retval = MPI_ERR_COMM;
+
+  smpi_bench_end(comm, "Bcast");
+  if(comm != MPI_COMM_NULL) {
+    smpi_mpi_bcast(buf, count, datatype, root, comm);
+    retval = MPI_SUCCESS;
+  }
+  smpi_bench_begin(comm, "Bcast");
+
+  return retval;
+}
+
+/**
+ * MPI_Barrier user entry point
+ *
+ * Returns MPI_ERR_COMM when comm is MPI_COMM_NULL; otherwise calls
+ * smpi_mpi_barrier() and returns MPI_SUCCESS. Both outcomes are bracketed
+ * by smpi_bench_end() / smpi_bench_begin() on comm.
+ **/
+int MPI_Barrier(MPI_Comm comm) {
+  int retval;
+
+  smpi_bench_end(comm, "Barrier");
+  if(comm == MPI_COMM_NULL) {
+    retval = MPI_ERR_COMM;
+    goto done;
+  }
+  smpi_mpi_barrier(comm);
+  retval = MPI_SUCCESS;
+
+done:
+  smpi_bench_begin(comm, "Barrier");
+  return retval;
+}
+
+/**
+ * MPI_Gather user entry point
+ *
+ * Validation, in order: MPI_ERR_COMM for a null communicator, MPI_ERR_TYPE
+ * when either datatype is MPI_DATATYPE_NULL. When both pass,
+ * smpi_mpi_gather() is called and MPI_SUCCESS is returned. Every path is
+ * bracketed by smpi_bench_end() / smpi_bench_begin() on comm.
+ **/
+int MPI_Gather(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) {
+  int retval;
+
+  smpi_bench_end(comm, "Gather");
+  if(comm == MPI_COMM_NULL) {
+    retval = MPI_ERR_COMM;
+    goto done;
+  }
+  if(sendtype == MPI_DATATYPE_NULL || recvtype == MPI_DATATYPE_NULL) {
+    retval = MPI_ERR_TYPE;
+    goto done;
+  }
+  smpi_mpi_gather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm);
+  retval = MPI_SUCCESS;
+
+done:
+  smpi_bench_begin(comm, "Gather");
+  return retval;
+}
+
+/**
+ * MPI_Gatherv user entry point
+ *
+ * Validation, in order: MPI_ERR_COMM for a null communicator, MPI_ERR_TYPE
+ * when either datatype is MPI_DATATYPE_NULL, MPI_ERR_ARG when recvcounts
+ * or displs is NULL. When all pass, smpi_mpi_gatherv() is called and
+ * MPI_SUCCESS is returned. Every path is bracketed by
+ * smpi_bench_end() / smpi_bench_begin() on comm.
+ **/
+int MPI_Gatherv(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int* recvcounts, int* displs, MPI_Datatype recvtype, int root, MPI_Comm comm) {
+  /* Start from the first failure mode; refined below as checks pass. */
+  int retval = MPI_ERR_COMM;
+
+  smpi_bench_end(comm, "Gatherv");
+  if(comm != MPI_COMM_NULL) {
+    if(sendtype == MPI_DATATYPE_NULL || recvtype == MPI_DATATYPE_NULL) {
+      retval = MPI_ERR_TYPE;
+    } else if(recvcounts == NULL || displs == NULL) {
+      retval = MPI_ERR_ARG;
+    } else {
+      smpi_mpi_gatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm);
+      retval = MPI_SUCCESS;
+    }
+  }
+  smpi_bench_begin(comm, "Gatherv");
+
+  return retval;
+}
+
+/**
+ * MPI_Allgather user entry point
+ *
+ * Validation, in order: MPI_ERR_COMM for a null communicator, MPI_ERR_TYPE
+ * when either datatype is MPI_DATATYPE_NULL. When both pass,
+ * smpi_mpi_allgather() is called and MPI_SUCCESS is returned. Every path
+ * is bracketed by smpi_bench_end() / smpi_bench_begin() on comm.
+ **/
+int MPI_Allgather(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) {
+  /* Start from the first failure mode; refined below as checks pass. */
+  int retval = MPI_ERR_COMM;
+
+  smpi_bench_end(comm, "Allgather");
+  if(comm != MPI_COMM_NULL) {
+    if(sendtype != MPI_DATATYPE_NULL && recvtype != MPI_DATATYPE_NULL) {
+      smpi_mpi_allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
+      retval = MPI_SUCCESS;
+    } else {
+      retval = MPI_ERR_TYPE;
+    }
+  }
+  smpi_bench_begin(comm, "Allgather");
+
+  return retval;
+}
+
+/**
+ * MPI_Allgatherv user entry point
+ *
+ * Validation, in order: MPI_ERR_COMM for a null communicator, MPI_ERR_TYPE
+ * when either datatype is MPI_DATATYPE_NULL, MPI_ERR_ARG when recvcounts
+ * or displs is NULL. When all pass, smpi_mpi_allgatherv() is called and
+ * MPI_SUCCESS is returned. Every path is bracketed by
+ * smpi_bench_end() / smpi_bench_begin() on comm.
+ **/
+int MPI_Allgatherv(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int* recvcounts, int* displs, MPI_Datatype recvtype, MPI_Comm comm) {
+  int retval;
+
+  smpi_bench_end(comm, "Allgatherv");
+  if(comm == MPI_COMM_NULL) {
+    retval = MPI_ERR_COMM;
+    goto done;
+  }
+  if(sendtype == MPI_DATATYPE_NULL || recvtype == MPI_DATATYPE_NULL) {
+    retval = MPI_ERR_TYPE;
+    goto done;
+  }
+  if(recvcounts == NULL || displs == NULL) {
+    retval = MPI_ERR_ARG;
+    goto done;
+  }
+  smpi_mpi_allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm);
+  retval = MPI_SUCCESS;
+
+done:
+  smpi_bench_begin(comm, "Allgatherv");
+  return retval;
+}
+
+/**
+ * MPI_Scatter user entry point
+ *
+ * Validation, in order: MPI_ERR_COMM for a null communicator, MPI_ERR_TYPE
+ * when either datatype is MPI_DATATYPE_NULL. When both pass,
+ * smpi_mpi_scatter() is called and MPI_SUCCESS is returned. Every path is
+ * bracketed by smpi_bench_end() / smpi_bench_begin() on comm.
+ **/
+int MPI_Scatter(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) {
+  int retval;
+
+  smpi_bench_end(comm, "Scatter");
+  if(comm == MPI_COMM_NULL) {
+    retval = MPI_ERR_COMM;
+    goto done;
+  }
+  if(sendtype == MPI_DATATYPE_NULL || recvtype == MPI_DATATYPE_NULL) {
+    retval = MPI_ERR_TYPE;
+    goto done;
+  }
+  smpi_mpi_scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm);
+  retval = MPI_SUCCESS;
+
+done:
+  smpi_bench_begin(comm, "Scatter");
+  return retval;
+}
+
+/**
+ * MPI_Scatterv user entry point
+ *
+ * Validation, in order: MPI_ERR_COMM for a null communicator, MPI_ERR_TYPE
+ * when either datatype is MPI_DATATYPE_NULL, MPI_ERR_ARG when sendcounts
+ * or displs is NULL. When all pass, smpi_mpi_scatterv() is called and
+ * MPI_SUCCESS is returned. Every path is bracketed by
+ * smpi_bench_end() / smpi_bench_begin() on comm.
+ **/
+int MPI_Scatterv(void* sendbuf, int* sendcounts, int* displs, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) {
+  /* Start from the first failure mode; refined below as checks pass. */
+  int retval = MPI_ERR_COMM;
+
+  smpi_bench_end(comm, "Scatterv");
+  if(comm != MPI_COMM_NULL) {
+    if(sendtype == MPI_DATATYPE_NULL || recvtype == MPI_DATATYPE_NULL) {
+      retval = MPI_ERR_TYPE;
+    } else if(sendcounts == NULL || displs == NULL) {
+      retval = MPI_ERR_ARG;
+    } else {
+      smpi_mpi_scatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm);
+      retval = MPI_SUCCESS;
+    }
+  }
+  smpi_bench_begin(comm, "Scatterv");
+
+  return retval;
+}
+
+/**
+ * MPI_Reduce user entry point
+ *
+ * Validation, in order: MPI_ERR_COMM for a null communicator, MPI_ERR_TYPE
+ * for MPI_DATATYPE_NULL, MPI_ERR_OP for MPI_OP_NULL (the same error codes
+ * MPI_Allreduce and MPI_Reduce_scatter report for these conditions). When
+ * all pass, smpi_mpi_reduce() is called and MPI_SUCCESS is returned. Every
+ * path is bracketed by smpi_bench_end() / smpi_bench_begin() on comm.
+ **/
+int MPI_Reduce(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) {
+  int retval;
+
+  smpi_bench_end(comm, "Reduce");
+  if(comm == MPI_COMM_NULL) {
+    retval = MPI_ERR_COMM;
+  } else if(datatype == MPI_DATATYPE_NULL) {
+    retval = MPI_ERR_TYPE;
+  } else if(op == MPI_OP_NULL) {
+    retval = MPI_ERR_OP;
+  } else {
+    smpi_mpi_reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
+    retval = MPI_SUCCESS;
+  }
+  smpi_bench_begin(comm, "Reduce");
+  return retval;
+}
+
+/**
+ * MPI_Allreduce user entry point
+ *
+ * Validation, in order: MPI_ERR_COMM for a null communicator, MPI_ERR_TYPE
+ * for MPI_DATATYPE_NULL, MPI_ERR_OP for MPI_OP_NULL. When all pass,
+ * smpi_mpi_allreduce() is called and MPI_SUCCESS is returned. Every path
+ * is bracketed by smpi_bench_end() / smpi_bench_begin() on comm.
+ **/
+int MPI_Allreduce(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) {
+  int retval;
+
+  smpi_bench_end(comm, "Allreduce");
+  if(comm == MPI_COMM_NULL) {
+    retval = MPI_ERR_COMM;
+    goto done;
+  }
+  if(datatype == MPI_DATATYPE_NULL) {
+    retval = MPI_ERR_TYPE;
+    goto done;
+  }
+  if(op == MPI_OP_NULL) {
+    retval = MPI_ERR_OP;
+    goto done;
+  }
+  smpi_mpi_allreduce(sendbuf, recvbuf, count, datatype, op, comm);
+  retval = MPI_SUCCESS;
+
+done:
+  smpi_bench_begin(comm, "Allreduce");
+  return retval;
+}
+
+/**
+ * MPI_Reduce_scatter user entry point
+ *
+ * Validation, in order: MPI_ERR_COMM for a null communicator, MPI_ERR_TYPE
+ * for MPI_DATATYPE_NULL, MPI_ERR_OP for MPI_OP_NULL, MPI_ERR_ARG when
+ * recvcounts is NULL. Every path is bracketed by
+ * smpi_bench_end() / smpi_bench_begin() on comm.
+ *
+ * Implemented as a reduce to rank 0 of sum(recvcounts) elements followed by
+ * a scatterv from rank 0, in which rank i receives the recvcounts[i]
+ * elements that start at element offset recvcounts[0] + ... + recvcounts[i-1]
+ * of the reduced vector.
+ **/
+int MPI_Reduce_scatter(void* sendbuf, void* recvbuf, int* recvcounts, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) {
+  int retval, i, rank, size, count;
+  int* displs;
+  void* tmpbuf;
+
+  smpi_bench_end(comm, "Reduce_scatter");
+  if(comm == MPI_COMM_NULL) {
+    retval = MPI_ERR_COMM;
+  } else if(datatype == MPI_DATATYPE_NULL) {
+    retval = MPI_ERR_TYPE;
+  } else if(op == MPI_OP_NULL) {
+    retval = MPI_ERR_OP;
+  } else if(recvcounts == NULL) {
+    retval = MPI_ERR_ARG;
+  } else {
+    /* arbitrarily choose root as rank 0 */
+    /* TODO: faster direct implementation ? */
+    rank = smpi_comm_rank(comm);
+    size = smpi_comm_size(comm);
+    count = 0;
+    displs = xbt_new(int, size);
+    for(i = 0; i < size; i++) {
+      /* Segment i starts where the previous segments end. */
+      displs[i] = count;
+      count += recvcounts[i];
+    }
+    /* Reduce into a scratch buffer holding the whole vector: recvbuf is only
+     * used as a recvcounts[rank]-element buffer below, and must not double
+     * as both source and destination of the scatter. At least one byte is
+     * requested so that an all-zero recvcounts never asks for 0 bytes. */
+    tmpbuf = xbt_new(char, count > 0 ? smpi_datatype_size(datatype) * count : 1);
+    smpi_mpi_reduce(sendbuf, tmpbuf, count, datatype, op, 0, comm);
+    smpi_mpi_scatterv(tmpbuf, recvcounts, displs, datatype, recvbuf, recvcounts[rank], datatype, 0, comm);
+    xbt_free(tmpbuf);
+    xbt_free(displs);
+    retval = MPI_SUCCESS;
+  }
+  smpi_bench_begin(comm, "Reduce_scatter");
+  return retval;
+}
+
+/**
+ * MPI_Alltoall user entry point
+ *
+ * Uses the logic of OpenMPI (up to 1.2.7 or greater) for the optimizations
+ * ompi/mca/coll/tuned/coll_tuned_module.c
+ **/
+
+int MPI_Alltoall(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) {
+  int retval, nprocs, msgbytes;
+
+  smpi_bench_end(comm, "Alltoall");
+  if(comm == MPI_COMM_NULL) {
+    retval = MPI_ERR_COMM;
+    goto done;
+  }
+  if(sendtype == MPI_DATATYPE_NULL || recvtype == MPI_DATATYPE_NULL) {
+    retval = MPI_ERR_TYPE;
+    goto done;
+  }
+
+  /* Algorithm selection depends on the per-peer message size in bytes and
+   * on the number of processes in the communicator. */
+  nprocs = smpi_comm_size(comm);
+  msgbytes = smpi_datatype_size(sendtype) * sendcount;
+  if(msgbytes < 200 && nprocs > 12) {
+    /* messages under 200 bytes on more than 12 processes: Bruck */
+    retval = smpi_coll_tuned_alltoall_bruck(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
+  } else if(msgbytes < 3000) {
+    /* other messages under 3000 bytes: basic linear */
+    retval = smpi_coll_tuned_alltoall_basic_linear(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
+  } else {
+    /* everything else: pairwise exchange */
+    retval = smpi_coll_tuned_alltoall_pairwise(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
+  }
+
+done:
+  smpi_bench_begin(comm, "Alltoall");
+  return retval;
+}