class Comm;
class Group;
+class Op;
class Request;
class Topo;
class Win;
typedef simgrid::smpi::Comm SMPI_Comm;
typedef simgrid::smpi::Group SMPI_Group;
+typedef simgrid::smpi::Op SMPI_Op;
typedef simgrid::smpi::Request SMPI_Request;
typedef simgrid::smpi::Topo SMPI_Topology;
typedef simgrid::smpi::Win SMPI_Win;
typedef struct SMPI_Comm SMPI_Comm;
typedef struct SMPI_Group SMPI_Group;
+typedef struct SMPI_Op SMPI_Op;
typedef struct SMPI_Request SMPI_Request;
typedef struct SMPI_Topology SMPI_Topology;
typedef struct SMPI_Win SMPI_Win;
#define MPI_2DOUBLE_PRECISION MPI_2DOUBLE
typedef void MPI_User_function(void *invec, void *inoutvec, int *len, MPI_Datatype * datatype);
-struct s_smpi_mpi_op;
-typedef struct s_smpi_mpi_op *MPI_Op;
+typedef SMPI_Op *MPI_Op;
#define MPI_OP_NULL ((MPI_Op)NULL)
XBT_PUBLIC_DATA( MPI_Op ) MPI_MAX;
((rank + size - 1) % size), tag + i, comm, &status);
// compute result to rbuf+recv_offset
- smpi_op_apply(op, (char *) sbuf + recv_offset, (char *) rbuf + recv_offset,
+ if(op!=MPI_OP_NULL) op->apply( (char *) sbuf + recv_offset, (char *) rbuf + recv_offset,
&count, &dtype);
}
int comm_size = comm->size();
int rank = comm->rank();
- is_commutative = smpi_op_is_commute(op);
+ is_commutative = (op==MPI_OP_NULL || op->is_commutative());
/* need to allocate temporary buffer to store incoming data */
smpi_datatype_extent(datatype, &true_lb, &true_extent);
/* do the reduction on received data. since the
ordering is right, it doesn't matter whether
the operation is commutative or not. */
- smpi_op_apply(op, tmp_buf, recvbuf, &count, &datatype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, recvbuf, &count, &datatype);
/* change the rank */
newrank = rank / 2;
}
if (is_commutative || (dst < rank)) {
/* op is commutative OR the order is already right */
- smpi_op_apply(op, tmp_buf, recvbuf, &count, &datatype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, recvbuf, &count, &datatype);
} else {
/* op is noncommutative and the order is not right */
- smpi_op_apply(op, recvbuf, tmp_buf, &count, &datatype);
+ if(op!=MPI_OP_NULL) op->apply( recvbuf, tmp_buf, &count, &datatype);
/* copy result back into recvbuf */
mpi_errno = smpi_datatype_copy(tmp_buf, count, datatype,
recvbuf, count, datatype);
/* This algorithm is used only for predefined ops
and predefined ops are always commutative. */
- smpi_op_apply(op, (char *) tmp_buf + disps[recv_idx] * extent,
+ if(op!=MPI_OP_NULL) op->apply( (char *) tmp_buf + disps[recv_idx] * extent,
(char *) recvbuf + disps[recv_idx] * extent,
&recv_cnt, &datatype);
(phase * early_phase_segcount) :
(phase * late_phase_segcount + split_phase));
tmprecv = ((char*)rbuf) + (block_offset + phase_offset) * extent;
- smpi_op_apply(op, inbuf[inbi ^ 0x1], tmprecv, &phase_count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( inbuf[inbi ^ 0x1], tmprecv, &phase_count, &dtype);
/* send previous block to send_to */
Request::send(tmprecv, phase_count, dtype, send_to,
666, comm);
(phase * early_phase_segcount) :
(phase * late_phase_segcount + split_phase));
tmprecv = ((char*)rbuf) + (block_offset + phase_offset) * extent;
- smpi_op_apply(op, inbuf[inbi], tmprecv, &phase_count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( inbuf[inbi], tmprecv, &phase_count, &dtype);
}
/* Distribution loop - variation of ring allgather */
// do the reduction on received data. since the
// ordering is right, it doesn't matter whether
// the operation is commutative or not.
- smpi_op_apply(op, tmp_buf, rbuff, &count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, rbuff, &count, &dtype);
// change the rank
newrank = rank / 2;
// This algorithm is used only for predefined ops
// and predefined ops are always commutative.
- smpi_op_apply(op, (char *) tmp_buf + disps[recv_idx] * extent,
+ if(op!=MPI_OP_NULL) op->apply( (char *) tmp_buf + disps[recv_idx] * extent,
(char *) rbuff + disps[recv_idx] * extent, &recv_cnt, &dtype);
// update send_idx for next iteration
Request::sendrecv((char *) recv + send_idx * extent, send_cnt, dtype, dst, tag,
tmp_buf, recv_cnt, dtype, dst, tag, comm, &status);
- smpi_op_apply(op, tmp_buf, (char *) recv + recv_idx * extent, &recv_cnt,
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, (char *) recv + recv_idx * extent, &recv_cnt,
&dtype);
// update send_idx for next iteration
Request::sendrecv((char *) rbuff + send_idx * extent, send_cnt, dtype, dst,
tag, tmp_buf, recv_cnt, dtype, dst, tag, comm, &status);
- smpi_op_apply(op, tmp_buf, (char *) rbuff + recv_idx * extent, &recv_cnt,
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, (char *) rbuff + recv_idx * extent, &recv_cnt,
&dtype);
// update send_idx for next iteration
memcpy(tmp, recv, nbytes);
for (i = 1, s_offset = nbytes; i < nprocs; i++, s_offset = i * nbytes)
- smpi_op_apply(op, (char *) recv + s_offset, tmp, &send_size, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( (char *) recv + s_offset, tmp, &send_size, &dtype);
mpi_coll_allgather_fun(tmp, send_size, dtype, recv, send_size, dtype, comm);
memcpy(rbuff, recv, count * s_extent);
memcpy((char *) rbuff + r_offset, recv, nbytes);
for (i = 1, s_offset = nbytes; i < nprocs; i++, s_offset = i * nbytes)
- smpi_op_apply(op, (char *) recv + s_offset, (char *) rbuff + r_offset,
+ if(op!=MPI_OP_NULL) op->apply( (char *) recv + s_offset, (char *) rbuff + r_offset,
&send_size, &dtype);
mpi_coll_allgather_fun((char *) rbuff + r_offset, send_size, dtype, rbuff, send_size,
// do the reduction on received data. since the
// ordering is right, it doesn't matter whether
// the operation is commutative or not.
- smpi_op_apply(op, tmp_buf, rbuff, &count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, rbuff, &count, &dtype);
// change the rank
newrank = rank / 2;
// we assume it is commuttive op
// if (op -> op_commute || (dst < rank))
if ((dst < rank)) {
- smpi_op_apply(op, tmp_buf, rbuff, &count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, rbuff, &count, &dtype);
} else // op is noncommutative and the order is not right
{
- smpi_op_apply(op, rbuff, tmp_buf, &count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( rbuff, tmp_buf, &count, &dtype);
// copy result back into recvbuf
Request::sendrecv(tmp_buf, count, dtype, rank, tag, rbuff, count,
if (src < comm_size) {
recv_offset = phase * pcount * extent;
Request::recv(tmp_buf, pcount, dtype, src, tag, comm, &status);
- smpi_op_apply(op, tmp_buf, (char *)recv_buf + recv_offset, &pcount, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, (char *)recv_buf + recv_offset, &pcount, &dtype);
}
} else {
send_offset = phase * pcount * extent;
if (src < comm_size) {
recv_offset = (phase - 1) * pcount * extent;
Request::recv(tmp_buf, pcount, dtype, src, tag, comm, &status);
- smpi_op_apply(op, tmp_buf, (char *)recv_buf + recv_offset, &pcount, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, (char *)recv_buf + recv_offset, &pcount, &dtype);
}
} else {
dst = (inter_rank & (~mask)) * num_core;
src = (inter_rank * num_core) + (intra_rank | mask);
if (src < comm_size) {
Request::recv(tmp_buf, count, dtype, src, tag, comm, &status);
- smpi_op_apply(op, tmp_buf, recv_buf, &count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, recv_buf, &count, &dtype);
}
} else {
dst = (inter_rank * num_core) + (intra_rank & (~mask));
src = (inter_rank | mask) * num_core;
if (src < comm_size) {
Request::recv(tmp_buf, count, dtype, src, tag, comm, &status);
- smpi_op_apply(op, tmp_buf, recv_buf, &count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, recv_buf, &count, &dtype);
}
} else {
dst = (inter_rank & (~mask)) * num_core;
src = (inter_rank * num_core) + (intra_rank | mask);
if (src < comm_size) {
Request::recv(tmp_buf, count, dtype, src, tag, comm, &status);
- smpi_op_apply(op, tmp_buf, recv_buf, &count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, recv_buf, &count, &dtype);
}
} else {
dst = (inter_rank * num_core) + (intra_rank & (~mask));
} else {
src = rank - num_core;
Request::recv(tmp_buf, count, dtype, src, tag, comm, &status);
- smpi_op_apply(op, tmp_buf, recv_buf, &count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, recv_buf, &count, &dtype);
newrank = inter_rank / 2;
}
} else {
/* exchange data in rdb manner */
Request::sendrecv(recv_buf, count, dtype, dst, tag, tmp_buf, count, dtype,
dst, tag, comm, &status);
- smpi_op_apply(op, tmp_buf, recv_buf, &count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, recv_buf, &count, &dtype);
mask <<= 1;
}
}
// if (src < ((inter_rank + 1) * num_core)) {
if (src < comm_size) {
Request::recv(tmp_buf, count, dtype, src, tag, comm, &status);
- smpi_op_apply(op, tmp_buf, recv_buf, &count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, recv_buf, &count, &dtype);
//printf("Node %d recv from node %d when mask is %d\n", rank, src, mask);
}
} else {
&status);
// result is in rbuf
- smpi_op_apply(op, tmp_buf, (char *) recv_buf + recv_offset, &recv_count,
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, (char *) recv_buf + recv_offset, &recv_count,
&dtype);
}
// if (src < ((inter_rank + 1) * num_core)) {
if (src < comm_size) {
Request::recv(tmp_buf, count, dtype, src, tag, comm, &status);
- smpi_op_apply(op, tmp_buf, rbuf, &count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, rbuf, &count, &dtype);
//printf("Node %d recv from node %d when mask is %d\n", rank, src, mask);
}
} else {
tmp_buf, curr_count, dtype, (dst * num_core), tag,
comm, &status);
- smpi_op_apply(op, tmp_buf, (char *)rbuf + recv_offset, &curr_count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, (char *)rbuf + recv_offset, &curr_count, &dtype);
mask *= 2;
curr_count /= 2;
// if (src < ((inter_rank + 1) * num_core)) {
if (src < comm_size) {
Request::recv(tmp_buf, count, dtype, src, tag, comm, &status);
- smpi_op_apply(op, tmp_buf, recv_buf, &count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, recv_buf, &count, &dtype);
//printf("Node %d recv from node %d when mask is %d\n", rank, src, mask);
}
} else {
&status);
// result is in rbuf
- smpi_op_apply(op, tmp_buf, (char *) recv_buf + recv_offset, &seg_count,
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, (char *) recv_buf + recv_offset, &seg_count,
&dtype);
}
if (count <= segment) {
if (rank == root) {
Request::recv(tmp_buf, count, datatype, from, tag, comm, &status);
- smpi_op_apply(op, tmp_buf, rbuf, &count, &datatype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, rbuf, &count, &datatype);
} else if (rank == ((root - 1 + size) % size)) {
Request::send(rbuf, count, datatype, to, tag, comm);
} else {
Request::recv(tmp_buf, count, datatype, from, tag, comm, &status);
- smpi_op_apply(op, tmp_buf, rbuf, &count, &datatype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, rbuf, &count, &datatype);
Request::send(rbuf, count, datatype, to, tag, comm);
}
smpi_free_tmp_buffer(tmp_buf);
}
for (i = 0; i < pipe_length; i++) {
Request::wait(&recv_request_array[i], &status);
- smpi_op_apply(op, tmp_buf + (i * increment), (char *)rbuf + (i * increment),
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf + (i * increment), (char *)rbuf + (i * increment),
&segment, &datatype);
}
}
}
for (i = 0; i < pipe_length; i++) {
Request::wait(&recv_request_array[i], &status);
- smpi_op_apply(op, tmp_buf + (i * increment), (char *)rbuf + (i * increment),
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf + (i * increment), (char *)rbuf + (i * increment),
&segment, &datatype);
send_request_array[i] = Request::isend((char *) rbuf + (i * increment), segment, datatype, to,
(tag + i), comm);
Request::send(header_buf, HEADER_SIZE, MPI_INT, to, tag, comm);
Request::recv(tmp_buf, count, datatype, from, tag, comm, &status);
- smpi_op_apply(op, tmp_buf, rbuf, &count, &datatype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, rbuf, &count, &datatype);
}
} /* while loop */
}
}
from = header_buf[myordering - 1];
Request::recv(tmp_buf, count, datatype, from, tag, comm, &status);
- smpi_op_apply(op, tmp_buf, rbuf, &count, &datatype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, rbuf, &count, &datatype);
Request::send(rbuf, count, datatype, to, tag, comm);
}
} /* non-root */
for (i = 0; i < pipe_length; i++) {
Request::recv(tmp_buf + (i * increment), segment, datatype, from, tag,
comm, &status);
- smpi_op_apply(op, tmp_buf + (i * increment),
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf + (i * increment),
(char *)rbuf + (i * increment), &segment, &datatype);
}
}
}
for (i = 0; i < pipe_length; i++) {
Request::wait(&recv_request_array[i], MPI_STATUS_IGNORE);
- smpi_op_apply(op, tmp_buf + (i * increment), (char *)rbuf + (i * increment),
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf + (i * increment), (char *)rbuf + (i * increment),
&segment, &datatype);
send_request_array[i]=Request::isend((char *)rbuf + (i * increment), segment, datatype, to, tag, comm);
}
extent = smpi_datatype_get_extent(datatype);
tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent);
- int is_commutative = smpi_op_is_commute(op);
+ int is_commutative = (op==MPI_OP_NULL || op->is_commutative());
mask = 1;
int lroot;
Request::recv(tmp_buf, count, datatype, source, tag, comm, &status);
if (is_commutative) {
- smpi_op_apply(op, tmp_buf, recvbuf, &count, &datatype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, recvbuf, &count, &datatype);
} else {
- smpi_op_apply(op, recvbuf, tmp_buf, &count, &datatype);
+ if(op!=MPI_OP_NULL) op->apply( recvbuf, tmp_buf, &count, &datatype);
smpi_datatype_copy(tmp_buf, count, datatype,recvbuf, count, datatype);
}
}
}
/* Call reduction function. */
- smpi_op_apply(op, inbuf, rbuf, &count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( inbuf, rbuf, &count, &dtype);
}
smpi_datatype_extent(datatype, &true_lb, &true_extent);
extent = smpi_datatype_get_extent(datatype);
- is_commutative = smpi_op_is_commute(op);
+ is_commutative = (op==MPI_OP_NULL || op->is_commutative());
if (rank != root) {
recvbuf=(void *)smpi_get_tmp_recvbuffer(count*(MAX(extent,true_extent)));
recv_iter++;
if (is_commutative) {
- smpi_op_apply(op, tmp_buf[index], recvbuf, &count, &datatype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf[index], recvbuf, &count, &datatype);
}
}
leader_of_root = comm->group()->rank(leaders_map[root]);
leader_root = leader_comm->group()->rank(leaders_map[root]);
- is_commutative=smpi_op_is_commute(op);
+ is_commutative= (op==MPI_OP_NULL || op->is_commutative());
smpi_datatype_extent(datatype, &true_lb,
&true_extent);
/* If this is a non-commutative operation we must copy
sendbuf to the accumbuf, in order to simplfy the loops */
- if (!smpi_op_is_commute(op)) {
+ if ( (op!=MPI_OP_NULL && !op->is_commutative())) {
smpi_datatype_copy(
(char*)sendtmpbuf, original_count, datatype,
(char*)accumbuf, original_count, datatype);
* BUT if the operation is non-commutative or
* we are root and are USING MPI_IN_PLACE this is wrong!
*/
- if( (smpi_op_is_commute(op)) &&
+ if( (op==MPI_OP_NULL || op->is_commutative()) &&
!((MPI_IN_PLACE == sendbuf) && (rank == tree->tree_root)) ) {
local_recvbuf = accumbuf + segindex * segment_increment;
}
* not using MPI_IN_PLACE)
*/
if( 1 == i ) {
- if( (smpi_op_is_commute(op)) &&
+ if( (op==MPI_OP_NULL || op->is_commutative())&&
!((MPI_IN_PLACE == sendbuf) && (rank == tree->tree_root)) ) {
local_op_buffer = sendtmpbuf + segindex * segment_increment;
}
}
/* apply operation */
- smpi_op_apply(op, local_op_buffer,
+ if(op!=MPI_OP_NULL) op->apply( local_op_buffer,
accumbuf + segindex * segment_increment,
&recvcount, &datatype );
} else if ( segindex > 0 ) {
void* accumulator = accumbuf + (segindex-1) * segment_increment;
if( tree->tree_nextsize <= 1 ) {
- if( (smpi_op_is_commute(op)) &&
+ if( (op==MPI_OP_NULL || op->is_commutative()) &&
!((MPI_IN_PLACE == sendbuf) && (rank == tree->tree_root)) ) {
local_op_buffer = sendtmpbuf + (segindex-1) * segment_increment;
}
}
- smpi_op_apply(op, local_op_buffer, accumulator, &prevcount,
+ if(op!=MPI_OP_NULL) op->apply( local_op_buffer, accumulator, &prevcount,
&datatype );
/* all reduced on available data this step (i) complete,
}
/* Perform the reduction */
- smpi_op_apply(op, inbuf, rbuf, &count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( inbuf, rbuf, &count, &dtype);
}
if (NULL != inplace_temp) {
newrank = -1;
} else {
Request::recv(tmp_buf, count, datatype, rank + 1, tag, comm, &status);
- smpi_op_apply(op, tmp_buf, recv_ptr, &new_count, &datatype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, recv_ptr, &new_count, &datatype);
newrank = rank / 2;
}
} else /* rank >= 2*rem */
/* tmp_buf contains data received in this step.
recvbuf contains data accumulated so far */
- smpi_op_apply(op, (char *) tmp_buf + disps[recv_idx] * extent,
+ if(op!=MPI_OP_NULL) op->apply( (char *) tmp_buf + disps[recv_idx] * extent,
(char *) recv_ptr + disps[recv_idx] * extent,
&recv_cnt, &datatype);
else {
Request::recv(tmp_buf, count, datatype, rank + 1, tag, comm, &status);
- smpi_op_apply(op, tmp_buf, recvbuf, &count, &datatype);
+ if(op!=MPI_OP_NULL) op->apply( tmp_buf, recvbuf, &count, &datatype);
newrank = rank / 2;
}
} else /* rank >= 2*rem */
/* tmp_buf contains data received in this step.
recvbuf contains data accumulated so far */
- smpi_op_apply(op, (char *) tmp_buf + disps[recv_idx] * extent,
+ if(op!=MPI_OP_NULL) op->apply( (char *) tmp_buf + disps[recv_idx] * extent,
(char *) recvbuf + disps[recv_idx] * extent,
&recv_cnt, &datatype);
extent =smpi_datatype_get_extent(datatype);
smpi_datatype_extent(datatype, &true_lb, &true_extent);
- if (smpi_op_is_commute(op)) {
+ if (op->is_commutative()) {
is_commutative = 1;
}
if (is_commutative || (src < rank)) {
if (sendbuf != MPI_IN_PLACE) {
- smpi_op_apply( op,
+ if(op!=MPI_OP_NULL) op->apply(
tmp_recvbuf, recvbuf, &recvcounts[rank],
&datatype);
}
else {
- smpi_op_apply(op,
+ if(op!=MPI_OP_NULL) op->apply(
tmp_recvbuf, ((char *)recvbuf+disps[rank]*extent),
&recvcounts[rank], &datatype);
/* we can't store the result at the beginning of
}
else {
if (sendbuf != MPI_IN_PLACE) {
- smpi_op_apply(op,
+ if(op!=MPI_OP_NULL) op->apply(
recvbuf, tmp_recvbuf, &recvcounts[rank], &datatype);
/* copy result back into recvbuf */
mpi_errno = smpi_datatype_copy(tmp_recvbuf, recvcounts[rank],
if (mpi_errno) return(mpi_errno);
}
else {
- smpi_op_apply(op,
+ if(op!=MPI_OP_NULL) op->apply(
((char *)recvbuf+disps[rank]*extent),
tmp_recvbuf, &recvcounts[rank], &datatype);
/* copy result back into recvbuf */
is now our peer's responsibility */
if (rank > peer) {
/* higher ranked value so need to call op(received_data, my_data) */
- smpi_op_apply(op,
+ if(op!=MPI_OP_NULL) op->apply(
incoming_data + recv_offset*true_extent,
outgoing_data + recv_offset*true_extent,
&size, &datatype );
}
else {
/* lower ranked value so need to call op(my_data, received_data) */
- smpi_op_apply( op,
+ if(op!=MPI_OP_NULL) op->apply(
outgoing_data + recv_offset*true_extent,
incoming_data + recv_offset*true_extent,
&size, &datatype);
extent =smpi_datatype_get_extent(datatype);
smpi_datatype_extent(datatype, &true_lb, &true_extent);
- if (smpi_op_is_commute(op)) {
+ if ((op==MPI_OP_NULL) || op->is_commutative()) {
is_commutative = 1;
}
if (received) {
if (is_commutative || (dst_tree_root < my_tree_root)) {
{
- smpi_op_apply(op,
+ if(op!=MPI_OP_NULL) op->apply(
tmp_recvbuf, tmp_results, &blklens[0],
&datatype);
- smpi_op_apply(op,
+ if(op!=MPI_OP_NULL) op->apply(
((char *)tmp_recvbuf + dis[1]*extent),
((char *)tmp_results + dis[1]*extent),
&blklens[1], &datatype);
}
else {
{
- smpi_op_apply(op,
+ if(op!=MPI_OP_NULL) op->apply(
tmp_results, tmp_recvbuf, &blklens[0],
&datatype);
- smpi_op_apply(op,
+ if(op!=MPI_OP_NULL) op->apply(
((char *)tmp_results + dis[1]*extent),
((char *)tmp_recvbuf + dis[1]*extent),
&blklens[1], &datatype);
size = comm->size();
XBT_DEBUG("coll:tuned:reduce_scatter_ompi_basic_recursivehalving, rank %d", rank);
- if(!smpi_op_is_commute(op))
+ if( (op!=MPI_OP_NULL && !op->is_commutative()))
THROWF(arg_error,0, " reduce_scatter ompi_basic_recursivehalving can only be used for commutative operations! ");
/* Find displacements and the like */
comm, MPI_STATUS_IGNORE);
/* integrate their results into our temp results */
- smpi_op_apply(op, recv_buf, result_buf, &count, &dtype);
+ if(op!=MPI_OP_NULL) op->apply( recv_buf, result_buf, &count, &dtype);
/* adjust rank to be the bottom "remain" ranks */
tmp_rank = rank / 2;
/* if we received something on this step, push it into
the results buffer */
if (recv_count > 0) {
- smpi_op_apply(op,
+ if(op!=MPI_OP_NULL) op->apply(
recv_buf + (ptrdiff_t)tmp_disps[recv_index] * extent,
result_buf + (ptrdiff_t)tmp_disps[recv_index] * extent,
&recv_count, &dtype);
rbuf[prevblock] = inbuf[inbi ^ 0x1] (op) rbuf[prevblock]
*/
tmprecv = accumbuf + (ptrdiff_t)displs[prevblock] * extent;
- smpi_op_apply(op, inbuf[inbi ^ 0x1], tmprecv, &(rcounts[prevblock]), &dtype);
+ if(op!=MPI_OP_NULL) op->apply( inbuf[inbi ^ 0x1], tmprecv, &(rcounts[prevblock]), &dtype);
/* send previous block to send_to */
Request::send(tmprecv, rcounts[prevblock], dtype, send_to,
/* Apply operation on the last block (my block)
rbuf[rank] = inbuf[inbi] (op) rbuf[rank] */
tmprecv = accumbuf + (ptrdiff_t)displs[rank] * extent;
- smpi_op_apply(op, inbuf[inbi], tmprecv, &(rcounts[rank]), &dtype);
+ if(op!=MPI_OP_NULL) op->apply( inbuf[inbi], tmprecv, &(rcounts[rank]), &dtype);
/* Copy result from tmprecv to rbuf */
ret = smpi_datatype_copy(tmprecv, rcounts[rank], dtype, (char*)rbuf, rcounts[rank], dtype);
MPI_Op op,
MPI_Comm comm)
{
- if(smpi_op_is_commute(op))
+ if(op==MPI_OP_NULL || op->is_commutative())
return smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(sbuf, rbuf, rcounts,dtype, op,comm);
else
return smpi_coll_tuned_reduce_scatter_mvapich2(sbuf, rbuf, rcounts,dtype, op,comm);
while (pof2 <= comm_size) pof2 <<= 1;
pof2 >>=1;
- if (block_dsize > large_message && count >= pof2 && smpi_op_is_commute(op)) {
+ if (block_dsize > large_message && count >= pof2 && (op==MPI_OP_NULL || op->is_commutative())) {
//for long messages
return (smpi_coll_tuned_allreduce_rab_rdb (sbuf, rbuf,
count, dtype,
pof2 >>= 1;
- if ((count < pof2) || (message_size < 2048) || !smpi_op_is_commute(op)) {
+ if ((count < pof2) || (message_size < 2048) || (op!=MPI_OP_NULL && !op->is_commutative())) {
return smpi_coll_tuned_reduce_binomial (sendbuf, recvbuf, count, datatype, op, root, comm);
}
return smpi_coll_tuned_reduce_scatter_gather(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
total_message_size += rcounts[i];
}
- if( smpi_op_is_commute(op) && total_message_size > 524288) {
+ if( (op==MPI_OP_NULL || op->is_commutative()) && total_message_size > 524288) {
return smpi_coll_tuned_reduce_scatter_mpich_pair (sbuf, rbuf, rcounts,
dtype, op,
comm);
- }else if (!smpi_op_is_commute(op)) {
+ }else if ((op!=MPI_OP_NULL && !op->is_commutative())) {
int is_block_regular = 1;
for (i = 0; i < (comm_size - 1); ++i) {
if (rcounts[i] != rcounts[i+1]) {
smpi_datatype_extent(datatype, &true_lb, &true_extent);
//MPI_Op *op_ptr;
- //is_commutative = smpi_op_is_commute(op);
+ //is_commutative = op->is_commutative();
{
/* Search for the corresponding system size inside the tuning table */
if (count == 0)
return MPI_SUCCESS;
- is_commutative = smpi_op_is_commute(op);
+ is_commutative = (op==MPI_OP_NULL || op->is_commutative());
/* find nearest power-of-two less than or equal to comm_size */
for( pof2 = 1; pof2 <= comm_size; pof2 <<= 1 );
if(mv2_red_scat_thresholds_table==NULL)
init_mv2_reduce_scatter_tables_stampede();
- is_commutative=smpi_op_is_commute(op);
+ is_commutative=(op==MPI_OP_NULL || op->is_commutative());
for (i = 0; i < comm_size; i++) {
disps[i] = total_count;
total_count += recvcnts[i];
op, comm));
}
- if( smpi_op_is_commute(op) && (count > comm_size) ) {
+ if( ((op==MPI_OP_NULL) || op->is_commutative()) && (count > comm_size) ) {
const size_t segment_size = 1 << 20; /* 1 MB */
if ((comm_size * segment_size >= block_dsize)) {
//FIXME: ok, these are not the right algorithms, try to find closer ones
* If the operation is non commutative we currently have choice of linear
* or in-order binary tree algorithm.
*/
- if( !smpi_op_is_commute(op) ) {
+ if( (op!=MPI_OP_NULL) && !op->is_commutative() ) {
if ((communicator_size < 12) && (message_size < 2048)) {
return smpi_coll_tuned_reduce_ompi_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm/*, module*/);
}
}
}
- if( !smpi_op_is_commute(op) || (zerocounts)) {
+ if( ((op!=MPI_OP_NULL) && !op->is_commutative()) || (zerocounts)) {
smpi_mpi_reduce_scatter (sbuf, rbuf, rcounts,
dtype, op,
comm);
#include "xbt/xbt_os_time.h"
#include "src/smpi/smpi_group.hpp"
#include "src/smpi/smpi_comm.hpp"
+#include "src/smpi/smpi_op.hpp"
#include "src/smpi/smpi_request.hpp"
#include "src/smpi/smpi_topo.hpp"
#include "src/smpi/smpi_win.hpp"
XBT_PRIVATE void smpi_empty_status(MPI_Status * status);
XBT_PRIVATE int smpi_mpi_get_count(MPI_Status * status, MPI_Datatype datatype);
-XBT_PRIVATE MPI_Op smpi_op_new(MPI_User_function * function, bool commute);
-XBT_PRIVATE bool smpi_op_is_commute(MPI_Op op);
-XBT_PRIVATE void smpi_op_destroy(MPI_Op op);
-XBT_PRIVATE void smpi_op_set_fortran(MPI_Op op);
-XBT_PRIVATE void smpi_op_apply(MPI_Op op, void *invec, void *inoutvec, int *len, MPI_Datatype * datatype);
-
XBT_PRIVATE int smpi_comm_c2f(MPI_Comm comm);
XBT_PRIVATE int smpi_comm_add_f(MPI_Comm comm);
int rank = comm->rank();
int size = comm->size();
//non commutative case, use a working algo from openmpi
- if(!smpi_op_is_commute(op)){
+ if(op != MPI_OP_NULL && !op->is_commutative()){
smpi_coll_tuned_reduce_ompi_basic_linear(sendtmpbuf, recvbuf, count, datatype, op, root, comm);
return;
}
Request::unuse(&requests[index]);
}
if(op) /* op can be MPI_OP_NULL that does nothing */
- smpi_op_apply(op, tmpbufs[index], recvbuf, &count, &datatype);
+ if(op!=MPI_OP_NULL) op->apply( tmpbufs[index], recvbuf, &count, &datatype);
}
for(index = 0; index < size - 1; index++) {
smpi_free_tmp_buffer(tmpbufs[index]);
// Wait for completion of all comms.
Request::startall(size - 1, requests);
- if(smpi_op_is_commute(op)){
+ if(op != MPI_OP_NULL && op->is_commutative()){
for (int other = 0; other < size - 1; other++) {
index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE);
if(index == MPI_UNDEFINED) {
}
if(index < rank) {
// #Request is below rank: it's a irecv
- smpi_op_apply(op, tmpbufs[index], recvbuf, &count, &datatype);
+ if(op!=MPI_OP_NULL) op->apply( tmpbufs[index], recvbuf, &count, &datatype);
}
}
}else{
for (int other = 0; other < size - 1; other++) {
Request::wait(&(requests[other]), MPI_STATUS_IGNORE);
if(index < rank) {
- smpi_op_apply(op, tmpbufs[other], recvbuf, &count, &datatype);
+ if(op!=MPI_OP_NULL) op->apply( tmpbufs[other], recvbuf, &count, &datatype);
}
}
}
// Wait for completion of all comms.
Request::startall(size - 1, requests);
- if(smpi_op_is_commute(op)){
+ if(op != MPI_OP_NULL && op->is_commutative()){
for (int other = 0; other < size - 1; other++) {
index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE);
if(index == MPI_UNDEFINED) {
recvbuf_is_empty=0;
} else
// #Request is below rank: it's a irecv
- smpi_op_apply(op, tmpbufs[index], recvbuf, &count, &datatype);
+ if(op!=MPI_OP_NULL) op->apply( tmpbufs[index], recvbuf, &count, &datatype);
}
}
}else{
smpi_datatype_copy(tmpbufs[other], count, datatype, recvbuf, count, datatype);
recvbuf_is_empty = 0;
} else
- smpi_op_apply(op, tmpbufs[other], recvbuf, &count, &datatype);
+ if(op!=MPI_OP_NULL) op->apply( tmpbufs[other], recvbuf, &count, &datatype);
}
}
}
MPI_Op tmp;
*ierr = MPI_Op_create(reinterpret_cast<MPI_User_function*>(function),*commute, &tmp);
if(*ierr == MPI_SUCCESS) {
- smpi_op_set_fortran(tmp);
+ tmp->set_fortran_op();
*op = smpi_op_add_f(tmp);
}
}
}; \
const MPI_Datatype name = &mpi_##name;
-//The following are datatypes for the MPI functions MPI_MAXLOC and MPI_MINLOC.
-typedef struct {
- float value;
- int index;
-} float_int;
-typedef struct {
- float value;
- float index;
-} float_float;
-typedef struct {
- long value;
- long index;
-} long_long;
-typedef struct {
- double value;
- double index;
-} double_double;
-typedef struct {
- long value;
- int index;
-} long_int;
-typedef struct {
- double value;
- int index;
-} double_int;
-typedef struct {
- short value;
- int index;
-} short_int;
-typedef struct {
- int value;
- int index;
-} int_int;
-typedef struct {
- long double value;
- int index;
-} long_double_int;
-typedef struct {
- int64_t value;
- int64_t index;
-} integer128_t;
+
// Predefined data types
CREATE_MPI_DATATYPE(MPI_CHAR, char);
CREATE_MPI_DATATYPE(MPI_SHORT, short);
char* noncontiguous_vector_char = static_cast<char*>(noncontiguous_vector);
for (i = 0; i < type_c->block_count * count; i++) {
- if (type_c->old_type->sizeof_substruct == 0)
- smpi_op_apply(op, contiguous_vector_char, noncontiguous_vector_char, &type_c->block_length,
+ if (type_c->old_type->sizeof_substruct == 0){
+ if(op!=MPI_OP_NULL)
+ op->apply( contiguous_vector_char, noncontiguous_vector_char, &type_c->block_length,
&type_c->old_type);
- else
+ }else
static_cast<s_smpi_subtype_t*>(type_c->old_type->substruct)->unserialize(contiguous_vector_char, noncontiguous_vector_char,
type_c->block_length,type_c->old_type->substruct,
op);
char* contiguous_vector_char = static_cast<char*>(contiguous_vector);
char* noncontiguous_vector_char = static_cast<char*>(noncontiguous_vector)+type_c->lb;
int n= count* type_c->block_count;
- smpi_op_apply(op, contiguous_vector_char, noncontiguous_vector_char, &n, &type_c->old_type);
+ if(op!=MPI_OP_NULL)
+ op->apply( contiguous_vector_char, noncontiguous_vector_char, &n, &type_c->old_type);
}
void free_contiguous(MPI_Datatype* d){
char* noncontiguous_vector_char = static_cast<char*>(noncontiguous_vector);
for (i = 0; i < type_c->block_count * count; i++) {
- if (type_c->old_type->sizeof_substruct == 0)
- smpi_op_apply(op, contiguous_vector_char, noncontiguous_vector_char, &type_c->block_length, &type_c->old_type);
- else
+ if (type_c->old_type->sizeof_substruct == 0){
+ if(op!=MPI_OP_NULL)
+ op->apply( contiguous_vector_char, noncontiguous_vector_char, &type_c->block_length, &type_c->old_type);
+ }else
static_cast<s_smpi_subtype_t*>(type_c->old_type->substruct)->unserialize( contiguous_vector_char, noncontiguous_vector_char,
type_c->block_length, type_c->old_type->substruct,
op);
static_cast<char*>(noncontiguous_indexed)+type_c->block_indices[0]*smpi_datatype_get_extent(type_c->old_type);
for (int j = 0; j < count; j++) {
for (int i = 0; i < type_c->block_count; i++) {
- if (type_c->old_type->sizeof_substruct == 0)
- smpi_op_apply(op, contiguous_indexed_char, noncontiguous_indexed_char, &type_c->block_lengths[i],
+ if (type_c->old_type->sizeof_substruct == 0){
+ if(op!=MPI_OP_NULL)
+ op->apply( contiguous_indexed_char, noncontiguous_indexed_char, &type_c->block_lengths[i],
&type_c->old_type);
- else
+ }else
static_cast<s_smpi_subtype_t*>(type_c->old_type->substruct)->unserialize( contiguous_indexed_char,
noncontiguous_indexed_char,
type_c->block_lengths[i],
char* noncontiguous_hindexed_char = static_cast<char*>(noncontiguous_hindexed)+ type_c->block_indices[0];
for (int j = 0; j < count; j++) {
for (int i = 0; i < type_c->block_count; i++) {
- if (type_c->old_type->sizeof_substruct == 0)
- smpi_op_apply(op, contiguous_hindexed_char, noncontiguous_hindexed_char, &type_c->block_lengths[i],
+ if (type_c->old_type->sizeof_substruct == 0){
+ if(op!=MPI_OP_NULL)
+ op->apply( contiguous_hindexed_char, noncontiguous_hindexed_char, &type_c->block_lengths[i],
&type_c->old_type);
- else
+ }else
static_cast<s_smpi_subtype_t*>(type_c->old_type->substruct)->unserialize( contiguous_hindexed_char,
noncontiguous_hindexed_char,
type_c->block_lengths[i],
char* noncontiguous_struct_char = static_cast<char*>(noncontiguous_struct)+ type_c->block_indices[0];
for (int j = 0; j < count; j++) {
for (int i = 0; i < type_c->block_count; i++) {
- if (type_c->old_types[i]->sizeof_substruct == 0)
- smpi_op_apply(op, contiguous_struct_char, noncontiguous_struct_char, &type_c->block_lengths[i],
+ if (type_c->old_types[i]->sizeof_substruct == 0){
+ if(op!=MPI_OP_NULL)
+ op->apply( contiguous_struct_char, noncontiguous_struct_char, &type_c->block_lengths[i],
& type_c->old_types[i]);
- else
+ }else
static_cast<s_smpi_subtype_t*>(type_c->old_types[i]->substruct)->unserialize( contiguous_struct_char,
noncontiguous_struct_char,
type_c->block_lengths[i],
(*datatype)->flags= ((*datatype)->flags | DT_FLAG_COMMITED);
}
-typedef struct s_smpi_mpi_op {
- MPI_User_function *func;
- bool is_commute;
- bool is_fortran_op;
-} s_smpi_mpi_op_t;
-
-#define MAX_OP(a, b) (b) = (a) < (b) ? (b) : (a)
-#define MIN_OP(a, b) (b) = (a) < (b) ? (a) : (b)
-#define SUM_OP(a, b) (b) += (a)
-#define PROD_OP(a, b) (b) *= (a)
-#define LAND_OP(a, b) (b) = (a) && (b)
-#define LOR_OP(a, b) (b) = (a) || (b)
-#define LXOR_OP(a, b) (b) = (!(a) && (b)) || ((a) && !(b))
-#define BAND_OP(a, b) (b) &= (a)
-#define BOR_OP(a, b) (b) |= (a)
-#define BXOR_OP(a, b) (b) ^= (a)
-#define MAXLOC_OP(a, b) (b) = (a.value) < (b.value) ? (b) : (a)
-#define MINLOC_OP(a, b) (b) = (a.value) < (b.value) ? (a) : (b)
-
-#define APPLY_FUNC(a, b, length, type, func) \
-{ \
- int i; \
- type* x = (type*)(a); \
- type* y = (type*)(b); \
- for(i = 0; i < *(length); i++) { \
- func(x[i], y[i]); \
- } \
-}
-
-#define APPLY_OP_LOOP(dtype, type, op) \
- if (*datatype == dtype) {\
- APPLY_FUNC(a, b, length, type, op)\
- } else \
-
-
-#define APPLY_BASIC_OP_LOOP(op)\
-APPLY_OP_LOOP(MPI_CHAR, char,op)\
-APPLY_OP_LOOP(MPI_SHORT, short,op)\
-APPLY_OP_LOOP(MPI_INT, int,op)\
-APPLY_OP_LOOP(MPI_LONG, long,op)\
-APPLY_OP_LOOP(MPI_LONG_LONG, long long,op)\
-APPLY_OP_LOOP(MPI_SIGNED_CHAR, signed char,op)\
-APPLY_OP_LOOP(MPI_UNSIGNED_CHAR, unsigned char,op)\
-APPLY_OP_LOOP(MPI_UNSIGNED_SHORT, unsigned short,op)\
-APPLY_OP_LOOP(MPI_UNSIGNED, unsigned int,op)\
-APPLY_OP_LOOP(MPI_UNSIGNED_LONG, unsigned long,op)\
-APPLY_OP_LOOP(MPI_UNSIGNED_LONG_LONG, unsigned long long,op)\
-APPLY_OP_LOOP(MPI_WCHAR, wchar_t,op)\
-APPLY_OP_LOOP(MPI_BYTE, int8_t,op)\
-APPLY_OP_LOOP(MPI_INT8_T, int8_t,op)\
-APPLY_OP_LOOP(MPI_INT16_T, int16_t,op)\
-APPLY_OP_LOOP(MPI_INT32_T, int32_t,op)\
-APPLY_OP_LOOP(MPI_INT64_T, int64_t,op)\
-APPLY_OP_LOOP(MPI_UINT8_T, uint8_t,op)\
-APPLY_OP_LOOP(MPI_UINT16_T, uint16_t,op)\
-APPLY_OP_LOOP(MPI_UINT32_T, uint32_t,op)\
-APPLY_OP_LOOP(MPI_UINT64_T, uint64_t,op)\
-APPLY_OP_LOOP(MPI_AINT, MPI_Aint,op)\
-APPLY_OP_LOOP(MPI_OFFSET, MPI_Offset,op)\
-APPLY_OP_LOOP(MPI_INTEGER1, int,op)\
-APPLY_OP_LOOP(MPI_INTEGER2, int16_t,op)\
-APPLY_OP_LOOP(MPI_INTEGER4, int32_t,op)\
-APPLY_OP_LOOP(MPI_INTEGER8, int64_t,op)
-
-#define APPLY_BOOL_OP_LOOP(op)\
-APPLY_OP_LOOP(MPI_C_BOOL, bool,op)
-
-#define APPLY_FLOAT_OP_LOOP(op)\
-APPLY_OP_LOOP(MPI_FLOAT, float,op)\
-APPLY_OP_LOOP(MPI_DOUBLE, double,op)\
-APPLY_OP_LOOP(MPI_LONG_DOUBLE, long double,op)\
-APPLY_OP_LOOP(MPI_REAL, float,op)\
-APPLY_OP_LOOP(MPI_REAL4, float,op)\
-APPLY_OP_LOOP(MPI_REAL8, float,op)\
-APPLY_OP_LOOP(MPI_REAL16, double,op)
-
-#define APPLY_COMPLEX_OP_LOOP(op)\
-APPLY_OP_LOOP(MPI_C_FLOAT_COMPLEX, float _Complex,op)\
-APPLY_OP_LOOP(MPI_C_DOUBLE_COMPLEX, double _Complex,op)\
-APPLY_OP_LOOP(MPI_C_LONG_DOUBLE_COMPLEX, long double _Complex,op)
-
-#define APPLY_PAIR_OP_LOOP(op)\
-APPLY_OP_LOOP(MPI_FLOAT_INT, float_int,op)\
-APPLY_OP_LOOP(MPI_LONG_INT, long_int,op)\
-APPLY_OP_LOOP(MPI_DOUBLE_INT, double_int,op)\
-APPLY_OP_LOOP(MPI_SHORT_INT, short_int,op)\
-APPLY_OP_LOOP(MPI_2INT, int_int,op)\
-APPLY_OP_LOOP(MPI_2FLOAT, float_float,op)\
-APPLY_OP_LOOP(MPI_2DOUBLE, double_double,op)\
-APPLY_OP_LOOP(MPI_LONG_DOUBLE_INT, long_double_int,op)\
-APPLY_OP_LOOP(MPI_2LONG, long_long,op)
-
-#define APPLY_END_OP_LOOP(op)\
- {\
- xbt_die("Failed to apply " #op " to type %s", (*datatype)->name);\
- }
-
-
-static void max_func(void *a, void *b, int *length, MPI_Datatype * datatype)
-{
- APPLY_BASIC_OP_LOOP(MAX_OP)
- APPLY_FLOAT_OP_LOOP(MAX_OP)
- APPLY_END_OP_LOOP(MAX_OP)
-}
-
-static void min_func(void *a, void *b, int *length, MPI_Datatype * datatype)
-{
- APPLY_BASIC_OP_LOOP(MIN_OP)
- APPLY_FLOAT_OP_LOOP(MIN_OP)
- APPLY_END_OP_LOOP(MIN_OP)
-}
-
-static void sum_func(void *a, void *b, int *length, MPI_Datatype * datatype)
-{
- APPLY_BASIC_OP_LOOP(SUM_OP)
- APPLY_FLOAT_OP_LOOP(SUM_OP)
- APPLY_COMPLEX_OP_LOOP(SUM_OP)
- APPLY_END_OP_LOOP(SUM_OP)
-}
-
-static void prod_func(void *a, void *b, int *length, MPI_Datatype * datatype)
-{
- APPLY_BASIC_OP_LOOP(PROD_OP)
- APPLY_FLOAT_OP_LOOP(PROD_OP)
- APPLY_COMPLEX_OP_LOOP(PROD_OP)
- APPLY_END_OP_LOOP(PROD_OP)
-}
-
-static void land_func(void *a, void *b, int *length, MPI_Datatype * datatype)
-{
- APPLY_BASIC_OP_LOOP(LAND_OP)
- APPLY_BOOL_OP_LOOP(LAND_OP)
- APPLY_END_OP_LOOP(LAND_OP)
-}
-
-static void lor_func(void *a, void *b, int *length, MPI_Datatype * datatype)
-{
- APPLY_BASIC_OP_LOOP(LOR_OP)
- APPLY_BOOL_OP_LOOP(LOR_OP)
- APPLY_END_OP_LOOP(LOR_OP)
-}
-
-static void lxor_func(void *a, void *b, int *length, MPI_Datatype * datatype)
-{
- APPLY_BASIC_OP_LOOP(LXOR_OP)
- APPLY_BOOL_OP_LOOP(LXOR_OP)
- APPLY_END_OP_LOOP(LXOR_OP)
-}
-
-static void band_func(void *a, void *b, int *length, MPI_Datatype * datatype)
-{
- APPLY_BASIC_OP_LOOP(BAND_OP)
- APPLY_BOOL_OP_LOOP(BAND_OP)
- APPLY_END_OP_LOOP(BAND_OP)
-}
-
-static void bor_func(void *a, void *b, int *length, MPI_Datatype * datatype)
-{
- APPLY_BASIC_OP_LOOP(BOR_OP)
- APPLY_BOOL_OP_LOOP(BOR_OP)
- APPLY_END_OP_LOOP(BOR_OP)
-}
-
-static void bxor_func(void *a, void *b, int *length, MPI_Datatype * datatype)
-{
- APPLY_BASIC_OP_LOOP(BXOR_OP)
- APPLY_BOOL_OP_LOOP(BXOR_OP)
- APPLY_END_OP_LOOP(BXOR_OP)
-}
-
-static void minloc_func(void *a, void *b, int *length, MPI_Datatype * datatype)
-{
- APPLY_PAIR_OP_LOOP(MINLOC_OP)
- APPLY_END_OP_LOOP(MINLOC_OP)
-}
-
-static void maxloc_func(void *a, void *b, int *length, MPI_Datatype * datatype)
-{
- APPLY_PAIR_OP_LOOP(MAXLOC_OP)
- APPLY_END_OP_LOOP(MAXLOC_OP)
-}
-
-static void replace_func(void *a, void *b, int *length, MPI_Datatype * datatype)
-{
- memcpy(b, a, *length * smpi_datatype_size(*datatype));
-}
-
-#define CREATE_MPI_OP(name, func) \
- static s_smpi_mpi_op_t mpi_##name = { &(func) /* func */, true, false }; \
-MPI_Op name = &mpi_##name;
-
-CREATE_MPI_OP(MPI_MAX, max_func);
-CREATE_MPI_OP(MPI_MIN, min_func);
-CREATE_MPI_OP(MPI_SUM, sum_func);
-CREATE_MPI_OP(MPI_PROD, prod_func);
-CREATE_MPI_OP(MPI_LAND, land_func);
-CREATE_MPI_OP(MPI_LOR, lor_func);
-CREATE_MPI_OP(MPI_LXOR, lxor_func);
-CREATE_MPI_OP(MPI_BAND, band_func);
-CREATE_MPI_OP(MPI_BOR, bor_func);
-CREATE_MPI_OP(MPI_BXOR, bxor_func);
-CREATE_MPI_OP(MPI_MAXLOC, maxloc_func);
-CREATE_MPI_OP(MPI_MINLOC, minloc_func);
-CREATE_MPI_OP(MPI_REPLACE, replace_func);
-
-MPI_Op smpi_op_new(MPI_User_function * function, bool commute)
-{
- MPI_Op op = xbt_new(s_smpi_mpi_op_t, 1);
- op->func = function;
- op-> is_commute = commute;
- op-> is_fortran_op = false;
- return op;
-}
-
-bool smpi_op_is_commute(MPI_Op op)
-{
- return (op==MPI_OP_NULL) ? true : op-> is_commute;
-}
-
-void smpi_op_destroy(MPI_Op op)
-{
- xbt_free(op);
-}
-
-void smpi_op_set_fortran(MPI_Op op)
-{
- //tell that we were created from fortran, so we need to translate the type to fortran when called
- op->is_fortran_op = true;
-}
-
-void smpi_op_apply(MPI_Op op, void *invec, void *inoutvec, int *len, MPI_Datatype * datatype)
-{
- if(op==MPI_OP_NULL)
- return;
-
- if(smpi_privatize_global_variables){//we need to switch as the called function may silently touch global variables
- XBT_DEBUG("Applying operation, switch to the right data frame ");
- smpi_switch_data_segment(smpi_process_index());
- }
-
- if(!smpi_process_get_replaying()){
- if(! op->is_fortran_op)
- op->func(invec, inoutvec, len, datatype);
- else{
- int tmp = smpi_type_c2f(*datatype);
- /* Unfortunately, the C and Fortran version of the MPI standard do not agree on the type here,
- thus the reinterpret_cast. */
- op->func(invec, inoutvec, len, reinterpret_cast<MPI_Datatype*>(&tmp) );
- }
- }
-}
int smpi_type_attr_delete(MPI_Datatype type, int keyval){
smpi_type_key_elem elem =
MPI_Datatype* old_types;
} s_smpi_mpi_struct_t;
+//The following are datatypes for the MPI functions MPI_MAXLOC and MPI_MINLOC.
+// Each struct pairs a comparison value with a payload (typically the index or
+// rank of the contributing process), mirroring MPI's predefined pair datatypes
+// (MPI_FLOAT_INT, MPI_2INT, MPI_LONG_DOUBLE_INT, ...). Field order matters:
+// MAXLOC_OP/MINLOC_OP compare on .value and copy the whole struct.
+typedef struct {
+  float value;
+  int index;
+} float_int;
+typedef struct {
+  float value;
+  float index;
+} float_float;
+typedef struct {
+  long value;
+  long index;
+} long_long;
+typedef struct {
+  double value;
+  double index;
+} double_double;
+typedef struct {
+  long value;
+  int index;
+} long_int;
+typedef struct {
+  double value;
+  int index;
+} double_int;
+typedef struct {
+  short value;
+  int index;
+} short_int;
+typedef struct {
+  int value;
+  int index;
+} int_int;
+typedef struct {
+  long double value;
+  int index;
+} long_double_int;
+// NOTE(review): named "integer128_t" but holds two int64_t members (value/index
+// pair, 128 bits total) — confirm the name is intentional.
+typedef struct {
+  int64_t value;
+  int64_t index;
+} integer128_t;
+
+
/*
Functions to handle serialization/unserialization of messages, 3 for each type of MPI_Type
One for creating the substructure to handle, one for serialization, one for unserialization
--- /dev/null
+/* Copyright (c) 2009-2017. The SimGrid Team. All rights reserved. */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+#include "mc/mc.h"
+#include "private.h"
+#include "smpi_mpi_dt_private.h"
+
+XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_op, smpi, "Logging specific to SMPI (op)");
+
+// Elementwise reduction kernels: each *_OP macro folds input operand (a) into
+// the in/out operand (b), matching MPI reduction semantics (inoutvec receives
+// the combined result).
+#define MAX_OP(a, b)  (b) = (a) < (b) ? (b) : (a)
+#define MIN_OP(a, b)  (b) = (a) < (b) ? (a) : (b)
+#define SUM_OP(a, b)  (b) += (a)
+#define PROD_OP(a, b) (b) *= (a)
+#define LAND_OP(a, b) (b) = (a) && (b)
+#define LOR_OP(a, b)  (b) = (a) || (b)
+#define LXOR_OP(a, b) (b) = (!(a) && (b)) || ((a) && !(b))
+#define BAND_OP(a, b) (b) &= (a)
+#define BOR_OP(a, b)  (b) |= (a)
+#define BXOR_OP(a, b) (b) ^= (a)
+// MAXLOC/MINLOC compare on .value and copy the whole pair struct (value+index).
+#define MAXLOC_OP(a, b) (b) = (a.value) < (b.value) ? (b) : (a)
+#define MINLOC_OP(a, b) (b) = (a.value) < (b.value) ? (a) : (b)
+
+// Reinterpret the two void* buffers as arrays of `type` and apply `func`
+// pairwise over *(length) elements.
+#define APPLY_FUNC(a, b, length, type, func) \
+{ \
+  int i; \
+  type* x = (type*)(a); \
+  type* y = (type*)(b); \
+  for(i = 0; i < *(length); i++) { \
+    func(x[i], y[i]); \
+  } \
+}
+
+// Expands to `if (*datatype == dtype) {...} else` with a trailing `else`, so
+// consecutive APPLY_OP_LOOP expansions chain into one if/else-if ladder that
+// must be terminated by APPLY_END_OP_LOOP (which provides the final block).
+#define APPLY_OP_LOOP(dtype, type, op) \
+  if (*datatype == dtype) {\
+    APPLY_FUNC(a, b, length, type, op)\
+  } else \
+
+
+// Dispatch over the integer-like predefined datatypes.
+#define APPLY_BASIC_OP_LOOP(op)\
+APPLY_OP_LOOP(MPI_CHAR, char,op)\
+APPLY_OP_LOOP(MPI_SHORT, short,op)\
+APPLY_OP_LOOP(MPI_INT, int,op)\
+APPLY_OP_LOOP(MPI_LONG, long,op)\
+APPLY_OP_LOOP(MPI_LONG_LONG, long long,op)\
+APPLY_OP_LOOP(MPI_SIGNED_CHAR, signed char,op)\
+APPLY_OP_LOOP(MPI_UNSIGNED_CHAR, unsigned char,op)\
+APPLY_OP_LOOP(MPI_UNSIGNED_SHORT, unsigned short,op)\
+APPLY_OP_LOOP(MPI_UNSIGNED, unsigned int,op)\
+APPLY_OP_LOOP(MPI_UNSIGNED_LONG, unsigned long,op)\
+APPLY_OP_LOOP(MPI_UNSIGNED_LONG_LONG, unsigned long long,op)\
+APPLY_OP_LOOP(MPI_WCHAR, wchar_t,op)\
+APPLY_OP_LOOP(MPI_BYTE, int8_t,op)\
+APPLY_OP_LOOP(MPI_INT8_T, int8_t,op)\
+APPLY_OP_LOOP(MPI_INT16_T, int16_t,op)\
+APPLY_OP_LOOP(MPI_INT32_T, int32_t,op)\
+APPLY_OP_LOOP(MPI_INT64_T, int64_t,op)\
+APPLY_OP_LOOP(MPI_UINT8_T, uint8_t,op)\
+APPLY_OP_LOOP(MPI_UINT16_T, uint16_t,op)\
+APPLY_OP_LOOP(MPI_UINT32_T, uint32_t,op)\
+APPLY_OP_LOOP(MPI_UINT64_T, uint64_t,op)\
+APPLY_OP_LOOP(MPI_AINT, MPI_Aint,op)\
+APPLY_OP_LOOP(MPI_OFFSET, MPI_Offset,op)\
+APPLY_OP_LOOP(MPI_INTEGER1, int,op)\
+APPLY_OP_LOOP(MPI_INTEGER2, int16_t,op)\
+APPLY_OP_LOOP(MPI_INTEGER4, int32_t,op)\
+APPLY_OP_LOOP(MPI_INTEGER8, int64_t,op)
+
+#define APPLY_BOOL_OP_LOOP(op)\
+APPLY_OP_LOOP(MPI_C_BOOL, bool,op)
+
+// NOTE(review): MPI_REAL8 is mapped to C `float` and MPI_REAL16 to `double`
+// here; the sizes look off (REAL8 is an 8-byte real, REAL16 a 16-byte one) —
+// confirm whether this is intentional.
+#define APPLY_FLOAT_OP_LOOP(op)\
+APPLY_OP_LOOP(MPI_FLOAT, float,op)\
+APPLY_OP_LOOP(MPI_DOUBLE, double,op)\
+APPLY_OP_LOOP(MPI_LONG_DOUBLE, long double,op)\
+APPLY_OP_LOOP(MPI_REAL, float,op)\
+APPLY_OP_LOOP(MPI_REAL4, float,op)\
+APPLY_OP_LOOP(MPI_REAL8, float,op)\
+APPLY_OP_LOOP(MPI_REAL16, double,op)
+
+#define APPLY_COMPLEX_OP_LOOP(op)\
+APPLY_OP_LOOP(MPI_C_FLOAT_COMPLEX, float _Complex,op)\
+APPLY_OP_LOOP(MPI_C_DOUBLE_COMPLEX, double _Complex,op)\
+APPLY_OP_LOOP(MPI_C_LONG_DOUBLE_COMPLEX, long double _Complex,op)
+
+// Dispatch over the MAXLOC/MINLOC pair datatypes declared above.
+#define APPLY_PAIR_OP_LOOP(op)\
+APPLY_OP_LOOP(MPI_FLOAT_INT, float_int,op)\
+APPLY_OP_LOOP(MPI_LONG_INT, long_int,op)\
+APPLY_OP_LOOP(MPI_DOUBLE_INT, double_int,op)\
+APPLY_OP_LOOP(MPI_SHORT_INT, short_int,op)\
+APPLY_OP_LOOP(MPI_2INT, int_int,op)\
+APPLY_OP_LOOP(MPI_2FLOAT, float_float,op)\
+APPLY_OP_LOOP(MPI_2DOUBLE, double_double,op)\
+APPLY_OP_LOOP(MPI_LONG_DOUBLE_INT, long_double_int,op)\
+APPLY_OP_LOOP(MPI_2LONG, long_long,op)
+
+// Terminates the if/else-if ladder: reaching it means no datatype matched,
+// which is a fatal error (the op cannot be applied to this type).
+#define APPLY_END_OP_LOOP(op)\
+  {\
+    xbt_die("Failed to apply " #op " to type %s", (*datatype)->name);\
+  }
+
+// MPI_User_function implementations backing the predefined MPI_Op handles.
+// Each chains the APPLY_*_OP_LOOP dispatch macros over the datatype families
+// it supports and dies (via APPLY_END_OP_LOOP) on any other datatype.
+static void max_func(void *a, void *b, int *length, MPI_Datatype * datatype)
+{
+  APPLY_BASIC_OP_LOOP(MAX_OP)
+  APPLY_FLOAT_OP_LOOP(MAX_OP)
+  APPLY_END_OP_LOOP(MAX_OP)
+}
+
+static void min_func(void *a, void *b, int *length, MPI_Datatype * datatype)
+{
+  APPLY_BASIC_OP_LOOP(MIN_OP)
+  APPLY_FLOAT_OP_LOOP(MIN_OP)
+  APPLY_END_OP_LOOP(MIN_OP)
+}
+
+// SUM and PROD additionally accept the C complex datatypes.
+static void sum_func(void *a, void *b, int *length, MPI_Datatype * datatype)
+{
+  APPLY_BASIC_OP_LOOP(SUM_OP)
+  APPLY_FLOAT_OP_LOOP(SUM_OP)
+  APPLY_COMPLEX_OP_LOOP(SUM_OP)
+  APPLY_END_OP_LOOP(SUM_OP)
+}
+
+static void prod_func(void *a, void *b, int *length, MPI_Datatype * datatype)
+{
+  APPLY_BASIC_OP_LOOP(PROD_OP)
+  APPLY_FLOAT_OP_LOOP(PROD_OP)
+  APPLY_COMPLEX_OP_LOOP(PROD_OP)
+  APPLY_END_OP_LOOP(PROD_OP)
+}
+
+// Logical and bitwise ops: integer datatypes plus MPI_C_BOOL.
+static void land_func(void *a, void *b, int *length, MPI_Datatype * datatype)
+{
+  APPLY_BASIC_OP_LOOP(LAND_OP)
+  APPLY_BOOL_OP_LOOP(LAND_OP)
+  APPLY_END_OP_LOOP(LAND_OP)
+}
+
+static void lor_func(void *a, void *b, int *length, MPI_Datatype * datatype)
+{
+  APPLY_BASIC_OP_LOOP(LOR_OP)
+  APPLY_BOOL_OP_LOOP(LOR_OP)
+  APPLY_END_OP_LOOP(LOR_OP)
+}
+
+static void lxor_func(void *a, void *b, int *length, MPI_Datatype * datatype)
+{
+  APPLY_BASIC_OP_LOOP(LXOR_OP)
+  APPLY_BOOL_OP_LOOP(LXOR_OP)
+  APPLY_END_OP_LOOP(LXOR_OP)
+}
+
+static void band_func(void *a, void *b, int *length, MPI_Datatype * datatype)
+{
+  APPLY_BASIC_OP_LOOP(BAND_OP)
+  APPLY_BOOL_OP_LOOP(BAND_OP)
+  APPLY_END_OP_LOOP(BAND_OP)
+}
+
+static void bor_func(void *a, void *b, int *length, MPI_Datatype * datatype)
+{
+  APPLY_BASIC_OP_LOOP(BOR_OP)
+  APPLY_BOOL_OP_LOOP(BOR_OP)
+  APPLY_END_OP_LOOP(BOR_OP)
+}
+
+static void bxor_func(void *a, void *b, int *length, MPI_Datatype * datatype)
+{
+  APPLY_BASIC_OP_LOOP(BXOR_OP)
+  APPLY_BOOL_OP_LOOP(BXOR_OP)
+  APPLY_END_OP_LOOP(BXOR_OP)
+}
+
+// MINLOC/MAXLOC operate only on the predefined value/index pair datatypes.
+static void minloc_func(void *a, void *b, int *length, MPI_Datatype * datatype)
+{
+  APPLY_PAIR_OP_LOOP(MINLOC_OP)
+  APPLY_END_OP_LOOP(MINLOC_OP)
+}
+
+static void maxloc_func(void *a, void *b, int *length, MPI_Datatype * datatype)
+{
+  APPLY_PAIR_OP_LOOP(MAXLOC_OP)
+  APPLY_END_OP_LOOP(MAXLOC_OP)
+}
+
+// MPI_REPLACE (used by RMA accumulate): overwrite inoutvec with invec,
+// byte-copying *length elements of the given datatype.
+static void replace_func(void *a, void *b, int *length, MPI_Datatype * datatype)
+{
+  memcpy(b, a, *length * smpi_datatype_size(*datatype));
+}
+
+// Instantiate one static SMPI_Op per predefined operation (constructed with
+// commutative=true) and expose it through the global MPI_Op handle `name`.
+// NOTE(review): every predefined op is created commutative, including
+// MPI_REPLACE — confirm that is intended for the accumulate code paths.
+#define CREATE_MPI_OP(name, func) \
+  static SMPI_Op mpi_##name (&(func) /* func */, true ); \
+MPI_Op name = &mpi_##name;
+
+CREATE_MPI_OP(MPI_MAX, max_func);
+CREATE_MPI_OP(MPI_MIN, min_func);
+CREATE_MPI_OP(MPI_SUM, sum_func);
+CREATE_MPI_OP(MPI_PROD, prod_func);
+CREATE_MPI_OP(MPI_LAND, land_func);
+CREATE_MPI_OP(MPI_LOR, lor_func);
+CREATE_MPI_OP(MPI_LXOR, lxor_func);
+CREATE_MPI_OP(MPI_BAND, band_func);
+CREATE_MPI_OP(MPI_BOR, bor_func);
+CREATE_MPI_OP(MPI_BXOR, bxor_func);
+CREATE_MPI_OP(MPI_MAXLOC, maxloc_func);
+CREATE_MPI_OP(MPI_MINLOC, minloc_func);
+CREATE_MPI_OP(MPI_REPLACE, replace_func);
+
+namespace simgrid{
+namespace smpi{
+
+// Wrap a user (or predefined) reduction function. `commutative` records
+// whether collectives may reorder the reduction; Fortran origin is off by
+// default and set later via set_fortran_op() when created from the F77 layer.
+Op::Op(MPI_User_function * function, bool commutative) : func_(function), is_commutative_(commutative)
+{
+  is_fortran_op_ = false;
+}
+
+// Whether collective algorithms may apply this op in any order.
+bool Op::is_commutative()
+{
+  return is_commutative_;
+}
+
+// Whether the callback expects a Fortran datatype handle (see apply()).
+bool Op::is_fortran_op()
+{
+  return is_fortran_op_;
+}
+
+void Op::set_fortran_op()
+{
+  //tell that we were created from fortran, so we need to translate the type to fortran when called
+  is_fortran_op_ = true;
+}
+
+// Apply the reduction: combine *len elements of `invec` into `inoutvec`.
+// Callers are expected to check for MPI_OP_NULL before calling.
+void Op::apply(void *invec, void *inoutvec, int *len, MPI_Datatype * datatype)
+{
+  if(smpi_privatize_global_variables){//we need to switch as the called function may silently touch global variables
+    XBT_DEBUG("Applying operation, switch to the right data frame ");
+    smpi_switch_data_segment(smpi_process_index());
+  }
+
+  // During trace replay the reduction payloads are fake, so skip the callback.
+  if(!smpi_process_get_replaying()){
+    if(! is_fortran_op_)
+      this->func_(invec, inoutvec, len, datatype);
+    else{
+      // Fortran-created ops receive an integer datatype handle instead of the
+      // C pointer type.
+      int tmp = smpi_type_c2f(*datatype);
+      /* Unfortunately, the C and Fortran version of the MPI standard do not agree on the type here,
+       thus the reinterpret_cast. */
+      this->func_(invec, inoutvec, len, reinterpret_cast<MPI_Datatype*>(&tmp) );
+    }
+  }
+}
+
+}
+}
--- /dev/null
+/* Copyright (c) 2009-2010, 2012-2014. The SimGrid Team.
+ * All rights reserved. */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+#ifndef SMPI_OP_HPP
+#define SMPI_OP_HPP
+
+#include <xbt/base.h>
+
+#include "private.h"
+
+namespace simgrid{
+namespace smpi{
+
+// C++ representation of an MPI reduction operation (MPI_Op). Holds the user
+// callback plus flags telling collectives whether reordering is allowed and
+// whether the callback expects Fortran datatype handles.
+class Op {
+  private:
+    MPI_User_function *func_;   // user/predefined reduction callback
+    bool is_commutative_;       // true if collectives may reorder applications
+    bool is_fortran_op_;        // true if created from the Fortran bindings
+  public:
+    Op(MPI_User_function * function, bool commutative);
+    bool is_commutative();
+    bool is_fortran_op();
+    // Mark as Fortran-created: apply() will pass a Fortran datatype handle.
+    void set_fortran_op();
+    // Combine *len elements of invec into inoutvec using func_.
+    void apply(void *invec, void *inoutvec, int *len, MPI_Datatype * datatype);
+};
+
+}
+}
+
+#endif
if (function == nullptr || op == nullptr) {
return MPI_ERR_ARG;
} else {
- *op = smpi_op_new(function, (commute!=0));
+ *op = new Op(function, (commute!=0));
return MPI_SUCCESS;
}
}
} else if (*op == MPI_OP_NULL) {
return MPI_ERR_OP;
} else {
- smpi_op_destroy(*op);
+ delete (*op);
*op = MPI_OP_NULL;
return MPI_SUCCESS;
}
return MPI_SUCCESS;
}else{
group->use();
- *newcomm = new simgrid::smpi::Comm(group, nullptr);
+ *newcomm = new Comm(group, nullptr);
return MPI_SUCCESS;
}
}
if (!is_datatype_valid(datatype) || op == MPI_OP_NULL) {
retval = MPI_ERR_ARG;
} else {
- smpi_op_apply(op, inbuf, inoutbuf, &count, &datatype);
+ if(op!=MPI_OP_NULL) op->apply( inbuf, inoutbuf, &count, &datatype);
retval = MPI_SUCCESS;
}
smpi_bench_begin();
} else if (ndims < 0 || (ndims > 0 && (dims == nullptr || periodic == nullptr)) || comm_cart == nullptr) {
return MPI_ERR_ARG;
} else{
- new simgrid::smpi::Cart(comm_old, ndims, dims, periodic, reorder, comm_cart);
+ new Cart(comm_old, ndims, dims, periodic, reorder, comm_cart);
return MPI_SUCCESS;
}
}
if (coords == nullptr) {
return MPI_ERR_ARG;
}
- simgrid::smpi::Cart* topo = static_cast<simgrid::smpi::Cart*>(comm->topo());
+ Cart* topo = static_cast<Cart*>(comm->topo());
if (topo==nullptr) {
return MPI_ERR_ARG;
}
if (source == nullptr || dest == nullptr || direction < 0 ) {
return MPI_ERR_ARG;
}
- simgrid::smpi::Cart* topo = static_cast<simgrid::smpi::Cart*>(comm->topo());
+ Cart* topo = static_cast<Cart*>(comm->topo());
if (topo==nullptr) {
return MPI_ERR_ARG;
}
if(coords == nullptr) {
return MPI_ERR_ARG;
}
- simgrid::smpi::Cart* topo = static_cast<simgrid::smpi::Cart*>(comm->topo());
+ Cart* topo = static_cast<Cart*>(comm->topo());
if (topo==nullptr) {
return MPI_ERR_ARG;
}
if(maxdims <= 0 || dims == nullptr || periods == nullptr || coords == nullptr) {
return MPI_ERR_ARG;
}
- simgrid::smpi::Cart* topo = static_cast<simgrid::smpi::Cart*>(comm->topo());
+ Cart* topo = static_cast<Cart*>(comm->topo());
if (topo==nullptr) {
return MPI_ERR_ARG;
}
if (ndims == nullptr) {
return MPI_ERR_ARG;
}
- simgrid::smpi::Cart* topo = static_cast<simgrid::smpi::Cart*>(comm->topo());
+ Cart* topo = static_cast<Cart*>(comm->topo());
if (topo==nullptr) {
return MPI_ERR_ARG;
}
if (ndims < 1 || nnodes < 1) {
return MPI_ERR_DIMS;
}
- return simgrid::smpi::Dims_create(nnodes, ndims, dims);
+ return Dims_create(nnodes, ndims, dims);
}
int PMPI_Cart_sub(MPI_Comm comm, int* remain_dims, MPI_Comm* comm_new) {
if (comm_new == nullptr) {
return MPI_ERR_ARG;
}
- simgrid::smpi::Cart* topo = static_cast<simgrid::smpi::Cart*>(comm->topo());
+ Cart* topo = static_cast<Cart*>(comm->topo());
if (topo==nullptr) {
return MPI_ERR_ARG;
}
- simgrid::smpi::Cart* cart = topo->sub(remain_dims, comm_new);
+ Cart* cart = topo->sub(remain_dims, comm_new);
if(cart==nullptr)
return MPI_ERR_ARG;
return MPI_SUCCESS;
}else if ((base == nullptr && size != 0) || disp_unit <= 0 || size < 0 ){
retval= MPI_ERR_OTHER;
}else{
- *win = new simgrid::smpi::Win( base, size, disp_unit, info, comm);
+ *win = new Win( base, size, disp_unit, info, comm);
retval = MPI_SUCCESS;
}
smpi_bench_begin();
namespace simgrid{
namespace smpi{
Request::Request(){}
-Request::Request(void *buf, int count, MPI_Datatype datatype, int src, int dst, int tag, MPI_Comm comm, unsigned flags) : src_(src), dst_(dst), tag_(tag), comm_(comm), flags_(flags)
+Request::Request(void *buf, int count, MPI_Datatype datatype, int src, int dst, int tag, MPI_Comm comm, unsigned flags) : buf_(buf), old_type_(datatype), src_(src), dst_(dst), tag_(tag), comm_(comm), flags_(flags)
{
void *old_buf = nullptr;
s_smpi_subtype_t *subtype = static_cast<s_smpi_subtype_t*>(datatype->substruct);
if((((flags & RECV) != 0) && ((flags & ACCUMULATE) !=0)) || (datatype->sizeof_substruct != 0)){
// This part handles the problem of non-contiguous memory
old_buf = buf;
- buf = count==0 ? nullptr : xbt_malloc(count*smpi_datatype_size(datatype));
+ buf_ = count==0 ? nullptr : xbt_malloc(count*smpi_datatype_size(datatype));
if ((datatype->sizeof_substruct != 0) && ((flags & SEND) != 0)) {
- subtype->serialize(old_buf, buf, count, datatype->substruct);
+ subtype->serialize(old_buf, buf_, count, datatype->substruct);
}
}
- buf_ = buf;
// This part handles the problem of non-contiguous memory (for the unserialisation at the reception)
old_buf_ = old_buf;
- old_type_ = datatype;
size_ = smpi_datatype_size(datatype) * count;
smpi_datatype_use(datatype);
comm_->use();
xbt_free(req->buf_);
}else if(req->flags_ & RECV){//apply op on contiguous buffer for accumulate
int n =req->real_size_/smpi_datatype_size(datatype);
- smpi_op_apply(req->op_, req->buf_, req->old_buf_, &n, &datatype);
+ req->op_->apply(req->buf_, req->old_buf_, &n, &datatype);
xbt_free(req->buf_);
}
}
src/smpi/smpi_group.hpp
src/smpi/smpi_mpi.cpp
src/smpi/smpi_mpi_dt.cpp
+ src/smpi/smpi_op.cpp
+ src/smpi/smpi_op.hpp
src/smpi/smpi_pmpi.cpp
src/smpi/smpi_replay.cpp
src/smpi/smpi_request.cpp