From 3cfd4823046b64b2c8645a655dfb47b795e4f412 Mon Sep 17 00:00:00 2001 From: genaud Date: Fri, 31 Jul 2009 17:57:42 +0000 Subject: [PATCH] started alltoallv(). Still some bugs. git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/simgrid/simgrid/trunk@6538 48e7efb5-ca39-0410-a469-dd3cf9ba447f --- ChangeLog | 7 +- examples/smpi/alltoall2.c | 38 +++++--- examples/smpi/alltoall_basic.c | 56 ++++++++++++ include/smpi/smpi.h | 14 ++- src/smpi/smpi_coll.c | 155 ++++++++++++++++++++++++++++++++- src/smpi/smpi_coll_private.h | 5 ++ src/smpi/smpi_mpi.c | 40 ++++++++- src/smpi/smpi_mpi_dt.c | 19 ++-- 8 files changed, 301 insertions(+), 33 deletions(-) create mode 100644 examples/smpi/alltoall_basic.c diff --git a/ChangeLog b/ChangeLog index a99f0609d0..0b5b80adec 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,13 +7,14 @@ SimGrid (3.3.2-svn) unstable; urgency=low SMPI: * Implement some more MPI primitives: - MPI_Bcast, MPI_Waitany, MPI_Waitall, MPI_Reduce, MPI_Allreduce, MPI_Sendrecv, MPI_Alltoall + MPI_Bcast, MPI_Waitany, MPI_Waitall, MPI_Reduce, MPI_Allreduce, MPI_Scatter, MPI_Sendrecv, MPI_Alltoall -implementation: Bcast: flat or 2-ary tree (default), Barrier: 4-ary tree, Reduce: flat tree Allreduce: Reduce then Bcast - Alltoall: basic_linear if data per proc < 3Kb, otherwise pairwise. - Not yet implemented: bruck for data per proc < 200b and comm size > 12 + Alltoall: "basic_linear" if data per proc < 3Kb, "otherwise pairwise". + Not yet implemented: "Bruck" for data per proc < 200b and comm size > 12 + Scatter: flat tree * Add support for optimized collectives (Bcast is now binomial by default) * Port smpirun and smpicc to OS X diff --git a/examples/smpi/alltoall2.c b/examples/smpi/alltoall2.c index ae47160d1e..0b1c9180d6 100644 --- a/examples/smpi/alltoall2.c +++ b/examples/smpi/alltoall2.c @@ -43,24 +43,38 @@ #define MAXLEN 10000 -void main() +int main( int argc, char *argv[] ) { - int out[1000000],in[1000000],i,j,k; +#define N 1000000 + int *out, *in,i,j,k; int myself,tasks; - MPI_Init(0,0); + out = malloc(N*sizeof(int)); + in = malloc(N*sizeof(int)); + if ((out==NULL) || (in==NULL)) { + printf("Error: cannot allocate N bytes for in or out arrays\n"); + exit(1); + } + MPI_Init( &argc,&argv ); MPI_Comm_rank(MPI_COMM_WORLD,&myself); MPI_Comm_size(MPI_COMM_WORLD,&tasks); for(j=1;j<=MAXLEN;j*=10) { - for(i=0;i bad answer (%d) at index %d of %d (should be %d)\n",myself,in[k+i*j],k+i*j,j*tasks,i); + break; + } + } + } } MPI_Barrier(MPI_COMM_WORLD); if(myself==0) printf("TEST COMPLETE\n"); diff --git a/examples/smpi/alltoall_basic.c b/examples/smpi/alltoall_basic.c new file mode 100644 index 0000000000..12bc44d800 --- /dev/null +++ b/examples/smpi/alltoall_basic.c @@ -0,0 +1,56 @@ +#include "mpi.h" +#include +#include +#include +#include + +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#define EXIT_FAILURE 1 +#endif + +int main( int argc, char *argv[] ) +{ + int rank, size; + int i; + int *sb; + int *rb; + int status, gstatus; + + MPI_Init(&argc,&argv); + MPI_Comm_rank(MPI_COMM_WORLD,&rank); + MPI_Comm_size(MPI_COMM_WORLD,&size); + + sb = (int *)malloc(size*sizeof(int)); + if ( !sb ) { + perror( "can't allocate send buffer" );fflush(stderr); + MPI_Abort(MPI_COMM_WORLD,EXIT_FAILURE); + } + rb = (int *)malloc(size*sizeof(int)); + if ( !rb ) { + perror( "can't allocate recv buffer");fflush(stderr); + free(sb); + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); + } + for ( i=0 ; i < size ; ++i ) { + sb[i] = rank + 1; + rb[i] = 0; + } + status = MPI_Alltoall(sb, 1, MPI_INT, rb, 1, MPI_INT, MPI_COMM_WORLD); + + printf("[%d] rcvbuf=[",rank); + for (i=0;i %s (#%d): ", rank, msg,len); + for (tmp = 0; tmp < len; tmp++) { + v = buf; + fprintf(stderr,"[%d (%p)]", v[tmp],v+tmp); + } + fprintf(stderr,"\n"); + free(msg); +} + + + +/** + * alltoallv basic + **/ + +int smpi_coll_basic_alltoallv(void *sbuf, int *scounts, int *sdisps, MPI_Datatype sdtype, + void *rbuf, int *rcounts, int *rdisps, MPI_Datatype rdtype, + MPI_Comm comm) { + + int i; + int system_alltoallv_tag = 889; + int rank; + int size = comm->size; + int err; + char *psnd; + char *prcv; + //int nreq = 0; + int rreq = 0; + int sreq = 0; + MPI_Aint lb; + MPI_Aint sndextent; + MPI_Aint rcvextent; + MPI_Request *reqs; + + /* Initialize. */ + rank = smpi_mpi_comm_rank(comm); + DEBUG1("<%d> algorithm basic_alltoallv() called.",rank); + + err = smpi_mpi_type_get_extent(sdtype, &lb, &sndextent); + err = smpi_mpi_type_get_extent(rdtype, &lb, &rcvextent); + DEBUG3("<%d> sizeof(sndttype)=%d,sizeof(rcvtype)=%d",rank,sndextent,rcvextent); + + psnd = (char *)sbuf; + print_buffer_int(psnd,size*size,xbt_strdup("sbuff"),rank); + + /* copy the local sbuf to rbuf when it's me */ + psnd = ((char *) sbuf) + (sdisps[rank] * sndextent); + prcv = ((char *) rbuf) + (rdisps[rank] * rcvextent); + + if (0 != scounts[rank]) { + err = copy_dt( psnd, scounts[rank], sdtype, prcv, rcounts[rank], rdtype ); + print_buffer_int(psnd,scounts[rank],strdup("copy_dt"),rank); + if (MPI_SUCCESS != err) { + return err; + } + } + + /* If only one process, we're done. */ + if (1 == size) { + return MPI_SUCCESS; + } + + /* Initiate all send/recv to/from others. */ + reqs = xbt_malloc(2*(size-1) * sizeof(smpi_mpi_request_t)); + + + /* Create all receives that will be posted first */ + for (i = 0; i < size; ++i) { + if (i == rank || 0 == rcounts[i]) { + DEBUG3("<%d> skip req creation i=%d,rcounts[i]=%d",rank,i, rcounts[i]); + continue; + } + prcv = ((char *) rbuf) + (rdisps[i] * rcvextent); + + err = smpi_create_request( prcv, rcounts[i], rdtype, + i, rank, + system_alltoallv_tag, + comm, &(reqs[rreq])); + if (MPI_SUCCESS != err) { + DEBUG2("<%d> failed to create request for rank %d",rank,i); + for (i=0;i< rreq;i++) + xbt_mallocator_release(smpi_global->request_mallocator, reqs[i]); + return err; + } + rreq++; + } + DEBUG2("<%d> %d irecv reqs created",rank,rreq); + /* Now create all sends */ + for (i = 0; i < size; ++i) { + if (i == rank || 0 == scounts[i]) { + DEBUG3("<%d> skip req creation i=%d,scounts[i]=%d",rank,i, scounts[i]); + continue; + } + psnd = ((char *) sbuf) + (sdisps[i] * sndextent); + + fprintf(stderr,"<%d> send %d elems to <%d>\n",rank,scounts[i],i); + print_buffer_int(psnd,scounts[i],xbt_strdup("sbuff part"),rank); + err = smpi_create_request (psnd, scounts[i], sdtype, + rank, i, + system_alltoallv_tag, + comm, &(reqs[rreq+sreq])); + if (MPI_SUCCESS != err) { + DEBUG2("<%d> failed to create request for rank %d\n",rank,i); + for (i=0;i< rreq+sreq;i++) + xbt_mallocator_release(smpi_global->request_mallocator, reqs[i]); + return err; + } + sreq++; + } + DEBUG2("<%d> %d isend reqs created",rank,sreq); + + /* Start your engines. This will never return an error. */ + for ( i=0; i< rreq; i++ ) { + DEBUG3("<%d> issued irecv request reqs[%d]=%p",rank,i,reqs[i]); + smpi_mpi_irecv( reqs[i] ); + } + for ( i=rreq; i issued isend request reqs[%d]=%p",rank,i,reqs[i]); + smpi_mpi_isend( reqs[i] ); + } + + + /* Wait for them all. If there's an error, note that we don't + * care what the error was -- just that there *was* an error. The + * PML will finish all requests, even if one or more of them fail. + * i.e., by the end of this call, all the requests are free-able. + * So free them anyway -- even if there was an error, and return + * the error after we free everything. */ + + DEBUG2("<%d> wait for %d requests",rank,rreq+sreq); + // waitall is buggy: use a loop instead for the moment + // err = smpi_mpi_waitall(nreq, reqs, MPI_STATUS_IGNORE); + for (i=0;i< rreq+sreq;i++) { + err = smpi_mpi_wait( reqs[i], MPI_STATUS_IGNORE); + } + + /* Free the reqs */ + /* nreq might be < 2*(size-1) since some request creations are skipped */ + for (i=0;i< rreq+sreq;i++) { + xbt_mallocator_release(smpi_global->request_mallocator, reqs[i]); + } + xbt_free( reqs ); + return err; +} + + + /** * ----------------------------------------------------------------------------------------------------- diff --git a/src/smpi/smpi_coll_private.h b/src/smpi/smpi_coll_private.h index b4b1e9df0b..31a0bd7b06 100644 --- a/src/smpi/smpi_coll_private.h +++ b/src/smpi/smpi_coll_private.h @@ -22,3 +22,8 @@ int smpi_coll_tuned_alltoall_pairwise (void *sendbuf, int sendcount, MPI_Datatyp int smpi_coll_tuned_alltoall_basic_linear(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, MPI_Comm comm); +int smpi_coll_basic_alltoallv(void *sendbuf, int *scounts, int *sdisps, MPI_Datatype datatype, + void *recvbuf, int *rcounts, int *rdisps, MPI_Datatype recvtype, + MPI_Comm comm); + + diff --git a/src/smpi/smpi_mpi.c b/src/smpi/smpi_mpi.c index fdd8a4f2a2..53799a0e94 100644 --- a/src/smpi/smpi_mpi.c +++ b/src/smpi/smpi_mpi.c @@ -1,3 +1,14 @@ +/* $Id: $tag */ + +/* smpi_mpi.c -- + * + * Eventually will contain the user level MPI primitives and its corresponding + * internal wrapper. The implementations of these primitives should go to specific + * files. For example, a SMPI_MPI_Bcast() in this file, should call the wrapper + * smpi_mpi_bcast(), which decides which implementation to call. Currently, it + * calls nary_tree_bcast() in smpi_coll.c. (Stéphane Genaud). + * */ + #include "private.h" @@ -457,7 +468,7 @@ int retval = MPI_SUCCESS; /** * MPI_Allreduce * - * Same as MPI_REDUCE except that the result appears in the receive buffer of all the group members. + * Same as MPI_Reduce except that the result appears in the receive buffer of all the group members. **/ int SMPI_MPI_Allreduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm ) @@ -555,8 +566,8 @@ int SMPI_MPI_Scatter(void *sendbuf, int sendcount, MPI_Datatype datatype, * ompi/mca/coll/tuned/coll_tuned_module.c **/ int SMPI_MPI_Alltoall(void *sendbuf, int sendcount, MPI_Datatype datatype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, - MPI_Comm comm) + void *recvbuf, int recvcount, MPI_Datatype recvtype, + MPI_Comm comm) { int retval = MPI_SUCCESS; int block_dsize; @@ -586,6 +597,29 @@ int SMPI_MPI_Alltoall(void *sendbuf, int sendcount, MPI_Datatype datatype, return retval; } +/** + * MPI_Alltoallv user entry point + * + * As in OpenMPI, alltoallv is not optimized + * ompi/mca/coll/basic/coll_basic_alltoallv.c + **/ +int SMPI_MPI_Alltoallv(void *sendbuf, int *scounts, int *sdisps, MPI_Datatype datatype, + void *recvbuf, int *rcounts, int *rdisps, MPI_Datatype recvtype, + MPI_Comm comm) +{ + int retval = MPI_SUCCESS; + int rank; + + rank = smpi_mpi_comm_rank(comm); + DEBUG1("<%d> basic alltoallv() called.",rank); + + retval = smpi_coll_basic_alltoallv(sendbuf, scounts, sdisps, datatype, + recvbuf, rcounts, rdisps, recvtype, + comm); + + return retval; +} + diff --git a/src/smpi/smpi_mpi_dt.c b/src/smpi/smpi_mpi_dt.c index 7352a0996c..a196e3616b 100644 --- a/src/smpi/smpi_mpi_dt.c +++ b/src/smpi/smpi_mpi_dt.c @@ -47,6 +47,16 @@ int smpi_mpi_type_get_extent(MPI_Datatype datatype, MPI_Aint *lb, MPI_Aint *exte *extent = datatype->ub - datatype->lb; return( MPI_SUCCESS ); } + + +/** + * query extent and lower bound of the type + **/ +int SMPI_MPI_Type_get_extent( MPI_Datatype datatype, int *lb, int *extent) +{ + return( smpi_mpi_type_get_extent( datatype, lb, extent)); +} + /** * query the size of the type **/ @@ -70,15 +80,6 @@ int SMPI_MPI_Type_size(MPI_Datatype datatype, size_t * size) } - -/** - * query extent and lower bound of the type - **/ -int SMPI_MPI_Type_get_extent( MPI_Datatype datatype, int *lb, int *extent) -{ - return( smpi_mpi_type_get_extent( datatype, lb, extent)); -} - /* Deprecated Functions. * The MPI-2 standard deprecated a number of routines because MPI-2 provides better versions. * This routine is one of those that was deprecated. The routine may continue to be used, but -- 2.20.1