--- /dev/null
+c**********************************************************************
+c twod.f - a solution to the Poisson problem by using Jacobi
+c interation on a 2-d decomposition
+c
+c .... the rest of this is from pi3.f to show the style ...
+c
+c Each node:
+c 1) receives the number of rectangles used in the approximation.
+c 2) calculates the areas of its rectangles.
+c 3) Synchronizes for a global summation.
+c Node 0 prints the result.
+c
+c Variables:
+c
+c pi the calculated result
+c n number of points of integration.
+c x midpoint of each rectangle's interval
+c f function to integrate
+c sum,pi area of rectangles
+c tmp temporary scratch space for global summation
+c i do loop index
+c
+c This code is included (without the prints) because one version of
+c MPICH SEGV'ed (probably because of errors in handling send/recv of
+c MPI_PROC_NULL source/destination).
+c
+c****************************************************************************
+ program main
+ include "mpif.h"
+c
+c Driver: solve the Poisson problem with Jacobi iteration on a 2-d
+c process decomposition. Convergence is tested collectively with an
+c MPI_Allreduce of the per-process squared differences (see diff2d).
+c
+ integer maxn
+ parameter (maxn = 128)
+c a and b hold successive Jacobi iterates; f is the right-hand side.
+c Only the (sx-1:ex+1, sy-1:ey+1) window of each array is used by
+c the worker routines.
+ double precision a(maxn,maxn), b(maxn,maxn), f(maxn,maxn)
+ integer nx, ny
+ integer myid, numprocs, it, rc, comm2d, ierr, stride
+ integer nbrleft, nbrright, nbrtop, nbrbottom
+ integer sx, ex, sy, ey
+ integer dims(2)
+ logical periods(2)
+ double precision diff2d, diffnorm, dwork
+ double precision t1, t2
+ external diff2d
+c Non-periodic in both dimensions, so processes on the domain edge
+c get MPI_PROC_NULL neighbors from MPI_Cart_shift.
+ data periods/2*.false./
+
+ call MPI_INIT( ierr )
+ call MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
+ call MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ierr )
+c print *, "Process ", myid, " of ", numprocs, " is alive"
+ if (myid .eq. 0) then
+c
+c Get the size of the problem (hard-coded; the interactive read is
+c kept below, commented out)
+c
+c print *, 'Enter nx'
+c read *, nx
+ nx = 10
+ endif
+c print *, 'About to do bcast on ', myid
+ call MPI_BCAST(nx,1,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)
+ ny = nx
+c
+c Get a new communicator for a decomposition of the domain. Let MPI
+c find a "good" decomposition
+c
+ dims(1) = 0
+ dims(2) = 0
+ call MPI_DIMS_CREATE( numprocs, 2, dims, ierr )
+ call MPI_CART_CREATE( MPI_COMM_WORLD, 2, dims, periods, .true.,
+ * comm2d, ierr )
+c
+c Get my position in this communicator.  reorder=.true. was passed
+c above, so the rank in comm2d may differ from the rank in
+c MPI_COMM_WORLD; re-query it here.
+c
+ call MPI_COMM_RANK( comm2d, myid, ierr )
+c print *, "Process ", myid, " of ", numprocs, " is alive"
+c
+c My neighbors are now +/- 1 with my rank. Handle the case of the
+c boundaries by using MPI_PROC_NULL.
+ call fnd2dnbrs( comm2d, nbrleft, nbrright, nbrtop, nbrbottom )
+c print *, "Process ", myid, ":",
+c * nbrleft, nbrright, nbrtop, nbrbottom
+c
+c Compute the decomposition
+c
+ call fnd2ddecomp( comm2d, nx, sx, ex, sy, ey )
+c print *, "Process ", myid, ":", sx, ex, sy, ey
+c
+c Create a new, "strided" datatype for the exchange in the "non-contiguous"
+c direction: one element per column (blocklength 1), with stride
+c ex-sx+3 = the local leading dimension including both ghost rows.
+c
+ call mpi_Type_vector( ey-sy+1, 1, ex-sx+3,
+ $ MPI_DOUBLE_PRECISION, stride, ierr )
+ call mpi_Type_commit( stride, ierr )
+c
+c
+c Initialize the right-hand-side (f) and the initial solution guess (a)
+c
+ call twodinit( a, b, f, nx, sx, ex, sy, ey )
+c
+c Actually do the computation. Note the use of a collective operation to
+c check for convergence, and a do-loop to bound the number of iterations.
+c Each pass through the loop does two half-sweeps with the roles of
+c a and b swapped, i.e. two Jacobi iterations per trip.
+c
+ call MPI_BARRIER( MPI_COMM_WORLD, ierr )
+ t1 = MPI_WTIME()
+ do 10 it=1, 100
+ call exchng2( a, b, sx, ex, sy, ey, comm2d, stride,
+ $ nbrleft, nbrright, nbrtop, nbrbottom )
+ call sweep2d( b, f, nx, sx, ex, sy, ey, a )
+ call exchng2( b, a, sx, ex, sy, ey, comm2d, stride,
+ $ nbrleft, nbrright, nbrtop, nbrbottom )
+ call sweep2d( a, f, nx, sx, ex, sy, ey, b )
+c Global convergence test: sum the local squared differences.
+ dwork = diff2d( a, b, nx, sx, ex, sy, ey )
+ call MPI_Allreduce( dwork, diffnorm, 1, MPI_DOUBLE_PRECISION,
+ $ MPI_SUM, comm2d, ierr )
+ if (diffnorm .lt. 1.0e-5) goto 20
+c if (myid .eq. 0) print *, 2*it, ' Difference is ', diffnorm
+10 continue
+ if (myid .eq. 0) print *, 'Failed to converge'
+20 continue
+ t2 = MPI_WTIME()
+c if (myid .eq. 0) then
+c print *, 'Converged after ', 2*it, ' Iterations in ', t2 - t1,
+c $ ' secs '
+c endif
+c
+c Release the derived datatype and the Cartesian communicator.
+c
+ call MPI_Type_free( stride, ierr )
+ call MPI_Comm_free( comm2d, ierr )
+ if (myid .eq. 0) then
+ print *, ' No Errors'
+ endif
+ call MPI_FINALIZE(rc)
+ end
+c
+c Perform a Jacobi sweep for a 2-d decomposition
+c
+ subroutine sweep2d( a, f, n, sx, ex, sy, ey, b )
+c
+c One Jacobi half-sweep over the local interior (sx:ex, sy:ey):
+c each b(i,j) is computed from the four neighbors of a(i,j) and the
+c right-hand side f(i,j).  All three arrays carry one layer of
+c ghost cells; only b's interior is written.
+c
+ integer n, sx, ex, sy, ey
+ double precision a(sx-1:ex+1, sy-1:ey+1), f(sx-1:ex+1, sy-1:ey+1),
+ + b(sx-1:ex+1, sy-1:ey+1)
+c
+ integer i, j
+ double precision h
+c
+c h: mesh spacing for an (n+1)-interval grid on the unit interval.
+ h = 1.0d0 / dble(n+1)
+ do 10 i=sx, ex
+ do 10 j=sy, ey
+ b(i,j) = 0.25 * (a(i-1,j)+a(i,j+1)+a(i,j-1)+a(i+1,j)) -
+ + h * h * f(i,j)
+ 10 continue
+ return
+ end
+
+ subroutine exchng2( a, b, sx, ex, sy, ey,
+ $ comm2d, stride,
+ $ nbrleft, nbrright, nbrtop, nbrbottom )
+c
+c Ghost-cell exchange for the 2-d decomposition: data is sent from b
+c and received into a, one MPI_SENDRECV per neighbor direction.
+c Neighbors equal to MPI_PROC_NULL (domain edges) make the
+c corresponding send/receive a no-op.
+c
+c NOTE(review): the buffer origins and counts here look unusual for
+c a real halo exchange; a comment later in this file states that the
+c values moved are intentionally not correct for a Poisson solver --
+c this code mainly exercises send/recv with MPI_PROC_NULL
+c source/destination.
+c
+ include "mpif.h"
+ integer sx, ex, sy, ey, stride
+ double precision a(sx-1:ex+1, sy-1:ey+1),
+ $ b(sx-1:ex+1, sy-1:ey+1)
+ integer nbrleft, nbrright, nbrtop, nbrbottom, comm2d
+ integer status(MPI_STATUS_SIZE), ierr, nx
+c
+c nx: number of interior rows in the local patch.
+ nx = ex - sx + 1
+c These are just like the 1-d versions, except for less data:
+c contiguous transfers of nx doubles in the first dimension.
+ call MPI_SENDRECV( b(ex,sy), nx, MPI_DOUBLE_PRECISION,
+ $ nbrtop, 0,
+ $ a(sx-1,sy), nx, MPI_DOUBLE_PRECISION,
+ $ nbrbottom, 0, comm2d, status, ierr )
+ call MPI_SENDRECV( b(sx,sy), nx, MPI_DOUBLE_PRECISION,
+ $ nbrbottom, 1,
+ $ a(ex+1,sy), nx, MPI_DOUBLE_PRECISION,
+ $ nbrtop, 1, comm2d, status, ierr )
+c
+c This uses the "strided" datatype (one element per column, built
+c with MPI_Type_vector in the caller)
+ call MPI_SENDRECV( b(sx,ey), 1, stride, nbrright, 0,
+ $ a(sx,sy-1), 1, stride, nbrleft, 0,
+ $ comm2d, status, ierr )
+ call MPI_SENDRECV( b(sx,sy), 1, stride, nbrleft, 1,
+ $ a(sx,ey+1), 1, stride, nbrright, 1,
+ $ comm2d, status, ierr )
+ return
+ end
+
+c
+c The rest of the 2-d program
+c
+ double precision function diff2d( a, b, nx, sx, ex, sy, ey )
+c
+c Return the local sum of squared differences between iterates a and
+c b over the interior points (sx:ex, sy:ey).  No square root is
+c taken: the caller combines the per-process partial sums with
+c MPI_Allreduce(MPI_SUM) and compares against the tolerance.
+c (nx is accepted for interface symmetry but is not used here.)
+c
+ integer nx, sx, ex, sy, ey
+ double precision a(sx-1:ex+1, sy-1:ey+1), b(sx-1:ex+1, sy-1:ey+1)
+c
+ double precision sum
+ integer i, j
+c
+ sum = 0.0d0
+ do 10 j=sy,ey
+ do 10 i=sx,ex
+ sum = sum + (a(i,j) - b(i,j)) ** 2
+ 10 continue
+c
+ diff2d = sum
+ return
+ end
+ subroutine twodinit( a, b, f, nx, sx, ex, sy, ey )
+c
+c Initialize the local patch: zero a, b, and f everywhere including
+c the ghost layer, then set boundary values on the global edges this
+c process owns: 1.0 on the left (i=0) and bottom (j=0) edges, 0.0 on
+c the right (i=nx+1) edge.
+c
+c NOTE(review): no boundary values are set on the top edge (j side
+c at ey); presumably intentional for this test code -- confirm
+c against the original example.
+c
+ integer nx, sx, ex, sy, ey
+ double precision a(sx-1:ex+1, sy-1:ey+1), b(sx-1:ex+1, sy-1:ey+1),
+ & f(sx-1:ex+1, sy-1:ey+1)
+c
+ integer i, j
+c
+ do 10 j=sy-1,ey+1
+ do 10 i=sx-1,ex+1
+ a(i,j) = 0.0d0
+ b(i,j) = 0.0d0
+ f(i,j) = 0.0d0
+ 10 continue
+c
+c Handle boundary conditions
+c
+c Left global edge (only when this process owns column 1).
+ if (sx .eq. 1) then
+ do 20 j=sy,ey
+ a(0,j) = 1.0d0
+ b(0,j) = 1.0d0
+ 20 continue
+ endif
+c Right global edge.
+ if (ex .eq. nx) then
+ do 21 j=sy,ey
+ a(nx+1,j) = 0.0d0
+ b(nx+1,j) = 0.0d0
+ 21 continue
+ endif
+c Bottom global edge.
+ if (sy .eq. 1) then
+ do 30 i=sx,ex
+ a(i,0) = 1.0d0
+ b(i,0) = 1.0d0
+ 30 continue
+ endif
+c
+ return
+ end
+
+c
+c This file contains a routine for producing a decomposition of a 1-d array
+c when given a number of processors. It may be used in "direct" product
+c decomposition. The values returned assume a "global" domain in [1:n]
+c
+ subroutine MPE_DECOMP1D( n, numprocs, myid, s, e )
+c
+c Block-decompose the 1-d index range [1:n] over numprocs processes;
+c return this process's inclusive start s and end e.  The first
+c mod(n,numprocs) processes each receive one extra element so the
+c remainder is spread evenly.
+c
+ integer n, numprocs, myid, s, e
+ integer nlocal
+ integer deficit
+c
+ nlocal = n / numprocs
+ s = myid * nlocal + 1
+ deficit = mod(n,numprocs)
+c Shift the start right by one for each lower-ranked process that
+c took an extra element; take an extra element if myid < deficit.
+ s = s + min(myid,deficit)
+ if (myid .lt. deficit) then
+ nlocal = nlocal + 1
+ endif
+ e = s + nlocal - 1
+c Clamp to n; the last process always ends exactly at n.
+ if (e .gt. n .or. myid .eq. numprocs-1) e = n
+ return
+ end
+c
+c This routine shows how to determine the neighbors in a 2-d decomposition of
+c the domain. This assumes that MPI_Cart_create has already been called
+c
+ subroutine fnd2dnbrs( comm2d,
+ $ nbrleft, nbrright, nbrtop, nbrbottom )
+c
+c Find the four neighbor ranks in the 2-d Cartesian communicator:
+c dimension 0 gives left/right, dimension 1 gives bottom/top.  For a
+c non-periodic dimension, MPI_Cart_shift returns MPI_PROC_NULL at
+c the domain edges.
+c
+ integer comm2d, nbrleft, nbrright, nbrtop, nbrbottom
+c
+ integer ierr
+c
+ call MPI_Cart_shift( comm2d, 0, 1, nbrleft, nbrright, ierr )
+ call MPI_Cart_shift( comm2d, 1, 1, nbrbottom, nbrtop, ierr )
+c
+ return
+ end
+c
+c Note: THIS IS A TEST PROGRAM. THE ACTUAL VALUES MOVED ARE NOT
+c CORRECT FOR A POISSON SOLVER.
+c
+ subroutine fnd2ddecomp( comm2d, n, sx, ex, sy, ey )
+c
+c Compute this process's subdomain [sx:ex] x [sy:ey] of the global
+c n x n domain: query the Cartesian topology of comm2d for the
+c process grid shape and this process's coordinates, then apply a
+c 1-d block decomposition independently in each dimension.
+c
+ integer comm2d
+ integer n, sx, ex, sy, ey
+ integer dims(2), coords(2), ierr
+ logical periods(2)
+c
+ call MPI_Cart_get( comm2d, 2, dims, periods, coords, ierr )
+
+ call MPE_DECOMP1D( n, dims(1), coords(1), sx, ex )
+ call MPE_DECOMP1D( n, dims(2), coords(2), sy, ey )
+c
+ return
+ end