teshsuite/smpi/mpich-test/topol/twod2.f

   1 c**********************************************************************
   2 c   twod.f - a solution to the Poisson problem by using Jacobi
   3 c   interation on a 2-d decomposition
   4 c
   5 c   .... the rest of this is from pi3.f to show the style ...
   6 c
   7 c   Each node:
   8 c    1) receives the number of rectangles used in the approximation.
   9 c    2) calculates the areas of it's rectangles.
  10 c    3) Synchronizes for a global summation.
  11 c   Node 0 prints the result.
  12 c
  13 c  Variables:
  14 c
  15 c    pi  the calculated result
  16 c    n   number of points of integration.
  17 c    x           midpoint of each rectangle's interval
  18 c    f           function to integrate
  19 c    sum,pi      area of rectangles
  20 c    tmp         temporary scratch space for global summation
  21 c    i           do loop index
  22 c
  23 c     This code is included (without the prints) because one version of
  24 c     MPICH SEGV'ed (probably because of errors in handling send/recv of
  25 c     MPI_PROC_NULL source/destination).
  26 c
  27 c****************************************************************************
  28       program main
  29       include "mpif.h"
  30       integer maxn
  31       parameter (maxn = 128)
  32       double precision  a(maxn,maxn), b(maxn,maxn), f(maxn,maxn)
  33       integer nx, ny
  34       integer myid, numprocs, it, rc, comm2d, ierr, stride
  35       integer nbrleft, nbrright, nbrtop, nbrbottom
  36       integer sx, ex, sy, ey
  37       integer dims(2)
  38       logical periods(2)
  39       double precision diff2d, diffnorm, dwork
  40       double precision t1, t2
  41       external diff2d
  42       data periods/2*.false./
  43
  44       call MPI_INIT( ierr )
  45       call MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
  46       call MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ierr )
  47 c      print *, "Process ", myid, " of ", numprocs, " is alive"
  48       if (myid .eq. 0) then
  49 c
  50 c         Get the size of the problem
  51 c
  52 c          print *, 'Enter nx'
  53 c          read *, nx
  54            nx = 10
  55       endif
  56 c      print *, 'About to do bcast on ', myid
  57       call MPI_BCAST(nx,1,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)
  58       ny   = nx
  59 c
  60 c Get a new communicator for a decomposition of the domain.  Let MPI
  61 c find a "good" decomposition
  62 c
  63       dims(1) = 0
  64       dims(2) = 0
  65       call MPI_DIMS_CREATE( numprocs, 2, dims, ierr )
  66       call MPI_CART_CREATE( MPI_COMM_WORLD, 2, dims, periods, .true.,
  67      *                    comm2d, ierr )
  68 c
  69 c Get my position in this communicator
  70 c
  71       call MPI_COMM_RANK( comm2d, myid, ierr )
  72 c      print *, "Process ", myid, " of ", numprocs, " is alive"
  73 c
  74 c My neighbors are now +/- 1 with my rank.  Handle the case of the
  75 c boundaries by using MPI_PROCNULL.
  76       call fnd2dnbrs( comm2d, nbrleft, nbrright, nbrtop, nbrbottom )
  77 c      print *, "Process ", myid, ":",
  78 c     *     nbrleft, nbrright, nbrtop, nbrbottom
  79 c
  80 c Compute the decomposition
  81 c
  82       call fnd2ddecomp( comm2d, nx, sx, ex, sy, ey )
  83 c      print *, "Process ", myid, ":", sx, ex, sy, ey
  84 c
  85 c Create a new, "strided" datatype for the exchange in the "non-contiguous"
  86 c direction
  87 c
  88       call mpi_Type_vector( ey-sy+1, 1, ex-sx+3,
  89      $                      MPI_DOUBLE_PRECISION, stride, ierr )
  90       call mpi_Type_commit( stride, ierr )
  91 c
  92 c
  93 c Initialize the right-hand-side (f) and the initial solution guess (a)
  94 c
  95       call twodinit( a, b, f, nx, sx, ex, sy, ey )
  96 c
  97 c Actually do the computation.  Note the use of a collective operation to
  98 c check for convergence, and a do-loop to bound the number of iterations.
  99 c
 100       call MPI_BARRIER( MPI_COMM_WORLD, ierr )
 101       t1 = MPI_WTIME()
 102       do 10 it=1, 100
 103         call exchng2( b, sx, ex, sy, ey, comm2d, stride,
 104      $                nbrleft, nbrright, nbrtop, nbrbottom )
 105         call sweep2d( b, f, nx, sx, ex, sy, ey, a )
 106         call exchng2( a, sx, ex, sy, ey, comm2d, stride,
 107      $                nbrleft, nbrright, nbrtop, nbrbottom )
 108         call sweep2d( a, f, nx, sx, ex, sy, ey, b )
 109         dwork = diff2d( a, b, nx, sx, ex, sy, ey )
 110         call MPI_Allreduce( dwork, diffnorm, 1, MPI_DOUBLE_PRECISION,
 111      $                     MPI_SUM, comm2d, ierr )
 112         if (diffnorm .lt. 1.0e-5) goto 20
 113         if (myid .eq. 0) print *, 2*it, ' Difference is ', diffnorm
 114 10     continue
 115       if (myid .eq. 0) print *, 'Failed to converge'
 116 20    continue
 117       t2 = MPI_WTIME()
 118 c      if (myid .eq. 0) then
 119 c         print *, 'Converged after ', 2*it, ' Iterations in ', t2 - t1,
 120 c     $        ' secs '
 121 c      endif
 122 c
 123 c
 124       call MPI_Type_free( stride, ierr )
 125       call MPI_Comm_free( comm2d, ierr )
 126       call MPI_FINALIZE(rc)
 127       end
 128 c
 129 c Perform a Jacobi sweep for a 2-d decomposition
 130 c
 131       subroutine sweep2d( a, f, n, sx, ex, sy, ey, b )
 132       integer n, sx, ex, sy, ey
 133       double precision a(sx-1:ex+1, sy-1:ey+1), f(sx-1:ex+1, sy-1:ey+1),
 134      +                 b(sx-1:ex+1, sy-1:ey+1)
 135 c
 136       integer i, j
 137       double precision h
 138 c
 139       h = 1.0d0 / dble(n+1)
 140       do 10 j=sy, ey
 141          do 10 i=sx, ex
 142             b(i,j) = 0.25 * (a(i-1,j)+a(i,j+1)+a(i,j-1)+a(i+1,j)) -
 143      +               h * h * f(i,j)
 144  10   continue
 145       return
 146       end
 147 c
 148        subroutine exchng2( a, sx, ex, sy, ey,
 149      $                     comm2d, stride,
 150      $                     nbrleft, nbrright, nbrtop, nbrbottom  )
 151        include "mpif.h"
 152        integer sx, ex, sy, ey, stride
 153        double precision a(sx-1:ex+1, sy-1:ey+1)
 154        integer nbrleft, nbrright, nbrtop, nbrbottom, comm2d
 155        integer status(MPI_STATUS_SIZE), ierr, nx
 156 c
 157        nx = ex - sx + 1
 158 c  These are just like the 1-d versions, except for less data
 159         call MPI_SENDRECV( a(sx,ey),  nx, MPI_DOUBLE_PRECISION,
 160      $                    nbrtop, 0,
 161      $                    a(sx,sy-1), nx, MPI_DOUBLE_PRECISION,
 162      $                    nbrbottom, 0, comm2d, status, ierr )
 163         call MPI_SENDRECV( a(sx,sy),  nx, MPI_DOUBLE_PRECISION,
 164      $                    nbrbottom, 1,
 165      $                    a(sx,ey+1), nx, MPI_DOUBLE_PRECISION,
 166      $                    nbrtop, 1, comm2d, status, ierr )
 167 c
 168 c This uses the "strided" datatype
 169         call MPI_SENDRECV( a(ex,sy),  1, stride, nbrright,  0,
 170      $                     a(sx-1,sy), 1, stride, nbrleft,  0,
 171      $                     comm2d, status, ierr )
 172         call MPI_SENDRECV( a(sx,sy),  1, stride, nbrleft,   1,
 173      $                     a(ex+1,sy), 1, stride, nbrright, 1,
 174      $                     comm2d, status, ierr )
 175         return
 176         end
 177
 178 c
 179 c  The rest of the 2-d program
 180 c
 181       double precision function diff2d( a, b, nx, sx, ex, sy, ey )
 182       integer nx, sx, ex, sy, ey
 183       double precision a(sx-1:ex+1, sy-1:ey+1), b(sx-1:ex+1, sy-1:ey+1)
 184 c
 185       double precision sum
 186       integer i, j
 187 c
 188       sum = 0.0d0
 189       do 10 j=sy,ey
 190          do 10 i=sx,ex
 191             sum = sum + (a(i,j) - b(i,j)) ** 2
 192  10      continue
 193 c
 194       diff2d = sum
 195       return
 196       end
 197       subroutine twodinit( a, b, f, nx, sx, ex, sy, ey )
 198       integer nx, sx, ex, sy, ey
 199       double precision a(sx-1:ex+1, sy-1:ey+1), b(sx-1:ex+1, sy-1:ey+1),
 200      &                 f(sx-1:ex+1, sy-1:ey+1)
 201 c
 202       integer i, j
 203 c
 204       do 10 j=sy-1,ey+1
 205          do 10 i=sx-1,ex+1
 206             a(i,j) = 0.0d0
 207             b(i,j) = 0.0d0
 208             f(i,j) = 0.0d0
 209  10      continue
 210 c
 211 c    Handle boundary conditions
 212 c
 213       if (sx .eq. 1) then
 214          do 20 j=sy,ey
 215             a(0,j) = 1.0d0
 216             b(0,j) = 1.0d0
 217  20      continue
 218       endif
 219       if (ex .eq. nx) then
 220          do 21 j=sy,ey
 221             a(nx+1,j) = 0.0d0
 222             b(nx+1,j) = 0.0d0
 223  21      continue
 224       endif
 225       if (sy .eq. 1) then
 226          do 30 i=sx,ex
 227             a(i,0) = 1.0d0
 228             b(i,0) = 1.0d0
 229  30      continue
 230       endif
 231 c
 232       return
 233       end
 234
 235 c
 236 c  This file contains a routine for producing a decomposition of a 1-d array
 237 c  when given a number of processors.  It may be used in "direct" product
 238 c  decomposition.  The values returned assume a "global" domain in [1:n]
 239 c
 240       subroutine MPE_DECOMP1D( n, numprocs, myid, s, e )
 241       integer n, numprocs, myid, s, e
 242       integer nlocal
 243       integer deficit
 244 c
 245       nlocal  = n / numprocs
 246       s       = myid * nlocal + 1
 247       deficit = mod(n,numprocs)
 248       s       = s + min(myid,deficit)
 249       if (myid .lt. deficit) then
 250           nlocal = nlocal + 1
 251       endif
 252       e = s + nlocal - 1
 253       if (e .gt. n .or. myid .eq. numprocs-1) e = n
 254       return
 255       end
 256 c
 257 c This routine show how to determine the neighbors in a 2-d decomposition of
 258 c the domain. This assumes that MPI_Cart_create has already been called
 259 c
 260       subroutine fnd2dnbrs( comm2d,
 261      $                      nbrleft, nbrright, nbrtop, nbrbottom )
 262       integer comm2d, nbrleft, nbrright, nbrtop, nbrbottom
 263 c
 264       integer ierr
 265 c
 266       call MPI_Cart_shift( comm2d, 0,  1, nbrleft,   nbrright, ierr )
 267       call MPI_Cart_shift( comm2d, 1,  1, nbrbottom, nbrtop,   ierr )
 268 c
 269       return
 270       end
 271 c
 272 c Note: THIS IS A TEST PROGRAM.  THE ACTUAL VALUES MOVED ARE NOT
 273 c CORRECT FOR A POISSON SOLVER.
 274 c
 275       subroutine fnd2ddecomp( comm2d, n, sx, ex, sy, ey )
 276       integer comm2d
 277       integer n, sx, ex, sy, ey
 278       integer dims(2), coords(2), ierr
 279       logical periods(2)
 280 c
 281       call MPI_Cart_get( comm2d, 2, dims, periods, coords, ierr )
 282
 283       call MPE_DECOMP1D( n, dims(1), coords(1), sx, ex )
 284       call MPE_DECOMP1D( n, dims(2), coords(2), sy, ey )
 285 c
 286       return
 287       end
 288
 289