examples/smpi/NAS/FT/global.h

   1       include 'npbparams.h'
   2
   3 c 2D processor array -> 2D grid decomposition (by pencils)
   4 c If processor array is 1xN -> 1D grid decomposition (by planes)
   5 c If processor array is 1x1 -> 0D grid decomposition
   6 c For simplicity, do not treat Nx1 (np2 = 1) specially
     c np1, np2: extents of the processor array (np2 = 1 is the Nx1
     c case mentioned above).
     c NOTE(review): np is presumably the total process count --
     c confirm against the setup code.
   7       integer np1, np2, np
   8
   9 c basic decomposition strategy
     c NOTE(review): layout_type presumably holds one of the
     c layout_0D / layout_1D / layout_2D codes (0, 1, 2) -- confirm.
  10       integer layout_type
  11       integer layout_0D, layout_1D, layout_2D
  12       parameter (layout_0D = 0, layout_1D = 1, layout_2D = 2)
  13
     c Shared state; member order is np1, np2, layout_type, np.
  14       common /procgrid/ np1, np2, layout_type, np
  15
  16
  17 c Cache blocking params. These values are good for most
  18 c RISC processors.
  19 c FFT parameters:
  20 c  fftblock controls how many ffts are done at a time.
  21 c  The default is appropriate for most cache-based machines.
  22 c  On vector machines, the FFT can be vectorized with vector
  23 c  length equal to the block size, so the block size should
  24 c  be as large as possible. This is the size of the smallest
  25 c  dimension of the problem: 128 for class A, 256 for class B and
  26 c  512 for class C.
  27 c Transpose parameters:
  28 c  transblock is the blocking factor for the transposes when there
  29 c  is a 1-D layout. On vector machines it should probably be
  30 c  large (largest dimension of the problem).
  31
  32
     c Compile-time defaults for the FFT block size and its padded
     c counterpart (pad = block + 2).
  33       integer fftblock_default, fftblockpad_default
  34       parameter (fftblock_default=16, fftblockpad_default=18)
     c Transpose block size and its padded counterpart (block + 2);
     c both are compile-time constants.
  35       integer transblock, transblockpad
  36       parameter(transblock=32, transblockpad=34)
  37
     c Run-time FFT block sizes, shared through common /blockinfo/.
     c NOTE(review): presumably initialised from the *_default values
     c during setup -- confirm in the code that includes this file.
  38       integer fftblock, fftblockpad
  39       common /blockinfo/ fftblock, fftblockpad
  40
  41 c we need a bunch of logic to keep track of how
  42 c arrays are laid out.
  43 c coords of this processor
     c NOTE(review): me is presumably the overall rank and me1, me2
     c the position within the np1 x np2 processor array -- confirm.
  44       integer me, me1, me2
  45       common /coords/ me, me1, me2
  46 c need a communicator for row/col in processor grid
     c The two communicators are kept as integers in common /comms/.
  47       integer commslice1, commslice2
  48       common /comms/ commslice1, commslice2
  49
  50
  51
  52 c There are basically three stages
  53 c 1: x-y-z layout
  54 c 2: after x-transform (before y)
  55 c 3: after y-transform (before z)
  56 c The computation proceeds logically as
  57
  58 c set up initial conditions
  59 c fftx(1)
  60 c transpose (1->2)
  61 c ffty(2)
  62 c transpose (2->3)
  63 c fftz(3)
  64 c time evolution
  65 c fftz(3)
  66 c transpose (3->2)
  67 c ffty(2)
  68 c transpose (2->1)
  69 c fftx(1)
  70 c compute residual(1)
  71
  72 c for the 0D, 1D, 2D strategies, the layouts at each stage are:
  73 c
  74 c            0D        1D        2D
  75 c 1:        xyz       xyz       xyz
  76 c 2:        xyz       xyz       yxz
  77 c 3:        xyz       zyx       zxy
  78
  79 c the array dimensions are stored in dims(coord, phase)
     c Both indices run 1..3.
     c NOTE(review): phase presumably numbers the three stages
     c (x-y-z layout, after x-transform, after y-transform) -- confirm.
  80       integer dims(3, 3)
     c NOTE(review): presumably the first/last index this process
     c holds along x, y and z, one entry per phase -- confirm
     c against the setup routine.
  81       integer xstart(3), ystart(3), zstart(3)
  82       integer xend(3), yend(3), zend(3)
     c Shared state; member order is dims, the three start arrays,
     c then the three end arrays.
  83       common /layout/ dims,
  84      >                xstart, ystart, zstart,
  85      >                xend, yend, zend
  86
     c Integer constants T_total .. T_synch numbered 1..15; T_max = 15
     c equals the largest id.
     c NOTE(review): presumably these are timer slot ids passed to the
     c timer routines (e.g. timer_read) -- confirm.
  87       integer T_total, T_setup, T_fft, T_evolve, T_checksum,
  88      >        T_fftlow, T_fftcopy, T_transpose,
  89      >        T_transxzloc, T_transxzglo, T_transxzfin,
  90      >        T_transxyloc, T_transxyglo, T_transxyfin,
  91      >        T_synch, T_max
  92       parameter (T_total = 1, T_setup = 2, T_fft = 3,
  93      >           T_evolve = 4, T_checksum = 5,
  94      >           T_fftlow = 6, T_fftcopy = 7, T_transpose = 8,
  95      >           T_transxzloc = 9, T_transxzglo = 10, T_transxzfin = 11,
  96      >           T_transxyloc = 12, T_transxyglo = 13,
  97      >           T_transxyfin = 14,  T_synch = 15, T_max = 15)
  98
  99
 100
     c Compile-time switch; fixed to .false. in this file.
 101       logical timers_enabled
 102       parameter (timers_enabled = .false.)
 103
 104
     c External functions and their return types: timer_read and
     c randlc return double precision, ilog2 returns integer.
 105       external timer_read
 106       double precision timer_read
 107       external ilog2
 108       integer ilog2
 109
 110       external randlc
 111       double precision randlc
 112
 113
 114 c other stuff
     c Two logical debug flags shared through common /dbg/.
 115       logical debug, debugsynch
 116       common /dbg/ debug, debugsynch
 117
     c Double precision compile-time constants.
     c NOTE(review): seed and a look like the starting seed and
     c multiplier for randlc, and alpha a coefficient of the time
     c evolution step -- confirm against the callers.
 118       double precision seed, a, pi, alpha
 119       parameter (seed = 314159265.d0, a = 1220703125.d0,
 120      >  pi = 3.141592653589793238d0, alpha=1.0d-6)
 121
 122 c roots of unity array
 123 c relies on x being largest dimension?
     c u has nx elements and is shared through common /ucomm/.
     c NOTE(review): nx is not declared in this file; presumably it
     c comes from npbparams.h -- confirm.
 124       double complex u(nx)
 125       common /ucomm/ u
 126
 127
 128 c for checksum data
     c sums is indexed 0..niter_default and shared through
     c common /sumcomm/.
     c NOTE(review): niter_default is not declared in this file;
     c presumably it comes from npbparams.h -- confirm.
 129       double complex sums(0:niter_default)
 130       common /sumcomm/ sums
 131
 132 c number of iterations
     c Run-time value shared through common /iter/.
     c NOTE(review): sums is dimensioned 0:niter_default, so niter
     c presumably must not exceed niter_default -- confirm where
     c niter is set.
 133       integer niter
 134       common /iter/ niter