+++ /dev/null
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine buts( ldmx, ldmy, ldmz,
- > nx, ny, nz, k,
- > omega,
- > v, tv,
- > d, udx, udy, udz,
- > ist, iend, jst, jend,
- > nx0, ny0, ipt, jpt )
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c
-c compute the regular-sparse, block upper triangular solution:
-c
-c v <-- ( U-inv ) * v
-c
-c---------------------------------------------------------------------
-
- implicit none
-
-c---------------------------------------------------------------------
-c input parameters
-c---------------------------------------------------------------------
- integer ldmx, ldmy, ldmz
- integer nx, ny, nz
- integer k
- double precision omega
- double precision v( 5, -1:ldmx+2, -1:ldmy+2, *),
- > tv(5, ldmx, ldmy),
- > d( 5, 5, ldmx, ldmy),
- > udx( 5, 5, ldmx, ldmy),
- > udy( 5, 5, ldmx, ldmy),
- > udz( 5, 5, ldmx, ldmy )
- integer ist, iend
- integer jst, jend
- integer nx0, ny0
- integer ipt, jpt
-
-c---------------------------------------------------------------------
-c local variables
-c---------------------------------------------------------------------
- integer i, j, m, l, istp, iendp
- integer iex
- double precision tmp, tmp1
- double precision tmat(5,5)
-
-
-c---------------------------------------------------------------------
-c receive data from south and east
-c---------------------------------------------------------------------
- iex = 1
- call exchange_1( v,k,iex )
-
- do j = jend, jst, -1
- do i = iend, ist, -1
- do m = 1, 5
- tv( m, i, j ) =
- > omega * ( udz( m, 1, i, j ) * v( 1, i, j, k+1 )
- > + udz( m, 2, i, j ) * v( 2, i, j, k+1 )
- > + udz( m, 3, i, j ) * v( 3, i, j, k+1 )
- > + udz( m, 4, i, j ) * v( 4, i, j, k+1 )
- > + udz( m, 5, i, j ) * v( 5, i, j, k+1 ) )
- end do
- end do
- end do
-
-
- do l = iend+jend, ist+jst, -1
- istp = max(l - jend, ist)
- iendp = min(l - jst, iend)
-
-!dir$ ivdep
- do i = istp, iendp
- j = l - i
-
-!!dir$ unroll 5
-! manually unroll the loop
-! do m = 1, 5
- tv( 1, i, j ) = tv( 1, i, j )
- > + omega * ( udy( 1, 1, i, j ) * v( 1, i, j+1, k )
- > + udx( 1, 1, i, j ) * v( 1, i+1, j, k )
- > + udy( 1, 2, i, j ) * v( 2, i, j+1, k )
- > + udx( 1, 2, i, j ) * v( 2, i+1, j, k )
- > + udy( 1, 3, i, j ) * v( 3, i, j+1, k )
- > + udx( 1, 3, i, j ) * v( 3, i+1, j, k )
- > + udy( 1, 4, i, j ) * v( 4, i, j+1, k )
- > + udx( 1, 4, i, j ) * v( 4, i+1, j, k )
- > + udy( 1, 5, i, j ) * v( 5, i, j+1, k )
- > + udx( 1, 5, i, j ) * v( 5, i+1, j, k ) )
- tv( 2, i, j ) = tv( 2, i, j )
- > + omega * ( udy( 2, 1, i, j ) * v( 1, i, j+1, k )
- > + udx( 2, 1, i, j ) * v( 1, i+1, j, k )
- > + udy( 2, 2, i, j ) * v( 2, i, j+1, k )
- > + udx( 2, 2, i, j ) * v( 2, i+1, j, k )
- > + udy( 2, 3, i, j ) * v( 3, i, j+1, k )
- > + udx( 2, 3, i, j ) * v( 3, i+1, j, k )
- > + udy( 2, 4, i, j ) * v( 4, i, j+1, k )
- > + udx( 2, 4, i, j ) * v( 4, i+1, j, k )
- > + udy( 2, 5, i, j ) * v( 5, i, j+1, k )
- > + udx( 2, 5, i, j ) * v( 5, i+1, j, k ) )
- tv( 3, i, j ) = tv( 3, i, j )
- > + omega * ( udy( 3, 1, i, j ) * v( 1, i, j+1, k )
- > + udx( 3, 1, i, j ) * v( 1, i+1, j, k )
- > + udy( 3, 2, i, j ) * v( 2, i, j+1, k )
- > + udx( 3, 2, i, j ) * v( 2, i+1, j, k )
- > + udy( 3, 3, i, j ) * v( 3, i, j+1, k )
- > + udx( 3, 3, i, j ) * v( 3, i+1, j, k )
- > + udy( 3, 4, i, j ) * v( 4, i, j+1, k )
- > + udx( 3, 4, i, j ) * v( 4, i+1, j, k )
- > + udy( 3, 5, i, j ) * v( 5, i, j+1, k )
- > + udx( 3, 5, i, j ) * v( 5, i+1, j, k ) )
- tv( 4, i, j ) = tv( 4, i, j )
- > + omega * ( udy( 4, 1, i, j ) * v( 1, i, j+1, k )
- > + udx( 4, 1, i, j ) * v( 1, i+1, j, k )
- > + udy( 4, 2, i, j ) * v( 2, i, j+1, k )
- > + udx( 4, 2, i, j ) * v( 2, i+1, j, k )
- > + udy( 4, 3, i, j ) * v( 3, i, j+1, k )
- > + udx( 4, 3, i, j ) * v( 3, i+1, j, k )
- > + udy( 4, 4, i, j ) * v( 4, i, j+1, k )
- > + udx( 4, 4, i, j ) * v( 4, i+1, j, k )
- > + udy( 4, 5, i, j ) * v( 5, i, j+1, k )
- > + udx( 4, 5, i, j ) * v( 5, i+1, j, k ) )
- tv( 5, i, j ) = tv( 5, i, j )
- > + omega * ( udy( 5, 1, i, j ) * v( 1, i, j+1, k )
- > + udx( 5, 1, i, j ) * v( 1, i+1, j, k )
- > + udy( 5, 2, i, j ) * v( 2, i, j+1, k )
- > + udx( 5, 2, i, j ) * v( 2, i+1, j, k )
- > + udy( 5, 3, i, j ) * v( 3, i, j+1, k )
- > + udx( 5, 3, i, j ) * v( 3, i+1, j, k )
- > + udy( 5, 4, i, j ) * v( 4, i, j+1, k )
- > + udx( 5, 4, i, j ) * v( 4, i+1, j, k )
- > + udy( 5, 5, i, j ) * v( 5, i, j+1, k )
- > + udx( 5, 5, i, j ) * v( 5, i+1, j, k ) )
-! end do
-
-c---------------------------------------------------------------------
-c diagonal block inversion
-c---------------------------------------------------------------------
-!!dir$ unroll 5
-! manually unroll the loop
-! do m = 1, 5
- tmat( 1, 1 ) = d( 1, 1, i, j )
- tmat( 1, 2 ) = d( 1, 2, i, j )
- tmat( 1, 3 ) = d( 1, 3, i, j )
- tmat( 1, 4 ) = d( 1, 4, i, j )
- tmat( 1, 5 ) = d( 1, 5, i, j )
- tmat( 2, 1 ) = d( 2, 1, i, j )
- tmat( 2, 2 ) = d( 2, 2, i, j )
- tmat( 2, 3 ) = d( 2, 3, i, j )
- tmat( 2, 4 ) = d( 2, 4, i, j )
- tmat( 2, 5 ) = d( 2, 5, i, j )
- tmat( 3, 1 ) = d( 3, 1, i, j )
- tmat( 3, 2 ) = d( 3, 2, i, j )
- tmat( 3, 3 ) = d( 3, 3, i, j )
- tmat( 3, 4 ) = d( 3, 4, i, j )
- tmat( 3, 5 ) = d( 3, 5, i, j )
- tmat( 4, 1 ) = d( 4, 1, i, j )
- tmat( 4, 2 ) = d( 4, 2, i, j )
- tmat( 4, 3 ) = d( 4, 3, i, j )
- tmat( 4, 4 ) = d( 4, 4, i, j )
- tmat( 4, 5 ) = d( 4, 5, i, j )
- tmat( 5, 1 ) = d( 5, 1, i, j )
- tmat( 5, 2 ) = d( 5, 2, i, j )
- tmat( 5, 3 ) = d( 5, 3, i, j )
- tmat( 5, 4 ) = d( 5, 4, i, j )
- tmat( 5, 5 ) = d( 5, 5, i, j )
-! end do
-
- tmp1 = 1.0d+00 / tmat( 1, 1 )
- tmp = tmp1 * tmat( 2, 1 )
- tmat( 2, 2 ) = tmat( 2, 2 )
- > - tmp * tmat( 1, 2 )
- tmat( 2, 3 ) = tmat( 2, 3 )
- > - tmp * tmat( 1, 3 )
- tmat( 2, 4 ) = tmat( 2, 4 )
- > - tmp * tmat( 1, 4 )
- tmat( 2, 5 ) = tmat( 2, 5 )
- > - tmp * tmat( 1, 5 )
- tv( 2, i, j ) = tv( 2, i, j )
- > - tv( 1, i, j ) * tmp
-
- tmp = tmp1 * tmat( 3, 1 )
- tmat( 3, 2 ) = tmat( 3, 2 )
- > - tmp * tmat( 1, 2 )
- tmat( 3, 3 ) = tmat( 3, 3 )
- > - tmp * tmat( 1, 3 )
- tmat( 3, 4 ) = tmat( 3, 4 )
- > - tmp * tmat( 1, 4 )
- tmat( 3, 5 ) = tmat( 3, 5 )
- > - tmp * tmat( 1, 5 )
- tv( 3, i, j ) = tv( 3, i, j )
- > - tv( 1, i, j ) * tmp
-
- tmp = tmp1 * tmat( 4, 1 )
- tmat( 4, 2 ) = tmat( 4, 2 )
- > - tmp * tmat( 1, 2 )
- tmat( 4, 3 ) = tmat( 4, 3 )
- > - tmp * tmat( 1, 3 )
- tmat( 4, 4 ) = tmat( 4, 4 )
- > - tmp * tmat( 1, 4 )
- tmat( 4, 5 ) = tmat( 4, 5 )
- > - tmp * tmat( 1, 5 )
- tv( 4, i, j ) = tv( 4, i, j )
- > - tv( 1, i, j ) * tmp
-
- tmp = tmp1 * tmat( 5, 1 )
- tmat( 5, 2 ) = tmat( 5, 2 )
- > - tmp * tmat( 1, 2 )
- tmat( 5, 3 ) = tmat( 5, 3 )
- > - tmp * tmat( 1, 3 )
- tmat( 5, 4 ) = tmat( 5, 4 )
- > - tmp * tmat( 1, 4 )
- tmat( 5, 5 ) = tmat( 5, 5 )
- > - tmp * tmat( 1, 5 )
- tv( 5, i, j ) = tv( 5, i, j )
- > - tv( 1, i, j ) * tmp
-
-
-
- tmp1 = 1.0d+00 / tmat( 2, 2 )
- tmp = tmp1 * tmat( 3, 2 )
- tmat( 3, 3 ) = tmat( 3, 3 )
- > - tmp * tmat( 2, 3 )
- tmat( 3, 4 ) = tmat( 3, 4 )
- > - tmp * tmat( 2, 4 )
- tmat( 3, 5 ) = tmat( 3, 5 )
- > - tmp * tmat( 2, 5 )
- tv( 3, i, j ) = tv( 3, i, j )
- > - tv( 2, i, j ) * tmp
-
- tmp = tmp1 * tmat( 4, 2 )
- tmat( 4, 3 ) = tmat( 4, 3 )
- > - tmp * tmat( 2, 3 )
- tmat( 4, 4 ) = tmat( 4, 4 )
- > - tmp * tmat( 2, 4 )
- tmat( 4, 5 ) = tmat( 4, 5 )
- > - tmp * tmat( 2, 5 )
- tv( 4, i, j ) = tv( 4, i, j )
- > - tv( 2, i, j ) * tmp
-
- tmp = tmp1 * tmat( 5, 2 )
- tmat( 5, 3 ) = tmat( 5, 3 )
- > - tmp * tmat( 2, 3 )
- tmat( 5, 4 ) = tmat( 5, 4 )
- > - tmp * tmat( 2, 4 )
- tmat( 5, 5 ) = tmat( 5, 5 )
- > - tmp * tmat( 2, 5 )
- tv( 5, i, j ) = tv( 5, i, j )
- > - tv( 2, i, j ) * tmp
-
-
-
- tmp1 = 1.0d+00 / tmat( 3, 3 )
- tmp = tmp1 * tmat( 4, 3 )
- tmat( 4, 4 ) = tmat( 4, 4 )
- > - tmp * tmat( 3, 4 )
- tmat( 4, 5 ) = tmat( 4, 5 )
- > - tmp * tmat( 3, 5 )
- tv( 4, i, j ) = tv( 4, i, j )
- > - tv( 3, i, j ) * tmp
-
- tmp = tmp1 * tmat( 5, 3 )
- tmat( 5, 4 ) = tmat( 5, 4 )
- > - tmp * tmat( 3, 4 )
- tmat( 5, 5 ) = tmat( 5, 5 )
- > - tmp * tmat( 3, 5 )
- tv( 5, i, j ) = tv( 5, i, j )
- > - tv( 3, i, j ) * tmp
-
-
-
- tmp1 = 1.0d+00 / tmat( 4, 4 )
- tmp = tmp1 * tmat( 5, 4 )
- tmat( 5, 5 ) = tmat( 5, 5 )
- > - tmp * tmat( 4, 5 )
- tv( 5, i, j ) = tv( 5, i, j )
- > - tv( 4, i, j ) * tmp
-
-c---------------------------------------------------------------------
-c back substitution
-c---------------------------------------------------------------------
- tv( 5, i, j ) = tv( 5, i, j )
- > / tmat( 5, 5 )
-
- tv( 4, i, j ) = tv( 4, i, j )
- > - tmat( 4, 5 ) * tv( 5, i, j )
- tv( 4, i, j ) = tv( 4, i, j )
- > / tmat( 4, 4 )
-
- tv( 3, i, j ) = tv( 3, i, j )
- > - tmat( 3, 4 ) * tv( 4, i, j )
- > - tmat( 3, 5 ) * tv( 5, i, j )
- tv( 3, i, j ) = tv( 3, i, j )
- > / tmat( 3, 3 )
-
- tv( 2, i, j ) = tv( 2, i, j )
- > - tmat( 2, 3 ) * tv( 3, i, j )
- > - tmat( 2, 4 ) * tv( 4, i, j )
- > - tmat( 2, 5 ) * tv( 5, i, j )
- tv( 2, i, j ) = tv( 2, i, j )
- > / tmat( 2, 2 )
-
- tv( 1, i, j ) = tv( 1, i, j )
- > - tmat( 1, 2 ) * tv( 2, i, j )
- > - tmat( 1, 3 ) * tv( 3, i, j )
- > - tmat( 1, 4 ) * tv( 4, i, j )
- > - tmat( 1, 5 ) * tv( 5, i, j )
- tv( 1, i, j ) = tv( 1, i, j )
- > / tmat( 1, 1 )
-
- v( 1, i, j, k ) = v( 1, i, j, k ) - tv( 1, i, j )
- v( 2, i, j, k ) = v( 2, i, j, k ) - tv( 2, i, j )
- v( 3, i, j, k ) = v( 3, i, j, k ) - tv( 3, i, j )
- v( 4, i, j, k ) = v( 4, i, j, k ) - tv( 4, i, j )
- v( 5, i, j, k ) = v( 5, i, j, k ) - tv( 5, i, j )
-
-
- enddo
- end do
-
-c---------------------------------------------------------------------
-c send data to north and west
-c---------------------------------------------------------------------
- iex = 3
- call exchange_1( v,k,iex )
-
- return
- end