Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Remove the unmodified NAS examples as they are really useless nowadays
authorMartin Quinson <martin.quinson@loria.fr>
Thu, 13 Nov 2014 20:59:09 +0000 (21:59 +0100)
committerMartin Quinson <martin.quinson@loria.fr>
Thu, 13 Nov 2014 20:59:16 +0000 (21:59 +0100)
I'm still unsure of what to do with the modified ones. I vote for
removing them if we have enough examples already.

113 files changed:
examples/smpi/NAS/BT/Makefile [deleted file]
examples/smpi/NAS/BT/add.f [deleted file]
examples/smpi/NAS/BT/adi.f [deleted file]
examples/smpi/NAS/BT/bt.f [deleted file]
examples/smpi/NAS/BT/btio.f [deleted file]
examples/smpi/NAS/BT/btio_common.f [deleted file]
examples/smpi/NAS/BT/copy_faces.f [deleted file]
examples/smpi/NAS/BT/define.f [deleted file]
examples/smpi/NAS/BT/epio.f [deleted file]
examples/smpi/NAS/BT/error.f [deleted file]
examples/smpi/NAS/BT/exact_rhs.f [deleted file]
examples/smpi/NAS/BT/exact_solution.f [deleted file]
examples/smpi/NAS/BT/fortran_io.f [deleted file]
examples/smpi/NAS/BT/full_mpiio.f [deleted file]
examples/smpi/NAS/BT/header.h [deleted file]
examples/smpi/NAS/BT/initialize.f [deleted file]
examples/smpi/NAS/BT/inputbt.data.sample [deleted file]
examples/smpi/NAS/BT/make_set.f [deleted file]
examples/smpi/NAS/BT/mpinpb.h [deleted file]
examples/smpi/NAS/BT/rhs.f [deleted file]
examples/smpi/NAS/BT/set_constants.f [deleted file]
examples/smpi/NAS/BT/setup_mpi.f [deleted file]
examples/smpi/NAS/BT/simple_mpiio.f [deleted file]
examples/smpi/NAS/BT/solve_subs.f [deleted file]
examples/smpi/NAS/BT/verify.f [deleted file]
examples/smpi/NAS/BT/work_lhs.h [deleted file]
examples/smpi/NAS/BT/work_lhs_vec.h [deleted file]
examples/smpi/NAS/BT/x_solve.f [deleted file]
examples/smpi/NAS/BT/x_solve_vec.f [deleted file]
examples/smpi/NAS/BT/y_solve.f [deleted file]
examples/smpi/NAS/BT/y_solve_vec.f [deleted file]
examples/smpi/NAS/BT/z_solve.f [deleted file]
examples/smpi/NAS/BT/z_solve_vec.f [deleted file]
examples/smpi/NAS/CG/Makefile [deleted file]
examples/smpi/NAS/CG/cg.f [deleted file]
examples/smpi/NAS/CG/mpinpb.h [deleted file]
examples/smpi/NAS/FT/Makefile [deleted file]
examples/smpi/NAS/FT/README [deleted file]
examples/smpi/NAS/FT/ft.f [deleted file]
examples/smpi/NAS/FT/global.h [deleted file]
examples/smpi/NAS/FT/inputft.data.sample [deleted file]
examples/smpi/NAS/FT/mpinpb.h [deleted file]
examples/smpi/NAS/LU/Makefile [deleted file]
examples/smpi/NAS/LU/applu.incl [deleted file]
examples/smpi/NAS/LU/bcast_inputs.f [deleted file]
examples/smpi/NAS/LU/blts.f [deleted file]
examples/smpi/NAS/LU/blts_vec.f [deleted file]
examples/smpi/NAS/LU/buts.f [deleted file]
examples/smpi/NAS/LU/buts_vec.f [deleted file]
examples/smpi/NAS/LU/erhs.f [deleted file]
examples/smpi/NAS/LU/error.f [deleted file]
examples/smpi/NAS/LU/exact.f [deleted file]
examples/smpi/NAS/LU/exchange_1.f [deleted file]
examples/smpi/NAS/LU/exchange_3.f [deleted file]
examples/smpi/NAS/LU/exchange_4.f [deleted file]
examples/smpi/NAS/LU/exchange_5.f [deleted file]
examples/smpi/NAS/LU/exchange_6.f [deleted file]
examples/smpi/NAS/LU/init_comm.f [deleted file]
examples/smpi/NAS/LU/inputlu.data.sample [deleted file]
examples/smpi/NAS/LU/jacld.f [deleted file]
examples/smpi/NAS/LU/jacu.f [deleted file]
examples/smpi/NAS/LU/l2norm.f [deleted file]
examples/smpi/NAS/LU/lu.f [deleted file]
examples/smpi/NAS/LU/mpinpb.h [deleted file]
examples/smpi/NAS/LU/neighbors.f [deleted file]
examples/smpi/NAS/LU/nodedim.f [deleted file]
examples/smpi/NAS/LU/pintgr.f [deleted file]
examples/smpi/NAS/LU/proc_grid.f [deleted file]
examples/smpi/NAS/LU/read_input.f [deleted file]
examples/smpi/NAS/LU/rhs.f [deleted file]
examples/smpi/NAS/LU/setbv.f [deleted file]
examples/smpi/NAS/LU/setcoeff.f [deleted file]
examples/smpi/NAS/LU/sethyper.f [deleted file]
examples/smpi/NAS/LU/setiv.f [deleted file]
examples/smpi/NAS/LU/ssor.f [deleted file]
examples/smpi/NAS/LU/subdomain.f [deleted file]
examples/smpi/NAS/LU/verify.f [deleted file]
examples/smpi/NAS/MG/Makefile [deleted file]
examples/smpi/NAS/MG/README [deleted file]
examples/smpi/NAS/MG/globals.h [deleted file]
examples/smpi/NAS/MG/mg.f [deleted file]
examples/smpi/NAS/MG/mg.input.sample [deleted file]
examples/smpi/NAS/MG/mpinpb.h [deleted file]
examples/smpi/NAS/Makefile
examples/smpi/NAS/SP/Makefile [deleted file]
examples/smpi/NAS/SP/README [deleted file]
examples/smpi/NAS/SP/add.f [deleted file]
examples/smpi/NAS/SP/adi.f [deleted file]
examples/smpi/NAS/SP/copy_faces.f [deleted file]
examples/smpi/NAS/SP/define.f [deleted file]
examples/smpi/NAS/SP/error.f [deleted file]
examples/smpi/NAS/SP/exact_rhs.f [deleted file]
examples/smpi/NAS/SP/exact_solution.f [deleted file]
examples/smpi/NAS/SP/header.h [deleted file]
examples/smpi/NAS/SP/initialize.f [deleted file]
examples/smpi/NAS/SP/inputsp.data.sample [deleted file]
examples/smpi/NAS/SP/lhsx.f [deleted file]
examples/smpi/NAS/SP/lhsy.f [deleted file]
examples/smpi/NAS/SP/lhsz.f [deleted file]
examples/smpi/NAS/SP/make_set.f [deleted file]
examples/smpi/NAS/SP/mpinpb.h [deleted file]
examples/smpi/NAS/SP/ninvr.f [deleted file]
examples/smpi/NAS/SP/pinvr.f [deleted file]
examples/smpi/NAS/SP/rhs.f [deleted file]
examples/smpi/NAS/SP/set_constants.f [deleted file]
examples/smpi/NAS/SP/setup_mpi.f [deleted file]
examples/smpi/NAS/SP/sp.f [deleted file]
examples/smpi/NAS/SP/txinvr.f [deleted file]
examples/smpi/NAS/SP/tzetar.f [deleted file]
examples/smpi/NAS/SP/verify.f [deleted file]
examples/smpi/NAS/SP/x_solve.f [deleted file]
examples/smpi/NAS/SP/y_solve.f [deleted file]
examples/smpi/NAS/SP/z_solve.f [deleted file]

diff --git a/examples/smpi/NAS/BT/Makefile b/examples/smpi/NAS/BT/Makefile
deleted file mode 100644 (file)
index dd27503..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-SHELL=/bin/sh
-BENCHMARK=bt
-BENCHMARKU=BT
-VEC=
-
-include ../config/make.def
-
-
-OBJS = bt.o make_set.o initialize.o exact_solution.o exact_rhs.o \
-       set_constants.o adi.o define.o copy_faces.o rhs.o solve_subs.o \
-       x_solve$(VEC).o y_solve$(VEC).o z_solve$(VEC).o add.o error.o \
-       verify.o setup_mpi.o \
-       ${COMMON}/print_results.o ${COMMON}/timers.o
-
-include ../sys/make.common
-
-# npbparams.h is included by header.h
-# The following rule should do the trick but many make programs (not gmake)
-# will do the wrong thing and rebuild the world every time (because the
-# mod time on header.h is not changed. One solution would be to 
-# touch header.h but this might cause confusion if someone has
-# accidentally deleted it. Instead, make the dependency on npbparams.h
-# explicit in all the lines below (even though dependence is indirect). 
-
-# header.h: npbparams.h
-
-${PROGRAM}: config
-       @if [ x$(VERSION) = xvec ] ; then       \
-               ${MAKE} VEC=_vec exec;          \
-       elif [ x$(VERSION) = xVEC ] ; then      \
-               ${MAKE} VEC=_vec exec;          \
-       else                                    \
-               ${MAKE} exec;                   \
-       fi
-
-exec: $(OBJS)
-       @if [ x$(SUBTYPE) = xfull ] ; then      \
-               ${MAKE} bt-full;                \
-       elif [ x$(SUBTYPE) = xFULL ] ; then     \
-               ${MAKE} bt-full;                \
-       elif [ x$(SUBTYPE) = xsimple ] ; then   \
-               ${MAKE} bt-simple;              \
-       elif [ x$(SUBTYPE) = xSIMPLE ] ; then   \
-               ${MAKE} bt-simple;              \
-       elif [ x$(SUBTYPE) = xfortran ] ; then  \
-               ${MAKE} bt-fortran;             \
-       elif [ x$(SUBTYPE) = xFORTRAN ] ; then  \
-               ${MAKE} bt-fortran;             \
-       elif [ x$(SUBTYPE) = xepio ] ; then     \
-               ${MAKE} bt-epio;                \
-       elif [ x$(SUBTYPE) = xEPIO ] ; then     \
-               ${MAKE} bt-epio;                \
-       else                                    \
-               ${MAKE} bt-bt;                  \
-       fi
-
-bt-bt: ${OBJS} btio.o
-       ${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} btio.o ${FMPI_LIB}
-
-bt-full: ${OBJS} full_mpiio.o btio_common.o
-       ${FLINK} ${FLINKFLAGS} -o ${PROGRAM}.mpi_io_full ${OBJS} btio_common.o full_mpiio.o ${FMPI_LIB}
-
-bt-simple: ${OBJS} simple_mpiio.o btio_common.o
-       ${FLINK} ${FLINKFLAGS} -o ${PROGRAM}.mpi_io_simple ${OBJS} btio_common.o simple_mpiio.o ${FMPI_LIB}
-
-bt-fortran: ${OBJS} fortran_io.o btio_common.o
-       ${FLINK} ${FLINKFLAGS} -o ${PROGRAM}.fortran_io ${OBJS} btio_common.o fortran_io.o ${FMPI_LIB}
-
-bt-epio: ${OBJS} epio.o btio_common.o
-       ${FLINK} ${FLINKFLAGS} -o ${PROGRAM}.ep_io ${OBJS} btio_common.o epio.o ${FMPI_LIB}
-
-.f.o:
-       ${FCOMPILE} $<
-
-.c.o:
-       ${CCOMPILE} $<
-
-
-bt.o:             bt.f  header.h npbparams.h  mpinpb.h
-make_set.o:       make_set.f  header.h npbparams.h  mpinpb.h
-initialize.o:     initialize.f  header.h npbparams.h
-exact_solution.o: exact_solution.f  header.h npbparams.h
-exact_rhs.o:      exact_rhs.f  header.h npbparams.h
-set_constants.o:  set_constants.f  header.h npbparams.h
-adi.o:            adi.f  header.h npbparams.h
-define.o:         define.f  header.h npbparams.h
-copy_faces.o:     copy_faces.f  header.h npbparams.h  mpinpb.h
-rhs.o:            rhs.f  header.h npbparams.h
-x_solve$(VEC).o:  x_solve$(VEC).f  header.h work_lhs$(VEC).h npbparams.h  mpinpb.h
-y_solve$(VEC).o:  y_solve$(VEC).f  header.h work_lhs$(VEC).h npbparams.h  mpinpb.h
-z_solve$(VEC).o:  z_solve$(VEC).f  header.h work_lhs$(VEC).h npbparams.h  mpinpb.h
-solve_subs.o:     solve_subs.f  npbparams.h
-add.o:            add.f  header.h npbparams.h
-error.o:          error.f  header.h npbparams.h  mpinpb.h
-verify.o:         verify.f  header.h npbparams.h  mpinpb.h
-setup_mpi.o:      setup_mpi.f mpinpb.h npbparams.h 
-btio.o:           btio.f  header.h npbparams.h
-btio_common.o:    btio_common.f mpinpb.h npbparams.h 
-fortran_io.o:     fortran_io.f mpinpb.h npbparams.h 
-simple_mpiio.o:   simple_mpiio.f mpinpb.h npbparams.h 
-full_mpiio.o:     full_mpiio.f mpinpb.h npbparams.h 
-epio.o:           epio.f mpinpb.h npbparams.h 
-
-clean:
-       - rm -f *.o *~ mputil*
-       - rm -f  npbparams.h core
diff --git a/examples/smpi/NAS/BT/add.f b/examples/smpi/NAS/BT/add.f
deleted file mode 100644 (file)
index e14cde4..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine  add
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     addition of update to the vector u
-c---------------------------------------------------------------------
-
-      include 'header.h'
-
-      integer  c, i, j, k, m
-
-      do     c = 1, ncells
-         do     k = start(3,c), cell_size(3,c)-end(3,c)-1
-            do     j = start(2,c), cell_size(2,c)-end(2,c)-1
-               do     i = start(1,c), cell_size(1,c)-end(1,c)-1
-                  do    m = 1, 5
-                     u(m,i,j,k,c) = u(m,i,j,k,c) + rhs(m,i,j,k,c)
-                  enddo
-               enddo
-            enddo
-         enddo
-      enddo
-
-      return
-      end
diff --git a/examples/smpi/NAS/BT/adi.f b/examples/smpi/NAS/BT/adi.f
deleted file mode 100644 (file)
index 58450c0..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine  adi
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      call copy_faces
-
-      call x_solve
-
-      call y_solve
-
-      call z_solve
-
-      call add
-
-      return
-      end
-
diff --git a/examples/smpi/NAS/BT/bt.f b/examples/smpi/NAS/BT/bt.f
deleted file mode 100644 (file)
index 36e5078..0000000
+++ /dev/null
@@ -1,275 +0,0 @@
-!-------------------------------------------------------------------------!
-!                                                                         !
-!        N  A  S     P A R A L L E L     B E N C H M A R K S  3.3         !
-!                                                                         !
-!                                   B T                                   !
-!                                                                         !
-!-------------------------------------------------------------------------!
-!                                                                         !
-!    This benchmark is part of the NAS Parallel Benchmark 3.3 suite.      !
-!    It is described in NAS Technical Reports 95-020 and 02-007.          !
-!                                                                         !
-!    Permission to use, copy, distribute and modify this software         !
-!    for any purpose with or without fee is hereby granted.  We           !
-!    request, however, that all derived work reference the NAS            !
-!    Parallel Benchmarks 3.3. This software is provided "as is"           !
-!    without express or implied warranty.                                 !
-!                                                                         !
-!    Information on NPB 3.3, including the technical report, the          !
-!    original specifications, source code, results and information        !
-!    on how to submit new results, is available at:                       !
-!                                                                         !
-!           http://www.nas.nasa.gov/Software/NPB/                         !
-!                                                                         !
-!    Send comments or suggestions to  npb@nas.nasa.gov                    !
-!                                                                         !
-!          NAS Parallel Benchmarks Group                                  !
-!          NASA Ames Research Center                                      !
-!          Mail Stop: T27A-1                                              !
-!          Moffett Field, CA   94035-1000                                 !
-!                                                                         !
-!          E-mail:  npb@nas.nasa.gov                                      !
-!          Fax:     (650) 604-3957                                        !
-!                                                                         !
-!-------------------------------------------------------------------------!
-
-c---------------------------------------------------------------------
-c
-c Authors: R. F. Van der Wijngaart
-c          T. Harris
-c          M. Yarrow
-c
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-       program MPBT
-c---------------------------------------------------------------------
-
-       include  'header.h'
-       include  'mpinpb.h'
-      
-       integer i, niter, step, c, error, fstatus
-       double precision navg, mflops, mbytes, n3
-
-       external timer_read
-       double precision t, tmax, tiominv, tpc, timer_read
-       logical verified
-       character class, cbuff*40
-
-       integer wr_interval
-
-       call setup_mpi
-       if (.not. active) goto 999
-
-c---------------------------------------------------------------------
-c      Root node reads input file (if it exists) else takes
-c      defaults from parameters
-c---------------------------------------------------------------------
-       if (node .eq. root) then
-          
-          write(*, 1000)
-          open (unit=2,file='inputbt.data',status='old', iostat=fstatus)
-c
-          rd_interval = 0
-          if (fstatus .eq. 0) then
-            write(*,233) 
- 233        format(' Reading from input file inputbt.data')
-            read (2,*) niter
-            read (2,*) dt
-            read (2,*) grid_points(1), grid_points(2), grid_points(3)
-            if (iotype .ne. 0) then
-                read (2,'(A)') cbuff
-                read (cbuff,*,iostat=i) wr_interval, rd_interval
-                if (i .ne. 0) rd_interval = 0
-                if (wr_interval .le. 0) wr_interval = wr_default
-            endif
-            if (iotype .eq. 1) then
-                read (2,*) collbuf_nodes, collbuf_size
-                write(*,*) 'collbuf_nodes ', collbuf_nodes
-                write(*,*) 'collbuf_size  ', collbuf_size
-            endif
-            close(2)
-          else
-            write(*,234) 
-            niter = niter_default
-            dt    = dt_default
-            grid_points(1) = problem_size
-            grid_points(2) = problem_size
-            grid_points(3) = problem_size
-            wr_interval = wr_default
-            if (iotype .eq. 1) then
-c             set number of nodes involved in collective buffering to 4,
-c             unless total number of nodes is smaller than that.
-c             set buffer size for collective buffering to 1MB per node
-c             collbuf_nodes = min(4,no_nodes)
-c             set default to No-File-Hints with a value of 0
-              collbuf_nodes = 0
-              collbuf_size = 1000000
-            endif
-          endif
- 234      format(' No input file inputbt.data. Using compiled defaults')
-
-          write(*, 1001) grid_points(1), grid_points(2), grid_points(3)
-          write(*, 1002) niter, dt
-          if (no_nodes .ne. total_nodes) write(*, 1004) total_nodes
-          if (no_nodes .ne. maxcells*maxcells) 
-     >        write(*, 1005) maxcells*maxcells
-          write(*, 1003) no_nodes
-
-          if (iotype .eq. 1) write(*, 1006) 'FULL MPI-IO', wr_interval
-          if (iotype .eq. 2) write(*, 1006) 'SIMPLE MPI-IO', wr_interval
-          if (iotype .eq. 3) write(*, 1006) 'EPIO', wr_interval
-          if (iotype .eq. 4) write(*, 1006) 'FORTRAN IO', wr_interval
-
- 1000 format(//, ' NAS Parallel Benchmarks 3.3 -- BT Benchmark ',/)
- 1001     format(' Size: ', i4, 'x', i4, 'x', i4)
- 1002     format(' Iterations: ', i4, '    dt: ', F11.7)
- 1004     format(' Total number of processes: ', i5)
- 1005     format(' WARNING: compiled for ', i5, ' processes ')
- 1003     format(' Number of active processes: ', i5, /)
- 1006     format(' BTIO -- ', A, ' write interval: ', i3 /)
-
-       endif
-
-       call mpi_bcast(niter, 1, MPI_INTEGER,
-     >                root, comm_setup, error)
-
-       call mpi_bcast(dt, 1, dp_type, 
-     >                root, comm_setup, error)
-
-       call mpi_bcast(grid_points(1), 3, MPI_INTEGER, 
-     >                root, comm_setup, error)
-
-       call mpi_bcast(wr_interval, 1, MPI_INTEGER,
-     >                root, comm_setup, error)
-
-       call mpi_bcast(rd_interval, 1, MPI_INTEGER,
-     >                root, comm_setup, error)
-
-       call make_set
-
-       do  c = 1, maxcells
-          if ( (cell_size(1,c) .gt. IMAX) .or.
-     >         (cell_size(2,c) .gt. JMAX) .or.
-     >         (cell_size(3,c) .gt. KMAX) ) then
-             print *,node, c, (cell_size(i,c),i=1,3)
-             print *,' Problem size too big for compiled array sizes'
-             goto 999
-          endif
-       end do
-
-       call set_constants
-
-       call initialize
-
-       call setup_btio
-       idump = 0
-
-       call lhsinit
-
-       call exact_rhs
-
-       call compute_buffer_size(5)
-
-c---------------------------------------------------------------------
-c      do one time step to touch all code, and reinitialize
-c---------------------------------------------------------------------
-       call adi
-       call initialize
-
-       call timer_clear(2)
-
-c---------------------------------------------------------------------
-c      Synchronize before placing time stamp
-c---------------------------------------------------------------------
-       call mpi_barrier(comm_setup, error)
-
-       call timer_clear(1)
-       call timer_start(1)
-
-       do  step = 1, niter
-
-          if (node .eq. root) then
-             if (mod(step, 20) .eq. 0 .or. step .eq. niter .or.
-     >           step .eq. 1) then
-                write(*, 200) step
- 200            format(' Time step ', i4)
-             endif
-          endif
-
-          call adi
-
-          if (iotype .ne. 0) then
-              call timer_start(2)
-              if (mod(step, wr_interval).eq.0 .or. step .eq. niter) then
-                  if (node .eq. root) then
-                      print *, 'Writing data set, time step', step
-                  endif
-                  if (step .eq. niter .and. rd_interval .gt. 1) then
-                      rd_interval = 1
-                  endif
-                  call output_timestep
-                  idump = idump + 1
-              endif
-              call timer_stop(2)
-          endif
-       end do
-
-       call btio_cleanup
-
-       call timer_stop(1)
-       t = timer_read(1)
-       
-       call verify(niter, class, verified)
-
-       call mpi_reduce(t, tmax, 1, 
-     >                 dp_type, MPI_MAX, 
-     >                 root, comm_setup, error)
-
-       if (iotype .ne. 0) then
-          t = timer_read(2)
-          if (t .ne. 0.d0) t = 1.0d0 / t
-          call mpi_reduce(t, tiominv, 1, 
-     >                    dp_type, MPI_SUM, 
-     >                    root, comm_setup, error)
-       endif
-
-       if( node .eq. root ) then
-          n3 = 1.0d0*grid_points(1)*grid_points(2)*grid_points(3)
-          navg = (grid_points(1)+grid_points(2)+grid_points(3))/3.0
-          if( tmax .ne. 0. ) then
-             mflops = 1.0e-6*float(niter)*
-     >     (3478.8*n3-17655.7*navg**2+28023.7*navg)
-     >     / tmax
-          else
-             mflops = 0.0
-          endif
-
-          if (iotype .ne. 0) then
-             mbytes = n3 * 40.0 * idump * 1.0d-6
-             tiominv = tiominv / no_nodes
-             t = 0.0
-             if (tiominv .ne. 0.) t = 1.d0 / tiominv
-             tpc = 0.0
-             if (tmax .ne. 0.) tpc = t * 100.0 / tmax
-             write(*,1100) t, tpc, mbytes, mbytes*tiominv
- 1100        format(/' BTIO -- statistics:'/
-     >               '   I/O timing in seconds   : ', f14.2/
-     >               '   I/O timing percentage   : ', f14.2/
-     >               '   Total data written (MB) : ', f14.2/
-     >               '   I/O data rate  (MB/sec) : ', f14.2)
-          endif
-
-         call print_results('BT', class, grid_points(1), 
-     >     grid_points(2), grid_points(3), niter, maxcells*maxcells, 
-     >     total_nodes, tmax, mflops, '          floating point', 
-     >     verified, npbversion,compiletime, cs1, cs2, cs3, cs4, cs5, 
-     >     cs6, '(none)')
-       endif
-
- 999   continue
-       call mpi_barrier(MPI_COMM_WORLD, error)
-       call mpi_finalize(error)
-
-       end
-
diff --git a/examples/smpi/NAS/BT/btio.f b/examples/smpi/NAS/BT/btio.f
deleted file mode 100644 (file)
index 1fb730b..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine setup_btio
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine output_timestep
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine btio_cleanup
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine btio_verify(verified)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      logical verified
-
-      verified = .true.
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine accumulate_norms(xce_acc)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      double precision xce_acc(5)
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine checksum_timestep
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      return
-      end
diff --git a/examples/smpi/NAS/BT/btio_common.f b/examples/smpi/NAS/BT/btio_common.f
deleted file mode 100644 (file)
index 9227a12..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine clear_timestep
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer cio, kio, jio, ix
-
-      do cio=1,ncells
-          do kio=0, cell_size(3,cio)-1
-              do jio=0, cell_size(2,cio)-1
-                  do ix=0,cell_size(1,cio)-1
-                            u(1,ix, jio,kio,cio) = 0
-                            u(2,ix, jio,kio,cio) = 0
-                            u(3,ix, jio,kio,cio) = 0
-                            u(4,ix, jio,kio,cio) = 0
-                            u(5,ix, jio,kio,cio) = 0
-                  enddo
-              enddo
-          enddo
-      enddo
-
-      return
-      end
-
diff --git a/examples/smpi/NAS/BT/copy_faces.f b/examples/smpi/NAS/BT/copy_faces.f
deleted file mode 100644 (file)
index 14b82ca..0000000
+++ /dev/null
@@ -1,316 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine copy_faces
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     
-c This function copies the face values of a variable defined on a set 
-c of cells to the overlap locations of the adjacent sets of cells. 
-c Because a set of cells interfaces in each direction with exactly one 
-c other set, we only need to fill six different buffers. We could try to 
-c overlap communication with computation, by computing
-c some internal values while communicating boundary values, but this
-c adds so much overhead that it's not clearly useful. 
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer i, j, k, c, m, requests(0:11), p0, p1, 
-     >     p2, p3, p4, p5, b_size(0:5), ss(0:5), 
-     >     sr(0:5), error, statuses(MPI_STATUS_SIZE, 0:11)
-
-c---------------------------------------------------------------------
-c     exit immediately if there are no faces to be copied           
-c---------------------------------------------------------------------
-      if (no_nodes .eq. 1) then
-         call compute_rhs
-         return
-      endif
-
-      ss(0) = start_send_east
-      ss(1) = start_send_west
-      ss(2) = start_send_north
-      ss(3) = start_send_south
-      ss(4) = start_send_top
-      ss(5) = start_send_bottom
-
-      sr(0) = start_recv_east
-      sr(1) = start_recv_west
-      sr(2) = start_recv_north
-      sr(3) = start_recv_south
-      sr(4) = start_recv_top
-      sr(5) = start_recv_bottom
-
-      b_size(0) = east_size   
-      b_size(1) = west_size   
-      b_size(2) = north_size  
-      b_size(3) = south_size  
-      b_size(4) = top_size    
-      b_size(5) = bottom_size 
-
-c---------------------------------------------------------------------
-c     because the difference stencil for the diagonalized scheme is 
-c     orthogonal, we do not have to perform the staged copying of faces, 
-c     but can send all face information simultaneously to the neighboring 
-c     cells in all directions          
-c---------------------------------------------------------------------
-      p0 = 0
-      p1 = 0
-      p2 = 0
-      p3 = 0
-      p4 = 0
-      p5 = 0
-
-      do  c = 1, ncells
-
-c---------------------------------------------------------------------
-c     fill the buffer to be sent to eastern neighbors (i-dir)
-c---------------------------------------------------------------------
-         if (cell_coord(1,c) .ne. ncells) then
-            do   k = 0, cell_size(3,c)-1
-               do   j = 0, cell_size(2,c)-1
-                  do   i = cell_size(1,c)-2, cell_size(1,c)-1
-                     do   m = 1, 5
-                        out_buffer(ss(0)+p0) = u(m,i,j,k,c)
-                        p0 = p0 + 1
-                     end do
-                  end do
-               end do
-            end do
-         endif
-
-c---------------------------------------------------------------------
-c     fill the buffer to be sent to western neighbors 
-c---------------------------------------------------------------------
-         if (cell_coord(1,c) .ne. 1) then
-            do   k = 0, cell_size(3,c)-1
-               do   j = 0, cell_size(2,c)-1
-                  do   i = 0, 1
-                     do   m = 1, 5
-                        out_buffer(ss(1)+p1) = u(m,i,j,k,c)
-                        p1 = p1 + 1
-                     end do
-                  end do
-               end do
-            end do
-
-         endif
-
-c---------------------------------------------------------------------
-c     fill the buffer to be sent to northern neighbors (j_dir)
-c---------------------------------------------------------------------
-         if (cell_coord(2,c) .ne. ncells) then
-            do   k = 0, cell_size(3,c)-1
-               do   j = cell_size(2,c)-2, cell_size(2,c)-1
-                  do   i = 0, cell_size(1,c)-1
-                     do   m = 1, 5
-                        out_buffer(ss(2)+p2) = u(m,i,j,k,c)
-                        p2 = p2 + 1
-                     end do
-                  end do
-               end do
-            end do
-         endif
-
-c---------------------------------------------------------------------
-c     fill the buffer to be sent to southern neighbors 
-c---------------------------------------------------------------------
-         if (cell_coord(2,c).ne. 1) then
-            do   k = 0, cell_size(3,c)-1
-               do   j = 0, 1
-                  do   i = 0, cell_size(1,c)-1   
-                     do   m = 1, 5
-                        out_buffer(ss(3)+p3) = u(m,i,j,k,c)
-                        p3 = p3 + 1
-                     end do
-                  end do
-               end do
-            end do
-         endif
-
-c---------------------------------------------------------------------
-c     fill the buffer to be sent to top neighbors (k-dir)
-c---------------------------------------------------------------------
-         if (cell_coord(3,c) .ne. ncells) then
-            do   k = cell_size(3,c)-2, cell_size(3,c)-1
-               do   j = 0, cell_size(2,c)-1
-                  do   i = 0, cell_size(1,c)-1
-                     do   m = 1, 5
-                        out_buffer(ss(4)+p4) = u(m,i,j,k,c)
-                        p4 = p4 + 1
-                     end do
-                  end do
-               end do
-            end do
-         endif
-
-c---------------------------------------------------------------------
-c     fill the buffer to be sent to bottom neighbors
-c---------------------------------------------------------------------
-         if (cell_coord(3,c).ne. 1) then
-            do    k=0, 1
-               do   j = 0, cell_size(2,c)-1
-                  do   i = 0, cell_size(1,c)-1
-                     do   m = 1, 5
-                        out_buffer(ss(5)+p5) = u(m,i,j,k,c)
-                        p5 = p5 + 1
-                     end do
-                  end do
-               end do
-            end do
-         endif
-
-c---------------------------------------------------------------------
-c     cell loop
-c---------------------------------------------------------------------
-      end do
-
-      call mpi_irecv(in_buffer(sr(0)), b_size(0), 
-     >     dp_type, successor(1), WEST,  
-     >     comm_rhs, requests(0), error)
-      call mpi_irecv(in_buffer(sr(1)), b_size(1), 
-     >     dp_type, predecessor(1), EAST,  
-     >     comm_rhs, requests(1), error)
-      call mpi_irecv(in_buffer(sr(2)), b_size(2), 
-     >     dp_type, successor(2), SOUTH, 
-     >     comm_rhs, requests(2), error)
-      call mpi_irecv(in_buffer(sr(3)), b_size(3), 
-     >     dp_type, predecessor(2), NORTH, 
-     >     comm_rhs, requests(3), error)
-      call mpi_irecv(in_buffer(sr(4)), b_size(4), 
-     >     dp_type, successor(3), BOTTOM,
-     >     comm_rhs, requests(4), error)
-      call mpi_irecv(in_buffer(sr(5)), b_size(5), 
-     >     dp_type, predecessor(3), TOP,   
-     >     comm_rhs, requests(5), error)
-
-      call mpi_isend(out_buffer(ss(0)), b_size(0), 
-     >     dp_type, successor(1),   EAST, 
-     >     comm_rhs, requests(6), error)
-      call mpi_isend(out_buffer(ss(1)), b_size(1), 
-     >     dp_type, predecessor(1), WEST, 
-     >     comm_rhs, requests(7), error)
-      call mpi_isend(out_buffer(ss(2)), b_size(2), 
-     >     dp_type,successor(2),   NORTH, 
-     >     comm_rhs, requests(8), error)
-      call mpi_isend(out_buffer(ss(3)), b_size(3), 
-     >     dp_type,predecessor(2), SOUTH, 
-     >     comm_rhs, requests(9), error)
-      call mpi_isend(out_buffer(ss(4)), b_size(4), 
-     >     dp_type,successor(3),   TOP, 
-     >     comm_rhs,   requests(10), error)
-      call mpi_isend(out_buffer(ss(5)), b_size(5), 
-     >     dp_type,predecessor(3), BOTTOM, 
-     >     comm_rhs,requests(11), error)
-
-
-      call mpi_waitall(12, requests, statuses, error)
-
-c---------------------------------------------------------------------
-c     unpack the data that has just been received;             
-c---------------------------------------------------------------------
-      p0 = 0
-      p1 = 0
-      p2 = 0
-      p3 = 0
-      p4 = 0
-      p5 = 0
-
-      do   c = 1, ncells
-
-         if (cell_coord(1,c) .ne. 1) then
-            do   k = 0, cell_size(3,c)-1
-               do   j = 0, cell_size(2,c)-1
-                  do   i = -2, -1
-                     do   m = 1, 5
-                        u(m,i,j,k,c) = in_buffer(sr(1)+p0)
-                        p0 = p0 + 1
-                     end do
-                  end do
-               end do
-            end do
-         endif
-
-         if (cell_coord(1,c) .ne. ncells) then
-            do  k = 0, cell_size(3,c)-1
-               do  j = 0, cell_size(2,c)-1
-                  do  i = cell_size(1,c), cell_size(1,c)+1
-                     do   m = 1, 5
-                        u(m,i,j,k,c) = in_buffer(sr(0)+p1)
-                        p1 = p1 + 1
-                     end do
-                  end do
-               end do
-            end do
-         end if
-            
-         if (cell_coord(2,c) .ne. 1) then
-            do  k = 0, cell_size(3,c)-1
-               do   j = -2, -1
-                  do  i = 0, cell_size(1,c)-1
-                     do   m = 1, 5
-                        u(m,i,j,k,c) = in_buffer(sr(3)+p2)
-                        p2 = p2 + 1
-                     end do
-                  end do
-               end do
-            end do
-
-         endif
-            
-         if (cell_coord(2,c) .ne. ncells) then
-            do  k = 0, cell_size(3,c)-1
-               do   j = cell_size(2,c), cell_size(2,c)+1
-                  do  i = 0, cell_size(1,c)-1
-                     do   m = 1, 5
-                        u(m,i,j,k,c) = in_buffer(sr(2)+p3)
-                        p3 = p3 + 1
-                     end do
-                  end do
-               end do
-            end do
-         endif
-
-         if (cell_coord(3,c) .ne. 1) then
-            do  k = -2, -1
-               do  j = 0, cell_size(2,c)-1
-                  do  i = 0, cell_size(1,c)-1
-                     do   m = 1, 5
-                        u(m,i,j,k,c) = in_buffer(sr(5)+p4)
-                        p4 = p4 + 1
-                     end do
-                  end do
-               end do
-            end do
-         endif
-
-         if (cell_coord(3,c) .ne. ncells) then
-            do  k = cell_size(3,c), cell_size(3,c)+1
-               do  j = 0, cell_size(2,c)-1
-                  do  i = 0, cell_size(1,c)-1
-                     do   m = 1, 5
-                        u(m,i,j,k,c) = in_buffer(sr(4)+p5)
-                        p5 = p5 + 1
-                     end do
-                  end do
-               end do
-            end do
-         endif
-
-c---------------------------------------------------------------------
-c     cells loop
-c---------------------------------------------------------------------
-      end do
-
-c---------------------------------------------------------------------
-c     do the rest of the rhs that uses the copied face values          
-c---------------------------------------------------------------------
-      call compute_rhs
-
-      return
-      end
diff --git a/examples/smpi/NAS/BT/define.f b/examples/smpi/NAS/BT/define.f
deleted file mode 100644 (file)
index 03c4c6e..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine compute_buffer_size(dim)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      include 'header.h'
-
-      integer  c, dim, face_size
-
-      if (ncells .eq. 1) return
-
-c---------------------------------------------------------------------
-c     compute the actual sizes of the buffers; note that there is 
-c     always one cell face that doesn't need buffer space, because it 
-c     is at the boundary of the grid
-c---------------------------------------------------------------------
-      west_size = 0
-      east_size = 0
-
-      do   c = 1, ncells
-         face_size = cell_size(2,c) * cell_size(3,c) * dim * 2
-         if (cell_coord(1,c).ne.1) west_size = west_size + face_size
-         if (cell_coord(1,c).ne.ncells) east_size = east_size + 
-     >        face_size 
-      end do
-
-      north_size = 0
-      south_size = 0
-      do   c = 1, ncells
-         face_size = cell_size(1,c)*cell_size(3,c) * dim * 2
-         if (cell_coord(2,c).ne.1) south_size = south_size + face_size
-         if (cell_coord(2,c).ne.ncells) north_size = north_size + 
-     >        face_size 
-      end do
-
-      top_size = 0
-      bottom_size = 0
-      do   c = 1, ncells
-         face_size = cell_size(1,c) * cell_size(2,c) * dim * 2
-         if (cell_coord(3,c).ne.1) bottom_size = bottom_size + 
-     >        face_size
-         if (cell_coord(3,c).ne.ncells) top_size = top_size +
-     >        face_size     
-      end do
-
-      start_send_west   = 1
-      start_send_east   = start_send_west   + west_size
-      start_send_south  = start_send_east   + east_size
-      start_send_north  = start_send_south  + south_size
-      start_send_bottom = start_send_north  + north_size
-      start_send_top    = start_send_bottom + bottom_size
-      start_recv_west   = 1
-      start_recv_east   = start_recv_west   + west_size
-      start_recv_south  = start_recv_east   + east_size
-      start_recv_north  = start_recv_south  + south_size
-      start_recv_bottom = start_recv_north  + north_size
-      start_recv_top    = start_recv_bottom + bottom_size
-
-      return
-      end
-
diff --git a/examples/smpi/NAS/BT/epio.f b/examples/smpi/NAS/BT/epio.f
deleted file mode 100644 (file)
index 52b6309..0000000
+++ /dev/null
@@ -1,165 +0,0 @@
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine setup_btio
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      character*(128) newfilenm
-      integer m
-
-      if (node .lt. 10000) then
-          write (newfilenm, 996) filenm,node
-      else
-          print *, 'error generating file names (> 10000 nodes)'
-          stop
-      endif
-
-996   format (a,'.',i4.4)
-
-      open (unit=99, file=newfilenm, form='unformatted',
-     $       status='unknown')
-
-      do m = 1, 5
-         xce_sub(m) = 0.d0
-      end do
-
-      idump_sub = 0
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine output_timestep
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer ix, iio, jio, kio, cio, aio
-
-      do cio=1,ncells
-          write(99)
-     $         ((((u(aio,ix, jio,kio,cio),aio=1,5),
-     $             ix=0, cell_size(1,cio)-1),
-     $             jio=0, cell_size(2,cio)-1),
-     $             kio=0, cell_size(3,cio)-1)
-      enddo
-
-      idump_sub = idump_sub + 1
-      if (rd_interval .gt. 0) then
-         if (idump_sub .ge. rd_interval) then
-
-            rewind(99)
-            call acc_sub_norms(idump+1)
-
-            rewind(99)
-            idump_sub = 0
-         endif
-      endif
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine acc_sub_norms(idump_cur)
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer idump_cur
-
-      integer ix, jio, kio, cio, ii, m, ichunk
-      double precision xce_single(5)
-
-      ichunk = idump_cur - idump_sub + 1
-      do ii=0, idump_sub-1
-        do cio=1,ncells
-          read(99)
-     $         ((((u(m,ix, jio,kio,cio),m=1,5),
-     $             ix=0, cell_size(1,cio)-1),
-     $             jio=0, cell_size(2,cio)-1),
-     $             kio=0, cell_size(3,cio)-1)
-        enddo
-
-        if (node .eq. root) print *, 'Reading data set ', ii+ichunk
-
-        call error_norm(xce_single)
-        do m = 1, 5
-           xce_sub(m) = xce_sub(m) + xce_single(m)
-        end do
-      enddo
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine btio_cleanup
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      close(unit=99)
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine accumulate_norms(xce_acc)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      double precision xce_acc(5)
-
-      character*(128) newfilenm
-      integer m
-
-      if (rd_interval .gt. 0) goto 20
-
-      if (node .lt. 10000) then
-          write (newfilenm, 996) filenm,node
-      else
-          print *, 'error generating file names (> 10000 nodes)'
-          stop
-      endif
-
-996   format (a,'.',i4.4)
-
-      open (unit=99, file=newfilenm,
-     $      form='unformatted')
-
-c     clear the last time step
-
-      call clear_timestep
-
-c     read back the time steps and accumulate norms
-
-      call acc_sub_norms(idump)
-
-      close(unit=99)
-
- 20   continue
-      do m = 1, 5
-         xce_acc(m) = xce_sub(m) / dble(idump)
-      end do
-
-      return
-      end
diff --git a/examples/smpi/NAS/BT/error.f b/examples/smpi/NAS/BT/error.f
deleted file mode 100644 (file)
index 147a582..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine error_norm(rms)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     this function computes the norm of the difference between the
-c     computed solution and the exact solution
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer c, i, j, k, m, ii, jj, kk, d, error
-      double precision xi, eta, zeta, u_exact(5), rms(5), rms_work(5),
-     >     add
-
-      do m = 1, 5 
-         rms_work(m) = 0.0d0
-      enddo
-
-      do c = 1, ncells
-         kk = 0
-         do k = cell_low(3,c), cell_high(3,c)
-            zeta = dble(k) * dnzm1
-            jj = 0
-            do j = cell_low(2,c), cell_high(2,c)
-               eta = dble(j) * dnym1
-               ii = 0
-               do i = cell_low(1,c), cell_high(1,c)
-                  xi = dble(i) * dnxm1
-                  call exact_solution(xi, eta, zeta, u_exact)
-
-                  do m = 1, 5
-                     add = u(m,ii,jj,kk,c)-u_exact(m)
-                     rms_work(m) = rms_work(m) + add*add
-                  enddo
-                  ii = ii + 1
-               enddo
-               jj = jj + 1
-            enddo
-            kk = kk + 1
-         enddo
-      enddo
-
-      call mpi_allreduce(rms_work, rms, 5, dp_type, 
-     >     MPI_SUM, comm_setup, error)
-
-      do m = 1, 5
-         do d = 1, 3
-            rms(m) = rms(m) / dble(grid_points(d)-2)
-         enddo
-         rms(m) = dsqrt(rms(m))
-      enddo
-
-      return
-      end
-
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine rhs_norm(rms)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer c, i, j, k, d, m, error
-      double precision rms(5), rms_work(5), add
-
-      do m = 1, 5
-         rms_work(m) = 0.0d0
-      enddo 
-
-      do c = 1, ncells
-         do k = start(3,c), cell_size(3,c)-end(3,c)-1
-            do j = start(2,c), cell_size(2,c)-end(2,c)-1
-               do i = start(1,c), cell_size(1,c)-end(1,c)-1
-                  do m = 1, 5
-                     add = rhs(m,i,j,k,c)
-                     rms_work(m) = rms_work(m) + add*add
-                  enddo 
-               enddo 
-            enddo 
-         enddo 
-      enddo 
-
-      call mpi_allreduce(rms_work, rms, 5, dp_type, 
-     >     MPI_SUM, comm_setup, error)
-
-      do m = 1, 5
-         do d = 1, 3
-            rms(m) = rms(m) / dble(grid_points(d)-2)
-         enddo 
-         rms(m) = dsqrt(rms(m))
-      enddo 
-
-      return
-      end
-
diff --git a/examples/smpi/NAS/BT/exact_rhs.f b/examples/smpi/NAS/BT/exact_rhs.f
deleted file mode 100644 (file)
index 26a2871..0000000
+++ /dev/null
@@ -1,360 +0,0 @@
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine exact_rhs
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     compute the right hand side based on exact solution
-c---------------------------------------------------------------------
-
-      include 'header.h'
-
-      double precision dtemp(5), xi, eta, zeta, dtpp
-      integer          c, m, i, j, k, ip1, im1, jp1, 
-     >     jm1, km1, kp1
-
-
-c---------------------------------------------------------------------
-c     loop over all cells owned by this node                   
-c---------------------------------------------------------------------
-      do c = 1, ncells
-
-c---------------------------------------------------------------------
-c     initialize                                  
-c---------------------------------------------------------------------
-         do k= 0, cell_size(3,c)-1
-            do j = 0, cell_size(2,c)-1
-               do i = 0, cell_size(1,c)-1
-                  do m = 1, 5
-                     forcing(m,i,j,k,c) = 0.0d0
-                  enddo
-               enddo
-            enddo
-         enddo
-
-c---------------------------------------------------------------------
-c     xi-direction flux differences                      
-c---------------------------------------------------------------------
-         do k = start(3,c), cell_size(3,c)-end(3,c)-1
-            zeta = dble(k+cell_low(3,c)) * dnzm1
-            do j = start(2,c), cell_size(2,c)-end(2,c)-1
-               eta = dble(j+cell_low(2,c)) * dnym1
-
-               do i=-2*(1-start(1,c)), cell_size(1,c)+1-2*end(1,c)
-                  xi = dble(i+cell_low(1,c)) * dnxm1
-
-                  call exact_solution(xi, eta, zeta, dtemp)
-                  do m = 1, 5
-                     ue(i,m) = dtemp(m)
-                  enddo
-
-                  dtpp = 1.0d0 / dtemp(1)
-
-                  do m = 2, 5
-                     buf(i,m) = dtpp * dtemp(m)
-                  enddo
-
-                  cuf(i)   = buf(i,2) * buf(i,2)
-                  buf(i,1) = cuf(i) + buf(i,3) * buf(i,3) + 
-     >                 buf(i,4) * buf(i,4) 
-                  q(i) = 0.5d0*(buf(i,2)*ue(i,2) + buf(i,3)*ue(i,3) +
-     >                 buf(i,4)*ue(i,4))
-
-               enddo
-               
-               do i = start(1,c), cell_size(1,c)-end(1,c)-1
-                  im1 = i-1
-                  ip1 = i+1
-
-                  forcing(1,i,j,k,c) = forcing(1,i,j,k,c) -
-     >                 tx2*( ue(ip1,2)-ue(im1,2) )+
-     >                 dx1tx1*(ue(ip1,1)-2.0d0*ue(i,1)+ue(im1,1))
-
-                  forcing(2,i,j,k,c) = forcing(2,i,j,k,c) - tx2 * (
-     >                 (ue(ip1,2)*buf(ip1,2)+c2*(ue(ip1,5)-q(ip1)))-
-     >                 (ue(im1,2)*buf(im1,2)+c2*(ue(im1,5)-q(im1))))+
-     >                 xxcon1*(buf(ip1,2)-2.0d0*buf(i,2)+buf(im1,2))+
-     >                 dx2tx1*( ue(ip1,2)-2.0d0* ue(i,2)+ue(im1,2))
-
-                  forcing(3,i,j,k,c) = forcing(3,i,j,k,c) - tx2 * (
-     >                 ue(ip1,3)*buf(ip1,2)-ue(im1,3)*buf(im1,2))+
-     >                 xxcon2*(buf(ip1,3)-2.0d0*buf(i,3)+buf(im1,3))+
-     >                 dx3tx1*( ue(ip1,3)-2.0d0*ue(i,3) +ue(im1,3))
-                  
-                  forcing(4,i,j,k,c) = forcing(4,i,j,k,c) - tx2*(
-     >                 ue(ip1,4)*buf(ip1,2)-ue(im1,4)*buf(im1,2))+
-     >                 xxcon2*(buf(ip1,4)-2.0d0*buf(i,4)+buf(im1,4))+
-     >                 dx4tx1*( ue(ip1,4)-2.0d0* ue(i,4)+ ue(im1,4))
-
-                  forcing(5,i,j,k,c) = forcing(5,i,j,k,c) - tx2*(
-     >                 buf(ip1,2)*(c1*ue(ip1,5)-c2*q(ip1))-
-     >                 buf(im1,2)*(c1*ue(im1,5)-c2*q(im1)))+
-     >                 0.5d0*xxcon3*(buf(ip1,1)-2.0d0*buf(i,1)+
-     >                 buf(im1,1))+
-     >                 xxcon4*(cuf(ip1)-2.0d0*cuf(i)+cuf(im1))+
-     >                 xxcon5*(buf(ip1,5)-2.0d0*buf(i,5)+buf(im1,5))+
-     >                 dx5tx1*( ue(ip1,5)-2.0d0* ue(i,5)+ ue(im1,5))
-               enddo
-
-c---------------------------------------------------------------------
-c     Fourth-order dissipation                         
-c---------------------------------------------------------------------
-               if (start(1,c) .gt. 0) then
-                  do m = 1, 5
-                     i = 1
-                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
-     >                    (5.0d0*ue(i,m) - 4.0d0*ue(i+1,m) +ue(i+2,m))
-                     i = 2
-                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
-     >                    (-4.0d0*ue(i-1,m) + 6.0d0*ue(i,m) -
-     >                    4.0d0*ue(i+1,m) +       ue(i+2,m))
-                  enddo
-               endif
-
-               do i = start(1,c)*3, cell_size(1,c)-3*end(1,c)-1
-                  do m = 1, 5
-                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp*
-     >                    (ue(i-2,m) - 4.0d0*ue(i-1,m) +
-     >                    6.0d0*ue(i,m) - 4.0d0*ue(i+1,m) + ue(i+2,m))
-                  enddo
-               enddo
-
-               if (end(1,c) .gt. 0) then
-                  do m = 1, 5
-                     i = cell_size(1,c)-3
-                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
-     >                    (ue(i-2,m) - 4.0d0*ue(i-1,m) +
-     >                    6.0d0*ue(i,m) - 4.0d0*ue(i+1,m))
-                     i = cell_size(1,c)-2
-                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
-     >                    (ue(i-2,m) - 4.0d0*ue(i-1,m) + 5.0d0*ue(i,m))
-                  enddo
-               endif
-
-            enddo
-         enddo
-
-c---------------------------------------------------------------------
-c     eta-direction flux differences             
-c---------------------------------------------------------------------
-         do k = start(3,c), cell_size(3,c)-end(3,c)-1          
-            zeta = dble(k+cell_low(3,c)) * dnzm1
-            do i=start(1,c), cell_size(1,c)-end(1,c)-1
-               xi = dble(i+cell_low(1,c)) * dnxm1
-
-               do j=-2*(1-start(2,c)), cell_size(2,c)+1-2*end(2,c)
-                  eta = dble(j+cell_low(2,c)) * dnym1
-
-                  call exact_solution(xi, eta, zeta, dtemp)
-                  do m = 1, 5 
-                     ue(j,m) = dtemp(m)
-                  enddo
-                  
-                  dtpp = 1.0d0/dtemp(1)
-
-                  do m = 2, 5
-                     buf(j,m) = dtpp * dtemp(m)
-                  enddo
-
-                  cuf(j)   = buf(j,3) * buf(j,3)
-                  buf(j,1) = cuf(j) + buf(j,2) * buf(j,2) + 
-     >                 buf(j,4) * buf(j,4)
-                  q(j) = 0.5d0*(buf(j,2)*ue(j,2) + buf(j,3)*ue(j,3) +
-     >                 buf(j,4)*ue(j,4))
-               enddo
-
-               do j = start(2,c), cell_size(2,c)-end(2,c)-1
-                  jm1 = j-1
-                  jp1 = j+1
-                  
-                  forcing(1,i,j,k,c) = forcing(1,i,j,k,c) -
-     >                 ty2*( ue(jp1,3)-ue(jm1,3) )+
-     >                 dy1ty1*(ue(jp1,1)-2.0d0*ue(j,1)+ue(jm1,1))
-
-                  forcing(2,i,j,k,c) = forcing(2,i,j,k,c) - ty2*(
-     >                 ue(jp1,2)*buf(jp1,3)-ue(jm1,2)*buf(jm1,3))+
-     >                 yycon2*(buf(jp1,2)-2.0d0*buf(j,2)+buf(jm1,2))+
-     >                 dy2ty1*( ue(jp1,2)-2.0* ue(j,2)+ ue(jm1,2))
-
-                  forcing(3,i,j,k,c) = forcing(3,i,j,k,c) - ty2*(
-     >                 (ue(jp1,3)*buf(jp1,3)+c2*(ue(jp1,5)-q(jp1)))-
-     >                 (ue(jm1,3)*buf(jm1,3)+c2*(ue(jm1,5)-q(jm1))))+
-     >                 yycon1*(buf(jp1,3)-2.0d0*buf(j,3)+buf(jm1,3))+
-     >                 dy3ty1*( ue(jp1,3)-2.0d0*ue(j,3) +ue(jm1,3))
-
-                  forcing(4,i,j,k,c) = forcing(4,i,j,k,c) - ty2*(
-     >                 ue(jp1,4)*buf(jp1,3)-ue(jm1,4)*buf(jm1,3))+
-     >                 yycon2*(buf(jp1,4)-2.0d0*buf(j,4)+buf(jm1,4))+
-     >                 dy4ty1*( ue(jp1,4)-2.0d0*ue(j,4)+ ue(jm1,4))
-
-                  forcing(5,i,j,k,c) = forcing(5,i,j,k,c) - ty2*(
-     >                 buf(jp1,3)*(c1*ue(jp1,5)-c2*q(jp1))-
-     >                 buf(jm1,3)*(c1*ue(jm1,5)-c2*q(jm1)))+
-     >                 0.5d0*yycon3*(buf(jp1,1)-2.0d0*buf(j,1)+
-     >                 buf(jm1,1))+
-     >                 yycon4*(cuf(jp1)-2.0d0*cuf(j)+cuf(jm1))+
-     >                 yycon5*(buf(jp1,5)-2.0d0*buf(j,5)+buf(jm1,5))+
-     >                 dy5ty1*(ue(jp1,5)-2.0d0*ue(j,5)+ue(jm1,5))
-               enddo
-
-c---------------------------------------------------------------------
-c     Fourth-order dissipation                      
-c---------------------------------------------------------------------
-               if (start(2,c) .gt. 0) then
-                  do m = 1, 5
-                     j = 1
-                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
-     >                    (5.0d0*ue(j,m) - 4.0d0*ue(j+1,m) +ue(j+2,m))
-                     j = 2
-                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
-     >                    (-4.0d0*ue(j-1,m) + 6.0d0*ue(j,m) -
-     >                    4.0d0*ue(j+1,m) +       ue(j+2,m))
-                  enddo
-               endif
-
-               do j = start(2,c)*3, cell_size(2,c)-3*end(2,c)-1
-                  do m = 1, 5
-                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp*
-     >                    (ue(j-2,m) - 4.0d0*ue(j-1,m) +
-     >                    6.0d0*ue(j,m) - 4.0d0*ue(j+1,m) + ue(j+2,m))
-                  enddo
-               enddo
-
-               if (end(2,c) .gt. 0) then
-                  do m = 1, 5
-                     j = cell_size(2,c)-3
-                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
-     >                    (ue(j-2,m) - 4.0d0*ue(j-1,m) +
-     >                    6.0d0*ue(j,m) - 4.0d0*ue(j+1,m))
-                     j = cell_size(2,c)-2
-                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
-     >                    (ue(j-2,m) - 4.0d0*ue(j-1,m) + 5.0d0*ue(j,m))
-
-                  enddo
-               endif
-
-            enddo
-         enddo
-
-c---------------------------------------------------------------------
-c     zeta-direction flux differences                      
-c---------------------------------------------------------------------
-         do j=start(2,c), cell_size(2,c)-end(2,c)-1
-            eta = dble(j+cell_low(2,c)) * dnym1
-            do i = start(1,c), cell_size(1,c)-end(1,c)-1
-               xi = dble(i+cell_low(1,c)) * dnxm1
-
-               do k=-2*(1-start(3,c)), cell_size(3,c)+1-2*end(3,c)
-                  zeta = dble(k+cell_low(3,c)) * dnzm1
-
-                  call exact_solution(xi, eta, zeta, dtemp)
-                  do m = 1, 5
-                     ue(k,m) = dtemp(m)
-                  enddo
-
-                  dtpp = 1.0d0/dtemp(1)
-
-                  do m = 2, 5
-                     buf(k,m) = dtpp * dtemp(m)
-                  enddo
-
-                  cuf(k)   = buf(k,4) * buf(k,4)
-                  buf(k,1) = cuf(k) + buf(k,2) * buf(k,2) + 
-     >                 buf(k,3) * buf(k,3)
-                  q(k) = 0.5d0*(buf(k,2)*ue(k,2) + buf(k,3)*ue(k,3) +
-     >                 buf(k,4)*ue(k,4))
-               enddo
-
-               do k=start(3,c), cell_size(3,c)-end(3,c)-1
-                  km1 = k-1
-                  kp1 = k+1
-                  
-                  forcing(1,i,j,k,c) = forcing(1,i,j,k,c) -
-     >                 tz2*( ue(kp1,4)-ue(km1,4) )+
-     >                 dz1tz1*(ue(kp1,1)-2.0d0*ue(k,1)+ue(km1,1))
-
-                  forcing(2,i,j,k,c) = forcing(2,i,j,k,c) - tz2 * (
-     >                 ue(kp1,2)*buf(kp1,4)-ue(km1,2)*buf(km1,4))+
-     >                 zzcon2*(buf(kp1,2)-2.0d0*buf(k,2)+buf(km1,2))+
-     >                 dz2tz1*( ue(kp1,2)-2.0d0* ue(k,2)+ ue(km1,2))
-
-                  forcing(3,i,j,k,c) = forcing(3,i,j,k,c) - tz2 * (
-     >                 ue(kp1,3)*buf(kp1,4)-ue(km1,3)*buf(km1,4))+
-     >                 zzcon2*(buf(kp1,3)-2.0d0*buf(k,3)+buf(km1,3))+
-     >                 dz3tz1*(ue(kp1,3)-2.0d0*ue(k,3)+ue(km1,3))
-
-                  forcing(4,i,j,k,c) = forcing(4,i,j,k,c) - tz2 * (
-     >                 (ue(kp1,4)*buf(kp1,4)+c2*(ue(kp1,5)-q(kp1)))-
-     >                 (ue(km1,4)*buf(km1,4)+c2*(ue(km1,5)-q(km1))))+
-     >                 zzcon1*(buf(kp1,4)-2.0d0*buf(k,4)+buf(km1,4))+
-     >                 dz4tz1*( ue(kp1,4)-2.0d0*ue(k,4) +ue(km1,4))
-
-                  forcing(5,i,j,k,c) = forcing(5,i,j,k,c) - tz2 * (
-     >                 buf(kp1,4)*(c1*ue(kp1,5)-c2*q(kp1))-
-     >                 buf(km1,4)*(c1*ue(km1,5)-c2*q(km1)))+
-     >                 0.5d0*zzcon3*(buf(kp1,1)-2.0d0*buf(k,1)
-     >                 +buf(km1,1))+
-     >                 zzcon4*(cuf(kp1)-2.0d0*cuf(k)+cuf(km1))+
-     >                 zzcon5*(buf(kp1,5)-2.0d0*buf(k,5)+buf(km1,5))+
-     >                 dz5tz1*( ue(kp1,5)-2.0d0*ue(k,5)+ ue(km1,5))
-               enddo
-
-c---------------------------------------------------------------------
-c     Fourth-order dissipation                        
-c---------------------------------------------------------------------
-               if (start(3,c) .gt. 0) then
-                  do m = 1, 5
-                     k = 1
-                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
-     >                    (5.0d0*ue(k,m) - 4.0d0*ue(k+1,m) +ue(k+2,m))
-                     k = 2
-                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
-     >                    (-4.0d0*ue(k-1,m) + 6.0d0*ue(k,m) -
-     >                    4.0d0*ue(k+1,m) +       ue(k+2,m))
-                  enddo
-               endif
-
-               do k = start(3,c)*3, cell_size(3,c)-3*end(3,c)-1
-                  do m = 1, 5
-                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp*
-     >                    (ue(k-2,m) - 4.0d0*ue(k-1,m) +
-     >                    6.0d0*ue(k,m) - 4.0d0*ue(k+1,m) + ue(k+2,m))
-                  enddo
-               enddo
-
-               if (end(3,c) .gt. 0) then
-                  do m = 1, 5
-                     k = cell_size(3,c)-3
-                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
-     >                    (ue(k-2,m) - 4.0d0*ue(k-1,m) +
-     >                    6.0d0*ue(k,m) - 4.0d0*ue(k+1,m))
-                     k = cell_size(3,c)-2
-                     forcing(m,i,j,k,c) = forcing(m,i,j,k,c) - dssp *
-     >                    (ue(k-2,m) - 4.0d0*ue(k-1,m) + 5.0d0*ue(k,m))
-                  enddo
-               endif
-
-            enddo
-         enddo
-
-c---------------------------------------------------------------------
-c     now change the sign of the forcing function, 
-c---------------------------------------------------------------------
-         do k = start(3,c), cell_size(3,c)-end(3,c)-1
-            do j = start(2,c), cell_size(2,c)-end(2,c)-1
-               do i = start(1,c), cell_size(1,c)-end(1,c)-1
-                  do m = 1, 5
-                     forcing(m,i,j,k,c) = -1.d0 * forcing(m,i,j,k,c)
-                  enddo
-               enddo
-            enddo
-         enddo
-
-      enddo
-
-      return
-      end
diff --git a/examples/smpi/NAS/BT/exact_solution.f b/examples/smpi/NAS/BT/exact_solution.f
deleted file mode 100644 (file)
index b093b46..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine exact_solution(xi,eta,zeta,dtemp)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     this function returns the exact solution at point xi, eta, zeta  
-c---------------------------------------------------------------------
-
-      include 'header.h'
-
-      double precision  xi, eta, zeta, dtemp(5)
-      integer m
-
-      do m = 1, 5
-         dtemp(m) =  ce(m,1) +
-     >     xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
-     >     eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
-     >     zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) + 
-     >     zeta*ce(m,13))))
-      enddo
-
-      return
-      end
-
-
diff --git a/examples/smpi/NAS/BT/fortran_io.f b/examples/smpi/NAS/BT/fortran_io.f
deleted file mode 100644 (file)
index d3085a0..0000000
+++ /dev/null
@@ -1,174 +0,0 @@
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine setup_btio
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      character*(128) newfilenm
-      integer m, ierr
-
-      if (node.eq.root) record_length = 40/fortran_rec_sz
-      call mpi_bcast(record_length, 1, MPI_INTEGER,
-     >                root, comm_setup, ierr)
-
-      open (unit=99, file=filenm,
-     $      form='unformatted', access='direct',
-     $      recl=record_length)
-
-      do m = 1, 5
-         xce_sub(m) = 0.d0
-      end do
-
-      idump_sub = 0
-
-      return
-      end
-
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine output_timestep
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer ix, jio, kio, cio
-
-      do cio=1,ncells
-          do kio=0, cell_size(3,cio)-1
-              do jio=0, cell_size(2,cio)-1
-                  iseek=(cell_low(1,cio) +
-     $                   PROBLEM_SIZE*((cell_low(2,cio)+jio) +
-     $                   PROBLEM_SIZE*((cell_low(3,cio)+kio) +
-     $                   PROBLEM_SIZE*idump_sub)))
-
-                  do ix=0,cell_size(1,cio)-1
-                      write(99, rec=iseek+ix+1)
-     $                      u(1,ix, jio,kio,cio),
-     $                      u(2,ix, jio,kio,cio),
-     $                      u(3,ix, jio,kio,cio),
-     $                      u(4,ix, jio,kio,cio),
-     $                      u(5,ix, jio,kio,cio)
-                  enddo
-              enddo
-          enddo
-      enddo
-
-      idump_sub = idump_sub + 1
-      if (rd_interval .gt. 0) then
-         if (idump_sub .ge. rd_interval) then
-
-            call acc_sub_norms(idump+1)
-
-            idump_sub = 0
-         endif
-      endif
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine acc_sub_norms(idump_cur)
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer idump_cur
-
-      integer ix, jio, kio, cio, ii, m, ichunk
-      double precision xce_single(5)
-
-      ichunk = idump_cur - idump_sub + 1
-      do ii=0, idump_sub-1
-        do cio=1,ncells
-          do kio=0, cell_size(3,cio)-1
-              do jio=0, cell_size(2,cio)-1
-                  iseek=(cell_low(1,cio) +
-     $                   PROBLEM_SIZE*((cell_low(2,cio)+jio) +
-     $                   PROBLEM_SIZE*((cell_low(3,cio)+kio) +
-     $                   PROBLEM_SIZE*ii)))
-
-
-                  do ix=0,cell_size(1,cio)-1
-                      read(99, rec=iseek+ix+1)
-     $                      u(1,ix, jio,kio,cio),
-     $                      u(2,ix, jio,kio,cio),
-     $                      u(3,ix, jio,kio,cio),
-     $                      u(4,ix, jio,kio,cio),
-     $                      u(5,ix, jio,kio,cio)
-                  enddo
-              enddo
-          enddo
-        enddo
-
-        if (node .eq. root) print *, 'Reading data set ', ii+ichunk
-
-        call error_norm(xce_single)
-        do m = 1, 5
-           xce_sub(m) = xce_sub(m) + xce_single(m)
-        end do
-      enddo
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine btio_cleanup
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      close(unit=99)
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine accumulate_norms(xce_acc)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-      include 'header.h'
-      include 'mpinpb.h'
-
-      double precision xce_acc(5)
-      integer m
-
-      if (rd_interval .gt. 0) goto 20
-
-      open (unit=99, file=filenm,
-     $      form='unformatted', access='direct',
-     $      recl=record_length)
-
-c     clear the last time step
-
-      call clear_timestep
-
-c     read back the time steps and accumulate norms
-
-      call acc_sub_norms(idump)
-
-      close(unit=99)
-
- 20   continue
-      do m = 1, 5
-         xce_acc(m) = xce_sub(m) / dble(idump)
-      end do
-
-      return
-      end
diff --git a/examples/smpi/NAS/BT/full_mpiio.f b/examples/smpi/NAS/BT/full_mpiio.f
deleted file mode 100644 (file)
index ecfd41c..0000000
+++ /dev/null
@@ -1,307 +0,0 @@
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine setup_btio
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer ierr
-      integer mstatus(MPI_STATUS_SIZE)
-      integer sizes(4), starts(4), subsizes(4)
-      integer cell_btype(maxcells), cell_ftype(maxcells)
-      integer cell_blength(maxcells)
-      integer info
-      character*20 cb_nodes, cb_size
-      integer c, m
-      integer cell_disp(maxcells)
-
-       call mpi_bcast(collbuf_nodes, 1, MPI_INTEGER,
-     >                root, comm_setup, ierr)
-
-       call mpi_bcast(collbuf_size, 1, MPI_INTEGER,
-     >                root, comm_setup, ierr)
-
-       if (collbuf_nodes .eq. 0) then
-          info = MPI_INFO_NULL
-       else
-          write (cb_nodes,*) collbuf_nodes
-          write (cb_size,*) collbuf_size
-          call MPI_Info_create(info, ierr)
-          call MPI_Info_set(info, 'cb_nodes', cb_nodes, ierr)
-          call MPI_Info_set(info, 'cb_buffer_size', cb_size, ierr)
-          call MPI_Info_set(info, 'collective_buffering', 'true', ierr)
-       endif
-
-       call MPI_Type_contiguous(5, MPI_DOUBLE_PRECISION,
-     $                          element, ierr)
-       call MPI_Type_commit(element, ierr)
-       call MPI_Type_extent(element, eltext, ierr)
-
-       do  c = 1, ncells
-c
-c Outer array dimensions ar same for every cell
-c
-           sizes(1) = IMAX+4
-           sizes(2) = JMAX+4
-           sizes(3) = KMAX+4
-c
-c 4th dimension is cell number, total of maxcells cells
-c
-           sizes(4) = maxcells
-c
-c Internal dimensions of cells can differ slightly between cells
-c
-           subsizes(1) = cell_size(1, c)
-           subsizes(2) = cell_size(2, c)
-           subsizes(3) = cell_size(3, c)
-c
-c Cell is 4th dimension, 1 cell per cell type to handle varying 
-c cell sub-array sizes
-c
-           subsizes(4) = 1
-
-c
-c type constructors use 0-based start addresses
-c
-           starts(1) = 2 
-           starts(2) = 2
-           starts(3) = 2
-           starts(4) = c-1
-
-c 
-c Create buftype for a cell
-c
-           call MPI_Type_create_subarray(4, sizes, subsizes, 
-     $          starts, MPI_ORDER_FORTRAN, element, 
-     $          cell_btype(c), ierr)
-c
-c block length and displacement for joining cells - 
-c 1 cell buftype per block, cell buftypes have own displacment
-c generated from cell number (4th array dimension)
-c
-           cell_blength(c) = 1
-           cell_disp(c) = 0
-
-       enddo
-c
-c Create combined buftype for all cells
-c
-       call MPI_Type_struct(ncells, cell_blength, cell_disp,
-     $            cell_btype, combined_btype, ierr)
-       call MPI_Type_commit(combined_btype, ierr)
-
-       do  c = 1, ncells
-c
-c Entire array size
-c
-           sizes(1) = PROBLEM_SIZE
-           sizes(2) = PROBLEM_SIZE
-           sizes(3) = PROBLEM_SIZE
-
-c
-c Size of c'th cell
-c
-           subsizes(1) = cell_size(1, c)
-           subsizes(2) = cell_size(2, c)
-           subsizes(3) = cell_size(3, c)
-
-c
-c Starting point in full array of c'th cell
-c
-           starts(1) = cell_low(1,c)
-           starts(2) = cell_low(2,c)
-           starts(3) = cell_low(3,c)
-
-           call MPI_Type_create_subarray(3, sizes, subsizes,
-     $          starts, MPI_ORDER_FORTRAN,
-     $          element, cell_ftype(c), ierr)
-           cell_blength(c) = 1
-           cell_disp(c) = 0
-       enddo
-
-       call MPI_Type_struct(ncells, cell_blength, cell_disp,
-     $            cell_ftype, combined_ftype, ierr)
-       call MPI_Type_commit(combined_ftype, ierr)
-
-       iseek=0
-       if (node .eq. root) then
-          call MPI_File_delete(filenm, MPI_INFO_NULL, ierr)
-       endif
-
-
-      call MPI_Barrier(comm_solve, ierr)
-
-       call MPI_File_open(comm_solve,
-     $          filenm,
-     $          MPI_MODE_RDWR+MPI_MODE_CREATE,
-     $          MPI_INFO_NULL, fp, ierr)
-
-       if (ierr .ne. MPI_SUCCESS) then
-                print *, 'Error opening file'
-                stop
-       endif
-
-        call MPI_File_set_view(fp, iseek, element, 
-     $          combined_ftype, 'native', info, ierr)
-
-       if (ierr .ne. MPI_SUCCESS) then
-                print *, 'Error setting file view'
-                stop
-       endif
-
-      do m = 1, 5
-         xce_sub(m) = 0.d0
-      end do
-
-      idump_sub = 0
-
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine output_timestep
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer mstatus(MPI_STATUS_SIZE)
-      integer ierr
-
-      call MPI_File_write_at_all(fp, iseek, u,
-     $                           1, combined_btype, mstatus, ierr)
-      if (ierr .ne. MPI_SUCCESS) then
-          print *, 'Error writing to file'
-          stop
-      endif
-
-      call MPI_Type_size(combined_btype, iosize, ierr)
-      iseek = iseek + iosize/eltext
-
-      idump_sub = idump_sub + 1
-      if (rd_interval .gt. 0) then
-         if (idump_sub .ge. rd_interval) then
-
-            iseek = 0
-            call acc_sub_norms(idump+1)
-
-            iseek = 0
-            idump_sub = 0
-         endif
-      endif
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine acc_sub_norms(idump_cur)
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer idump_cur
-
-      integer ii, m, ichunk
-      integer ierr
-      integer mstatus(MPI_STATUS_SIZE)
-      double precision xce_single(5)
-
-      ichunk = idump_cur - idump_sub + 1
-      do ii=0, idump_sub-1
-
-        call MPI_File_read_at_all(fp, iseek, u,
-     $                           1, combined_btype, mstatus, ierr)
-        if (ierr .ne. MPI_SUCCESS) then
-           print *, 'Error reading back file'
-           call MPI_File_close(fp, ierr)
-           stop
-        endif
-
-        call MPI_Type_size(combined_btype, iosize, ierr)
-        iseek = iseek + iosize/eltext
-
-        if (node .eq. root) print *, 'Reading data set ', ii+ichunk
-
-        call error_norm(xce_single)
-        do m = 1, 5
-           xce_sub(m) = xce_sub(m) + xce_single(m)
-        end do
-      enddo
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine btio_cleanup
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer ierr
-
-      call MPI_File_close(fp, ierr)
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-
-      subroutine accumulate_norms(xce_acc)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      double precision xce_acc(5)
-      integer m, ierr
-
-      if (rd_interval .gt. 0) goto 20
-
-      call MPI_File_open(comm_solve,
-     $          filenm,
-     $          MPI_MODE_RDONLY,
-     $          MPI_INFO_NULL,
-     $          fp,
-     $          ierr)
-
-      iseek = 0
-      call MPI_File_set_view(fp, iseek, element, combined_ftype,
-     $          'native', MPI_INFO_NULL, ierr)
-
-c     clear the last time step
-
-      call clear_timestep
-
-c     read back the time steps and accumulate norms
-
-      call acc_sub_norms(idump)
-
-      call MPI_File_close(fp, ierr)
-
- 20   continue
-      do m = 1, 5
-         xce_acc(m) = xce_sub(m) / dble(idump)
-      end do
-
-      return
-      end
-
diff --git a/examples/smpi/NAS/BT/header.h b/examples/smpi/NAS/BT/header.h
deleted file mode 100644 (file)
index 47719da..0000000
+++ /dev/null
@@ -1,137 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-c
-c  header.h
-c
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-      implicit none
-
-c---------------------------------------------------------------------
-c The following include file is generated automatically by the
-c "setparams" utility. It defines 
-c      maxcells:      the square root of the maximum number of processors
-c      problem_size:  12, 64, 102, 162 (for class T, A, B, C)
-c      dt_default:    default time step for this problem size if no
-c                     config file
-c      niter_default: default number of iterations for this problem size
-c---------------------------------------------------------------------
-
-      include 'npbparams.h'
-
-      integer           aa, bb, cc, BLOCK_SIZE
-      parameter (aa=1, bb=2, cc=3, BLOCK_SIZE=5)
-
-      integer           ncells, grid_points(3)
-      double precision  elapsed_time
-      common /global/   elapsed_time, ncells, grid_points
-
-      double precision  tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3, 
-     >                  dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4, 
-     >                  dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt, 
-     >                  ce(5,13), dxmax, dymax, dzmax, xxcon1, xxcon2, 
-     >                  xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1,
-     >                  dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4,
-     >                  yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1,
-     >                  zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1, 
-     >                  dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1, 
-     >                  dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2, 
-     >                  c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt,
-     >                  dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1, 
-     >                  c2dtty1, c2dttz1, comz1, comz4, comz5, comz6, 
-     >                  c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16
-
-      common /constants/ tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3,
-     >                  dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4, 
-     >                  dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt, 
-     >                  ce, dxmax, dymax, dzmax, xxcon1, xxcon2, 
-     >                  xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1,
-     >                  dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4,
-     >                  yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1,
-     >                  zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1, 
-     >                  dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1, 
-     >                  dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2, 
-     >                  c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt,
-     >                  dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1, 
-     >                  c2dtty1, c2dttz1, comz1, comz4, comz5, comz6, 
-     >                  c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16
-
-      integer           EAST, WEST, NORTH, SOUTH, 
-     >                  BOTTOM, TOP
-
-      parameter (EAST=2000, WEST=3000,      NORTH=4000, SOUTH=5000,
-     >           BOTTOM=6000, TOP=7000)
-
-      integer cell_coord (3,maxcells), cell_low (3,maxcells), 
-     >        cell_high  (3,maxcells), cell_size(3,maxcells),
-     >        predecessor(3),          slice    (3,maxcells),
-     >        grid_size  (3),          successor(3)         ,
-     >        start      (3,maxcells), end      (3,maxcells)
-      common /partition/ cell_coord, cell_low, cell_high, cell_size,
-     >                   grid_size, successor, predecessor, slice,
-     >                   start, end
-
-      integer IMAX, JMAX, KMAX, MAX_CELL_DIM, BUF_SIZE
-
-      parameter (MAX_CELL_DIM = (problem_size/maxcells)+1)
-
-      parameter (IMAX=MAX_CELL_DIM,JMAX=MAX_CELL_DIM,KMAX=MAX_CELL_DIM)
-
-      parameter (BUF_SIZE=MAX_CELL_DIM*MAX_CELL_DIM*(maxcells-1)*60+1)
-
-      double precision 
-     >   us      (    -1:IMAX,  -1:JMAX,  -1:KMAX,   maxcells),
-     >   vs      (    -1:IMAX,  -1:JMAX,  -1:KMAX,   maxcells),
-     >   ws      (    -1:IMAX,  -1:JMAX,  -1:KMAX,   maxcells),
-     >   qs      (    -1:IMAX,  -1:JMAX,  -1:KMAX,   maxcells),
-     >   rho_i   (    -1:IMAX,  -1:JMAX,  -1:KMAX,   maxcells),
-     >   square  (    -1:IMAX,  -1:JMAX,  -1:KMAX,   maxcells),
-     >   forcing (5,   0:IMAX-1, 0:JMAX-1, 0:KMAX-1, maxcells),
-     >   u       (5,  -2:IMAX+1,-2:JMAX+1,-2:KMAX+1, maxcells),
-     >   rhs     (5,  -1:IMAX-1,-1:JMAX-1,-1:KMAX-1, maxcells),
-     >   lhsc    (5,5,-1:IMAX-1,-1:JMAX-1,-1:KMAX-1, maxcells),
-     >   backsub_info (5, 0:MAX_CELL_DIM, 0:MAX_CELL_DIM, maxcells),
-     >   in_buffer(BUF_SIZE), out_buffer(BUF_SIZE)
-      common /fields/  u, us, vs, ws, qs, rho_i, square, 
-     >                 rhs, forcing, lhsc, in_buffer, out_buffer,
-     >                 backsub_info
-
-      double precision cv(-2:MAX_CELL_DIM+1),   rhon(-2:MAX_CELL_DIM+1),
-     >                 rhos(-2:MAX_CELL_DIM+1), rhoq(-2:MAX_CELL_DIM+1),
-     >                 cuf(-2:MAX_CELL_DIM+1),  q(-2:MAX_CELL_DIM+1),
-     >                 ue(-2:MAX_CELL_DIM+1,5), buf(-2:MAX_CELL_DIM+1,5)
-      common /work_1d/ cv, rhon, rhos, rhoq, cuf, q, ue, buf
-
-      integer  west_size, east_size, bottom_size, top_size,
-     >         north_size, south_size, start_send_west, 
-     >         start_send_east, start_send_south, start_send_north,
-     >         start_send_bottom, start_send_top, start_recv_west,
-     >         start_recv_east, start_recv_south, start_recv_north,
-     >         start_recv_bottom, start_recv_top
-      common /box/ west_size, east_size, bottom_size,
-     >             top_size, north_size, south_size, 
-     >             start_send_west, start_send_east, start_send_south,
-     >             start_send_north, start_send_bottom, start_send_top,
-     >             start_recv_west, start_recv_east, start_recv_south,
-     >             start_recv_north, start_recv_bottom, start_recv_top
-
-      double precision  tmp_block(5,5), b_inverse(5,5), tmp_vec(5)
-      common /work_solve/ tmp_block, b_inverse, tmp_vec
-
-c
-c     These are used by btio
-c
-      integer collbuf_nodes, collbuf_size, iosize, eltext,
-     $        combined_btype, fp, idump, record_length, element,
-     $        combined_ftype, idump_sub, rd_interval
-      common /btio/ collbuf_nodes, collbuf_size, iosize, eltext,
-     $              combined_btype, fp, idump, record_length,
-     $              idump_sub, rd_interval
-      double precision sum(niter_default), xce_sub(5)
-      common /btio/ sum, xce_sub
-      integer*8 iseek
-      common /btio/ iseek, element, combined_ftype
-
-
-
diff --git a/examples/smpi/NAS/BT/initialize.f b/examples/smpi/NAS/BT/initialize.f
deleted file mode 100644 (file)
index 274cdb1..0000000
+++ /dev/null
@@ -1,308 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine  initialize
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     This subroutine initializes the field variable u using 
-c     tri-linear transfinite interpolation of the boundary values     
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      
-      integer c, i, j, k, m, ii, jj, kk, ix, iy, iz
-      double precision  xi, eta, zeta, Pface(5,3,2), Pxi, Peta, 
-     >     Pzeta, temp(5)
-
-c---------------------------------------------------------------------
-c  Later (in compute_rhs) we compute 1/u for every element. A few of 
-c  the corner elements are not used, but it convenient (and faster) 
-c  to compute the whole thing with a simple loop. Make sure those 
-c  values are nonzero by initializing the whole thing here. 
-c---------------------------------------------------------------------
-      do c = 1, ncells
-         do kk = -1, KMAX
-            do jj = -1, JMAX
-               do ii = -1, IMAX
-                  do m = 1, 5
-                     u(m, ii, jj, kk, c) = 1.0
-                  end do
-               end do
-            end do
-         end do
-      end do
-c---------------------------------------------------------------------
-
-
-
-c---------------------------------------------------------------------
-c     first store the "interpolated" values everywhere on the grid    
-c---------------------------------------------------------------------
-      do c=1, ncells
-         kk = 0
-         do k = cell_low(3,c), cell_high(3,c)
-            zeta = dble(k) * dnzm1
-            jj = 0
-            do j = cell_low(2,c), cell_high(2,c)
-               eta = dble(j) * dnym1
-               ii = 0
-               do i = cell_low(1,c), cell_high(1,c)
-                  xi = dble(i) * dnxm1
-                  
-                  do ix = 1, 2
-                     call exact_solution(dble(ix-1), eta, zeta, 
-     >                    Pface(1,1,ix))
-                  enddo
-
-                  do iy = 1, 2
-                     call exact_solution(xi, dble(iy-1) , zeta, 
-     >                    Pface(1,2,iy))
-                  enddo
-
-                  do iz = 1, 2
-                     call exact_solution(xi, eta, dble(iz-1),   
-     >                    Pface(1,3,iz))
-                  enddo
-
-                  do m = 1, 5
-                     Pxi   = xi   * Pface(m,1,2) + 
-     >                    (1.0d0-xi)   * Pface(m,1,1)
-                     Peta  = eta  * Pface(m,2,2) + 
-     >                    (1.0d0-eta)  * Pface(m,2,1)
-                     Pzeta = zeta * Pface(m,3,2) + 
-     >                    (1.0d0-zeta) * Pface(m,3,1)
-                     
-                     u(m,ii,jj,kk,c) = Pxi + Peta + Pzeta - 
-     >                    Pxi*Peta - Pxi*Pzeta - Peta*Pzeta + 
-     >                    Pxi*Peta*Pzeta
-
-                  enddo
-                  ii = ii + 1
-               enddo
-               jj = jj + 1
-            enddo
-            kk = kk+1
-         enddo
-      enddo
-
-c---------------------------------------------------------------------
-c     now store the exact values on the boundaries        
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     west face                                                  
-c---------------------------------------------------------------------
-      c = slice(1,1)
-      ii = 0
-      xi = 0.0d0
-      kk = 0
-      do k = cell_low(3,c), cell_high(3,c)
-         zeta = dble(k) * dnzm1
-         jj = 0
-         do j = cell_low(2,c), cell_high(2,c)
-            eta = dble(j) * dnym1
-            call exact_solution(xi, eta, zeta, temp)
-            do m = 1, 5
-               u(m,ii,jj,kk,c) = temp(m)
-            enddo
-            jj = jj + 1
-         enddo
-         kk = kk + 1
-      enddo
-
-c---------------------------------------------------------------------
-c     east face                                                      
-c---------------------------------------------------------------------
-      c  = slice(1,ncells)
-      ii = cell_size(1,c)-1
-      xi = 1.0d0
-      kk = 0
-      do k = cell_low(3,c), cell_high(3,c)
-         zeta = dble(k) * dnzm1
-         jj = 0
-         do j = cell_low(2,c), cell_high(2,c)
-            eta = dble(j) * dnym1
-            call exact_solution(xi, eta, zeta, temp)
-            do m = 1, 5
-               u(m,ii,jj,kk,c) = temp(m)
-            enddo
-            jj = jj + 1
-         enddo
-         kk = kk + 1
-      enddo
-
-c---------------------------------------------------------------------
-c     south face                                                 
-c---------------------------------------------------------------------
-      c = slice(2,1)
-      jj = 0
-      eta = 0.0d0
-      kk = 0
-      do k = cell_low(3,c), cell_high(3,c)
-         zeta = dble(k) * dnzm1
-         ii = 0
-         do i = cell_low(1,c), cell_high(1,c)
-            xi = dble(i) * dnxm1
-            call exact_solution(xi, eta, zeta, temp)
-            do m = 1, 5
-               u(m,ii,jj,kk,c) = temp(m)
-            enddo
-            ii = ii + 1
-         enddo
-         kk = kk + 1
-      enddo
-
-
-c---------------------------------------------------------------------
-c     north face                                    
-c---------------------------------------------------------------------
-      c = slice(2,ncells)
-      jj = cell_size(2,c)-1
-      eta = 1.0d0
-      kk = 0
-      do k = cell_low(3,c), cell_high(3,c)
-         zeta = dble(k) * dnzm1
-         ii = 0
-         do i = cell_low(1,c), cell_high(1,c)
-            xi = dble(i) * dnxm1
-            call exact_solution(xi, eta, zeta, temp)
-            do m = 1, 5
-               u(m,ii,jj,kk,c) = temp(m)
-            enddo
-            ii = ii + 1
-         enddo
-         kk = kk + 1
-      enddo
-
-c---------------------------------------------------------------------
-c     bottom face                                       
-c---------------------------------------------------------------------
-      c = slice(3,1)
-      kk = 0
-      zeta = 0.0d0
-      jj = 0
-      do j = cell_low(2,c), cell_high(2,c)
-         eta = dble(j) * dnym1
-         ii = 0
-         do i =cell_low(1,c), cell_high(1,c)
-            xi = dble(i) *dnxm1
-            call exact_solution(xi, eta, zeta, temp)
-            do m = 1, 5
-               u(m,ii,jj,kk,c) = temp(m)
-            enddo
-            ii = ii + 1
-         enddo
-         jj = jj + 1
-      enddo
-
-c---------------------------------------------------------------------
-c     top face     
-c---------------------------------------------------------------------
-      c = slice(3,ncells)
-      kk = cell_size(3,c)-1
-      zeta = 1.0d0
-      jj = 0
-      do j = cell_low(2,c), cell_high(2,c)
-         eta = dble(j) * dnym1
-         ii = 0
-         do i =cell_low(1,c), cell_high(1,c)
-            xi = dble(i) * dnxm1
-            call exact_solution(xi, eta, zeta, temp)
-            do m = 1, 5
-               u(m,ii,jj,kk,c) = temp(m)
-            enddo
-            ii = ii + 1
-         enddo
-         jj = jj + 1
-      enddo
-
-      return
-      end
-
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine lhsinit
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      
-      integer i, j, k, d, c, m, n
-
-c---------------------------------------------------------------------
-c     loop over all cells                                       
-c---------------------------------------------------------------------
-      do c = 1, ncells
-
-c---------------------------------------------------------------------
-c     first, initialize the start and end arrays
-c---------------------------------------------------------------------
-         do d = 1, 3
-            if (cell_coord(d,c) .eq. 1) then
-               start(d,c) = 1
-            else 
-               start(d,c) = 0
-            endif
-            if (cell_coord(d,c) .eq. ncells) then
-               end(d,c) = 1
-            else
-               end(d,c) = 0
-            endif
-         enddo
-
-c---------------------------------------------------------------------
-c     zero the whole left hand side for starters
-c---------------------------------------------------------------------
-         do k = 0, cell_size(3,c)-1
-            do j = 0, cell_size(2,c)-1
-               do i = 0, cell_size(1,c)-1
-                  do m = 1,5
-                     do n = 1, 5
-                        lhsc(m,n,i,j,k,c) = 0.0d0
-                     enddo
-                  enddo
-               enddo
-            enddo
-         enddo
-
-      enddo
-
-      return
-      end
-
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine lhsabinit(lhsa, lhsb, size)
-      implicit none
-
-      integer size
-      double precision lhsa(5, 5, -1:size), lhsb(5, 5, -1:size)
-
-      integer i, m, n
-
-c---------------------------------------------------------------------
-c     next, set all diagonal values to 1. This is overkill, but convenient
-c---------------------------------------------------------------------
-      do i = 0, size
-         do m = 1, 5
-            do n = 1, 5
-               lhsa(m,n,i) = 0.0d0
-               lhsb(m,n,i) = 0.0d0
-            enddo
-            lhsb(m,m,i) = 1.0d0
-         enddo
-      enddo
-
-      return
-      end
-
-
-
diff --git a/examples/smpi/NAS/BT/inputbt.data.sample b/examples/smpi/NAS/BT/inputbt.data.sample
deleted file mode 100644 (file)
index 776654e..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-200       number of time steps
-0.0008d0  dt for class A = 0.0008d0. class B = 0.0003d0  class C = 0.0001d0
-64 64 64
-5 0        write interval (optional read interval) for BTIO
-0 1000000  number of nodes in collective buffering and buffer size for BTIO
diff --git a/examples/smpi/NAS/BT/make_set.f b/examples/smpi/NAS/BT/make_set.f
deleted file mode 100644 (file)
index b8d90c6..0000000
+++ /dev/null
@@ -1,124 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine make_set
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     This function allocates space for a set of cells and fills the set     
-c     such that communication between cells on different nodes is only
-c     nearest neighbor                                                   
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-
-      integer p, i, j, c, dir, size, excess, ierr,ierrcode
-
-c---------------------------------------------------------------------
-c     compute square root; add small number to allow for roundoff
-c     (note: this is computed in setup_mpi.f also, but prefer to do
-c     it twice because of some include file problems).
-c---------------------------------------------------------------------
-      ncells = dint(dsqrt(dble(no_nodes) + 0.00001d0))
-
-c---------------------------------------------------------------------
-c     this makes coding easier
-c---------------------------------------------------------------------
-      p = ncells
-      
-c---------------------------------------------------------------------
-c     determine the location of the cell at the bottom of the 3D 
-c     array of cells
-c---------------------------------------------------------------------
-      cell_coord(1,1) = mod(node,p) 
-      cell_coord(2,1) = node/p 
-      cell_coord(3,1) = 0
-
-c---------------------------------------------------------------------
-c     set the cell_coords for cells in the rest of the z-layers; 
-c     this comes down to a simple linear numbering in the z-direct-
-c     ion, and to the doubly-cyclic numbering in the other dirs     
-c---------------------------------------------------------------------
-      do c=2, p
-         cell_coord(1,c) = mod(cell_coord(1,c-1)+1,p) 
-         cell_coord(2,c) = mod(cell_coord(2,c-1)-1+p,p) 
-         cell_coord(3,c) = c-1
-      end do
-
-c---------------------------------------------------------------------
-c     offset all the coordinates by 1 to adjust for Fortran arrays
-c---------------------------------------------------------------------
-      do dir = 1, 3
-         do c = 1, p
-            cell_coord(dir,c) = cell_coord(dir,c) + 1
-         end do
-      end do
-      
-c---------------------------------------------------------------------
-c     slice(dir,n) contains the sequence number of the cell that is in
-c     coordinate plane n in the dir direction
-c---------------------------------------------------------------------
-      do dir = 1, 3
-         do c = 1, p
-            slice(dir,cell_coord(dir,c)) = c
-         end do
-      end do
-
-
-c---------------------------------------------------------------------
-c     fill the predecessor and successor entries, using the indices 
-c     of the bottom cells (they are the same at each level of k 
-c     anyway) acting as if full periodicity pertains; note that p is
-c     added to those arguments to the mod functions that might
-c     otherwise return wrong values when using the modulo function
-c---------------------------------------------------------------------
-      i = cell_coord(1,1)-1
-      j = cell_coord(2,1)-1
-
-      predecessor(1) = mod(i-1+p,p) + p*j
-      predecessor(2) = i + p*mod(j-1+p,p)
-      predecessor(3) = mod(i+1,p) + p*mod(j-1+p,p)
-      successor(1)   = mod(i+1,p) + p*j
-      successor(2)   = i + p*mod(j+1,p)
-      successor(3)   = mod(i-1+p,p) + p*mod(j+1,p)
-
-c---------------------------------------------------------------------
-c     now compute the sizes of the cells                                    
-c---------------------------------------------------------------------
-      do dir= 1, 3
-c---------------------------------------------------------------------
-c     set cell_coord range for each direction                            
-c---------------------------------------------------------------------
-         size   = grid_points(dir)/p
-         excess = mod(grid_points(dir),p)
-         do c=1, ncells
-            if (cell_coord(dir,c) .le. excess) then
-               cell_size(dir,c) = size+1
-               cell_low(dir,c) = (cell_coord(dir,c)-1)*(size+1)
-               cell_high(dir,c) = cell_low(dir,c)+size
-            else 
-               cell_size(dir,c) = size
-               cell_low(dir,c)  = excess*(size+1)+
-     >              (cell_coord(dir,c)-excess-1)*size
-               cell_high(dir,c) = cell_low(dir,c)+size-1
-            endif
-            if (cell_size(dir, c) .le. 2) then
-               write(*,50)
- 50            format(' Error: Cell size too small. Min size is 3')
-               call MPI_Abort(mpi_comm_world,ierrcode,ierr)
-               stop
-            endif
-         end do
-      end do
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-
diff --git a/examples/smpi/NAS/BT/mpinpb.h b/examples/smpi/NAS/BT/mpinpb.h
deleted file mode 100644 (file)
index f621f08..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      include 'mpif.h'
-
-      integer           node, no_nodes, total_nodes, root, comm_setup, 
-     >                  comm_solve, comm_rhs, dp_type
-      logical           active
-      common /mpistuff/ node, no_nodes, total_nodes, root, comm_setup, 
-     >                  comm_solve, comm_rhs, dp_type, active
-
diff --git a/examples/smpi/NAS/BT/rhs.f b/examples/smpi/NAS/BT/rhs.f
deleted file mode 100644 (file)
index 89171a6..0000000
+++ /dev/null
@@ -1,425 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine compute_rhs
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      include 'header.h'
-
-      integer c, i, j, k, m
-      double precision rho_inv, uijk, up1, um1, vijk, vp1, vm1,
-     >     wijk, wp1, wm1
-
-
-c---------------------------------------------------------------------
-c     loop over all cells owned by this node                           
-c---------------------------------------------------------------------
-      do c = 1, ncells
-
-c---------------------------------------------------------------------
-c     compute the reciprocal of density, and the kinetic energy, 
-c     and the speed of sound.
-c---------------------------------------------------------------------
-         do k = -1, cell_size(3,c)
-            do j = -1, cell_size(2,c)
-               do i = -1, cell_size(1,c)
-                  rho_inv = 1.0d0/u(1,i,j,k,c)
-                  rho_i(i,j,k,c) = rho_inv
-                  us(i,j,k,c) = u(2,i,j,k,c) * rho_inv
-                  vs(i,j,k,c) = u(3,i,j,k,c) * rho_inv
-                  ws(i,j,k,c) = u(4,i,j,k,c) * rho_inv
-                  square(i,j,k,c)     = 0.5d0* (
-     >                 u(2,i,j,k,c)*u(2,i,j,k,c) + 
-     >                 u(3,i,j,k,c)*u(3,i,j,k,c) +
-     >                 u(4,i,j,k,c)*u(4,i,j,k,c) ) * rho_inv
-                  qs(i,j,k,c) = square(i,j,k,c) * rho_inv
-               enddo
-            enddo
-         enddo
-
-c---------------------------------------------------------------------
-c copy the exact forcing term to the right hand side;  because 
-c this forcing term is known, we can store it on the whole of every 
-c cell,  including the boundary                   
-c---------------------------------------------------------------------
-
-         do k = 0, cell_size(3,c)-1
-            do j = 0, cell_size(2,c)-1
-               do i = 0, cell_size(1,c)-1
-                  do m = 1, 5
-                     rhs(m,i,j,k,c) = forcing(m,i,j,k,c)
-                  enddo
-               enddo
-            enddo
-         enddo
-
-
-c---------------------------------------------------------------------
-c     compute xi-direction fluxes 
-c---------------------------------------------------------------------
-         do k = start(3,c), cell_size(3,c)-end(3,c)-1
-            do j = start(2,c), cell_size(2,c)-end(2,c)-1
-               do i = start(1,c), cell_size(1,c)-end(1,c)-1
-                  uijk = us(i,j,k,c)
-                  up1  = us(i+1,j,k,c)
-                  um1  = us(i-1,j,k,c)
-
-                  rhs(1,i,j,k,c) = rhs(1,i,j,k,c) + dx1tx1 * 
-     >                 (u(1,i+1,j,k,c) - 2.0d0*u(1,i,j,k,c) + 
-     >                 u(1,i-1,j,k,c)) -
-     >                 tx2 * (u(2,i+1,j,k,c) - u(2,i-1,j,k,c))
-
-                  rhs(2,i,j,k,c) = rhs(2,i,j,k,c) + dx2tx1 * 
-     >                 (u(2,i+1,j,k,c) - 2.0d0*u(2,i,j,k,c) + 
-     >                 u(2,i-1,j,k,c)) +
-     >                 xxcon2*con43 * (up1 - 2.0d0*uijk + um1) -
-     >                 tx2 * (u(2,i+1,j,k,c)*up1 - 
-     >                 u(2,i-1,j,k,c)*um1 +
-     >                 (u(5,i+1,j,k,c)- square(i+1,j,k,c)-
-     >                 u(5,i-1,j,k,c)+ square(i-1,j,k,c))*
-     >                 c2)
-
-                  rhs(3,i,j,k,c) = rhs(3,i,j,k,c) + dx3tx1 * 
-     >                 (u(3,i+1,j,k,c) - 2.0d0*u(3,i,j,k,c) +
-     >                 u(3,i-1,j,k,c)) +
-     >                 xxcon2 * (vs(i+1,j,k,c) - 2.0d0*vs(i,j,k,c) +
-     >                 vs(i-1,j,k,c)) -
-     >                 tx2 * (u(3,i+1,j,k,c)*up1 - 
-     >                 u(3,i-1,j,k,c)*um1)
-
-                  rhs(4,i,j,k,c) = rhs(4,i,j,k,c) + dx4tx1 * 
-     >                 (u(4,i+1,j,k,c) - 2.0d0*u(4,i,j,k,c) +
-     >                 u(4,i-1,j,k,c)) +
-     >                 xxcon2 * (ws(i+1,j,k,c) - 2.0d0*ws(i,j,k,c) +
-     >                 ws(i-1,j,k,c)) -
-     >                 tx2 * (u(4,i+1,j,k,c)*up1 - 
-     >                 u(4,i-1,j,k,c)*um1)
-
-                  rhs(5,i,j,k,c) = rhs(5,i,j,k,c) + dx5tx1 * 
-     >                 (u(5,i+1,j,k,c) - 2.0d0*u(5,i,j,k,c) +
-     >                 u(5,i-1,j,k,c)) +
-     >                 xxcon3 * (qs(i+1,j,k,c) - 2.0d0*qs(i,j,k,c) +
-     >                 qs(i-1,j,k,c)) +
-     >                 xxcon4 * (up1*up1 -       2.0d0*uijk*uijk + 
-     >                 um1*um1) +
-     >                 xxcon5 * (u(5,i+1,j,k,c)*rho_i(i+1,j,k,c) - 
-     >                 2.0d0*u(5,i,j,k,c)*rho_i(i,j,k,c) +
-     >                 u(5,i-1,j,k,c)*rho_i(i-1,j,k,c)) -
-     >                 tx2 * ( (c1*u(5,i+1,j,k,c) - 
-     >                 c2*square(i+1,j,k,c))*up1 -
-     >                 (c1*u(5,i-1,j,k,c) - 
-     >                 c2*square(i-1,j,k,c))*um1 )
-               enddo
-            enddo
-         enddo
-
-c---------------------------------------------------------------------
-c     add fourth order xi-direction dissipation               
-c---------------------------------------------------------------------
-         if (start(1,c) .gt. 0) then
-            do k = start(3,c), cell_size(3,c)-end(3,c)-1
-               do j = start(2,c), cell_size(2,c)-end(2,c)-1
-                  i = 1
-                  do m = 1, 5
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c)- dssp * 
-     >                    ( 5.0d0*u(m,i,j,k,c) - 4.0d0*u(m,i+1,j,k,c) +
-     >                    u(m,i+2,j,k,c))
-                  enddo
-
-                  i = 2
-                  do m = 1, 5
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp * 
-     >                    (-4.0d0*u(m,i-1,j,k,c) + 6.0d0*u(m,i,j,k,c) -
-     >                    4.0d0*u(m,i+1,j,k,c) + u(m,i+2,j,k,c))
-                  enddo
-               enddo
-            enddo
-         endif
-
-         do k = start(3,c), cell_size(3,c)-end(3,c)-1
-            do j = start(2,c), cell_size(2,c)-end(2,c)-1
-               do i = 3*start(1,c),cell_size(1,c)-3*end(1,c)-1
-                  do m = 1, 5
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp * 
-     >                    (  u(m,i-2,j,k,c) - 4.0d0*u(m,i-1,j,k,c) + 
-     >                    6.0*u(m,i,j,k,c) - 4.0d0*u(m,i+1,j,k,c) + 
-     >                    u(m,i+2,j,k,c) )
-                  enddo
-               enddo
-            enddo
-         enddo
-         
-
-         if (end(1,c) .gt. 0) then
-            do k = start(3,c), cell_size(3,c)-end(3,c)-1
-               do j = start(2,c), cell_size(2,c)-end(2,c)-1
-                  i = cell_size(1,c)-3
-                  do m = 1, 5
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp *
-     >                    ( u(m,i-2,j,k,c) - 4.0d0*u(m,i-1,j,k,c) + 
-     >                    6.0d0*u(m,i,j,k,c) - 4.0d0*u(m,i+1,j,k,c) )
-                  enddo
-
-                  i = cell_size(1,c)-2
-                  do m = 1, 5
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp *
-     >                    ( u(m,i-2,j,k,c) - 4.d0*u(m,i-1,j,k,c) +
-     >                    5.d0*u(m,i,j,k,c) )
-                  enddo
-               enddo
-            enddo
-         endif
-
-c---------------------------------------------------------------------
-c     compute eta-direction fluxes 
-c---------------------------------------------------------------------
-         do k = start(3,c), cell_size(3,c)-end(3,c)-1
-            do j = start(2,c), cell_size(2,c)-end(2,c)-1
-               do i = start(1,c), cell_size(1,c)-end(1,c)-1
-                  vijk = vs(i,j,k,c)
-                  vp1  = vs(i,j+1,k,c)
-                  vm1  = vs(i,j-1,k,c)
-                  rhs(1,i,j,k,c) = rhs(1,i,j,k,c) + dy1ty1 * 
-     >                 (u(1,i,j+1,k,c) - 2.0d0*u(1,i,j,k,c) + 
-     >                 u(1,i,j-1,k,c)) -
-     >                 ty2 * (u(3,i,j+1,k,c) - u(3,i,j-1,k,c))
-                  rhs(2,i,j,k,c) = rhs(2,i,j,k,c) + dy2ty1 * 
-     >                 (u(2,i,j+1,k,c) - 2.0d0*u(2,i,j,k,c) + 
-     >                 u(2,i,j-1,k,c)) +
-     >                 yycon2 * (us(i,j+1,k,c) - 2.0d0*us(i,j,k,c) + 
-     >                 us(i,j-1,k,c)) -
-     >                 ty2 * (u(2,i,j+1,k,c)*vp1 - 
-     >                 u(2,i,j-1,k,c)*vm1)
-                  rhs(3,i,j,k,c) = rhs(3,i,j,k,c) + dy3ty1 * 
-     >                 (u(3,i,j+1,k,c) - 2.0d0*u(3,i,j,k,c) + 
-     >                 u(3,i,j-1,k,c)) +
-     >                 yycon2*con43 * (vp1 - 2.0d0*vijk + vm1) -
-     >                 ty2 * (u(3,i,j+1,k,c)*vp1 - 
-     >                 u(3,i,j-1,k,c)*vm1 +
-     >                 (u(5,i,j+1,k,c) - square(i,j+1,k,c) - 
-     >                 u(5,i,j-1,k,c) + square(i,j-1,k,c))
-     >                 *c2)
-                  rhs(4,i,j,k,c) = rhs(4,i,j,k,c) + dy4ty1 * 
-     >                 (u(4,i,j+1,k,c) - 2.0d0*u(4,i,j,k,c) + 
-     >                 u(4,i,j-1,k,c)) +
-     >                 yycon2 * (ws(i,j+1,k,c) - 2.0d0*ws(i,j,k,c) + 
-     >                 ws(i,j-1,k,c)) -
-     >                 ty2 * (u(4,i,j+1,k,c)*vp1 - 
-     >                 u(4,i,j-1,k,c)*vm1)
-                  rhs(5,i,j,k,c) = rhs(5,i,j,k,c) + dy5ty1 * 
-     >                 (u(5,i,j+1,k,c) - 2.0d0*u(5,i,j,k,c) + 
-     >                 u(5,i,j-1,k,c)) +
-     >                 yycon3 * (qs(i,j+1,k,c) - 2.0d0*qs(i,j,k,c) + 
-     >                 qs(i,j-1,k,c)) +
-     >                 yycon4 * (vp1*vp1       - 2.0d0*vijk*vijk + 
-     >                 vm1*vm1) +
-     >                 yycon5 * (u(5,i,j+1,k,c)*rho_i(i,j+1,k,c) - 
-     >                 2.0d0*u(5,i,j,k,c)*rho_i(i,j,k,c) +
-     >                 u(5,i,j-1,k,c)*rho_i(i,j-1,k,c)) -
-     >                 ty2 * ((c1*u(5,i,j+1,k,c) - 
-     >                 c2*square(i,j+1,k,c)) * vp1 -
-     >                 (c1*u(5,i,j-1,k,c) - 
-     >                 c2*square(i,j-1,k,c)) * vm1)
-               enddo
-            enddo
-         enddo
-
-c---------------------------------------------------------------------
-c     add fourth order eta-direction dissipation         
-c---------------------------------------------------------------------
-         if (start(2,c) .gt. 0) then
-            do k = start(3,c), cell_size(3,c)-end(3,c)-1
-               j = 1
-               do i = start(1,c), cell_size(1,c)-end(1,c)-1
-                  do m = 1, 5
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c)- dssp * 
-     >                    ( 5.0d0*u(m,i,j,k,c) - 4.0d0*u(m,i,j+1,k,c) +
-     >                    u(m,i,j+2,k,c))
-                  enddo
-               enddo
-
-               j = 2
-               do i = start(1,c), cell_size(1,c)-end(1,c)-1
-                  do m = 1, 5
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp * 
-     >                    (-4.0d0*u(m,i,j-1,k,c) + 6.0d0*u(m,i,j,k,c) -
-     >                    4.0d0*u(m,i,j+1,k,c) + u(m,i,j+2,k,c))
-                  enddo
-               enddo
-            enddo
-         endif
-
-         do k = start(3,c), cell_size(3,c)-end(3,c)-1
-            do j = 3*start(2,c), cell_size(2,c)-3*end(2,c)-1
-               do i = start(1,c),cell_size(1,c)-end(1,c)-1
-                  do m = 1, 5
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp * 
-     >                    (  u(m,i,j-2,k,c) - 4.0d0*u(m,i,j-1,k,c) + 
-     >                    6.0*u(m,i,j,k,c) - 4.0d0*u(m,i,j+1,k,c) + 
-     >                    u(m,i,j+2,k,c) )
-                  enddo
-               enddo
-            enddo
-         enddo
-         
-         if (end(2,c) .gt. 0) then
-            do k = start(3,c), cell_size(3,c)-end(3,c)-1
-               j = cell_size(2,c)-3
-               do i = start(1,c), cell_size(1,c)-end(1,c)-1
-                  do m = 1, 5
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp *
-     >                    ( u(m,i,j-2,k,c) - 4.0d0*u(m,i,j-1,k,c) + 
-     >                    6.0d0*u(m,i,j,k,c) - 4.0d0*u(m,i,j+1,k,c) )
-                  enddo
-               enddo
-
-               j = cell_size(2,c)-2
-               do i = start(1,c), cell_size(1,c)-end(1,c)-1
-                  do m = 1, 5
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp *
-     >                    ( u(m,i,j-2,k,c) - 4.d0*u(m,i,j-1,k,c) +
-     >                    5.d0*u(m,i,j,k,c) )
-                  enddo
-               enddo
-            enddo
-         endif
-
-c---------------------------------------------------------------------
-c     compute zeta-direction fluxes 
-c---------------------------------------------------------------------
-         do k = start(3,c), cell_size(3,c)-end(3,c)-1
-            do j = start(2,c), cell_size(2,c)-end(2,c)-1
-               do i = start(1,c), cell_size(1,c)-end(1,c)-1
-                  wijk = ws(i,j,k,c)
-                  wp1  = ws(i,j,k+1,c)
-                  wm1  = ws(i,j,k-1,c)
-
-                  rhs(1,i,j,k,c) = rhs(1,i,j,k,c) + dz1tz1 * 
-     >                 (u(1,i,j,k+1,c) - 2.0d0*u(1,i,j,k,c) + 
-     >                 u(1,i,j,k-1,c)) -
-     >                 tz2 * (u(4,i,j,k+1,c) - u(4,i,j,k-1,c))
-                  rhs(2,i,j,k,c) = rhs(2,i,j,k,c) + dz2tz1 * 
-     >                 (u(2,i,j,k+1,c) - 2.0d0*u(2,i,j,k,c) + 
-     >                 u(2,i,j,k-1,c)) +
-     >                 zzcon2 * (us(i,j,k+1,c) - 2.0d0*us(i,j,k,c) + 
-     >                 us(i,j,k-1,c)) -
-     >                 tz2 * (u(2,i,j,k+1,c)*wp1 - 
-     >                 u(2,i,j,k-1,c)*wm1)
-                  rhs(3,i,j,k,c) = rhs(3,i,j,k,c) + dz3tz1 * 
-     >                 (u(3,i,j,k+1,c) - 2.0d0*u(3,i,j,k,c) + 
-     >                 u(3,i,j,k-1,c)) +
-     >                 zzcon2 * (vs(i,j,k+1,c) - 2.0d0*vs(i,j,k,c) + 
-     >                 vs(i,j,k-1,c)) -
-     >                 tz2 * (u(3,i,j,k+1,c)*wp1 - 
-     >                 u(3,i,j,k-1,c)*wm1)
-                  rhs(4,i,j,k,c) = rhs(4,i,j,k,c) + dz4tz1 * 
-     >                 (u(4,i,j,k+1,c) - 2.0d0*u(4,i,j,k,c) + 
-     >                 u(4,i,j,k-1,c)) +
-     >                 zzcon2*con43 * (wp1 - 2.0d0*wijk + wm1) -
-     >                 tz2 * (u(4,i,j,k+1,c)*wp1 - 
-     >                 u(4,i,j,k-1,c)*wm1 +
-     >                 (u(5,i,j,k+1,c) - square(i,j,k+1,c) - 
-     >                 u(5,i,j,k-1,c) + square(i,j,k-1,c))
-     >                 *c2)
-                  rhs(5,i,j,k,c) = rhs(5,i,j,k,c) + dz5tz1 * 
-     >                 (u(5,i,j,k+1,c) - 2.0d0*u(5,i,j,k,c) + 
-     >                 u(5,i,j,k-1,c)) +
-     >                 zzcon3 * (qs(i,j,k+1,c) - 2.0d0*qs(i,j,k,c) + 
-     >                 qs(i,j,k-1,c)) +
-     >                 zzcon4 * (wp1*wp1 - 2.0d0*wijk*wijk + 
-     >                 wm1*wm1) +
-     >                 zzcon5 * (u(5,i,j,k+1,c)*rho_i(i,j,k+1,c) - 
-     >                 2.0d0*u(5,i,j,k,c)*rho_i(i,j,k,c) +
-     >                 u(5,i,j,k-1,c)*rho_i(i,j,k-1,c)) -
-     >                 tz2 * ( (c1*u(5,i,j,k+1,c) - 
-     >                 c2*square(i,j,k+1,c))*wp1 -
-     >                 (c1*u(5,i,j,k-1,c) - 
-     >                 c2*square(i,j,k-1,c))*wm1)
-               enddo
-            enddo
-         enddo
-
-c---------------------------------------------------------------------
-c     add fourth order zeta-direction dissipation                
-c---------------------------------------------------------------------
-         if (start(3,c) .gt. 0) then
-            k = 1
-            do j = start(2,c), cell_size(2,c)-end(2,c)-1
-               do i = start(1,c), cell_size(1,c)-end(1,c)-1
-                  do m = 1, 5
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c)- dssp * 
-     >                    ( 5.0d0*u(m,i,j,k,c) - 4.0d0*u(m,i,j,k+1,c) +
-     >                    u(m,i,j,k+2,c))
-                  enddo
-               enddo
-            enddo
-
-            k = 2
-            do j = start(2,c), cell_size(2,c)-end(2,c)-1
-               do i = start(1,c), cell_size(1,c)-end(1,c)-1
-                  do m = 1, 5
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp * 
-     >                    (-4.0d0*u(m,i,j,k-1,c) + 6.0d0*u(m,i,j,k,c) -
-     >                    4.0d0*u(m,i,j,k+1,c) + u(m,i,j,k+2,c))
-                  enddo
-               enddo
-            enddo
-         endif
-
-         do k = 3*start(3,c), cell_size(3,c)-3*end(3,c)-1
-            do j = start(2,c), cell_size(2,c)-end(2,c)-1
-               do i = start(1,c),cell_size(1,c)-end(1,c)-1
-                  do m = 1, 5
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp * 
-     >                    (  u(m,i,j,k-2,c) - 4.0d0*u(m,i,j,k-1,c) + 
-     >                    6.0*u(m,i,j,k,c) - 4.0d0*u(m,i,j,k+1,c) + 
-     >                    u(m,i,j,k+2,c) )
-                  enddo
-               enddo
-            enddo
-         enddo
-         
-         if (end(3,c) .gt. 0) then
-            k = cell_size(3,c)-3
-            do j = start(2,c), cell_size(2,c)-end(2,c)-1
-               do i = start(1,c), cell_size(1,c)-end(1,c)-1
-                  do m = 1, 5
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp *
-     >                    ( u(m,i,j,k-2,c) - 4.0d0*u(m,i,j,k-1,c) + 
-     >                    6.0d0*u(m,i,j,k,c) - 4.0d0*u(m,i,j,k+1,c) )
-                  enddo
-               enddo
-            enddo
-
-            k = cell_size(3,c)-2
-            do j = start(2,c), cell_size(2,c)-end(2,c)-1
-               do i = start(1,c), cell_size(1,c)-end(1,c)-1
-                  do m = 1, 5
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) - dssp *
-     >                    ( u(m,i,j,k-2,c) - 4.d0*u(m,i,j,k-1,c) +
-     >                    5.d0*u(m,i,j,k,c) )
-                  enddo
-               enddo
-            enddo
-         endif
-
-         do k = start(3,c), cell_size(3,c)-end(3,c)-1
-            do j = start(2,c), cell_size(2,c)-end(2,c)-1
-               do i = start(1,c), cell_size(1,c)-end(1,c)-1
-                  do m = 1, 5
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) * dt
-                  enddo
-               enddo
-            enddo
-         enddo
-
-      enddo
-      
-      return
-      end
-
-
-
-
diff --git a/examples/smpi/NAS/BT/set_constants.f b/examples/smpi/NAS/BT/set_constants.f
deleted file mode 100644 (file)
index 81397d4..0000000
+++ /dev/null
@@ -1,202 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine  set_constants
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      
-      ce(1,1)  = 2.0d0
-      ce(1,2)  = 0.0d0
-      ce(1,3)  = 0.0d0
-      ce(1,4)  = 4.0d0
-      ce(1,5)  = 5.0d0
-      ce(1,6)  = 3.0d0
-      ce(1,7)  = 0.5d0
-      ce(1,8)  = 0.02d0
-      ce(1,9)  = 0.01d0
-      ce(1,10) = 0.03d0
-      ce(1,11) = 0.5d0
-      ce(1,12) = 0.4d0
-      ce(1,13) = 0.3d0
-      
-      ce(2,1)  = 1.0d0
-      ce(2,2)  = 0.0d0
-      ce(2,3)  = 0.0d0
-      ce(2,4)  = 0.0d0
-      ce(2,5)  = 1.0d0
-      ce(2,6)  = 2.0d0
-      ce(2,7)  = 3.0d0
-      ce(2,8)  = 0.01d0
-      ce(2,9)  = 0.03d0
-      ce(2,10) = 0.02d0
-      ce(2,11) = 0.4d0
-      ce(2,12) = 0.3d0
-      ce(2,13) = 0.5d0
-
-      ce(3,1)  = 2.0d0
-      ce(3,2)  = 2.0d0
-      ce(3,3)  = 0.0d0
-      ce(3,4)  = 0.0d0
-      ce(3,5)  = 0.0d0
-      ce(3,6)  = 2.0d0
-      ce(3,7)  = 3.0d0
-      ce(3,8)  = 0.04d0
-      ce(3,9)  = 0.03d0
-      ce(3,10) = 0.05d0
-      ce(3,11) = 0.3d0
-      ce(3,12) = 0.5d0
-      ce(3,13) = 0.4d0
-
-      ce(4,1)  = 2.0d0
-      ce(4,2)  = 2.0d0
-      ce(4,3)  = 0.0d0
-      ce(4,4)  = 0.0d0
-      ce(4,5)  = 0.0d0
-      ce(4,6)  = 2.0d0
-      ce(4,7)  = 3.0d0
-      ce(4,8)  = 0.03d0
-      ce(4,9)  = 0.05d0
-      ce(4,10) = 0.04d0
-      ce(4,11) = 0.2d0
-      ce(4,12) = 0.1d0
-      ce(4,13) = 0.3d0
-
-      ce(5,1)  = 5.0d0
-      ce(5,2)  = 4.0d0
-      ce(5,3)  = 3.0d0
-      ce(5,4)  = 2.0d0
-      ce(5,5)  = 0.1d0
-      ce(5,6)  = 0.4d0
-      ce(5,7)  = 0.3d0
-      ce(5,8)  = 0.05d0
-      ce(5,9)  = 0.04d0
-      ce(5,10) = 0.03d0
-      ce(5,11) = 0.1d0
-      ce(5,12) = 0.3d0
-      ce(5,13) = 0.2d0
-
-      c1 = 1.4d0
-      c2 = 0.4d0
-      c3 = 0.1d0
-      c4 = 1.0d0
-      c5 = 1.4d0
-
-      bt = dsqrt(0.5d0)
-
-      dnxm1 = 1.0d0 / dble(grid_points(1)-1)
-      dnym1 = 1.0d0 / dble(grid_points(2)-1)
-      dnzm1 = 1.0d0 / dble(grid_points(3)-1)
-
-      c1c2 = c1 * c2
-      c1c5 = c1 * c5
-      c3c4 = c3 * c4
-      c1345 = c1c5 * c3c4
-
-      conz1 = (1.0d0-c1c5)
-
-      tx1 = 1.0d0 / (dnxm1 * dnxm1)
-      tx2 = 1.0d0 / (2.0d0 * dnxm1)
-      tx3 = 1.0d0 / dnxm1
-
-      ty1 = 1.0d0 / (dnym1 * dnym1)
-      ty2 = 1.0d0 / (2.0d0 * dnym1)
-      ty3 = 1.0d0 / dnym1
-      
-      tz1 = 1.0d0 / (dnzm1 * dnzm1)
-      tz2 = 1.0d0 / (2.0d0 * dnzm1)
-      tz3 = 1.0d0 / dnzm1
-
-      dx1 = 0.75d0
-      dx2 = 0.75d0
-      dx3 = 0.75d0
-      dx4 = 0.75d0
-      dx5 = 0.75d0
-
-      dy1 = 0.75d0
-      dy2 = 0.75d0
-      dy3 = 0.75d0
-      dy4 = 0.75d0
-      dy5 = 0.75d0
-
-      dz1 = 1.0d0
-      dz2 = 1.0d0
-      dz3 = 1.0d0
-      dz4 = 1.0d0
-      dz5 = 1.0d0
-
-      dxmax = dmax1(dx3, dx4)
-      dymax = dmax1(dy2, dy4)
-      dzmax = dmax1(dz2, dz3)
-
-      dssp = 0.25d0 * dmax1(dx1, dmax1(dy1, dz1) )
-
-      c4dssp = 4.0d0 * dssp
-      c5dssp = 5.0d0 * dssp
-
-      dttx1 = dt*tx1
-      dttx2 = dt*tx2
-      dtty1 = dt*ty1
-      dtty2 = dt*ty2
-      dttz1 = dt*tz1
-      dttz2 = dt*tz2
-
-      c2dttx1 = 2.0d0*dttx1
-      c2dtty1 = 2.0d0*dtty1
-      c2dttz1 = 2.0d0*dttz1
-
-      dtdssp = dt*dssp
-
-      comz1  = dtdssp
-      comz4  = 4.0d0*dtdssp
-      comz5  = 5.0d0*dtdssp
-      comz6  = 6.0d0*dtdssp
-
-      c3c4tx3 = c3c4*tx3
-      c3c4ty3 = c3c4*ty3
-      c3c4tz3 = c3c4*tz3
-
-      dx1tx1 = dx1*tx1
-      dx2tx1 = dx2*tx1
-      dx3tx1 = dx3*tx1
-      dx4tx1 = dx4*tx1
-      dx5tx1 = dx5*tx1
-      
-      dy1ty1 = dy1*ty1
-      dy2ty1 = dy2*ty1
-      dy3ty1 = dy3*ty1
-      dy4ty1 = dy4*ty1
-      dy5ty1 = dy5*ty1
-      
-      dz1tz1 = dz1*tz1
-      dz2tz1 = dz2*tz1
-      dz3tz1 = dz3*tz1
-      dz4tz1 = dz4*tz1
-      dz5tz1 = dz5*tz1
-
-      c2iv  = 2.5d0
-      con43 = 4.0d0/3.0d0
-      con16 = 1.0d0/6.0d0
-      
-      xxcon1 = c3c4tx3*con43*tx3
-      xxcon2 = c3c4tx3*tx3
-      xxcon3 = c3c4tx3*conz1*tx3
-      xxcon4 = c3c4tx3*con16*tx3
-      xxcon5 = c3c4tx3*c1c5*tx3
-
-      yycon1 = c3c4ty3*con43*ty3
-      yycon2 = c3c4ty3*ty3
-      yycon3 = c3c4ty3*conz1*ty3
-      yycon4 = c3c4ty3*con16*ty3
-      yycon5 = c3c4ty3*c1c5*ty3
-
-      zzcon1 = c3c4tz3*con43*tz3
-      zzcon2 = c3c4tz3*tz3
-      zzcon3 = c3c4tz3*conz1*tz3
-      zzcon4 = c3c4tz3*con16*tz3
-      zzcon5 = c3c4tz3*c1c5*tz3
-
-      return
-      end
diff --git a/examples/smpi/NAS/BT/setup_mpi.f b/examples/smpi/NAS/BT/setup_mpi.f
deleted file mode 100644 (file)
index 987c6bf..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine setup_mpi
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c set up MPI stuff
-c---------------------------------------------------------------------
-
-      implicit none
-      include 'mpinpb.h'
-      include 'npbparams.h'
-      integer error, color, nc
-
-      call mpi_init(error)
-      
-      call mpi_comm_size(MPI_COMM_WORLD, total_nodes, error)
-      call mpi_comm_rank(MPI_COMM_WORLD, node, error)
-
-      if (.not. convertdouble) then
-         dp_type = MPI_DOUBLE_PRECISION
-      else
-         dp_type = MPI_REAL
-      endif
-
-c---------------------------------------------------------------------
-c     compute square root; add small number to allow for roundoff
-c---------------------------------------------------------------------
-      nc = dint(dsqrt(dble(total_nodes) + 0.00001d0))
-
-c---------------------------------------------------------------------
-c We handle a non-square number of nodes by making the excess nodes
-c inactive. However, we can never handle more cells than were compiled
-c in. 
-c---------------------------------------------------------------------
-
-      if (nc .gt. maxcells) nc = maxcells
-      if (node .ge. nc*nc) then
-         active = .false.
-         color = 1
-      else
-         active = .true.
-         color = 0
-      end if
-      
-      call mpi_comm_split(MPI_COMM_WORLD,color,node,comm_setup,error)
-      if (.not. active) return
-
-      call mpi_comm_size(comm_setup, no_nodes, error)
-      call mpi_comm_dup(comm_setup, comm_solve, error)
-      call mpi_comm_dup(comm_setup, comm_rhs, error)
-      
-c---------------------------------------------------------------------
-c     let node 0 be the root for the group (there is only one)
-c---------------------------------------------------------------------
-      root = 0
-
-      return
-      end
-
diff --git a/examples/smpi/NAS/BT/simple_mpiio.f b/examples/smpi/NAS/BT/simple_mpiio.f
deleted file mode 100644 (file)
index 02e2700..0000000
+++ /dev/null
@@ -1,213 +0,0 @@
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine setup_btio
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer m, ierr
-
-      iseek=0
-
-      if (node .eq. root) then
-          call MPI_File_delete(filenm, MPI_INFO_NULL, ierr)
-      endif
-
-      call MPI_Barrier(comm_solve, ierr)
-
-      call MPI_File_open(comm_solve,
-     $          filenm,
-     $          MPI_MODE_RDWR + MPI_MODE_CREATE,
-     $          MPI_INFO_NULL,
-     $          fp,
-     $          ierr)
-
-      call MPI_File_set_view(fp,
-     $          iseek, MPI_DOUBLE_PRECISION, MPI_DOUBLE_PRECISION,
-     $          'native', MPI_INFO_NULL, ierr)
-
-      if (ierr .ne. MPI_SUCCESS) then
-          print *, 'Error opening file'
-          stop
-      endif
-
-      do m = 1, 5
-         xce_sub(m) = 0.d0
-      end do
-
-      idump_sub = 0
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine output_timestep
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer count, jio, kio, cio, aio
-      integer ierr
-      integer mstatus(MPI_STATUS_SIZE)
-
-      do cio=1,ncells
-          do kio=0, cell_size(3,cio)-1
-              do jio=0, cell_size(2,cio)-1
-                  iseek=5*(cell_low(1,cio) +
-     $                   PROBLEM_SIZE*((cell_low(2,cio)+jio) +
-     $                   PROBLEM_SIZE*((cell_low(3,cio)+kio) +
-     $                   PROBLEM_SIZE*idump_sub)))
-
-                  count=5*cell_size(1,cio)
-
-                  call MPI_File_write_at(fp, iseek,
-     $                  u(1,0,jio,kio,cio),
-     $                  count, MPI_DOUBLE_PRECISION,
-     $                  mstatus, ierr)
-
-                  if (ierr .ne. MPI_SUCCESS) then
-                      print *, 'Error writing to file'
-                      stop
-                  endif
-              enddo
-          enddo
-      enddo
-
-      idump_sub = idump_sub + 1
-      if (rd_interval .gt. 0) then
-         if (idump_sub .ge. rd_interval) then
-
-            call acc_sub_norms(idump+1)
-
-            idump_sub = 0
-         endif
-      endif
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine acc_sub_norms(idump_cur)
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer idump_cur
-
-      integer count, jio, kio, cio, ii, m, ichunk
-      integer ierr
-      integer mstatus(MPI_STATUS_SIZE)
-      double precision xce_single(5)
-
-      ichunk = idump_cur - idump_sub + 1
-      do ii=0, idump_sub-1
-        do cio=1,ncells
-          do kio=0, cell_size(3,cio)-1
-              do jio=0, cell_size(2,cio)-1
-                  iseek=5*(cell_low(1,cio) +
-     $                   PROBLEM_SIZE*((cell_low(2,cio)+jio) +
-     $                   PROBLEM_SIZE*((cell_low(3,cio)+kio) +
-     $                   PROBLEM_SIZE*ii)))
-
-                  count=5*cell_size(1,cio)
-
-                  call MPI_File_read_at(fp, iseek,
-     $                  u(1,0,jio,kio,cio),
-     $                  count, MPI_DOUBLE_PRECISION,
-     $                  mstatus, ierr)
-
-                  if (ierr .ne. MPI_SUCCESS) then
-                      print *, 'Error reading back file'
-                      call MPI_File_close(fp, ierr)
-                      stop
-                  endif
-              enddo
-          enddo
-        enddo
-
-        if (node .eq. root) print *, 'Reading data set ', ii+ichunk
-
-        call error_norm(xce_single)
-        do m = 1, 5
-           xce_sub(m) = xce_sub(m) + xce_single(m)
-        end do
-      enddo
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine btio_cleanup
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer ierr
-
-      call MPI_File_close(fp, ierr)
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine accumulate_norms(xce_acc)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      double precision xce_acc(5)
-      integer m, ierr
-
-      if (rd_interval .gt. 0) goto 20
-
-      call MPI_File_open(comm_solve,
-     $          filenm,
-     $          MPI_MODE_RDONLY,
-     $          MPI_INFO_NULL,
-     $          fp,
-     $          ierr)
-
-      iseek = 0
-      call MPI_File_set_view(fp,
-     $          iseek, MPI_DOUBLE_PRECISION, MPI_DOUBLE_PRECISION,
-     $          'native', MPI_INFO_NULL, ierr)
-
-c     clear the last time step
-
-      call clear_timestep
-
-c     read back the time steps and accumulate norms
-
-      call acc_sub_norms(idump)
-
-      call MPI_File_close(fp, ierr)
-
- 20   continue
-      do m = 1, 5
-         xce_acc(m) = xce_sub(m) / dble(idump)
-      end do
-
-      return
-      end
-
diff --git a/examples/smpi/NAS/BT/solve_subs.f b/examples/smpi/NAS/BT/solve_subs.f
deleted file mode 100644 (file)
index 351489a..0000000
+++ /dev/null
@@ -1,642 +0,0 @@
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine matvec_sub(ablock,avec,bvec)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     subtracts bvec=bvec - ablock*avec
-c---------------------------------------------------------------------
-
-      implicit none
-
-      double precision ablock,avec,bvec
-      dimension ablock(5,5),avec(5),bvec(5)
-
-c---------------------------------------------------------------------
-c            rhs(i,ic,jc,kc,ccell) = rhs(i,ic,jc,kc,ccell) 
-c     $           - lhs(i,1,ablock,ia,ja,ka,acell)*
-c---------------------------------------------------------------------
-         bvec(1) = bvec(1) - ablock(1,1)*avec(1)
-     >                     - ablock(1,2)*avec(2)
-     >                     - ablock(1,3)*avec(3)
-     >                     - ablock(1,4)*avec(4)
-     >                     - ablock(1,5)*avec(5)
-         bvec(2) = bvec(2) - ablock(2,1)*avec(1)
-     >                     - ablock(2,2)*avec(2)
-     >                     - ablock(2,3)*avec(3)
-     >                     - ablock(2,4)*avec(4)
-     >                     - ablock(2,5)*avec(5)
-         bvec(3) = bvec(3) - ablock(3,1)*avec(1)
-     >                     - ablock(3,2)*avec(2)
-     >                     - ablock(3,3)*avec(3)
-     >                     - ablock(3,4)*avec(4)
-     >                     - ablock(3,5)*avec(5)
-         bvec(4) = bvec(4) - ablock(4,1)*avec(1)
-     >                     - ablock(4,2)*avec(2)
-     >                     - ablock(4,3)*avec(3)
-     >                     - ablock(4,4)*avec(4)
-     >                     - ablock(4,5)*avec(5)
-         bvec(5) = bvec(5) - ablock(5,1)*avec(1)
-     >                     - ablock(5,2)*avec(2)
-     >                     - ablock(5,3)*avec(3)
-     >                     - ablock(5,4)*avec(4)
-     >                     - ablock(5,5)*avec(5)
-
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine matmul_sub(ablock, bblock, cblock)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     subtracts a(i,j,k) X b(i,j,k) from c(i,j,k)
-c---------------------------------------------------------------------
-
-      implicit none
-
-      double precision ablock, bblock, cblock
-      dimension ablock(5,5), bblock(5,5), cblock(5,5)
-
-
-         cblock(1,1) = cblock(1,1) - ablock(1,1)*bblock(1,1)
-     >                             - ablock(1,2)*bblock(2,1)
-     >                             - ablock(1,3)*bblock(3,1)
-     >                             - ablock(1,4)*bblock(4,1)
-     >                             - ablock(1,5)*bblock(5,1)
-         cblock(2,1) = cblock(2,1) - ablock(2,1)*bblock(1,1)
-     >                             - ablock(2,2)*bblock(2,1)
-     >                             - ablock(2,3)*bblock(3,1)
-     >                             - ablock(2,4)*bblock(4,1)
-     >                             - ablock(2,5)*bblock(5,1)
-         cblock(3,1) = cblock(3,1) - ablock(3,1)*bblock(1,1)
-     >                             - ablock(3,2)*bblock(2,1)
-     >                             - ablock(3,3)*bblock(3,1)
-     >                             - ablock(3,4)*bblock(4,1)
-     >                             - ablock(3,5)*bblock(5,1)
-         cblock(4,1) = cblock(4,1) - ablock(4,1)*bblock(1,1)
-     >                             - ablock(4,2)*bblock(2,1)
-     >                             - ablock(4,3)*bblock(3,1)
-     >                             - ablock(4,4)*bblock(4,1)
-     >                             - ablock(4,5)*bblock(5,1)
-         cblock(5,1) = cblock(5,1) - ablock(5,1)*bblock(1,1)
-     >                             - ablock(5,2)*bblock(2,1)
-     >                             - ablock(5,3)*bblock(3,1)
-     >                             - ablock(5,4)*bblock(4,1)
-     >                             - ablock(5,5)*bblock(5,1)
-         cblock(1,2) = cblock(1,2) - ablock(1,1)*bblock(1,2)
-     >                             - ablock(1,2)*bblock(2,2)
-     >                             - ablock(1,3)*bblock(3,2)
-     >                             - ablock(1,4)*bblock(4,2)
-     >                             - ablock(1,5)*bblock(5,2)
-         cblock(2,2) = cblock(2,2) - ablock(2,1)*bblock(1,2)
-     >                             - ablock(2,2)*bblock(2,2)
-     >                             - ablock(2,3)*bblock(3,2)
-     >                             - ablock(2,4)*bblock(4,2)
-     >                             - ablock(2,5)*bblock(5,2)
-         cblock(3,2) = cblock(3,2) - ablock(3,1)*bblock(1,2)
-     >                             - ablock(3,2)*bblock(2,2)
-     >                             - ablock(3,3)*bblock(3,2)
-     >                             - ablock(3,4)*bblock(4,2)
-     >                             - ablock(3,5)*bblock(5,2)
-         cblock(4,2) = cblock(4,2) - ablock(4,1)*bblock(1,2)
-     >                             - ablock(4,2)*bblock(2,2)
-     >                             - ablock(4,3)*bblock(3,2)
-     >                             - ablock(4,4)*bblock(4,2)
-     >                             - ablock(4,5)*bblock(5,2)
-         cblock(5,2) = cblock(5,2) - ablock(5,1)*bblock(1,2)
-     >                             - ablock(5,2)*bblock(2,2)
-     >                             - ablock(5,3)*bblock(3,2)
-     >                             - ablock(5,4)*bblock(4,2)
-     >                             - ablock(5,5)*bblock(5,2)
-         cblock(1,3) = cblock(1,3) - ablock(1,1)*bblock(1,3)
-     >                             - ablock(1,2)*bblock(2,3)
-     >                             - ablock(1,3)*bblock(3,3)
-     >                             - ablock(1,4)*bblock(4,3)
-     >                             - ablock(1,5)*bblock(5,3)
-         cblock(2,3) = cblock(2,3) - ablock(2,1)*bblock(1,3)
-     >                             - ablock(2,2)*bblock(2,3)
-     >                             - ablock(2,3)*bblock(3,3)
-     >                             - ablock(2,4)*bblock(4,3)
-     >                             - ablock(2,5)*bblock(5,3)
-         cblock(3,3) = cblock(3,3) - ablock(3,1)*bblock(1,3)
-     >                             - ablock(3,2)*bblock(2,3)
-     >                             - ablock(3,3)*bblock(3,3)
-     >                             - ablock(3,4)*bblock(4,3)
-     >                             - ablock(3,5)*bblock(5,3)
-         cblock(4,3) = cblock(4,3) - ablock(4,1)*bblock(1,3)
-     >                             - ablock(4,2)*bblock(2,3)
-     >                             - ablock(4,3)*bblock(3,3)
-     >                             - ablock(4,4)*bblock(4,3)
-     >                             - ablock(4,5)*bblock(5,3)
-         cblock(5,3) = cblock(5,3) - ablock(5,1)*bblock(1,3)
-     >                             - ablock(5,2)*bblock(2,3)
-     >                             - ablock(5,3)*bblock(3,3)
-     >                             - ablock(5,4)*bblock(4,3)
-     >                             - ablock(5,5)*bblock(5,3)
-         cblock(1,4) = cblock(1,4) - ablock(1,1)*bblock(1,4)
-     >                             - ablock(1,2)*bblock(2,4)
-     >                             - ablock(1,3)*bblock(3,4)
-     >                             - ablock(1,4)*bblock(4,4)
-     >                             - ablock(1,5)*bblock(5,4)
-         cblock(2,4) = cblock(2,4) - ablock(2,1)*bblock(1,4)
-     >                             - ablock(2,2)*bblock(2,4)
-     >                             - ablock(2,3)*bblock(3,4)
-     >                             - ablock(2,4)*bblock(4,4)
-     >                             - ablock(2,5)*bblock(5,4)
-         cblock(3,4) = cblock(3,4) - ablock(3,1)*bblock(1,4)
-     >                             - ablock(3,2)*bblock(2,4)
-     >                             - ablock(3,3)*bblock(3,4)
-     >                             - ablock(3,4)*bblock(4,4)
-     >                             - ablock(3,5)*bblock(5,4)
-         cblock(4,4) = cblock(4,4) - ablock(4,1)*bblock(1,4)
-     >                             - ablock(4,2)*bblock(2,4)
-     >                             - ablock(4,3)*bblock(3,4)
-     >                             - ablock(4,4)*bblock(4,4)
-     >                             - ablock(4,5)*bblock(5,4)
-         cblock(5,4) = cblock(5,4) - ablock(5,1)*bblock(1,4)
-     >                             - ablock(5,2)*bblock(2,4)
-     >                             - ablock(5,3)*bblock(3,4)
-     >                             - ablock(5,4)*bblock(4,4)
-     >                             - ablock(5,5)*bblock(5,4)
-         cblock(1,5) = cblock(1,5) - ablock(1,1)*bblock(1,5)
-     >                             - ablock(1,2)*bblock(2,5)
-     >                             - ablock(1,3)*bblock(3,5)
-     >                             - ablock(1,4)*bblock(4,5)
-     >                             - ablock(1,5)*bblock(5,5)
-         cblock(2,5) = cblock(2,5) - ablock(2,1)*bblock(1,5)
-     >                             - ablock(2,2)*bblock(2,5)
-     >                             - ablock(2,3)*bblock(3,5)
-     >                             - ablock(2,4)*bblock(4,5)
-     >                             - ablock(2,5)*bblock(5,5)
-         cblock(3,5) = cblock(3,5) - ablock(3,1)*bblock(1,5)
-     >                             - ablock(3,2)*bblock(2,5)
-     >                             - ablock(3,3)*bblock(3,5)
-     >                             - ablock(3,4)*bblock(4,5)
-     >                             - ablock(3,5)*bblock(5,5)
-         cblock(4,5) = cblock(4,5) - ablock(4,1)*bblock(1,5)
-     >                             - ablock(4,2)*bblock(2,5)
-     >                             - ablock(4,3)*bblock(3,5)
-     >                             - ablock(4,4)*bblock(4,5)
-     >                             - ablock(4,5)*bblock(5,5)
-         cblock(5,5) = cblock(5,5) - ablock(5,1)*bblock(1,5)
-     >                             - ablock(5,2)*bblock(2,5)
-     >                             - ablock(5,3)*bblock(3,5)
-     >                             - ablock(5,4)*bblock(4,5)
-     >                             - ablock(5,5)*bblock(5,5)
-
-              
-      return
-      end
-
-
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine binvcrhs( lhs,c,r )
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     
-c---------------------------------------------------------------------
-
-      implicit none
-
-      double precision pivot, coeff, lhs
-      dimension lhs(5,5)
-      double precision c(5,5), r(5)
-
-c---------------------------------------------------------------------
-c     
-c---------------------------------------------------------------------
-
-      pivot = 1.00d0/lhs(1,1)
-      lhs(1,2) = lhs(1,2)*pivot
-      lhs(1,3) = lhs(1,3)*pivot
-      lhs(1,4) = lhs(1,4)*pivot
-      lhs(1,5) = lhs(1,5)*pivot
-      c(1,1) = c(1,1)*pivot
-      c(1,2) = c(1,2)*pivot
-      c(1,3) = c(1,3)*pivot
-      c(1,4) = c(1,4)*pivot
-      c(1,5) = c(1,5)*pivot
-      r(1)   = r(1)  *pivot
-
-      coeff = lhs(2,1)
-      lhs(2,2)= lhs(2,2) - coeff*lhs(1,2)
-      lhs(2,3)= lhs(2,3) - coeff*lhs(1,3)
-      lhs(2,4)= lhs(2,4) - coeff*lhs(1,4)
-      lhs(2,5)= lhs(2,5) - coeff*lhs(1,5)
-      c(2,1) = c(2,1) - coeff*c(1,1)
-      c(2,2) = c(2,2) - coeff*c(1,2)
-      c(2,3) = c(2,3) - coeff*c(1,3)
-      c(2,4) = c(2,4) - coeff*c(1,4)
-      c(2,5) = c(2,5) - coeff*c(1,5)
-      r(2)   = r(2)   - coeff*r(1)
-
-      coeff = lhs(3,1)
-      lhs(3,2)= lhs(3,2) - coeff*lhs(1,2)
-      lhs(3,3)= lhs(3,3) - coeff*lhs(1,3)
-      lhs(3,4)= lhs(3,4) - coeff*lhs(1,4)
-      lhs(3,5)= lhs(3,5) - coeff*lhs(1,5)
-      c(3,1) = c(3,1) - coeff*c(1,1)
-      c(3,2) = c(3,2) - coeff*c(1,2)
-      c(3,3) = c(3,3) - coeff*c(1,3)
-      c(3,4) = c(3,4) - coeff*c(1,4)
-      c(3,5) = c(3,5) - coeff*c(1,5)
-      r(3)   = r(3)   - coeff*r(1)
-
-      coeff = lhs(4,1)
-      lhs(4,2)= lhs(4,2) - coeff*lhs(1,2)
-      lhs(4,3)= lhs(4,3) - coeff*lhs(1,3)
-      lhs(4,4)= lhs(4,4) - coeff*lhs(1,4)
-      lhs(4,5)= lhs(4,5) - coeff*lhs(1,5)
-      c(4,1) = c(4,1) - coeff*c(1,1)
-      c(4,2) = c(4,2) - coeff*c(1,2)
-      c(4,3) = c(4,3) - coeff*c(1,3)
-      c(4,4) = c(4,4) - coeff*c(1,4)
-      c(4,5) = c(4,5) - coeff*c(1,5)
-      r(4)   = r(4)   - coeff*r(1)
-
-      coeff = lhs(5,1)
-      lhs(5,2)= lhs(5,2) - coeff*lhs(1,2)
-      lhs(5,3)= lhs(5,3) - coeff*lhs(1,3)
-      lhs(5,4)= lhs(5,4) - coeff*lhs(1,4)
-      lhs(5,5)= lhs(5,5) - coeff*lhs(1,5)
-      c(5,1) = c(5,1) - coeff*c(1,1)
-      c(5,2) = c(5,2) - coeff*c(1,2)
-      c(5,3) = c(5,3) - coeff*c(1,3)
-      c(5,4) = c(5,4) - coeff*c(1,4)
-      c(5,5) = c(5,5) - coeff*c(1,5)
-      r(5)   = r(5)   - coeff*r(1)
-
-
-      pivot = 1.00d0/lhs(2,2)
-      lhs(2,3) = lhs(2,3)*pivot
-      lhs(2,4) = lhs(2,4)*pivot
-      lhs(2,5) = lhs(2,5)*pivot
-      c(2,1) = c(2,1)*pivot
-      c(2,2) = c(2,2)*pivot
-      c(2,3) = c(2,3)*pivot
-      c(2,4) = c(2,4)*pivot
-      c(2,5) = c(2,5)*pivot
-      r(2)   = r(2)  *pivot
-
-      coeff = lhs(1,2)
-      lhs(1,3)= lhs(1,3) - coeff*lhs(2,3)
-      lhs(1,4)= lhs(1,4) - coeff*lhs(2,4)
-      lhs(1,5)= lhs(1,5) - coeff*lhs(2,5)
-      c(1,1) = c(1,1) - coeff*c(2,1)
-      c(1,2) = c(1,2) - coeff*c(2,2)
-      c(1,3) = c(1,3) - coeff*c(2,3)
-      c(1,4) = c(1,4) - coeff*c(2,4)
-      c(1,5) = c(1,5) - coeff*c(2,5)
-      r(1)   = r(1)   - coeff*r(2)
-
-      coeff = lhs(3,2)
-      lhs(3,3)= lhs(3,3) - coeff*lhs(2,3)
-      lhs(3,4)= lhs(3,4) - coeff*lhs(2,4)
-      lhs(3,5)= lhs(3,5) - coeff*lhs(2,5)
-      c(3,1) = c(3,1) - coeff*c(2,1)
-      c(3,2) = c(3,2) - coeff*c(2,2)
-      c(3,3) = c(3,3) - coeff*c(2,3)
-      c(3,4) = c(3,4) - coeff*c(2,4)
-      c(3,5) = c(3,5) - coeff*c(2,5)
-      r(3)   = r(3)   - coeff*r(2)
-
-      coeff = lhs(4,2)
-      lhs(4,3)= lhs(4,3) - coeff*lhs(2,3)
-      lhs(4,4)= lhs(4,4) - coeff*lhs(2,4)
-      lhs(4,5)= lhs(4,5) - coeff*lhs(2,5)
-      c(4,1) = c(4,1) - coeff*c(2,1)
-      c(4,2) = c(4,2) - coeff*c(2,2)
-      c(4,3) = c(4,3) - coeff*c(2,3)
-      c(4,4) = c(4,4) - coeff*c(2,4)
-      c(4,5) = c(4,5) - coeff*c(2,5)
-      r(4)   = r(4)   - coeff*r(2)
-
-      coeff = lhs(5,2)
-      lhs(5,3)= lhs(5,3) - coeff*lhs(2,3)
-      lhs(5,4)= lhs(5,4) - coeff*lhs(2,4)
-      lhs(5,5)= lhs(5,5) - coeff*lhs(2,5)
-      c(5,1) = c(5,1) - coeff*c(2,1)
-      c(5,2) = c(5,2) - coeff*c(2,2)
-      c(5,3) = c(5,3) - coeff*c(2,3)
-      c(5,4) = c(5,4) - coeff*c(2,4)
-      c(5,5) = c(5,5) - coeff*c(2,5)
-      r(5)   = r(5)   - coeff*r(2)
-
-
-      pivot = 1.00d0/lhs(3,3)
-      lhs(3,4) = lhs(3,4)*pivot
-      lhs(3,5) = lhs(3,5)*pivot
-      c(3,1) = c(3,1)*pivot
-      c(3,2) = c(3,2)*pivot
-      c(3,3) = c(3,3)*pivot
-      c(3,4) = c(3,4)*pivot
-      c(3,5) = c(3,5)*pivot
-      r(3)   = r(3)  *pivot
-
-      coeff = lhs(1,3)
-      lhs(1,4)= lhs(1,4) - coeff*lhs(3,4)
-      lhs(1,5)= lhs(1,5) - coeff*lhs(3,5)
-      c(1,1) = c(1,1) - coeff*c(3,1)
-      c(1,2) = c(1,2) - coeff*c(3,2)
-      c(1,3) = c(1,3) - coeff*c(3,3)
-      c(1,4) = c(1,4) - coeff*c(3,4)
-      c(1,5) = c(1,5) - coeff*c(3,5)
-      r(1)   = r(1)   - coeff*r(3)
-
-      coeff = lhs(2,3)
-      lhs(2,4)= lhs(2,4) - coeff*lhs(3,4)
-      lhs(2,5)= lhs(2,5) - coeff*lhs(3,5)
-      c(2,1) = c(2,1) - coeff*c(3,1)
-      c(2,2) = c(2,2) - coeff*c(3,2)
-      c(2,3) = c(2,3) - coeff*c(3,3)
-      c(2,4) = c(2,4) - coeff*c(3,4)
-      c(2,5) = c(2,5) - coeff*c(3,5)
-      r(2)   = r(2)   - coeff*r(3)
-
-      coeff = lhs(4,3)
-      lhs(4,4)= lhs(4,4) - coeff*lhs(3,4)
-      lhs(4,5)= lhs(4,5) - coeff*lhs(3,5)
-      c(4,1) = c(4,1) - coeff*c(3,1)
-      c(4,2) = c(4,2) - coeff*c(3,2)
-      c(4,3) = c(4,3) - coeff*c(3,3)
-      c(4,4) = c(4,4) - coeff*c(3,4)
-      c(4,5) = c(4,5) - coeff*c(3,5)
-      r(4)   = r(4)   - coeff*r(3)
-
-      coeff = lhs(5,3)
-      lhs(5,4)= lhs(5,4) - coeff*lhs(3,4)
-      lhs(5,5)= lhs(5,5) - coeff*lhs(3,5)
-      c(5,1) = c(5,1) - coeff*c(3,1)
-      c(5,2) = c(5,2) - coeff*c(3,2)
-      c(5,3) = c(5,3) - coeff*c(3,3)
-      c(5,4) = c(5,4) - coeff*c(3,4)
-      c(5,5) = c(5,5) - coeff*c(3,5)
-      r(5)   = r(5)   - coeff*r(3)
-
-
-      pivot = 1.00d0/lhs(4,4)
-      lhs(4,5) = lhs(4,5)*pivot
-      c(4,1) = c(4,1)*pivot
-      c(4,2) = c(4,2)*pivot
-      c(4,3) = c(4,3)*pivot
-      c(4,4) = c(4,4)*pivot
-      c(4,5) = c(4,5)*pivot
-      r(4)   = r(4)  *pivot
-
-      coeff = lhs(1,4)
-      lhs(1,5)= lhs(1,5) - coeff*lhs(4,5)
-      c(1,1) = c(1,1) - coeff*c(4,1)
-      c(1,2) = c(1,2) - coeff*c(4,2)
-      c(1,3) = c(1,3) - coeff*c(4,3)
-      c(1,4) = c(1,4) - coeff*c(4,4)
-      c(1,5) = c(1,5) - coeff*c(4,5)
-      r(1)   = r(1)   - coeff*r(4)
-
-      coeff = lhs(2,4)
-      lhs(2,5)= lhs(2,5) - coeff*lhs(4,5)
-      c(2,1) = c(2,1) - coeff*c(4,1)
-      c(2,2) = c(2,2) - coeff*c(4,2)
-      c(2,3) = c(2,3) - coeff*c(4,3)
-      c(2,4) = c(2,4) - coeff*c(4,4)
-      c(2,5) = c(2,5) - coeff*c(4,5)
-      r(2)   = r(2)   - coeff*r(4)
-
-      coeff = lhs(3,4)
-      lhs(3,5)= lhs(3,5) - coeff*lhs(4,5)
-      c(3,1) = c(3,1) - coeff*c(4,1)
-      c(3,2) = c(3,2) - coeff*c(4,2)
-      c(3,3) = c(3,3) - coeff*c(4,3)
-      c(3,4) = c(3,4) - coeff*c(4,4)
-      c(3,5) = c(3,5) - coeff*c(4,5)
-      r(3)   = r(3)   - coeff*r(4)
-
-      coeff = lhs(5,4)
-      lhs(5,5)= lhs(5,5) - coeff*lhs(4,5)
-      c(5,1) = c(5,1) - coeff*c(4,1)
-      c(5,2) = c(5,2) - coeff*c(4,2)
-      c(5,3) = c(5,3) - coeff*c(4,3)
-      c(5,4) = c(5,4) - coeff*c(4,4)
-      c(5,5) = c(5,5) - coeff*c(4,5)
-      r(5)   = r(5)   - coeff*r(4)
-
-
-      pivot = 1.00d0/lhs(5,5)
-      c(5,1) = c(5,1)*pivot
-      c(5,2) = c(5,2)*pivot
-      c(5,3) = c(5,3)*pivot
-      c(5,4) = c(5,4)*pivot
-      c(5,5) = c(5,5)*pivot
-      r(5)   = r(5)  *pivot
-
-      coeff = lhs(1,5)
-      c(1,1) = c(1,1) - coeff*c(5,1)
-      c(1,2) = c(1,2) - coeff*c(5,2)
-      c(1,3) = c(1,3) - coeff*c(5,3)
-      c(1,4) = c(1,4) - coeff*c(5,4)
-      c(1,5) = c(1,5) - coeff*c(5,5)
-      r(1)   = r(1)   - coeff*r(5)
-
-      coeff = lhs(2,5)
-      c(2,1) = c(2,1) - coeff*c(5,1)
-      c(2,2) = c(2,2) - coeff*c(5,2)
-      c(2,3) = c(2,3) - coeff*c(5,3)
-      c(2,4) = c(2,4) - coeff*c(5,4)
-      c(2,5) = c(2,5) - coeff*c(5,5)
-      r(2)   = r(2)   - coeff*r(5)
-
-      coeff = lhs(3,5)
-      c(3,1) = c(3,1) - coeff*c(5,1)
-      c(3,2) = c(3,2) - coeff*c(5,2)
-      c(3,3) = c(3,3) - coeff*c(5,3)
-      c(3,4) = c(3,4) - coeff*c(5,4)
-      c(3,5) = c(3,5) - coeff*c(5,5)
-      r(3)   = r(3)   - coeff*r(5)
-
-      coeff = lhs(4,5)
-      c(4,1) = c(4,1) - coeff*c(5,1)
-      c(4,2) = c(4,2) - coeff*c(5,2)
-      c(4,3) = c(4,3) - coeff*c(5,3)
-      c(4,4) = c(4,4) - coeff*c(5,4)
-      c(4,5) = c(4,5) - coeff*c(5,5)
-      r(4)   = r(4)   - coeff*r(5)
-
-
-      return
-      end
-
-
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine binvrhs( lhs,r )
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     
-c---------------------------------------------------------------------
-
-      implicit none
-
-      double precision pivot, coeff, lhs
-      dimension lhs(5,5)
-      double precision r(5)
-
-c---------------------------------------------------------------------
-c     
-c---------------------------------------------------------------------
-
-
-      pivot = 1.00d0/lhs(1,1)
-      lhs(1,2) = lhs(1,2)*pivot
-      lhs(1,3) = lhs(1,3)*pivot
-      lhs(1,4) = lhs(1,4)*pivot
-      lhs(1,5) = lhs(1,5)*pivot
-      r(1)   = r(1)  *pivot
-
-      coeff = lhs(2,1)
-      lhs(2,2)= lhs(2,2) - coeff*lhs(1,2)
-      lhs(2,3)= lhs(2,3) - coeff*lhs(1,3)
-      lhs(2,4)= lhs(2,4) - coeff*lhs(1,4)
-      lhs(2,5)= lhs(2,5) - coeff*lhs(1,5)
-      r(2)   = r(2)   - coeff*r(1)
-
-      coeff = lhs(3,1)
-      lhs(3,2)= lhs(3,2) - coeff*lhs(1,2)
-      lhs(3,3)= lhs(3,3) - coeff*lhs(1,3)
-      lhs(3,4)= lhs(3,4) - coeff*lhs(1,4)
-      lhs(3,5)= lhs(3,5) - coeff*lhs(1,5)
-      r(3)   = r(3)   - coeff*r(1)
-
-      coeff = lhs(4,1)
-      lhs(4,2)= lhs(4,2) - coeff*lhs(1,2)
-      lhs(4,3)= lhs(4,3) - coeff*lhs(1,3)
-      lhs(4,4)= lhs(4,4) - coeff*lhs(1,4)
-      lhs(4,5)= lhs(4,5) - coeff*lhs(1,5)
-      r(4)   = r(4)   - coeff*r(1)
-
-      coeff = lhs(5,1)
-      lhs(5,2)= lhs(5,2) - coeff*lhs(1,2)
-      lhs(5,3)= lhs(5,3) - coeff*lhs(1,3)
-      lhs(5,4)= lhs(5,4) - coeff*lhs(1,4)
-      lhs(5,5)= lhs(5,5) - coeff*lhs(1,5)
-      r(5)   = r(5)   - coeff*r(1)
-
-
-      pivot = 1.00d0/lhs(2,2)
-      lhs(2,3) = lhs(2,3)*pivot
-      lhs(2,4) = lhs(2,4)*pivot
-      lhs(2,5) = lhs(2,5)*pivot
-      r(2)   = r(2)  *pivot
-
-      coeff = lhs(1,2)
-      lhs(1,3)= lhs(1,3) - coeff*lhs(2,3)
-      lhs(1,4)= lhs(1,4) - coeff*lhs(2,4)
-      lhs(1,5)= lhs(1,5) - coeff*lhs(2,5)
-      r(1)   = r(1)   - coeff*r(2)
-
-      coeff = lhs(3,2)
-      lhs(3,3)= lhs(3,3) - coeff*lhs(2,3)
-      lhs(3,4)= lhs(3,4) - coeff*lhs(2,4)
-      lhs(3,5)= lhs(3,5) - coeff*lhs(2,5)
-      r(3)   = r(3)   - coeff*r(2)
-
-      coeff = lhs(4,2)
-      lhs(4,3)= lhs(4,3) - coeff*lhs(2,3)
-      lhs(4,4)= lhs(4,4) - coeff*lhs(2,4)
-      lhs(4,5)= lhs(4,5) - coeff*lhs(2,5)
-      r(4)   = r(4)   - coeff*r(2)
-
-      coeff = lhs(5,2)
-      lhs(5,3)= lhs(5,3) - coeff*lhs(2,3)
-      lhs(5,4)= lhs(5,4) - coeff*lhs(2,4)
-      lhs(5,5)= lhs(5,5) - coeff*lhs(2,5)
-      r(5)   = r(5)   - coeff*r(2)
-
-
-      pivot = 1.00d0/lhs(3,3)
-      lhs(3,4) = lhs(3,4)*pivot
-      lhs(3,5) = lhs(3,5)*pivot
-      r(3)   = r(3)  *pivot
-
-      coeff = lhs(1,3)
-      lhs(1,4)= lhs(1,4) - coeff*lhs(3,4)
-      lhs(1,5)= lhs(1,5) - coeff*lhs(3,5)
-      r(1)   = r(1)   - coeff*r(3)
-
-      coeff = lhs(2,3)
-      lhs(2,4)= lhs(2,4) - coeff*lhs(3,4)
-      lhs(2,5)= lhs(2,5) - coeff*lhs(3,5)
-      r(2)   = r(2)   - coeff*r(3)
-
-      coeff = lhs(4,3)
-      lhs(4,4)= lhs(4,4) - coeff*lhs(3,4)
-      lhs(4,5)= lhs(4,5) - coeff*lhs(3,5)
-      r(4)   = r(4)   - coeff*r(3)
-
-      coeff = lhs(5,3)
-      lhs(5,4)= lhs(5,4) - coeff*lhs(3,4)
-      lhs(5,5)= lhs(5,5) - coeff*lhs(3,5)
-      r(5)   = r(5)   - coeff*r(3)
-
-
-      pivot = 1.00d0/lhs(4,4)
-      lhs(4,5) = lhs(4,5)*pivot
-      r(4)   = r(4)  *pivot
-
-      coeff = lhs(1,4)
-      lhs(1,5)= lhs(1,5) - coeff*lhs(4,5)
-      r(1)   = r(1)   - coeff*r(4)
-
-      coeff = lhs(2,4)
-      lhs(2,5)= lhs(2,5) - coeff*lhs(4,5)
-      r(2)   = r(2)   - coeff*r(4)
-
-      coeff = lhs(3,4)
-      lhs(3,5)= lhs(3,5) - coeff*lhs(4,5)
-      r(3)   = r(3)   - coeff*r(4)
-
-      coeff = lhs(5,4)
-      lhs(5,5)= lhs(5,5) - coeff*lhs(4,5)
-      r(5)   = r(5)   - coeff*r(4)
-
-
-      pivot = 1.00d0/lhs(5,5)
-      r(5)   = r(5)  *pivot
-
-      coeff = lhs(1,5)
-      r(1)   = r(1)   - coeff*r(5)
-
-      coeff = lhs(2,5)
-      r(2)   = r(2)   - coeff*r(5)
-
-      coeff = lhs(3,5)
-      r(3)   = r(3)   - coeff*r(5)
-
-      coeff = lhs(4,5)
-      r(4)   = r(4)   - coeff*r(5)
-
-
-      return
-      end
-
-
-
diff --git a/examples/smpi/NAS/BT/verify.f b/examples/smpi/NAS/BT/verify.f
deleted file mode 100644 (file)
index 7dbc8a9..0000000
+++ /dev/null
@@ -1,435 +0,0 @@
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-        subroutine verify(no_time_steps, class, verified)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c  verification routine                         
-c---------------------------------------------------------------------
-
-        include 'header.h'
-        include 'mpinpb.h'
-
-        double precision xcrref(5),xceref(5),xcrdif(5),xcedif(5), 
-     >                   epsilon, xce(5), xcr(5), dtref
-        integer m, no_time_steps
-        character class
-        logical verified
-
-c---------------------------------------------------------------------
-c   tolerance level
-c---------------------------------------------------------------------
-        epsilon = 1.0d-08
-        verified = .true.
-
-c---------------------------------------------------------------------
-c   compute the error norm and the residual norm, and exit if not printing
-c---------------------------------------------------------------------
-
-        if (iotype .ne. 0) then
-           call accumulate_norms(xce)
-        else
-           call error_norm(xce)
-        endif
-
-        call copy_faces
-
-        call rhs_norm(xcr)
-
-        do m = 1, 5
-           xcr(m) = xcr(m) / dt
-        enddo
-
-        if (node .ne. 0) return
-
-        class = 'U'
-
-        do m = 1,5
-           xcrref(m) = 1.0
-           xceref(m) = 1.0
-        end do
-
-c---------------------------------------------------------------------
-c    reference data for 12X12X12 grids after 60 time steps, with DT = 1.0d-02
-c---------------------------------------------------------------------
-        if ( (grid_points(1)  .eq. 12     ) .and. 
-     >       (grid_points(2)  .eq. 12     ) .and.
-     >       (grid_points(3)  .eq. 12     ) .and.
-     >       (no_time_steps   .eq. 60    ))  then
-
-           class = 'S'
-           dtref = 1.0d-2
-
-c---------------------------------------------------------------------
-c  Reference values of RMS-norms of residual.
-c---------------------------------------------------------------------
-         xcrref(1) = 1.7034283709541311d-01
-         xcrref(2) = 1.2975252070034097d-02
-         xcrref(3) = 3.2527926989486055d-02
-         xcrref(4) = 2.6436421275166801d-02
-         xcrref(5) = 1.9211784131744430d-01
-
-c---------------------------------------------------------------------
-c  Reference values of RMS-norms of solution error.
-c---------------------------------------------------------------------
-
-         if (iotype .eq. 0) then
-           xceref(1) = 4.9976913345811579d-04
-           xceref(2) = 4.5195666782961927d-05
-           xceref(3) = 7.3973765172921357d-05
-           xceref(4) = 7.3821238632439731d-05
-           xceref(5) = 8.9269630987491446d-04
-         else
-           xceref(1) = 0.1149036328945d+02
-           xceref(2) = 0.9156788904727d+00
-           xceref(3) = 0.2857899428614d+01
-           xceref(4) = 0.2598273346734d+01
-           xceref(5) = 0.2652795397547d+02
-         endif
-
-c---------------------------------------------------------------------
-c    reference data for 24X24X24 grids after 200 time steps, with DT = 0.8d-3
-c---------------------------------------------------------------------
-        elseif ( (grid_points(1) .eq. 24) .and. 
-     >           (grid_points(2) .eq. 24) .and.
-     >           (grid_points(3) .eq. 24) .and.
-     >           (no_time_steps . eq. 200) ) then
-
-           class = 'W'
-           dtref = 0.8d-3
-c---------------------------------------------------------------------
-c  Reference values of RMS-norms of residual.
-c---------------------------------------------------------------------
-           xcrref(1) = 0.1125590409344d+03
-           xcrref(2) = 0.1180007595731d+02
-           xcrref(3) = 0.2710329767846d+02
-           xcrref(4) = 0.2469174937669d+02
-           xcrref(5) = 0.2638427874317d+03
-
-c---------------------------------------------------------------------
-c  Reference values of RMS-norms of solution error.
-c---------------------------------------------------------------------
-
-         if (iotype .eq. 0) then
-           xceref(1) = 0.4419655736008d+01
-           xceref(2) = 0.4638531260002d+00
-           xceref(3) = 0.1011551749967d+01
-           xceref(4) = 0.9235878729944d+00
-           xceref(5) = 0.1018045837718d+02
-         else
-           xceref(1) = 0.6729594398612d+02
-           xceref(2) = 0.5264523081690d+01
-           xceref(3) = 0.1677107142637d+02
-           xceref(4) = 0.1508721463436d+02
-           xceref(5) = 0.1477018363393d+03
-         endif
-
-
-c---------------------------------------------------------------------
-c    reference data for 64X64X64 grids after 200 time steps, with DT = 0.8d-3
-c---------------------------------------------------------------------
-        elseif ( (grid_points(1) .eq. 64) .and. 
-     >           (grid_points(2) .eq. 64) .and.
-     >           (grid_points(3) .eq. 64) .and.
-     >           (no_time_steps . eq. 200) ) then
-
-           class = 'A'
-           dtref = 0.8d-3
-c---------------------------------------------------------------------
-c  Reference values of RMS-norms of residual.
-c---------------------------------------------------------------------
-         xcrref(1) = 1.0806346714637264d+02
-         xcrref(2) = 1.1319730901220813d+01
-         xcrref(3) = 2.5974354511582465d+01
-         xcrref(4) = 2.3665622544678910d+01
-         xcrref(5) = 2.5278963211748344d+02
-
-c---------------------------------------------------------------------
-c  Reference values of RMS-norms of solution error.
-c---------------------------------------------------------------------
-
-         if (iotype .eq. 0) then
-           xceref(1) = 4.2348416040525025d+00
-           xceref(2) = 4.4390282496995698d-01
-           xceref(3) = 9.6692480136345650d-01
-           xceref(4) = 8.8302063039765474d-01
-           xceref(5) = 9.7379901770829278d+00
-         else
-           xceref(1) = 0.6482218724961d+02
-           xceref(2) = 0.5066461714527d+01
-           xceref(3) = 0.1613931961359d+02
-           xceref(4) = 0.1452010201481d+02
-           xceref(5) = 0.1420099377681d+03
-         endif
-
-c---------------------------------------------------------------------
-c    reference data for 102X102X102 grids after 200 time steps,
-c    with DT = 3.0d-04
-c---------------------------------------------------------------------
-        elseif ( (grid_points(1) .eq. 102) .and. 
-     >           (grid_points(2) .eq. 102) .and.
-     >           (grid_points(3) .eq. 102) .and.
-     >           (no_time_steps . eq. 200) ) then
-
-           class = 'B'
-           dtref = 3.0d-4
-
-c---------------------------------------------------------------------
-c  Reference values of RMS-norms of residual.
-c---------------------------------------------------------------------
-         xcrref(1) = 1.4233597229287254d+03
-         xcrref(2) = 9.9330522590150238d+01
-         xcrref(3) = 3.5646025644535285d+02
-         xcrref(4) = 3.2485447959084092d+02
-         xcrref(5) = 3.2707541254659363d+03
-
-c---------------------------------------------------------------------
-c  Reference values of RMS-norms of solution error.
-c---------------------------------------------------------------------
-
-         if (iotype .eq. 0) then
-           xceref(1) = 5.2969847140936856d+01
-           xceref(2) = 4.4632896115670668d+00
-           xceref(3) = 1.3122573342210174d+01
-           xceref(4) = 1.2006925323559144d+01
-           xceref(5) = 1.2459576151035986d+02
-         else
-           xceref(1) = 0.1477545106464d+03
-           xceref(2) = 0.1108895555053d+02
-           xceref(3) = 0.3698065590331d+02
-           xceref(4) = 0.3310505581440d+02
-           xceref(5) = 0.3157928282563d+03
-         endif
-
-c---------------------------------------------------------------------
-c    reference data for 162X162X162 grids after 200 time steps,
-c    with DT = 1.0d-04
-c---------------------------------------------------------------------
-        elseif ( (grid_points(1) .eq. 162) .and. 
-     >           (grid_points(2) .eq. 162) .and.
-     >           (grid_points(3) .eq. 162) .and.
-     >           (no_time_steps . eq. 200) ) then
-
-           class = 'C'
-           dtref = 1.0d-4
-
-c---------------------------------------------------------------------
-c  Reference values of RMS-norms of residual.
-c---------------------------------------------------------------------
-         xcrref(1) = 0.62398116551764615d+04
-         xcrref(2) = 0.50793239190423964d+03
-         xcrref(3) = 0.15423530093013596d+04
-         xcrref(4) = 0.13302387929291190d+04
-         xcrref(5) = 0.11604087428436455d+05
-
-c---------------------------------------------------------------------
-c  Reference values of RMS-norms of solution error.
-c---------------------------------------------------------------------
-
-         if (iotype .eq. 0) then
-           xceref(1) = 0.16462008369091265d+03
-           xceref(2) = 0.11497107903824313d+02
-           xceref(3) = 0.41207446207461508d+02
-           xceref(4) = 0.37087651059694167d+02
-           xceref(5) = 0.36211053051841265d+03
-         else
-           xceref(1) = 0.2597156483475d+03
-           xceref(2) = 0.1985384289495d+02
-           xceref(3) = 0.6517950485788d+02
-           xceref(4) = 0.5757235541520d+02
-           xceref(5) = 0.5215668188726d+03
-         endif 
-
-
-c---------------------------------------------------------------------
-c    reference data for 408x408x408 grids after 250 time steps,
-c    with DT = 0.2d-04
-c---------------------------------------------------------------------
-        elseif ( (grid_points(1) .eq. 408) .and. 
-     >           (grid_points(2) .eq. 408) .and.
-     >           (grid_points(3) .eq. 408) .and.
-     >           (no_time_steps . eq. 250) ) then
-
-           class = 'D'
-           dtref = 0.2d-4
-
-c---------------------------------------------------------------------
-c  Reference values of RMS-norms of residual.
-c---------------------------------------------------------------------
-         xcrref(1) = 0.2533188551738d+05
-         xcrref(2) = 0.2346393716980d+04
-         xcrref(3) = 0.6294554366904d+04
-         xcrref(4) = 0.5352565376030d+04
-         xcrref(5) = 0.3905864038618d+05
-
-c---------------------------------------------------------------------
-c  Reference values of RMS-norms of solution error.
-c---------------------------------------------------------------------
-
-         if (iotype .eq. 0) then
-           xceref(1) = 0.3100009377557d+03
-           xceref(2) = 0.2424086324913d+02
-           xceref(3) = 0.7782212022645d+02
-           xceref(4) = 0.6835623860116d+02
-           xceref(5) = 0.6065737200368d+03
-         else
-           xceref(1) = 0.3813781566713d+03
-           xceref(2) = 0.3160872966198d+02
-           xceref(3) = 0.9593576357290d+02
-           xceref(4) = 0.8363391989815d+02
-           xceref(5) = 0.7063466087423d+03
-         endif
-
-
-c---------------------------------------------------------------------
-c    reference data for 1020x1020x1020 grids after 250 time steps,
-c    with DT = 0.4d-05
-c---------------------------------------------------------------------
-        elseif ( (grid_points(1) .eq. 1020) .and. 
-     >           (grid_points(2) .eq. 1020) .and.
-     >           (grid_points(3) .eq. 1020) .and.
-     >           (no_time_steps . eq. 250) ) then
-
-           class = 'E'
-           dtref = 0.4d-5
-
-c---------------------------------------------------------------------
-c  Reference values of RMS-norms of residual.
-c---------------------------------------------------------------------
-         xcrref(1) = 0.9795372484517d+05
-         xcrref(2) = 0.9739814511521d+04
-         xcrref(3) = 0.2467606342965d+05
-         xcrref(4) = 0.2092419572860d+05
-         xcrref(5) = 0.1392138856939d+06
-
-c---------------------------------------------------------------------
-c  Reference values of RMS-norms of solution error.
-c---------------------------------------------------------------------
-
-         if (iotype .eq. 0) then
-           xceref(1) = 0.4327562208414d+03
-           xceref(2) = 0.3699051964887d+02
-           xceref(3) = 0.1089845040954d+03
-           xceref(4) = 0.9462517622043d+02
-           xceref(5) = 0.7765512765309d+03
-         else
-c  wr_interval = 5
-           xceref(1) = 0.4729898413058d+03
-           xceref(2) = 0.4145899331704d+02
-           xceref(3) = 0.1192850917138d+03
-           xceref(4) = 0.1032746026932d+03
-           xceref(5) = 0.8270322177634d+03
-c  wr_interval = 10
-c          xceref(1) = 0.4718135916251d+03
-c          xceref(2) = 0.4132620259096d+02
-c          xceref(3) = 0.1189831133503d+03
-c          xceref(4) = 0.1030212798803d+03
-c          xceref(5) = 0.8255924078458d+03
-        endif
-
-        else
-           verified = .false.
-        endif
-
-c---------------------------------------------------------------------
-c    verification test for residuals if gridsize is one of 
-c    the defined grid sizes above (class .ne. 'U')
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c    Compute the difference of solution values and the known reference 
-c    values.
-c---------------------------------------------------------------------
-        do m = 1, 5
-           
-           xcrdif(m) = dabs((xcr(m)-xcrref(m))/xcrref(m)) 
-           xcedif(m) = dabs((xce(m)-xceref(m))/xceref(m))
-           
-        enddo
-
-c---------------------------------------------------------------------
-c    Output the comparison of computed results to known cases.
-c---------------------------------------------------------------------
-
-        if (class .ne. 'U') then
-           write(*, 1990) class
- 1990      format(' Verification being performed for class ', a)
-           write (*,2000) epsilon
- 2000      format(' accuracy setting for epsilon = ', E20.13)
-           verified = (dabs(dt-dtref) .le. epsilon)
-           if (.not.verified) then  
-              verified = .false.
-              class = 'U'
-              write (*,1000) dtref
- 1000         format(' DT does not match the reference value of ', 
-     >                 E15.8)
-           endif
-        else 
-           write(*, 1995)
- 1995      format(' Unknown class')
-        endif
-
-
-        if (class .ne. 'U') then
-           write (*,2001) 
-        else
-           write (*, 2005)
-        endif
-
- 2001   format(' Comparison of RMS-norms of residual')
- 2005   format(' RMS-norms of residual')
-        do m = 1, 5
-           if (class .eq. 'U') then
-              write(*, 2015) m, xcr(m)
-           else if (xcrdif(m) .le. epsilon) then
-              write (*,2011) m,xcr(m),xcrref(m),xcrdif(m)
-           else 
-              verified = .false.
-              write (*,2010) m,xcr(m),xcrref(m),xcrdif(m)
-           endif
-        enddo
-
-        if (class .ne. 'U') then
-           write (*,2002)
-        else
-           write (*,2006)
-        endif
- 2002   format(' Comparison of RMS-norms of solution error')
- 2006   format(' RMS-norms of solution error')
-        
-        do m = 1, 5
-           if (class .eq. 'U') then
-              write(*, 2015) m, xce(m)
-           else if (xcedif(m) .le. epsilon) then
-              write (*,2011) m,xce(m),xceref(m),xcedif(m)
-           else
-              verified = .false.
-              write (*,2010) m,xce(m),xceref(m),xcedif(m)
-           endif
-        enddo
-        
- 2010   format(' FAILURE: ', i2, E20.13, E20.13, E20.13)
- 2011   format('          ', i2, E20.13, E20.13, E20.13)
- 2015   format('          ', i2, E20.13)
-        
-        if (class .eq. 'U') then
-           write(*, 2022)
-           write(*, 2023)
- 2022      format(' No reference values provided')
- 2023      format(' No verification performed')
-        else if (verified) then
-           write(*, 2020)
- 2020      format(' Verification Successful')
-        else
-           write(*, 2021)
- 2021      format(' Verification failed')
-        endif
-
-        return
-
-
-        end
diff --git a/examples/smpi/NAS/BT/work_lhs.h b/examples/smpi/NAS/BT/work_lhs.h
deleted file mode 100644 (file)
index d9bc9e4..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-c
-c  work_lhs.h
-c
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      double precision fjac(5, 5, -2:MAX_CELL_DIM+1),
-     >                 njac(5, 5, -2:MAX_CELL_DIM+1),
-     >                 lhsa(5, 5, -1:MAX_CELL_DIM),
-     >                 lhsb(5, 5, -1:MAX_CELL_DIM),
-     >                 tmp1, tmp2, tmp3
-      common /work_lhs/ fjac, njac, lhsa, lhsb, tmp1, tmp2, tmp3
diff --git a/examples/smpi/NAS/BT/work_lhs_vec.h b/examples/smpi/NAS/BT/work_lhs_vec.h
deleted file mode 100644 (file)
index a97054f..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-c
-c  work_lhs_vec.h
-c
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      double precision fjac(5, 5, -2:MAX_CELL_DIM+1, -2:MAX_CELL_DIM+1),
-     >                 njac(5, 5, -2:MAX_CELL_DIM+1, -2:MAX_CELL_DIM+1),
-     >                 lhsa(5, 5, -1:MAX_CELL_DIM,   -1:MAX_CELL_DIM),
-     >                 lhsb(5, 5, -1:MAX_CELL_DIM,   -1:MAX_CELL_DIM),
-     >                 tmp1, tmp2, tmp3
-      common /work_lhs/ fjac, njac, lhsa, lhsb, tmp1, tmp2, tmp3
diff --git a/examples/smpi/NAS/BT/x_solve.f b/examples/smpi/NAS/BT/x_solve.f
deleted file mode 100644 (file)
index 5386732..0000000
+++ /dev/null
@@ -1,761 +0,0 @@
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine x_solve
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     
-c     Performs line solves in X direction by first factoring
-c     the block-tridiagonal matrix into an upper triangular matrix, 
-c     and then performing back substitution to solve for the unknow
-c     vectors of each line.  
-c     
-c     Make sure we treat elements zero to cell_size in the direction
-c     of the sweep.
-c     
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-      integer  c, istart, stage,
-     >     first, last, recv_id, error, r_status(MPI_STATUS_SIZE),
-     >     isize,jsize,ksize,send_id
-
-      istart = 0
-
-c---------------------------------------------------------------------
-c     in our terminology stage is the number of the cell in the x-direction
-c     i.e. stage = 1 means the start of the line stage=ncells means end
-c---------------------------------------------------------------------
-      do stage = 1,ncells
-         c = slice(1,stage)
-         isize = cell_size(1,c) - 1
-         jsize = cell_size(2,c) - 1
-         ksize = cell_size(3,c) - 1
-         
-c---------------------------------------------------------------------
-c     set last-cell flag
-c---------------------------------------------------------------------
-         if (stage .eq. ncells) then
-            last = 1
-         else
-            last = 0
-         endif
-
-         if (stage .eq. 1) then
-c---------------------------------------------------------------------
-c     This is the first cell, so solve without receiving data
-c---------------------------------------------------------------------
-            first = 1
-c            call lhsx(c)
-            call x_solve_cell(first,last,c)
-         else
-c---------------------------------------------------------------------
-c     Not the first cell of this line, so receive info from
-c     processor working on preceeding cell
-c---------------------------------------------------------------------
-            first = 0
-            call x_receive_solve_info(recv_id,c)
-c---------------------------------------------------------------------
-c     overlap computations and communications
-c---------------------------------------------------------------------
-c            call lhsx(c)
-c---------------------------------------------------------------------
-c     wait for completion
-c---------------------------------------------------------------------
-            call mpi_wait(send_id,r_status,error)
-            call mpi_wait(recv_id,r_status,error)
-c---------------------------------------------------------------------
-c     install C'(istart) and rhs'(istart) to be used in this cell
-c---------------------------------------------------------------------
-            call x_unpack_solve_info(c)
-            call x_solve_cell(first,last,c)
-         endif
-
-         if (last .eq. 0) call x_send_solve_info(send_id,c)
-      enddo
-
-c---------------------------------------------------------------------
-c     now perform backsubstitution in reverse direction
-c---------------------------------------------------------------------
-      do stage = ncells, 1, -1
-         c = slice(1,stage)
-         first = 0
-         last = 0
-         if (stage .eq. 1) first = 1
-         if (stage .eq. ncells) then
-            last = 1
-c---------------------------------------------------------------------
-c     last cell, so perform back substitute without waiting
-c---------------------------------------------------------------------
-            call x_backsubstitute(first, last,c)
-         else
-            call x_receive_backsub_info(recv_id,c)
-            call mpi_wait(send_id,r_status,error)
-            call mpi_wait(recv_id,r_status,error)
-            call x_unpack_backsub_info(c)
-            call x_backsubstitute(first,last,c)
-         endif
-         if (first .eq. 0) call x_send_backsub_info(send_id,c)
-      enddo
-
-
-      return
-      end
-      
-      
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine x_unpack_solve_info(c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     unpack C'(-1) and rhs'(-1) for
-c     all j and k
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      integer j,k,m,n,ptr,c,istart 
-
-      istart = 0
-      ptr = 0
-      do k=0,KMAX-1
-         do j=0,JMAX-1
-            do m=1,BLOCK_SIZE
-               do n=1,BLOCK_SIZE
-                  lhsc(m,n,istart-1,j,k,c) = out_buffer(ptr+n)
-               enddo
-               ptr = ptr+BLOCK_SIZE
-            enddo
-            do n=1,BLOCK_SIZE
-               rhs(n,istart-1,j,k,c) = out_buffer(ptr+n)
-            enddo
-            ptr = ptr+BLOCK_SIZE
-         enddo
-      enddo
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-      
-      subroutine x_send_solve_info(send_id,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     pack up and send C'(iend) and rhs'(iend) for
-c     all j and k
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer j,k,m,n,isize,ptr,c,jp,kp
-      integer error,send_id,buffer_size 
-
-      isize = cell_size(1,c)-1
-      jp = cell_coord(2,c) - 1
-      kp = cell_coord(3,c) - 1
-      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
-     >     (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE)
-
-c---------------------------------------------------------------------
-c     pack up buffer
-c---------------------------------------------------------------------
-      ptr = 0
-      do k=0,KMAX-1
-         do j=0,JMAX-1
-            do m=1,BLOCK_SIZE
-               do n=1,BLOCK_SIZE
-                  in_buffer(ptr+n) = lhsc(m,n,isize,j,k,c)
-               enddo
-               ptr = ptr+BLOCK_SIZE
-            enddo
-            do n=1,BLOCK_SIZE
-               in_buffer(ptr+n) = rhs(n,isize,j,k,c)
-            enddo
-            ptr = ptr+BLOCK_SIZE
-         enddo
-      enddo
-
-c---------------------------------------------------------------------
-c     send buffer 
-c---------------------------------------------------------------------
-      call mpi_isend(in_buffer, buffer_size,
-     >     dp_type, successor(1),
-     >     WEST+jp+kp*NCELLS, comm_solve,
-     >     send_id,error)
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine x_send_backsub_info(send_id,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     pack up and send U(istart) for all j and k
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer j,k,n,ptr,c,istart,jp,kp
-      integer error,send_id,buffer_size
-
-c---------------------------------------------------------------------
-c     Send element 0 to previous processor
-c---------------------------------------------------------------------
-      istart = 0
-      jp = cell_coord(2,c)-1
-      kp = cell_coord(3,c)-1
-      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE
-      ptr = 0
-      do k=0,KMAX-1
-         do j=0,JMAX-1
-            do n=1,BLOCK_SIZE
-               in_buffer(ptr+n) = rhs(n,istart,j,k,c)
-            enddo
-            ptr = ptr+BLOCK_SIZE
-         enddo
-      enddo
-      call mpi_isend(in_buffer, buffer_size,
-     >     dp_type, predecessor(1), 
-     >     EAST+jp+kp*NCELLS, comm_solve, 
-     >     send_id,error)
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine x_unpack_backsub_info(c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     unpack U(isize) for all j and k
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      integer j,k,n,ptr,c
-
-      ptr = 0
-      do k=0,KMAX-1
-         do j=0,JMAX-1
-            do n=1,BLOCK_SIZE
-               backsub_info(n,j,k,c) = out_buffer(ptr+n)
-            enddo
-            ptr = ptr+BLOCK_SIZE
-         enddo
-      enddo
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine x_receive_backsub_info(recv_id,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     post mpi receives
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer error,recv_id,jp,kp,c,buffer_size
-      jp = cell_coord(2,c) - 1
-      kp = cell_coord(3,c) - 1
-      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE
-      call mpi_irecv(out_buffer, buffer_size,
-     >     dp_type, successor(1), 
-     >     EAST+jp+kp*NCELLS, comm_solve, 
-     >     recv_id, error)
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine x_receive_solve_info(recv_id,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     post mpi receives 
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer jp,kp,recv_id,error,c,buffer_size
-      jp = cell_coord(2,c) - 1
-      kp = cell_coord(3,c) - 1
-      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
-     >     (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE)
-      call mpi_irecv(out_buffer, buffer_size,
-     >     dp_type, predecessor(1), 
-     >     WEST+jp+kp*NCELLS,  comm_solve, 
-     >     recv_id, error)
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-      
-      subroutine x_backsubstitute(first, last, c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     back solve: if last cell, then generate U(isize)=rhs(isize)
-c     else assume U(isize) is loaded in un pack backsub_info
-c     so just use it
-c     after call u(istart) will be sent to next cell
-c---------------------------------------------------------------------
-
-      include 'header.h'
-
-      integer first, last, c, i, j, k
-      integer m,n,isize,jsize,ksize,istart
-      
-      istart = 0
-      isize = cell_size(1,c)-1
-      jsize = cell_size(2,c)-end(2,c)-1      
-      ksize = cell_size(3,c)-end(3,c)-1
-      if (last .eq. 0) then
-         do k=start(3,c),ksize
-            do j=start(2,c),jsize
-c---------------------------------------------------------------------
-c     U(isize) uses info from previous cell if not last cell
-c---------------------------------------------------------------------
-               do m=1,BLOCK_SIZE
-                  do n=1,BLOCK_SIZE
-                     rhs(m,isize,j,k,c) = rhs(m,isize,j,k,c) 
-     >                    - lhsc(m,n,isize,j,k,c)*
-     >                    backsub_info(n,j,k,c)
-c---------------------------------------------------------------------
-c     rhs(m,isize,j,k,c) = rhs(m,isize,j,k,c) 
-c     $                    - lhsc(m,n,isize,j,k,c)*rhs(n,isize+1,j,k,c)
-c---------------------------------------------------------------------
-                  enddo
-               enddo
-            enddo
-         enddo
-      endif
-      do k=start(3,c),ksize
-         do j=start(2,c),jsize
-            do i=isize-1,istart,-1
-               do m=1,BLOCK_SIZE
-                  do n=1,BLOCK_SIZE
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) 
-     >                    - lhsc(m,n,i,j,k,c)*rhs(n,i+1,j,k,c)
-                  enddo
-               enddo
-            enddo
-         enddo
-      enddo
-
-      return
-      end
-
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine x_solve_cell(first,last,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     performs guaussian elimination on this cell.
-c     
-c     assumes that unpacking routines for non-first cells 
-c     preload C' and rhs' from previous cell.
-c     
-c     assumed send happens outside this routine, but that
-c     c'(IMAX) and rhs'(IMAX) will be sent to next cell
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'work_lhs.h'
-
-      integer first,last,c
-      integer i,j,k,isize,ksize,jsize,istart
-
-      istart = 0
-      isize = cell_size(1,c)-1
-      jsize = cell_size(2,c)-end(2,c)-1
-      ksize = cell_size(3,c)-end(3,c)-1
-
-      call lhsabinit(lhsa, lhsb, isize)
-
-      do k=start(3,c),ksize 
-         do j=start(2,c),jsize
-
-c---------------------------------------------------------------------
-c     This function computes the left hand side in the xi-direction
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     determine a (labeled f) and n jacobians for cell c
-c---------------------------------------------------------------------
-            do i = start(1,c)-1, cell_size(1,c) - end(1,c)
-
-               tmp1 = rho_i(i,j,k,c)
-               tmp2 = tmp1 * tmp1
-               tmp3 = tmp1 * tmp2
-c---------------------------------------------------------------------
-c     
-c---------------------------------------------------------------------
-               fjac(1,1,i) = 0.0d+00
-               fjac(1,2,i) = 1.0d+00
-               fjac(1,3,i) = 0.0d+00
-               fjac(1,4,i) = 0.0d+00
-               fjac(1,5,i) = 0.0d+00
-
-               fjac(2,1,i) = -(u(2,i,j,k,c) * tmp2 * 
-     >              u(2,i,j,k,c))
-     >              + c2 * qs(i,j,k,c)
-               fjac(2,2,i) = ( 2.0d+00 - c2 )
-     >              * ( u(2,i,j,k,c) * tmp1 )
-               fjac(2,3,i) = - c2 * ( u(3,i,j,k,c) * tmp1 )
-               fjac(2,4,i) = - c2 * ( u(4,i,j,k,c) * tmp1 )
-               fjac(2,5,i) = c2
-
-               fjac(3,1,i) = - ( u(2,i,j,k,c)*u(3,i,j,k,c) ) * tmp2
-               fjac(3,2,i) = u(3,i,j,k,c) * tmp1
-               fjac(3,3,i) = u(2,i,j,k,c) * tmp1
-               fjac(3,4,i) = 0.0d+00
-               fjac(3,5,i) = 0.0d+00
-
-               fjac(4,1,i) = - ( u(2,i,j,k,c)*u(4,i,j,k,c) ) * tmp2
-               fjac(4,2,i) = u(4,i,j,k,c) * tmp1
-               fjac(4,3,i) = 0.0d+00
-               fjac(4,4,i) = u(2,i,j,k,c) * tmp1
-               fjac(4,5,i) = 0.0d+00
-
-               fjac(5,1,i) = ( c2 * 2.0d0 * qs(i,j,k,c)
-     >              - c1 * ( u(5,i,j,k,c) * tmp1 ) )
-     >              * ( u(2,i,j,k,c) * tmp1 )
-               fjac(5,2,i) = c1 *  u(5,i,j,k,c) * tmp1 
-     >              - c2
-     >              * ( u(2,i,j,k,c)*u(2,i,j,k,c) * tmp2
-     >              + qs(i,j,k,c) )
-               fjac(5,3,i) = - c2 * ( u(3,i,j,k,c)*u(2,i,j,k,c) )
-     >              * tmp2
-               fjac(5,4,i) = - c2 * ( u(4,i,j,k,c)*u(2,i,j,k,c) )
-     >              * tmp2
-               fjac(5,5,i) = c1 * ( u(2,i,j,k,c) * tmp1 )
-
-               njac(1,1,i) = 0.0d+00
-               njac(1,2,i) = 0.0d+00
-               njac(1,3,i) = 0.0d+00
-               njac(1,4,i) = 0.0d+00
-               njac(1,5,i) = 0.0d+00
-
-               njac(2,1,i) = - con43 * c3c4 * tmp2 * u(2,i,j,k,c)
-               njac(2,2,i) =   con43 * c3c4 * tmp1
-               njac(2,3,i) =   0.0d+00
-               njac(2,4,i) =   0.0d+00
-               njac(2,5,i) =   0.0d+00
-
-               njac(3,1,i) = - c3c4 * tmp2 * u(3,i,j,k,c)
-               njac(3,2,i) =   0.0d+00
-               njac(3,3,i) =   c3c4 * tmp1
-               njac(3,4,i) =   0.0d+00
-               njac(3,5,i) =   0.0d+00
-
-               njac(4,1,i) = - c3c4 * tmp2 * u(4,i,j,k,c)
-               njac(4,2,i) =   0.0d+00 
-               njac(4,3,i) =   0.0d+00
-               njac(4,4,i) =   c3c4 * tmp1
-               njac(4,5,i) =   0.0d+00
-
-               njac(5,1,i) = - ( con43 * c3c4
-     >              - c1345 ) * tmp3 * (u(2,i,j,k,c)**2)
-     >              - ( c3c4 - c1345 ) * tmp3 * (u(3,i,j,k,c)**2)
-     >              - ( c3c4 - c1345 ) * tmp3 * (u(4,i,j,k,c)**2)
-     >              - c1345 * tmp2 * u(5,i,j,k,c)
-
-               njac(5,2,i) = ( con43 * c3c4
-     >              - c1345 ) * tmp2 * u(2,i,j,k,c)
-               njac(5,3,i) = ( c3c4 - c1345 ) * tmp2 * u(3,i,j,k,c)
-               njac(5,4,i) = ( c3c4 - c1345 ) * tmp2 * u(4,i,j,k,c)
-               njac(5,5,i) = ( c1345 ) * tmp1
-
-            enddo
-c---------------------------------------------------------------------
-c     now jacobians set, so form left hand side in x direction
-c---------------------------------------------------------------------
-            do i = start(1,c), isize - end(1,c)
-
-               tmp1 = dt * tx1
-               tmp2 = dt * tx2
-
-               lhsa(1,1,i) = - tmp2 * fjac(1,1,i-1)
-     >              - tmp1 * njac(1,1,i-1)
-     >              - tmp1 * dx1 
-               lhsa(1,2,i) = - tmp2 * fjac(1,2,i-1)
-     >              - tmp1 * njac(1,2,i-1)
-               lhsa(1,3,i) = - tmp2 * fjac(1,3,i-1)
-     >              - tmp1 * njac(1,3,i-1)
-               lhsa(1,4,i) = - tmp2 * fjac(1,4,i-1)
-     >              - tmp1 * njac(1,4,i-1)
-               lhsa(1,5,i) = - tmp2 * fjac(1,5,i-1)
-     >              - tmp1 * njac(1,5,i-1)
-
-               lhsa(2,1,i) = - tmp2 * fjac(2,1,i-1)
-     >              - tmp1 * njac(2,1,i-1)
-               lhsa(2,2,i) = - tmp2 * fjac(2,2,i-1)
-     >              - tmp1 * njac(2,2,i-1)
-     >              - tmp1 * dx2
-               lhsa(2,3,i) = - tmp2 * fjac(2,3,i-1)
-     >              - tmp1 * njac(2,3,i-1)
-               lhsa(2,4,i) = - tmp2 * fjac(2,4,i-1)
-     >              - tmp1 * njac(2,4,i-1)
-               lhsa(2,5,i) = - tmp2 * fjac(2,5,i-1)
-     >              - tmp1 * njac(2,5,i-1)
-
-               lhsa(3,1,i) = - tmp2 * fjac(3,1,i-1)
-     >              - tmp1 * njac(3,1,i-1)
-               lhsa(3,2,i) = - tmp2 * fjac(3,2,i-1)
-     >              - tmp1 * njac(3,2,i-1)
-               lhsa(3,3,i) = - tmp2 * fjac(3,3,i-1)
-     >              - tmp1 * njac(3,3,i-1)
-     >              - tmp1 * dx3 
-               lhsa(3,4,i) = - tmp2 * fjac(3,4,i-1)
-     >              - tmp1 * njac(3,4,i-1)
-               lhsa(3,5,i) = - tmp2 * fjac(3,5,i-1)
-     >              - tmp1 * njac(3,5,i-1)
-
-               lhsa(4,1,i) = - tmp2 * fjac(4,1,i-1)
-     >              - tmp1 * njac(4,1,i-1)
-               lhsa(4,2,i) = - tmp2 * fjac(4,2,i-1)
-     >              - tmp1 * njac(4,2,i-1)
-               lhsa(4,3,i) = - tmp2 * fjac(4,3,i-1)
-     >              - tmp1 * njac(4,3,i-1)
-               lhsa(4,4,i) = - tmp2 * fjac(4,4,i-1)
-     >              - tmp1 * njac(4,4,i-1)
-     >              - tmp1 * dx4
-               lhsa(4,5,i) = - tmp2 * fjac(4,5,i-1)
-     >              - tmp1 * njac(4,5,i-1)
-
-               lhsa(5,1,i) = - tmp2 * fjac(5,1,i-1)
-     >              - tmp1 * njac(5,1,i-1)
-               lhsa(5,2,i) = - tmp2 * fjac(5,2,i-1)
-     >              - tmp1 * njac(5,2,i-1)
-               lhsa(5,3,i) = - tmp2 * fjac(5,3,i-1)
-     >              - tmp1 * njac(5,3,i-1)
-               lhsa(5,4,i) = - tmp2 * fjac(5,4,i-1)
-     >              - tmp1 * njac(5,4,i-1)
-               lhsa(5,5,i) = - tmp2 * fjac(5,5,i-1)
-     >              - tmp1 * njac(5,5,i-1)
-     >              - tmp1 * dx5
-
-               lhsb(1,1,i) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(1,1,i)
-     >              + tmp1 * 2.0d+00 * dx1
-               lhsb(1,2,i) = tmp1 * 2.0d+00 * njac(1,2,i)
-               lhsb(1,3,i) = tmp1 * 2.0d+00 * njac(1,3,i)
-               lhsb(1,4,i) = tmp1 * 2.0d+00 * njac(1,4,i)
-               lhsb(1,5,i) = tmp1 * 2.0d+00 * njac(1,5,i)
-
-               lhsb(2,1,i) = tmp1 * 2.0d+00 * njac(2,1,i)
-               lhsb(2,2,i) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(2,2,i)
-     >              + tmp1 * 2.0d+00 * dx2
-               lhsb(2,3,i) = tmp1 * 2.0d+00 * njac(2,3,i)
-               lhsb(2,4,i) = tmp1 * 2.0d+00 * njac(2,4,i)
-               lhsb(2,5,i) = tmp1 * 2.0d+00 * njac(2,5,i)
-
-               lhsb(3,1,i) = tmp1 * 2.0d+00 * njac(3,1,i)
-               lhsb(3,2,i) = tmp1 * 2.0d+00 * njac(3,2,i)
-               lhsb(3,3,i) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(3,3,i)
-     >              + tmp1 * 2.0d+00 * dx3
-               lhsb(3,4,i) = tmp1 * 2.0d+00 * njac(3,4,i)
-               lhsb(3,5,i) = tmp1 * 2.0d+00 * njac(3,5,i)
-
-               lhsb(4,1,i) = tmp1 * 2.0d+00 * njac(4,1,i)
-               lhsb(4,2,i) = tmp1 * 2.0d+00 * njac(4,2,i)
-               lhsb(4,3,i) = tmp1 * 2.0d+00 * njac(4,3,i)
-               lhsb(4,4,i) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(4,4,i)
-     >              + tmp1 * 2.0d+00 * dx4
-               lhsb(4,5,i) = tmp1 * 2.0d+00 * njac(4,5,i)
-
-               lhsb(5,1,i) = tmp1 * 2.0d+00 * njac(5,1,i)
-               lhsb(5,2,i) = tmp1 * 2.0d+00 * njac(5,2,i)
-               lhsb(5,3,i) = tmp1 * 2.0d+00 * njac(5,3,i)
-               lhsb(5,4,i) = tmp1 * 2.0d+00 * njac(5,4,i)
-               lhsb(5,5,i) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(5,5,i)
-     >              + tmp1 * 2.0d+00 * dx5
-
-               lhsc(1,1,i,j,k,c) =  tmp2 * fjac(1,1,i+1)
-     >              - tmp1 * njac(1,1,i+1)
-     >              - tmp1 * dx1
-               lhsc(1,2,i,j,k,c) =  tmp2 * fjac(1,2,i+1)
-     >              - tmp1 * njac(1,2,i+1)
-               lhsc(1,3,i,j,k,c) =  tmp2 * fjac(1,3,i+1)
-     >              - tmp1 * njac(1,3,i+1)
-               lhsc(1,4,i,j,k,c) =  tmp2 * fjac(1,4,i+1)
-     >              - tmp1 * njac(1,4,i+1)
-               lhsc(1,5,i,j,k,c) =  tmp2 * fjac(1,5,i+1)
-     >              - tmp1 * njac(1,5,i+1)
-
-               lhsc(2,1,i,j,k,c) =  tmp2 * fjac(2,1,i+1)
-     >              - tmp1 * njac(2,1,i+1)
-               lhsc(2,2,i,j,k,c) =  tmp2 * fjac(2,2,i+1)
-     >              - tmp1 * njac(2,2,i+1)
-     >              - tmp1 * dx2
-               lhsc(2,3,i,j,k,c) =  tmp2 * fjac(2,3,i+1)
-     >              - tmp1 * njac(2,3,i+1)
-               lhsc(2,4,i,j,k,c) =  tmp2 * fjac(2,4,i+1)
-     >              - tmp1 * njac(2,4,i+1)
-               lhsc(2,5,i,j,k,c) =  tmp2 * fjac(2,5,i+1)
-     >              - tmp1 * njac(2,5,i+1)
-
-               lhsc(3,1,i,j,k,c) =  tmp2 * fjac(3,1,i+1)
-     >              - tmp1 * njac(3,1,i+1)
-               lhsc(3,2,i,j,k,c) =  tmp2 * fjac(3,2,i+1)
-     >              - tmp1 * njac(3,2,i+1)
-               lhsc(3,3,i,j,k,c) =  tmp2 * fjac(3,3,i+1)
-     >              - tmp1 * njac(3,3,i+1)
-     >              - tmp1 * dx3
-               lhsc(3,4,i,j,k,c) =  tmp2 * fjac(3,4,i+1)
-     >              - tmp1 * njac(3,4,i+1)
-               lhsc(3,5,i,j,k,c) =  tmp2 * fjac(3,5,i+1)
-     >              - tmp1 * njac(3,5,i+1)
-
-               lhsc(4,1,i,j,k,c) =  tmp2 * fjac(4,1,i+1)
-     >              - tmp1 * njac(4,1,i+1)
-               lhsc(4,2,i,j,k,c) =  tmp2 * fjac(4,2,i+1)
-     >              - tmp1 * njac(4,2,i+1)
-               lhsc(4,3,i,j,k,c) =  tmp2 * fjac(4,3,i+1)
-     >              - tmp1 * njac(4,3,i+1)
-               lhsc(4,4,i,j,k,c) =  tmp2 * fjac(4,4,i+1)
-     >              - tmp1 * njac(4,4,i+1)
-     >              - tmp1 * dx4
-               lhsc(4,5,i,j,k,c) =  tmp2 * fjac(4,5,i+1)
-     >              - tmp1 * njac(4,5,i+1)
-
-               lhsc(5,1,i,j,k,c) =  tmp2 * fjac(5,1,i+1)
-     >              - tmp1 * njac(5,1,i+1)
-               lhsc(5,2,i,j,k,c) =  tmp2 * fjac(5,2,i+1)
-     >              - tmp1 * njac(5,2,i+1)
-               lhsc(5,3,i,j,k,c) =  tmp2 * fjac(5,3,i+1)
-     >              - tmp1 * njac(5,3,i+1)
-               lhsc(5,4,i,j,k,c) =  tmp2 * fjac(5,4,i+1)
-     >              - tmp1 * njac(5,4,i+1)
-               lhsc(5,5,i,j,k,c) =  tmp2 * fjac(5,5,i+1)
-     >              - tmp1 * njac(5,5,i+1)
-     >              - tmp1 * dx5
-
-            enddo
-
-
-c---------------------------------------------------------------------
-c     outer most do loops - sweeping in i direction
-c---------------------------------------------------------------------
-            if (first .eq. 1) then 
-
-c---------------------------------------------------------------------
-c     multiply c(istart,j,k) by b_inverse and copy back to c
-c     multiply rhs(istart) by b_inverse(istart) and copy to rhs
-c---------------------------------------------------------------------
-               call binvcrhs( lhsb(1,1,istart),
-     >                        lhsc(1,1,istart,j,k,c),
-     >                        rhs(1,istart,j,k,c) )
-
-            endif
-
-c---------------------------------------------------------------------
-c     begin inner most do loop
-c     do all the elements of the cell unless last 
-c---------------------------------------------------------------------
-            do i=istart+first,isize-last
-
-c---------------------------------------------------------------------
-c     rhs(i) = rhs(i) - A*rhs(i-1)
-c---------------------------------------------------------------------
-               call matvec_sub(lhsa(1,1,i),
-     >                         rhs(1,i-1,j,k,c),rhs(1,i,j,k,c))
-
-c---------------------------------------------------------------------
-c     B(i) = B(i) - C(i-1)*A(i)
-c---------------------------------------------------------------------
-               call matmul_sub(lhsa(1,1,i),
-     >                         lhsc(1,1,i-1,j,k,c),
-     >                         lhsb(1,1,i))
-
-
-c---------------------------------------------------------------------
-c     multiply c(i,j,k) by b_inverse and copy back to c
-c     multiply rhs(1,j,k) by b_inverse(1,j,k) and copy to rhs
-c---------------------------------------------------------------------
-               call binvcrhs( lhsb(1,1,i),
-     >                        lhsc(1,1,i,j,k,c),
-     >                        rhs(1,i,j,k,c) )
-
-            enddo
-
-c---------------------------------------------------------------------
-c     Now finish up special cases for last cell
-c---------------------------------------------------------------------
-            if (last .eq. 1) then
-
-c---------------------------------------------------------------------
-c     rhs(isize) = rhs(isize) - A*rhs(isize-1)
-c---------------------------------------------------------------------
-               call matvec_sub(lhsa(1,1,isize),
-     >                         rhs(1,isize-1,j,k,c),rhs(1,isize,j,k,c))
-
-c---------------------------------------------------------------------
-c     B(isize) = B(isize) - C(isize-1)*A(isize)
-c---------------------------------------------------------------------
-               call matmul_sub(lhsa(1,1,isize),
-     >                         lhsc(1,1,isize-1,j,k,c),
-     >                         lhsb(1,1,isize))
-
-c---------------------------------------------------------------------
-c     multiply rhs() by b_inverse() and copy to rhs
-c---------------------------------------------------------------------
-               call binvrhs( lhsb(1,1,isize),
-     >                       rhs(1,isize,j,k,c) )
-
-            endif
-         enddo
-      enddo
-
-
-      return
-      end
-      
diff --git a/examples/smpi/NAS/BT/x_solve_vec.f b/examples/smpi/NAS/BT/x_solve_vec.f
deleted file mode 100644 (file)
index 8f1c137..0000000
+++ /dev/null
@@ -1,789 +0,0 @@
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine x_solve
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     
-c     Performs line solves in X direction by first factoring
-c     the block-tridiagonal matrix into an upper triangular matrix, 
-c     and then performing back substitution to solve for the unknow
-c     vectors of each line.  
-c     
-c     Make sure we treat elements zero to cell_size in the direction
-c     of the sweep.
-c     
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-      integer  c, istart, stage,
-     >     first, last, recv_id, error, r_status(MPI_STATUS_SIZE),
-     >     isize,jsize,ksize,send_id
-
-      istart = 0
-
-c---------------------------------------------------------------------
-c     in our terminology stage is the number of the cell in the x-direct
-c     i.e. stage = 1 means the start of the line stage=ncells means end
-c---------------------------------------------------------------------
-      do stage = 1,ncells
-         c = slice(1,stage)
-         isize = cell_size(1,c) - 1
-         jsize = cell_size(2,c) - 1
-         ksize = cell_size(3,c) - 1
-         
-c---------------------------------------------------------------------
-c     set last-cell flag
-c---------------------------------------------------------------------
-         if (stage .eq. ncells) then
-            last = 1
-         else
-            last = 0
-         endif
-
-         if (stage .eq. 1) then
-c---------------------------------------------------------------------
-c     This is the first cell, so solve without receiving data
-c---------------------------------------------------------------------
-            first = 1
-c            call lhsx(c)
-            call x_solve_cell(first,last,c)
-         else
-c---------------------------------------------------------------------
-c     Not the first cell of this line, so receive info from
-c     processor working on preceeding cell
-c---------------------------------------------------------------------
-            first = 0
-            call x_receive_solve_info(recv_id,c)
-c---------------------------------------------------------------------
-c     overlap computations and communications
-c---------------------------------------------------------------------
-c            call lhsx(c)
-c---------------------------------------------------------------------
-c     wait for completion
-c---------------------------------------------------------------------
-            call mpi_wait(send_id,r_status,error)
-            call mpi_wait(recv_id,r_status,error)
-c---------------------------------------------------------------------
-c     install C'(istart) and rhs'(istart) to be used in this cell
-c---------------------------------------------------------------------
-            call x_unpack_solve_info(c)
-            call x_solve_cell(first,last,c)
-         endif
-
-         if (last .eq. 0) call x_send_solve_info(send_id,c)
-      enddo
-
-c---------------------------------------------------------------------
-c     now perform backsubstitution in reverse direction
-c---------------------------------------------------------------------
-      do stage = ncells, 1, -1
-         c = slice(1,stage)
-         first = 0
-         last = 0
-         if (stage .eq. 1) first = 1
-         if (stage .eq. ncells) then
-            last = 1
-c---------------------------------------------------------------------
-c     last cell, so perform back substitute without waiting
-c---------------------------------------------------------------------
-            call x_backsubstitute(first, last,c)
-         else
-            call x_receive_backsub_info(recv_id,c)
-            call mpi_wait(send_id,r_status,error)
-            call mpi_wait(recv_id,r_status,error)
-            call x_unpack_backsub_info(c)
-            call x_backsubstitute(first,last,c)
-         endif
-         if (first .eq. 0) call x_send_backsub_info(send_id,c)
-      enddo
-
-
-      return
-      end
-      
-      
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine x_unpack_solve_info(c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     unpack C'(-1) and rhs'(-1) for
-c     all j and k
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      integer j,k,m,n,ptr,c,istart 
-
-      istart = 0
-      ptr = 0
-      do k=0,KMAX-1
-         do j=0,JMAX-1
-            do m=1,BLOCK_SIZE
-               do n=1,BLOCK_SIZE
-                  lhsc(m,n,istart-1,j,k,c) = out_buffer(ptr+n)
-               enddo
-               ptr = ptr+BLOCK_SIZE
-            enddo
-            do n=1,BLOCK_SIZE
-               rhs(n,istart-1,j,k,c) = out_buffer(ptr+n)
-            enddo
-            ptr = ptr+BLOCK_SIZE
-         enddo
-      enddo
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-      
-      subroutine x_send_solve_info(send_id,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     pack up and send C'(iend) and rhs'(iend) for
-c     all j and k
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer j,k,m,n,isize,ptr,c,jp,kp
-      integer error,send_id,buffer_size 
-
-      isize = cell_size(1,c)-1
-      jp = cell_coord(2,c) - 1
-      kp = cell_coord(3,c) - 1
-      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
-     >     (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE)
-
-c---------------------------------------------------------------------
-c     pack up buffer
-c---------------------------------------------------------------------
-      ptr = 0
-      do k=0,KMAX-1
-         do j=0,JMAX-1
-            do m=1,BLOCK_SIZE
-               do n=1,BLOCK_SIZE
-                  in_buffer(ptr+n) = lhsc(m,n,isize,j,k,c)
-               enddo
-               ptr = ptr+BLOCK_SIZE
-            enddo
-            do n=1,BLOCK_SIZE
-               in_buffer(ptr+n) = rhs(n,isize,j,k,c)
-            enddo
-            ptr = ptr+BLOCK_SIZE
-         enddo
-      enddo
-
-c---------------------------------------------------------------------
-c     send buffer 
-c---------------------------------------------------------------------
-      call mpi_isend(in_buffer, buffer_size,
-     >     dp_type, successor(1),
-     >     WEST+jp+kp*NCELLS, comm_solve,
-     >     send_id,error)
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine x_send_backsub_info(send_id,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     pack up and send U(istart) for all j and k
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer j,k,n,ptr,c,istart,jp,kp
-      integer error,send_id,buffer_size
-
-c---------------------------------------------------------------------
-c     Send element 0 to previous processor
-c---------------------------------------------------------------------
-      istart = 0
-      jp = cell_coord(2,c)-1
-      kp = cell_coord(3,c)-1
-      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE
-      ptr = 0
-      do k=0,KMAX-1
-         do j=0,JMAX-1
-            do n=1,BLOCK_SIZE
-               in_buffer(ptr+n) = rhs(n,istart,j,k,c)
-            enddo
-            ptr = ptr+BLOCK_SIZE
-         enddo
-      enddo
-      call mpi_isend(in_buffer, buffer_size,
-     >     dp_type, predecessor(1), 
-     >     EAST+jp+kp*NCELLS, comm_solve, 
-     >     send_id,error)
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine x_unpack_backsub_info(c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     unpack U(isize) for all j and k
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      integer j,k,n,ptr,c
-
-      ptr = 0
-      do k=0,KMAX-1
-         do j=0,JMAX-1
-            do n=1,BLOCK_SIZE
-               backsub_info(n,j,k,c) = out_buffer(ptr+n)
-            enddo
-            ptr = ptr+BLOCK_SIZE
-         enddo
-      enddo
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine x_receive_backsub_info(recv_id,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     post mpi receives
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer error,recv_id,jp,kp,c,buffer_size
-      jp = cell_coord(2,c) - 1
-      kp = cell_coord(3,c) - 1
-      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE
-      call mpi_irecv(out_buffer, buffer_size,
-     >     dp_type, successor(1), 
-     >     EAST+jp+kp*NCELLS, comm_solve, 
-     >     recv_id, error)
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine x_receive_solve_info(recv_id,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     post mpi receives 
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer jp,kp,recv_id,error,c,buffer_size
-      jp = cell_coord(2,c) - 1
-      kp = cell_coord(3,c) - 1
-      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
-     >     (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE)
-      call mpi_irecv(out_buffer, buffer_size,
-     >     dp_type, predecessor(1), 
-     >     WEST+jp+kp*NCELLS,  comm_solve, 
-     >     recv_id, error)
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-      
-      subroutine x_backsubstitute(first, last, c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     back solve: if last cell, then generate U(isize)=rhs(isize)
-c     else assume U(isize) is loaded in un pack backsub_info
-c     so just use it
-c     after call u(istart) will be sent to next cell
-c---------------------------------------------------------------------
-
-      include 'header.h'
-
-      integer first, last, c, i, j, k
-      integer m,n,isize,jsize,ksize,istart
-      
-      istart = 0
-      isize = cell_size(1,c)-1
-      jsize = cell_size(2,c)-end(2,c)-1      
-      ksize = cell_size(3,c)-end(3,c)-1
-      if (last .eq. 0) then
-         do k=start(3,c),ksize
-            do j=start(2,c),jsize
-c---------------------------------------------------------------------
-c     U(isize) uses info from previous cell if not last cell
-c---------------------------------------------------------------------
-               do m=1,BLOCK_SIZE
-                  do n=1,BLOCK_SIZE
-                     rhs(m,isize,j,k,c) = rhs(m,isize,j,k,c) 
-     >                    - lhsc(m,n,isize,j,k,c)*
-     >                    backsub_info(n,j,k,c)
-c---------------------------------------------------------------------
-c     rhs(m,isize,j,k,c) = rhs(m,isize,j,k,c) 
-c     $                    - lhsc(m,n,isize,j,k,c)*rhs(n,isize+1,j,k,c)
-c---------------------------------------------------------------------
-                  enddo
-               enddo
-            enddo
-         enddo
-      endif
-      do k=start(3,c),ksize
-         do j=start(2,c),jsize
-            do i=isize-1,istart,-1
-               do m=1,BLOCK_SIZE
-                  do n=1,BLOCK_SIZE
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) 
-     >                    - lhsc(m,n,i,j,k,c)*rhs(n,i+1,j,k,c)
-                  enddo
-               enddo
-            enddo
-         enddo
-      enddo
-
-      return
-      end
-
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine x_solve_cell(first,last,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     performs guaussian elimination on this cell.
-c     
-c     assumes that unpacking routines for non-first cells 
-c     preload C' and rhs' from previous cell.
-c     
-c     assumed send happens outside this routine, but that
-c     c'(IMAX) and rhs'(IMAX) will be sent to next cell
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'work_lhs_vec.h'
-
-      integer first,last,c
-      integer i,j,k,m,n,isize,ksize,jsize,istart
-
-      istart = 0
-      isize = cell_size(1,c)-1
-      jsize = cell_size(2,c)-end(2,c)-1
-      ksize = cell_size(3,c)-end(3,c)-1
-
-c---------------------------------------------------------------------
-c     zero the left hand side for starters
-c     set diagonal values to 1. This is overkill, but convenient
-c---------------------------------------------------------------------
-      do j = 0, jsize
-         do m = 1, 5
-            do n = 1, 5
-               lhsa(m,n,0,j) = 0.0d0
-               lhsb(m,n,0,j) = 0.0d0
-               lhsa(m,n,isize,j) = 0.0d0
-               lhsb(m,n,isize,j) = 0.0d0
-            enddo
-            lhsb(m,m,0,j) = 1.0d0
-            lhsb(m,m,isize,j) = 1.0d0
-         enddo
-      enddo
-
-      do k=start(3,c),ksize 
-
-c---------------------------------------------------------------------
-c     This function computes the left hand side in the xi-direction
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     determine a (labeled f) and n jacobians for cell c
-c---------------------------------------------------------------------
-         do j=start(2,c),jsize
-            do i = start(1,c)-1, cell_size(1,c) - end(1,c)
-
-               tmp1 = rho_i(i,j,k,c)
-               tmp2 = tmp1 * tmp1
-               tmp3 = tmp1 * tmp2
-c---------------------------------------------------------------------
-c     
-c---------------------------------------------------------------------
-               fjac(1,1,i,j) = 0.0d+00
-               fjac(1,2,i,j) = 1.0d+00
-               fjac(1,3,i,j) = 0.0d+00
-               fjac(1,4,i,j) = 0.0d+00
-               fjac(1,5,i,j) = 0.0d+00
-
-               fjac(2,1,i,j) = -(u(2,i,j,k,c) * tmp2 * 
-     >              u(2,i,j,k,c))
-     >              + c2 * qs(i,j,k,c)
-               fjac(2,2,i,j) = ( 2.0d+00 - c2 )
-     >              * ( u(2,i,j,k,c) * tmp1 )
-               fjac(2,3,i,j) = - c2 * ( u(3,i,j,k,c) * tmp1 )
-               fjac(2,4,i,j) = - c2 * ( u(4,i,j,k,c) * tmp1 )
-               fjac(2,5,i,j) = c2
-
-               fjac(3,1,i,j) = - ( u(2,i,j,k,c)*u(3,i,j,k,c) ) * tmp2
-               fjac(3,2,i,j) = u(3,i,j,k,c) * tmp1
-               fjac(3,3,i,j) = u(2,i,j,k,c) * tmp1
-               fjac(3,4,i,j) = 0.0d+00
-               fjac(3,5,i,j) = 0.0d+00
-
-               fjac(4,1,i,j) = - ( u(2,i,j,k,c)*u(4,i,j,k,c) ) * tmp2
-               fjac(4,2,i,j) = u(4,i,j,k,c) * tmp1
-               fjac(4,3,i,j) = 0.0d+00
-               fjac(4,4,i,j) = u(2,i,j,k,c) * tmp1
-               fjac(4,5,i,j) = 0.0d+00
-
-               fjac(5,1,i,j) = ( c2 * 2.0d0 * qs(i,j,k,c)
-     >              - c1 * ( u(5,i,j,k,c) * tmp1 ) )
-     >              * ( u(2,i,j,k,c) * tmp1 )
-               fjac(5,2,i,j) = c1 *  u(5,i,j,k,c) * tmp1 
-     >              - c2
-     >              * ( u(2,i,j,k,c)*u(2,i,j,k,c) * tmp2
-     >              + qs(i,j,k,c) )
-               fjac(5,3,i,j) = - c2 * ( u(3,i,j,k,c)*u(2,i,j,k,c) )
-     >              * tmp2
-               fjac(5,4,i,j) = - c2 * ( u(4,i,j,k,c)*u(2,i,j,k,c) )
-     >              * tmp2
-               fjac(5,5,i,j) = c1 * ( u(2,i,j,k,c) * tmp1 )
-
-               njac(1,1,i,j) = 0.0d+00
-               njac(1,2,i,j) = 0.0d+00
-               njac(1,3,i,j) = 0.0d+00
-               njac(1,4,i,j) = 0.0d+00
-               njac(1,5,i,j) = 0.0d+00
-
-               njac(2,1,i,j) = - con43 * c3c4 * tmp2 * u(2,i,j,k,c)
-               njac(2,2,i,j) =   con43 * c3c4 * tmp1
-               njac(2,3,i,j) =   0.0d+00
-               njac(2,4,i,j) =   0.0d+00
-               njac(2,5,i,j) =   0.0d+00
-
-               njac(3,1,i,j) = - c3c4 * tmp2 * u(3,i,j,k,c)
-               njac(3,2,i,j) =   0.0d+00
-               njac(3,3,i,j) =   c3c4 * tmp1
-               njac(3,4,i,j) =   0.0d+00
-               njac(3,5,i,j) =   0.0d+00
-
-               njac(4,1,i,j) = - c3c4 * tmp2 * u(4,i,j,k,c)
-               njac(4,2,i,j) =   0.0d+00 
-               njac(4,3,i,j) =   0.0d+00
-               njac(4,4,i,j) =   c3c4 * tmp1
-               njac(4,5,i,j) =   0.0d+00
-
-               njac(5,1,i,j) = - ( con43 * c3c4
-     >              - c1345 ) * tmp3 * (u(2,i,j,k,c)**2)
-     >              - ( c3c4 - c1345 ) * tmp3 * (u(3,i,j,k,c)**2)
-     >              - ( c3c4 - c1345 ) * tmp3 * (u(4,i,j,k,c)**2)
-     >              - c1345 * tmp2 * u(5,i,j,k,c)
-
-               njac(5,2,i,j) = ( con43 * c3c4
-     >              - c1345 ) * tmp2 * u(2,i,j,k,c)
-               njac(5,3,i,j) = ( c3c4 - c1345 ) * tmp2 * u(3,i,j,k,c)
-               njac(5,4,i,j) = ( c3c4 - c1345 ) * tmp2 * u(4,i,j,k,c)
-               njac(5,5,i,j) = ( c1345 ) * tmp1
-
-            enddo
-         enddo
-
-c---------------------------------------------------------------------
-c     now jacobians set, so form left hand side in x direction
-c---------------------------------------------------------------------
-         do j=start(2,c),jsize
-            do i = start(1,c), isize - end(1,c)
-
-               tmp1 = dt * tx1
-               tmp2 = dt * tx2
-
-               lhsa(1,1,i,j) = - tmp2 * fjac(1,1,i-1,j)
-     >              - tmp1 * njac(1,1,i-1,j)
-     >              - tmp1 * dx1 
-               lhsa(1,2,i,j) = - tmp2 * fjac(1,2,i-1,j)
-     >              - tmp1 * njac(1,2,i-1,j)
-               lhsa(1,3,i,j) = - tmp2 * fjac(1,3,i-1,j)
-     >              - tmp1 * njac(1,3,i-1,j)
-               lhsa(1,4,i,j) = - tmp2 * fjac(1,4,i-1,j)
-     >              - tmp1 * njac(1,4,i-1,j)
-               lhsa(1,5,i,j) = - tmp2 * fjac(1,5,i-1,j)
-     >              - tmp1 * njac(1,5,i-1,j)
-
-               lhsa(2,1,i,j) = - tmp2 * fjac(2,1,i-1,j)
-     >              - tmp1 * njac(2,1,i-1,j)
-               lhsa(2,2,i,j) = - tmp2 * fjac(2,2,i-1,j)
-     >              - tmp1 * njac(2,2,i-1,j)
-     >              - tmp1 * dx2
-               lhsa(2,3,i,j) = - tmp2 * fjac(2,3,i-1,j)
-     >              - tmp1 * njac(2,3,i-1,j)
-               lhsa(2,4,i,j) = - tmp2 * fjac(2,4,i-1,j)
-     >              - tmp1 * njac(2,4,i-1,j)
-               lhsa(2,5,i,j) = - tmp2 * fjac(2,5,i-1,j)
-     >              - tmp1 * njac(2,5,i-1,j)
-
-               lhsa(3,1,i,j) = - tmp2 * fjac(3,1,i-1,j)
-     >              - tmp1 * njac(3,1,i-1,j)
-               lhsa(3,2,i,j) = - tmp2 * fjac(3,2,i-1,j)
-     >              - tmp1 * njac(3,2,i-1,j)
-               lhsa(3,3,i,j) = - tmp2 * fjac(3,3,i-1,j)
-     >              - tmp1 * njac(3,3,i-1,j)
-     >              - tmp1 * dx3 
-               lhsa(3,4,i,j) = - tmp2 * fjac(3,4,i-1,j)
-     >              - tmp1 * njac(3,4,i-1,j)
-               lhsa(3,5,i,j) = - tmp2 * fjac(3,5,i-1,j)
-     >              - tmp1 * njac(3,5,i-1,j)
-
-               lhsa(4,1,i,j) = - tmp2 * fjac(4,1,i-1,j)
-     >              - tmp1 * njac(4,1,i-1,j)
-               lhsa(4,2,i,j) = - tmp2 * fjac(4,2,i-1,j)
-     >              - tmp1 * njac(4,2,i-1,j)
-               lhsa(4,3,i,j) = - tmp2 * fjac(4,3,i-1,j)
-     >              - tmp1 * njac(4,3,i-1,j)
-               lhsa(4,4,i,j) = - tmp2 * fjac(4,4,i-1,j)
-     >              - tmp1 * njac(4,4,i-1,j)
-     >              - tmp1 * dx4
-               lhsa(4,5,i,j) = - tmp2 * fjac(4,5,i-1,j)
-     >              - tmp1 * njac(4,5,i-1,j)
-
-               lhsa(5,1,i,j) = - tmp2 * fjac(5,1,i-1,j)
-     >              - tmp1 * njac(5,1,i-1,j)
-               lhsa(5,2,i,j) = - tmp2 * fjac(5,2,i-1,j)
-     >              - tmp1 * njac(5,2,i-1,j)
-               lhsa(5,3,i,j) = - tmp2 * fjac(5,3,i-1,j)
-     >              - tmp1 * njac(5,3,i-1,j)
-               lhsa(5,4,i,j) = - tmp2 * fjac(5,4,i-1,j)
-     >              - tmp1 * njac(5,4,i-1,j)
-               lhsa(5,5,i,j) = - tmp2 * fjac(5,5,i-1,j)
-     >              - tmp1 * njac(5,5,i-1,j)
-     >              - tmp1 * dx5
-
-               lhsb(1,1,i,j) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(1,1,i,j)
-     >              + tmp1 * 2.0d+00 * dx1
-               lhsb(1,2,i,j) = tmp1 * 2.0d+00 * njac(1,2,i,j)
-               lhsb(1,3,i,j) = tmp1 * 2.0d+00 * njac(1,3,i,j)
-               lhsb(1,4,i,j) = tmp1 * 2.0d+00 * njac(1,4,i,j)
-               lhsb(1,5,i,j) = tmp1 * 2.0d+00 * njac(1,5,i,j)
-
-               lhsb(2,1,i,j) = tmp1 * 2.0d+00 * njac(2,1,i,j)
-               lhsb(2,2,i,j) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(2,2,i,j)
-     >              + tmp1 * 2.0d+00 * dx2
-               lhsb(2,3,i,j) = tmp1 * 2.0d+00 * njac(2,3,i,j)
-               lhsb(2,4,i,j) = tmp1 * 2.0d+00 * njac(2,4,i,j)
-               lhsb(2,5,i,j) = tmp1 * 2.0d+00 * njac(2,5,i,j)
-
-               lhsb(3,1,i,j) = tmp1 * 2.0d+00 * njac(3,1,i,j)
-               lhsb(3,2,i,j) = tmp1 * 2.0d+00 * njac(3,2,i,j)
-               lhsb(3,3,i,j) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(3,3,i,j)
-     >              + tmp1 * 2.0d+00 * dx3
-               lhsb(3,4,i,j) = tmp1 * 2.0d+00 * njac(3,4,i,j)
-               lhsb(3,5,i,j) = tmp1 * 2.0d+00 * njac(3,5,i,j)
-
-               lhsb(4,1,i,j) = tmp1 * 2.0d+00 * njac(4,1,i,j)
-               lhsb(4,2,i,j) = tmp1 * 2.0d+00 * njac(4,2,i,j)
-               lhsb(4,3,i,j) = tmp1 * 2.0d+00 * njac(4,3,i,j)
-               lhsb(4,4,i,j) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(4,4,i,j)
-     >              + tmp1 * 2.0d+00 * dx4
-               lhsb(4,5,i,j) = tmp1 * 2.0d+00 * njac(4,5,i,j)
-
-               lhsb(5,1,i,j) = tmp1 * 2.0d+00 * njac(5,1,i,j)
-               lhsb(5,2,i,j) = tmp1 * 2.0d+00 * njac(5,2,i,j)
-               lhsb(5,3,i,j) = tmp1 * 2.0d+00 * njac(5,3,i,j)
-               lhsb(5,4,i,j) = tmp1 * 2.0d+00 * njac(5,4,i,j)
-               lhsb(5,5,i,j) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(5,5,i,j)
-     >              + tmp1 * 2.0d+00 * dx5
-
-               lhsc(1,1,i,j,k,c) =  tmp2 * fjac(1,1,i+1,j)
-     >              - tmp1 * njac(1,1,i+1,j)
-     >              - tmp1 * dx1
-               lhsc(1,2,i,j,k,c) =  tmp2 * fjac(1,2,i+1,j)
-     >              - tmp1 * njac(1,2,i+1,j)
-               lhsc(1,3,i,j,k,c) =  tmp2 * fjac(1,3,i+1,j)
-     >              - tmp1 * njac(1,3,i+1,j)
-               lhsc(1,4,i,j,k,c) =  tmp2 * fjac(1,4,i+1,j)
-     >              - tmp1 * njac(1,4,i+1,j)
-               lhsc(1,5,i,j,k,c) =  tmp2 * fjac(1,5,i+1,j)
-     >              - tmp1 * njac(1,5,i+1,j)
-
-               lhsc(2,1,i,j,k,c) =  tmp2 * fjac(2,1,i+1,j)
-     >              - tmp1 * njac(2,1,i+1,j)
-               lhsc(2,2,i,j,k,c) =  tmp2 * fjac(2,2,i+1,j)
-     >              - tmp1 * njac(2,2,i+1,j)
-     >              - tmp1 * dx2
-               lhsc(2,3,i,j,k,c) =  tmp2 * fjac(2,3,i+1,j)
-     >              - tmp1 * njac(2,3,i+1,j)
-               lhsc(2,4,i,j,k,c) =  tmp2 * fjac(2,4,i+1,j)
-     >              - tmp1 * njac(2,4,i+1,j)
-               lhsc(2,5,i,j,k,c) =  tmp2 * fjac(2,5,i+1,j)
-     >              - tmp1 * njac(2,5,i+1,j)
-
-               lhsc(3,1,i,j,k,c) =  tmp2 * fjac(3,1,i+1,j)
-     >              - tmp1 * njac(3,1,i+1,j)
-               lhsc(3,2,i,j,k,c) =  tmp2 * fjac(3,2,i+1,j)
-     >              - tmp1 * njac(3,2,i+1,j)
-               lhsc(3,3,i,j,k,c) =  tmp2 * fjac(3,3,i+1,j)
-     >              - tmp1 * njac(3,3,i+1,j)
-     >              - tmp1 * dx3
-               lhsc(3,4,i,j,k,c) =  tmp2 * fjac(3,4,i+1,j)
-     >              - tmp1 * njac(3,4,i+1,j)
-               lhsc(3,5,i,j,k,c) =  tmp2 * fjac(3,5,i+1,j)
-     >              - tmp1 * njac(3,5,i+1,j)
-
-               lhsc(4,1,i,j,k,c) =  tmp2 * fjac(4,1,i+1,j)
-     >              - tmp1 * njac(4,1,i+1,j)
-               lhsc(4,2,i,j,k,c) =  tmp2 * fjac(4,2,i+1,j)
-     >              - tmp1 * njac(4,2,i+1,j)
-               lhsc(4,3,i,j,k,c) =  tmp2 * fjac(4,3,i+1,j)
-     >              - tmp1 * njac(4,3,i+1,j)
-               lhsc(4,4,i,j,k,c) =  tmp2 * fjac(4,4,i+1,j)
-     >              - tmp1 * njac(4,4,i+1,j)
-     >              - tmp1 * dx4
-               lhsc(4,5,i,j,k,c) =  tmp2 * fjac(4,5,i+1,j)
-     >              - tmp1 * njac(4,5,i+1,j)
-
-               lhsc(5,1,i,j,k,c) =  tmp2 * fjac(5,1,i+1,j)
-     >              - tmp1 * njac(5,1,i+1,j)
-               lhsc(5,2,i,j,k,c) =  tmp2 * fjac(5,2,i+1,j)
-     >              - tmp1 * njac(5,2,i+1,j)
-               lhsc(5,3,i,j,k,c) =  tmp2 * fjac(5,3,i+1,j)
-     >              - tmp1 * njac(5,3,i+1,j)
-               lhsc(5,4,i,j,k,c) =  tmp2 * fjac(5,4,i+1,j)
-     >              - tmp1 * njac(5,4,i+1,j)
-               lhsc(5,5,i,j,k,c) =  tmp2 * fjac(5,5,i+1,j)
-     >              - tmp1 * njac(5,5,i+1,j)
-     >              - tmp1 * dx5
-
-            enddo
-         enddo
-
-
-c---------------------------------------------------------------------
-c     outer most do loops - sweeping in i direction
-c---------------------------------------------------------------------
-         if (first .eq. 1) then 
-
-c---------------------------------------------------------------------
-c     multiply c(istart,j,k) by b_inverse and copy back to c
-c     multiply rhs(istart) by b_inverse(istart) and copy to rhs
-c---------------------------------------------------------------------
-!dir$ ivdep
-            do j=start(2,c),jsize
-               call binvcrhs( lhsb(1,1,istart,j),
-     >                        lhsc(1,1,istart,j,k,c),
-     >                        rhs(1,istart,j,k,c) )
-            enddo
-
-         endif
-
-c---------------------------------------------------------------------
-c     begin inner most do loop
-c     do all the elements of the cell unless last 
-c---------------------------------------------------------------------
-!dir$ ivdep
-!dir$ interchange(i,j)
-         do j=start(2,c),jsize
-            do i=istart+first,isize-last
-
-c---------------------------------------------------------------------
-c     rhs(i) = rhs(i) - A*rhs(i-1)
-c---------------------------------------------------------------------
-               call matvec_sub(lhsa(1,1,i,j),
-     >                         rhs(1,i-1,j,k,c),rhs(1,i,j,k,c))
-
-c---------------------------------------------------------------------
-c     B(i) = B(i) - C(i-1)*A(i)
-c---------------------------------------------------------------------
-               call matmul_sub(lhsa(1,1,i,j),
-     >                         lhsc(1,1,i-1,j,k,c),
-     >                         lhsb(1,1,i,j))
-
-
-c---------------------------------------------------------------------
-c     multiply c(i,j,k) by b_inverse and copy back to c
-c     multiply rhs(1,j,k) by b_inverse(1,j,k) and copy to rhs
-c---------------------------------------------------------------------
-               call binvcrhs( lhsb(1,1,i,j),
-     >                        lhsc(1,1,i,j,k,c),
-     >                        rhs(1,i,j,k,c) )
-
-            enddo
-         enddo
-
-c---------------------------------------------------------------------
-c     Now finish up special cases for last cell
-c---------------------------------------------------------------------
-         if (last .eq. 1) then
-
-!dir$ ivdep
-            do j=start(2,c),jsize
-c---------------------------------------------------------------------
-c     rhs(isize) = rhs(isize) - A*rhs(isize-1)
-c---------------------------------------------------------------------
-               call matvec_sub(lhsa(1,1,isize,j),
-     >                         rhs(1,isize-1,j,k,c),rhs(1,isize,j,k,c))
-
-c---------------------------------------------------------------------
-c     B(isize) = B(isize) - C(isize-1)*A(isize)
-c---------------------------------------------------------------------
-               call matmul_sub(lhsa(1,1,isize,j),
-     >                         lhsc(1,1,isize-1,j,k,c),
-     >                         lhsb(1,1,isize,j))
-
-c---------------------------------------------------------------------
-c     multiply rhs() by b_inverse() and copy to rhs
-c---------------------------------------------------------------------
-               call binvrhs( lhsb(1,1,isize,j),
-     >                       rhs(1,isize,j,k,c) )
-            enddo
-
-         endif
-      enddo
-
-
-      return
-      end
-      
diff --git a/examples/smpi/NAS/BT/y_solve.f b/examples/smpi/NAS/BT/y_solve.f
deleted file mode 100644 (file)
index 33e2ebc..0000000
+++ /dev/null
@@ -1,771 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine y_solve
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     Performs line solves in Y direction by first factoring
-c     the block-tridiagonal matrix into an upper triangular matrix, 
-c     and then performing back substitution to solve for the unknow
-c     vectors of each line.  
-c     
-c     Make sure we treat elements zero to cell_size in the direction
-c     of the sweep.
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer 
-     >     c, jstart, stage,
-     >     first, last, recv_id, error, r_status(MPI_STATUS_SIZE),
-     >     isize,jsize,ksize,send_id
-
-      jstart = 0
-
-c---------------------------------------------------------------------
-c     in our terminology stage is the number of the cell in the y-direction
-c     i.e. stage = 1 means the start of the line stage=ncells means end
-c---------------------------------------------------------------------
-      do stage = 1,ncells
-         c = slice(2,stage)
-         isize = cell_size(1,c) - 1
-         jsize = cell_size(2,c) - 1
-         ksize = cell_size(3,c) - 1
-
-c---------------------------------------------------------------------
-c     set last-cell flag
-c---------------------------------------------------------------------
-         if (stage .eq. ncells) then
-            last = 1
-         else
-            last = 0
-         endif
-
-         if (stage .eq. 1) then
-c---------------------------------------------------------------------
-c     This is the first cell, so solve without receiving data
-c---------------------------------------------------------------------
-            first = 1
-c            call lhsy(c)
-            call y_solve_cell(first,last,c)
-         else
-c---------------------------------------------------------------------
-c     Not the first cell of this line, so receive info from
-c     processor working on preceeding cell
-c---------------------------------------------------------------------
-            first = 0
-            call y_receive_solve_info(recv_id,c)
-c---------------------------------------------------------------------
-c     overlap computations and communications
-c---------------------------------------------------------------------
-c            call lhsy(c)
-c---------------------------------------------------------------------
-c     wait for completion
-c---------------------------------------------------------------------
-            call mpi_wait(send_id,r_status,error)
-            call mpi_wait(recv_id,r_status,error)
-c---------------------------------------------------------------------
-c     install C'(jstart+1) and rhs'(jstart+1) to be used in this cell
-c---------------------------------------------------------------------
-            call y_unpack_solve_info(c)
-            call y_solve_cell(first,last,c)
-         endif
-
-         if (last .eq. 0) call y_send_solve_info(send_id,c)
-      enddo
-
-c---------------------------------------------------------------------
-c     now perform backsubstitution in reverse direction
-c---------------------------------------------------------------------
-      do stage = ncells, 1, -1
-         c = slice(2,stage)
-         first = 0
-         last = 0
-         if (stage .eq. 1) first = 1
-         if (stage .eq. ncells) then
-            last = 1
-c---------------------------------------------------------------------
-c     last cell, so perform back substitute without waiting
-c---------------------------------------------------------------------
-            call y_backsubstitute(first, last,c)
-         else
-            call y_receive_backsub_info(recv_id,c)
-            call mpi_wait(send_id,r_status,error)
-            call mpi_wait(recv_id,r_status,error)
-            call y_unpack_backsub_info(c)
-            call y_backsubstitute(first,last,c)
-         endif
-         if (first .eq. 0) call y_send_backsub_info(send_id,c)
-      enddo
-
-
-      return
-      end
-      
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-      
-      subroutine y_unpack_solve_info(c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     unpack C'(-1) and rhs'(-1) for
-c     all i and k
-c---------------------------------------------------------------------
-
-      include 'header.h'
-
-      integer i,k,m,n,ptr,c,jstart 
-
-      jstart = 0
-      ptr = 0
-      do k=0,KMAX-1
-         do i=0,IMAX-1
-            do m=1,BLOCK_SIZE
-               do n=1,BLOCK_SIZE
-                  lhsc(m,n,i,jstart-1,k,c) = out_buffer(ptr+n)
-               enddo
-               ptr = ptr+BLOCK_SIZE
-            enddo
-            do n=1,BLOCK_SIZE
-               rhs(n,i,jstart-1,k,c) = out_buffer(ptr+n)
-            enddo
-            ptr = ptr+BLOCK_SIZE
-         enddo
-      enddo
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-      
-      subroutine y_send_solve_info(send_id,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     pack up and send C'(jend) and rhs'(jend) for
-c     all i and k
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer i,k,m,n,jsize,ptr,c,ip,kp
-      integer error,send_id,buffer_size 
-
-      jsize = cell_size(2,c)-1
-      ip = cell_coord(1,c) - 1
-      kp = cell_coord(3,c) - 1
-      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
-     >     (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE)
-
-c---------------------------------------------------------------------
-c     pack up buffer
-c---------------------------------------------------------------------
-      ptr = 0
-      do k=0,KMAX-1
-         do i=0,IMAX-1
-            do m=1,BLOCK_SIZE
-               do n=1,BLOCK_SIZE
-                  in_buffer(ptr+n) = lhsc(m,n,i,jsize,k,c)
-               enddo
-               ptr = ptr+BLOCK_SIZE
-            enddo
-            do n=1,BLOCK_SIZE
-               in_buffer(ptr+n) = rhs(n,i,jsize,k,c)
-            enddo
-            ptr = ptr+BLOCK_SIZE
-         enddo
-      enddo
-
-c---------------------------------------------------------------------
-c     send buffer 
-c---------------------------------------------------------------------
-      call mpi_isend(in_buffer, buffer_size,
-     >     dp_type, successor(2),
-     >     SOUTH+ip+kp*NCELLS, comm_solve,
-     >     send_id,error)
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine y_send_backsub_info(send_id,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     pack up and send U(jstart) for all i and k
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer i,k,n,ptr,c,jstart,ip,kp
-      integer error,send_id,buffer_size
-
-c---------------------------------------------------------------------
-c     Send element 0 to previous processor
-c---------------------------------------------------------------------
-      jstart = 0
-      ip = cell_coord(1,c)-1
-      kp = cell_coord(3,c)-1
-      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE
-      ptr = 0
-      do k=0,KMAX-1
-         do i=0,IMAX-1
-            do n=1,BLOCK_SIZE
-               in_buffer(ptr+n) = rhs(n,i,jstart,k,c)
-            enddo
-            ptr = ptr+BLOCK_SIZE
-         enddo
-      enddo
-      call mpi_isend(in_buffer, buffer_size,
-     >     dp_type, predecessor(2), 
-     >     NORTH+ip+kp*NCELLS, comm_solve, 
-     >     send_id,error)
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine y_unpack_backsub_info(c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     unpack U(jsize) for all i and k
-c---------------------------------------------------------------------
-
-      include 'header.h'
-
-      integer i,k,n,ptr,c 
-
-      ptr = 0
-      do k=0,KMAX-1
-         do i=0,IMAX-1
-            do n=1,BLOCK_SIZE
-               backsub_info(n,i,k,c) = out_buffer(ptr+n)
-            enddo
-            ptr = ptr+BLOCK_SIZE
-         enddo
-      enddo
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine y_receive_backsub_info(recv_id,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     post mpi receives
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer error,recv_id,ip,kp,c,buffer_size
-      ip = cell_coord(1,c) - 1
-      kp = cell_coord(3,c) - 1
-      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE
-      call mpi_irecv(out_buffer, buffer_size,
-     >     dp_type, successor(2), 
-     >     NORTH+ip+kp*NCELLS, comm_solve, 
-     >     recv_id, error)
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine y_receive_solve_info(recv_id,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     post mpi receives 
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer ip,kp,recv_id,error,c,buffer_size
-      ip = cell_coord(1,c) - 1
-      kp = cell_coord(3,c) - 1
-      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
-     >     (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE)
-      call mpi_irecv(out_buffer, buffer_size, 
-     >     dp_type, predecessor(2), 
-     >     SOUTH+ip+kp*NCELLS,  comm_solve, 
-     >     recv_id, error)
-
-      return
-      end
-      
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine y_backsubstitute(first, last, c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     back solve: if last cell, then generate U(jsize)=rhs(jsize)
-c     else assume U(jsize) is loaded in un pack backsub_info
-c     so just use it
-c     after call u(jstart) will be sent to next cell
-c---------------------------------------------------------------------
-
-      include 'header.h'
-
-      integer first, last, c, i, k
-      integer m,n,j,jsize,isize,ksize,jstart
-      
-      jstart = 0
-      isize = cell_size(1,c)-end(1,c)-1      
-      jsize = cell_size(2,c)-1
-      ksize = cell_size(3,c)-end(3,c)-1
-      if (last .eq. 0) then
-         do k=start(3,c),ksize
-            do i=start(1,c),isize
-c---------------------------------------------------------------------
-c     U(jsize) uses info from previous cell if not last cell
-c---------------------------------------------------------------------
-               do m=1,BLOCK_SIZE
-                  do n=1,BLOCK_SIZE
-                     rhs(m,i,jsize,k,c) = rhs(m,i,jsize,k,c) 
-     >                    - lhsc(m,n,i,jsize,k,c)*
-     >                    backsub_info(n,i,k,c)
-                  enddo
-               enddo
-            enddo
-         enddo
-      endif
-      do k=start(3,c),ksize
-         do j=jsize-1,jstart,-1
-            do i=start(1,c),isize
-               do m=1,BLOCK_SIZE
-                  do n=1,BLOCK_SIZE
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) 
-     >                    - lhsc(m,n,i,j,k,c)*rhs(n,i,j+1,k,c)
-                  enddo
-               enddo
-            enddo
-         enddo
-      enddo
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine y_solve_cell(first,last,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     performs guaussian elimination on this cell.
-c     
-c     assumes that unpacking routines for non-first cells 
-c     preload C' and rhs' from previous cell.
-c     
-c     assumed send happens outside this routine, but that
-c     c'(JMAX) and rhs'(JMAX) will be sent to next cell
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'work_lhs.h'
-
-      integer first,last,c
-      integer i,j,k,isize,ksize,jsize,jstart
-      double precision utmp(6,-2:JMAX+1)
-
-      jstart = 0
-      isize = cell_size(1,c)-end(1,c)-1
-      jsize = cell_size(2,c)-1
-      ksize = cell_size(3,c)-end(3,c)-1
-
-      call lhsabinit(lhsa, lhsb, jsize)
-
-      do k=start(3,c),ksize 
-         do i=start(1,c),isize
-
-c---------------------------------------------------------------------
-c     This function computes the left hand side for the three y-factors   
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     Compute the indices for storing the tri-diagonal matrix;
-c     determine a (labeled f) and n jacobians for cell c
-c---------------------------------------------------------------------
-            do j = start(2,c)-1, cell_size(2,c)-end(2,c)
-               utmp(1,j) = 1.0d0 / u(1,i,j,k,c)
-               utmp(2,j) = u(2,i,j,k,c)
-               utmp(3,j) = u(3,i,j,k,c)
-               utmp(4,j) = u(4,i,j,k,c)
-               utmp(5,j) = u(5,i,j,k,c)
-               utmp(6,j) = qs(i,j,k,c)
-            end do
-
-            do j = start(2,c)-1, cell_size(2,c)-end(2,c)
-
-               tmp1 = utmp(1,j)
-               tmp2 = tmp1 * tmp1
-               tmp3 = tmp1 * tmp2
-
-               fjac(1,1,j) = 0.0d+00
-               fjac(1,2,j) = 0.0d+00
-               fjac(1,3,j) = 1.0d+00
-               fjac(1,4,j) = 0.0d+00
-               fjac(1,5,j) = 0.0d+00
-
-               fjac(2,1,j) = - ( utmp(2,j)*utmp(3,j) )
-     >              * tmp2
-               fjac(2,2,j) = utmp(3,j) * tmp1
-               fjac(2,3,j) = utmp(2,j) * tmp1
-               fjac(2,4,j) = 0.0d+00
-               fjac(2,5,j) = 0.0d+00
-
-               fjac(3,1,j) = - ( utmp(3,j)*utmp(3,j)*tmp2)
-     >              + c2 * utmp(6,j)
-               fjac(3,2,j) = - c2 *  utmp(2,j) * tmp1
-               fjac(3,3,j) = ( 2.0d+00 - c2 )
-     >              *  utmp(3,j) * tmp1 
-               fjac(3,4,j) = - c2 * utmp(4,j) * tmp1 
-               fjac(3,5,j) = c2
-
-               fjac(4,1,j) = - ( utmp(3,j)*utmp(4,j) )
-     >              * tmp2
-               fjac(4,2,j) = 0.0d+00
-               fjac(4,3,j) = utmp(4,j) * tmp1
-               fjac(4,4,j) = utmp(3,j) * tmp1
-               fjac(4,5,j) = 0.0d+00
-
-               fjac(5,1,j) = ( c2 * 2.0d0 * utmp(6,j)
-     >              - c1 * utmp(5,j) * tmp1 ) 
-     >              * utmp(3,j) * tmp1 
-               fjac(5,2,j) = - c2 * utmp(2,j)*utmp(3,j) 
-     >              * tmp2
-               fjac(5,3,j) = c1 * utmp(5,j) * tmp1 
-     >              - c2 * ( utmp(6,j)
-     >              + utmp(3,j)*utmp(3,j) * tmp2 )
-               fjac(5,4,j) = - c2 * ( utmp(3,j)*utmp(4,j) )
-     >              * tmp2
-               fjac(5,5,j) = c1 * utmp(3,j) * tmp1 
-
-               njac(1,1,j) = 0.0d+00
-               njac(1,2,j) = 0.0d+00
-               njac(1,3,j) = 0.0d+00
-               njac(1,4,j) = 0.0d+00
-               njac(1,5,j) = 0.0d+00
-
-               njac(2,1,j) = - c3c4 * tmp2 * utmp(2,j)
-               njac(2,2,j) =   c3c4 * tmp1
-               njac(2,3,j) =   0.0d+00
-               njac(2,4,j) =   0.0d+00
-               njac(2,5,j) =   0.0d+00
-
-               njac(3,1,j) = - con43 * c3c4 * tmp2 * utmp(3,j)
-               njac(3,2,j) =   0.0d+00
-               njac(3,3,j) =   con43 * c3c4 * tmp1
-               njac(3,4,j) =   0.0d+00
-               njac(3,5,j) =   0.0d+00
-
-               njac(4,1,j) = - c3c4 * tmp2 * utmp(4,j)
-               njac(4,2,j) =   0.0d+00
-               njac(4,3,j) =   0.0d+00
-               njac(4,4,j) =   c3c4 * tmp1
-               njac(4,5,j) =   0.0d+00
-
-               njac(5,1,j) = - (  c3c4
-     >              - c1345 ) * tmp3 * (utmp(2,j)**2)
-     >              - ( con43 * c3c4
-     >              - c1345 ) * tmp3 * (utmp(3,j)**2)
-     >              - ( c3c4 - c1345 ) * tmp3 * (utmp(4,j)**2)
-     >              - c1345 * tmp2 * utmp(5,j)
-
-               njac(5,2,j) = (  c3c4 - c1345 ) * tmp2 * utmp(2,j)
-               njac(5,3,j) = ( con43 * c3c4
-     >              - c1345 ) * tmp2 * utmp(3,j)
-               njac(5,4,j) = ( c3c4 - c1345 ) * tmp2 * utmp(4,j)
-               njac(5,5,j) = ( c1345 ) * tmp1
-
-            enddo
-
-c---------------------------------------------------------------------
-c     now joacobians set, so form left hand side in y direction
-c---------------------------------------------------------------------
-            do j = start(2,c), jsize-end(2,c)
-
-               tmp1 = dt * ty1
-               tmp2 = dt * ty2
-
-               lhsa(1,1,j) = - tmp2 * fjac(1,1,j-1)
-     >              - tmp1 * njac(1,1,j-1)
-     >              - tmp1 * dy1 
-               lhsa(1,2,j) = - tmp2 * fjac(1,2,j-1)
-     >              - tmp1 * njac(1,2,j-1)
-               lhsa(1,3,j) = - tmp2 * fjac(1,3,j-1)
-     >              - tmp1 * njac(1,3,j-1)
-               lhsa(1,4,j) = - tmp2 * fjac(1,4,j-1)
-     >              - tmp1 * njac(1,4,j-1)
-               lhsa(1,5,j) = - tmp2 * fjac(1,5,j-1)
-     >              - tmp1 * njac(1,5,j-1)
-
-               lhsa(2,1,j) = - tmp2 * fjac(2,1,j-1)
-     >              - tmp1 * njac(2,1,j-1)
-               lhsa(2,2,j) = - tmp2 * fjac(2,2,j-1)
-     >              - tmp1 * njac(2,2,j-1)
-     >              - tmp1 * dy2
-               lhsa(2,3,j) = - tmp2 * fjac(2,3,j-1)
-     >              - tmp1 * njac(2,3,j-1)
-               lhsa(2,4,j) = - tmp2 * fjac(2,4,j-1)
-     >              - tmp1 * njac(2,4,j-1)
-               lhsa(2,5,j) = - tmp2 * fjac(2,5,j-1)
-     >              - tmp1 * njac(2,5,j-1)
-
-               lhsa(3,1,j) = - tmp2 * fjac(3,1,j-1)
-     >              - tmp1 * njac(3,1,j-1)
-               lhsa(3,2,j) = - tmp2 * fjac(3,2,j-1)
-     >              - tmp1 * njac(3,2,j-1)
-               lhsa(3,3,j) = - tmp2 * fjac(3,3,j-1)
-     >              - tmp1 * njac(3,3,j-1)
-     >              - tmp1 * dy3 
-               lhsa(3,4,j) = - tmp2 * fjac(3,4,j-1)
-     >              - tmp1 * njac(3,4,j-1)
-               lhsa(3,5,j) = - tmp2 * fjac(3,5,j-1)
-     >              - tmp1 * njac(3,5,j-1)
-
-               lhsa(4,1,j) = - tmp2 * fjac(4,1,j-1)
-     >              - tmp1 * njac(4,1,j-1)
-               lhsa(4,2,j) = - tmp2 * fjac(4,2,j-1)
-     >              - tmp1 * njac(4,2,j-1)
-               lhsa(4,3,j) = - tmp2 * fjac(4,3,j-1)
-     >              - tmp1 * njac(4,3,j-1)
-               lhsa(4,4,j) = - tmp2 * fjac(4,4,j-1)
-     >              - tmp1 * njac(4,4,j-1)
-     >              - tmp1 * dy4
-               lhsa(4,5,j) = - tmp2 * fjac(4,5,j-1)
-     >              - tmp1 * njac(4,5,j-1)
-
-               lhsa(5,1,j) = - tmp2 * fjac(5,1,j-1)
-     >              - tmp1 * njac(5,1,j-1)
-               lhsa(5,2,j) = - tmp2 * fjac(5,2,j-1)
-     >              - tmp1 * njac(5,2,j-1)
-               lhsa(5,3,j) = - tmp2 * fjac(5,3,j-1)
-     >              - tmp1 * njac(5,3,j-1)
-               lhsa(5,4,j) = - tmp2 * fjac(5,4,j-1)
-     >              - tmp1 * njac(5,4,j-1)
-               lhsa(5,5,j) = - tmp2 * fjac(5,5,j-1)
-     >              - tmp1 * njac(5,5,j-1)
-     >              - tmp1 * dy5
-
-               lhsb(1,1,j) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(1,1,j)
-     >              + tmp1 * 2.0d+00 * dy1
-               lhsb(1,2,j) = tmp1 * 2.0d+00 * njac(1,2,j)
-               lhsb(1,3,j) = tmp1 * 2.0d+00 * njac(1,3,j)
-               lhsb(1,4,j) = tmp1 * 2.0d+00 * njac(1,4,j)
-               lhsb(1,5,j) = tmp1 * 2.0d+00 * njac(1,5,j)
-
-               lhsb(2,1,j) = tmp1 * 2.0d+00 * njac(2,1,j)
-               lhsb(2,2,j) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(2,2,j)
-     >              + tmp1 * 2.0d+00 * dy2
-               lhsb(2,3,j) = tmp1 * 2.0d+00 * njac(2,3,j)
-               lhsb(2,4,j) = tmp1 * 2.0d+00 * njac(2,4,j)
-               lhsb(2,5,j) = tmp1 * 2.0d+00 * njac(2,5,j)
-
-               lhsb(3,1,j) = tmp1 * 2.0d+00 * njac(3,1,j)
-               lhsb(3,2,j) = tmp1 * 2.0d+00 * njac(3,2,j)
-               lhsb(3,3,j) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(3,3,j)
-     >              + tmp1 * 2.0d+00 * dy3
-               lhsb(3,4,j) = tmp1 * 2.0d+00 * njac(3,4,j)
-               lhsb(3,5,j) = tmp1 * 2.0d+00 * njac(3,5,j)
-
-               lhsb(4,1,j) = tmp1 * 2.0d+00 * njac(4,1,j)
-               lhsb(4,2,j) = tmp1 * 2.0d+00 * njac(4,2,j)
-               lhsb(4,3,j) = tmp1 * 2.0d+00 * njac(4,3,j)
-               lhsb(4,4,j) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(4,4,j)
-     >              + tmp1 * 2.0d+00 * dy4
-               lhsb(4,5,j) = tmp1 * 2.0d+00 * njac(4,5,j)
-
-               lhsb(5,1,j) = tmp1 * 2.0d+00 * njac(5,1,j)
-               lhsb(5,2,j) = tmp1 * 2.0d+00 * njac(5,2,j)
-               lhsb(5,3,j) = tmp1 * 2.0d+00 * njac(5,3,j)
-               lhsb(5,4,j) = tmp1 * 2.0d+00 * njac(5,4,j)
-               lhsb(5,5,j) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(5,5,j) 
-     >              + tmp1 * 2.0d+00 * dy5
-
-               lhsc(1,1,i,j,k,c) =  tmp2 * fjac(1,1,j+1)
-     >              - tmp1 * njac(1,1,j+1)
-     >              - tmp1 * dy1
-               lhsc(1,2,i,j,k,c) =  tmp2 * fjac(1,2,j+1)
-     >              - tmp1 * njac(1,2,j+1)
-               lhsc(1,3,i,j,k,c) =  tmp2 * fjac(1,3,j+1)
-     >              - tmp1 * njac(1,3,j+1)
-               lhsc(1,4,i,j,k,c) =  tmp2 * fjac(1,4,j+1)
-     >              - tmp1 * njac(1,4,j+1)
-               lhsc(1,5,i,j,k,c) =  tmp2 * fjac(1,5,j+1)
-     >              - tmp1 * njac(1,5,j+1)
-
-               lhsc(2,1,i,j,k,c) =  tmp2 * fjac(2,1,j+1)
-     >              - tmp1 * njac(2,1,j+1)
-               lhsc(2,2,i,j,k,c) =  tmp2 * fjac(2,2,j+1)
-     >              - tmp1 * njac(2,2,j+1)
-     >              - tmp1 * dy2
-               lhsc(2,3,i,j,k,c) =  tmp2 * fjac(2,3,j+1)
-     >              - tmp1 * njac(2,3,j+1)
-               lhsc(2,4,i,j,k,c) =  tmp2 * fjac(2,4,j+1)
-     >              - tmp1 * njac(2,4,j+1)
-               lhsc(2,5,i,j,k,c) =  tmp2 * fjac(2,5,j+1)
-     >              - tmp1 * njac(2,5,j+1)
-
-               lhsc(3,1,i,j,k,c) =  tmp2 * fjac(3,1,j+1)
-     >              - tmp1 * njac(3,1,j+1)
-               lhsc(3,2,i,j,k,c) =  tmp2 * fjac(3,2,j+1)
-     >              - tmp1 * njac(3,2,j+1)
-               lhsc(3,3,i,j,k,c) =  tmp2 * fjac(3,3,j+1)
-     >              - tmp1 * njac(3,3,j+1)
-     >              - tmp1 * dy3
-               lhsc(3,4,i,j,k,c) =  tmp2 * fjac(3,4,j+1)
-     >              - tmp1 * njac(3,4,j+1)
-               lhsc(3,5,i,j,k,c) =  tmp2 * fjac(3,5,j+1)
-     >              - tmp1 * njac(3,5,j+1)
-
-               lhsc(4,1,i,j,k,c) =  tmp2 * fjac(4,1,j+1)
-     >              - tmp1 * njac(4,1,j+1)
-               lhsc(4,2,i,j,k,c) =  tmp2 * fjac(4,2,j+1)
-     >              - tmp1 * njac(4,2,j+1)
-               lhsc(4,3,i,j,k,c) =  tmp2 * fjac(4,3,j+1)
-     >              - tmp1 * njac(4,3,j+1)
-               lhsc(4,4,i,j,k,c) =  tmp2 * fjac(4,4,j+1)
-     >              - tmp1 * njac(4,4,j+1)
-     >              - tmp1 * dy4
-               lhsc(4,5,i,j,k,c) =  tmp2 * fjac(4,5,j+1)
-     >              - tmp1 * njac(4,5,j+1)
-
-               lhsc(5,1,i,j,k,c) =  tmp2 * fjac(5,1,j+1)
-     >              - tmp1 * njac(5,1,j+1)
-               lhsc(5,2,i,j,k,c) =  tmp2 * fjac(5,2,j+1)
-     >              - tmp1 * njac(5,2,j+1)
-               lhsc(5,3,i,j,k,c) =  tmp2 * fjac(5,3,j+1)
-     >              - tmp1 * njac(5,3,j+1)
-               lhsc(5,4,i,j,k,c) =  tmp2 * fjac(5,4,j+1)
-     >              - tmp1 * njac(5,4,j+1)
-               lhsc(5,5,i,j,k,c) =  tmp2 * fjac(5,5,j+1)
-     >              - tmp1 * njac(5,5,j+1)
-     >              - tmp1 * dy5
-
-            enddo
-
-
-c---------------------------------------------------------------------
-c     outer most do loops - sweeping in i direction
-c---------------------------------------------------------------------
-            if (first .eq. 1) then 
-
-c---------------------------------------------------------------------
-c     multiply c(i,jstart,k) by b_inverse and copy back to c
-c     multiply rhs(jstart) by b_inverse(jstart) and copy to rhs
-c---------------------------------------------------------------------
-               call binvcrhs( lhsb(1,1,jstart),
-     >                        lhsc(1,1,i,jstart,k,c),
-     >                        rhs(1,i,jstart,k,c) )
-
-            endif
-
-c---------------------------------------------------------------------
-c     begin inner most do loop
-c     do all the elements of the cell unless last 
-c---------------------------------------------------------------------
-            do j=jstart+first,jsize-last
-
-c---------------------------------------------------------------------
-c     subtract A*lhs_vector(j-1) from lhs_vector(j)
-c     
-c     rhs(j) = rhs(j) - A*rhs(j-1)
-c---------------------------------------------------------------------
-               call matvec_sub(lhsa(1,1,j),
-     >                         rhs(1,i,j-1,k,c),rhs(1,i,j,k,c))
-
-c---------------------------------------------------------------------
-c     B(j) = B(j) - C(j-1)*A(j)
-c---------------------------------------------------------------------
-               call matmul_sub(lhsa(1,1,j),
-     >                         lhsc(1,1,i,j-1,k,c),
-     >                         lhsb(1,1,j))
-
-c---------------------------------------------------------------------
-c     multiply c(i,j,k) by b_inverse and copy back to c
-c     multiply rhs(i,1,k) by b_inverse(i,1,k) and copy to rhs
-c---------------------------------------------------------------------
-               call binvcrhs( lhsb(1,1,j),
-     >                        lhsc(1,1,i,j,k,c),
-     >                        rhs(1,i,j,k,c) )
-
-            enddo
-
-c---------------------------------------------------------------------
-c     Now finish up special cases for last cell
-c---------------------------------------------------------------------
-            if (last .eq. 1) then
-
-c---------------------------------------------------------------------
-c     rhs(jsize) = rhs(jsize) - A*rhs(jsize-1)
-c---------------------------------------------------------------------
-               call matvec_sub(lhsa(1,1,jsize),
-     >                         rhs(1,i,jsize-1,k,c),rhs(1,i,jsize,k,c))
-
-c---------------------------------------------------------------------
-c     B(jsize) = B(jsize) - C(jsize-1)*A(jsize)
-c     call matmul_sub(aa,i,jsize,k,c,
-c     $              cc,i,jsize-1,k,c,bb,i,jsize,k,c)
-c---------------------------------------------------------------------
-               call matmul_sub(lhsa(1,1,jsize),
-     >                         lhsc(1,1,i,jsize-1,k,c),
-     >                         lhsb(1,1,jsize))
-
-c---------------------------------------------------------------------
-c     multiply rhs(jsize) by b_inverse(jsize) and copy to rhs
-c---------------------------------------------------------------------
-               call binvrhs( lhsb(1,1,jsize),
-     >                       rhs(1,i,jsize,k,c) )
-
-            endif
-         enddo
-      enddo
-
-
-      return
-      end
-      
-
-
diff --git a/examples/smpi/NAS/BT/y_solve_vec.f b/examples/smpi/NAS/BT/y_solve_vec.f
deleted file mode 100644 (file)
index e21cfa3..0000000
+++ /dev/null
@@ -1,788 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine y_solve
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     Performs line solves in Y direction by first factoring
-c     the block-tridiagonal matrix into an upper triangular matrix, 
-c     and then performing back substitution to solve for the unknow
-c     vectors of each line.  
-c     
-c     Make sure we treat elements zero to cell_size in the direction
-c     of the sweep.
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer 
-     >     c, jstart, stage,
-     >     first, last, recv_id, error, r_status(MPI_STATUS_SIZE),
-     >     isize,jsize,ksize,send_id
-
-      jstart = 0
-
-c---------------------------------------------------------------------
-c     in our terminology stage is the number of the cell in the y-direct
-c     i.e. stage = 1 means the start of the line stage=ncells means end
-c---------------------------------------------------------------------
-      do stage = 1,ncells
-         c = slice(2,stage)
-         isize = cell_size(1,c) - 1
-         jsize = cell_size(2,c) - 1
-         ksize = cell_size(3,c) - 1
-
-c---------------------------------------------------------------------
-c     set last-cell flag
-c---------------------------------------------------------------------
-         if (stage .eq. ncells) then
-            last = 1
-         else
-            last = 0
-         endif
-
-         if (stage .eq. 1) then
-c---------------------------------------------------------------------
-c     This is the first cell, so solve without receiving data
-c---------------------------------------------------------------------
-            first = 1
-c            call lhsy(c)
-            call y_solve_cell(first,last,c)
-         else
-c---------------------------------------------------------------------
-c     Not the first cell of this line, so receive info from
-c     processor working on preceeding cell
-c---------------------------------------------------------------------
-            first = 0
-            call y_receive_solve_info(recv_id,c)
-c---------------------------------------------------------------------
-c     overlap computations and communications
-c---------------------------------------------------------------------
-c            call lhsy(c)
-c---------------------------------------------------------------------
-c     wait for completion
-c---------------------------------------------------------------------
-            call mpi_wait(send_id,r_status,error)
-            call mpi_wait(recv_id,r_status,error)
-c---------------------------------------------------------------------
-c     install C'(jstart+1) and rhs'(jstart+1) to be used in this cell
-c---------------------------------------------------------------------
-            call y_unpack_solve_info(c)
-            call y_solve_cell(first,last,c)
-         endif
-
-         if (last .eq. 0) call y_send_solve_info(send_id,c)
-      enddo
-
-c---------------------------------------------------------------------
-c     now perform backsubstitution in reverse direction
-c---------------------------------------------------------------------
-      do stage = ncells, 1, -1
-         c = slice(2,stage)
-         first = 0
-         last = 0
-         if (stage .eq. 1) first = 1
-         if (stage .eq. ncells) then
-            last = 1
-c---------------------------------------------------------------------
-c     last cell, so perform back substitute without waiting
-c---------------------------------------------------------------------
-            call y_backsubstitute(first, last,c)
-         else
-            call y_receive_backsub_info(recv_id,c)
-            call mpi_wait(send_id,r_status,error)
-            call mpi_wait(recv_id,r_status,error)
-            call y_unpack_backsub_info(c)
-            call y_backsubstitute(first,last,c)
-         endif
-         if (first .eq. 0) call y_send_backsub_info(send_id,c)
-      enddo
-
-
-      return
-      end
-      
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-      
-      subroutine y_unpack_solve_info(c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     unpack C'(-1) and rhs'(-1) for
-c     all i and k
-c---------------------------------------------------------------------
-
-      include 'header.h'
-
-      integer i,k,m,n,ptr,c,jstart 
-
-      jstart = 0
-      ptr = 0
-      do k=0,KMAX-1
-         do i=0,IMAX-1
-            do m=1,BLOCK_SIZE
-               do n=1,BLOCK_SIZE
-                  lhsc(m,n,i,jstart-1,k,c) = out_buffer(ptr+n)
-               enddo
-               ptr = ptr+BLOCK_SIZE
-            enddo
-            do n=1,BLOCK_SIZE
-               rhs(n,i,jstart-1,k,c) = out_buffer(ptr+n)
-            enddo
-            ptr = ptr+BLOCK_SIZE
-         enddo
-      enddo
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-      
-      subroutine y_send_solve_info(send_id,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     pack up and send C'(jend) and rhs'(jend) for
-c     all i and k
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer i,k,m,n,jsize,ptr,c,ip,kp
-      integer error,send_id,buffer_size 
-
-      jsize = cell_size(2,c)-1
-      ip = cell_coord(1,c) - 1
-      kp = cell_coord(3,c) - 1
-      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
-     >     (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE)
-
-c---------------------------------------------------------------------
-c     pack up buffer
-c---------------------------------------------------------------------
-      ptr = 0
-      do k=0,KMAX-1
-         do i=0,IMAX-1
-            do m=1,BLOCK_SIZE
-               do n=1,BLOCK_SIZE
-                  in_buffer(ptr+n) = lhsc(m,n,i,jsize,k,c)
-               enddo
-               ptr = ptr+BLOCK_SIZE
-            enddo
-            do n=1,BLOCK_SIZE
-               in_buffer(ptr+n) = rhs(n,i,jsize,k,c)
-            enddo
-            ptr = ptr+BLOCK_SIZE
-         enddo
-      enddo
-
-c---------------------------------------------------------------------
-c     send buffer 
-c---------------------------------------------------------------------
-      call mpi_isend(in_buffer, buffer_size,
-     >     dp_type, successor(2),
-     >     SOUTH+ip+kp*NCELLS, comm_solve,
-     >     send_id,error)
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine y_send_backsub_info(send_id,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     pack up and send U(jstart) for all i and k
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer i,k,n,ptr,c,jstart,ip,kp
-      integer error,send_id,buffer_size
-
-c---------------------------------------------------------------------
-c     Send element 0 to previous processor
-c---------------------------------------------------------------------
-      jstart = 0
-      ip = cell_coord(1,c)-1
-      kp = cell_coord(3,c)-1
-      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE
-      ptr = 0
-      do k=0,KMAX-1
-         do i=0,IMAX-1
-            do n=1,BLOCK_SIZE
-               in_buffer(ptr+n) = rhs(n,i,jstart,k,c)
-            enddo
-            ptr = ptr+BLOCK_SIZE
-         enddo
-      enddo
-      call mpi_isend(in_buffer, buffer_size,
-     >     dp_type, predecessor(2), 
-     >     NORTH+ip+kp*NCELLS, comm_solve, 
-     >     send_id,error)
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine y_unpack_backsub_info(c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     unpack U(jsize) for all i and k
-c---------------------------------------------------------------------
-
-      include 'header.h'
-
-      integer i,k,n,ptr,c 
-
-      ptr = 0
-      do k=0,KMAX-1
-         do i=0,IMAX-1
-            do n=1,BLOCK_SIZE
-               backsub_info(n,i,k,c) = out_buffer(ptr+n)
-            enddo
-            ptr = ptr+BLOCK_SIZE
-         enddo
-      enddo
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine y_receive_backsub_info(recv_id,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     post mpi receives
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer error,recv_id,ip,kp,c,buffer_size
-      ip = cell_coord(1,c) - 1
-      kp = cell_coord(3,c) - 1
-      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE
-      call mpi_irecv(out_buffer, buffer_size,
-     >     dp_type, successor(2), 
-     >     NORTH+ip+kp*NCELLS, comm_solve, 
-     >     recv_id, error)
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine y_receive_solve_info(recv_id,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     post mpi receives 
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer ip,kp,recv_id,error,c,buffer_size
-      ip = cell_coord(1,c) - 1
-      kp = cell_coord(3,c) - 1
-      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
-     >     (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE)
-      call mpi_irecv(out_buffer, buffer_size, 
-     >     dp_type, predecessor(2), 
-     >     SOUTH+ip+kp*NCELLS,  comm_solve, 
-     >     recv_id, error)
-
-      return
-      end
-      
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine y_backsubstitute(first, last, c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     back solve: if last cell, then generate U(jsize)=rhs(jsize)
-c     else assume U(jsize) is loaded in un pack backsub_info
-c     so just use it
-c     after call u(jstart) will be sent to next cell
-c---------------------------------------------------------------------
-
-      include 'header.h'
-
-      integer first, last, c, i, k
-      integer m,n,j,jsize,isize,ksize,jstart
-      
-      jstart = 0
-      isize = cell_size(1,c)-end(1,c)-1      
-      jsize = cell_size(2,c)-1
-      ksize = cell_size(3,c)-end(3,c)-1
-      if (last .eq. 0) then
-         do k=start(3,c),ksize
-            do i=start(1,c),isize
-c---------------------------------------------------------------------
-c     U(jsize) uses info from previous cell if not last cell
-c---------------------------------------------------------------------
-               do m=1,BLOCK_SIZE
-                  do n=1,BLOCK_SIZE
-                     rhs(m,i,jsize,k,c) = rhs(m,i,jsize,k,c) 
-     >                    - lhsc(m,n,i,jsize,k,c)*
-     >                    backsub_info(n,i,k,c)
-                  enddo
-               enddo
-            enddo
-         enddo
-      endif
-      do k=start(3,c),ksize
-         do j=jsize-1,jstart,-1
-            do i=start(1,c),isize
-               do m=1,BLOCK_SIZE
-                  do n=1,BLOCK_SIZE
-                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c) 
-     >                    - lhsc(m,n,i,j,k,c)*rhs(n,i,j+1,k,c)
-                  enddo
-               enddo
-            enddo
-         enddo
-      enddo
-
-      return
-      end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine y_solve_cell(first,last,c)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     performs guaussian elimination on this cell.
-c     
-c     assumes that unpacking routines for non-first cells 
-c     preload C' and rhs' from previous cell.
-c     
-c     assumed send happens outside this routine, but that
-c     c'(JMAX) and rhs'(JMAX) will be sent to next cell
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'work_lhs_vec.h'
-
-      integer first,last,c
-      integer i,j,k,m,n,isize,ksize,jsize,jstart
-
-      jstart = 0
-      isize = cell_size(1,c)-end(1,c)-1
-      jsize = cell_size(2,c)-1
-      ksize = cell_size(3,c)-end(3,c)-1
-
-c---------------------------------------------------------------------
-c     zero the left hand side for starters
-c     set diagonal values to 1. This is overkill, but convenient
-c---------------------------------------------------------------------
-      do i = 0, isize
-         do m = 1, 5
-            do n = 1, 5
-               lhsa(m,n,i,0) = 0.0d0
-               lhsb(m,n,i,0) = 0.0d0
-               lhsa(m,n,i,jsize) = 0.0d0
-               lhsb(m,n,i,jsize) = 0.0d0
-            enddo
-            lhsb(m,m,i,0) = 1.0d0
-            lhsb(m,m,i,jsize) = 1.0d0
-         enddo
-      enddo
-
-      do k=start(3,c),ksize 
-
-c---------------------------------------------------------------------
-c     This function computes the left hand side for the three y-factors 
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     Compute the indices for storing the tri-diagonal matrix;
-c     determine a (labeled f) and n jacobians for cell c
-c---------------------------------------------------------------------
-
-         do j = start(2,c)-1, cell_size(2,c)-end(2,c)
-            do i=start(1,c),isize
-
-               tmp1 = 1.0d0 / u(1,i,j,k,c)
-               tmp2 = tmp1 * tmp1
-               tmp3 = tmp1 * tmp2
-
-               fjac(1,1,i,j) = 0.0d+00
-               fjac(1,2,i,j) = 0.0d+00
-               fjac(1,3,i,j) = 1.0d+00
-               fjac(1,4,i,j) = 0.0d+00
-               fjac(1,5,i,j) = 0.0d+00
-
-               fjac(2,1,i,j) = - ( u(2,i,j,k,c)*u(3,i,j,k,c) )
-     >              * tmp2
-               fjac(2,2,i,j) = u(3,i,j,k,c) * tmp1
-               fjac(2,3,i,j) = u(2,i,j,k,c) * tmp1
-               fjac(2,4,i,j) = 0.0d+00
-               fjac(2,5,i,j) = 0.0d+00
-
-               fjac(3,1,i,j) = - ( u(3,i,j,k,c)*u(3,i,j,k,c)*tmp2)
-     >              + c2 * qs(i,j,k,c)
-               fjac(3,2,i,j) = - c2 *  u(2,i,j,k,c) * tmp1
-               fjac(3,3,i,j) = ( 2.0d+00 - c2 )
-     >              *  u(3,i,j,k,c) * tmp1 
-               fjac(3,4,i,j) = - c2 * u(4,i,j,k,c) * tmp1 
-               fjac(3,5,i,j) = c2
-
-               fjac(4,1,i,j) = - ( u(3,i,j,k,c)*u(4,i,j,k,c) )
-     >              * tmp2
-               fjac(4,2,i,j) = 0.0d+00
-               fjac(4,3,i,j) = u(4,i,j,k,c) * tmp1
-               fjac(4,4,i,j) = u(3,i,j,k,c) * tmp1
-               fjac(4,5,i,j) = 0.0d+00
-
-               fjac(5,1,i,j) = ( c2 * 2.0d0 * qs(i,j,k,c)
-     >              - c1 * u(5,i,j,k,c) * tmp1 ) 
-     >              * u(3,i,j,k,c) * tmp1 
-               fjac(5,2,i,j) = - c2 * u(2,i,j,k,c)*u(3,i,j,k,c) 
-     >              * tmp2
-               fjac(5,3,i,j) = c1 * u(5,i,j,k,c) * tmp1 
-     >              - c2 * ( qs(i,j,k,c)
-     >              + u(3,i,j,k,c)*u(3,i,j,k,c) * tmp2 )
-               fjac(5,4,i,j) = - c2 * ( u(3,i,j,k,c)*u(4,i,j,k,c) )
-     >              * tmp2
-               fjac(5,5,i,j) = c1 * u(3,i,j,k,c) * tmp1 
-
-               njac(1,1,i,j) = 0.0d+00
-               njac(1,2,i,j) = 0.0d+00
-               njac(1,3,i,j) = 0.0d+00
-               njac(1,4,i,j) = 0.0d+00
-               njac(1,5,i,j) = 0.0d+00
-
-               njac(2,1,i,j) = - c3c4 * tmp2 * u(2,i,j,k,c)
-               njac(2,2,i,j) =   c3c4 * tmp1
-               njac(2,3,i,j) =   0.0d+00
-               njac(2,4,i,j) =   0.0d+00
-               njac(2,5,i,j) =   0.0d+00
-
-               njac(3,1,i,j) = - con43 * c3c4 * tmp2 * u(3,i,j,k,c)
-               njac(3,2,i,j) =   0.0d+00
-               njac(3,3,i,j) =   con43 * c3c4 * tmp1
-               njac(3,4,i,j) =   0.0d+00
-               njac(3,5,i,j) =   0.0d+00
-
-               njac(4,1,i,j) = - c3c4 * tmp2 * u(4,i,j,k,c)
-               njac(4,2,i,j) =   0.0d+00
-               njac(4,3,i,j) =   0.0d+00
-               njac(4,4,i,j) =   c3c4 * tmp1
-               njac(4,5,i,j) =   0.0d+00
-
-               njac(5,1,i,j) = - (  c3c4
-     >              - c1345 ) * tmp3 * (u(2,i,j,k,c)**2)
-     >              - ( con43 * c3c4
-     >              - c1345 ) * tmp3 * (u(3,i,j,k,c)**2)
-     >              - ( c3c4 - c1345 ) * tmp3 * (u(4,i,j,k,c)**2)
-     >              - c1345 * tmp2 * u(5,i,j,k,c)
-
-               njac(5,2,i,j) = (  c3c4 - c1345 ) * tmp2 * u(2,i,j,k,c)
-               njac(5,3,i,j) = ( con43 * c3c4
-     >              - c1345 ) * tmp2 * u(3,i,j,k,c)
-               njac(5,4,i,j) = ( c3c4 - c1345 ) * tmp2 * u(4,i,j,k,c)
-               njac(5,5,i,j) = ( c1345 ) * tmp1
-
-            enddo
-         enddo
-
-c---------------------------------------------------------------------
-c     now joacobians set, so form left hand side in y direction
-c---------------------------------------------------------------------
-         do j = start(2,c), jsize-end(2,c)
-            do i=start(1,c),isize
-
-               tmp1 = dt * ty1
-               tmp2 = dt * ty2
-
-               lhsa(1,1,i,j) = - tmp2 * fjac(1,1,i,j-1)
-     >              - tmp1 * njac(1,1,i,j-1)
-     >              - tmp1 * dy1 
-               lhsa(1,2,i,j) = - tmp2 * fjac(1,2,i,j-1)
-     >              - tmp1 * njac(1,2,i,j-1)
-               lhsa(1,3,i,j) = - tmp2 * fjac(1,3,i,j-1)
-     >              - tmp1 * njac(1,3,i,j-1)
-               lhsa(1,4,i,j) = - tmp2 * fjac(1,4,i,j-1)
-     >              - tmp1 * njac(1,4,i,j-1)
-               lhsa(1,5,i,j) = - tmp2 * fjac(1,5,i,j-1)
-     >              - tmp1 * njac(1,5,i,j-1)
-
-               lhsa(2,1,i,j) = - tmp2 * fjac(2,1,i,j-1)
-     >              - tmp1 * njac(2,1,i,j-1)
-               lhsa(2,2,i,j) = - tmp2 * fjac(2,2,i,j-1)
-     >              - tmp1 * njac(2,2,i,j-1)
-     >              - tmp1 * dy2
-               lhsa(2,3,i,j) = - tmp2 * fjac(2,3,i,j-1)
-     >              - tmp1 * njac(2,3,i,j-1)
-               lhsa(2,4,i,j) = - tmp2 * fjac(2,4,i,j-1)
-     >              - tmp1 * njac(2,4,i,j-1)
-               lhsa(2,5,i,j) = - tmp2 * fjac(2,5,i,j-1)
-     >              - tmp1 * njac(2,5,i,j-1)
-
-               lhsa(3,1,i,j) = - tmp2 * fjac(3,1,i,j-1)
-     >              - tmp1 * njac(3,1,i,j-1)
-               lhsa(3,2,i,j) = - tmp2 * fjac(3,2,i,j-1)
-     >              - tmp1 * njac(3,2,i,j-1)
-               lhsa(3,3,i,j) = - tmp2 * fjac(3,3,i,j-1)
-     >              - tmp1 * njac(3,3,i,j-1)
-     >              - tmp1 * dy3 
-               lhsa(3,4,i,j) = - tmp2 * fjac(3,4,i,j-1)
-     >              - tmp1 * njac(3,4,i,j-1)
-               lhsa(3,5,i,j) = - tmp2 * fjac(3,5,i,j-1)
-     >              - tmp1 * njac(3,5,i,j-1)
-
-               lhsa(4,1,i,j) = - tmp2 * fjac(4,1,i,j-1)
-     >              - tmp1 * njac(4,1,i,j-1)
-               lhsa(4,2,i,j) = - tmp2 * fjac(4,2,i,j-1)
-     >              - tmp1 * njac(4,2,i,j-1)
-               lhsa(4,3,i,j) = - tmp2 * fjac(4,3,i,j-1)
-     >              - tmp1 * njac(4,3,i,j-1)
-               lhsa(4,4,i,j) = - tmp2 * fjac(4,4,i,j-1)
-     >              - tmp1 * njac(4,4,i,j-1)
-     >              - tmp1 * dy4
-               lhsa(4,5,i,j) = - tmp2 * fjac(4,5,i,j-1)
-     >              - tmp1 * njac(4,5,i,j-1)
-
-               lhsa(5,1,i,j) = - tmp2 * fjac(5,1,i,j-1)
-     >              - tmp1 * njac(5,1,i,j-1)
-               lhsa(5,2,i,j) = - tmp2 * fjac(5,2,i,j-1)
-     >              - tmp1 * njac(5,2,i,j-1)
-               lhsa(5,3,i,j) = - tmp2 * fjac(5,3,i,j-1)
-     >              - tmp1 * njac(5,3,i,j-1)
-               lhsa(5,4,i,j) = - tmp2 * fjac(5,4,i,j-1)
-     >              - tmp1 * njac(5,4,i,j-1)
-               lhsa(5,5,i,j) = - tmp2 * fjac(5,5,i,j-1)
-     >              - tmp1 * njac(5,5,i,j-1)
-     >              - tmp1 * dy5
-
-               lhsb(1,1,i,j) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(1,1,i,j)
-     >              + tmp1 * 2.0d+00 * dy1
-               lhsb(1,2,i,j) = tmp1 * 2.0d+00 * njac(1,2,i,j)
-               lhsb(1,3,i,j) = tmp1 * 2.0d+00 * njac(1,3,i,j)
-               lhsb(1,4,i,j) = tmp1 * 2.0d+00 * njac(1,4,i,j)
-               lhsb(1,5,i,j) = tmp1 * 2.0d+00 * njac(1,5,i,j)
-
-               lhsb(2,1,i,j) = tmp1 * 2.0d+00 * njac(2,1,i,j)
-               lhsb(2,2,i,j) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(2,2,i,j)
-     >              + tmp1 * 2.0d+00 * dy2
-               lhsb(2,3,i,j) = tmp1 * 2.0d+00 * njac(2,3,i,j)
-               lhsb(2,4,i,j) = tmp1 * 2.0d+00 * njac(2,4,i,j)
-               lhsb(2,5,i,j) = tmp1 * 2.0d+00 * njac(2,5,i,j)
-
-               lhsb(3,1,i,j) = tmp1 * 2.0d+00 * njac(3,1,i,j)
-               lhsb(3,2,i,j) = tmp1 * 2.0d+00 * njac(3,2,i,j)
-               lhsb(3,3,i,j) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(3,3,i,j)
-     >              + tmp1 * 2.0d+00 * dy3
-               lhsb(3,4,i,j) = tmp1 * 2.0d+00 * njac(3,4,i,j)
-               lhsb(3,5,i,j) = tmp1 * 2.0d+00 * njac(3,5,i,j)
-
-               lhsb(4,1,i,j) = tmp1 * 2.0d+00 * njac(4,1,i,j)
-               lhsb(4,2,i,j) = tmp1 * 2.0d+00 * njac(4,2,i,j)
-               lhsb(4,3,i,j) = tmp1 * 2.0d+00 * njac(4,3,i,j)
-               lhsb(4,4,i,j) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(4,4,i,j)
-     >              + tmp1 * 2.0d+00 * dy4
-               lhsb(4,5,i,j) = tmp1 * 2.0d+00 * njac(4,5,i,j)
-
-               lhsb(5,1,i,j) = tmp1 * 2.0d+00 * njac(5,1,i,j)
-               lhsb(5,2,i,j) = tmp1 * 2.0d+00 * njac(5,2,i,j)
-               lhsb(5,3,i,j) = tmp1 * 2.0d+00 * njac(5,3,i,j)
-               lhsb(5,4,i,j) = tmp1 * 2.0d+00 * njac(5,4,i,j)
-               lhsb(5,5,i,j) = 1.0d+00
-     >              + tmp1 * 2.0d+00 * njac(5,5,i,j) 
-     >              + tmp1 * 2.0d+00 * dy5
-
-               lhsc(1,1,i,j,k,c) =  tmp2 * fjac(1,1,i,j+1)
-     >              - tmp1 * njac(1,1,i,j+1)
-     >              - tmp1 * dy1
-               lhsc(1,2,i,j,k,c) =  tmp2 * fjac(1,2,i,j+1)
-     >              - tmp1 * njac(1,2,i,j+1)
-               lhsc(1,3,i,j,k,c) =  tmp2 * fjac(1,3,i,j+1)
-     >              - tmp1 * njac(1,3,i,j+1)
-               lhsc(1,4,i,j,k,c) =  tmp2 * fjac(1,4,i,j+1)
-     >              - tmp1 * njac(1,4,i,j+1)
-               lhsc(1,5,i,j,k,c) =  tmp2 * fjac(1,5,i,j+1)
-     >              - tmp1 * njac(1,5,i,j+1)
-
-               lhsc(2,1,i,j,k,c) =  tmp2 * fjac(2,1,i,j+1)
-     >              - tmp1 * njac(2,1,i,j+1)
-               lhsc(2,2,i,j,k,c) =  tmp2 * fjac(2,2,i,j+1)
-     >              - tmp1 * njac(2,2,i,j+1)
-     >              - tmp1 * dy2
-               lhsc(2,3,i,j,k,c) =  tmp2 * fjac(2,3,i,j+1)
-     >              - tmp1 * njac(2,3,i,j+1)
-               lhsc(2,4,i,j,k,c) =  tmp2 * fjac(2,4,i,j+1)
-     >              - tmp1 * njac(2,4,i,j+1)
-               lhsc(2,5,i,j,k,c) =  tmp2 * fjac(2,5,i,j+1)
-     >              - tmp1 * njac(2,5,i,j+1)
-
-               lhsc(3,1,i,j,k,c) =  tmp2 * fjac(3,1,i,j+1)
-     >              - tmp1 * njac(3,1,i,j+1)
-               lhsc(3,2,i,j,k,c) =  tmp2 * fjac(3,2,i,j+1)
-     >              - tmp1 * njac(3,2,i,j+1)
-               lhsc(3,3,i,j,k,c) =  tmp2 * fjac(3,3,i,j+1)
-     >              - tmp1 * njac(3,3,i,j+1)
-     >              - tmp1 * dy3
-               lhsc(3,4,i,j,k,c) =  tmp2 * fjac(3,4,i,j+1)
-     >              - tmp1 * njac(3,4,i,j+1)
-               lhsc(3,5,i,j,k,c) =  tmp2 * fjac(3,5,i,j+1)
-     >              - tmp1 * njac(3,5,i,j+1)
-
-               lhsc(4,1,i,j,k,c) =  tmp2 * fjac(4,1,i,j+1)
-     >              - tmp1 * njac(4,1,i,j+1)
-               lhsc(4,2,i,j,k,c) =  tmp2 * fjac(4,2,i,j+1)
-     >              - tmp1 * njac(4,2,i,j+1)
-               lhsc(4,3,i,j,k,c) =  tmp2 * fjac(4,3,i,j+1)
-     >              - tmp1 * njac(4,3,i,j+1)
-               lhsc(4,4,i,j,k,c) =  tmp2 * fjac(4,4,i,j+1)
-     >              - tmp1 * njac(4,4,i,j+1)
-     >              - tmp1 * dy4
-               lhsc(4,5,i,j,k,c) =  tmp2 * fjac(4,5,i,j+1)
-     >              - tmp1 * njac(4,5,i,j+1)
-
-               lhsc(5,1,i,j,k,c) =  tmp2 * fjac(5,1,i,j+1)
-     >              - tmp1 * njac(5,1,i,j+1)
-               lhsc(5,2,i,j,k,c) =  tmp2 * fjac(5,2,i,j+1)
-     >              - tmp1 * njac(5,2,i,j+1)
-               lhsc(5,3,i,j,k,c) =  tmp2 * fjac(5,3,i,j+1)
-     >              - tmp1 * njac(5,3,i,j+1)
-               lhsc(5,4,i,j,k,c) =  tmp2 * fjac(5,4,i,j+1)
-     >              - tmp1 * njac(5,4,i,j+1)
-               lhsc(5,5,i,j,k,c) =  tmp2 * fjac(5,5,i,j+1)
-     >              - tmp1 * njac(5,5,i,j+1)
-     >              - tmp1 * dy5
-
-            enddo
-         enddo
-
-
-c---------------------------------------------------------------------
-c     outer most do loops - sweeping in i direction
-c---------------------------------------------------------------------
-         if (first .eq. 1) then 
-
-c---------------------------------------------------------------------
-c     multiply c(i,jstart,k) by b_inverse and copy back to c
-c     multiply rhs(jstart) by b_inverse(jstart) and copy to rhs
-c---------------------------------------------------------------------
-!dir$ ivdep
-            do i=start(1,c),isize
-               call binvcrhs( lhsb(1,1,i,jstart),
-     >                        lhsc(1,1,i,jstart,k,c),
-     >                        rhs(1,i,jstart,k,c) )
-            enddo
-
-         endif
-
-c---------------------------------------------------------------------
-c     begin inner most do loop
-c     do all the elements of the cell unless last 
-c---------------------------------------------------------------------
-         do j=jstart+first,jsize-last
-!dir$ ivdep
-            do i=start(1,c),isize
-
-c---------------------------------------------------------------------
-c     subtract A*lhs_vector(j-1) from lhs_vector(j)
-c     
-c     rhs(j) = rhs(j) - A*rhs(j-1)
-c---------------------------------------------------------------------
-               call matvec_sub(lhsa(1,1,i,j),
-     >                         rhs(1,i,j-1,k,c),rhs(1,i,j,k,c))
-
-c---------------------------------------------------------------------
-c     B(j) = B(j) - C(j-1)*A(j)
-c---------------------------------------------------------------------
-               call matmul_sub(lhsa(1,1,i,j),
-     >                         lhsc(1,1,i,j-1,k,c),
-     >                         lhsb(1,1,i,j))
-
-c---------------------------------------------------------------------
-c     multiply c(i,j,k) by b_inverse and copy back to c
-c     multiply rhs(i,1,k) by b_inverse(i,1,k) and copy to rhs
-c---------------------------------------------------------------------
-               call binvcrhs( lhsb(1,1,i,j),
-     >                        lhsc(1,1,i,j,k,c),
-     >                        rhs(1,i,j,k,c) )
-
-            enddo
-         enddo
-
-c---------------------------------------------------------------------
-c     Now finish up special cases for last cell
-c---------------------------------------------------------------------
-         if (last .eq. 1) then
-
-!dir$ ivdep
-            do i=start(1,c),isize
-c---------------------------------------------------------------------
-c     rhs(jsize) = rhs(jsize) - A*rhs(jsize-1)
-c---------------------------------------------------------------------
-               call matvec_sub(lhsa(1,1,i,jsize),
-     >                         rhs(1,i,jsize-1,k,c),rhs(1,i,jsize,k,c))
-
-c---------------------------------------------------------------------
-c     B(jsize) = B(jsize) - C(jsize-1)*A(jsize)
-c     call matmul_sub(aa,i,jsize,k,c,
-c     $              cc,i,jsize-1,k,c,bb,i,jsize,k,c)
-c---------------------------------------------------------------------
-               call matmul_sub(lhsa(1,1,i,jsize),
-     >                         lhsc(1,1,i,jsize-1,k,c),
-     >                         lhsb(1,1,i,jsize))
-
-c---------------------------------------------------------------------
-c     multiply rhs(jsize) by b_inverse(jsize) and copy to rhs
-c---------------------------------------------------------------------
-               call binvrhs( lhsb(1,1,i,jsize),
-     >                       rhs(1,i,jsize,k,c) )
-            enddo
-
-         endif
-      enddo
-
-
-      return
-      end
-      
-
-
diff --git a/examples/smpi/NAS/BT/z_solve.f b/examples/smpi/NAS/BT/z_solve.f
deleted file mode 100644 (file)
index d7a5a2f..0000000
+++ /dev/null
@@ -1,776 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine z_solve
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c     Performs line solves in Z direction by first factoring
-c     the block-tridiagonal matrix into an upper triangular matrix, 
-c     and then performing back substitution to solve for the unknow
-c     vectors of each line.  
-c     
-c     Make sure we treat elements zero to cell_size in the direction
-c     of the sweep.
-c---------------------------------------------------------------------
-
-      include 'header.h'
-      include 'mpinpb.h'
-
-      integer c, kstart, stage,
-     >     first, last, recv_id, error, r_status(MPI_STATUS_SIZE),
-     >     isize,jsize,ksize,send_id
-
-      kstart = 0
-
-c---------------------------------------------------------------------
-c     in our terminology stage is the number of the cell in the y-direction
-c     i.e. stage = 1 means the start of the line stage=ncells means end
-c---------------------------------------------------------------------
-      do stage = 1,ncells
-         c = slice(3,stage)
-         isize = cell_size(1,c) - 1
-         jsize = cell_size(2,c) - 1
-         ksize = cell_size(3,c) - 1
-c---------------------------------------------------------------------
-c     set last-cell flag
-c---------------------------------------------------------------------
-         if (stage .eq. ncells) then
-          &nb