X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/d12142c9dc382512752775d0cca043bb1aef3f86..4c74bb7b6f2398da81ce462cbdfd9c5a77ffa683:/examples/smpi/NAS/BT/z_solve_vec.f diff --git a/examples/smpi/NAS/BT/z_solve_vec.f b/examples/smpi/NAS/BT/z_solve_vec.f new file mode 100644 index 0000000000..2c27fb00d1 --- /dev/null +++ b/examples/smpi/NAS/BT/z_solve_vec.f @@ -0,0 +1,793 @@ +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + + subroutine z_solve + +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + +c--------------------------------------------------------------------- +c Performs line solves in Z direction by first factoring +c the block-tridiagonal matrix into an upper triangular matrix, +c and then performing back substitution to solve for the unknow +c vectors of each line. +c +c Make sure we treat elements zero to cell_size in the direction +c of the sweep. +c--------------------------------------------------------------------- + + include 'header.h' + include 'mpinpb.h' + + integer c, kstart, stage, + > first, last, recv_id, error, r_status(MPI_STATUS_SIZE), + > isize,jsize,ksize,send_id + + kstart = 0 + +c--------------------------------------------------------------------- +c in our terminology stage is the number of the cell in the y-direct +c i.e. stage = 1 means the start of the line stage=ncells means end +c--------------------------------------------------------------------- + do stage = 1,ncells + c = slice(3,stage) + isize = cell_size(1,c) - 1 + jsize = cell_size(2,c) - 1 + ksize = cell_size(3,c) - 1 +c--------------------------------------------------------------------- +c set last-cell flag +c--------------------------------------------------------------------- + if (stage .eq. ncells) then + last = 1 + else + last = 0 + endif + + if (stage .eq. 1) then +c--------------------------------------------------------------------- +c This is the first cell, so solve without receiving data +c--------------------------------------------------------------------- + first = 1 +c call lhsz(c) + call z_solve_cell(first,last,c) + else +c--------------------------------------------------------------------- +c Not the first cell of this line, so receive info from +c processor working on preceeding cell +c--------------------------------------------------------------------- + first = 0 + call z_receive_solve_info(recv_id,c) +c--------------------------------------------------------------------- +c overlap computations and communications +c--------------------------------------------------------------------- +c call lhsz(c) +c--------------------------------------------------------------------- +c wait for completion +c--------------------------------------------------------------------- + call mpi_wait(send_id,r_status,error) + call mpi_wait(recv_id,r_status,error) +c--------------------------------------------------------------------- +c install C'(kstart+1) and rhs'(kstart+1) to be used in this cell +c--------------------------------------------------------------------- + call z_unpack_solve_info(c) + call z_solve_cell(first,last,c) + endif + + if (last .eq. 0) call z_send_solve_info(send_id,c) + enddo + +c--------------------------------------------------------------------- +c now perform backsubstitution in reverse direction +c--------------------------------------------------------------------- + do stage = ncells, 1, -1 + c = slice(3,stage) + first = 0 + last = 0 + if (stage .eq. 1) first = 1 + if (stage .eq. ncells) then + last = 1 +c--------------------------------------------------------------------- +c last cell, so perform back substitute without waiting +c--------------------------------------------------------------------- + call z_backsubstitute(first, last,c) + else + call z_receive_backsub_info(recv_id,c) + call mpi_wait(send_id,r_status,error) + call mpi_wait(recv_id,r_status,error) + call z_unpack_backsub_info(c) + call z_backsubstitute(first,last,c) + endif + if (first .eq. 0) call z_send_backsub_info(send_id,c) + enddo + + + return + end + +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + + subroutine z_unpack_solve_info(c) +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + +c--------------------------------------------------------------------- +c unpack C'(-1) and rhs'(-1) for +c all i and j +c--------------------------------------------------------------------- + + include 'header.h' + + integer i,j,m,n,ptr,c,kstart + + kstart = 0 + ptr = 0 + do j=0,JMAX-1 + do i=0,IMAX-1 + do m=1,BLOCK_SIZE + do n=1,BLOCK_SIZE + lhsc(m,n,i,j,kstart-1,c) = out_buffer(ptr+n) + enddo + ptr = ptr+BLOCK_SIZE + enddo + do n=1,BLOCK_SIZE + rhs(n,i,j,kstart-1,c) = out_buffer(ptr+n) + enddo + ptr = ptr+BLOCK_SIZE + enddo + enddo + + return + end + +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + + subroutine z_send_solve_info(send_id,c) + +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + +c--------------------------------------------------------------------- +c pack up and send C'(kend) and rhs'(kend) for +c all i and j +c--------------------------------------------------------------------- + + include 'header.h' + include 'mpinpb.h' + + integer i,j,m,n,ksize,ptr,c,ip,jp + integer error,send_id,buffer_size + + ksize = cell_size(3,c)-1 + ip = cell_coord(1,c) - 1 + jp = cell_coord(2,c) - 1 + buffer_size=MAX_CELL_DIM*MAX_CELL_DIM* + > (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE) + +c--------------------------------------------------------------------- +c pack up buffer +c--------------------------------------------------------------------- + ptr = 0 + do j=0,JMAX-1 + do i=0,IMAX-1 + do m=1,BLOCK_SIZE + do n=1,BLOCK_SIZE + in_buffer(ptr+n) = lhsc(m,n,i,j,ksize,c) + enddo + ptr = ptr+BLOCK_SIZE + enddo + do n=1,BLOCK_SIZE + in_buffer(ptr+n) = rhs(n,i,j,ksize,c) + enddo + ptr = ptr+BLOCK_SIZE + enddo + enddo + +c--------------------------------------------------------------------- +c send buffer +c--------------------------------------------------------------------- + call mpi_isend(in_buffer, buffer_size, + > dp_type, successor(3), + > BOTTOM+ip+jp*NCELLS, comm_solve, + > send_id,error) + + return + end + +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + + subroutine z_send_backsub_info(send_id,c) + +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + +c--------------------------------------------------------------------- +c pack up and send U(jstart) for all i and j +c--------------------------------------------------------------------- + + include 'header.h' + include 'mpinpb.h' + + integer i,j,n,ptr,c,kstart,ip,jp + integer error,send_id,buffer_size + +c--------------------------------------------------------------------- +c Send element 0 to previous processor +c--------------------------------------------------------------------- + kstart = 0 + ip = cell_coord(1,c)-1 + jp = cell_coord(2,c)-1 + buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE + ptr = 0 + do j=0,JMAX-1 + do i=0,IMAX-1 + do n=1,BLOCK_SIZE + in_buffer(ptr+n) = rhs(n,i,j,kstart,c) + enddo + ptr = ptr+BLOCK_SIZE + enddo + enddo + + call mpi_isend(in_buffer, buffer_size, + > dp_type, predecessor(3), + > TOP+ip+jp*NCELLS, comm_solve, + > send_id,error) + + return + end + +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + + subroutine z_unpack_backsub_info(c) + +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + +c--------------------------------------------------------------------- +c unpack U(ksize) for all i and j +c--------------------------------------------------------------------- + + include 'header.h' + + integer i,j,n,ptr,c + + ptr = 0 + do j=0,JMAX-1 + do i=0,IMAX-1 + do n=1,BLOCK_SIZE + backsub_info(n,i,j,c) = out_buffer(ptr+n) + enddo + ptr = ptr+BLOCK_SIZE + enddo + enddo + + return + end + + +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + + subroutine z_receive_backsub_info(recv_id,c) + +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + +c--------------------------------------------------------------------- +c post mpi receives +c--------------------------------------------------------------------- + + include 'header.h' + include 'mpinpb.h' + + integer error,recv_id,ip,jp,c,buffer_size + ip = cell_coord(1,c) - 1 + jp = cell_coord(2,c) - 1 + buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE + call mpi_irecv(out_buffer, buffer_size, + > dp_type, successor(3), + > TOP+ip+jp*NCELLS, comm_solve, + > recv_id, error) + + return + end + +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + + subroutine z_receive_solve_info(recv_id,c) + +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + +c--------------------------------------------------------------------- +c post mpi receives +c--------------------------------------------------------------------- + + include 'header.h' + include 'mpinpb.h' + + integer ip,jp,recv_id,error,c,buffer_size + ip = cell_coord(1,c) - 1 + jp = cell_coord(2,c) - 1 + buffer_size=MAX_CELL_DIM*MAX_CELL_DIM* + > (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE) + call mpi_irecv(out_buffer, buffer_size, + > dp_type, predecessor(3), + > BOTTOM+ip+jp*NCELLS, comm_solve, + > recv_id, error) + + return + end + +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + + subroutine z_backsubstitute(first, last, c) + +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + +c--------------------------------------------------------------------- +c back solve: if last cell, then generate U(ksize)=rhs(ksize) +c else assume U(ksize) is loaded in un pack backsub_info +c so just use it +c after call u(kstart) will be sent to next cell +c--------------------------------------------------------------------- + + include 'header.h' + + integer first, last, c, i, k + integer m,n,j,jsize,isize,ksize,kstart + + kstart = 0 + isize = cell_size(1,c)-end(1,c)-1 + jsize = cell_size(2,c)-end(2,c)-1 + ksize = cell_size(3,c)-1 + if (last .eq. 0) then + do j=start(2,c),jsize + do i=start(1,c),isize +c--------------------------------------------------------------------- +c U(jsize) uses info from previous cell if not last cell +c--------------------------------------------------------------------- + do m=1,BLOCK_SIZE + do n=1,BLOCK_SIZE + rhs(m,i,j,ksize,c) = rhs(m,i,j,ksize,c) + > - lhsc(m,n,i,j,ksize,c)* + > backsub_info(n,i,j,c) + enddo + enddo + enddo + enddo + endif + do k=ksize-1,kstart,-1 + do j=start(2,c),jsize + do i=start(1,c),isize + do m=1,BLOCK_SIZE + do n=1,BLOCK_SIZE + rhs(m,i,j,k,c) = rhs(m,i,j,k,c) + > - lhsc(m,n,i,j,k,c)*rhs(n,i,j,k+1,c) + enddo + enddo + enddo + enddo + enddo + + return + end + +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + + subroutine z_solve_cell(first,last,c) + +c--------------------------------------------------------------------- +c--------------------------------------------------------------------- + +c--------------------------------------------------------------------- +c performs guaussian elimination on this cell. +c +c assumes that unpacking routines for non-first cells +c preload C' and rhs' from previous cell. +c +c assumed send happens outside this routine, but that +c c'(KMAX) and rhs'(KMAX) will be sent to next cell. +c--------------------------------------------------------------------- + + include 'header.h' + include 'work_lhs_vec.h' + + integer first,last,c + integer i,j,k,m,n,isize,ksize,jsize,kstart + + kstart = 0 + isize = cell_size(1,c)-end(1,c)-1 + jsize = cell_size(2,c)-end(2,c)-1 + ksize = cell_size(3,c)-1 + +c--------------------------------------------------------------------- +c zero the left hand side for starters +c set diagonal values to 1. This is overkill, but convenient +c--------------------------------------------------------------------- + do i = 0, isize + do m = 1, 5 + do n = 1, 5 + lhsa(m,n,i,0) = 0.0d0 + lhsb(m,n,i,0) = 0.0d0 + lhsa(m,n,i,ksize) = 0.0d0 + lhsb(m,n,i,ksize) = 0.0d0 + enddo + lhsb(m,m,i,0) = 1.0d0 + lhsb(m,m,i,ksize) = 1.0d0 + enddo + enddo + + do j=start(2,c),jsize + +c--------------------------------------------------------------------- +c This function computes the left hand side for the three z-factors +c--------------------------------------------------------------------- + +c--------------------------------------------------------------------- +c Compute the indices for storing the block-diagonal matrix; +c determine c (labeled f) and s jacobians for cell c +c--------------------------------------------------------------------- + + do k = start(3,c)-1, cell_size(3,c)-end(3,c) + do i=start(1,c),isize + + tmp1 = 1.0d0 / u(1,i,j,k,c) + tmp2 = tmp1 * tmp1 + tmp3 = tmp1 * tmp2 + + fjac(1,1,i,k) = 0.0d+00 + fjac(1,2,i,k) = 0.0d+00 + fjac(1,3,i,k) = 0.0d+00 + fjac(1,4,i,k) = 1.0d+00 + fjac(1,5,i,k) = 0.0d+00 + + fjac(2,1,i,k) = - ( u(2,i,j,k,c)*u(4,i,j,k,c) ) + > * tmp2 + fjac(2,2,i,k) = u(4,i,j,k,c) * tmp1 + fjac(2,3,i,k) = 0.0d+00 + fjac(2,4,i,k) = u(2,i,j,k,c) * tmp1 + fjac(2,5,i,k) = 0.0d+00 + + fjac(3,1,i,k) = - ( u(3,i,j,k,c)*u(4,i,j,k,c) ) + > * tmp2 + fjac(3,2,i,k) = 0.0d+00 + fjac(3,3,i,k) = u(4,i,j,k,c) * tmp1 + fjac(3,4,i,k) = u(3,i,j,k,c) * tmp1 + fjac(3,5,i,k) = 0.0d+00 + + fjac(4,1,i,k) = - (u(4,i,j,k,c)*u(4,i,j,k,c) * tmp2 ) + > + c2 * qs(i,j,k,c) + fjac(4,2,i,k) = - c2 * u(2,i,j,k,c) * tmp1 + fjac(4,3,i,k) = - c2 * u(3,i,j,k,c) * tmp1 + fjac(4,4,i,k) = ( 2.0d+00 - c2 ) + > * u(4,i,j,k,c) * tmp1 + fjac(4,5,i,k) = c2 + + fjac(5,1,i,k) = ( c2 * 2.0d0 * qs(i,j,k,c) + > - c1 * ( u(5,i,j,k,c) * tmp1 ) ) + > * ( u(4,i,j,k,c) * tmp1 ) + fjac(5,2,i,k) = - c2 * ( u(2,i,j,k,c)*u(4,i,j,k,c) ) + > * tmp2 + fjac(5,3,i,k) = - c2 * ( u(3,i,j,k,c)*u(4,i,j,k,c) ) + > * tmp2 + fjac(5,4,i,k) = c1 * ( u(5,i,j,k,c) * tmp1 ) + > - c2 * ( qs(i,j,k,c) + > + u(4,i,j,k,c)*u(4,i,j,k,c) * tmp2 ) + fjac(5,5,i,k) = c1 * u(4,i,j,k,c) * tmp1 + + njac(1,1,i,k) = 0.0d+00 + njac(1,2,i,k) = 0.0d+00 + njac(1,3,i,k) = 0.0d+00 + njac(1,4,i,k) = 0.0d+00 + njac(1,5,i,k) = 0.0d+00 + + njac(2,1,i,k) = - c3c4 * tmp2 * u(2,i,j,k,c) + njac(2,2,i,k) = c3c4 * tmp1 + njac(2,3,i,k) = 0.0d+00 + njac(2,4,i,k) = 0.0d+00 + njac(2,5,i,k) = 0.0d+00 + + njac(3,1,i,k) = - c3c4 * tmp2 * u(3,i,j,k,c) + njac(3,2,i,k) = 0.0d+00 + njac(3,3,i,k) = c3c4 * tmp1 + njac(3,4,i,k) = 0.0d+00 + njac(3,5,i,k) = 0.0d+00 + + njac(4,1,i,k) = - con43 * c3c4 * tmp2 * u(4,i,j,k,c) + njac(4,2,i,k) = 0.0d+00 + njac(4,3,i,k) = 0.0d+00 + njac(4,4,i,k) = con43 * c3 * c4 * tmp1 + njac(4,5,i,k) = 0.0d+00 + + njac(5,1,i,k) = - ( c3c4 + > - c1345 ) * tmp3 * (u(2,i,j,k,c)**2) + > - ( c3c4 - c1345 ) * tmp3 * (u(3,i,j,k,c)**2) + > - ( con43 * c3c4 + > - c1345 ) * tmp3 * (u(4,i,j,k,c)**2) + > - c1345 * tmp2 * u(5,i,j,k,c) + + njac(5,2,i,k) = ( c3c4 - c1345 ) * tmp2 * u(2,i,j,k,c) + njac(5,3,i,k) = ( c3c4 - c1345 ) * tmp2 * u(3,i,j,k,c) + njac(5,4,i,k) = ( con43 * c3c4 + > - c1345 ) * tmp2 * u(4,i,j,k,c) + njac(5,5,i,k) = ( c1345 )* tmp1 + + + enddo + enddo + +c--------------------------------------------------------------------- +c now joacobians set, so form left hand side in z direction +c--------------------------------------------------------------------- + do k = start(3,c), ksize-end(3,c) + do i=start(1,c),isize + + tmp1 = dt * tz1 + tmp2 = dt * tz2 + + lhsa(1,1,i,k) = - tmp2 * fjac(1,1,i,k-1) + > - tmp1 * njac(1,1,i,k-1) + > - tmp1 * dz1 + lhsa(1,2,i,k) = - tmp2 * fjac(1,2,i,k-1) + > - tmp1 * njac(1,2,i,k-1) + lhsa(1,3,i,k) = - tmp2 * fjac(1,3,i,k-1) + > - tmp1 * njac(1,3,i,k-1) + lhsa(1,4,i,k) = - tmp2 * fjac(1,4,i,k-1) + > - tmp1 * njac(1,4,i,k-1) + lhsa(1,5,i,k) = - tmp2 * fjac(1,5,i,k-1) + > - tmp1 * njac(1,5,i,k-1) + + lhsa(2,1,i,k) = - tmp2 * fjac(2,1,i,k-1) + > - tmp1 * njac(2,1,i,k-1) + lhsa(2,2,i,k) = - tmp2 * fjac(2,2,i,k-1) + > - tmp1 * njac(2,2,i,k-1) + > - tmp1 * dz2 + lhsa(2,3,i,k) = - tmp2 * fjac(2,3,i,k-1) + > - tmp1 * njac(2,3,i,k-1) + lhsa(2,4,i,k) = - tmp2 * fjac(2,4,i,k-1) + > - tmp1 * njac(2,4,i,k-1) + lhsa(2,5,i,k) = - tmp2 * fjac(2,5,i,k-1) + > - tmp1 * njac(2,5,i,k-1) + + lhsa(3,1,i,k) = - tmp2 * fjac(3,1,i,k-1) + > - tmp1 * njac(3,1,i,k-1) + lhsa(3,2,i,k) = - tmp2 * fjac(3,2,i,k-1) + > - tmp1 * njac(3,2,i,k-1) + lhsa(3,3,i,k) = - tmp2 * fjac(3,3,i,k-1) + > - tmp1 * njac(3,3,i,k-1) + > - tmp1 * dz3 + lhsa(3,4,i,k) = - tmp2 * fjac(3,4,i,k-1) + > - tmp1 * njac(3,4,i,k-1) + lhsa(3,5,i,k) = - tmp2 * fjac(3,5,i,k-1) + > - tmp1 * njac(3,5,i,k-1) + + lhsa(4,1,i,k) = - tmp2 * fjac(4,1,i,k-1) + > - tmp1 * njac(4,1,i,k-1) + lhsa(4,2,i,k) = - tmp2 * fjac(4,2,i,k-1) + > - tmp1 * njac(4,2,i,k-1) + lhsa(4,3,i,k) = - tmp2 * fjac(4,3,i,k-1) + > - tmp1 * njac(4,3,i,k-1) + lhsa(4,4,i,k) = - tmp2 * fjac(4,4,i,k-1) + > - tmp1 * njac(4,4,i,k-1) + > - tmp1 * dz4 + lhsa(4,5,i,k) = - tmp2 * fjac(4,5,i,k-1) + > - tmp1 * njac(4,5,i,k-1) + + lhsa(5,1,i,k) = - tmp2 * fjac(5,1,i,k-1) + > - tmp1 * njac(5,1,i,k-1) + lhsa(5,2,i,k) = - tmp2 * fjac(5,2,i,k-1) + > - tmp1 * njac(5,2,i,k-1) + lhsa(5,3,i,k) = - tmp2 * fjac(5,3,i,k-1) + > - tmp1 * njac(5,3,i,k-1) + lhsa(5,4,i,k) = - tmp2 * fjac(5,4,i,k-1) + > - tmp1 * njac(5,4,i,k-1) + lhsa(5,5,i,k) = - tmp2 * fjac(5,5,i,k-1) + > - tmp1 * njac(5,5,i,k-1) + > - tmp1 * dz5 + + lhsb(1,1,i,k) = 1.0d+00 + > + tmp1 * 2.0d+00 * njac(1,1,i,k) + > + tmp1 * 2.0d+00 * dz1 + lhsb(1,2,i,k) = tmp1 * 2.0d+00 * njac(1,2,i,k) + lhsb(1,3,i,k) = tmp1 * 2.0d+00 * njac(1,3,i,k) + lhsb(1,4,i,k) = tmp1 * 2.0d+00 * njac(1,4,i,k) + lhsb(1,5,i,k) = tmp1 * 2.0d+00 * njac(1,5,i,k) + + lhsb(2,1,i,k) = tmp1 * 2.0d+00 * njac(2,1,i,k) + lhsb(2,2,i,k) = 1.0d+00 + > + tmp1 * 2.0d+00 * njac(2,2,i,k) + > + tmp1 * 2.0d+00 * dz2 + lhsb(2,3,i,k) = tmp1 * 2.0d+00 * njac(2,3,i,k) + lhsb(2,4,i,k) = tmp1 * 2.0d+00 * njac(2,4,i,k) + lhsb(2,5,i,k) = tmp1 * 2.0d+00 * njac(2,5,i,k) + + lhsb(3,1,i,k) = tmp1 * 2.0d+00 * njac(3,1,i,k) + lhsb(3,2,i,k) = tmp1 * 2.0d+00 * njac(3,2,i,k) + lhsb(3,3,i,k) = 1.0d+00 + > + tmp1 * 2.0d+00 * njac(3,3,i,k) + > + tmp1 * 2.0d+00 * dz3 + lhsb(3,4,i,k) = tmp1 * 2.0d+00 * njac(3,4,i,k) + lhsb(3,5,i,k) = tmp1 * 2.0d+00 * njac(3,5,i,k) + + lhsb(4,1,i,k) = tmp1 * 2.0d+00 * njac(4,1,i,k) + lhsb(4,2,i,k) = tmp1 * 2.0d+00 * njac(4,2,i,k) + lhsb(4,3,i,k) = tmp1 * 2.0d+00 * njac(4,3,i,k) + lhsb(4,4,i,k) = 1.0d+00 + > + tmp1 * 2.0d+00 * njac(4,4,i,k) + > + tmp1 * 2.0d+00 * dz4 + lhsb(4,5,i,k) = tmp1 * 2.0d+00 * njac(4,5,i,k) + + lhsb(5,1,i,k) = tmp1 * 2.0d+00 * njac(5,1,i,k) + lhsb(5,2,i,k) = tmp1 * 2.0d+00 * njac(5,2,i,k) + lhsb(5,3,i,k) = tmp1 * 2.0d+00 * njac(5,3,i,k) + lhsb(5,4,i,k) = tmp1 * 2.0d+00 * njac(5,4,i,k) + lhsb(5,5,i,k) = 1.0d+00 + > + tmp1 * 2.0d+00 * njac(5,5,i,k) + > + tmp1 * 2.0d+00 * dz5 + + lhsc(1,1,i,j,k,c) = tmp2 * fjac(1,1,i,k+1) + > - tmp1 * njac(1,1,i,k+1) + > - tmp1 * dz1 + lhsc(1,2,i,j,k,c) = tmp2 * fjac(1,2,i,k+1) + > - tmp1 * njac(1,2,i,k+1) + lhsc(1,3,i,j,k,c) = tmp2 * fjac(1,3,i,k+1) + > - tmp1 * njac(1,3,i,k+1) + lhsc(1,4,i,j,k,c) = tmp2 * fjac(1,4,i,k+1) + > - tmp1 * njac(1,4,i,k+1) + lhsc(1,5,i,j,k,c) = tmp2 * fjac(1,5,i,k+1) + > - tmp1 * njac(1,5,i,k+1) + + lhsc(2,1,i,j,k,c) = tmp2 * fjac(2,1,i,k+1) + > - tmp1 * njac(2,1,i,k+1) + lhsc(2,2,i,j,k,c) = tmp2 * fjac(2,2,i,k+1) + > - tmp1 * njac(2,2,i,k+1) + > - tmp1 * dz2 + lhsc(2,3,i,j,k,c) = tmp2 * fjac(2,3,i,k+1) + > - tmp1 * njac(2,3,i,k+1) + lhsc(2,4,i,j,k,c) = tmp2 * fjac(2,4,i,k+1) + > - tmp1 * njac(2,4,i,k+1) + lhsc(2,5,i,j,k,c) = tmp2 * fjac(2,5,i,k+1) + > - tmp1 * njac(2,5,i,k+1) + + lhsc(3,1,i,j,k,c) = tmp2 * fjac(3,1,i,k+1) + > - tmp1 * njac(3,1,i,k+1) + lhsc(3,2,i,j,k,c) = tmp2 * fjac(3,2,i,k+1) + > - tmp1 * njac(3,2,i,k+1) + lhsc(3,3,i,j,k,c) = tmp2 * fjac(3,3,i,k+1) + > - tmp1 * njac(3,3,i,k+1) + > - tmp1 * dz3 + lhsc(3,4,i,j,k,c) = tmp2 * fjac(3,4,i,k+1) + > - tmp1 * njac(3,4,i,k+1) + lhsc(3,5,i,j,k,c) = tmp2 * fjac(3,5,i,k+1) + > - tmp1 * njac(3,5,i,k+1) + + lhsc(4,1,i,j,k,c) = tmp2 * fjac(4,1,i,k+1) + > - tmp1 * njac(4,1,i,k+1) + lhsc(4,2,i,j,k,c) = tmp2 * fjac(4,2,i,k+1) + > - tmp1 * njac(4,2,i,k+1) + lhsc(4,3,i,j,k,c) = tmp2 * fjac(4,3,i,k+1) + > - tmp1 * njac(4,3,i,k+1) + lhsc(4,4,i,j,k,c) = tmp2 * fjac(4,4,i,k+1) + > - tmp1 * njac(4,4,i,k+1) + > - tmp1 * dz4 + lhsc(4,5,i,j,k,c) = tmp2 * fjac(4,5,i,k+1) + > - tmp1 * njac(4,5,i,k+1) + + lhsc(5,1,i,j,k,c) = tmp2 * fjac(5,1,i,k+1) + > - tmp1 * njac(5,1,i,k+1) + lhsc(5,2,i,j,k,c) = tmp2 * fjac(5,2,i,k+1) + > - tmp1 * njac(5,2,i,k+1) + lhsc(5,3,i,j,k,c) = tmp2 * fjac(5,3,i,k+1) + > - tmp1 * njac(5,3,i,k+1) + lhsc(5,4,i,j,k,c) = tmp2 * fjac(5,4,i,k+1) + > - tmp1 * njac(5,4,i,k+1) + lhsc(5,5,i,j,k,c) = tmp2 * fjac(5,5,i,k+1) + > - tmp1 * njac(5,5,i,k+1) + > - tmp1 * dz5 + + enddo + enddo + + +c--------------------------------------------------------------------- +c outer most do loops - sweeping in i direction +c--------------------------------------------------------------------- + if (first .eq. 1) then + +c--------------------------------------------------------------------- +c multiply c(i,j,kstart) by b_inverse and copy back to c +c multiply rhs(kstart) by b_inverse(kstart) and copy to rhs +c--------------------------------------------------------------------- +!dir$ ivdep + do i=start(1,c),isize + call binvcrhs( lhsb(1,1,i,kstart), + > lhsc(1,1,i,j,kstart,c), + > rhs(1,i,j,kstart,c) ) + enddo + + endif + +c--------------------------------------------------------------------- +c begin inner most do loop +c do all the elements of the cell unless last +c--------------------------------------------------------------------- + do k=kstart+first,ksize-last +!dir$ ivdep + do i=start(1,c),isize + +c--------------------------------------------------------------------- +c subtract A*lhs_vector(k-1) from lhs_vector(k) +c +c rhs(k) = rhs(k) - A*rhs(k-1) +c--------------------------------------------------------------------- + call matvec_sub(lhsa(1,1,i,k), + > rhs(1,i,j,k-1,c),rhs(1,i,j,k,c)) + +c--------------------------------------------------------------------- +c B(k) = B(k) - C(k-1)*A(k) +c call matmul_sub(aa,i,j,k,c,cc,i,j,k-1,c,bb,i,j,k,c) +c--------------------------------------------------------------------- + call matmul_sub(lhsa(1,1,i,k), + > lhsc(1,1,i,j,k-1,c), + > lhsb(1,1,i,k)) + +c--------------------------------------------------------------------- +c multiply c(i,j,k) by b_inverse and copy back to c +c multiply rhs(i,j,1) by b_inverse(i,j,1) and copy to rhs +c--------------------------------------------------------------------- + call binvcrhs( lhsb(1,1,i,k), + > lhsc(1,1,i,j,k,c), + > rhs(1,i,j,k,c) ) + + enddo + enddo + +c--------------------------------------------------------------------- +c Now finish up special cases for last cell +c--------------------------------------------------------------------- + if (last .eq. 1) then + +!dir$ ivdep + do i=start(1,c),isize +c--------------------------------------------------------------------- +c rhs(ksize) = rhs(ksize) - A*rhs(ksize-1) +c--------------------------------------------------------------------- + call matvec_sub(lhsa(1,1,i,ksize), + > rhs(1,i,j,ksize-1,c),rhs(1,i,j,ksize,c)) + +c--------------------------------------------------------------------- +c B(ksize) = B(ksize) - C(ksize-1)*A(ksize) +c call matmul_sub(aa,i,j,ksize,c, +c $ cc,i,j,ksize-1,c,bb,i,j,ksize,c) +c--------------------------------------------------------------------- + call matmul_sub(lhsa(1,1,i,ksize), + > lhsc(1,1,i,j,ksize-1,c), + > lhsb(1,1,i,ksize)) + +c--------------------------------------------------------------------- +c multiply rhs(ksize) by b_inverse(ksize) and copy to rhs +c--------------------------------------------------------------------- + call binvrhs( lhsb(1,1,i,ksize), + > rhs(1,i,j,ksize,c) ) + enddo + + endif + enddo + + + return + end + + + + + +