Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Improve error message
[simgrid.git] / examples / smpi / NAS / BT / z_solve.f
1 c---------------------------------------------------------------------
2 c---------------------------------------------------------------------
3
4       subroutine z_solve
5
6 c---------------------------------------------------------------------
7 c---------------------------------------------------------------------
8
9 c---------------------------------------------------------------------
10 c     Performs line solves in Z direction by first factoring
11 c     the block-tridiagonal matrix into an upper triangular matrix, 
12 c     and then performing back substitution to solve for the unknow
13 c     vectors of each line.  
14 c     
15 c     Make sure we treat elements zero to cell_size in the direction
16 c     of the sweep.
17 c---------------------------------------------------------------------
18
19       include 'header.h'
20       include 'mpinpb.h'
21
22       integer c, kstart, stage,
23      >     first, last, recv_id, error, r_status(MPI_STATUS_SIZE),
24      >     isize,jsize,ksize,send_id
25
26       kstart = 0
27
28 c---------------------------------------------------------------------
29 c     in our terminology stage is the number of the cell in the y-direction
30 c     i.e. stage = 1 means the start of the line stage=ncells means end
31 c---------------------------------------------------------------------
32       do stage = 1,ncells
33          c = slice(3,stage)
34          isize = cell_size(1,c) - 1
35          jsize = cell_size(2,c) - 1
36          ksize = cell_size(3,c) - 1
37 c---------------------------------------------------------------------
38 c     set last-cell flag
39 c---------------------------------------------------------------------
40          if (stage .eq. ncells) then
41             last = 1
42          else
43             last = 0
44          endif
45
46          if (stage .eq. 1) then
47 c---------------------------------------------------------------------
48 c     This is the first cell, so solve without receiving data
49 c---------------------------------------------------------------------
50             first = 1
51 c            call lhsz(c)
52             call z_solve_cell(first,last,c)
53          else
54 c---------------------------------------------------------------------
55 c     Not the first cell of this line, so receive info from
56 c     processor working on preceeding cell
57 c---------------------------------------------------------------------
58             first = 0
59             call z_receive_solve_info(recv_id,c)
60 c---------------------------------------------------------------------
61 c     overlap computations and communications
62 c---------------------------------------------------------------------
63 c            call lhsz(c)
64 c---------------------------------------------------------------------
65 c     wait for completion
66 c---------------------------------------------------------------------
67             call mpi_wait(send_id,r_status,error)
68             call mpi_wait(recv_id,r_status,error)
69 c---------------------------------------------------------------------
70 c     install C'(kstart+1) and rhs'(kstart+1) to be used in this cell
71 c---------------------------------------------------------------------
72             call z_unpack_solve_info(c)
73             call z_solve_cell(first,last,c)
74          endif
75
76          if (last .eq. 0) call z_send_solve_info(send_id,c)
77       enddo
78
79 c---------------------------------------------------------------------
80 c     now perform backsubstitution in reverse direction
81 c---------------------------------------------------------------------
82       do stage = ncells, 1, -1
83          c = slice(3,stage)
84          first = 0
85          last = 0
86          if (stage .eq. 1) first = 1
87          if (stage .eq. ncells) then
88             last = 1
89 c---------------------------------------------------------------------
90 c     last cell, so perform back substitute without waiting
91 c---------------------------------------------------------------------
92             call z_backsubstitute(first, last,c)
93          else
94             call z_receive_backsub_info(recv_id,c)
95             call mpi_wait(send_id,r_status,error)
96             call mpi_wait(recv_id,r_status,error)
97             call z_unpack_backsub_info(c)
98             call z_backsubstitute(first,last,c)
99          endif
100          if (first .eq. 0) call z_send_backsub_info(send_id,c)
101       enddo
102
103
104       return
105       end
106       
107 c---------------------------------------------------------------------
108 c---------------------------------------------------------------------
109       
110       subroutine z_unpack_solve_info(c)
111 c---------------------------------------------------------------------
112 c---------------------------------------------------------------------
113
114 c---------------------------------------------------------------------
115 c     unpack C'(-1) and rhs'(-1) for
116 c     all i and j
117 c---------------------------------------------------------------------
118
119       include 'header.h'
120
121       integer i,j,m,n,ptr,c,kstart 
122
123       kstart = 0
124       ptr = 0
125       do j=0,JMAX-1
126          do i=0,IMAX-1
127             do m=1,BLOCK_SIZE
128                do n=1,BLOCK_SIZE
129                   lhsc(m,n,i,j,kstart-1,c) = out_buffer(ptr+n)
130                enddo
131                ptr = ptr+BLOCK_SIZE
132             enddo
133             do n=1,BLOCK_SIZE
134                rhs(n,i,j,kstart-1,c) = out_buffer(ptr+n)
135             enddo
136             ptr = ptr+BLOCK_SIZE
137          enddo
138       enddo
139
140       return
141       end
142
143 c---------------------------------------------------------------------
144 c---------------------------------------------------------------------
145       
146       subroutine z_send_solve_info(send_id,c)
147
148 c---------------------------------------------------------------------
149 c---------------------------------------------------------------------
150
151 c---------------------------------------------------------------------
152 c     pack up and send C'(kend) and rhs'(kend) for
153 c     all i and j
154 c---------------------------------------------------------------------
155
156       include 'header.h'
157       include 'mpinpb.h'
158
159       integer i,j,m,n,ksize,ptr,c,ip,jp
160       integer error,send_id,buffer_size
161
162       ksize = cell_size(3,c)-1
163       ip = cell_coord(1,c) - 1
164       jp = cell_coord(2,c) - 1
165       buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
166      >     (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE)
167
168 c---------------------------------------------------------------------
169 c     pack up buffer
170 c---------------------------------------------------------------------
171       ptr = 0
172       do j=0,JMAX-1
173          do i=0,IMAX-1
174             do m=1,BLOCK_SIZE
175                do n=1,BLOCK_SIZE
176                   in_buffer(ptr+n) = lhsc(m,n,i,j,ksize,c)
177                enddo
178                ptr = ptr+BLOCK_SIZE
179             enddo
180             do n=1,BLOCK_SIZE
181                in_buffer(ptr+n) = rhs(n,i,j,ksize,c)
182             enddo
183             ptr = ptr+BLOCK_SIZE
184          enddo
185       enddo
186
187 c---------------------------------------------------------------------
188 c     send buffer 
189 c---------------------------------------------------------------------
190       call mpi_isend(in_buffer, buffer_size,
191      >     dp_type, successor(3),
192      >     BOTTOM+ip+jp*NCELLS, comm_solve,
193      >     send_id,error)
194
195       return
196       end
197
198 c---------------------------------------------------------------------
199 c---------------------------------------------------------------------
200
201       subroutine z_send_backsub_info(send_id,c)
202
203 c---------------------------------------------------------------------
204 c---------------------------------------------------------------------
205
206 c---------------------------------------------------------------------
207 c     pack up and send U(jstart) for all i and j
208 c---------------------------------------------------------------------
209
210       include 'header.h'
211       include 'mpinpb.h'
212
213       integer i,j,n,ptr,c,kstart,ip,jp
214       integer error,send_id,buffer_size
215
216 c---------------------------------------------------------------------
217 c     Send element 0 to previous processor
218 c---------------------------------------------------------------------
219       kstart = 0
220       ip = cell_coord(1,c)-1
221       jp = cell_coord(2,c)-1
222       buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE
223       ptr = 0
224       do j=0,JMAX-1
225          do i=0,IMAX-1
226             do n=1,BLOCK_SIZE
227                in_buffer(ptr+n) = rhs(n,i,j,kstart,c)
228             enddo
229             ptr = ptr+BLOCK_SIZE
230          enddo
231       enddo
232
233       call mpi_isend(in_buffer, buffer_size,
234      >     dp_type, predecessor(3), 
235      >     TOP+ip+jp*NCELLS, comm_solve, 
236      >     send_id,error)
237
238       return
239       end
240
241 c---------------------------------------------------------------------
242 c---------------------------------------------------------------------
243
244       subroutine z_unpack_backsub_info(c)
245
246 c---------------------------------------------------------------------
247 c---------------------------------------------------------------------
248
249 c---------------------------------------------------------------------
250 c     unpack U(ksize) for all i and j
251 c---------------------------------------------------------------------
252
253       include 'header.h'
254
255       integer i,j,n,ptr,c
256
257       ptr = 0
258       do j=0,JMAX-1
259          do i=0,IMAX-1
260             do n=1,BLOCK_SIZE
261                backsub_info(n,i,j,c) = out_buffer(ptr+n)
262             enddo
263             ptr = ptr+BLOCK_SIZE
264          enddo
265       enddo
266
267       return
268       end
269
270
271 c---------------------------------------------------------------------
272 c---------------------------------------------------------------------
273
274       subroutine z_receive_backsub_info(recv_id,c)
275
276 c---------------------------------------------------------------------
277 c---------------------------------------------------------------------
278
279 c---------------------------------------------------------------------
280 c     post mpi receives
281 c---------------------------------------------------------------------
282
283       include 'header.h'
284       include 'mpinpb.h'
285
286       integer error,recv_id,ip,jp,c,buffer_size
287       ip = cell_coord(1,c) - 1
288       jp = cell_coord(2,c) - 1
289       buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE
290       call mpi_irecv(out_buffer, buffer_size,
291      >     dp_type, successor(3), 
292      >     TOP+ip+jp*NCELLS, comm_solve, 
293      >     recv_id, error)
294
295       return
296       end
297
298 c---------------------------------------------------------------------
299 c---------------------------------------------------------------------
300
301       subroutine z_receive_solve_info(recv_id,c)
302
303 c---------------------------------------------------------------------
304 c---------------------------------------------------------------------
305
306 c---------------------------------------------------------------------
307 c     post mpi receives 
308 c---------------------------------------------------------------------
309
310       include 'header.h'
311       include 'mpinpb.h'
312
313       integer ip,jp,recv_id,error,c,buffer_size
314       ip = cell_coord(1,c) - 1
315       jp = cell_coord(2,c) - 1
316       buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
317      >     (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE)
318       call mpi_irecv(out_buffer, buffer_size,
319      >     dp_type, predecessor(3), 
320      >     BOTTOM+ip+jp*NCELLS, comm_solve,
321      >     recv_id, error)
322
323       return
324       end
325       
326 c---------------------------------------------------------------------
327 c---------------------------------------------------------------------
328
329       subroutine z_backsubstitute(first, last, c)
330
331 c---------------------------------------------------------------------
332 c---------------------------------------------------------------------
333
334 c---------------------------------------------------------------------
335 c     back solve: if last cell, then generate U(ksize)=rhs(ksize)
336 c     else assume U(ksize) is loaded in un pack backsub_info
337 c     so just use it
338 c     after call u(kstart) will be sent to next cell
339 c---------------------------------------------------------------------
340
341       include 'header.h'
342
343       integer first, last, c, i, k
344       integer m,n,j,jsize,isize,ksize,kstart
345       
346       kstart = 0
347       isize = cell_size(1,c)-end(1,c)-1      
348       jsize = cell_size(2,c)-end(2,c)-1
349       ksize = cell_size(3,c)-1
350       if (last .eq. 0) then
351          do j=start(2,c),jsize
352             do i=start(1,c),isize
353 c---------------------------------------------------------------------
354 c     U(jsize) uses info from previous cell if not last cell
355 c---------------------------------------------------------------------
356                do m=1,BLOCK_SIZE
357                   do n=1,BLOCK_SIZE
358                      rhs(m,i,j,ksize,c) = rhs(m,i,j,ksize,c) 
359      >                    - lhsc(m,n,i,j,ksize,c)*
360      >                    backsub_info(n,i,j,c)
361                   enddo
362                enddo
363             enddo
364          enddo
365       endif
366       do k=ksize-1,kstart,-1
367          do j=start(2,c),jsize
368             do i=start(1,c),isize
369                do m=1,BLOCK_SIZE
370                   do n=1,BLOCK_SIZE
371                      rhs(m,i,j,k,c) = rhs(m,i,j,k,c) 
372      >                    - lhsc(m,n,i,j,k,c)*rhs(n,i,j,k+1,c)
373                   enddo
374                enddo
375             enddo
376          enddo
377       enddo
378
379       return
380       end
381
382 c---------------------------------------------------------------------
383 c---------------------------------------------------------------------
384
385       subroutine z_solve_cell(first,last,c)
386
387 c---------------------------------------------------------------------
388 c---------------------------------------------------------------------
389
390 c---------------------------------------------------------------------
391 c     performs guaussian elimination on this cell.
392 c     
393 c     assumes that unpacking routines for non-first cells 
394 c     preload C' and rhs' from previous cell.
395 c     
396 c     assumed send happens outside this routine, but that
397 c     c'(KMAX) and rhs'(KMAX) will be sent to next cell.
398 c---------------------------------------------------------------------
399
400       include 'header.h'
401       include 'work_lhs.h'
402
403       integer first,last,c
404       integer i,j,k,isize,ksize,jsize,kstart
405       double precision utmp(6,-2:KMAX+1)
406
407       kstart = 0
408       isize = cell_size(1,c)-end(1,c)-1
409       jsize = cell_size(2,c)-end(2,c)-1
410       ksize = cell_size(3,c)-1
411
412       call lhsabinit(lhsa, lhsb, ksize)
413
414       do j=start(2,c),jsize 
415          do i=start(1,c),isize
416
417 c---------------------------------------------------------------------
418 c     This function computes the left hand side for the three z-factors   
419 c---------------------------------------------------------------------
420
421 c---------------------------------------------------------------------
422 c     Compute the indices for storing the block-diagonal matrix;
423 c     determine c (labeled f) and s jacobians for cell c
424 c---------------------------------------------------------------------
425             do k = start(3,c)-1, cell_size(3,c)-end(3,c)
426                utmp(1,k) = 1.0d0 / u(1,i,j,k,c)
427                utmp(2,k) = u(2,i,j,k,c)
428                utmp(3,k) = u(3,i,j,k,c)
429                utmp(4,k) = u(4,i,j,k,c)
430                utmp(5,k) = u(5,i,j,k,c)
431                utmp(6,k) = qs(i,j,k,c)
432             end do
433
434             do k = start(3,c)-1, cell_size(3,c)-end(3,c)
435
436                tmp1 = utmp(1,k)
437                tmp2 = tmp1 * tmp1
438                tmp3 = tmp1 * tmp2
439
440                fjac(1,1,k) = 0.0d+00
441                fjac(1,2,k) = 0.0d+00
442                fjac(1,3,k) = 0.0d+00
443                fjac(1,4,k) = 1.0d+00
444                fjac(1,5,k) = 0.0d+00
445
446                fjac(2,1,k) = - ( utmp(2,k)*utmp(4,k) ) 
447      >              * tmp2 
448                fjac(2,2,k) = utmp(4,k) * tmp1
449                fjac(2,3,k) = 0.0d+00
450                fjac(2,4,k) = utmp(2,k) * tmp1
451                fjac(2,5,k) = 0.0d+00
452
453                fjac(3,1,k) = - ( utmp(3,k)*utmp(4,k) )
454      >              * tmp2 
455                fjac(3,2,k) = 0.0d+00
456                fjac(3,3,k) = utmp(4,k) * tmp1
457                fjac(3,4,k) = utmp(3,k) * tmp1
458                fjac(3,5,k) = 0.0d+00
459
460                fjac(4,1,k) = - (utmp(4,k)*utmp(4,k) * tmp2 ) 
461      >              + c2 * utmp(6,k)
462                fjac(4,2,k) = - c2 *  utmp(2,k) * tmp1 
463                fjac(4,3,k) = - c2 *  utmp(3,k) * tmp1
464                fjac(4,4,k) = ( 2.0d+00 - c2 )
465      >              *  utmp(4,k) * tmp1 
466                fjac(4,5,k) = c2
467
468                fjac(5,1,k) = ( c2 * 2.0d0 * utmp(6,k)
469      >              - c1 * ( utmp(5,k) * tmp1 ) )
470      >              * ( utmp(4,k) * tmp1 )
471                fjac(5,2,k) = - c2 * ( utmp(2,k)*utmp(4,k) )
472      >              * tmp2 
473                fjac(5,3,k) = - c2 * ( utmp(3,k)*utmp(4,k) )
474      >              * tmp2
475                fjac(5,4,k) = c1 * ( utmp(5,k) * tmp1 )
476      >              - c2 * ( utmp(6,k)
477      >              + utmp(4,k)*utmp(4,k) * tmp2 )
478                fjac(5,5,k) = c1 * utmp(4,k) * tmp1
479
480                njac(1,1,k) = 0.0d+00
481                njac(1,2,k) = 0.0d+00
482                njac(1,3,k) = 0.0d+00
483                njac(1,4,k) = 0.0d+00
484                njac(1,5,k) = 0.0d+00
485
486                njac(2,1,k) = - c3c4 * tmp2 * utmp(2,k)
487                njac(2,2,k) =   c3c4 * tmp1
488                njac(2,3,k) =   0.0d+00
489                njac(2,4,k) =   0.0d+00
490                njac(2,5,k) =   0.0d+00
491
492                njac(3,1,k) = - c3c4 * tmp2 * utmp(3,k)
493                njac(3,2,k) =   0.0d+00
494                njac(3,3,k) =   c3c4 * tmp1
495                njac(3,4,k) =   0.0d+00
496                njac(3,5,k) =   0.0d+00
497
498                njac(4,1,k) = - con43 * c3c4 * tmp2 * utmp(4,k)
499                njac(4,2,k) =   0.0d+00
500                njac(4,3,k) =   0.0d+00
501                njac(4,4,k) =   con43 * c3 * c4 * tmp1
502                njac(4,5,k) =   0.0d+00
503
504                njac(5,1,k) = - (  c3c4
505      >              - c1345 ) * tmp3 * (utmp(2,k)**2)
506      >              - ( c3c4 - c1345 ) * tmp3 * (utmp(3,k)**2)
507      >              - ( con43 * c3c4
508      >              - c1345 ) * tmp3 * (utmp(4,k)**2)
509      >              - c1345 * tmp2 * utmp(5,k)
510
511                njac(5,2,k) = (  c3c4 - c1345 ) * tmp2 * utmp(2,k)
512                njac(5,3,k) = (  c3c4 - c1345 ) * tmp2 * utmp(3,k)
513                njac(5,4,k) = ( con43 * c3c4
514      >              - c1345 ) * tmp2 * utmp(4,k)
515                njac(5,5,k) = ( c1345 )* tmp1
516
517
518             enddo
519
520 c---------------------------------------------------------------------
521 c     now joacobians set, so form left hand side in z direction
522 c---------------------------------------------------------------------
523             do k = start(3,c), ksize-end(3,c)
524
525                tmp1 = dt * tz1
526                tmp2 = dt * tz2
527
528                lhsa(1,1,k) = - tmp2 * fjac(1,1,k-1)
529      >              - tmp1 * njac(1,1,k-1)
530      >              - tmp1 * dz1 
531                lhsa(1,2,k) = - tmp2 * fjac(1,2,k-1)
532      >              - tmp1 * njac(1,2,k-1)
533                lhsa(1,3,k) = - tmp2 * fjac(1,3,k-1)
534      >              - tmp1 * njac(1,3,k-1)
535                lhsa(1,4,k) = - tmp2 * fjac(1,4,k-1)
536      >              - tmp1 * njac(1,4,k-1)
537                lhsa(1,5,k) = - tmp2 * fjac(1,5,k-1)
538      >              - tmp1 * njac(1,5,k-1)
539
540                lhsa(2,1,k) = - tmp2 * fjac(2,1,k-1)
541      >              - tmp1 * njac(2,1,k-1)
542                lhsa(2,2,k) = - tmp2 * fjac(2,2,k-1)
543      >              - tmp1 * njac(2,2,k-1)
544      >              - tmp1 * dz2
545                lhsa(2,3,k) = - tmp2 * fjac(2,3,k-1)
546      >              - tmp1 * njac(2,3,k-1)
547                lhsa(2,4,k) = - tmp2 * fjac(2,4,k-1)
548      >              - tmp1 * njac(2,4,k-1)
549                lhsa(2,5,k) = - tmp2 * fjac(2,5,k-1)
550      >              - tmp1 * njac(2,5,k-1)
551
552                lhsa(3,1,k) = - tmp2 * fjac(3,1,k-1)
553      >              - tmp1 * njac(3,1,k-1)
554                lhsa(3,2,k) = - tmp2 * fjac(3,2,k-1)
555      >              - tmp1 * njac(3,2,k-1)
556                lhsa(3,3,k) = - tmp2 * fjac(3,3,k-1)
557      >              - tmp1 * njac(3,3,k-1)
558      >              - tmp1 * dz3 
559                lhsa(3,4,k) = - tmp2 * fjac(3,4,k-1)
560      >              - tmp1 * njac(3,4,k-1)
561                lhsa(3,5,k) = - tmp2 * fjac(3,5,k-1)
562      >              - tmp1 * njac(3,5,k-1)
563
564                lhsa(4,1,k) = - tmp2 * fjac(4,1,k-1)
565      >              - tmp1 * njac(4,1,k-1)
566                lhsa(4,2,k) = - tmp2 * fjac(4,2,k-1)
567      >              - tmp1 * njac(4,2,k-1)
568                lhsa(4,3,k) = - tmp2 * fjac(4,3,k-1)
569      >              - tmp1 * njac(4,3,k-1)
570                lhsa(4,4,k) = - tmp2 * fjac(4,4,k-1)
571      >              - tmp1 * njac(4,4,k-1)
572      >              - tmp1 * dz4
573                lhsa(4,5,k) = - tmp2 * fjac(4,5,k-1)
574      >              - tmp1 * njac(4,5,k-1)
575
576                lhsa(5,1,k) = - tmp2 * fjac(5,1,k-1)
577      >              - tmp1 * njac(5,1,k-1)
578                lhsa(5,2,k) = - tmp2 * fjac(5,2,k-1)
579      >              - tmp1 * njac(5,2,k-1)
580                lhsa(5,3,k) = - tmp2 * fjac(5,3,k-1)
581      >              - tmp1 * njac(5,3,k-1)
582                lhsa(5,4,k) = - tmp2 * fjac(5,4,k-1)
583      >              - tmp1 * njac(5,4,k-1)
584                lhsa(5,5,k) = - tmp2 * fjac(5,5,k-1)
585      >              - tmp1 * njac(5,5,k-1)
586      >              - tmp1 * dz5
587
588                lhsb(1,1,k) = 1.0d+00
589      >              + tmp1 * 2.0d+00 * njac(1,1,k)
590      >              + tmp1 * 2.0d+00 * dz1
591                lhsb(1,2,k) = tmp1 * 2.0d+00 * njac(1,2,k)
592                lhsb(1,3,k) = tmp1 * 2.0d+00 * njac(1,3,k)
593                lhsb(1,4,k) = tmp1 * 2.0d+00 * njac(1,4,k)
594                lhsb(1,5,k) = tmp1 * 2.0d+00 * njac(1,5,k)
595
596                lhsb(2,1,k) = tmp1 * 2.0d+00 * njac(2,1,k)
597                lhsb(2,2,k) = 1.0d+00
598      >              + tmp1 * 2.0d+00 * njac(2,2,k)
599      >              + tmp1 * 2.0d+00 * dz2
600                lhsb(2,3,k) = tmp1 * 2.0d+00 * njac(2,3,k)
601                lhsb(2,4,k) = tmp1 * 2.0d+00 * njac(2,4,k)
602                lhsb(2,5,k) = tmp1 * 2.0d+00 * njac(2,5,k)
603
604                lhsb(3,1,k) = tmp1 * 2.0d+00 * njac(3,1,k)
605                lhsb(3,2,k) = tmp1 * 2.0d+00 * njac(3,2,k)
606                lhsb(3,3,k) = 1.0d+00
607      >              + tmp1 * 2.0d+00 * njac(3,3,k)
608      >              + tmp1 * 2.0d+00 * dz3
609                lhsb(3,4,k) = tmp1 * 2.0d+00 * njac(3,4,k)
610                lhsb(3,5,k) = tmp1 * 2.0d+00 * njac(3,5,k)
611
612                lhsb(4,1,k) = tmp1 * 2.0d+00 * njac(4,1,k)
613                lhsb(4,2,k) = tmp1 * 2.0d+00 * njac(4,2,k)
614                lhsb(4,3,k) = tmp1 * 2.0d+00 * njac(4,3,k)
615                lhsb(4,4,k) = 1.0d+00
616      >              + tmp1 * 2.0d+00 * njac(4,4,k)
617      >              + tmp1 * 2.0d+00 * dz4
618                lhsb(4,5,k) = tmp1 * 2.0d+00 * njac(4,5,k)
619
620                lhsb(5,1,k) = tmp1 * 2.0d+00 * njac(5,1,k)
621                lhsb(5,2,k) = tmp1 * 2.0d+00 * njac(5,2,k)
622                lhsb(5,3,k) = tmp1 * 2.0d+00 * njac(5,3,k)
623                lhsb(5,4,k) = tmp1 * 2.0d+00 * njac(5,4,k)
624                lhsb(5,5,k) = 1.0d+00
625      >              + tmp1 * 2.0d+00 * njac(5,5,k) 
626      >              + tmp1 * 2.0d+00 * dz5
627
628                lhsc(1,1,i,j,k,c) =  tmp2 * fjac(1,1,k+1)
629      >              - tmp1 * njac(1,1,k+1)
630      >              - tmp1 * dz1
631                lhsc(1,2,i,j,k,c) =  tmp2 * fjac(1,2,k+1)
632      >              - tmp1 * njac(1,2,k+1)
633                lhsc(1,3,i,j,k,c) =  tmp2 * fjac(1,3,k+1)
634      >              - tmp1 * njac(1,3,k+1)
635                lhsc(1,4,i,j,k,c) =  tmp2 * fjac(1,4,k+1)
636      >              - tmp1 * njac(1,4,k+1)
637                lhsc(1,5,i,j,k,c) =  tmp2 * fjac(1,5,k+1)
638      >              - tmp1 * njac(1,5,k+1)
639
640                lhsc(2,1,i,j,k,c) =  tmp2 * fjac(2,1,k+1)
641      >              - tmp1 * njac(2,1,k+1)
642                lhsc(2,2,i,j,k,c) =  tmp2 * fjac(2,2,k+1)
643      >              - tmp1 * njac(2,2,k+1)
644      >              - tmp1 * dz2
645                lhsc(2,3,i,j,k,c) =  tmp2 * fjac(2,3,k+1)
646      >              - tmp1 * njac(2,3,k+1)
647                lhsc(2,4,i,j,k,c) =  tmp2 * fjac(2,4,k+1)
648      >              - tmp1 * njac(2,4,k+1)
649                lhsc(2,5,i,j,k,c) =  tmp2 * fjac(2,5,k+1)
650      >              - tmp1 * njac(2,5,k+1)
651
652                lhsc(3,1,i,j,k,c) =  tmp2 * fjac(3,1,k+1)
653      >              - tmp1 * njac(3,1,k+1)
654                lhsc(3,2,i,j,k,c) =  tmp2 * fjac(3,2,k+1)
655      >              - tmp1 * njac(3,2,k+1)
656                lhsc(3,3,i,j,k,c) =  tmp2 * fjac(3,3,k+1)
657      >              - tmp1 * njac(3,3,k+1)
658      >              - tmp1 * dz3
659                lhsc(3,4,i,j,k,c) =  tmp2 * fjac(3,4,k+1)
660      >              - tmp1 * njac(3,4,k+1)
661                lhsc(3,5,i,j,k,c) =  tmp2 * fjac(3,5,k+1)
662      >              - tmp1 * njac(3,5,k+1)
663
664                lhsc(4,1,i,j,k,c) =  tmp2 * fjac(4,1,k+1)
665      >              - tmp1 * njac(4,1,k+1)
666                lhsc(4,2,i,j,k,c) =  tmp2 * fjac(4,2,k+1)
667      >              - tmp1 * njac(4,2,k+1)
668                lhsc(4,3,i,j,k,c) =  tmp2 * fjac(4,3,k+1)
669      >              - tmp1 * njac(4,3,k+1)
670                lhsc(4,4,i,j,k,c) =  tmp2 * fjac(4,4,k+1)
671      >              - tmp1 * njac(4,4,k+1)
672      >              - tmp1 * dz4
673                lhsc(4,5,i,j,k,c) =  tmp2 * fjac(4,5,k+1)
674      >              - tmp1 * njac(4,5,k+1)
675
676                lhsc(5,1,i,j,k,c) =  tmp2 * fjac(5,1,k+1)
677      >              - tmp1 * njac(5,1,k+1)
678                lhsc(5,2,i,j,k,c) =  tmp2 * fjac(5,2,k+1)
679      >              - tmp1 * njac(5,2,k+1)
680                lhsc(5,3,i,j,k,c) =  tmp2 * fjac(5,3,k+1)
681      >              - tmp1 * njac(5,3,k+1)
682                lhsc(5,4,i,j,k,c) =  tmp2 * fjac(5,4,k+1)
683      >              - tmp1 * njac(5,4,k+1)
684                lhsc(5,5,i,j,k,c) =  tmp2 * fjac(5,5,k+1)
685      >              - tmp1 * njac(5,5,k+1)
686      >              - tmp1 * dz5
687
688             enddo
689
690
691 c---------------------------------------------------------------------
692 c     outer most do loops - sweeping in i direction
693 c---------------------------------------------------------------------
694             if (first .eq. 1) then 
695
696 c---------------------------------------------------------------------
697 c     multiply c(i,j,kstart) by b_inverse and copy back to c
698 c     multiply rhs(kstart) by b_inverse(kstart) and copy to rhs
699 c---------------------------------------------------------------------
700                call binvcrhs( lhsb(1,1,kstart),
701      >                        lhsc(1,1,i,j,kstart,c),
702      >                        rhs(1,i,j,kstart,c) )
703
704             endif
705
706 c---------------------------------------------------------------------
707 c     begin inner most do loop
708 c     do all the elements of the cell unless last 
709 c---------------------------------------------------------------------
710             do k=kstart+first,ksize-last
711
712 c---------------------------------------------------------------------
713 c     subtract A*lhs_vector(k-1) from lhs_vector(k)
714 c     
715 c     rhs(k) = rhs(k) - A*rhs(k-1)
716 c---------------------------------------------------------------------
717                call matvec_sub(lhsa(1,1,k),
718      >                         rhs(1,i,j,k-1,c),rhs(1,i,j,k,c))
719
720 c---------------------------------------------------------------------
721 c     B(k) = B(k) - C(k-1)*A(k)
722 c     call matmul_sub(aa,i,j,k,c,cc,i,j,k-1,c,bb,i,j,k,c)
723 c---------------------------------------------------------------------
724                call matmul_sub(lhsa(1,1,k),
725      >                         lhsc(1,1,i,j,k-1,c),
726      >                         lhsb(1,1,k))
727
728 c---------------------------------------------------------------------
729 c     multiply c(i,j,k) by b_inverse and copy back to c
730 c     multiply rhs(i,j,1) by b_inverse(i,j,1) and copy to rhs
731 c---------------------------------------------------------------------
732                call binvcrhs( lhsb(1,1,k),
733      >                        lhsc(1,1,i,j,k,c),
734      >                        rhs(1,i,j,k,c) )
735
736             enddo
737
738 c---------------------------------------------------------------------
739 c     Now finish up special cases for last cell
740 c---------------------------------------------------------------------
741             if (last .eq. 1) then
742
743 c---------------------------------------------------------------------
744 c     rhs(ksize) = rhs(ksize) - A*rhs(ksize-1)
745 c---------------------------------------------------------------------
746                call matvec_sub(lhsa(1,1,ksize),
747      >                         rhs(1,i,j,ksize-1,c),rhs(1,i,j,ksize,c))
748
749 c---------------------------------------------------------------------
750 c     B(ksize) = B(ksize) - C(ksize-1)*A(ksize)
751 c     call matmul_sub(aa,i,j,ksize,c,
752 c     $              cc,i,j,ksize-1,c,bb,i,j,ksize,c)
753 c---------------------------------------------------------------------
754                call matmul_sub(lhsa(1,1,ksize),
755      >                         lhsc(1,1,i,j,ksize-1,c),
756      >                         lhsb(1,1,ksize))
757
758 c---------------------------------------------------------------------
759 c     multiply rhs(ksize) by b_inverse(ksize) and copy to rhs
760 c---------------------------------------------------------------------
761                call binvrhs( lhsb(1,1,ksize),
762      >                       rhs(1,i,j,ksize,c) )
763
764             endif
765          enddo
766       enddo
767
768
769       return
770       end
771       
772
773
774
775
776