Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Merge branch 'master' of git+ssh://scm.gforge.inria.fr//gitroot/simgrid/simgrid
[simgrid.git] / examples / smpi / NAS / BT / y_solve_vec.f
1 c---------------------------------------------------------------------
2 c---------------------------------------------------------------------
3
4       subroutine y_solve
5
6 c---------------------------------------------------------------------
7 c---------------------------------------------------------------------
8
9 c---------------------------------------------------------------------
10 c     Performs line solves in Y direction by first factoring
11 c     the block-tridiagonal matrix into an upper triangular matrix, 
12 c     and then performing back substitution to solve for the unknow
13 c     vectors of each line.  
14 c     
15 c     Make sure we treat elements zero to cell_size in the direction
16 c     of the sweep.
17 c---------------------------------------------------------------------
18
19       include 'header.h'
20       include 'mpinpb.h'
21
22       integer 
23      >     c, jstart, stage,
24      >     first, last, recv_id, error, r_status(MPI_STATUS_SIZE),
25      >     isize,jsize,ksize,send_id
26
27       jstart = 0
28
29 c---------------------------------------------------------------------
30 c     in our terminology stage is the number of the cell in the y-direct
31 c     i.e. stage = 1 means the start of the line stage=ncells means end
32 c---------------------------------------------------------------------
33       do stage = 1,ncells
34          c = slice(2,stage)
35          isize = cell_size(1,c) - 1
36          jsize = cell_size(2,c) - 1
37          ksize = cell_size(3,c) - 1
38
39 c---------------------------------------------------------------------
40 c     set last-cell flag
41 c---------------------------------------------------------------------
42          if (stage .eq. ncells) then
43             last = 1
44          else
45             last = 0
46          endif
47
48          if (stage .eq. 1) then
49 c---------------------------------------------------------------------
50 c     This is the first cell, so solve without receiving data
51 c---------------------------------------------------------------------
52             first = 1
53 c            call lhsy(c)
54             call y_solve_cell(first,last,c)
55          else
56 c---------------------------------------------------------------------
57 c     Not the first cell of this line, so receive info from
58 c     processor working on preceeding cell
59 c---------------------------------------------------------------------
60             first = 0
61             call y_receive_solve_info(recv_id,c)
62 c---------------------------------------------------------------------
63 c     overlap computations and communications
64 c---------------------------------------------------------------------
65 c            call lhsy(c)
66 c---------------------------------------------------------------------
67 c     wait for completion
68 c---------------------------------------------------------------------
69             call mpi_wait(send_id,r_status,error)
70             call mpi_wait(recv_id,r_status,error)
71 c---------------------------------------------------------------------
72 c     install C'(jstart+1) and rhs'(jstart+1) to be used in this cell
73 c---------------------------------------------------------------------
74             call y_unpack_solve_info(c)
75             call y_solve_cell(first,last,c)
76          endif
77
78          if (last .eq. 0) call y_send_solve_info(send_id,c)
79       enddo
80
81 c---------------------------------------------------------------------
82 c     now perform backsubstitution in reverse direction
83 c---------------------------------------------------------------------
84       do stage = ncells, 1, -1
85          c = slice(2,stage)
86          first = 0
87          last = 0
88          if (stage .eq. 1) first = 1
89          if (stage .eq. ncells) then
90             last = 1
91 c---------------------------------------------------------------------
92 c     last cell, so perform back substitute without waiting
93 c---------------------------------------------------------------------
94             call y_backsubstitute(first, last,c)
95          else
96             call y_receive_backsub_info(recv_id,c)
97             call mpi_wait(send_id,r_status,error)
98             call mpi_wait(recv_id,r_status,error)
99             call y_unpack_backsub_info(c)
100             call y_backsubstitute(first,last,c)
101          endif
102          if (first .eq. 0) call y_send_backsub_info(send_id,c)
103       enddo
104
105
106       return
107       end
108       
109 c---------------------------------------------------------------------
110 c---------------------------------------------------------------------
111       
112       subroutine y_unpack_solve_info(c)
113
114 c---------------------------------------------------------------------
115 c---------------------------------------------------------------------
116
117 c---------------------------------------------------------------------
118 c     unpack C'(-1) and rhs'(-1) for
119 c     all i and k
120 c---------------------------------------------------------------------
121
122       include 'header.h'
123
124       integer i,k,m,n,ptr,c,jstart 
125
126       jstart = 0
127       ptr = 0
128       do k=0,KMAX-1
129          do i=0,IMAX-1
130             do m=1,BLOCK_SIZE
131                do n=1,BLOCK_SIZE
132                   lhsc(m,n,i,jstart-1,k,c) = out_buffer(ptr+n)
133                enddo
134                ptr = ptr+BLOCK_SIZE
135             enddo
136             do n=1,BLOCK_SIZE
137                rhs(n,i,jstart-1,k,c) = out_buffer(ptr+n)
138             enddo
139             ptr = ptr+BLOCK_SIZE
140          enddo
141       enddo
142
143       return
144       end
145
146 c---------------------------------------------------------------------
147 c---------------------------------------------------------------------
148       
149       subroutine y_send_solve_info(send_id,c)
150
151 c---------------------------------------------------------------------
152 c---------------------------------------------------------------------
153
154 c---------------------------------------------------------------------
155 c     pack up and send C'(jend) and rhs'(jend) for
156 c     all i and k
157 c---------------------------------------------------------------------
158
159       include 'header.h'
160       include 'mpinpb.h'
161
162       integer i,k,m,n,jsize,ptr,c,ip,kp
163       integer error,send_id,buffer_size 
164
165       jsize = cell_size(2,c)-1
166       ip = cell_coord(1,c) - 1
167       kp = cell_coord(3,c) - 1
168       buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
169      >     (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE)
170
171 c---------------------------------------------------------------------
172 c     pack up buffer
173 c---------------------------------------------------------------------
174       ptr = 0
175       do k=0,KMAX-1
176          do i=0,IMAX-1
177             do m=1,BLOCK_SIZE
178                do n=1,BLOCK_SIZE
179                   in_buffer(ptr+n) = lhsc(m,n,i,jsize,k,c)
180                enddo
181                ptr = ptr+BLOCK_SIZE
182             enddo
183             do n=1,BLOCK_SIZE
184                in_buffer(ptr+n) = rhs(n,i,jsize,k,c)
185             enddo
186             ptr = ptr+BLOCK_SIZE
187          enddo
188       enddo
189
190 c---------------------------------------------------------------------
191 c     send buffer 
192 c---------------------------------------------------------------------
193       call mpi_isend(in_buffer, buffer_size,
194      >     dp_type, successor(2),
195      >     SOUTH+ip+kp*NCELLS, comm_solve,
196      >     send_id,error)
197
198       return
199       end
200
201 c---------------------------------------------------------------------
202 c---------------------------------------------------------------------
203
204       subroutine y_send_backsub_info(send_id,c)
205
206 c---------------------------------------------------------------------
207 c---------------------------------------------------------------------
208
209 c---------------------------------------------------------------------
210 c     pack up and send U(jstart) for all i and k
211 c---------------------------------------------------------------------
212
213       include 'header.h'
214       include 'mpinpb.h'
215
216       integer i,k,n,ptr,c,jstart,ip,kp
217       integer error,send_id,buffer_size
218
219 c---------------------------------------------------------------------
220 c     Send element 0 to previous processor
221 c---------------------------------------------------------------------
222       jstart = 0
223       ip = cell_coord(1,c)-1
224       kp = cell_coord(3,c)-1
225       buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE
226       ptr = 0
227       do k=0,KMAX-1
228          do i=0,IMAX-1
229             do n=1,BLOCK_SIZE
230                in_buffer(ptr+n) = rhs(n,i,jstart,k,c)
231             enddo
232             ptr = ptr+BLOCK_SIZE
233          enddo
234       enddo
235       call mpi_isend(in_buffer, buffer_size,
236      >     dp_type, predecessor(2), 
237      >     NORTH+ip+kp*NCELLS, comm_solve, 
238      >     send_id,error)
239
240       return
241       end
242
243 c---------------------------------------------------------------------
244 c---------------------------------------------------------------------
245
246       subroutine y_unpack_backsub_info(c)
247
248 c---------------------------------------------------------------------
249 c---------------------------------------------------------------------
250
251 c---------------------------------------------------------------------
252 c     unpack U(jsize) for all i and k
253 c---------------------------------------------------------------------
254
255       include 'header.h'
256
257       integer i,k,n,ptr,c 
258
259       ptr = 0
260       do k=0,KMAX-1
261          do i=0,IMAX-1
262             do n=1,BLOCK_SIZE
263                backsub_info(n,i,k,c) = out_buffer(ptr+n)
264             enddo
265             ptr = ptr+BLOCK_SIZE
266          enddo
267       enddo
268
269       return
270       end
271
272 c---------------------------------------------------------------------
273 c---------------------------------------------------------------------
274
275       subroutine y_receive_backsub_info(recv_id,c)
276
277 c---------------------------------------------------------------------
278 c---------------------------------------------------------------------
279
280 c---------------------------------------------------------------------
281 c     post mpi receives
282 c---------------------------------------------------------------------
283
284       include 'header.h'
285       include 'mpinpb.h'
286
287       integer error,recv_id,ip,kp,c,buffer_size
288       ip = cell_coord(1,c) - 1
289       kp = cell_coord(3,c) - 1
290       buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE
291       call mpi_irecv(out_buffer, buffer_size,
292      >     dp_type, successor(2), 
293      >     NORTH+ip+kp*NCELLS, comm_solve, 
294      >     recv_id, error)
295       return
296       end
297
298 c---------------------------------------------------------------------
299 c---------------------------------------------------------------------
300
301       subroutine y_receive_solve_info(recv_id,c)
302
303 c---------------------------------------------------------------------
304 c---------------------------------------------------------------------
305
306 c---------------------------------------------------------------------
307 c     post mpi receives 
308 c---------------------------------------------------------------------
309
310       include 'header.h'
311       include 'mpinpb.h'
312
313       integer ip,kp,recv_id,error,c,buffer_size
314       ip = cell_coord(1,c) - 1
315       kp = cell_coord(3,c) - 1
316       buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
317      >     (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE)
318       call mpi_irecv(out_buffer, buffer_size, 
319      >     dp_type, predecessor(2), 
320      >     SOUTH+ip+kp*NCELLS,  comm_solve, 
321      >     recv_id, error)
322
323       return
324       end
325       
326 c---------------------------------------------------------------------
327 c---------------------------------------------------------------------
328
329       subroutine y_backsubstitute(first, last, c)
330
331 c---------------------------------------------------------------------
332 c---------------------------------------------------------------------
333
334 c---------------------------------------------------------------------
335 c     back solve: if last cell, then generate U(jsize)=rhs(jsize)
336 c     else assume U(jsize) is loaded in un pack backsub_info
337 c     so just use it
338 c     after call u(jstart) will be sent to next cell
339 c---------------------------------------------------------------------
340
341       include 'header.h'
342
343       integer first, last, c, i, k
344       integer m,n,j,jsize,isize,ksize,jstart
345       
346       jstart = 0
347       isize = cell_size(1,c)-end(1,c)-1      
348       jsize = cell_size(2,c)-1
349       ksize = cell_size(3,c)-end(3,c)-1
350       if (last .eq. 0) then
351          do k=start(3,c),ksize
352             do i=start(1,c),isize
353 c---------------------------------------------------------------------
354 c     U(jsize) uses info from previous cell if not last cell
355 c---------------------------------------------------------------------
356                do m=1,BLOCK_SIZE
357                   do n=1,BLOCK_SIZE
358                      rhs(m,i,jsize,k,c) = rhs(m,i,jsize,k,c) 
359      >                    - lhsc(m,n,i,jsize,k,c)*
360      >                    backsub_info(n,i,k,c)
361                   enddo
362                enddo
363             enddo
364          enddo
365       endif
366       do k=start(3,c),ksize
367          do j=jsize-1,jstart,-1
368             do i=start(1,c),isize
369                do m=1,BLOCK_SIZE
370                   do n=1,BLOCK_SIZE
371                      rhs(m,i,j,k,c) = rhs(m,i,j,k,c) 
372      >                    - lhsc(m,n,i,j,k,c)*rhs(n,i,j+1,k,c)
373                   enddo
374                enddo
375             enddo
376          enddo
377       enddo
378
379       return
380       end
381
382 c---------------------------------------------------------------------
383 c---------------------------------------------------------------------
384
385       subroutine y_solve_cell(first,last,c)
386
387 c---------------------------------------------------------------------
388 c---------------------------------------------------------------------
389
390 c---------------------------------------------------------------------
391 c     performs guaussian elimination on this cell.
392 c     
393 c     assumes that unpacking routines for non-first cells 
394 c     preload C' and rhs' from previous cell.
395 c     
396 c     assumed send happens outside this routine, but that
397 c     c'(JMAX) and rhs'(JMAX) will be sent to next cell
398 c---------------------------------------------------------------------
399
400       include 'header.h'
401       include 'work_lhs_vec.h'
402
403       integer first,last,c
404       integer i,j,k,m,n,isize,ksize,jsize,jstart
405
406       jstart = 0
407       isize = cell_size(1,c)-end(1,c)-1
408       jsize = cell_size(2,c)-1
409       ksize = cell_size(3,c)-end(3,c)-1
410
411 c---------------------------------------------------------------------
412 c     zero the left hand side for starters
413 c     set diagonal values to 1. This is overkill, but convenient
414 c---------------------------------------------------------------------
415       do i = 0, isize
416          do m = 1, 5
417             do n = 1, 5
418                lhsa(m,n,i,0) = 0.0d0
419                lhsb(m,n,i,0) = 0.0d0
420                lhsa(m,n,i,jsize) = 0.0d0
421                lhsb(m,n,i,jsize) = 0.0d0
422             enddo
423             lhsb(m,m,i,0) = 1.0d0
424             lhsb(m,m,i,jsize) = 1.0d0
425          enddo
426       enddo
427
428       do k=start(3,c),ksize 
429
430 c---------------------------------------------------------------------
431 c     This function computes the left hand side for the three y-factors 
432 c---------------------------------------------------------------------
433
434 c---------------------------------------------------------------------
435 c     Compute the indices for storing the tri-diagonal matrix;
436 c     determine a (labeled f) and n jacobians for cell c
437 c---------------------------------------------------------------------
438
439          do j = start(2,c)-1, cell_size(2,c)-end(2,c)
440             do i=start(1,c),isize
441
442                tmp1 = 1.0d0 / u(1,i,j,k,c)
443                tmp2 = tmp1 * tmp1
444                tmp3 = tmp1 * tmp2
445
446                fjac(1,1,i,j) = 0.0d+00
447                fjac(1,2,i,j) = 0.0d+00
448                fjac(1,3,i,j) = 1.0d+00
449                fjac(1,4,i,j) = 0.0d+00
450                fjac(1,5,i,j) = 0.0d+00
451
452                fjac(2,1,i,j) = - ( u(2,i,j,k,c)*u(3,i,j,k,c) )
453      >              * tmp2
454                fjac(2,2,i,j) = u(3,i,j,k,c) * tmp1
455                fjac(2,3,i,j) = u(2,i,j,k,c) * tmp1
456                fjac(2,4,i,j) = 0.0d+00
457                fjac(2,5,i,j) = 0.0d+00
458
459                fjac(3,1,i,j) = - ( u(3,i,j,k,c)*u(3,i,j,k,c)*tmp2)
460      >              + c2 * qs(i,j,k,c)
461                fjac(3,2,i,j) = - c2 *  u(2,i,j,k,c) * tmp1
462                fjac(3,3,i,j) = ( 2.0d+00 - c2 )
463      >              *  u(3,i,j,k,c) * tmp1 
464                fjac(3,4,i,j) = - c2 * u(4,i,j,k,c) * tmp1 
465                fjac(3,5,i,j) = c2
466
467                fjac(4,1,i,j) = - ( u(3,i,j,k,c)*u(4,i,j,k,c) )
468      >              * tmp2
469                fjac(4,2,i,j) = 0.0d+00
470                fjac(4,3,i,j) = u(4,i,j,k,c) * tmp1
471                fjac(4,4,i,j) = u(3,i,j,k,c) * tmp1
472                fjac(4,5,i,j) = 0.0d+00
473
474                fjac(5,1,i,j) = ( c2 * 2.0d0 * qs(i,j,k,c)
475      >              - c1 * u(5,i,j,k,c) * tmp1 ) 
476      >              * u(3,i,j,k,c) * tmp1 
477                fjac(5,2,i,j) = - c2 * u(2,i,j,k,c)*u(3,i,j,k,c) 
478      >              * tmp2
479                fjac(5,3,i,j) = c1 * u(5,i,j,k,c) * tmp1 
480      >              - c2 * ( qs(i,j,k,c)
481      >              + u(3,i,j,k,c)*u(3,i,j,k,c) * tmp2 )
482                fjac(5,4,i,j) = - c2 * ( u(3,i,j,k,c)*u(4,i,j,k,c) )
483      >              * tmp2
484                fjac(5,5,i,j) = c1 * u(3,i,j,k,c) * tmp1 
485
486                njac(1,1,i,j) = 0.0d+00
487                njac(1,2,i,j) = 0.0d+00
488                njac(1,3,i,j) = 0.0d+00
489                njac(1,4,i,j) = 0.0d+00
490                njac(1,5,i,j) = 0.0d+00
491
492                njac(2,1,i,j) = - c3c4 * tmp2 * u(2,i,j,k,c)
493                njac(2,2,i,j) =   c3c4 * tmp1
494                njac(2,3,i,j) =   0.0d+00
495                njac(2,4,i,j) =   0.0d+00
496                njac(2,5,i,j) =   0.0d+00
497
498                njac(3,1,i,j) = - con43 * c3c4 * tmp2 * u(3,i,j,k,c)
499                njac(3,2,i,j) =   0.0d+00
500                njac(3,3,i,j) =   con43 * c3c4 * tmp1
501                njac(3,4,i,j) =   0.0d+00
502                njac(3,5,i,j) =   0.0d+00
503
504                njac(4,1,i,j) = - c3c4 * tmp2 * u(4,i,j,k,c)
505                njac(4,2,i,j) =   0.0d+00
506                njac(4,3,i,j) =   0.0d+00
507                njac(4,4,i,j) =   c3c4 * tmp1
508                njac(4,5,i,j) =   0.0d+00
509
510                njac(5,1,i,j) = - (  c3c4
511      >              - c1345 ) * tmp3 * (u(2,i,j,k,c)**2)
512      >              - ( con43 * c3c4
513      >              - c1345 ) * tmp3 * (u(3,i,j,k,c)**2)
514      >              - ( c3c4 - c1345 ) * tmp3 * (u(4,i,j,k,c)**2)
515      >              - c1345 * tmp2 * u(5,i,j,k,c)
516
517                njac(5,2,i,j) = (  c3c4 - c1345 ) * tmp2 * u(2,i,j,k,c)
518                njac(5,3,i,j) = ( con43 * c3c4
519      >              - c1345 ) * tmp2 * u(3,i,j,k,c)
520                njac(5,4,i,j) = ( c3c4 - c1345 ) * tmp2 * u(4,i,j,k,c)
521                njac(5,5,i,j) = ( c1345 ) * tmp1
522
523             enddo
524          enddo
525
526 c---------------------------------------------------------------------
527 c     now joacobians set, so form left hand side in y direction
528 c---------------------------------------------------------------------
529          do j = start(2,c), jsize-end(2,c)
530             do i=start(1,c),isize
531
532                tmp1 = dt * ty1
533                tmp2 = dt * ty2
534
535                lhsa(1,1,i,j) = - tmp2 * fjac(1,1,i,j-1)
536      >              - tmp1 * njac(1,1,i,j-1)
537      >              - tmp1 * dy1 
538                lhsa(1,2,i,j) = - tmp2 * fjac(1,2,i,j-1)
539      >              - tmp1 * njac(1,2,i,j-1)
540                lhsa(1,3,i,j) = - tmp2 * fjac(1,3,i,j-1)
541      >              - tmp1 * njac(1,3,i,j-1)
542                lhsa(1,4,i,j) = - tmp2 * fjac(1,4,i,j-1)
543      >              - tmp1 * njac(1,4,i,j-1)
544                lhsa(1,5,i,j) = - tmp2 * fjac(1,5,i,j-1)
545      >              - tmp1 * njac(1,5,i,j-1)
546
547                lhsa(2,1,i,j) = - tmp2 * fjac(2,1,i,j-1)
548      >              - tmp1 * njac(2,1,i,j-1)
549                lhsa(2,2,i,j) = - tmp2 * fjac(2,2,i,j-1)
550      >              - tmp1 * njac(2,2,i,j-1)
551      >              - tmp1 * dy2
552                lhsa(2,3,i,j) = - tmp2 * fjac(2,3,i,j-1)
553      >              - tmp1 * njac(2,3,i,j-1)
554                lhsa(2,4,i,j) = - tmp2 * fjac(2,4,i,j-1)
555      >              - tmp1 * njac(2,4,i,j-1)
556                lhsa(2,5,i,j) = - tmp2 * fjac(2,5,i,j-1)
557      >              - tmp1 * njac(2,5,i,j-1)
558
559                lhsa(3,1,i,j) = - tmp2 * fjac(3,1,i,j-1)
560      >              - tmp1 * njac(3,1,i,j-1)
561                lhsa(3,2,i,j) = - tmp2 * fjac(3,2,i,j-1)
562      >              - tmp1 * njac(3,2,i,j-1)
563                lhsa(3,3,i,j) = - tmp2 * fjac(3,3,i,j-1)
564      >              - tmp1 * njac(3,3,i,j-1)
565      >              - tmp1 * dy3 
566                lhsa(3,4,i,j) = - tmp2 * fjac(3,4,i,j-1)
567      >              - tmp1 * njac(3,4,i,j-1)
568                lhsa(3,5,i,j) = - tmp2 * fjac(3,5,i,j-1)
569      >              - tmp1 * njac(3,5,i,j-1)
570
571                lhsa(4,1,i,j) = - tmp2 * fjac(4,1,i,j-1)
572      >              - tmp1 * njac(4,1,i,j-1)
573                lhsa(4,2,i,j) = - tmp2 * fjac(4,2,i,j-1)
574      >              - tmp1 * njac(4,2,i,j-1)
575                lhsa(4,3,i,j) = - tmp2 * fjac(4,3,i,j-1)
576      >              - tmp1 * njac(4,3,i,j-1)
577                lhsa(4,4,i,j) = - tmp2 * fjac(4,4,i,j-1)
578      >              - tmp1 * njac(4,4,i,j-1)
579      >              - tmp1 * dy4
580                lhsa(4,5,i,j) = - tmp2 * fjac(4,5,i,j-1)
581      >              - tmp1 * njac(4,5,i,j-1)
582
583                lhsa(5,1,i,j) = - tmp2 * fjac(5,1,i,j-1)
584      >              - tmp1 * njac(5,1,i,j-1)
585                lhsa(5,2,i,j) = - tmp2 * fjac(5,2,i,j-1)
586      >              - tmp1 * njac(5,2,i,j-1)
587                lhsa(5,3,i,j) = - tmp2 * fjac(5,3,i,j-1)
588      >              - tmp1 * njac(5,3,i,j-1)
589                lhsa(5,4,i,j) = - tmp2 * fjac(5,4,i,j-1)
590      >              - tmp1 * njac(5,4,i,j-1)
591                lhsa(5,5,i,j) = - tmp2 * fjac(5,5,i,j-1)
592      >              - tmp1 * njac(5,5,i,j-1)
593      >              - tmp1 * dy5
594
595                lhsb(1,1,i,j) = 1.0d+00
596      >              + tmp1 * 2.0d+00 * njac(1,1,i,j)
597      >              + tmp1 * 2.0d+00 * dy1
598                lhsb(1,2,i,j) = tmp1 * 2.0d+00 * njac(1,2,i,j)
599                lhsb(1,3,i,j) = tmp1 * 2.0d+00 * njac(1,3,i,j)
600                lhsb(1,4,i,j) = tmp1 * 2.0d+00 * njac(1,4,i,j)
601                lhsb(1,5,i,j) = tmp1 * 2.0d+00 * njac(1,5,i,j)
602
603                lhsb(2,1,i,j) = tmp1 * 2.0d+00 * njac(2,1,i,j)
604                lhsb(2,2,i,j) = 1.0d+00
605      >              + tmp1 * 2.0d+00 * njac(2,2,i,j)
606      >              + tmp1 * 2.0d+00 * dy2
607                lhsb(2,3,i,j) = tmp1 * 2.0d+00 * njac(2,3,i,j)
608                lhsb(2,4,i,j) = tmp1 * 2.0d+00 * njac(2,4,i,j)
609                lhsb(2,5,i,j) = tmp1 * 2.0d+00 * njac(2,5,i,j)
610
611                lhsb(3,1,i,j) = tmp1 * 2.0d+00 * njac(3,1,i,j)
612                lhsb(3,2,i,j) = tmp1 * 2.0d+00 * njac(3,2,i,j)
613                lhsb(3,3,i,j) = 1.0d+00
614      >              + tmp1 * 2.0d+00 * njac(3,3,i,j)
615      >              + tmp1 * 2.0d+00 * dy3
616                lhsb(3,4,i,j) = tmp1 * 2.0d+00 * njac(3,4,i,j)
617                lhsb(3,5,i,j) = tmp1 * 2.0d+00 * njac(3,5,i,j)
618
619                lhsb(4,1,i,j) = tmp1 * 2.0d+00 * njac(4,1,i,j)
620                lhsb(4,2,i,j) = tmp1 * 2.0d+00 * njac(4,2,i,j)
621                lhsb(4,3,i,j) = tmp1 * 2.0d+00 * njac(4,3,i,j)
622                lhsb(4,4,i,j) = 1.0d+00
623      >              + tmp1 * 2.0d+00 * njac(4,4,i,j)
624      >              + tmp1 * 2.0d+00 * dy4
625                lhsb(4,5,i,j) = tmp1 * 2.0d+00 * njac(4,5,i,j)
626
627                lhsb(5,1,i,j) = tmp1 * 2.0d+00 * njac(5,1,i,j)
628                lhsb(5,2,i,j) = tmp1 * 2.0d+00 * njac(5,2,i,j)
629                lhsb(5,3,i,j) = tmp1 * 2.0d+00 * njac(5,3,i,j)
630                lhsb(5,4,i,j) = tmp1 * 2.0d+00 * njac(5,4,i,j)
631                lhsb(5,5,i,j) = 1.0d+00
632      >              + tmp1 * 2.0d+00 * njac(5,5,i,j) 
633      >              + tmp1 * 2.0d+00 * dy5
634
635                lhsc(1,1,i,j,k,c) =  tmp2 * fjac(1,1,i,j+1)
636      >              - tmp1 * njac(1,1,i,j+1)
637      >              - tmp1 * dy1
638                lhsc(1,2,i,j,k,c) =  tmp2 * fjac(1,2,i,j+1)
639      >              - tmp1 * njac(1,2,i,j+1)
640                lhsc(1,3,i,j,k,c) =  tmp2 * fjac(1,3,i,j+1)
641      >              - tmp1 * njac(1,3,i,j+1)
642                lhsc(1,4,i,j,k,c) =  tmp2 * fjac(1,4,i,j+1)
643      >              - tmp1 * njac(1,4,i,j+1)
644                lhsc(1,5,i,j,k,c) =  tmp2 * fjac(1,5,i,j+1)
645      >              - tmp1 * njac(1,5,i,j+1)
646
647                lhsc(2,1,i,j,k,c) =  tmp2 * fjac(2,1,i,j+1)
648      >              - tmp1 * njac(2,1,i,j+1)
649                lhsc(2,2,i,j,k,c) =  tmp2 * fjac(2,2,i,j+1)
650      >              - tmp1 * njac(2,2,i,j+1)
651      >              - tmp1 * dy2
652                lhsc(2,3,i,j,k,c) =  tmp2 * fjac(2,3,i,j+1)
653      >              - tmp1 * njac(2,3,i,j+1)
654                lhsc(2,4,i,j,k,c) =  tmp2 * fjac(2,4,i,j+1)
655      >              - tmp1 * njac(2,4,i,j+1)
656                lhsc(2,5,i,j,k,c) =  tmp2 * fjac(2,5,i,j+1)
657      >              - tmp1 * njac(2,5,i,j+1)
658
659                lhsc(3,1,i,j,k,c) =  tmp2 * fjac(3,1,i,j+1)
660      >              - tmp1 * njac(3,1,i,j+1)
661                lhsc(3,2,i,j,k,c) =  tmp2 * fjac(3,2,i,j+1)
662      >              - tmp1 * njac(3,2,i,j+1)
663                lhsc(3,3,i,j,k,c) =  tmp2 * fjac(3,3,i,j+1)
664      >              - tmp1 * njac(3,3,i,j+1)
665      >              - tmp1 * dy3
666                lhsc(3,4,i,j,k,c) =  tmp2 * fjac(3,4,i,j+1)
667      >              - tmp1 * njac(3,4,i,j+1)
668                lhsc(3,5,i,j,k,c) =  tmp2 * fjac(3,5,i,j+1)
669      >              - tmp1 * njac(3,5,i,j+1)
670
671                lhsc(4,1,i,j,k,c) =  tmp2 * fjac(4,1,i,j+1)
672      >              - tmp1 * njac(4,1,i,j+1)
673                lhsc(4,2,i,j,k,c) =  tmp2 * fjac(4,2,i,j+1)
674      >              - tmp1 * njac(4,2,i,j+1)
675                lhsc(4,3,i,j,k,c) =  tmp2 * fjac(4,3,i,j+1)
676      >              - tmp1 * njac(4,3,i,j+1)
677                lhsc(4,4,i,j,k,c) =  tmp2 * fjac(4,4,i,j+1)
678      >              - tmp1 * njac(4,4,i,j+1)
679      >              - tmp1 * dy4
680                lhsc(4,5,i,j,k,c) =  tmp2 * fjac(4,5,i,j+1)
681      >              - tmp1 * njac(4,5,i,j+1)
682
683                lhsc(5,1,i,j,k,c) =  tmp2 * fjac(5,1,i,j+1)
684      >              - tmp1 * njac(5,1,i,j+1)
685                lhsc(5,2,i,j,k,c) =  tmp2 * fjac(5,2,i,j+1)
686      >              - tmp1 * njac(5,2,i,j+1)
687                lhsc(5,3,i,j,k,c) =  tmp2 * fjac(5,3,i,j+1)
688      >              - tmp1 * njac(5,3,i,j+1)
689                lhsc(5,4,i,j,k,c) =  tmp2 * fjac(5,4,i,j+1)
690      >              - tmp1 * njac(5,4,i,j+1)
691                lhsc(5,5,i,j,k,c) =  tmp2 * fjac(5,5,i,j+1)
692      >              - tmp1 * njac(5,5,i,j+1)
693      >              - tmp1 * dy5
694
695             enddo
696          enddo
697
698
699 c---------------------------------------------------------------------
700 c     outer most do loops - sweeping in i direction
701 c---------------------------------------------------------------------
702          if (first .eq. 1) then 
703
704 c---------------------------------------------------------------------
705 c     multiply c(i,jstart,k) by b_inverse and copy back to c
706 c     multiply rhs(jstart) by b_inverse(jstart) and copy to rhs
707 c---------------------------------------------------------------------
708 !dir$ ivdep
709             do i=start(1,c),isize
710                call binvcrhs( lhsb(1,1,i,jstart),
711      >                        lhsc(1,1,i,jstart,k,c),
712      >                        rhs(1,i,jstart,k,c) )
713             enddo
714
715          endif
716
717 c---------------------------------------------------------------------
718 c     begin inner most do loop
719 c     do all the elements of the cell unless last 
720 c---------------------------------------------------------------------
721          do j=jstart+first,jsize-last
722 !dir$ ivdep
723             do i=start(1,c),isize
724
725 c---------------------------------------------------------------------
726 c     subtract A*lhs_vector(j-1) from lhs_vector(j)
727 c     
728 c     rhs(j) = rhs(j) - A*rhs(j-1)
729 c---------------------------------------------------------------------
730                call matvec_sub(lhsa(1,1,i,j),
731      >                         rhs(1,i,j-1,k,c),rhs(1,i,j,k,c))
732
733 c---------------------------------------------------------------------
734 c     B(j) = B(j) - C(j-1)*A(j)
735 c---------------------------------------------------------------------
736                call matmul_sub(lhsa(1,1,i,j),
737      >                         lhsc(1,1,i,j-1,k,c),
738      >                         lhsb(1,1,i,j))
739
740 c---------------------------------------------------------------------
741 c     multiply c(i,j,k) by b_inverse and copy back to c
742 c     multiply rhs(i,1,k) by b_inverse(i,1,k) and copy to rhs
743 c---------------------------------------------------------------------
744                call binvcrhs( lhsb(1,1,i,j),
745      >                        lhsc(1,1,i,j,k,c),
746      >                        rhs(1,i,j,k,c) )
747
748             enddo
749          enddo
750
751 c---------------------------------------------------------------------
752 c     Now finish up special cases for last cell
753 c---------------------------------------------------------------------
754          if (last .eq. 1) then
755
756 !dir$ ivdep
757             do i=start(1,c),isize
758 c---------------------------------------------------------------------
759 c     rhs(jsize) = rhs(jsize) - A*rhs(jsize-1)
760 c---------------------------------------------------------------------
761                call matvec_sub(lhsa(1,1,i,jsize),
762      >                         rhs(1,i,jsize-1,k,c),rhs(1,i,jsize,k,c))
763
764 c---------------------------------------------------------------------
765 c     B(jsize) = B(jsize) - C(jsize-1)*A(jsize)
766 c     call matmul_sub(aa,i,jsize,k,c,
767 c     $              cc,i,jsize-1,k,c,bb,i,jsize,k,c)
768 c---------------------------------------------------------------------
769                call matmul_sub(lhsa(1,1,i,jsize),
770      >                         lhsc(1,1,i,jsize-1,k,c),
771      >                         lhsb(1,1,i,jsize))
772
773 c---------------------------------------------------------------------
774 c     multiply rhs(jsize) by b_inverse(jsize) and copy to rhs
775 c---------------------------------------------------------------------
776                call binvrhs( lhsb(1,1,i,jsize),
777      >                       rhs(1,i,jsize,k,c) )
778             enddo
779
780          endif
781       enddo
782
783
784       return
785       end
786       
787
788