Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Improve error message
[simgrid.git] / examples / smpi / NAS / BT / x_solve_vec.f
1
2 c---------------------------------------------------------------------
3 c---------------------------------------------------------------------
4
5       subroutine x_solve
6
7 c---------------------------------------------------------------------
8 c---------------------------------------------------------------------
9
10 c---------------------------------------------------------------------
11 c     
12 c     Performs line solves in X direction by first factoring
13 c     the block-tridiagonal matrix into an upper triangular matrix, 
14 c     and then performing back substitution to solve for the unknow
15 c     vectors of each line.  
16 c     
17 c     Make sure we treat elements zero to cell_size in the direction
18 c     of the sweep.
19 c     
20 c---------------------------------------------------------------------
21
22       include 'header.h'
23       include 'mpinpb.h'
24       integer  c, istart, stage,
25      >     first, last, recv_id, error, r_status(MPI_STATUS_SIZE),
26      >     isize,jsize,ksize,send_id
27
28       istart = 0
29
30 c---------------------------------------------------------------------
31 c     in our terminology stage is the number of the cell in the x-direct
32 c     i.e. stage = 1 means the start of the line stage=ncells means end
33 c---------------------------------------------------------------------
34       do stage = 1,ncells
35          c = slice(1,stage)
36          isize = cell_size(1,c) - 1
37          jsize = cell_size(2,c) - 1
38          ksize = cell_size(3,c) - 1
39          
40 c---------------------------------------------------------------------
41 c     set last-cell flag
42 c---------------------------------------------------------------------
43          if (stage .eq. ncells) then
44             last = 1
45          else
46             last = 0
47          endif
48
49          if (stage .eq. 1) then
50 c---------------------------------------------------------------------
51 c     This is the first cell, so solve without receiving data
52 c---------------------------------------------------------------------
53             first = 1
54 c            call lhsx(c)
55             call x_solve_cell(first,last,c)
56          else
57 c---------------------------------------------------------------------
58 c     Not the first cell of this line, so receive info from
59 c     processor working on preceeding cell
60 c---------------------------------------------------------------------
61             first = 0
62             call x_receive_solve_info(recv_id,c)
63 c---------------------------------------------------------------------
64 c     overlap computations and communications
65 c---------------------------------------------------------------------
66 c            call lhsx(c)
67 c---------------------------------------------------------------------
68 c     wait for completion
69 c---------------------------------------------------------------------
70             call mpi_wait(send_id,r_status,error)
71             call mpi_wait(recv_id,r_status,error)
72 c---------------------------------------------------------------------
73 c     install C'(istart) and rhs'(istart) to be used in this cell
74 c---------------------------------------------------------------------
75             call x_unpack_solve_info(c)
76             call x_solve_cell(first,last,c)
77          endif
78
79          if (last .eq. 0) call x_send_solve_info(send_id,c)
80       enddo
81
82 c---------------------------------------------------------------------
83 c     now perform backsubstitution in reverse direction
84 c---------------------------------------------------------------------
85       do stage = ncells, 1, -1
86          c = slice(1,stage)
87          first = 0
88          last = 0
89          if (stage .eq. 1) first = 1
90          if (stage .eq. ncells) then
91             last = 1
92 c---------------------------------------------------------------------
93 c     last cell, so perform back substitute without waiting
94 c---------------------------------------------------------------------
95             call x_backsubstitute(first, last,c)
96          else
97             call x_receive_backsub_info(recv_id,c)
98             call mpi_wait(send_id,r_status,error)
99             call mpi_wait(recv_id,r_status,error)
100             call x_unpack_backsub_info(c)
101             call x_backsubstitute(first,last,c)
102          endif
103          if (first .eq. 0) call x_send_backsub_info(send_id,c)
104       enddo
105
106
107       return
108       end
109       
110       
111 c---------------------------------------------------------------------
112 c---------------------------------------------------------------------
113
114       subroutine x_unpack_solve_info(c)
115
116 c---------------------------------------------------------------------
117 c---------------------------------------------------------------------
118
119 c---------------------------------------------------------------------
120 c     unpack C'(-1) and rhs'(-1) for
121 c     all j and k
122 c---------------------------------------------------------------------
123
124       include 'header.h'
125       integer j,k,m,n,ptr,c,istart 
126
127       istart = 0
128       ptr = 0
129       do k=0,KMAX-1
130          do j=0,JMAX-1
131             do m=1,BLOCK_SIZE
132                do n=1,BLOCK_SIZE
133                   lhsc(m,n,istart-1,j,k,c) = out_buffer(ptr+n)
134                enddo
135                ptr = ptr+BLOCK_SIZE
136             enddo
137             do n=1,BLOCK_SIZE
138                rhs(n,istart-1,j,k,c) = out_buffer(ptr+n)
139             enddo
140             ptr = ptr+BLOCK_SIZE
141          enddo
142       enddo
143
144       return
145       end
146
147 c---------------------------------------------------------------------
148 c---------------------------------------------------------------------
149       
150       subroutine x_send_solve_info(send_id,c)
151
152 c---------------------------------------------------------------------
153 c---------------------------------------------------------------------
154
155 c---------------------------------------------------------------------
156 c     pack up and send C'(iend) and rhs'(iend) for
157 c     all j and k
158 c---------------------------------------------------------------------
159
160       include 'header.h'
161       include 'mpinpb.h'
162
163       integer j,k,m,n,isize,ptr,c,jp,kp
164       integer error,send_id,buffer_size 
165
166       isize = cell_size(1,c)-1
167       jp = cell_coord(2,c) - 1
168       kp = cell_coord(3,c) - 1
169       buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
170      >     (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE)
171
172 c---------------------------------------------------------------------
173 c     pack up buffer
174 c---------------------------------------------------------------------
175       ptr = 0
176       do k=0,KMAX-1
177          do j=0,JMAX-1
178             do m=1,BLOCK_SIZE
179                do n=1,BLOCK_SIZE
180                   in_buffer(ptr+n) = lhsc(m,n,isize,j,k,c)
181                enddo
182                ptr = ptr+BLOCK_SIZE
183             enddo
184             do n=1,BLOCK_SIZE
185                in_buffer(ptr+n) = rhs(n,isize,j,k,c)
186             enddo
187             ptr = ptr+BLOCK_SIZE
188          enddo
189       enddo
190
191 c---------------------------------------------------------------------
192 c     send buffer 
193 c---------------------------------------------------------------------
194       call mpi_isend(in_buffer, buffer_size,
195      >     dp_type, successor(1),
196      >     WEST+jp+kp*NCELLS, comm_solve,
197      >     send_id,error)
198
199       return
200       end
201
202 c---------------------------------------------------------------------
203 c---------------------------------------------------------------------
204
205       subroutine x_send_backsub_info(send_id,c)
206
207 c---------------------------------------------------------------------
208 c---------------------------------------------------------------------
209
210 c---------------------------------------------------------------------
211 c     pack up and send U(istart) for all j and k
212 c---------------------------------------------------------------------
213
214       include 'header.h'
215       include 'mpinpb.h'
216
217       integer j,k,n,ptr,c,istart,jp,kp
218       integer error,send_id,buffer_size
219
220 c---------------------------------------------------------------------
221 c     Send element 0 to previous processor
222 c---------------------------------------------------------------------
223       istart = 0
224       jp = cell_coord(2,c)-1
225       kp = cell_coord(3,c)-1
226       buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE
227       ptr = 0
228       do k=0,KMAX-1
229          do j=0,JMAX-1
230             do n=1,BLOCK_SIZE
231                in_buffer(ptr+n) = rhs(n,istart,j,k,c)
232             enddo
233             ptr = ptr+BLOCK_SIZE
234          enddo
235       enddo
236       call mpi_isend(in_buffer, buffer_size,
237      >     dp_type, predecessor(1), 
238      >     EAST+jp+kp*NCELLS, comm_solve, 
239      >     send_id,error)
240
241       return
242       end
243
244 c---------------------------------------------------------------------
245 c---------------------------------------------------------------------
246
247       subroutine x_unpack_backsub_info(c)
248
249 c---------------------------------------------------------------------
250 c---------------------------------------------------------------------
251
252 c---------------------------------------------------------------------
253 c     unpack U(isize) for all j and k
254 c---------------------------------------------------------------------
255
256       include 'header.h'
257       integer j,k,n,ptr,c
258
259       ptr = 0
260       do k=0,KMAX-1
261          do j=0,JMAX-1
262             do n=1,BLOCK_SIZE
263                backsub_info(n,j,k,c) = out_buffer(ptr+n)
264             enddo
265             ptr = ptr+BLOCK_SIZE
266          enddo
267       enddo
268
269       return
270       end
271
272 c---------------------------------------------------------------------
273 c---------------------------------------------------------------------
274
275       subroutine x_receive_backsub_info(recv_id,c)
276
277 c---------------------------------------------------------------------
278 c---------------------------------------------------------------------
279
280 c---------------------------------------------------------------------
281 c     post mpi receives
282 c---------------------------------------------------------------------
283
284       include 'header.h'
285       include 'mpinpb.h'
286
287       integer error,recv_id,jp,kp,c,buffer_size
288       jp = cell_coord(2,c) - 1
289       kp = cell_coord(3,c) - 1
290       buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE
291       call mpi_irecv(out_buffer, buffer_size,
292      >     dp_type, successor(1), 
293      >     EAST+jp+kp*NCELLS, comm_solve, 
294      >     recv_id, error)
295
296       return
297       end
298
299 c---------------------------------------------------------------------
300 c---------------------------------------------------------------------
301
302       subroutine x_receive_solve_info(recv_id,c)
303
304 c---------------------------------------------------------------------
305 c---------------------------------------------------------------------
306
307 c---------------------------------------------------------------------
308 c     post mpi receives 
309 c---------------------------------------------------------------------
310
311       include 'header.h'
312       include 'mpinpb.h'
313
314       integer jp,kp,recv_id,error,c,buffer_size
315       jp = cell_coord(2,c) - 1
316       kp = cell_coord(3,c) - 1
317       buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
318      >     (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE)
319       call mpi_irecv(out_buffer, buffer_size,
320      >     dp_type, predecessor(1), 
321      >     WEST+jp+kp*NCELLS,  comm_solve, 
322      >     recv_id, error)
323
324       return
325       end
326
327 c---------------------------------------------------------------------
328 c---------------------------------------------------------------------
329       
330       subroutine x_backsubstitute(first, last, c)
331
332 c---------------------------------------------------------------------
333 c---------------------------------------------------------------------
334
335 c---------------------------------------------------------------------
336 c     back solve: if last cell, then generate U(isize)=rhs(isize)
337 c     else assume U(isize) is loaded in un pack backsub_info
338 c     so just use it
339 c     after call u(istart) will be sent to next cell
340 c---------------------------------------------------------------------
341
342       include 'header.h'
343
344       integer first, last, c, i, j, k
345       integer m,n,isize,jsize,ksize,istart
346       
347       istart = 0
348       isize = cell_size(1,c)-1
349       jsize = cell_size(2,c)-end(2,c)-1      
350       ksize = cell_size(3,c)-end(3,c)-1
351       if (last .eq. 0) then
352          do k=start(3,c),ksize
353             do j=start(2,c),jsize
354 c---------------------------------------------------------------------
355 c     U(isize) uses info from previous cell if not last cell
356 c---------------------------------------------------------------------
357                do m=1,BLOCK_SIZE
358                   do n=1,BLOCK_SIZE
359                      rhs(m,isize,j,k,c) = rhs(m,isize,j,k,c) 
360      >                    - lhsc(m,n,isize,j,k,c)*
361      >                    backsub_info(n,j,k,c)
362 c---------------------------------------------------------------------
363 c     rhs(m,isize,j,k,c) = rhs(m,isize,j,k,c) 
364 c     $                    - lhsc(m,n,isize,j,k,c)*rhs(n,isize+1,j,k,c)
365 c---------------------------------------------------------------------
366                   enddo
367                enddo
368             enddo
369          enddo
370       endif
371       do k=start(3,c),ksize
372          do j=start(2,c),jsize
373             do i=isize-1,istart,-1
374                do m=1,BLOCK_SIZE
375                   do n=1,BLOCK_SIZE
376                      rhs(m,i,j,k,c) = rhs(m,i,j,k,c) 
377      >                    - lhsc(m,n,i,j,k,c)*rhs(n,i+1,j,k,c)
378                   enddo
379                enddo
380             enddo
381          enddo
382       enddo
383
384       return
385       end
386
387
388 c---------------------------------------------------------------------
389 c---------------------------------------------------------------------
390
391       subroutine x_solve_cell(first,last,c)
392
393 c---------------------------------------------------------------------
394 c---------------------------------------------------------------------
395
396 c---------------------------------------------------------------------
397 c     performs guaussian elimination on this cell.
398 c     
399 c     assumes that unpacking routines for non-first cells 
400 c     preload C' and rhs' from previous cell.
401 c     
402 c     assumed send happens outside this routine, but that
403 c     c'(IMAX) and rhs'(IMAX) will be sent to next cell
404 c---------------------------------------------------------------------
405
406       include 'header.h'
407       include 'work_lhs_vec.h'
408
409       integer first,last,c
410       integer i,j,k,m,n,isize,ksize,jsize,istart
411
412       istart = 0
413       isize = cell_size(1,c)-1
414       jsize = cell_size(2,c)-end(2,c)-1
415       ksize = cell_size(3,c)-end(3,c)-1
416
417 c---------------------------------------------------------------------
418 c     zero the left hand side for starters
419 c     set diagonal values to 1. This is overkill, but convenient
420 c---------------------------------------------------------------------
421       do j = 0, jsize
422          do m = 1, 5
423             do n = 1, 5
424                lhsa(m,n,0,j) = 0.0d0
425                lhsb(m,n,0,j) = 0.0d0
426                lhsa(m,n,isize,j) = 0.0d0
427                lhsb(m,n,isize,j) = 0.0d0
428             enddo
429             lhsb(m,m,0,j) = 1.0d0
430             lhsb(m,m,isize,j) = 1.0d0
431          enddo
432       enddo
433
434       do k=start(3,c),ksize 
435
436 c---------------------------------------------------------------------
437 c     This function computes the left hand side in the xi-direction
438 c---------------------------------------------------------------------
439
440 c---------------------------------------------------------------------
441 c     determine a (labeled f) and n jacobians for cell c
442 c---------------------------------------------------------------------
443          do j=start(2,c),jsize
444             do i = start(1,c)-1, cell_size(1,c) - end(1,c)
445
446                tmp1 = rho_i(i,j,k,c)
447                tmp2 = tmp1 * tmp1
448                tmp3 = tmp1 * tmp2
449 c---------------------------------------------------------------------
450 c     
451 c---------------------------------------------------------------------
452                fjac(1,1,i,j) = 0.0d+00
453                fjac(1,2,i,j) = 1.0d+00
454                fjac(1,3,i,j) = 0.0d+00
455                fjac(1,4,i,j) = 0.0d+00
456                fjac(1,5,i,j) = 0.0d+00
457
458                fjac(2,1,i,j) = -(u(2,i,j,k,c) * tmp2 * 
459      >              u(2,i,j,k,c))
460      >              + c2 * qs(i,j,k,c)
461                fjac(2,2,i,j) = ( 2.0d+00 - c2 )
462      >              * ( u(2,i,j,k,c) * tmp1 )
463                fjac(2,3,i,j) = - c2 * ( u(3,i,j,k,c) * tmp1 )
464                fjac(2,4,i,j) = - c2 * ( u(4,i,j,k,c) * tmp1 )
465                fjac(2,5,i,j) = c2
466
467                fjac(3,1,i,j) = - ( u(2,i,j,k,c)*u(3,i,j,k,c) ) * tmp2
468                fjac(3,2,i,j) = u(3,i,j,k,c) * tmp1
469                fjac(3,3,i,j) = u(2,i,j,k,c) * tmp1
470                fjac(3,4,i,j) = 0.0d+00
471                fjac(3,5,i,j) = 0.0d+00
472
473                fjac(4,1,i,j) = - ( u(2,i,j,k,c)*u(4,i,j,k,c) ) * tmp2
474                fjac(4,2,i,j) = u(4,i,j,k,c) * tmp1
475                fjac(4,3,i,j) = 0.0d+00
476                fjac(4,4,i,j) = u(2,i,j,k,c) * tmp1
477                fjac(4,5,i,j) = 0.0d+00
478
479                fjac(5,1,i,j) = ( c2 * 2.0d0 * qs(i,j,k,c)
480      >              - c1 * ( u(5,i,j,k,c) * tmp1 ) )
481      >              * ( u(2,i,j,k,c) * tmp1 )
482                fjac(5,2,i,j) = c1 *  u(5,i,j,k,c) * tmp1 
483      >              - c2
484      >              * ( u(2,i,j,k,c)*u(2,i,j,k,c) * tmp2
485      >              + qs(i,j,k,c) )
486                fjac(5,3,i,j) = - c2 * ( u(3,i,j,k,c)*u(2,i,j,k,c) )
487      >              * tmp2
488                fjac(5,4,i,j) = - c2 * ( u(4,i,j,k,c)*u(2,i,j,k,c) )
489      >              * tmp2
490                fjac(5,5,i,j) = c1 * ( u(2,i,j,k,c) * tmp1 )
491
492                njac(1,1,i,j) = 0.0d+00
493                njac(1,2,i,j) = 0.0d+00
494                njac(1,3,i,j) = 0.0d+00
495                njac(1,4,i,j) = 0.0d+00
496                njac(1,5,i,j) = 0.0d+00
497
498                njac(2,1,i,j) = - con43 * c3c4 * tmp2 * u(2,i,j,k,c)
499                njac(2,2,i,j) =   con43 * c3c4 * tmp1
500                njac(2,3,i,j) =   0.0d+00
501                njac(2,4,i,j) =   0.0d+00
502                njac(2,5,i,j) =   0.0d+00
503
504                njac(3,1,i,j) = - c3c4 * tmp2 * u(3,i,j,k,c)
505                njac(3,2,i,j) =   0.0d+00
506                njac(3,3,i,j) =   c3c4 * tmp1
507                njac(3,4,i,j) =   0.0d+00
508                njac(3,5,i,j) =   0.0d+00
509
510                njac(4,1,i,j) = - c3c4 * tmp2 * u(4,i,j,k,c)
511                njac(4,2,i,j) =   0.0d+00 
512                njac(4,3,i,j) =   0.0d+00
513                njac(4,4,i,j) =   c3c4 * tmp1
514                njac(4,5,i,j) =   0.0d+00
515
516                njac(5,1,i,j) = - ( con43 * c3c4
517      >              - c1345 ) * tmp3 * (u(2,i,j,k,c)**2)
518      >              - ( c3c4 - c1345 ) * tmp3 * (u(3,i,j,k,c)**2)
519      >              - ( c3c4 - c1345 ) * tmp3 * (u(4,i,j,k,c)**2)
520      >              - c1345 * tmp2 * u(5,i,j,k,c)
521
522                njac(5,2,i,j) = ( con43 * c3c4
523      >              - c1345 ) * tmp2 * u(2,i,j,k,c)
524                njac(5,3,i,j) = ( c3c4 - c1345 ) * tmp2 * u(3,i,j,k,c)
525                njac(5,4,i,j) = ( c3c4 - c1345 ) * tmp2 * u(4,i,j,k,c)
526                njac(5,5,i,j) = ( c1345 ) * tmp1
527
528             enddo
529          enddo
530
531 c---------------------------------------------------------------------
532 c     now jacobians set, so form left hand side in x direction
533 c---------------------------------------------------------------------
534          do j=start(2,c),jsize
535             do i = start(1,c), isize - end(1,c)
536
537                tmp1 = dt * tx1
538                tmp2 = dt * tx2
539
540                lhsa(1,1,i,j) = - tmp2 * fjac(1,1,i-1,j)
541      >              - tmp1 * njac(1,1,i-1,j)
542      >              - tmp1 * dx1 
543                lhsa(1,2,i,j) = - tmp2 * fjac(1,2,i-1,j)
544      >              - tmp1 * njac(1,2,i-1,j)
545                lhsa(1,3,i,j) = - tmp2 * fjac(1,3,i-1,j)
546      >              - tmp1 * njac(1,3,i-1,j)
547                lhsa(1,4,i,j) = - tmp2 * fjac(1,4,i-1,j)
548      >              - tmp1 * njac(1,4,i-1,j)
549                lhsa(1,5,i,j) = - tmp2 * fjac(1,5,i-1,j)
550      >              - tmp1 * njac(1,5,i-1,j)
551
552                lhsa(2,1,i,j) = - tmp2 * fjac(2,1,i-1,j)
553      >              - tmp1 * njac(2,1,i-1,j)
554                lhsa(2,2,i,j) = - tmp2 * fjac(2,2,i-1,j)
555      >              - tmp1 * njac(2,2,i-1,j)
556      >              - tmp1 * dx2
557                lhsa(2,3,i,j) = - tmp2 * fjac(2,3,i-1,j)
558      >              - tmp1 * njac(2,3,i-1,j)
559                lhsa(2,4,i,j) = - tmp2 * fjac(2,4,i-1,j)
560      >              - tmp1 * njac(2,4,i-1,j)
561                lhsa(2,5,i,j) = - tmp2 * fjac(2,5,i-1,j)
562      >              - tmp1 * njac(2,5,i-1,j)
563
564                lhsa(3,1,i,j) = - tmp2 * fjac(3,1,i-1,j)
565      >              - tmp1 * njac(3,1,i-1,j)
566                lhsa(3,2,i,j) = - tmp2 * fjac(3,2,i-1,j)
567      >              - tmp1 * njac(3,2,i-1,j)
568                lhsa(3,3,i,j) = - tmp2 * fjac(3,3,i-1,j)
569      >              - tmp1 * njac(3,3,i-1,j)
570      >              - tmp1 * dx3 
571                lhsa(3,4,i,j) = - tmp2 * fjac(3,4,i-1,j)
572      >              - tmp1 * njac(3,4,i-1,j)
573                lhsa(3,5,i,j) = - tmp2 * fjac(3,5,i-1,j)
574      >              - tmp1 * njac(3,5,i-1,j)
575
576                lhsa(4,1,i,j) = - tmp2 * fjac(4,1,i-1,j)
577      >              - tmp1 * njac(4,1,i-1,j)
578                lhsa(4,2,i,j) = - tmp2 * fjac(4,2,i-1,j)
579      >              - tmp1 * njac(4,2,i-1,j)
580                lhsa(4,3,i,j) = - tmp2 * fjac(4,3,i-1,j)
581      >              - tmp1 * njac(4,3,i-1,j)
582                lhsa(4,4,i,j) = - tmp2 * fjac(4,4,i-1,j)
583      >              - tmp1 * njac(4,4,i-1,j)
584      >              - tmp1 * dx4
585                lhsa(4,5,i,j) = - tmp2 * fjac(4,5,i-1,j)
586      >              - tmp1 * njac(4,5,i-1,j)
587
588                lhsa(5,1,i,j) = - tmp2 * fjac(5,1,i-1,j)
589      >              - tmp1 * njac(5,1,i-1,j)
590                lhsa(5,2,i,j) = - tmp2 * fjac(5,2,i-1,j)
591      >              - tmp1 * njac(5,2,i-1,j)
592                lhsa(5,3,i,j) = - tmp2 * fjac(5,3,i-1,j)
593      >              - tmp1 * njac(5,3,i-1,j)
594                lhsa(5,4,i,j) = - tmp2 * fjac(5,4,i-1,j)
595      >              - tmp1 * njac(5,4,i-1,j)
596                lhsa(5,5,i,j) = - tmp2 * fjac(5,5,i-1,j)
597      >              - tmp1 * njac(5,5,i-1,j)
598      >              - tmp1 * dx5
599
600                lhsb(1,1,i,j) = 1.0d+00
601      >              + tmp1 * 2.0d+00 * njac(1,1,i,j)
602      >              + tmp1 * 2.0d+00 * dx1
603                lhsb(1,2,i,j) = tmp1 * 2.0d+00 * njac(1,2,i,j)
604                lhsb(1,3,i,j) = tmp1 * 2.0d+00 * njac(1,3,i,j)
605                lhsb(1,4,i,j) = tmp1 * 2.0d+00 * njac(1,4,i,j)
606                lhsb(1,5,i,j) = tmp1 * 2.0d+00 * njac(1,5,i,j)
607
608                lhsb(2,1,i,j) = tmp1 * 2.0d+00 * njac(2,1,i,j)
609                lhsb(2,2,i,j) = 1.0d+00
610      >              + tmp1 * 2.0d+00 * njac(2,2,i,j)
611      >              + tmp1 * 2.0d+00 * dx2
612                lhsb(2,3,i,j) = tmp1 * 2.0d+00 * njac(2,3,i,j)
613                lhsb(2,4,i,j) = tmp1 * 2.0d+00 * njac(2,4,i,j)
614                lhsb(2,5,i,j) = tmp1 * 2.0d+00 * njac(2,5,i,j)
615
616                lhsb(3,1,i,j) = tmp1 * 2.0d+00 * njac(3,1,i,j)
617                lhsb(3,2,i,j) = tmp1 * 2.0d+00 * njac(3,2,i,j)
618                lhsb(3,3,i,j) = 1.0d+00
619      >              + tmp1 * 2.0d+00 * njac(3,3,i,j)
620      >              + tmp1 * 2.0d+00 * dx3
621                lhsb(3,4,i,j) = tmp1 * 2.0d+00 * njac(3,4,i,j)
622                lhsb(3,5,i,j) = tmp1 * 2.0d+00 * njac(3,5,i,j)
623
624                lhsb(4,1,i,j) = tmp1 * 2.0d+00 * njac(4,1,i,j)
625                lhsb(4,2,i,j) = tmp1 * 2.0d+00 * njac(4,2,i,j)
626                lhsb(4,3,i,j) = tmp1 * 2.0d+00 * njac(4,3,i,j)
627                lhsb(4,4,i,j) = 1.0d+00
628      >              + tmp1 * 2.0d+00 * njac(4,4,i,j)
629      >              + tmp1 * 2.0d+00 * dx4
630                lhsb(4,5,i,j) = tmp1 * 2.0d+00 * njac(4,5,i,j)
631
632                lhsb(5,1,i,j) = tmp1 * 2.0d+00 * njac(5,1,i,j)
633                lhsb(5,2,i,j) = tmp1 * 2.0d+00 * njac(5,2,i,j)
634                lhsb(5,3,i,j) = tmp1 * 2.0d+00 * njac(5,3,i,j)
635                lhsb(5,4,i,j) = tmp1 * 2.0d+00 * njac(5,4,i,j)
636                lhsb(5,5,i,j) = 1.0d+00
637      >              + tmp1 * 2.0d+00 * njac(5,5,i,j)
638      >              + tmp1 * 2.0d+00 * dx5
639
640                lhsc(1,1,i,j,k,c) =  tmp2 * fjac(1,1,i+1,j)
641      >              - tmp1 * njac(1,1,i+1,j)
642      >              - tmp1 * dx1
643                lhsc(1,2,i,j,k,c) =  tmp2 * fjac(1,2,i+1,j)
644      >              - tmp1 * njac(1,2,i+1,j)
645                lhsc(1,3,i,j,k,c) =  tmp2 * fjac(1,3,i+1,j)
646      >              - tmp1 * njac(1,3,i+1,j)
647                lhsc(1,4,i,j,k,c) =  tmp2 * fjac(1,4,i+1,j)
648      >              - tmp1 * njac(1,4,i+1,j)
649                lhsc(1,5,i,j,k,c) =  tmp2 * fjac(1,5,i+1,j)
650      >              - tmp1 * njac(1,5,i+1,j)
651
652                lhsc(2,1,i,j,k,c) =  tmp2 * fjac(2,1,i+1,j)
653      >              - tmp1 * njac(2,1,i+1,j)
654                lhsc(2,2,i,j,k,c) =  tmp2 * fjac(2,2,i+1,j)
655      >              - tmp1 * njac(2,2,i+1,j)
656      >              - tmp1 * dx2
657                lhsc(2,3,i,j,k,c) =  tmp2 * fjac(2,3,i+1,j)
658      >              - tmp1 * njac(2,3,i+1,j)
659                lhsc(2,4,i,j,k,c) =  tmp2 * fjac(2,4,i+1,j)
660      >              - tmp1 * njac(2,4,i+1,j)
661                lhsc(2,5,i,j,k,c) =  tmp2 * fjac(2,5,i+1,j)
662      >              - tmp1 * njac(2,5,i+1,j)
663
664                lhsc(3,1,i,j,k,c) =  tmp2 * fjac(3,1,i+1,j)
665      >              - tmp1 * njac(3,1,i+1,j)
666                lhsc(3,2,i,j,k,c) =  tmp2 * fjac(3,2,i+1,j)
667      >              - tmp1 * njac(3,2,i+1,j)
668                lhsc(3,3,i,j,k,c) =  tmp2 * fjac(3,3,i+1,j)
669      >              - tmp1 * njac(3,3,i+1,j)
670      >              - tmp1 * dx3
671                lhsc(3,4,i,j,k,c) =  tmp2 * fjac(3,4,i+1,j)
672      >              - tmp1 * njac(3,4,i+1,j)
673                lhsc(3,5,i,j,k,c) =  tmp2 * fjac(3,5,i+1,j)
674      >              - tmp1 * njac(3,5,i+1,j)
675
676                lhsc(4,1,i,j,k,c) =  tmp2 * fjac(4,1,i+1,j)
677      >              - tmp1 * njac(4,1,i+1,j)
678                lhsc(4,2,i,j,k,c) =  tmp2 * fjac(4,2,i+1,j)
679      >              - tmp1 * njac(4,2,i+1,j)
680                lhsc(4,3,i,j,k,c) =  tmp2 * fjac(4,3,i+1,j)
681      >              - tmp1 * njac(4,3,i+1,j)
682                lhsc(4,4,i,j,k,c) =  tmp2 * fjac(4,4,i+1,j)
683      >              - tmp1 * njac(4,4,i+1,j)
684      >              - tmp1 * dx4
685                lhsc(4,5,i,j,k,c) =  tmp2 * fjac(4,5,i+1,j)
686      >              - tmp1 * njac(4,5,i+1,j)
687
688                lhsc(5,1,i,j,k,c) =  tmp2 * fjac(5,1,i+1,j)
689      >              - tmp1 * njac(5,1,i+1,j)
690                lhsc(5,2,i,j,k,c) =  tmp2 * fjac(5,2,i+1,j)
691      >              - tmp1 * njac(5,2,i+1,j)
692                lhsc(5,3,i,j,k,c) =  tmp2 * fjac(5,3,i+1,j)
693      >              - tmp1 * njac(5,3,i+1,j)
694                lhsc(5,4,i,j,k,c) =  tmp2 * fjac(5,4,i+1,j)
695      >              - tmp1 * njac(5,4,i+1,j)
696                lhsc(5,5,i,j,k,c) =  tmp2 * fjac(5,5,i+1,j)
697      >              - tmp1 * njac(5,5,i+1,j)
698      >              - tmp1 * dx5
699
700             enddo
701          enddo
702
703
704 c---------------------------------------------------------------------
705 c     outer most do loops - sweeping in i direction
706 c---------------------------------------------------------------------
707          if (first .eq. 1) then 
708
709 c---------------------------------------------------------------------
710 c     multiply c(istart,j,k) by b_inverse and copy back to c
711 c     multiply rhs(istart) by b_inverse(istart) and copy to rhs
712 c---------------------------------------------------------------------
713 !dir$ ivdep
714             do j=start(2,c),jsize
715                call binvcrhs( lhsb(1,1,istart,j),
716      >                        lhsc(1,1,istart,j,k,c),
717      >                        rhs(1,istart,j,k,c) )
718             enddo
719
720          endif
721
722 c---------------------------------------------------------------------
723 c     begin inner most do loop
724 c     do all the elements of the cell unless last 
725 c---------------------------------------------------------------------
726 !dir$ ivdep
727 !dir$ interchange(i,j)
728          do j=start(2,c),jsize
729             do i=istart+first,isize-last
730
731 c---------------------------------------------------------------------
732 c     rhs(i) = rhs(i) - A*rhs(i-1)
733 c---------------------------------------------------------------------
734                call matvec_sub(lhsa(1,1,i,j),
735      >                         rhs(1,i-1,j,k,c),rhs(1,i,j,k,c))
736
737 c---------------------------------------------------------------------
738 c     B(i) = B(i) - C(i-1)*A(i)
739 c---------------------------------------------------------------------
740                call matmul_sub(lhsa(1,1,i,j),
741      >                         lhsc(1,1,i-1,j,k,c),
742      >                         lhsb(1,1,i,j))
743
744
745 c---------------------------------------------------------------------
746 c     multiply c(i,j,k) by b_inverse and copy back to c
747 c     multiply rhs(1,j,k) by b_inverse(1,j,k) and copy to rhs
748 c---------------------------------------------------------------------
749                call binvcrhs( lhsb(1,1,i,j),
750      >                        lhsc(1,1,i,j,k,c),
751      >                        rhs(1,i,j,k,c) )
752
753             enddo
754          enddo
755
756 c---------------------------------------------------------------------
757 c     Now finish up special cases for last cell
758 c---------------------------------------------------------------------
759          if (last .eq. 1) then
760
761 !dir$ ivdep
762             do j=start(2,c),jsize
763 c---------------------------------------------------------------------
764 c     rhs(isize) = rhs(isize) - A*rhs(isize-1)
765 c---------------------------------------------------------------------
766                call matvec_sub(lhsa(1,1,isize,j),
767      >                         rhs(1,isize-1,j,k,c),rhs(1,isize,j,k,c))
768
769 c---------------------------------------------------------------------
770 c     B(isize) = B(isize) - C(isize-1)*A(isize)
771 c---------------------------------------------------------------------
772                call matmul_sub(lhsa(1,1,isize,j),
773      >                         lhsc(1,1,isize-1,j,k,c),
774      >                         lhsb(1,1,isize,j))
775
776 c---------------------------------------------------------------------
777 c     multiply rhs() by b_inverse() and copy to rhs
778 c---------------------------------------------------------------------
779                call binvrhs( lhsb(1,1,isize,j),
780      >                       rhs(1,isize,j,k,c) )
781             enddo
782
783          endif
784       enddo
785
786
787       return
788       end
789