-!-------------------------------------------------------------------------!
-! !
-! N A S P A R A L L E L B E N C H M A R K S 3.3 !
-! !
-! M G !
-! !
-!-------------------------------------------------------------------------!
-! !
-! This benchmark is part of the NAS Parallel Benchmark 3.3 suite. !
-! It is described in NAS Technical Reports 95-020 and 02-007 !
-! !
-! Permission to use, copy, distribute and modify this software !
-! for any purpose with or without fee is hereby granted. We !
-! request, however, that all derived work reference the NAS !
-! Parallel Benchmarks 3.3. This software is provided "as is" !
-! without express or implied warranty. !
-! !
-! Information on NPB 3.3, including the technical report, the !
-! original specifications, source code, results and information !
-! on how to submit new results, is available at: !
-! !
-! http://www.nas.nasa.gov/Software/NPB/ !
-! !
-! Send comments or suggestions to npb@nas.nasa.gov !
-! !
-! NAS Parallel Benchmarks Group !
-! NASA Ames Research Center !
-! Mail Stop: T27A-1 !
-! Moffett Field, CA 94035-1000 !
-! !
-! E-mail: npb@nas.nasa.gov !
-! Fax: (650) 604-3957 !
-! !
-!-------------------------------------------------------------------------!
-
-
-c---------------------------------------------------------------------
-c
-c Authors: E. Barszcz
-c P. Frederickson
-c A. Woo
-c M. Yarrow
-c R. F. Van der Wijngaart
-c
-c---------------------------------------------------------------------
-
-
-c---------------------------------------------------------------------
- program mg_mpi
-c---------------------------------------------------------------------
-
- implicit none
-
- include 'mpinpb.h'
- include 'globals.h'
-
-c---------------------------------------------------------------------------c
-c k is the current level. It is passed down through subroutine args
-c and is NOT global. it is the current iteration
-c---------------------------------------------------------------------------c
-
- integer k, it
-
- external timer_read
- double precision t, t0, tinit, mflops, timer_read
-
-c---------------------------------------------------------------------------c
-c These arrays are in common because they are quite large
-c and probably shouldn't be allocated on the stack. They
-c are always passed as subroutine args.
-c---------------------------------------------------------------------------c
-
- double precision u(nr),v(nv),r(nr),a(0:3),c(0:3)
- common /noautom/ u,v,r
-
- double precision rnm2, rnmu, old2, oldu, epsilon
- integer n1, n2, n3, nit
- double precision nn, verify_value, err
- logical verified
-
- integer ierr,i, fstatus
- integer T_bench, T_init
- parameter (T_bench=1, T_init=2)
-
- call mpi_init(ierr)
- call mpi_comm_rank(mpi_comm_world, me, ierr)
- call mpi_comm_size(mpi_comm_world, nprocs, ierr)
-
- root = 0
- if (nprocs_compiled .gt. maxprocs) then
- if (me .eq. root) write(*,20) nprocs_compiled, maxprocs
- 20 format(' ERROR: compiled for ',i8,' processes'//
- & ' The maximum size allowed for this benchmark is ',i6)
- call mpi_abort(MPI_COMM_WORLD, ierr)
- stop
- endif
-
- if (.not. convertdouble) then
- dp_type = MPI_DOUBLE_PRECISION
- else
- dp_type = MPI_REAL
- endif
-
-
- call timer_clear(T_bench)
- call timer_clear(T_init)
-
- call mpi_barrier(MPI_COMM_WORLD, ierr)
-
- call timer_start(T_init)
-
-
-c---------------------------------------------------------------------
-c Read in and broadcast input data
-c---------------------------------------------------------------------
-
- if( me .eq. root )then
- write (*, 1000)
-
- open(unit=7,file="mg.input", status="old", iostat=fstatus)
- if (fstatus .eq. 0) then
- write(*,50)
- 50 format(' Reading from input file mg.input')
- read(7,*) lt
- read(7,*) nx(lt), ny(lt), nz(lt)
- read(7,*) nit
- read(7,*) (debug_vec(i),i=0,7)
- else
- write(*,51)
- 51 format(' No input file. Using compiled defaults ')
- lt = lt_default
- nit = nit_default
- nx(lt) = nx_default
- ny(lt) = ny_default
- nz(lt) = nz_default
- do i = 0,7
- debug_vec(i) = debug_default
- end do
- endif
- endif
-
- call mpi_bcast(lt, 1, MPI_INTEGER, 0, mpi_comm_world, ierr)
- call mpi_bcast(nit, 1, MPI_INTEGER, 0, mpi_comm_world, ierr)
- call mpi_bcast(nx(lt), 1, MPI_INTEGER, 0, mpi_comm_world, ierr)
- call mpi_bcast(ny(lt), 1, MPI_INTEGER, 0, mpi_comm_world, ierr)
- call mpi_bcast(nz(lt), 1, MPI_INTEGER, 0, mpi_comm_world, ierr)
- call mpi_bcast(debug_vec(0), 8, MPI_INTEGER, 0,
- > mpi_comm_world, ierr)
-
- if ( (nx(lt) .ne. ny(lt)) .or. (nx(lt) .ne. nz(lt)) ) then
- Class = 'U'
- else if( nx(lt) .eq. 32 .and. nit .eq. 4 ) then
- Class = 'S'
- else if( nx(lt) .eq. 128 .and. nit .eq. 4 ) then
- Class = 'W'
- else if( nx(lt) .eq. 256 .and. nit .eq. 4 ) then
- Class = 'A'
- else if( nx(lt) .eq. 256 .and. nit .eq. 20 ) then
- Class = 'B'
- else if( nx(lt) .eq. 512 .and. nit .eq. 20 ) then
- Class = 'C'
- else if( nx(lt) .eq. 1024 .and. nit .eq. 50 ) then
- Class = 'D'
- else if( nx(lt) .eq. 2048 .and. nit .eq. 50 ) then
- Class = 'E'
- else
- Class = 'U'
- endif
-
-c---------------------------------------------------------------------
-c Use these for debug info:
-c---------------------------------------------------------------------
-c debug_vec(0) = 1 !=> report all norms
-c debug_vec(1) = 1 !=> some setup information
-c debug_vec(1) = 2 !=> more setup information
-c debug_vec(2) = k => at level k or below, show result of resid
-c debug_vec(3) = k => at level k or below, show result of psinv
-c debug_vec(4) = k => at level k or below, show result of rprj
-c debug_vec(5) = k => at level k or below, show result of interp
-c debug_vec(6) = 1 => (unused)
-c debug_vec(7) = 1 => (unused)
-c---------------------------------------------------------------------
- a(0) = -8.0D0/3.0D0
- a(1) = 0.0D0
- a(2) = 1.0D0/6.0D0
- a(3) = 1.0D0/12.0D0
-
- if(Class .eq. 'A' .or. Class .eq. 'S'.or. Class .eq.'W') then
-c---------------------------------------------------------------------
-c Coefficients for the S(a) smoother
-c---------------------------------------------------------------------
- c(0) = -3.0D0/8.0D0
- c(1) = +1.0D0/32.0D0
- c(2) = -1.0D0/64.0D0
- c(3) = 0.0D0
- else
-c---------------------------------------------------------------------
-c Coefficients for the S(b) smoother
-c---------------------------------------------------------------------
- c(0) = -3.0D0/17.0D0
- c(1) = +1.0D0/33.0D0
- c(2) = -1.0D0/61.0D0
- c(3) = 0.0D0
- endif
- lb = 1
- k = lt
-
- call setup(n1,n2,n3,k)
- call zero3(u,n1,n2,n3)
- call zran3(v,n1,n2,n3,nx(lt),ny(lt),k)
-
- call norm2u3(v,n1,n2,n3,rnm2,rnmu,nx(lt),ny(lt),nz(lt))
-
- if( me .eq. root )then
- write (*, 1001) nx(lt),ny(lt),nz(lt), Class
- write (*, 1002) nit
-
- 1000 format(//,' NAS Parallel Benchmarks 3.3 -- MG Benchmark', /)
- 1001 format(' Size: ', i4, 'x', i4, 'x', i4, ' (class ', A, ')' )
- 1002 format(' Iterations: ', i4)
- 1003 format(' Number of processes: ', i6)
- if (nprocs .ne. nprocs_compiled) then
- write (*, 1004) nprocs_compiled
- write (*, 1005) nprocs
- 1004 format(' WARNING: compiled for ', i6, ' processes ')
- 1005 format(' Number of active processes: ', i6, /)
- else
- write (*, 1003) nprocs
- endif
- endif
-
- call resid(u,v,r,n1,n2,n3,a,k)
- call norm2u3(r,n1,n2,n3,rnm2,rnmu,nx(lt),ny(lt),nz(lt))
- old2 = rnm2
- oldu = rnmu
-
-c---------------------------------------------------------------------
-c One iteration for startup
-c---------------------------------------------------------------------
- call mg3P(u,v,r,a,c,n1,n2,n3,k)
- call resid(u,v,r,n1,n2,n3,a,k)
- call setup(n1,n2,n3,k)
- call zero3(u,n1,n2,n3)
- call zran3(v,n1,n2,n3,nx(lt),ny(lt),k)
-
- call timer_stop(T_init)
- if( me .eq. root )then
- tinit = timer_read(T_init)
- write( *,'(/A,F15.3,A/)' )
- > ' Initialization time: ',tinit, ' seconds'
- endif
-
- call mpi_barrier(mpi_comm_world,ierr)
-
- call timer_start(T_bench)
-
- call resid(u,v,r,n1,n2,n3,a,k)
- call norm2u3(r,n1,n2,n3,rnm2,rnmu,nx(lt),ny(lt),nz(lt))
- old2 = rnm2
- oldu = rnmu
-
- do it=1,nit
- if (it.eq.1 .or. it.eq.nit .or. mod(it,5).eq.0) then
- if (me .eq. root) write(*,80) it
- 80 format(' iter ',i4)
- endif
- call mg3P(u,v,r,a,c,n1,n2,n3,k)
- call resid(u,v,r,n1,n2,n3,a,k)
- enddo
-
-
- call norm2u3(r,n1,n2,n3,rnm2,rnmu,nx(lt),ny(lt),nz(lt))
-
- call timer_stop(T_bench)
-
- t0 = timer_read(T_bench)
-
- call mpi_reduce(t0,t,1,dp_type,
- > mpi_max,root,mpi_comm_world,ierr)
- verified = .FALSE.
- verify_value = 0.0
- if( me .eq. root )then
- write(*,100)
- 100 format(/' Benchmark completed ')
-
- epsilon = 1.d-8
- if (Class .ne. 'U') then
- if(Class.eq.'S') then
- verify_value = 0.5307707005734d-04
- elseif(Class.eq.'W') then
- verify_value = 0.6467329375339d-05
- elseif(Class.eq.'A') then
- verify_value = 0.2433365309069d-05
- elseif(Class.eq.'B') then
- verify_value = 0.1800564401355d-05
- elseif(Class.eq.'C') then
- verify_value = 0.5706732285740d-06
- elseif(Class.eq.'D') then
- verify_value = 0.1583275060440d-09
- elseif(Class.eq.'E') then
- verify_value = 0.5630442584711d-10
- endif
-
- err = abs( rnm2 - verify_value ) / verify_value
- if( err .le. epsilon ) then
- verified = .TRUE.
- write(*, 200)
- write(*, 201) rnm2
- write(*, 202) err
- 200 format(' VERIFICATION SUCCESSFUL ')
- 201 format(' L2 Norm is ', E20.13)
- 202 format(' Error is ', E20.13)
- else
- verified = .FALSE.
- write(*, 300)
- write(*, 301) rnm2
- write(*, 302) verify_value
- 300 format(' VERIFICATION FAILED')
- 301 format(' L2 Norm is ', E20.13)
- 302 format(' The correct L2 Norm is ', E20.13)
- endif
- else
- verified = .FALSE.
- write (*, 400)
- write (*, 401)
- write (*, 201) rnm2
- 400 format(' Problem size unknown')
- 401 format(' NO VERIFICATION PERFORMED')
- endif
-
- nn = 1.0d0*nx(lt)*ny(lt)*nz(lt)
-
- if( t .ne. 0. ) then
- mflops = 58.*1.0D-6*nit*nn / t
- else
- mflops = 0.0
- endif
-
- call print_results('MG', class, nx(lt), ny(lt), nz(lt),
- > nit, nprocs_compiled, nprocs, t,
- > mflops, ' floating point',
- > verified, npbversion, compiletime,
- > cs1, cs2, cs3, cs4, cs5, cs6, cs7)
-
-
- endif
-
-
- call mpi_finalize(ierr)
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine setup(n1,n2,n3,k)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- implicit none
-
- include 'mpinpb.h'
- include 'globals.h'
-
- integer is1, is2, is3, ie1, ie2, ie3
- common /grid/ is1,is2,is3,ie1,ie2,ie3
-
- integer n1,n2,n3,k
- integer dx, dy, log_p, d, i, j
-
- integer ax, next(3),mi(3,maxlevel),mip(3,maxlevel)
- integer ng(3,maxlevel)
- integer idi(3), pi(3), idin(3,-1:1)
- integer s, dir,ierr
-
- do j=-1,1,1
- do d=1,3
- msg_type(d,j) = 100*(j+2+10*d)
- enddo
- enddo
-
- ng(1,lt) = nx(lt)
- ng(2,lt) = ny(lt)
- ng(3,lt) = nz(lt)
- do ax=1,3
- next(ax) = 1
- do k=lt-1,1,-1
- ng(ax,k) = ng(ax,k+1)/2
- enddo
- enddo
- 61 format(10i4)
- do k=lt,1,-1
- nx(k) = ng(1,k)
- ny(k) = ng(2,k)
- nz(k) = ng(3,k)
- enddo
-
- log_p = log(float(nprocs)+0.0001)/log(2.0)
- dx = log_p/3
- pi(1) = 2**dx
- idi(1) = mod(me,pi(1))
-
- dy = (log_p-dx)/2
- pi(2) = 2**dy
- idi(2) = mod((me/pi(1)),pi(2))
-
- pi(3) = nprocs/(pi(1)*pi(2))
- idi(3) = me/(pi(1)*pi(2))
-
- do k = lt,1,-1
- dead(k) = .false.
- do ax = 1,3
- take_ex(ax,k) = .false.
- give_ex(ax,k) = .false.
-
- mi(ax,k) = 2 +
- > ((idi(ax)+1)*ng(ax,k))/pi(ax) -
- > ((idi(ax)+0)*ng(ax,k))/pi(ax)
- mip(ax,k) = 2 +
- > ((next(ax)+idi(ax)+1)*ng(ax,k))/pi(ax) -
- > ((next(ax)+idi(ax)+0)*ng(ax,k))/pi(ax)
-
- if(mip(ax,k).eq.2.or.mi(ax,k).eq.2)then
- next(ax) = 2*next(ax)
- endif
-
- if( k+1 .le. lt )then
- if((mip(ax,k).eq.2).and.(mi(ax,k).eq.3))then
- give_ex(ax,k+1) = .true.
- endif
- if((mip(ax,k).eq.3).and.(mi(ax,k).eq.2))then
- take_ex(ax,k+1) = .true.
- endif
- endif
- enddo
-
- if( mi(1,k).eq.2 .or.
- > mi(2,k).eq.2 .or.
- > mi(3,k).eq.2 )then
- dead(k) = .true.
- endif
- m1(k) = mi(1,k)
- m2(k) = mi(2,k)
- m3(k) = mi(3,k)
-
- do ax=1,3
- idin(ax,+1) = mod( idi(ax) + next(ax) + pi(ax) , pi(ax) )
- idin(ax,-1) = mod( idi(ax) - next(ax) + pi(ax) , pi(ax) )
- enddo
- do dir = 1,-1,-2
- nbr(1,dir,k) = idin(1,dir) + pi(1)
- > *(idi(2) + pi(2)
- > * idi(3))
- nbr(2,dir,k) = idi(1) + pi(1)
- > *(idin(2,dir) + pi(2)
- > * idi(3))
- nbr(3,dir,k) = idi(1) + pi(1)
- > *(idi(2) + pi(2)
- > * idin(3,dir))
- enddo
- enddo
-
- k = lt
- is1 = 2 + ng(1,k) - ((pi(1) -idi(1))*ng(1,lt))/pi(1)
- ie1 = 1 + ng(1,k) - ((pi(1)-1-idi(1))*ng(1,lt))/pi(1)
- n1 = 3 + ie1 - is1
- is2 = 2 + ng(2,k) - ((pi(2) -idi(2))*ng(2,lt))/pi(2)
- ie2 = 1 + ng(2,k) - ((pi(2)-1-idi(2))*ng(2,lt))/pi(2)
- n2 = 3 + ie2 - is2
- is3 = 2 + ng(3,k) - ((pi(3) -idi(3))*ng(3,lt))/pi(3)
- ie3 = 1 + ng(3,k) - ((pi(3)-1-idi(3))*ng(3,lt))/pi(3)
- n3 = 3 + ie3 - is3
-
-
- ir(lt)=1
- do j = lt-1, 1, -1
- ir(j)=ir(j+1)+m1(j+1)*m2(j+1)*m3(j+1)
- enddo
-
-
- if( debug_vec(1) .ge. 1 )then
- if( me .eq. root )write(*,*)' in setup, '
- if( me .eq. root )write(*,*)' me k lt nx ny nz ',
- > ' n1 n2 n3 is1 is2 is3 ie1 ie2 ie3'
- do i=0,nprocs-1
- if( me .eq. i )then
- write(*,9) me,k,lt,ng(1,k),ng(2,k),ng(3,k),
- > n1,n2,n3,is1,is2,is3,ie1,ie2,ie3
- 9 format(15i4)
- endif
- call mpi_barrier(mpi_comm_world,ierr)
- enddo
- endif
- if( debug_vec(1) .ge. 2 )then
- do i=0,nprocs-1
- if( me .eq. i )then
- write(*,*)' '
- write(*,*)' processor =',me
- do k=lt,1,-1
- write(*,7)k,idi(1),idi(2),idi(3),
- > ((nbr(d,j,k),j=-1,1,2),d=1,3),
- > (mi(d,k),d=1,3)
- enddo
- 7 format(i4,'idi=',3i4,'nbr=',3(2i4,' '),'mi=',3i4,' ')
- write(*,*)'idi(s) = ',(idi(s),s=1,3)
- write(*,*)'dead(2), dead(1) = ',dead(2),dead(1)
- do ax=1,3
- write(*,*)'give_ex(ax,2)= ',give_ex(ax,2)
- write(*,*)'take_ex(ax,2)= ',take_ex(ax,2)
- enddo
- endif
- call mpi_barrier(mpi_comm_world,ierr)
- enddo
- endif
-
- k = lt
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine mg3P(u,v,r,a,c,n1,n2,n3,k)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c multigrid V-cycle routine
-c---------------------------------------------------------------------
- implicit none
-
- include 'mpinpb.h'
- include 'globals.h'
-
- integer n1, n2, n3, k
- double precision u(nr),v(nv),r(nr)
- double precision a(0:3),c(0:3)
-
- integer j
-
-c---------------------------------------------------------------------
-c down cycle.
-c restrict the residual from the find grid to the coarse
-c---------------------------------------------------------------------
-
- do k= lt, lb+1 , -1
- j = k-1
- call rprj3(r(ir(k)),m1(k),m2(k),m3(k),
- > r(ir(j)),m1(j),m2(j),m3(j),k)
- enddo
-
- k = lb
-c---------------------------------------------------------------------
-c compute an approximate solution on the coarsest grid
-c---------------------------------------------------------------------
- call zero3(u(ir(k)),m1(k),m2(k),m3(k))
- call psinv(r(ir(k)),u(ir(k)),m1(k),m2(k),m3(k),c,k)
-
- do k = lb+1, lt-1
- j = k-1
-c---------------------------------------------------------------------
-c prolongate from level k-1 to k
-c---------------------------------------------------------------------
- call zero3(u(ir(k)),m1(k),m2(k),m3(k))
- call interp(u(ir(j)),m1(j),m2(j),m3(j),
- > u(ir(k)),m1(k),m2(k),m3(k),k)
-c---------------------------------------------------------------------
-c compute residual for level k
-c---------------------------------------------------------------------
- call resid(u(ir(k)),r(ir(k)),r(ir(k)),m1(k),m2(k),m3(k),a,k)
-c---------------------------------------------------------------------
-c apply smoother
-c---------------------------------------------------------------------
- call psinv(r(ir(k)),u(ir(k)),m1(k),m2(k),m3(k),c,k)
- enddo
- 200 continue
- j = lt - 1
- k = lt
- call interp(u(ir(j)),m1(j),m2(j),m3(j),u,n1,n2,n3,k)
- call resid(u,v,r,n1,n2,n3,a,k)
- call psinv(r,u,n1,n2,n3,c,k)
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine psinv( r,u,n1,n2,n3,c,k)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c psinv applies an approximate inverse as smoother: u = u + Cr
-c
-c This implementation costs 15A + 4M per result, where
-c A and M denote the costs of Addition and Multiplication.
-c Presuming coefficient c(3) is zero (the NPB assumes this,
-c but it is thus not a general case), 2A + 1M may be eliminated,
-c resulting in 13A + 3M.
-c Note that this vectorizes, and is also fine for cache
-c based machines.
-c---------------------------------------------------------------------
- implicit none
-
- include 'globals.h'
-
- integer n1,n2,n3,k
- double precision u(n1,n2,n3),r(n1,n2,n3),c(0:3)
- integer i3, i2, i1
-
- double precision r1(m), r2(m)
-
- do i3=2,n3-1
- do i2=2,n2-1
- do i1=1,n1
- r1(i1) = r(i1,i2-1,i3) + r(i1,i2+1,i3)
- > + r(i1,i2,i3-1) + r(i1,i2,i3+1)
- r2(i1) = r(i1,i2-1,i3-1) + r(i1,i2+1,i3-1)
- > + r(i1,i2-1,i3+1) + r(i1,i2+1,i3+1)
- enddo
- do i1=2,n1-1
- u(i1,i2,i3) = u(i1,i2,i3)
- > + c(0) * r(i1,i2,i3)
- > + c(1) * ( r(i1-1,i2,i3) + r(i1+1,i2,i3)
- > + r1(i1) )
- > + c(2) * ( r2(i1) + r1(i1-1) + r1(i1+1) )
-c---------------------------------------------------------------------
-c Assume c(3) = 0 (Enable line below if c(3) not= 0)
-c---------------------------------------------------------------------
-c > + c(3) * ( r2(i1-1) + r2(i1+1) )
-c---------------------------------------------------------------------
- enddo
- enddo
- enddo
-
-c---------------------------------------------------------------------
-c exchange boundary points
-c---------------------------------------------------------------------
- call comm3(u,n1,n2,n3,k)
-
- if( debug_vec(0) .ge. 1 )then
- call rep_nrm(u,n1,n2,n3,' psinv',k)
- endif
-
- if( debug_vec(3) .ge. k )then
- call showall(u,n1,n2,n3)
- endif
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine resid( u,v,r,n1,n2,n3,a,k )
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c resid computes the residual: r = v - Au
-c
-c This implementation costs 15A + 4M per result, where
-c A and M denote the costs of Addition (or Subtraction) and
-c Multiplication, respectively.
-c Presuming coefficient a(1) is zero (the NPB assumes this,
-c but it is thus not a general case), 3A + 1M may be eliminated,
-c resulting in 12A + 3M.
-c Note that this vectorizes, and is also fine for cache
-c based machines.
-c---------------------------------------------------------------------
- implicit none
-
- include 'globals.h'
-
- integer n1,n2,n3,k
- double precision u(n1,n2,n3),v(n1,n2,n3),r(n1,n2,n3),a(0:3)
- integer i3, i2, i1
- double precision u1(m), u2(m)
-
- do i3=2,n3-1
- do i2=2,n2-1
- do i1=1,n1
- u1(i1) = u(i1,i2-1,i3) + u(i1,i2+1,i3)
- > + u(i1,i2,i3-1) + u(i1,i2,i3+1)
- u2(i1) = u(i1,i2-1,i3-1) + u(i1,i2+1,i3-1)
- > + u(i1,i2-1,i3+1) + u(i1,i2+1,i3+1)
- enddo
- do i1=2,n1-1
- r(i1,i2,i3) = v(i1,i2,i3)
- > - a(0) * u(i1,i2,i3)
-c---------------------------------------------------------------------
-c Assume a(1) = 0 (Enable 2 lines below if a(1) not= 0)
-c---------------------------------------------------------------------
-c > - a(1) * ( u(i1-1,i2,i3) + u(i1+1,i2,i3)
-c > + u1(i1) )
-c---------------------------------------------------------------------
- > - a(2) * ( u2(i1) + u1(i1-1) + u1(i1+1) )
- > - a(3) * ( u2(i1-1) + u2(i1+1) )
- enddo
- enddo
- enddo
-
-c---------------------------------------------------------------------
-c exchange boundary data
-c---------------------------------------------------------------------
- call comm3(r,n1,n2,n3,k)
-
- if( debug_vec(0) .ge. 1 )then
- call rep_nrm(r,n1,n2,n3,' resid',k)
- endif
-
- if( debug_vec(2) .ge. k )then
- call showall(r,n1,n2,n3)
- endif
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine rprj3( r,m1k,m2k,m3k,s,m1j,m2j,m3j,k )
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c rprj3 projects onto the next coarser grid,
-c using a trilinear Finite Element projection: s = r' = P r
-c
-c This implementation costs 20A + 4M per result, where
-c A and M denote the costs of Addition and Multiplication.
-c Note that this vectorizes, and is also fine for cache
-c based machines.
-c---------------------------------------------------------------------
- implicit none
-
- include 'mpinpb.h'
- include 'globals.h'
-
- integer m1k, m2k, m3k, m1j, m2j, m3j,k
- double precision r(m1k,m2k,m3k), s(m1j,m2j,m3j)
- integer j3, j2, j1, i3, i2, i1, d1, d2, d3, j
-
- double precision x1(m), y1(m), x2,y2
-
-
- if(m1k.eq.3)then
- d1 = 2
- else
- d1 = 1
- endif
-
- if(m2k.eq.3)then
- d2 = 2
- else
- d2 = 1
- endif
-
- if(m3k.eq.3)then
- d3 = 2
- else
- d3 = 1
- endif
-
- do j3=2,m3j-1
- i3 = 2*j3-d3
-C i3 = 2*j3-1
- do j2=2,m2j-1
- i2 = 2*j2-d2
-C i2 = 2*j2-1
-
- do j1=2,m1j
- i1 = 2*j1-d1
-C i1 = 2*j1-1
- x1(i1-1) = r(i1-1,i2-1,i3 ) + r(i1-1,i2+1,i3 )
- > + r(i1-1,i2, i3-1) + r(i1-1,i2, i3+1)
- y1(i1-1) = r(i1-1,i2-1,i3-1) + r(i1-1,i2-1,i3+1)
- > + r(i1-1,i2+1,i3-1) + r(i1-1,i2+1,i3+1)
- enddo
-
- do j1=2,m1j-1
- i1 = 2*j1-d1
-C i1 = 2*j1-1
- y2 = r(i1, i2-1,i3-1) + r(i1, i2-1,i3+1)
- > + r(i1, i2+1,i3-1) + r(i1, i2+1,i3+1)
- x2 = r(i1, i2-1,i3 ) + r(i1, i2+1,i3 )
- > + r(i1, i2, i3-1) + r(i1, i2, i3+1)
- s(j1,j2,j3) =
- > 0.5D0 * r(i1,i2,i3)
- > + 0.25D0 * ( r(i1-1,i2,i3) + r(i1+1,i2,i3) + x2)
- > + 0.125D0 * ( x1(i1-1) + x1(i1+1) + y2)
- > + 0.0625D0 * ( y1(i1-1) + y1(i1+1) )
- enddo
-
- enddo
- enddo
-
-
- j = k-1
- call comm3(s,m1j,m2j,m3j,j)
-
- if( debug_vec(0) .ge. 1 )then
- call rep_nrm(s,m1j,m2j,m3j,' rprj3',k-1)
- endif
-
- if( debug_vec(4) .ge. k )then
- call showall(s,m1j,m2j,m3j)
- endif
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine interp( z,mm1,mm2,mm3,u,n1,n2,n3,k )
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c interp adds the trilinear interpolation of the correction
-c from the coarser grid to the current approximation: u = u + Qu'
-c
-c Observe that this implementation costs 16A + 4M, where
-c A and M denote the costs of Addition and Multiplication.
-c Note that this vectorizes, and is also fine for cache
-c based machines. Vector machines may get slightly better
-c performance however, with 8 separate "do i1" loops, rather than 4.
-c---------------------------------------------------------------------
- implicit none
-
- include 'mpinpb.h'
- include 'globals.h'
-
- integer mm1, mm2, mm3, n1, n2, n3,k
- double precision z(mm1,mm2,mm3),u(n1,n2,n3)
- integer i3, i2, i1, d1, d2, d3, t1, t2, t3
-
-c note that m = 1037 in globals.h but for this only need to be
-c 535 to handle up to 1024^3
-c integer m
-c parameter( m=535 )
- double precision z1(m),z2(m),z3(m)
-
-
- if( n1 .ne. 3 .and. n2 .ne. 3 .and. n3 .ne. 3 ) then
-
- do i3=1,mm3-1
- do i2=1,mm2-1
-
- do i1=1,mm1
- z1(i1) = z(i1,i2+1,i3) + z(i1,i2,i3)
- z2(i1) = z(i1,i2,i3+1) + z(i1,i2,i3)
- z3(i1) = z(i1,i2+1,i3+1) + z(i1,i2,i3+1) + z1(i1)
- enddo
-
- do i1=1,mm1-1
- u(2*i1-1,2*i2-1,2*i3-1)=u(2*i1-1,2*i2-1,2*i3-1)
- > +z(i1,i2,i3)
- u(2*i1,2*i2-1,2*i3-1)=u(2*i1,2*i2-1,2*i3-1)
- > +0.5d0*(z(i1+1,i2,i3)+z(i1,i2,i3))
- enddo
- do i1=1,mm1-1
- u(2*i1-1,2*i2,2*i3-1)=u(2*i1-1,2*i2,2*i3-1)
- > +0.5d0 * z1(i1)
- u(2*i1,2*i2,2*i3-1)=u(2*i1,2*i2,2*i3-1)
- > +0.25d0*( z1(i1) + z1(i1+1) )
- enddo
- do i1=1,mm1-1
- u(2*i1-1,2*i2-1,2*i3)=u(2*i1-1,2*i2-1,2*i3)
- > +0.5d0 * z2(i1)
- u(2*i1,2*i2-1,2*i3)=u(2*i1,2*i2-1,2*i3)
- > +0.25d0*( z2(i1) + z2(i1+1) )
- enddo
- do i1=1,mm1-1
- u(2*i1-1,2*i2,2*i3)=u(2*i1-1,2*i2,2*i3)
- > +0.25d0* z3(i1)
- u(2*i1,2*i2,2*i3)=u(2*i1,2*i2,2*i3)
- > +0.125d0*( z3(i1) + z3(i1+1) )
- enddo
- enddo
- enddo
-
- else
-
- if(n1.eq.3)then
- d1 = 2
- t1 = 1
- else
- d1 = 1
- t1 = 0
- endif
-
- if(n2.eq.3)then
- d2 = 2
- t2 = 1
- else
- d2 = 1
- t2 = 0
- endif
-
- if(n3.eq.3)then
- d3 = 2
- t3 = 1
- else
- d3 = 1
- t3 = 0
- endif
-
- do i3=d3,mm3-1
- do i2=d2,mm2-1
- do i1=d1,mm1-1
- u(2*i1-d1,2*i2-d2,2*i3-d3)=u(2*i1-d1,2*i2-d2,2*i3-d3)
- > +z(i1,i2,i3)
- enddo
- do i1=1,mm1-1
- u(2*i1-t1,2*i2-d2,2*i3-d3)=u(2*i1-t1,2*i2-d2,2*i3-d3)
- > +0.5D0*(z(i1+1,i2,i3)+z(i1,i2,i3))
- enddo
- enddo
- do i2=1,mm2-1
- do i1=d1,mm1-1
- u(2*i1-d1,2*i2-t2,2*i3-d3)=u(2*i1-d1,2*i2-t2,2*i3-d3)
- > +0.5D0*(z(i1,i2+1,i3)+z(i1,i2,i3))
- enddo
- do i1=1,mm1-1
- u(2*i1-t1,2*i2-t2,2*i3-d3)=u(2*i1-t1,2*i2-t2,2*i3-d3)
- > +0.25D0*(z(i1+1,i2+1,i3)+z(i1+1,i2,i3)
- > +z(i1, i2+1,i3)+z(i1, i2,i3))
- enddo
- enddo
- enddo
-
- do i3=1,mm3-1
- do i2=d2,mm2-1
- do i1=d1,mm1-1
- u(2*i1-d1,2*i2-d2,2*i3-t3)=u(2*i1-d1,2*i2-d2,2*i3-t3)
- > +0.5D0*(z(i1,i2,i3+1)+z(i1,i2,i3))
- enddo
- do i1=1,mm1-1
- u(2*i1-t1,2*i2-d2,2*i3-t3)=u(2*i1-t1,2*i2-d2,2*i3-t3)
- > +0.25D0*(z(i1+1,i2,i3+1)+z(i1,i2,i3+1)
- > +z(i1+1,i2,i3 )+z(i1,i2,i3 ))
- enddo
- enddo
- do i2=1,mm2-1
- do i1=d1,mm1-1
- u(2*i1-d1,2*i2-t2,2*i3-t3)=u(2*i1-d1,2*i2-t2,2*i3-t3)
- > +0.25D0*(z(i1,i2+1,i3+1)+z(i1,i2,i3+1)
- > +z(i1,i2+1,i3 )+z(i1,i2,i3 ))
- enddo
- do i1=1,mm1-1
- u(2*i1-t1,2*i2-t2,2*i3-t3)=u(2*i1-t1,2*i2-t2,2*i3-t3)
- > +0.125D0*(z(i1+1,i2+1,i3+1)+z(i1+1,i2,i3+1)
- > +z(i1 ,i2+1,i3+1)+z(i1 ,i2,i3+1)
- > +z(i1+1,i2+1,i3 )+z(i1+1,i2,i3 )
- > +z(i1 ,i2+1,i3 )+z(i1 ,i2,i3 ))
- enddo
- enddo
- enddo
-
- endif
-
- call comm3_ex(u,n1,n2,n3,k)
-
- if( debug_vec(0) .ge. 1 )then
- call rep_nrm(z,mm1,mm2,mm3,'z: inter',k-1)
- call rep_nrm(u,n1,n2,n3,'u: inter',k)
- endif
-
- if( debug_vec(5) .ge. k )then
- call showall(z,mm1,mm2,mm3)
- call showall(u,n1,n2,n3)
- endif
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine norm2u3(r,n1,n2,n3,rnm2,rnmu,nx,ny,nz)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c norm2u3 evaluates approximations to the L2 norm and the
-c uniform (or L-infinity or Chebyshev) norm, under the
-c assumption that the boundaries are periodic or zero. Add the
-c boundaries in with half weight (quarter weight on the edges
-c and eighth weight at the corners) for inhomogeneous boundaries.
-c---------------------------------------------------------------------
- implicit none
-
- include 'mpinpb.h'
-
- integer n1, n2, n3, nx, ny, nz
- double precision rnm2, rnmu, r(n1,n2,n3)
- double precision s, a, ss
- integer i3, i2, i1, ierr
-
- double precision dn
-
- dn = 1.0d0*nx*ny*nz
-
- s=0.0D0
- rnmu = 0.0D0
- do i3=2,n3-1
- do i2=2,n2-1
- do i1=2,n1-1
- s=s+r(i1,i2,i3)**2
- a=abs(r(i1,i2,i3))
- if(a.gt.rnmu)rnmu=a
- enddo
- enddo
- enddo
-
- call mpi_allreduce(rnmu,ss,1,dp_type,
- > mpi_max,mpi_comm_world,ierr)
- rnmu = ss
- call mpi_allreduce(s, ss, 1, dp_type,
- > mpi_sum,mpi_comm_world,ierr)
- s = ss
- rnm2=sqrt( s / dn )
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine rep_nrm(u,n1,n2,n3,title,kk)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c report on norm
-c---------------------------------------------------------------------
- implicit none
-
- include 'mpinpb.h'
- include 'globals.h'
-
- integer n1, n2, n3, kk
- double precision u(n1,n2,n3)
- character*8 title
-
- double precision rnm2, rnmu
-
-
- call norm2u3(u,n1,n2,n3,rnm2,rnmu,nx(kk),ny(kk),nz(kk))
- if( me .eq. root )then
- write(*,7)kk,title,rnm2,rnmu
- 7 format(' Level',i2,' in ',a8,': norms =',D21.14,D21.14)
- endif
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine comm3(u,n1,n2,n3,kk)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c comm3 organizes the communication on all borders
-c---------------------------------------------------------------------
- implicit none
-
- include 'mpinpb.h'
- include 'globals.h'
-
- integer n1, n2, n3, kk
- double precision u(n1,n2,n3)
- integer axis
-
- if( .not. dead(kk) )then
- do axis = 1, 3
- if( nprocs .ne. 1) then
-
- call ready( axis, -1, kk )
- call ready( axis, +1, kk )
-
- call give3( axis, +1, u, n1, n2, n3, kk )
- call give3( axis, -1, u, n1, n2, n3, kk )
-
- call take3( axis, -1, u, n1, n2, n3 )
- call take3( axis, +1, u, n1, n2, n3 )
-
- else
- call comm1p( axis, u, n1, n2, n3, kk )
- endif
- enddo
- else
- call zero3(u,n1,n2,n3)
- endif
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine comm3_ex(u,n1,n2,n3,kk)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c comm3_ex communicates to expand the number of processors
-c---------------------------------------------------------------------
- implicit none
-
- include 'mpinpb.h'
- include 'globals.h'
-
- integer n1, n2, n3, kk
- double precision u(n1,n2,n3)
- integer axis
-
- do axis = 1, 3
- if( nprocs .ne. 1 ) then
- if( take_ex( axis, kk ) )then
- call ready( axis, -1, kk )
- call ready( axis, +1, kk )
- call take3_ex( axis, -1, u, n1, n2, n3 )
- call take3_ex( axis, +1, u, n1, n2, n3 )
- endif
-
- if( give_ex( axis, kk ) )then
- call give3_ex( axis, +1, u, n1, n2, n3, kk )
- call give3_ex( axis, -1, u, n1, n2, n3, kk )
- endif
- else
- call comm1p_ex( axis, u, n1, n2, n3, kk )
- endif
- enddo
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine ready( axis, dir, k )
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c ready allocates a buffer to take in a message
-c---------------------------------------------------------------------
- implicit none
-
- include 'mpinpb.h'
- include 'globals.h'
-
- integer axis, dir, k
- integer buff_id,buff_len,i,ierr
-
- buff_id = 3 + dir
- buff_len = nm2
-
- do i=1,nm2
- buff(i,buff_id) = 0.0D0
- enddo
-
-
-c---------------------------------------------------------------------
-c fake message request type
-c---------------------------------------------------------------------
- msg_id(axis,dir,1) = msg_type(axis,dir) +1000*me
-
- call mpi_irecv( buff(1,buff_id), buff_len,
- > dp_type, nbr(axis,-dir,k), msg_type(axis,dir),
- > mpi_comm_world, msg_id(axis,dir,1), ierr)
- return
- end
-
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine give3( axis, dir, u, n1, n2, n3, k )
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c give3 sends border data out in the requested direction
-c---------------------------------------------------------------------
- implicit none
-
- include 'mpinpb.h'
- include 'globals.h'
-
- integer axis, dir, n1, n2, n3, k, ierr
- double precision u( n1, n2, n3 )
-
- integer i3, i2, i1, buff_len,buff_id
-
- buff_id = 2 + dir
- buff_len = 0
-
- if( axis .eq. 1 )then
- if( dir .eq. -1 )then
-
- do i3=2,n3-1
- do i2=2,n2-1
- buff_len = buff_len + 1
- buff(buff_len,buff_id ) = u( 2, i2,i3)
- enddo
- enddo
-
- call mpi_send(
- > buff(1, buff_id ), buff_len,dp_type,
- > nbr( axis, dir, k ), msg_type(axis,dir),
- > mpi_comm_world, ierr)
-
- else if( dir .eq. +1 ) then
-
- do i3=2,n3-1
- do i2=2,n2-1
- buff_len = buff_len + 1
- buff(buff_len, buff_id ) = u( n1-1, i2,i3)
- enddo
- enddo
-
- call mpi_send(
- > buff(1, buff_id ), buff_len,dp_type,
- > nbr( axis, dir, k ), msg_type(axis,dir),
- > mpi_comm_world, ierr)
-
- endif
- endif
-
- if( axis .eq. 2 )then
- if( dir .eq. -1 )then
-
- do i3=2,n3-1
- do i1=1,n1
- buff_len = buff_len + 1
- buff(buff_len, buff_id ) = u( i1, 2,i3)
- enddo
- enddo
-
- call mpi_send(
- > buff(1, buff_id ), buff_len,dp_type,
- > nbr( axis, dir, k ), msg_type(axis,dir),
- > mpi_comm_world, ierr)
-
- else if( dir .eq. +1 ) then
-
- do i3=2,n3-1
- do i1=1,n1
- buff_len = buff_len + 1
- buff(buff_len, buff_id )= u( i1,n2-1,i3)
- enddo
- enddo
-
- call mpi_send(
- > buff(1, buff_id ), buff_len,dp_type,
- > nbr( axis, dir, k ), msg_type(axis,dir),
- > mpi_comm_world, ierr)
-
- endif
- endif
-
- if( axis .eq. 3 )then
- if( dir .eq. -1 )then
-
- do i2=1,n2
- do i1=1,n1
- buff_len = buff_len + 1
- buff(buff_len, buff_id ) = u( i1,i2,2)
- enddo
- enddo
-
- call mpi_send(
- > buff(1, buff_id ), buff_len,dp_type,
- > nbr( axis, dir, k ), msg_type(axis,dir),
- > mpi_comm_world, ierr)
-
- else if( dir .eq. +1 ) then
-
- do i2=1,n2
- do i1=1,n1
- buff_len = buff_len + 1
- buff(buff_len, buff_id ) = u( i1,i2,n3-1)
- enddo
- enddo
-
- call mpi_send(
- > buff(1, buff_id ), buff_len,dp_type,
- > nbr( axis, dir, k ), msg_type(axis,dir),
- > mpi_comm_world, ierr)
-
- endif
- endif
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine take3( axis, dir, u, n1, n2, n3 )
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c take3 copies in border data from the requested direction
-c---------------------------------------------------------------------
- implicit none
-
- include 'mpinpb.h'
- include 'globals.h'
-
- integer axis, dir, n1, n2, n3
- double precision u( n1, n2, n3 )
-
- integer buff_id, indx
-
- integer status(mpi_status_size), ierr
-
- integer i3, i2, i1
-
- call mpi_wait( msg_id( axis, dir, 1 ),status,ierr)
- buff_id = 3 + dir
- indx = 0
-
- if( axis .eq. 1 )then
- if( dir .eq. -1 )then
-
- do i3=2,n3-1
- do i2=2,n2-1
- indx = indx + 1
- u(n1,i2,i3) = buff(indx, buff_id )
- enddo
- enddo
-
- else if( dir .eq. +1 ) then
-
- do i3=2,n3-1
- do i2=2,n2-1
- indx = indx + 1
- u(1,i2,i3) = buff(indx, buff_id )
- enddo
- enddo
-
- endif
- endif
-
- if( axis .eq. 2 )then
- if( dir .eq. -1 )then
-
- do i3=2,n3-1
- do i1=1,n1
- indx = indx + 1
- u(i1,n2,i3) = buff(indx, buff_id )
- enddo
- enddo
-
- else if( dir .eq. +1 ) then
-
- do i3=2,n3-1
- do i1=1,n1
- indx = indx + 1
- u(i1,1,i3) = buff(indx, buff_id )
- enddo
- enddo
-
- endif
- endif
-
- if( axis .eq. 3 )then
- if( dir .eq. -1 )then
-
- do i2=1,n2
- do i1=1,n1
- indx = indx + 1
- u(i1,i2,n3) = buff(indx, buff_id )
- enddo
- enddo
-
- else if( dir .eq. +1 ) then
-
- do i2=1,n2
- do i1=1,n1
- indx = indx + 1
- u(i1,i2,1) = buff(indx, buff_id )
- enddo
- enddo
-
- endif
- endif
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine give3_ex( axis, dir, u, n1, n2, n3, k )
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c give3_ex sends border data out to expand number of processors
-c---------------------------------------------------------------------
- implicit none
-
- include 'mpinpb.h'
- include 'globals.h'
-
- integer axis, dir, n1, n2, n3, k, ierr
- double precision u( n1, n2, n3 )
-
- integer i3, i2, i1, buff_len, buff_id
-
- buff_id = 2 + dir
- buff_len = 0
-
- if( axis .eq. 1 )then
- if( dir .eq. -1 )then
-
- do i3=1,n3
- do i2=1,n2
- buff_len = buff_len + 1
- buff(buff_len,buff_id ) = u( 2, i2,i3)
- enddo
- enddo
-
- call mpi_send(
- > buff(1, buff_id ), buff_len,dp_type,
- > nbr( axis, dir, k ), msg_type(axis,dir),
- > mpi_comm_world, ierr)
-
- else if( dir .eq. +1 ) then
-
- do i3=1,n3
- do i2=1,n2
- do i1=n1-1,n1
- buff_len = buff_len + 1
- buff(buff_len,buff_id)= u(i1,i2,i3)
- enddo
- enddo
- enddo
-
- call mpi_send(
- > buff(1, buff_id ), buff_len,dp_type,
- > nbr( axis, dir, k ), msg_type(axis,dir),
- > mpi_comm_world, ierr)
-
- endif
- endif
-
- if( axis .eq. 2 )then
- if( dir .eq. -1 )then
-
- do i3=1,n3
- do i1=1,n1
- buff_len = buff_len + 1
- buff(buff_len, buff_id ) = u( i1, 2,i3)
- enddo
- enddo
-
- call mpi_send(
- > buff(1, buff_id ), buff_len,dp_type,
- > nbr( axis, dir, k ), msg_type(axis,dir),
- > mpi_comm_world, ierr)
-
- else if( dir .eq. +1 ) then
-
- do i3=1,n3
- do i2=n2-1,n2
- do i1=1,n1
- buff_len = buff_len + 1
- buff(buff_len,buff_id )= u(i1,i2,i3)
- enddo
- enddo
- enddo
-
- call mpi_send(
- > buff(1, buff_id ), buff_len,dp_type,
- > nbr( axis, dir, k ), msg_type(axis,dir),
- > mpi_comm_world, ierr)
-
- endif
- endif
-
- if( axis .eq. 3 )then
- if( dir .eq. -1 )then
-
- do i2=1,n2
- do i1=1,n1
- buff_len = buff_len + 1
- buff(buff_len, buff_id ) = u( i1,i2,2)
- enddo
- enddo
-
- call mpi_send(
- > buff(1, buff_id ), buff_len,dp_type,
- > nbr( axis, dir, k ), msg_type(axis,dir),
- > mpi_comm_world, ierr)
-
- else if( dir .eq. +1 ) then
-
- do i3=n3-1,n3
- do i2=1,n2
- do i1=1,n1
- buff_len = buff_len + 1
- buff(buff_len, buff_id ) = u( i1,i2,i3)
- enddo
- enddo
- enddo
-
- call mpi_send(
- > buff(1, buff_id ), buff_len,dp_type,
- > nbr( axis, dir, k ), msg_type(axis,dir),
- > mpi_comm_world, ierr)
-
- endif
- endif
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine take3_ex( axis, dir, u, n1, n2, n3 )
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c take3_ex copies in border data to expand number of processors
-c---------------------------------------------------------------------
- implicit none
-
- include 'mpinpb.h'
- include 'globals.h'
-
- integer axis, dir, n1, n2, n3
- double precision u( n1, n2, n3 )
-
- integer buff_id, indx
-
- integer status(mpi_status_size) , ierr
-
- integer i3, i2, i1
-
- call mpi_wait( msg_id( axis, dir, 1 ),status,ierr)
- buff_id = 3 + dir
- indx = 0
-
- if( axis .eq. 1 )then
- if( dir .eq. -1 )then
-
- do i3=1,n3
- do i2=1,n2
- indx = indx + 1
- u(n1,i2,i3) = buff(indx, buff_id )
- enddo
- enddo
-
- else if( dir .eq. +1 ) then
-
- do i3=1,n3
- do i2=1,n2
- do i1=1,2
- indx = indx + 1
- u(i1,i2,i3) = buff(indx,buff_id)
- enddo
- enddo
- enddo
-
- endif
- endif
-
- if( axis .eq. 2 )then
- if( dir .eq. -1 )then
-
- do i3=1,n3
- do i1=1,n1
- indx = indx + 1
- u(i1,n2,i3) = buff(indx, buff_id )
- enddo
- enddo
-
- else if( dir .eq. +1 ) then
-
- do i3=1,n3
- do i2=1,2
- do i1=1,n1
- indx = indx + 1
- u(i1,i2,i3) = buff(indx,buff_id)
- enddo
- enddo
- enddo
-
- endif
- endif
-
- if( axis .eq. 3 )then
- if( dir .eq. -1 )then
-
- do i2=1,n2
- do i1=1,n1
- indx = indx + 1
- u(i1,i2,n3) = buff(indx, buff_id )
- enddo
- enddo
-
- else if( dir .eq. +1 ) then
-
- do i3=1,2
- do i2=1,n2
- do i1=1,n1
- indx = indx + 1
- u(i1,i2,i3) = buff(indx,buff_id)
- enddo
- enddo
- enddo
-
- endif
- endif
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine comm1p( axis, u, n1, n2, n3, kk )
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- implicit none
-
- include 'mpinpb.h'
- include 'globals.h'
-
- integer axis, dir, n1, n2, n3
- double precision u( n1, n2, n3 )
-
- integer i3, i2, i1, buff_len,buff_id
- integer i, kk, indx
-
- dir = -1
-
- buff_id = 3 + dir
- buff_len = nm2
-
- do i=1,nm2
- buff(i,buff_id) = 0.0D0
- enddo
-
-
- dir = +1
-
- buff_id = 3 + dir
- buff_len = nm2
-
- do i=1,nm2
- buff(i,buff_id) = 0.0D0
- enddo
-
- dir = +1
-
- buff_id = 2 + dir
- buff_len = 0
-
- if( axis .eq. 1 )then
- do i3=2,n3-1
- do i2=2,n2-1
- buff_len = buff_len + 1
- buff(buff_len, buff_id ) = u( n1-1, i2,i3)
- enddo
- enddo
- endif
-
- if( axis .eq. 2 )then
- do i3=2,n3-1
- do i1=1,n1
- buff_len = buff_len + 1
- buff(buff_len, buff_id )= u( i1,n2-1,i3)
- enddo
- enddo
- endif
-
- if( axis .eq. 3 )then
- do i2=1,n2
- do i1=1,n1
- buff_len = buff_len + 1
- buff(buff_len, buff_id ) = u( i1,i2,n3-1)
- enddo
- enddo
- endif
-
- dir = -1
-
- buff_id = 2 + dir
- buff_len = 0
-
- if( axis .eq. 1 )then
- do i3=2,n3-1
- do i2=2,n2-1
- buff_len = buff_len + 1
- buff(buff_len,buff_id ) = u( 2, i2,i3)
- enddo
- enddo
- endif
-
- if( axis .eq. 2 )then
- do i3=2,n3-1
- do i1=1,n1
- buff_len = buff_len + 1
- buff(buff_len, buff_id ) = u( i1, 2,i3)
- enddo
- enddo
- endif
-
- if( axis .eq. 3 )then
- do i2=1,n2
- do i1=1,n1
- buff_len = buff_len + 1
- buff(buff_len, buff_id ) = u( i1,i2,2)
- enddo
- enddo
- endif
-
- do i=1,nm2
- buff(i,4) = buff(i,3)
- buff(i,2) = buff(i,1)
- enddo
-
- dir = -1
-
- buff_id = 3 + dir
- indx = 0
-
- if( axis .eq. 1 )then
- do i3=2,n3-1
- do i2=2,n2-1
- indx = indx + 1
- u(n1,i2,i3) = buff(indx, buff_id )
- enddo
- enddo
- endif
-
- if( axis .eq. 2 )then
- do i3=2,n3-1
- do i1=1,n1
- indx = indx + 1
- u(i1,n2,i3) = buff(indx, buff_id )
- enddo
- enddo
- endif
-
- if( axis .eq. 3 )then
- do i2=1,n2
- do i1=1,n1
- indx = indx + 1
- u(i1,i2,n3) = buff(indx, buff_id )
- enddo
- enddo
- endif
-
-
- dir = +1
-
- buff_id = 3 + dir
- indx = 0
-
- if( axis .eq. 1 )then
- do i3=2,n3-1
- do i2=2,n2-1
- indx = indx + 1
- u(1,i2,i3) = buff(indx, buff_id )
- enddo
- enddo
- endif
-
- if( axis .eq. 2 )then
- do i3=2,n3-1
- do i1=1,n1
- indx = indx + 1
- u(i1,1,i3) = buff(indx, buff_id )
- enddo
- enddo
- endif
-
- if( axis .eq. 3 )then
- do i2=1,n2
- do i1=1,n1
- indx = indx + 1
- u(i1,i2,1) = buff(indx, buff_id )
- enddo
- enddo
- endif
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine comm1p_ex( axis, u, n1, n2, n3, kk )
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- implicit none
-
- include 'mpinpb.h'
- include 'globals.h'
-
- integer axis, dir, n1, n2, n3
- double precision u( n1, n2, n3 )
-
- integer i3, i2, i1, buff_len,buff_id
- integer i, kk, indx
-
- if( take_ex( axis, kk ) ) then
-
- dir = -1
-
- buff_id = 3 + dir
- buff_len = nm2
-
- do i=1,nm2
- buff(i,buff_id) = 0.0D0
- enddo
-
-
- dir = +1
-
- buff_id = 3 + dir
- buff_len = nm2
-
- do i=1,nm2
- buff(i,buff_id) = 0.0D0
- enddo
-
-
- dir = -1
-
- buff_id = 3 + dir
- indx = 0
-
- if( axis .eq. 1 )then
- do i3=1,n3
- do i2=1,n2
- indx = indx + 1
- u(n1,i2,i3) = buff(indx, buff_id )
- enddo
- enddo
- endif
-
- if( axis .eq. 2 )then
- do i3=1,n3
- do i1=1,n1
- indx = indx + 1
- u(i1,n2,i3) = buff(indx, buff_id )
- enddo
- enddo
- endif
-
- if( axis .eq. 3 )then
- do i2=1,n2
- do i1=1,n1
- indx = indx + 1
- u(i1,i2,n3) = buff(indx, buff_id )
- enddo
- enddo
- endif
-
- dir = +1
-
- buff_id = 3 + dir
- indx = 0
-
- if( axis .eq. 1 )then
- do i3=1,n3
- do i2=1,n2
- do i1=1,2
- indx = indx + 1
- u(i1,i2,i3) = buff(indx,buff_id)
- enddo
- enddo
- enddo
- endif
-
- if( axis .eq. 2 )then
- do i3=1,n3
- do i2=1,2
- do i1=1,n1
- indx = indx + 1
- u(i1,i2,i3) = buff(indx,buff_id)
- enddo
- enddo
- enddo
- endif
-
- if( axis .eq. 3 )then
- do i3=1,2
- do i2=1,n2
- do i1=1,n1
- indx = indx + 1
- u(i1,i2,i3) = buff(indx,buff_id)
- enddo
- enddo
- enddo
- endif
-
- endif
-
- if( give_ex( axis, kk ) )then
-
- dir = +1
-
- buff_id = 2 + dir
- buff_len = 0
-
- if( axis .eq. 1 )then
- do i3=1,n3
- do i2=1,n2
- do i1=n1-1,n1
- buff_len = buff_len + 1
- buff(buff_len,buff_id)= u(i1,i2,i3)
- enddo
- enddo
- enddo
- endif
-
- if( axis .eq. 2 )then
- do i3=1,n3
- do i2=n2-1,n2
- do i1=1,n1
- buff_len = buff_len + 1
- buff(buff_len,buff_id )= u(i1,i2,i3)
- enddo
- enddo
- enddo
- endif
-
- if( axis .eq. 3 )then
- do i3=n3-1,n3
- do i2=1,n2
- do i1=1,n1
- buff_len = buff_len + 1
- buff(buff_len, buff_id ) = u( i1,i2,i3)
- enddo
- enddo
- enddo
- endif
-
- dir = -1
-
- buff_id = 2 + dir
- buff_len = 0
-
- if( axis .eq. 1 )then
- do i3=1,n3
- do i2=1,n2
- buff_len = buff_len + 1
- buff(buff_len,buff_id ) = u( 2, i2,i3)
- enddo
- enddo
- endif
-
- if( axis .eq. 2 )then
- do i3=1,n3
- do i1=1,n1
- buff_len = buff_len + 1
- buff(buff_len, buff_id ) = u( i1, 2,i3)
- enddo
- enddo
- endif
-
- if( axis .eq. 3 )then
- do i2=1,n2
- do i1=1,n1
- buff_len = buff_len + 1
- buff(buff_len, buff_id ) = u( i1,i2,2)
- enddo
- enddo
- endif
-
- endif
-
- do i=1,nm2
- buff(i,4) = buff(i,3)
- buff(i,2) = buff(i,1)
- enddo
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine zran3(z,n1,n2,n3,nx,ny,k)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c zran3 loads +1 at ten randomly chosen points,
-c loads -1 at a different ten random points,
-c and zero elsewhere.
-c---------------------------------------------------------------------
- implicit none
-
- include 'mpinpb.h'
-
- integer is1, is2, is3, ie1, ie2, ie3
- common /grid/ is1,is2,is3,ie1,ie2,ie3
-
- integer n1, n2, n3, k, nx, ny, ierr, i0, m0, m1
- double precision z(n1,n2,n3)
-
- integer mm, i1, i2, i3, d1, e1, e2, e3
- double precision x, a
- double precision xx, x0, x1, a1, a2, ai, power
- parameter( mm = 10, a = 5.D0 ** 13, x = 314159265.D0)
- double precision ten( mm, 0:1 ), temp, best
- integer i, j1( mm, 0:1 ), j2( mm, 0:1 ), j3( mm, 0:1 )
- integer jg( 0:3, mm, 0:1 ), jg_temp(4)
-
- external randlc
- double precision randlc, rdummy
-
- a1 = power( a, nx, 1, 0 )
- a2 = power( a, nx, ny, 0 )
-
- call zero3(z,n1,n2,n3)
-
-c i = is1-2+nx*(is2-2+ny*(is3-2))
-
- ai = power( a, nx, is2-2+ny*(is3-2), is1-2 )
- d1 = ie1 - is1 + 1
- e1 = ie1 - is1 + 2
- e2 = ie2 - is2 + 2
- e3 = ie3 - is3 + 2
- x0 = x
- rdummy = randlc( x0, ai )
- do i3 = 2, e3
- x1 = x0
- do i2 = 2, e2
- xx = x1
- call vranlc( d1, xx, a, z( 2, i2, i3 ))
- rdummy = randlc( x1, a1 )
- enddo
- rdummy = randlc( x0, a2 )
- enddo
-
-c---------------------------------------------------------------------
-c call comm3(z,n1,n2,n3)
-c call showall(z,n1,n2,n3)
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c each processor looks for twenty candidates
-c---------------------------------------------------------------------
- do i=1,mm
- ten( i, 1 ) = 0.0D0
- j1( i, 1 ) = 0
- j2( i, 1 ) = 0
- j3( i, 1 ) = 0
- ten( i, 0 ) = 1.0D0
- j1( i, 0 ) = 0
- j2( i, 0 ) = 0
- j3( i, 0 ) = 0
- enddo
-
- do i3=2,n3-1
- do i2=2,n2-1
- do i1=2,n1-1
- if( z(i1,i2,i3) .gt. ten( 1, 1 ) )then
- ten(1,1) = z(i1,i2,i3)
- j1(1,1) = i1
- j2(1,1) = i2
- j3(1,1) = i3
- call bubble( ten, j1, j2, j3, mm, 1 )
- endif
- if( z(i1,i2,i3) .lt. ten( 1, 0 ) )then
- ten(1,0) = z(i1,i2,i3)
- j1(1,0) = i1
- j2(1,0) = i2
- j3(1,0) = i3
- call bubble( ten, j1, j2, j3, mm, 0 )
- endif
- enddo
- enddo
- enddo
-
- call mpi_barrier(mpi_comm_world,ierr)
-
-c---------------------------------------------------------------------
-c Now which of these are globally best?
-c---------------------------------------------------------------------
- i1 = mm
- i0 = mm
- do i=mm,1,-1
-
- best = z( j1(i1,1), j2(i1,1), j3(i1,1) )
- call mpi_allreduce(best,temp,1,dp_type,
- > mpi_max,mpi_comm_world,ierr)
- best = temp
- if(best.eq.z(j1(i1,1),j2(i1,1),j3(i1,1)))then
- jg( 0, i, 1) = me
- jg( 1, i, 1) = is1 - 2 + j1( i1, 1 )
- jg( 2, i, 1) = is2 - 2 + j2( i1, 1 )
- jg( 3, i, 1) = is3 - 2 + j3( i1, 1 )
- i1 = i1-1
- else
- jg( 0, i, 1) = 0
- jg( 1, i, 1) = 0
- jg( 2, i, 1) = 0
- jg( 3, i, 1) = 0
- endif
- ten( i, 1 ) = best
- call mpi_allreduce(jg(0,i,1), jg_temp,4,MPI_INTEGER,
- > mpi_max,mpi_comm_world,ierr)
- jg( 0, i, 1) = jg_temp(1)
- jg( 1, i, 1) = jg_temp(2)
- jg( 2, i, 1) = jg_temp(3)
- jg( 3, i, 1) = jg_temp(4)
-
- best = z( j1(i0,0), j2(i0,0), j3(i0,0) )
- call mpi_allreduce(best,temp,1,dp_type,
- > mpi_min,mpi_comm_world,ierr)
- best = temp
- if(best.eq.z(j1(i0,0),j2(i0,0),j3(i0,0)))then
- jg( 0, i, 0) = me
- jg( 1, i, 0) = is1 - 2 + j1( i0, 0 )
- jg( 2, i, 0) = is2 - 2 + j2( i0, 0 )
- jg( 3, i, 0) = is3 - 2 + j3( i0, 0 )
- i0 = i0-1
- else
- jg( 0, i, 0) = 0
- jg( 1, i, 0) = 0
- jg( 2, i, 0) = 0
- jg( 3, i, 0) = 0
- endif
- ten( i, 0 ) = best
- call mpi_allreduce(jg(0,i,0), jg_temp,4,MPI_INTEGER,
- > mpi_max,mpi_comm_world,ierr)
- jg( 0, i, 0) = jg_temp(1)
- jg( 1, i, 0) = jg_temp(2)
- jg( 2, i, 0) = jg_temp(3)
- jg( 3, i, 0) = jg_temp(4)
-
- enddo
- m1 = i1+1
- m0 = i0+1
-
-c if( me .eq. root) then
-c write(*,*)' '
-c write(*,*)' negative charges at'
-c write(*,9)(jg(1,i,0),jg(2,i,0),jg(3,i,0),i=1,mm)
-c write(*,*)' positive charges at'
-c write(*,9)(jg(1,i,1),jg(2,i,1),jg(3,i,1),i=1,mm)
-c write(*,*)' small random numbers were'
-c write(*,8)(ten( i,0),i=mm,1,-1)
-c write(*,*)' and they were found on processor number'
-c write(*,7)(jg(0,i,0),i=mm,1,-1)
-c write(*,*)' large random numbers were'
-c write(*,8)(ten( i,1),i=mm,1,-1)
-c write(*,*)' and they were found on processor number'
-c write(*,7)(jg(0,i,1),i=mm,1,-1)
-c endif
-c 9 format(5(' (',i3,2(',',i3),')'))
-c 8 format(5D15.8)
-c 7 format(10i4)
- call mpi_barrier(mpi_comm_world,ierr)
- do i3=1,n3
- do i2=1,n2
- do i1=1,n1
- z(i1,i2,i3) = 0.0D0
- enddo
- enddo
- enddo
- do i=mm,m0,-1
- z( j1(i,0), j2(i,0), j3(i,0) ) = -1.0D0
- enddo
- do i=mm,m1,-1
- z( j1(i,1), j2(i,1), j3(i,1) ) = +1.0D0
- enddo
- call comm3(z,n1,n2,n3,k)
-
-c---------------------------------------------------------------------
-c call showall(z,n1,n2,n3)
-c---------------------------------------------------------------------
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine show_l(z,n1,n2,n3)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- implicit none
-
- include 'mpinpb.h'
-
- integer n1,n2,n3,i1,i2,i3,ierr
- double precision z(n1,n2,n3)
- integer m1, m2, m3,i
-
- m1 = min(n1,18)
- m2 = min(n2,14)
- m3 = min(n3,18)
-
- write(*,*)' '
- do i=0,nprocs-1
- if( me .eq. i )then
- write(*,*)' id = ', me
- do i3=1,m3
- do i1=1,m1
- write(*,6)(z(i1,i2,i3),i2=1,m2)
- enddo
- write(*,*)' - - - - - - - '
- enddo
- write(*,*)' '
- 6 format(6f15.11)
- endif
- call mpi_barrier(mpi_comm_world,ierr)
- enddo
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine showall(z,n1,n2,n3)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- implicit none
-
- include 'mpinpb.h'
-
- integer n1,n2,n3,i1,i2,i3,i,ierr
- double precision z(n1,n2,n3)
- integer m1, m2, m3
-
- m1 = min(n1,18)
- m2 = min(n2,14)
- m3 = min(n3,18)
-
- write(*,*)' '
- do i=0,nprocs-1
- if( me .eq. i )then
- write(*,*)' id = ', me
- do i3=1,m3
- do i1=1,m1
- write(*,6)(z(i1,i2,i3),i2=1,m2)
- enddo
- write(*,*)' - - - - - - - '
- enddo
- write(*,*)' '
- 6 format(15f6.3)
- endif
- call mpi_barrier(mpi_comm_world,ierr)
- enddo
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine show(z,n1,n2,n3)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- implicit none
-
- include 'mpinpb.h'
- integer n1,n2,n3,i1,i2,i3,ierr,i
- double precision z(n1,n2,n3)
-
- write(*,*)' '
- do i=0,nprocs-1
- if( me .eq. i )then
- write(*,*)' id = ', me
- do i3=2,n3-1
- do i1=2,n1-1
- write(*,6)(z(i1,i2,i3),i2=2,n1-1)
- enddo
- write(*,*)' - - - - - - - '
- enddo
- write(*,*)' '
- 6 format(8D10.3)
- endif
- call mpi_barrier(mpi_comm_world,ierr)
- enddo
-
-c call comm3(z,n1,n2,n3)
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- double precision function power( a, n1, n2, n3 )
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c power raises an integer, disguised as a double
-c precision real, to an integer power.
-c This version tries to avoid integer overflow by treating
-c it as expressed in a form of "n1*n2+n3".
-c---------------------------------------------------------------------
- implicit none
-
- double precision a, aj
- integer n1, n2, n3
-
- integer n1j, n2j, nj
- external randlc
- double precision randlc, rdummy
-
- power = 1.0d0
- aj = a
- nj = n3
- n1j = n1
- n2j = n2
- 100 continue
-
- if( n2j .gt. 0 ) then
- if( mod(n2j,2) .eq. 1 ) nj = nj + n1j
- n2j = n2j/2
- else if( nj .eq. 0 ) then
- go to 200
- endif
- if( mod(nj,2) .eq. 1 ) rdummy = randlc( power, aj )
- rdummy = randlc( aj, aj )
- nj = nj/2
- go to 100
-
- 200 continue
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine bubble( ten, j1, j2, j3, m, ind )
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c bubble does a bubble sort in direction dir
-c---------------------------------------------------------------------
- implicit none
-
- include 'mpinpb.h'
-
- integer m, ind, j1( m, 0:1 ), j2( m, 0:1 ), j3( m, 0:1 )
- double precision ten( m, 0:1 )
- double precision temp
- integer i, j_temp
-
- if( ind .eq. 1 )then
-
- do i=1,m-1
- if( ten(i,ind) .gt. ten(i+1,ind) )then
-
- temp = ten( i+1, ind )
- ten( i+1, ind ) = ten( i, ind )
- ten( i, ind ) = temp
-
- j_temp = j1( i+1, ind )
- j1( i+1, ind ) = j1( i, ind )
- j1( i, ind ) = j_temp
-
- j_temp = j2( i+1, ind )
- j2( i+1, ind ) = j2( i, ind )
- j2( i, ind ) = j_temp
-
- j_temp = j3( i+1, ind )
- j3( i+1, ind ) = j3( i, ind )
- j3( i, ind ) = j_temp
-
- else
- return
- endif
- enddo
-
- else
-
- do i=1,m-1
- if( ten(i,ind) .lt. ten(i+1,ind) )then
-
- temp = ten( i+1, ind )
- ten( i+1, ind ) = ten( i, ind )
- ten( i, ind ) = temp
-
- j_temp = j1( i+1, ind )
- j1( i+1, ind ) = j1( i, ind )
- j1( i, ind ) = j_temp
-
- j_temp = j2( i+1, ind )
- j2( i+1, ind ) = j2( i, ind )
- j2( i, ind ) = j_temp
-
- j_temp = j3( i+1, ind )
- j3( i+1, ind ) = j3( i, ind )
- j3( i, ind ) = j_temp
-
- else
- return
- endif
- enddo
-
- endif
-
- return
- end
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- subroutine zero3(z,n1,n2,n3)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
- implicit none
-
- include 'mpinpb.h'
-
- integer n1, n2, n3
- double precision z(n1,n2,n3)
- integer i1, i2, i3
-
- do i3=1,n3
- do i2=1,n2
- do i1=1,n1
- z(i1,i2,i3)=0.0D0
- enddo
- enddo
- enddo
-
- return
- end
-
-
-c----- end of program ------------------------------------------------