1 !-------------------------------------------------------------------------!
3 ! N A S P A R A L L E L B E N C H M A R K S 3.3 !
7 !-------------------------------------------------------------------------!
9 ! This benchmark is part of the NAS Parallel Benchmark 3.3 suite. !
10 ! It is described in NAS Technical Reports 95-020 and 02-007 !
12 ! Permission to use, copy, distribute and modify this software !
13 ! for any purpose with or without fee is hereby granted. We !
14 ! request, however, that all derived work reference the NAS !
15 ! Parallel Benchmarks 3.3. This software is provided "as is" !
16 ! without express or implied warranty. !
18 ! Information on NPB 3.3, including the technical report, the !
19 ! original specifications, source code, results and information !
20 ! on how to submit new results, is available at: !
22 ! http://www.nas.nasa.gov/Software/NPB/ !
24 ! Send comments or suggestions to npb@nas.nasa.gov !
26 ! NAS Parallel Benchmarks Group !
27 ! NASA Ames Research Center !
29 ! Moffett Field, CA 94035-1000 !
31 ! E-mail: npb@nas.nasa.gov !
32 ! Fax: (650) 604-3957 !
34 !-------------------------------------------------------------------------!
37 c---------------------------------------------------------------------
43 c R. F. Van der Wijngaart
45 c---------------------------------------------------------------------
48 c---------------------------------------------------------------------
50 c---------------------------------------------------------------------
57 c---------------------------------------------------------------------------c
58 c k is the current level. It is passed down through subroutine args
59 c and is NOT global. it is the current iteration
60 c---------------------------------------------------------------------------c
65 double precision t, t0, tinit, mflops, timer_read
67 c---------------------------------------------------------------------------c
68 c These arrays are in common because they are quite large
69 c and probably shouldn't be allocated on the stack. They
70 c are always passed as subroutine args.
71 c---------------------------------------------------------------------------c
73 double precision u(nr),v(nv),r(nr),a(0:3),c(0:3)
74 common /noautom/ u,v,r
76 double precision rnm2, rnmu, old2, oldu, epsilon
77 integer n1, n2, n3, nit
78 double precision nn, verify_value, err
81 integer ierr,i, fstatus
82 integer T_bench, T_init
83 parameter (T_bench=1, T_init=2)
86 call mpi_comm_rank(mpi_comm_world, me, ierr)
87 call mpi_comm_size(mpi_comm_world, nprocs, ierr)
90 if (nprocs_compiled .gt. maxprocs) then
91 if (me .eq. root) write(*,20) nprocs_compiled, maxprocs
92 20 format(' ERROR: compiled for ',i8,' processes'//
93 & ' The maximum size allowed for this benchmark is ',i6)
94 call mpi_abort(MPI_COMM_WORLD, ierr)
98 if (.not. convertdouble) then
99 dp_type = MPI_DOUBLE_PRECISION
105 call timer_clear(T_bench)
106 call timer_clear(T_init)
108 call mpi_barrier(MPI_COMM_WORLD, ierr)
110 call timer_start(T_init)
113 c---------------------------------------------------------------------
114 c Read in and broadcast input data
115 c---------------------------------------------------------------------
117 if( me .eq. root )then
120 open(unit=7,file="mg.input", status="old", iostat=fstatus)
121 if (fstatus .eq. 0) then
123 50 format(' Reading from input file mg.input')
125 read(7,*) nx(lt), ny(lt), nz(lt)
127 read(7,*) (debug_vec(i),i=0,7)
130 51 format(' No input file. Using compiled defaults ')
137 debug_vec(i) = debug_default
142 call mpi_bcast(lt, 1, MPI_INTEGER, 0, mpi_comm_world, ierr)
143 call mpi_bcast(nit, 1, MPI_INTEGER, 0, mpi_comm_world, ierr)
144 call mpi_bcast(nx(lt), 1, MPI_INTEGER, 0, mpi_comm_world, ierr)
145 call mpi_bcast(ny(lt), 1, MPI_INTEGER, 0, mpi_comm_world, ierr)
146 call mpi_bcast(nz(lt), 1, MPI_INTEGER, 0, mpi_comm_world, ierr)
147 call mpi_bcast(debug_vec(0), 8, MPI_INTEGER, 0,
148 > mpi_comm_world, ierr)
150 if ( (nx(lt) .ne. ny(lt)) .or. (nx(lt) .ne. nz(lt)) ) then
152 else if( nx(lt) .eq. 32 .and. nit .eq. 4 ) then
154 else if( nx(lt) .eq. 128 .and. nit .eq. 4 ) then
156 else if( nx(lt) .eq. 256 .and. nit .eq. 4 ) then
158 else if( nx(lt) .eq. 256 .and. nit .eq. 20 ) then
160 else if( nx(lt) .eq. 512 .and. nit .eq. 20 ) then
162 else if( nx(lt) .eq. 1024 .and. nit .eq. 50 ) then
164 else if( nx(lt) .eq. 2048 .and. nit .eq. 50 ) then
170 c---------------------------------------------------------------------
171 c Use these for debug info:
172 c---------------------------------------------------------------------
173 c debug_vec(0) = 1 !=> report all norms
174 c debug_vec(1) = 1 !=> some setup information
175 c debug_vec(1) = 2 !=> more setup information
176 c debug_vec(2) = k => at level k or below, show result of resid
177 c debug_vec(3) = k => at level k or below, show result of psinv
178 c debug_vec(4) = k => at level k or below, show result of rprj
179 c debug_vec(5) = k => at level k or below, show result of interp
180 c debug_vec(6) = 1 => (unused)
181 c debug_vec(7) = 1 => (unused)
182 c---------------------------------------------------------------------
188 if(Class .eq. 'A' .or. Class .eq. 'S'.or. Class .eq.'W') then
189 c---------------------------------------------------------------------
190 c Coefficients for the S(a) smoother
191 c---------------------------------------------------------------------
197 c---------------------------------------------------------------------
198 c Coefficients for the S(b) smoother
199 c---------------------------------------------------------------------
208 call setup(n1,n2,n3,k)
209 call zero3(u,n1,n2,n3)
210 call zran3(v,n1,n2,n3,nx(lt),ny(lt),k)
212 call norm2u3(v,n1,n2,n3,rnm2,rnmu,nx(lt),ny(lt),nz(lt))
214 if( me .eq. root )then
215 write (*, 1001) nx(lt),ny(lt),nz(lt), Class
218 1000 format(//,' NAS Parallel Benchmarks 3.3 -- MG Benchmark', /)
219 1001 format(' Size: ', i4, 'x', i4, 'x', i4, ' (class ', A, ')' )
220 1002 format(' Iterations: ', i4)
221 1003 format(' Number of processes: ', i6)
222 if (nprocs .ne. nprocs_compiled) then
223 write (*, 1004) nprocs_compiled
224 write (*, 1005) nprocs
225 1004 format(' WARNING: compiled for ', i6, ' processes ')
226 1005 format(' Number of active processes: ', i6, /)
228 write (*, 1003) nprocs
232 call resid(u,v,r,n1,n2,n3,a,k)
233 call norm2u3(r,n1,n2,n3,rnm2,rnmu,nx(lt),ny(lt),nz(lt))
237 c---------------------------------------------------------------------
238 c One iteration for startup
239 c---------------------------------------------------------------------
240 call mg3P(u,v,r,a,c,n1,n2,n3,k)
241 call resid(u,v,r,n1,n2,n3,a,k)
242 call setup(n1,n2,n3,k)
243 call zero3(u,n1,n2,n3)
244 call zran3(v,n1,n2,n3,nx(lt),ny(lt),k)
246 call timer_stop(T_init)
247 if( me .eq. root )then
248 tinit = timer_read(T_init)
249 write( *,'(/A,F15.3,A/)' )
250 > ' Initialization time: ',tinit, ' seconds'
253 call mpi_barrier(mpi_comm_world,ierr)
255 call timer_start(T_bench)
257 call resid(u,v,r,n1,n2,n3,a,k)
258 call norm2u3(r,n1,n2,n3,rnm2,rnmu,nx(lt),ny(lt),nz(lt))
263 if (it.eq.1 .or. it.eq.nit .or. mod(it,5).eq.0) then
264 if (me .eq. root) write(*,80) it
265 80 format(' iter ',i4)
267 call mg3P(u,v,r,a,c,n1,n2,n3,k)
268 call resid(u,v,r,n1,n2,n3,a,k)
272 call norm2u3(r,n1,n2,n3,rnm2,rnmu,nx(lt),ny(lt),nz(lt))
274 call timer_stop(T_bench)
276 t0 = timer_read(T_bench)
278 call mpi_reduce(t0,t,1,dp_type,
279 > mpi_max,root,mpi_comm_world,ierr)
282 if( me .eq. root )then
284 100 format(/' Benchmark completed ')
287 if (Class .ne. 'U') then
288 if(Class.eq.'S') then
289 verify_value = 0.5307707005734d-04
290 elseif(Class.eq.'W') then
291 verify_value = 0.6467329375339d-05
292 elseif(Class.eq.'A') then
293 verify_value = 0.2433365309069d-05
294 elseif(Class.eq.'B') then
295 verify_value = 0.1800564401355d-05
296 elseif(Class.eq.'C') then
297 verify_value = 0.5706732285740d-06
298 elseif(Class.eq.'D') then
299 verify_value = 0.1583275060440d-09
300 elseif(Class.eq.'E') then
301 verify_value = 0.5630442584711d-10
304 err = abs( rnm2 - verify_value ) / verify_value
305 if( err .le. epsilon ) then
310 200 format(' VERIFICATION SUCCESSFUL ')
311 201 format(' L2 Norm is ', E20.13)
312 202 format(' Error is ', E20.13)
317 write(*, 302) verify_value
318 300 format(' VERIFICATION FAILED')
319 301 format(' L2 Norm is ', E20.13)
320 302 format(' The correct L2 Norm is ', E20.13)
327 400 format(' Problem size unknown')
328 401 format(' NO VERIFICATION PERFORMED')
331 nn = 1.0d0*nx(lt)*ny(lt)*nz(lt)
334 mflops = 58.*1.0D-6*nit*nn / t
339 call print_results('MG', class, nx(lt), ny(lt), nz(lt),
340 > nit, nprocs_compiled, nprocs, t,
341 > mflops, ' floating point',
342 > verified, npbversion, compiletime,
343 > cs1, cs2, cs3, cs4, cs5, cs6, cs7)
349 call mpi_finalize(ierr)
352 c---------------------------------------------------------------------
353 c---------------------------------------------------------------------
355 subroutine setup(n1,n2,n3,k)
357 c---------------------------------------------------------------------
358 c---------------------------------------------------------------------
365 integer is1, is2, is3, ie1, ie2, ie3
366 common /grid/ is1,is2,is3,ie1,ie2,ie3
369 integer dx, dy, log_p, d, i, j
371 integer ax, next(3),mi(3,maxlevel),mip(3,maxlevel)
372 integer ng(3,maxlevel)
373 integer idi(3), pi(3), idin(3,-1:1)
378 msg_type(d,j) = 100*(j+2+10*d)
388 ng(ax,k) = ng(ax,k+1)/2
398 log_p = log(float(nprocs)+0.0001)/log(2.0)
401 idi(1) = mod(me,pi(1))
405 idi(2) = mod((me/pi(1)),pi(2))
407 pi(3) = nprocs/(pi(1)*pi(2))
408 idi(3) = me/(pi(1)*pi(2))
413 take_ex(ax,k) = .false.
414 give_ex(ax,k) = .false.
417 > ((idi(ax)+1)*ng(ax,k))/pi(ax) -
418 > ((idi(ax)+0)*ng(ax,k))/pi(ax)
420 > ((next(ax)+idi(ax)+1)*ng(ax,k))/pi(ax) -
421 > ((next(ax)+idi(ax)+0)*ng(ax,k))/pi(ax)
423 if(mip(ax,k).eq.2.or.mi(ax,k).eq.2)then
424 next(ax) = 2*next(ax)
427 if( k+1 .le. lt )then
428 if((mip(ax,k).eq.2).and.(mi(ax,k).eq.3))then
429 give_ex(ax,k+1) = .true.
431 if((mip(ax,k).eq.3).and.(mi(ax,k).eq.2))then
432 take_ex(ax,k+1) = .true.
437 if( mi(1,k).eq.2 .or.
447 idin(ax,+1) = mod( idi(ax) + next(ax) + pi(ax) , pi(ax) )
448 idin(ax,-1) = mod( idi(ax) - next(ax) + pi(ax) , pi(ax) )
451 nbr(1,dir,k) = idin(1,dir) + pi(1)
454 nbr(2,dir,k) = idi(1) + pi(1)
455 > *(idin(2,dir) + pi(2)
457 nbr(3,dir,k) = idi(1) + pi(1)
464 is1 = 2 + ng(1,k) - ((pi(1) -idi(1))*ng(1,lt))/pi(1)
465 ie1 = 1 + ng(1,k) - ((pi(1)-1-idi(1))*ng(1,lt))/pi(1)
467 is2 = 2 + ng(2,k) - ((pi(2) -idi(2))*ng(2,lt))/pi(2)
468 ie2 = 1 + ng(2,k) - ((pi(2)-1-idi(2))*ng(2,lt))/pi(2)
470 is3 = 2 + ng(3,k) - ((pi(3) -idi(3))*ng(3,lt))/pi(3)
471 ie3 = 1 + ng(3,k) - ((pi(3)-1-idi(3))*ng(3,lt))/pi(3)
477 ir(j)=ir(j+1)+m1(j+1)*m2(j+1)*m3(j+1)
481 if( debug_vec(1) .ge. 1 )then
482 if( me .eq. root )write(*,*)' in setup, '
483 if( me .eq. root )write(*,*)' me k lt nx ny nz ',
484 > ' n1 n2 n3 is1 is2 is3 ie1 ie2 ie3'
487 write(*,9) me,k,lt,ng(1,k),ng(2,k),ng(3,k),
488 > n1,n2,n3,is1,is2,is3,ie1,ie2,ie3
491 call mpi_barrier(mpi_comm_world,ierr)
494 if( debug_vec(1) .ge. 2 )then
498 write(*,*)' processor =',me
500 write(*,7)k,idi(1),idi(2),idi(3),
501 > ((nbr(d,j,k),j=-1,1,2),d=1,3),
504 7 format(i4,'idi=',3i4,'nbr=',3(2i4,' '),'mi=',3i4,' ')
505 write(*,*)'idi(s) = ',(idi(s),s=1,3)
506 write(*,*)'dead(2), dead(1) = ',dead(2),dead(1)
508 write(*,*)'give_ex(ax,2)= ',give_ex(ax,2)
509 write(*,*)'take_ex(ax,2)= ',take_ex(ax,2)
512 call mpi_barrier(mpi_comm_world,ierr)
521 c---------------------------------------------------------------------
522 c---------------------------------------------------------------------
524 subroutine mg3P(u,v,r,a,c,n1,n2,n3,k)
526 c---------------------------------------------------------------------
527 c---------------------------------------------------------------------
529 c---------------------------------------------------------------------
530 c multigrid V-cycle routine
531 c---------------------------------------------------------------------
537 integer n1, n2, n3, k
538 double precision u(nr),v(nv),r(nr)
539 double precision a(0:3),c(0:3)
543 c---------------------------------------------------------------------
545 c restrict the residual from the find grid to the coarse
546 c---------------------------------------------------------------------
550 call rprj3(r(ir(k)),m1(k),m2(k),m3(k),
551 > r(ir(j)),m1(j),m2(j),m3(j),k)
555 c---------------------------------------------------------------------
556 c compute an approximate solution on the coarsest grid
557 c---------------------------------------------------------------------
558 call zero3(u(ir(k)),m1(k),m2(k),m3(k))
559 call psinv(r(ir(k)),u(ir(k)),m1(k),m2(k),m3(k),c,k)
563 c---------------------------------------------------------------------
564 c prolongate from level k-1 to k
565 c---------------------------------------------------------------------
566 call zero3(u(ir(k)),m1(k),m2(k),m3(k))
567 call interp(u(ir(j)),m1(j),m2(j),m3(j),
568 > u(ir(k)),m1(k),m2(k),m3(k),k)
569 c---------------------------------------------------------------------
570 c compute residual for level k
571 c---------------------------------------------------------------------
572 call resid(u(ir(k)),r(ir(k)),r(ir(k)),m1(k),m2(k),m3(k),a,k)
573 c---------------------------------------------------------------------
575 c---------------------------------------------------------------------
576 call psinv(r(ir(k)),u(ir(k)),m1(k),m2(k),m3(k),c,k)
581 call interp(u(ir(j)),m1(j),m2(j),m3(j),u,n1,n2,n3,k)
582 call resid(u,v,r,n1,n2,n3,a,k)
583 call psinv(r,u,n1,n2,n3,c,k)
588 c---------------------------------------------------------------------
589 c---------------------------------------------------------------------
591 subroutine psinv( r,u,n1,n2,n3,c,k)
593 c---------------------------------------------------------------------
594 c---------------------------------------------------------------------
596 c---------------------------------------------------------------------
597 c psinv applies an approximate inverse as smoother: u = u + Cr
599 c This implementation costs 15A + 4M per result, where
600 c A and M denote the costs of Addition and Multiplication.
601 c Presuming coefficient c(3) is zero (the NPB assumes this,
602 c but it is thus not a general case), 2A + 1M may be eliminated,
603 c resulting in 13A + 3M.
604 c Note that this vectorizes, and is also fine for cache
606 c---------------------------------------------------------------------
612 double precision u(n1,n2,n3),r(n1,n2,n3),c(0:3)
615 double precision r1(m), r2(m)
620 r1(i1) = r(i1,i2-1,i3) + r(i1,i2+1,i3)
621 > + r(i1,i2,i3-1) + r(i1,i2,i3+1)
622 r2(i1) = r(i1,i2-1,i3-1) + r(i1,i2+1,i3-1)
623 > + r(i1,i2-1,i3+1) + r(i1,i2+1,i3+1)
626 u(i1,i2,i3) = u(i1,i2,i3)
627 > + c(0) * r(i1,i2,i3)
628 > + c(1) * ( r(i1-1,i2,i3) + r(i1+1,i2,i3)
630 > + c(2) * ( r2(i1) + r1(i1-1) + r1(i1+1) )
631 c---------------------------------------------------------------------
632 c Assume c(3) = 0 (Enable line below if c(3) not= 0)
633 c---------------------------------------------------------------------
634 c > + c(3) * ( r2(i1-1) + r2(i1+1) )
635 c---------------------------------------------------------------------
640 c---------------------------------------------------------------------
641 c exchange boundary points
642 c---------------------------------------------------------------------
643 call comm3(u,n1,n2,n3,k)
645 if( debug_vec(0) .ge. 1 )then
646 call rep_nrm(u,n1,n2,n3,' psinv',k)
649 if( debug_vec(3) .ge. k )then
650 call showall(u,n1,n2,n3)
656 c---------------------------------------------------------------------
657 c---------------------------------------------------------------------
659 subroutine resid( u,v,r,n1,n2,n3,a,k )
661 c---------------------------------------------------------------------
662 c---------------------------------------------------------------------
664 c---------------------------------------------------------------------
665 c resid computes the residual: r = v - Au
667 c This implementation costs 15A + 4M per result, where
668 c A and M denote the costs of Addition (or Subtraction) and
669 c Multiplication, respectively.
670 c Presuming coefficient a(1) is zero (the NPB assumes this,
671 c but it is thus not a general case), 3A + 1M may be eliminated,
672 c resulting in 12A + 3M.
673 c Note that this vectorizes, and is also fine for cache
675 c---------------------------------------------------------------------
681 double precision u(n1,n2,n3),v(n1,n2,n3),r(n1,n2,n3),a(0:3)
683 double precision u1(m), u2(m)
688 u1(i1) = u(i1,i2-1,i3) + u(i1,i2+1,i3)
689 > + u(i1,i2,i3-1) + u(i1,i2,i3+1)
690 u2(i1) = u(i1,i2-1,i3-1) + u(i1,i2+1,i3-1)
691 > + u(i1,i2-1,i3+1) + u(i1,i2+1,i3+1)
694 r(i1,i2,i3) = v(i1,i2,i3)
695 > - a(0) * u(i1,i2,i3)
696 c---------------------------------------------------------------------
697 c Assume a(1) = 0 (Enable 2 lines below if a(1) not= 0)
698 c---------------------------------------------------------------------
699 c > - a(1) * ( u(i1-1,i2,i3) + u(i1+1,i2,i3)
701 c---------------------------------------------------------------------
702 > - a(2) * ( u2(i1) + u1(i1-1) + u1(i1+1) )
703 > - a(3) * ( u2(i1-1) + u2(i1+1) )
708 c---------------------------------------------------------------------
709 c exchange boundary data
710 c---------------------------------------------------------------------
711 call comm3(r,n1,n2,n3,k)
713 if( debug_vec(0) .ge. 1 )then
714 call rep_nrm(r,n1,n2,n3,' resid',k)
717 if( debug_vec(2) .ge. k )then
718 call showall(r,n1,n2,n3)
724 c---------------------------------------------------------------------
725 c---------------------------------------------------------------------
727 subroutine rprj3( r,m1k,m2k,m3k,s,m1j,m2j,m3j,k )
729 c---------------------------------------------------------------------
730 c---------------------------------------------------------------------
732 c---------------------------------------------------------------------
733 c rprj3 projects onto the next coarser grid,
734 c using a trilinear Finite Element projection: s = r' = P r
736 c This implementation costs 20A + 4M per result, where
737 c A and M denote the costs of Addition and Multiplication.
738 c Note that this vectorizes, and is also fine for cache
740 c---------------------------------------------------------------------
746 integer m1k, m2k, m3k, m1j, m2j, m3j,k
747 double precision r(m1k,m2k,m3k), s(m1j,m2j,m3j)
748 integer j3, j2, j1, i3, i2, i1, d1, d2, d3, j
750 double precision x1(m), y1(m), x2,y2
781 x1(i1-1) = r(i1-1,i2-1,i3 ) + r(i1-1,i2+1,i3 )
782 > + r(i1-1,i2, i3-1) + r(i1-1,i2, i3+1)
783 y1(i1-1) = r(i1-1,i2-1,i3-1) + r(i1-1,i2-1,i3+1)
784 > + r(i1-1,i2+1,i3-1) + r(i1-1,i2+1,i3+1)
790 y2 = r(i1, i2-1,i3-1) + r(i1, i2-1,i3+1)
791 > + r(i1, i2+1,i3-1) + r(i1, i2+1,i3+1)
792 x2 = r(i1, i2-1,i3 ) + r(i1, i2+1,i3 )
793 > + r(i1, i2, i3-1) + r(i1, i2, i3+1)
795 > 0.5D0 * r(i1,i2,i3)
796 > + 0.25D0 * ( r(i1-1,i2,i3) + r(i1+1,i2,i3) + x2)
797 > + 0.125D0 * ( x1(i1-1) + x1(i1+1) + y2)
798 > + 0.0625D0 * ( y1(i1-1) + y1(i1+1) )
806 call comm3(s,m1j,m2j,m3j,j)
808 if( debug_vec(0) .ge. 1 )then
809 call rep_nrm(s,m1j,m2j,m3j,' rprj3',k-1)
812 if( debug_vec(4) .ge. k )then
813 call showall(s,m1j,m2j,m3j)
819 c---------------------------------------------------------------------
820 c---------------------------------------------------------------------
822 subroutine interp( z,mm1,mm2,mm3,u,n1,n2,n3,k )
824 c---------------------------------------------------------------------
825 c---------------------------------------------------------------------
827 c---------------------------------------------------------------------
828 c interp adds the trilinear interpolation of the correction
829 c from the coarser grid to the current approximation: u = u + Qu'
831 c Observe that this implementation costs 16A + 4M, where
832 c A and M denote the costs of Addition and Multiplication.
833 c Note that this vectorizes, and is also fine for cache
834 c based machines. Vector machines may get slightly better
835 c performance however, with 8 separate "do i1" loops, rather than 4.
836 c---------------------------------------------------------------------
842 integer mm1, mm2, mm3, n1, n2, n3,k
843 double precision z(mm1,mm2,mm3),u(n1,n2,n3)
844 integer i3, i2, i1, d1, d2, d3, t1, t2, t3
846 c note that m = 1037 in globals.h but for this only need to be
847 c 535 to handle up to 1024^3
850 double precision z1(m),z2(m),z3(m)
853 if( n1 .ne. 3 .and. n2 .ne. 3 .and. n3 .ne. 3 ) then
859 z1(i1) = z(i1,i2+1,i3) + z(i1,i2,i3)
860 z2(i1) = z(i1,i2,i3+1) + z(i1,i2,i3)
861 z3(i1) = z(i1,i2+1,i3+1) + z(i1,i2,i3+1) + z1(i1)
865 u(2*i1-1,2*i2-1,2*i3-1)=u(2*i1-1,2*i2-1,2*i3-1)
867 u(2*i1,2*i2-1,2*i3-1)=u(2*i1,2*i2-1,2*i3-1)
868 > +0.5d0*(z(i1+1,i2,i3)+z(i1,i2,i3))
871 u(2*i1-1,2*i2,2*i3-1)=u(2*i1-1,2*i2,2*i3-1)
873 u(2*i1,2*i2,2*i3-1)=u(2*i1,2*i2,2*i3-1)
874 > +0.25d0*( z1(i1) + z1(i1+1) )
877 u(2*i1-1,2*i2-1,2*i3)=u(2*i1-1,2*i2-1,2*i3)
879 u(2*i1,2*i2-1,2*i3)=u(2*i1,2*i2-1,2*i3)
880 > +0.25d0*( z2(i1) + z2(i1+1) )
883 u(2*i1-1,2*i2,2*i3)=u(2*i1-1,2*i2,2*i3)
885 u(2*i1,2*i2,2*i3)=u(2*i1,2*i2,2*i3)
886 > +0.125d0*( z3(i1) + z3(i1+1) )
920 u(2*i1-d1,2*i2-d2,2*i3-d3)=u(2*i1-d1,2*i2-d2,2*i3-d3)
924 u(2*i1-t1,2*i2-d2,2*i3-d3)=u(2*i1-t1,2*i2-d2,2*i3-d3)
925 > +0.5D0*(z(i1+1,i2,i3)+z(i1,i2,i3))
930 u(2*i1-d1,2*i2-t2,2*i3-d3)=u(2*i1-d1,2*i2-t2,2*i3-d3)
931 > +0.5D0*(z(i1,i2+1,i3)+z(i1,i2,i3))
934 u(2*i1-t1,2*i2-t2,2*i3-d3)=u(2*i1-t1,2*i2-t2,2*i3-d3)
935 > +0.25D0*(z(i1+1,i2+1,i3)+z(i1+1,i2,i3)
936 > +z(i1, i2+1,i3)+z(i1, i2,i3))
944 u(2*i1-d1,2*i2-d2,2*i3-t3)=u(2*i1-d1,2*i2-d2,2*i3-t3)
945 > +0.5D0*(z(i1,i2,i3+1)+z(i1,i2,i3))
948 u(2*i1-t1,2*i2-d2,2*i3-t3)=u(2*i1-t1,2*i2-d2,2*i3-t3)
949 > +0.25D0*(z(i1+1,i2,i3+1)+z(i1,i2,i3+1)
950 > +z(i1+1,i2,i3 )+z(i1,i2,i3 ))
955 u(2*i1-d1,2*i2-t2,2*i3-t3)=u(2*i1-d1,2*i2-t2,2*i3-t3)
956 > +0.25D0*(z(i1,i2+1,i3+1)+z(i1,i2,i3+1)
957 > +z(i1,i2+1,i3 )+z(i1,i2,i3 ))
960 u(2*i1-t1,2*i2-t2,2*i3-t3)=u(2*i1-t1,2*i2-t2,2*i3-t3)
961 > +0.125D0*(z(i1+1,i2+1,i3+1)+z(i1+1,i2,i3+1)
962 > +z(i1 ,i2+1,i3+1)+z(i1 ,i2,i3+1)
963 > +z(i1+1,i2+1,i3 )+z(i1+1,i2,i3 )
964 > +z(i1 ,i2+1,i3 )+z(i1 ,i2,i3 ))
971 call comm3_ex(u,n1,n2,n3,k)
973 if( debug_vec(0) .ge. 1 )then
974 call rep_nrm(z,mm1,mm2,mm3,'z: inter',k-1)
975 call rep_nrm(u,n1,n2,n3,'u: inter',k)
978 if( debug_vec(5) .ge. k )then
979 call showall(z,mm1,mm2,mm3)
980 call showall(u,n1,n2,n3)
986 c---------------------------------------------------------------------
987 c---------------------------------------------------------------------
989 subroutine norm2u3(r,n1,n2,n3,rnm2,rnmu,nx,ny,nz)
991 c---------------------------------------------------------------------
992 c---------------------------------------------------------------------
994 c---------------------------------------------------------------------
995 c norm2u3 evaluates approximations to the L2 norm and the
996 c uniform (or L-infinity or Chebyshev) norm, under the
997 c assumption that the boundaries are periodic or zero. Add the
998 c boundaries in with half weight (quarter weight on the edges
999 c and eighth weight at the corners) for inhomogeneous boundaries.
1000 c---------------------------------------------------------------------
1005 integer n1, n2, n3, nx, ny, nz
1006 double precision rnm2, rnmu, r(n1,n2,n3)
1007 double precision s, a, ss
1008 integer i3, i2, i1, ierr
1026 call mpi_allreduce(rnmu,ss,1,dp_type,
1027 > mpi_max,mpi_comm_world,ierr)
1029 call mpi_allreduce(s, ss, 1, dp_type,
1030 > mpi_sum,mpi_comm_world,ierr)
1037 c---------------------------------------------------------------------
1038 c---------------------------------------------------------------------
1040 subroutine rep_nrm(u,n1,n2,n3,title,kk)
1042 c---------------------------------------------------------------------
1043 c---------------------------------------------------------------------
1045 c---------------------------------------------------------------------
1047 c---------------------------------------------------------------------
1053 integer n1, n2, n3, kk
1054 double precision u(n1,n2,n3)
1057 double precision rnm2, rnmu
1060 call norm2u3(u,n1,n2,n3,rnm2,rnmu,nx(kk),ny(kk),nz(kk))
1061 if( me .eq. root )then
1062 write(*,7)kk,title,rnm2,rnmu
1063 7 format(' Level',i2,' in ',a8,': norms =',D21.14,D21.14)
1069 c---------------------------------------------------------------------
1070 c---------------------------------------------------------------------
1072 subroutine comm3(u,n1,n2,n3,kk)
1074 c---------------------------------------------------------------------
1075 c---------------------------------------------------------------------
1077 c---------------------------------------------------------------------
1078 c comm3 organizes the communication on all borders
1079 c---------------------------------------------------------------------
1085 integer n1, n2, n3, kk
1086 double precision u(n1,n2,n3)
1089 if( .not. dead(kk) )then
1091 if( nprocs .ne. 1) then
1093 call ready( axis, -1, kk )
1094 call ready( axis, +1, kk )
1096 call give3( axis, +1, u, n1, n2, n3, kk )
1097 call give3( axis, -1, u, n1, n2, n3, kk )
1099 call take3( axis, -1, u, n1, n2, n3 )
1100 call take3( axis, +1, u, n1, n2, n3 )
1103 call comm1p( axis, u, n1, n2, n3, kk )
1107 call zero3(u,n1,n2,n3)
1112 c---------------------------------------------------------------------
1113 c---------------------------------------------------------------------
1115 subroutine comm3_ex(u,n1,n2,n3,kk)
1117 c---------------------------------------------------------------------
1118 c---------------------------------------------------------------------
1120 c---------------------------------------------------------------------
1121 c comm3_ex communicates to expand the number of processors
1122 c---------------------------------------------------------------------
1128 integer n1, n2, n3, kk
1129 double precision u(n1,n2,n3)
1133 if( nprocs .ne. 1 ) then
1134 if( take_ex( axis, kk ) )then
1135 call ready( axis, -1, kk )
1136 call ready( axis, +1, kk )
1137 call take3_ex( axis, -1, u, n1, n2, n3 )
1138 call take3_ex( axis, +1, u, n1, n2, n3 )
1141 if( give_ex( axis, kk ) )then
1142 call give3_ex( axis, +1, u, n1, n2, n3, kk )
1143 call give3_ex( axis, -1, u, n1, n2, n3, kk )
1146 call comm1p_ex( axis, u, n1, n2, n3, kk )
1153 c---------------------------------------------------------------------
1154 c---------------------------------------------------------------------
1156 subroutine ready( axis, dir, k )
1158 c---------------------------------------------------------------------
1159 c---------------------------------------------------------------------
1161 c---------------------------------------------------------------------
1162 c ready allocates a buffer to take in a message
1163 c---------------------------------------------------------------------
1169 integer axis, dir, k
1170 integer buff_id,buff_len,i,ierr
1176 buff(i,buff_id) = 0.0D0
1180 c---------------------------------------------------------------------
1181 c fake message request type
1182 c---------------------------------------------------------------------
1183 msg_id(axis,dir,1) = msg_type(axis,dir) +1000*me
1185 call mpi_irecv( buff(1,buff_id), buff_len,
1186 > dp_type, nbr(axis,-dir,k), msg_type(axis,dir),
1187 > mpi_comm_world, msg_id(axis,dir,1), ierr)
1192 c---------------------------------------------------------------------
1193 c---------------------------------------------------------------------
1195 subroutine give3( axis, dir, u, n1, n2, n3, k )
1197 c---------------------------------------------------------------------
1198 c---------------------------------------------------------------------
1200 c---------------------------------------------------------------------
1201 c give3 sends border data out in the requested direction
1202 c---------------------------------------------------------------------
1208 integer axis, dir, n1, n2, n3, k, ierr
1209 double precision u( n1, n2, n3 )
1211 integer i3, i2, i1, buff_len,buff_id
1216 if( axis .eq. 1 )then
1217 if( dir .eq. -1 )then
1221 buff_len = buff_len + 1
1222 buff(buff_len,buff_id ) = u( 2, i2,i3)
1227 > buff(1, buff_id ), buff_len,dp_type,
1228 > nbr( axis, dir, k ), msg_type(axis,dir),
1229 > mpi_comm_world, ierr)
1231 else if( dir .eq. +1 ) then
1235 buff_len = buff_len + 1
1236 buff(buff_len, buff_id ) = u( n1-1, i2,i3)
1241 > buff(1, buff_id ), buff_len,dp_type,
1242 > nbr( axis, dir, k ), msg_type(axis,dir),
1243 > mpi_comm_world, ierr)
1248 if( axis .eq. 2 )then
1249 if( dir .eq. -1 )then
1253 buff_len = buff_len + 1
1254 buff(buff_len, buff_id ) = u( i1, 2,i3)
1259 > buff(1, buff_id ), buff_len,dp_type,
1260 > nbr( axis, dir, k ), msg_type(axis,dir),
1261 > mpi_comm_world, ierr)
1263 else if( dir .eq. +1 ) then
1267 buff_len = buff_len + 1
1268 buff(buff_len, buff_id )= u( i1,n2-1,i3)
1273 > buff(1, buff_id ), buff_len,dp_type,
1274 > nbr( axis, dir, k ), msg_type(axis,dir),
1275 > mpi_comm_world, ierr)
1280 if( axis .eq. 3 )then
1281 if( dir .eq. -1 )then
1285 buff_len = buff_len + 1
1286 buff(buff_len, buff_id ) = u( i1,i2,2)
1291 > buff(1, buff_id ), buff_len,dp_type,
1292 > nbr( axis, dir, k ), msg_type(axis,dir),
1293 > mpi_comm_world, ierr)
1295 else if( dir .eq. +1 ) then
1299 buff_len = buff_len + 1
1300 buff(buff_len, buff_id ) = u( i1,i2,n3-1)
1305 > buff(1, buff_id ), buff_len,dp_type,
1306 > nbr( axis, dir, k ), msg_type(axis,dir),
1307 > mpi_comm_world, ierr)
1315 c---------------------------------------------------------------------
1316 c---------------------------------------------------------------------
1318 subroutine take3( axis, dir, u, n1, n2, n3 )
1320 c---------------------------------------------------------------------
1321 c---------------------------------------------------------------------
1323 c---------------------------------------------------------------------
1324 c take3 copies in border data from the requested direction
1325 c---------------------------------------------------------------------
1331 integer axis, dir, n1, n2, n3
1332 double precision u( n1, n2, n3 )
1334 integer buff_id, indx
1336 integer status(mpi_status_size), ierr
1340 call mpi_wait( msg_id( axis, dir, 1 ),status,ierr)
1344 if( axis .eq. 1 )then
1345 if( dir .eq. -1 )then
1350 u(n1,i2,i3) = buff(indx, buff_id )
1354 else if( dir .eq. +1 ) then
1359 u(1,i2,i3) = buff(indx, buff_id )
1366 if( axis .eq. 2 )then
1367 if( dir .eq. -1 )then
1372 u(i1,n2,i3) = buff(indx, buff_id )
1376 else if( dir .eq. +1 ) then
1381 u(i1,1,i3) = buff(indx, buff_id )
1388 if( axis .eq. 3 )then
1389 if( dir .eq. -1 )then
1394 u(i1,i2,n3) = buff(indx, buff_id )
1398 else if( dir .eq. +1 ) then
1403 u(i1,i2,1) = buff(indx, buff_id )
1413 c---------------------------------------------------------------------
1414 c---------------------------------------------------------------------
1416 subroutine give3_ex( axis, dir, u, n1, n2, n3, k )
1418 c---------------------------------------------------------------------
1419 c---------------------------------------------------------------------
1421 c---------------------------------------------------------------------
1422 c give3_ex sends border data out to expand number of processors
1423 c---------------------------------------------------------------------
1429 integer axis, dir, n1, n2, n3, k, ierr
1430 double precision u( n1, n2, n3 )
1432 integer i3, i2, i1, buff_len, buff_id
1437 if( axis .eq. 1 )then
1438 if( dir .eq. -1 )then
1442 buff_len = buff_len + 1
1443 buff(buff_len,buff_id ) = u( 2, i2,i3)
1448 > buff(1, buff_id ), buff_len,dp_type,
1449 > nbr( axis, dir, k ), msg_type(axis,dir),
1450 > mpi_comm_world, ierr)
1452 else if( dir .eq. +1 ) then
1457 buff_len = buff_len + 1
1458 buff(buff_len,buff_id)= u(i1,i2,i3)
1464 > buff(1, buff_id ), buff_len,dp_type,
1465 > nbr( axis, dir, k ), msg_type(axis,dir),
1466 > mpi_comm_world, ierr)
1471 if( axis .eq. 2 )then
1472 if( dir .eq. -1 )then
1476 buff_len = buff_len + 1
1477 buff(buff_len, buff_id ) = u( i1, 2,i3)
1482 > buff(1, buff_id ), buff_len,dp_type,
1483 > nbr( axis, dir, k ), msg_type(axis,dir),
1484 > mpi_comm_world, ierr)
1486 else if( dir .eq. +1 ) then
1491 buff_len = buff_len + 1
1492 buff(buff_len,buff_id )= u(i1,i2,i3)
1498 > buff(1, buff_id ), buff_len,dp_type,
1499 > nbr( axis, dir, k ), msg_type(axis,dir),
1500 > mpi_comm_world, ierr)
1505 if( axis .eq. 3 )then
1506 if( dir .eq. -1 )then
1510 buff_len = buff_len + 1
1511 buff(buff_len, buff_id ) = u( i1,i2,2)
1516 > buff(1, buff_id ), buff_len,dp_type,
1517 > nbr( axis, dir, k ), msg_type(axis,dir),
1518 > mpi_comm_world, ierr)
1520 else if( dir .eq. +1 ) then
1525 buff_len = buff_len + 1
1526 buff(buff_len, buff_id ) = u( i1,i2,i3)
1532 > buff(1, buff_id ), buff_len,dp_type,
1533 > nbr( axis, dir, k ), msg_type(axis,dir),
1534 > mpi_comm_world, ierr)
1542 c---------------------------------------------------------------------
1543 c---------------------------------------------------------------------
1545 subroutine take3_ex( axis, dir, u, n1, n2, n3 )
1547 c---------------------------------------------------------------------
1548 c---------------------------------------------------------------------
1550 c---------------------------------------------------------------------
1551 c take3_ex copies in border data to expand number of processors
1552 c---------------------------------------------------------------------
1558 integer axis, dir, n1, n2, n3
1559 double precision u( n1, n2, n3 )
1561 integer buff_id, indx
1563 integer status(mpi_status_size) , ierr
1567 call mpi_wait( msg_id( axis, dir, 1 ),status,ierr)
1571 if( axis .eq. 1 )then
1572 if( dir .eq. -1 )then
1577 u(n1,i2,i3) = buff(indx, buff_id )
1581 else if( dir .eq. +1 ) then
1587 u(i1,i2,i3) = buff(indx,buff_id)
1595 if( axis .eq. 2 )then
1596 if( dir .eq. -1 )then
1601 u(i1,n2,i3) = buff(indx, buff_id )
1605 else if( dir .eq. +1 ) then
1611 u(i1,i2,i3) = buff(indx,buff_id)
1619 if( axis .eq. 3 )then
1620 if( dir .eq. -1 )then
1625 u(i1,i2,n3) = buff(indx, buff_id )
1629 else if( dir .eq. +1 ) then
1635 u(i1,i2,i3) = buff(indx,buff_id)
1646 c---------------------------------------------------------------------
1647 c---------------------------------------------------------------------
1649 subroutine comm1p( axis, u, n1, n2, n3, kk )
1651 c---------------------------------------------------------------------
1652 c---------------------------------------------------------------------
1659 integer axis, dir, n1, n2, n3
1660 double precision u( n1, n2, n3 )
1662 integer i3, i2, i1, buff_len,buff_id
1671 buff(i,buff_id) = 0.0D0
1681 buff(i,buff_id) = 0.0D0
1689 if( axis .eq. 1 )then
1692 buff_len = buff_len + 1
1693 buff(buff_len, buff_id ) = u( n1-1, i2,i3)
1698 if( axis .eq. 2 )then
1701 buff_len = buff_len + 1
1702 buff(buff_len, buff_id )= u( i1,n2-1,i3)
1707 if( axis .eq. 3 )then
1710 buff_len = buff_len + 1
1711 buff(buff_len, buff_id ) = u( i1,i2,n3-1)
1721 if( axis .eq. 1 )then
1724 buff_len = buff_len + 1
1725 buff(buff_len,buff_id ) = u( 2, i2,i3)
1730 if( axis .eq. 2 )then
1733 buff_len = buff_len + 1
1734 buff(buff_len, buff_id ) = u( i1, 2,i3)
1739 if( axis .eq. 3 )then
1742 buff_len = buff_len + 1
1743 buff(buff_len, buff_id ) = u( i1,i2,2)
1749 buff(i,4) = buff(i,3)
1750 buff(i,2) = buff(i,1)
1758 if( axis .eq. 1 )then
1762 u(n1,i2,i3) = buff(indx, buff_id )
1767 if( axis .eq. 2 )then
1771 u(i1,n2,i3) = buff(indx, buff_id )
1776 if( axis .eq. 3 )then
1780 u(i1,i2,n3) = buff(indx, buff_id )
1791 if( axis .eq. 1 )then
1795 u(1,i2,i3) = buff(indx, buff_id )
1800 if( axis .eq. 2 )then
1804 u(i1,1,i3) = buff(indx, buff_id )
1809 if( axis .eq. 3 )then
1813 u(i1,i2,1) = buff(indx, buff_id )
1821 c---------------------------------------------------------------------
1822 c---------------------------------------------------------------------
1824 subroutine comm1p_ex( axis, u, n1, n2, n3, kk )
1826 c---------------------------------------------------------------------
1827 c---------------------------------------------------------------------
1834 integer axis, dir, n1, n2, n3
1835 double precision u( n1, n2, n3 )
1837 integer i3, i2, i1, buff_len,buff_id
1840 if( take_ex( axis, kk ) ) then
1848 buff(i,buff_id) = 0.0D0
1858 buff(i,buff_id) = 0.0D0
1867 if( axis .eq. 1 )then
1871 u(n1,i2,i3) = buff(indx, buff_id )
1876 if( axis .eq. 2 )then
1880 u(i1,n2,i3) = buff(indx, buff_id )
1885 if( axis .eq. 3 )then
1889 u(i1,i2,n3) = buff(indx, buff_id )
1899 if( axis .eq. 1 )then
1904 u(i1,i2,i3) = buff(indx,buff_id)
1910 if( axis .eq. 2 )then
1915 u(i1,i2,i3) = buff(indx,buff_id)
1921 if( axis .eq. 3 )then
1926 u(i1,i2,i3) = buff(indx,buff_id)
1934 if( give_ex( axis, kk ) )then
1941 if( axis .eq. 1 )then
1945 buff_len = buff_len + 1
1946 buff(buff_len,buff_id)= u(i1,i2,i3)
1952 if( axis .eq. 2 )then
1956 buff_len = buff_len + 1
1957 buff(buff_len,buff_id )= u(i1,i2,i3)
1963 if( axis .eq. 3 )then
1967 buff_len = buff_len + 1
1968 buff(buff_len, buff_id ) = u( i1,i2,i3)
1979 if( axis .eq. 1 )then
1982 buff_len = buff_len + 1
1983 buff(buff_len,buff_id ) = u( 2, i2,i3)
1988 if( axis .eq. 2 )then
1991 buff_len = buff_len + 1
1992 buff(buff_len, buff_id ) = u( i1, 2,i3)
1997 if( axis .eq. 3 )then
2000 buff_len = buff_len + 1
2001 buff(buff_len, buff_id ) = u( i1,i2,2)
2009 buff(i,4) = buff(i,3)
2010 buff(i,2) = buff(i,1)
2016 c---------------------------------------------------------------------
2017 c---------------------------------------------------------------------
2019 subroutine zran3(z,n1,n2,n3,nx,ny,k)
2021 c---------------------------------------------------------------------
2022 c---------------------------------------------------------------------
2024 c---------------------------------------------------------------------
2025 c zran3 loads +1 at ten randomly chosen points,
2026 c loads -1 at a different ten random points,
2027 c and zero elsewhere.
2028 c---------------------------------------------------------------------
2033 integer is1, is2, is3, ie1, ie2, ie3
2034 common /grid/ is1,is2,is3,ie1,ie2,ie3
2036 integer n1, n2, n3, k, nx, ny, ierr, i0, m0, m1
2037 double precision z(n1,n2,n3)
2039 integer mm, i1, i2, i3, d1, e1, e2, e3
2040 double precision x, a
2041 double precision xx, x0, x1, a1, a2, ai, power
2042 parameter( mm = 10, a = 5.D0 ** 13, x = 314159265.D0)
2043 double precision ten( mm, 0:1 ), temp, best
2044 integer i, j1( mm, 0:1 ), j2( mm, 0:1 ), j3( mm, 0:1 )
2045 integer jg( 0:3, mm, 0:1 ), jg_temp(4)
2048 double precision randlc, rdummy
2050 a1 = power( a, nx, 1, 0 )
2051 a2 = power( a, nx, ny, 0 )
2053 call zero3(z,n1,n2,n3)
2055 c i = is1-2+nx*(is2-2+ny*(is3-2))
2057 ai = power( a, nx, is2-2+ny*(is3-2), is1-2 )
2063 rdummy = randlc( x0, ai )
2068 call vranlc( d1, xx, a, z( 2, i2, i3 ))
2069 rdummy = randlc( x1, a1 )
2071 rdummy = randlc( x0, a2 )
2074 c---------------------------------------------------------------------
2075 c call comm3(z,n1,n2,n3)
2076 c call showall(z,n1,n2,n3)
2077 c---------------------------------------------------------------------
2079 c---------------------------------------------------------------------
2080 c each processor looks for twenty candidates
2081 c---------------------------------------------------------------------
2096 if( z(i1,i2,i3) .gt. ten( 1, 1 ) )then
2097 ten(1,1) = z(i1,i2,i3)
2101 call bubble( ten, j1, j2, j3, mm, 1 )
2103 if( z(i1,i2,i3) .lt. ten( 1, 0 ) )then
2104 ten(1,0) = z(i1,i2,i3)
2108 call bubble( ten, j1, j2, j3, mm, 0 )
2114 call mpi_barrier(mpi_comm_world,ierr)
2116 c---------------------------------------------------------------------
2117 c Now which of these are globally best?
2118 c---------------------------------------------------------------------
2123 best = z( j1(i1,1), j2(i1,1), j3(i1,1) )
2124 call mpi_allreduce(best,temp,1,dp_type,
2125 > mpi_max,mpi_comm_world,ierr)
2127 if(best.eq.z(j1(i1,1),j2(i1,1),j3(i1,1)))then
2129 jg( 1, i, 1) = is1 - 2 + j1( i1, 1 )
2130 jg( 2, i, 1) = is2 - 2 + j2( i1, 1 )
2131 jg( 3, i, 1) = is3 - 2 + j3( i1, 1 )
2140 call mpi_allreduce(jg(0,i,1), jg_temp,4,MPI_INTEGER,
2141 > mpi_max,mpi_comm_world,ierr)
2142 jg( 0, i, 1) = jg_temp(1)
2143 jg( 1, i, 1) = jg_temp(2)
2144 jg( 2, i, 1) = jg_temp(3)
2145 jg( 3, i, 1) = jg_temp(4)
2147 best = z( j1(i0,0), j2(i0,0), j3(i0,0) )
2148 call mpi_allreduce(best,temp,1,dp_type,
2149 > mpi_min,mpi_comm_world,ierr)
2151 if(best.eq.z(j1(i0,0),j2(i0,0),j3(i0,0)))then
2153 jg( 1, i, 0) = is1 - 2 + j1( i0, 0 )
2154 jg( 2, i, 0) = is2 - 2 + j2( i0, 0 )
2155 jg( 3, i, 0) = is3 - 2 + j3( i0, 0 )
2164 call mpi_allreduce(jg(0,i,0), jg_temp,4,MPI_INTEGER,
2165 > mpi_max,mpi_comm_world,ierr)
2166 jg( 0, i, 0) = jg_temp(1)
2167 jg( 1, i, 0) = jg_temp(2)
2168 jg( 2, i, 0) = jg_temp(3)
2169 jg( 3, i, 0) = jg_temp(4)
2175 c if( me .eq. root) then
2177 c write(*,*)' negative charges at'
2178 c write(*,9)(jg(1,i,0),jg(2,i,0),jg(3,i,0),i=1,mm)
2179 c write(*,*)' positive charges at'
2180 c write(*,9)(jg(1,i,1),jg(2,i,1),jg(3,i,1),i=1,mm)
2181 c write(*,*)' small random numbers were'
2182 c write(*,8)(ten( i,0),i=mm,1,-1)
2183 c write(*,*)' and they were found on processor number'
2184 c write(*,7)(jg(0,i,0),i=mm,1,-1)
2185 c write(*,*)' large random numbers were'
2186 c write(*,8)(ten( i,1),i=mm,1,-1)
2187 c write(*,*)' and they were found on processor number'
2188 c write(*,7)(jg(0,i,1),i=mm,1,-1)
2190 c 9 format(5(' (',i3,2(',',i3),')'))
2193 call mpi_barrier(mpi_comm_world,ierr)
2202 z( j1(i,0), j2(i,0), j3(i,0) ) = -1.0D0
2205 z( j1(i,1), j2(i,1), j3(i,1) ) = +1.0D0
2207 call comm3(z,n1,n2,n3,k)
2209 c---------------------------------------------------------------------
2210 c call showall(z,n1,n2,n3)
2211 c---------------------------------------------------------------------
2216 c---------------------------------------------------------------------
2217 c---------------------------------------------------------------------
2219 subroutine show_l(z,n1,n2,n3)
2221 c---------------------------------------------------------------------
2222 c---------------------------------------------------------------------
2228 integer n1,n2,n3,i1,i2,i3,ierr
2229 double precision z(n1,n2,n3)
2230 integer m1, m2, m3,i
2239 write(*,*)' id = ', me
2242 write(*,6)(z(i1,i2,i3),i2=1,m2)
2244 write(*,*)' - - - - - - - '
2249 call mpi_barrier(mpi_comm_world,ierr)
2255 c---------------------------------------------------------------------
2256 c---------------------------------------------------------------------
2258 subroutine showall(z,n1,n2,n3)
2260 c---------------------------------------------------------------------
2261 c---------------------------------------------------------------------
2267 integer n1,n2,n3,i1,i2,i3,i,ierr
2268 double precision z(n1,n2,n3)
2278 write(*,*)' id = ', me
2281 write(*,6)(z(i1,i2,i3),i2=1,m2)
2283 write(*,*)' - - - - - - - '
2288 call mpi_barrier(mpi_comm_world,ierr)
2294 c---------------------------------------------------------------------
2295 c---------------------------------------------------------------------
2297 subroutine show(z,n1,n2,n3)
2299 c---------------------------------------------------------------------
2300 c---------------------------------------------------------------------
2305 integer n1,n2,n3,i1,i2,i3,ierr,i
2306 double precision z(n1,n2,n3)
2311 write(*,*)' id = ', me
2314 write(*,6)(z(i1,i2,i3),i2=2,n1-1)
2316 write(*,*)' - - - - - - - '
2321 call mpi_barrier(mpi_comm_world,ierr)
2324 c call comm3(z,n1,n2,n3)
2329 c---------------------------------------------------------------------
2330 c---------------------------------------------------------------------
2332 double precision function power( a, n1, n2, n3 )
2334 c---------------------------------------------------------------------
2335 c---------------------------------------------------------------------
2337 c---------------------------------------------------------------------
2338 c power raises an integer, disguised as a double
2339 c precision real, to an integer power.
2340 c This version tries to avoid integer overflow by treating
2341 c it as expressed in a form of "n1*n2+n3".
2342 c---------------------------------------------------------------------
2345 double precision a, aj
2348 integer n1j, n2j, nj
2350 double precision randlc, rdummy
2359 if( n2j .gt. 0 ) then
2360 if( mod(n2j,2) .eq. 1 ) nj = nj + n1j
2362 else if( nj .eq. 0 ) then
2365 if( mod(nj,2) .eq. 1 ) rdummy = randlc( power, aj )
2366 rdummy = randlc( aj, aj )
2374 c---------------------------------------------------------------------
2375 c---------------------------------------------------------------------
2377 subroutine bubble( ten, j1, j2, j3, m, ind )
2379 c---------------------------------------------------------------------
2380 c---------------------------------------------------------------------
2382 c---------------------------------------------------------------------
2383 c bubble does a bubble sort in direction dir
2384 c---------------------------------------------------------------------
2389 integer m, ind, j1( m, 0:1 ), j2( m, 0:1 ), j3( m, 0:1 )
2390 double precision ten( m, 0:1 )
2391 double precision temp
2394 if( ind .eq. 1 )then
2397 if( ten(i,ind) .gt. ten(i+1,ind) )then
2399 temp = ten( i+1, ind )
2400 ten( i+1, ind ) = ten( i, ind )
2401 ten( i, ind ) = temp
2403 j_temp = j1( i+1, ind )
2404 j1( i+1, ind ) = j1( i, ind )
2405 j1( i, ind ) = j_temp
2407 j_temp = j2( i+1, ind )
2408 j2( i+1, ind ) = j2( i, ind )
2409 j2( i, ind ) = j_temp
2411 j_temp = j3( i+1, ind )
2412 j3( i+1, ind ) = j3( i, ind )
2413 j3( i, ind ) = j_temp
2423 if( ten(i,ind) .lt. ten(i+1,ind) )then
2425 temp = ten( i+1, ind )
2426 ten( i+1, ind ) = ten( i, ind )
2427 ten( i, ind ) = temp
2429 j_temp = j1( i+1, ind )
2430 j1( i+1, ind ) = j1( i, ind )
2431 j1( i, ind ) = j_temp
2433 j_temp = j2( i+1, ind )
2434 j2( i+1, ind ) = j2( i, ind )
2435 j2( i, ind ) = j_temp
2437 j_temp = j3( i+1, ind )
2438 j3( i+1, ind ) = j3( i, ind )
2439 j3( i, ind ) = j_temp
2451 c---------------------------------------------------------------------
2452 c---------------------------------------------------------------------
2454 subroutine zero3(z,n1,n2,n3)
2456 c---------------------------------------------------------------------
2457 c---------------------------------------------------------------------
2464 double precision z(n1,n2,n3)
2479 c----- end of program ------------------------------------------------