From 594e990878254062bcbb022525407d24d2591653 Mon Sep 17 00:00:00 2001 From: Frederic Suter Date: Mon, 1 Feb 2016 12:55:13 +0100 Subject: [PATCH] further cleaning in NAS + fortran files are not used (and not smpified) + Don't need a MPI_dummy implementation, we have SMPI + have DT and DT-folding in the same dir to avoid replication of DGraph.* --- examples/smpi/NAS/DT-folding/DGraph.c | 184 ---------- examples/smpi/NAS/DT-folding/DGraph.h | 43 --- examples/smpi/NAS/DT-folding/Makefile | 26 -- examples/smpi/NAS/DT-folding/README | 22 -- examples/smpi/NAS/DT/Makefile | 12 +- .../NAS/{DT-folding/dt.c => DT/dt-folding.c} | 0 examples/smpi/NAS/EP/Makefile | 6 - examples/smpi/NAS/EP/ep.f | 316 ------------------ examples/smpi/NAS/MPI_dummy/Makefile | 38 --- examples/smpi/NAS/MPI_dummy/README | 52 --- examples/smpi/NAS/MPI_dummy/mpi.h | 112 ------- examples/smpi/NAS/MPI_dummy/mpi_dummy.c | 265 --------------- examples/smpi/NAS/MPI_dummy/mpi_dummy.f | 309 ----------------- examples/smpi/NAS/MPI_dummy/mpif.h | 27 -- examples/smpi/NAS/MPI_dummy/test.f | 10 - examples/smpi/NAS/MPI_dummy/wtime.c | 13 - examples/smpi/NAS/MPI_dummy/wtime.f | 12 - examples/smpi/NAS/MPI_dummy/wtime.h | 12 - examples/smpi/NAS/MPI_dummy/wtime_sgi64.c | 74 ---- examples/smpi/NAS/Makefile | 25 +- examples/smpi/NAS/common/print_results.f | 115 ------- examples/smpi/NAS/common/randdp.f | 137 -------- examples/smpi/NAS/common/randdpvec.f | 186 ----------- examples/smpi/NAS/common/randi8.f | 79 ----- examples/smpi/NAS/common/randi8_safe.f | 64 ---- examples/smpi/NAS/common/timers.f | 78 ----- examples/smpi/NAS/config/make.dummy | 7 - 27 files changed, 11 insertions(+), 2213 deletions(-) delete mode 100644 examples/smpi/NAS/DT-folding/DGraph.c delete mode 100644 examples/smpi/NAS/DT-folding/DGraph.h delete mode 100644 examples/smpi/NAS/DT-folding/Makefile delete mode 100644 examples/smpi/NAS/DT-folding/README rename examples/smpi/NAS/{DT-folding/dt.c => DT/dt-folding.c} (100%) delete mode 100644 examples/smpi/NAS/EP/ep.f delete mode 100644 examples/smpi/NAS/MPI_dummy/Makefile delete mode 100644 examples/smpi/NAS/MPI_dummy/README delete mode 100644 examples/smpi/NAS/MPI_dummy/mpi.h delete mode 100644 examples/smpi/NAS/MPI_dummy/mpi_dummy.c delete mode 100644 examples/smpi/NAS/MPI_dummy/mpi_dummy.f delete mode 100644 examples/smpi/NAS/MPI_dummy/mpif.h delete mode 100644 examples/smpi/NAS/MPI_dummy/test.f delete mode 100644 examples/smpi/NAS/MPI_dummy/wtime.c delete mode 100644 examples/smpi/NAS/MPI_dummy/wtime.f delete mode 100644 examples/smpi/NAS/MPI_dummy/wtime.h delete mode 100644 examples/smpi/NAS/MPI_dummy/wtime_sgi64.c delete mode 100644 examples/smpi/NAS/common/print_results.f delete mode 100644 examples/smpi/NAS/common/randdp.f delete mode 100644 examples/smpi/NAS/common/randdpvec.f delete mode 100644 examples/smpi/NAS/common/randi8.f delete mode 100644 examples/smpi/NAS/common/randi8_safe.f delete mode 100644 examples/smpi/NAS/common/timers.f delete mode 100644 examples/smpi/NAS/config/make.dummy diff --git a/examples/smpi/NAS/DT-folding/DGraph.c b/examples/smpi/NAS/DT-folding/DGraph.c deleted file mode 100644 index f573786db8..0000000000 --- a/examples/smpi/NAS/DT-folding/DGraph.c +++ /dev/null @@ -1,184 +0,0 @@ -#include -#include -#include - -#include "DGraph.h" - -DGArc *newArc(DGNode *tl,DGNode *hd){ - DGArc *ar=(DGArc *)malloc(sizeof(DGArc)); - ar->tail=tl; - ar->head=hd; - return ar; -} -void arcShow(DGArc *ar){ - DGNode *tl=(DGNode *)ar->tail, - *hd=(DGNode *)ar->head; - fprintf(stderr,"%d. |%s ->%s\n",ar->id,tl->name,hd->name); -} - -DGNode *newNode(char *nm){ - DGNode *nd=(DGNode *)malloc(sizeof(DGNode)); - nd->attribute=0; - nd->color=0; - nd->inDegree=0; - nd->outDegree=0; - nd->maxInDegree=SMALL_BLOCK_SIZE; - nd->maxOutDegree=SMALL_BLOCK_SIZE; - nd->inArc=(DGArc **)malloc(nd->maxInDegree*sizeof(DGArc*)); - nd->outArc=(DGArc **)malloc(nd->maxOutDegree*sizeof(DGArc*)); - nd->name=strdup(nm); - nd->feat=NULL; - return nd; -} -void nodeShow(DGNode* nd){ - fprintf( stderr,"%3d.%s: (%d,%d)\n", - nd->id,nd->name,nd->inDegree,nd->outDegree); -/* - if(nd->verified==1) fprintf(stderr,"%ld.%s\t: usable.",nd->id,nd->name); - else if(nd->verified==0) fprintf(stderr,"%ld.%s\t: unusable.",nd->id,nd->name); - else fprintf(stderr,"%ld.%s\t: notverified.",nd->id,nd->name); -*/ -} - -DGraph* newDGraph(char* nm){ - DGraph *dg=(DGraph *)malloc(sizeof(DGraph)); - dg->numNodes=0; - dg->numArcs=0; - dg->maxNodes=BLOCK_SIZE; - dg->maxArcs=BLOCK_SIZE; - dg->node=(DGNode **)malloc(dg->maxNodes*sizeof(DGNode*)); - dg->arc=(DGArc **)malloc(dg->maxArcs*sizeof(DGArc*)); - dg->name=strdup(nm); - return dg; -} -int AttachNode(DGraph* dg, DGNode* nd) { - int i=0,j,len=0; - DGNode **nds =NULL, *tmpnd=NULL; - DGArc **ar=NULL; - - if (dg->numNodes == dg->maxNodes-1 ) { - dg->maxNodes += BLOCK_SIZE; - nds =(DGNode **) calloc(dg->maxNodes,sizeof(DGNode*)); - memcpy(nds,dg->node,(dg->maxNodes-BLOCK_SIZE)*sizeof(DGNode*)); - free(dg->node); - dg->node=nds; - } - - len = strlen( nd->name); - for (i = 0; i < dg->numNodes; i++) { - tmpnd =dg->node[ i]; - ar=NULL; - if ( strlen( tmpnd->name) != len ) continue; - if ( strncmp( nd->name, tmpnd->name, len) ) continue; - if ( nd->inDegree > 0 ) { - tmpnd->maxInDegree += nd->maxInDegree; - ar =(DGArc **) calloc(tmpnd->maxInDegree,sizeof(DGArc*)); - memcpy(ar,tmpnd->inArc,(tmpnd->inDegree)*sizeof(DGArc*)); - free(tmpnd->inArc); - tmpnd->inArc=ar; - for (j = 0; j < nd->inDegree; j++ ) { - nd->inArc[ j]->head = tmpnd; - } - memcpy( &(tmpnd->inArc[ tmpnd->inDegree]), nd->inArc, nd->inDegree*sizeof( DGArc *)); - tmpnd->inDegree += nd->inDegree; - } - if ( nd->outDegree > 0 ) { - tmpnd->maxOutDegree += nd->maxOutDegree; - ar =(DGArc **) calloc(tmpnd->maxOutDegree,sizeof(DGArc*)); - memcpy(ar,tmpnd->outArc,(tmpnd->outDegree)*sizeof(DGArc*)); - free(tmpnd->outArc); - tmpnd->outArc=ar; - for (j = 0; j < nd->outDegree; j++ ) { - nd->outArc[ j]->tail = tmpnd; - } - memcpy( &(tmpnd->outArc[tmpnd->outDegree]),nd->outArc,nd->outDegree*sizeof( DGArc *)); - tmpnd->outDegree += nd->outDegree; - } - free(nd); - return i; - } - nd->id = dg->numNodes; - dg->node[dg->numNodes] = nd; - dg->numNodes++; -return nd->id; -} -int AttachArc(DGraph *dg,DGArc* nar){ -int arcId = -1; -int i=0,newNumber=0; -DGNode *head = nar->head, - *tail = nar->tail; -DGArc **ars=NULL,*probe=NULL; -/*fprintf(stderr,"AttachArc %ld\n",dg->numArcs); */ - if ( !tail || !head ) return arcId; - if ( dg->numArcs == dg->maxArcs-1 ) { - dg->maxArcs += BLOCK_SIZE; - ars =(DGArc **) calloc(dg->maxArcs,sizeof(DGArc*)); - memcpy(ars,dg->arc,(dg->maxArcs-BLOCK_SIZE)*sizeof(DGArc*)); - free(dg->arc); - dg->arc=ars; - } - for(i = 0; i < tail->outDegree; i++ ) { /* parallel arc */ - probe = tail->outArc[ i]; - if(probe->head == head - && - probe->length == nar->length - ){ - free(nar); - return probe->id; - } - } - - nar->id = dg->numArcs; - arcId=dg->numArcs; - dg->arc[dg->numArcs] = nar; - dg->numArcs++; - - head->inArc[ head->inDegree] = nar; - head->inDegree++; - if ( head->inDegree >= head->maxInDegree ) { - newNumber = head->maxInDegree + SMALL_BLOCK_SIZE; - ars =(DGArc **) calloc(newNumber,sizeof(DGArc*)); - memcpy(ars,head->inArc,(head->inDegree)*sizeof(DGArc*)); - free(head->inArc); - head->inArc=ars; - head->maxInDegree = newNumber; - } - tail->outArc[ tail->outDegree] = nar; - tail->outDegree++; - if(tail->outDegree >= tail->maxOutDegree ) { - newNumber = tail->maxOutDegree + SMALL_BLOCK_SIZE; - ars =(DGArc **) calloc(newNumber,sizeof(DGArc*)); - memcpy(ars,tail->outArc,(tail->outDegree)*sizeof(DGArc*)); - free(tail->outArc); - tail->outArc=ars; - tail->maxOutDegree = newNumber; - } -/*fprintf(stderr,"AttachArc: head->in=%d tail->out=%ld\n",head->inDegree,tail->outDegree);*/ -return arcId; -} -void graphShow(DGraph *dg,int DetailsLevel){ - int i=0,j=0; - fprintf(stderr,"%d.%s: (%d,%d)\n",dg->id,dg->name,dg->numNodes,dg->numArcs); - if ( DetailsLevel < 1) return; - for (i = 0; i < dg->numNodes; i++ ) { - DGNode *focusNode = dg->node[ i]; - if(DetailsLevel >= 2) { - for (j = 0; j < focusNode->inDegree; j++ ) { - fprintf(stderr,"\t "); - nodeShow(focusNode->inArc[ j]->tail); - } - } - nodeShow(focusNode); - if ( DetailsLevel < 2) continue; - for (j = 0; j < focusNode->outDegree; j++ ) { - fprintf(stderr, "\t "); - nodeShow(focusNode->outArc[ j]->head); - } - fprintf(stderr, "---\n"); - } - fprintf(stderr,"----------------------------------------\n"); - if ( DetailsLevel < 3) return; -} - - - diff --git a/examples/smpi/NAS/DT-folding/DGraph.h b/examples/smpi/NAS/DT-folding/DGraph.h deleted file mode 100644 index f38f898b24..0000000000 --- a/examples/smpi/NAS/DT-folding/DGraph.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef _DGRAPH -#define _DGRAPH - -#define BLOCK_SIZE 128 -#define SMALL_BLOCK_SIZE 32 - -typedef struct{ - int id; - void *tail,*head; - int length,width,attribute,maxWidth; -}DGArc; - -typedef struct{ - int maxInDegree,maxOutDegree; - int inDegree,outDegree; - int id; - char *name; - DGArc **inArc,**outArc; - int depth,height,width; - int color,attribute,address,verified; - void *feat; -}DGNode; - -typedef struct{ - int maxNodes,maxArcs; - int id; - char *name; - int numNodes,numArcs; - DGNode **node; - DGArc **arc; -} DGraph; - -DGArc *newArc(DGNode *tl,DGNode *hd); -void arcShow(DGArc *ar); -DGNode *newNode(char *nm); -void nodeShow(DGNode* nd); - -DGraph* newDGraph(char *nm); -int AttachNode(DGraph *dg,DGNode *nd); -int AttachArc(DGraph *dg,DGArc* nar); -void graphShow(DGraph *dg,int DetailsLevel); - -#endif diff --git a/examples/smpi/NAS/DT-folding/Makefile b/examples/smpi/NAS/DT-folding/Makefile deleted file mode 100644 index d1b3702916..0000000000 --- a/examples/smpi/NAS/DT-folding/Makefile +++ /dev/null @@ -1,26 +0,0 @@ -SHELL=/bin/sh -BENCHMARK=dt -BENCHMARKU=DT - -include ../config/make.def - -include ../sys/make.common -#Override PROGRAM -DTPROGRAM = $(BINDIR)/$(BENCHMARK)-folding.$(CLASS) - -OBJS = dt.o DGraph.o \ - ${COMMON}/c_print_results.o ${COMMON}/c_timers.o ${COMMON}/c_randdp.o - - -${PROGRAM}: config ${OBJS} - ${CLINK} ${CLINKFLAGS} -o ${DTPROGRAM} ${OBJS} ${CMPI_LIB} - -.c.o: - ${CCOMPILE} $< - -dt.o: dt.c npbparams.h -DGraph.o: DGraph.c DGraph.h - -clean: - - rm -f *.o *~ mputil* - - rm -f dt npbparams.h core diff --git a/examples/smpi/NAS/DT-folding/README b/examples/smpi/NAS/DT-folding/README deleted file mode 100644 index 873e3ae6f2..0000000000 --- a/examples/smpi/NAS/DT-folding/README +++ /dev/null @@ -1,22 +0,0 @@ -Data Traffic benchmark DT is new in the NPB suite -(released as part of NPB3.x-MPI package). ----------------------------------------------------- - -DT is written in C and same executable can run on any number of processors, -provided this number is not less than the number of nodes in the communication -graph. DT benchmark takes one argument: BH, WH, or SH. This argument -specifies the communication graph Black Hole, White Hole, or SHuffle -respectively. The current release contains verification numbers for -CLASSES S, W, A, and B only. Classes C and D are defined, but verification -numbers are not provided in this release. - -The following table summarizes the number of nodes in the communication -graph based on CLASS and graph TYPE. - -CLASS N_Source N_Nodes(BH,WH) N_Nodes(SH) - S 4 5 12 - W 8 11 32 - A 16 21 80 - B 32 43 192 - C 64 85 448 - D 128 171 1024 diff --git a/examples/smpi/NAS/DT/Makefile b/examples/smpi/NAS/DT/Makefile index 28d9502fe2..f21a8a64e4 100644 --- a/examples/smpi/NAS/DT/Makefile +++ b/examples/smpi/NAS/DT/Makefile @@ -1,6 +1,5 @@ SHELL=/bin/sh BENCHMARK=dt -BENCHMARKU=DT include ../config/make.def @@ -8,19 +7,24 @@ include ../sys/make.common #Override PROGRAM DTPROGRAM = $(BINDIR)/$(BENCHMARK).$(CLASS) -OBJS = dt.o DGraph.o \ +OBJS = dt.o DGraph.o \ + ${COMMON}/c_print_results.o ${COMMON}/c_timers.o ${COMMON}/c_randdp.o + +OBJS-F = dt-folding.o DGraph.o \ ${COMMON}/c_print_results.o ${COMMON}/c_timers.o ${COMMON}/c_randdp.o -${PROGRAM}: config ${OBJS} +${PROGRAM}: config ${OBJS} ${OBJS-F} ${CLINK} ${CLINKFLAGS} -o ${DTPROGRAM} ${OBJS} ${CMPI_LIB} + ${CLINK} ${CLINKFLAGS} -o ${DTPROGRAM}-folding ${OBJS-F} ${CMPI_LIB} .c.o: ${CCOMPILE} $< dt.o: dt.c npbparams.h +dt-folding.o: dt-folding.c npbparams.h DGraph.o: DGraph.c DGraph.h clean: - rm -f *.o *~ mputil* - - rm -f dt npbparams.h core + - rm -f dt dt-folding npbparams.h diff --git a/examples/smpi/NAS/DT-folding/dt.c b/examples/smpi/NAS/DT/dt-folding.c similarity index 100% rename from examples/smpi/NAS/DT-folding/dt.c rename to examples/smpi/NAS/DT/dt-folding.c diff --git a/examples/smpi/NAS/EP/Makefile b/examples/smpi/NAS/EP/Makefile index 2014329288..217f57df69 100644 --- a/examples/smpi/NAS/EP/Makefile +++ b/examples/smpi/NAS/EP/Makefile @@ -4,19 +4,13 @@ BENCHMARKU=EP include ../config/make.def -#OBJS = ep.o ${COMMON}/print_results.o ${COMMON}/${RAND}.o ${COMMON}/timers.o OBJS = ep.o randlc.o include ../sys/make.common ${PROGRAM}: config ${OBJS} -# ${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${FMPI_LIB} ${CLINK} ${CLINKFLAGS} -o ${PROGRAM} ${OBJS} ${CMPI_LIB} - -#ep.o: ep.f mpinpb.h npbparams.h -# ${FCOMPILE} ep.f - ep.o: ep.c randlc.c mpinpb.h npbparams.h ${CCOMPILE} ep.c diff --git a/examples/smpi/NAS/EP/ep.f b/examples/smpi/NAS/EP/ep.f deleted file mode 100644 index ca7cc24620..0000000000 --- a/examples/smpi/NAS/EP/ep.f +++ /dev/null @@ -1,316 +0,0 @@ -!-------------------------------------------------------------------------! -! ! -! N A S P A R A L L E L B E N C H M A R K S 3.3 ! -! ! -! E P ! -! ! -!-------------------------------------------------------------------------! -! ! -! This benchmark is part of the NAS Parallel Benchmark 3.3 suite. ! -! It is described in NAS Technical Reports 95-020 and 02-007 ! -! ! -! Permission to use, copy, distribute and modify this software ! -! for any purpose with or without fee is hereby granted. We ! -! request, however, that all derived work reference the NAS ! -! Parallel Benchmarks 3.3. This software is provided "as is" ! -! without express or implied warranty. ! -! ! -! Information on NPB 3.3, including the technical report, the ! -! original specifications, source code, results and information ! -! on how to submit new results, is available at: ! -! ! -! http://www.nas.nasa.gov/Software/NPB/ ! -! ! -! Send comments or suggestions to npb@nas.nasa.gov ! -! ! -! NAS Parallel Benchmarks Group ! -! NASA Ames Research Center ! -! Mail Stop: T27A-1 ! -! Moffett Field, CA 94035-1000 ! -! ! -! E-mail: npb@nas.nasa.gov ! -! Fax: (650) 604-3957 ! -! ! -!-------------------------------------------------------------------------! - - -c--------------------------------------------------------------------- -c -c Authors: P. O. Frederickson -c D. H. Bailey -c A. C. Woo -c R. F. Van der Wijngaart -c--------------------------------------------------------------------- - -c--------------------------------------------------------------------- - program EMBAR -c--------------------------------------------------------------------- -C -c This is the MPI version of the APP Benchmark 1, -c the "embarassingly parallel" benchmark. -c -c -c M is the Log_2 of the number of complex pairs of uniform (0, 1) random -c numbers. MK is the Log_2 of the size of each batch of uniform random -c numbers. MK can be set for convenience on a given system, since it does -c not affect the results. - - implicit none - - include 'npbparams.h' - include 'mpinpb.h' - - double precision Mops, epsilon, a, s, t1, t2, t3, t4, x, x1, - > x2, q, sx, sy, tm, an, tt, gc, dum(3), - > timer_read - double precision sx_verify_value, sy_verify_value, sx_err, sy_err - integer mk, mm, nn, nk, nq, np, ierr, node, no_nodes, - > i, ik, kk, l, k, nit, ierrcode, no_large_nodes, - > np_add, k_offset, j - logical verified, timers_enabled - parameter (timers_enabled = .false.) - external randlc, timer_read - double precision randlc, qq - character*15 size - - parameter (mk = 16, mm = m - mk, nn = 2 ** mm, - > nk = 2 ** mk, nq = 10, epsilon=1.d-8, - > a = 1220703125.d0, s = 271828183.d0) - - common/storage/ x(2*nk), q(0:nq-1), qq(10000) - data dum /1.d0, 1.d0, 1.d0/ - - call mpi_init(ierr) - call mpi_comm_rank(MPI_COMM_WORLD,node,ierr) - call mpi_comm_size(MPI_COMM_WORLD,no_nodes,ierr) - - root = 0 - - if (.not. convertdouble) then - dp_type = MPI_DOUBLE_PRECISION - else - dp_type = MPI_REAL - endif - - if (node.eq.root) then - -c Because the size of the problem is too large to store in a 32-bit -c integer for some classes, we put it into a string (for printing). -c Have to strip off the decimal point put in there by the floating -c point print statement (internal file) - - write(*, 1000) - write(size, '(f15.0)' ) 2.d0**(m+1) - j = 15 - if (size(j:j) .eq. '.') j = j - 1 - write (*,1001) size(1:j) - write(*, 1003) no_nodes - - 1000 format(/,' NAS Parallel Benchmarks 3.3 -- EP Benchmark',/) - 1001 format(' Number of random numbers generated: ', a15) - 1003 format(' Number of active processes: ', 2x, i13, /) - - endif - - verified = .false. - -c Compute the number of "batches" of random number pairs generated -c per processor. Adjust if the number of processors does not evenly -c divide the total number - - np = nn / no_nodes - no_large_nodes = mod(nn, no_nodes) - if (node .lt. no_large_nodes) then - np_add = 1 - else - np_add = 0 - endif - np = np + np_add - - if (np .eq. 0) then - write (6, 1) no_nodes, nn - 1 format ('Too many nodes:',2i6) - call mpi_abort(MPI_COMM_WORLD,ierrcode,ierr) - stop - endif - -c Call the random number generator functions and initialize -c the x-array to reduce the effects of paging on the timings. -c Also, call all mathematical functions that are used. Make -c sure these initializations cannot be eliminated as dead code. - - call vranlc(0, dum(1), dum(2), dum(3)) - dum(1) = randlc(dum(2), dum(3)) - do 5 i = 1, 2*nk - x(i) = -1.d99 - 5 continue - Mops = log(sqrt(abs(max(1.d0,1.d0)))) - -c--------------------------------------------------------------------- -c Synchronize before placing time stamp -c--------------------------------------------------------------------- - call mpi_barrier(MPI_COMM_WORLD, ierr) - - call timer_clear(1) - call timer_clear(2) - call timer_clear(3) - call timer_start(1) - - t1 = a - call vranlc(0, t1, a, x) - -c Compute AN = A ^ (2 * NK) (mod 2^46). - - t1 = a - - do 100 i = 1, mk + 1 - t2 = randlc(t1, t1) - 100 continue - - an = t1 - tt = s - gc = 0.d0 - sx = 0.d0 - sy = 0.d0 - - do 110 i = 0, nq - 1 - q(i) = 0.d0 - 110 continue - -c Each instance of this loop may be performed independently. We compute -c the k offsets separately to take into account the fact that some nodes -c have more numbers to generate than others - - if (np_add .eq. 1) then - k_offset = node * np -1 - else - k_offset = no_large_nodes*(np+1) + (node-no_large_nodes)*np -1 - endif - - do 150 k = 1, np - kk = k_offset + k - t1 = s - t2 = an - -c Find starting seed t1 for this kk. - - do 120 i = 1, 100 - ik = kk / 2 - if (2 * ik .ne. kk) t3 = randlc(t1, t2) - if (ik .eq. 0) goto 130 - t3 = randlc(t2, t2) - kk = ik - 120 continue - -c Compute uniform pseudorandom numbers. - 130 continue - - if (timers_enabled) call timer_start(3) - call vranlc(2 * nk, t1, a, x) - if (timers_enabled) call timer_stop(3) - -c Compute Gaussian deviates by acceptance-rejection method and -c tally counts in concentric square annuli. This loop is not -c vectorizable. - - if (timers_enabled) call timer_start(2) - - do 140 i = 1, nk - x1 = 2.d0 * x(2*i-1) - 1.d0 - x2 = 2.d0 * x(2*i) - 1.d0 - t1 = x1 ** 2 + x2 ** 2 - if (t1 .le. 1.d0) then - t2 = sqrt(-2.d0 * log(t1) / t1) - t3 = (x1 * t2) - t4 = (x2 * t2) - l = max(abs(t3), abs(t4)) - q(l) = q(l) + 1.d0 - sx = sx + t3 - sy = sy + t4 - endif - 140 continue - - if (timers_enabled) call timer_stop(2) - - 150 continue - - call mpi_allreduce(sx, x, 1, dp_type, - > MPI_SUM, MPI_COMM_WORLD, ierr) - sx = x(1) - call mpi_allreduce(sy, x, 1, dp_type, - > MPI_SUM, MPI_COMM_WORLD, ierr) - sy = x(1) - call mpi_allreduce(q, x, nq, dp_type, - > MPI_SUM, MPI_COMM_WORLD, ierr) - - do i = 1, nq - q(i-1) = x(i) - enddo - - do 160 i = 0, nq - 1 - gc = gc + q(i) - 160 continue - - call timer_stop(1) - tm = timer_read(1) - - call mpi_allreduce(tm, x, 1, dp_type, - > MPI_MAX, MPI_COMM_WORLD, ierr) - tm = x(1) - - if (node.eq.root) then - nit=0 - verified = .true. - if (m.eq.24) then - sx_verify_value = -3.247834652034740D+3 - sy_verify_value = -6.958407078382297D+3 - elseif (m.eq.25) then - sx_verify_value = -2.863319731645753D+3 - sy_verify_value = -6.320053679109499D+3 - elseif (m.eq.28) then - sx_verify_value = -4.295875165629892D+3 - sy_verify_value = -1.580732573678431D+4 - elseif (m.eq.30) then - sx_verify_value = 4.033815542441498D+4 - sy_verify_value = -2.660669192809235D+4 - elseif (m.eq.32) then - sx_verify_value = 4.764367927995374D+4 - sy_verify_value = -8.084072988043731D+4 - elseif (m.eq.36) then - sx_verify_value = 1.982481200946593D+5 - sy_verify_value = -1.020596636361769D+5 - elseif (m.eq.40) then - sx_verify_value = -5.319717441530D+05 - sy_verify_value = -3.688834557731D+05 - else - verified = .false. - endif - if (verified) then - sx_err = abs((sx - sx_verify_value)/sx_verify_value) - sy_err = abs((sy - sy_verify_value)/sy_verify_value) - verified = ((sx_err.le.epsilon) .and. (sy_err.le.epsilon)) - endif - Mops = 2.d0**(m+1)/tm/1000000.d0 - - write (6,11) tm, m, gc, sx, sy, (i, q(i), i = 0, nq - 1) - 11 format ('EP Benchmark Results:'//'CPU Time =',f10.4/'N = 2^', - > i5/'No. Gaussian Pairs =',f15.0/'Sums = ',1p,2d25.15/ - > 'Counts:'/(i3,0p,f15.0)) - - call print_results('EP', class, m+1, 0, 0, nit, npm, - > no_nodes, tm, Mops, - > 'Random numbers generated', - > verified, npbversion, compiletime, cs1, - > cs2, cs3, cs4, cs5, cs6, cs7) - - endif - - if (timers_enabled .and. (node .eq. root)) then - print *, 'Total time: ', timer_read(1) - print *, 'Gaussian pairs: ', timer_read(2) - print *, 'Random numbers: ', timer_read(3) - endif - - call mpi_finalize(ierr) - - end diff --git a/examples/smpi/NAS/MPI_dummy/Makefile b/examples/smpi/NAS/MPI_dummy/Makefile deleted file mode 100644 index 86288d7a1d..0000000000 --- a/examples/smpi/NAS/MPI_dummy/Makefile +++ /dev/null @@ -1,38 +0,0 @@ -# Makefile for MPI dummy library. -# Must be edited for a specific machine. Does NOT read in -# the make.def file of NPB 2.3 -F77 = f77 -CC = cc -AR = ar - -# Enable if either Cray or IBM: (no such flag for most machines: see wtime.h) -# MACHINE = -DCRAY -# MACHINE = -DIBM - -libmpi.a: mpi_dummy.o mpi_dummy_c.o wtime.o - $(AR) r libmpi.a mpi_dummy.o mpi_dummy_c.o wtime.o - -mpi_dummy.o: mpi_dummy.f mpif.h - $(F77) -c mpi_dummy.f -# For a Cray C90, try: -# cf77 -dp -c mpi_dummy.f -# For an IBM 590, try: -# xlf -c mpi_dummy.f - -mpi_dummy_c.o: mpi_dummy.c mpi.h - $(CC) -c ${MACHINE} -o mpi_dummy_c.o mpi_dummy.c - -wtime.o: wtime.c -# For most machines or CRAY or IBM - $(CC) -c ${MACHINE} wtime.c -# For a precise timer on an SGI Power Challenge, try: -# $(CC) -o wtime.o -c wtime_sgi64.c - -test: test.f - $(F77) -o test -I. test.f -L. -lmpi - - - -clean: - - rm -f *~ *.o - - rm -f test libmpi.a diff --git a/examples/smpi/NAS/MPI_dummy/README b/examples/smpi/NAS/MPI_dummy/README deleted file mode 100644 index 9096a0b350..0000000000 --- a/examples/smpi/NAS/MPI_dummy/README +++ /dev/null @@ -1,52 +0,0 @@ -########################################### -# NAS Parallel Benchmarks 2&3 # -# MPI/F77/C # -# Revision 3.3 # -# NASA Ames Research Center # -# npb@nas.nasa.gov # -# http://www.nas.nasa.gov/Software/NPB/ # -########################################### - -MPI Dummy Library - - -The MPI dummy library is supplied as a convenience for people who do -not have an MPI library but would like to try running on one processor -anyway. The NPB 2.x/3.x benchmarks are designed so that they do not -actually try to do any message passing when run on one node. The MPI -dummy library is just that - a set of dummy MPI routines which don't -do anything, but allow you to link the benchmarks. Actually they do a -few things, but nothing important. Note that the dummy library is -sufficient only for the NPB 2.x/3.x benchmarks. It probably won't be -useful for anything else because it implements only a handful of -functions. - -Because the dummy library is just an extra goody, and since we don't -have an infinite amount of time, it may be a bit trickier to configure -than the rest of the benchmarks. You need to: - -1. Find out how C and Fortran interact on your machine. On most machines, -the fortran functon foo(x) is declared in C as foo_(xp) where xp is -a pointer, not a value. On IBMs, it's just foo(xp). On Cray C90s, its -FOO(xp). You can define CRAY or IBM to get these, or you need to -edit wtime.c if you've got something else. - -2. Edit the Makefile to compile mpi_dummy.f and wtime.c correctly -for your machine (including -DCRAY or -DIBM if necessary). - -3. The substitute MPI timer gives wall clock time, not CPU time. -If you're running on a timeshared machine, you may want to -use a CPU timer. Edit the function mpi_wtime() in mpi_dummy.f -to change this timer. (NOTE: for official benchmark results, -ONLY wall clock times are valid. Using a CPU timer is ok -if you want to get things running, but don't report any results -measured with a CPU timer. ) - -TROUBLESHOOTING - -o Compiling or linking of the benchmark aborts because the dummy MPI - header file or the dummy MPI library cannot be found. - - the file make.dummy in subdirectory config relies on the use - of the -I"path" and -L"path" -l"library" constructs to pass - information to the compilers and linkers. Edit this file to conform - to your system. diff --git a/examples/smpi/NAS/MPI_dummy/mpi.h b/examples/smpi/NAS/MPI_dummy/mpi.h deleted file mode 100644 index 70eb3138b4..0000000000 --- a/examples/smpi/NAS/MPI_dummy/mpi.h +++ /dev/null @@ -1,112 +0,0 @@ -#define MPI_DOUBLE 1 -#define MPI_INT 2 -#define MPI_BYTE 3 -#define MPI_FLOAT 4 -#define MPI_LONG 5 - -#define MPI_COMM_WORLD 0 - -#define MPI_MAX 1 -#define MPI_SUM 2 -#define MPI_MIN 3 - -#define MPI_SUCCESS 0 -#define MPI_ANY_SOURCE -1 -#define MPI_ERR_OTHER -1 -#define MPI_STATUS_SIZE 3 - - -/* - Status object. It is the only user-visible MPI data-structure - The "count" field is PRIVATE; use MPI_Get_count to access it. - */ -typedef struct { - int count; - int MPI_SOURCE; - int MPI_TAG; - int MPI_ERROR; -} MPI_Status; - - -/* MPI request objects */ -typedef int MPI_Request; - -/* MPI datatype */ -typedef int MPI_Datatype; - -/* MPI comm */ -typedef int MPI_Comm; - -/* MPI operation */ -typedef int MPI_Op; - - - -/* Prototypes: */ -void mpi_error( void ); - -int MPI_Irecv( void *buf, - int count, - MPI_Datatype datatype, - int source, - int tag, - MPI_Comm comm, - MPI_Request *request ); - -int MPI_Send( void *buf, - int count, - MPI_Datatype datatype, - int dest, - int tag, - MPI_Comm comm ); - -int MPI_Wait( MPI_Request *request, - MPI_Status *status ); - -int MPI_Init( int *argc, - char ***argv ); - -int MPI_Comm_rank( MPI_Comm comm, - int *rank ); - -int MPI_Comm_size( MPI_Comm comm, - int *size ); - -double MPI_Wtime( void ); - -int MPI_Barrier( MPI_Comm comm ); - -int MPI_Finalize( void ); - -int MPI_Allreduce( void *sendbuf, - void *recvbuf, - int nitems, - MPI_Datatype type, - MPI_Op op, - MPI_Comm comm ); - -int MPI_Reduce( void *sendbuf, - void *recvbuf, - int nitems, - MPI_Datatype type, - MPI_Op op, - int root, - MPI_Comm comm ); - -int MPI_Alltoall( void *sendbuf, - int sendcount, - MPI_Datatype sendtype, - void *recvbuf, - int recvcount, - MPI_Datatype recvtype, - MPI_Comm comm ); - -int MPI_Alltoallv( void *sendbuf, - int *sendcounts, - int *senddispl, - MPI_Datatype sendtype, - void *recvbuf, - int *recvcounts, - int *recvdispl, - MPI_Datatype recvtype, - MPI_Comm comm ); diff --git a/examples/smpi/NAS/MPI_dummy/mpi_dummy.c b/examples/smpi/NAS/MPI_dummy/mpi_dummy.c deleted file mode 100644 index 4cb7f06e63..0000000000 --- a/examples/smpi/NAS/MPI_dummy/mpi_dummy.c +++ /dev/null @@ -1,265 +0,0 @@ -#include -#include "mpi.h" -#include "wtime.h" - -void mpi_error( void ) -{ - printf( "mpi_error called\n" ); - abort(); -} - - - - -int MPI_Irecv( void *buf, - int count, - MPI_Datatype datatype, - int source, - int tag, - MPI_Comm comm, - MPI_Request *request ) -{ - mpi_error(); - return( MPI_ERR_OTHER ); -} - - - - -int MPI_Recv( void *buf, - int count, - MPI_Datatype datatype, - int source, - int tag, - MPI_Comm comm, - MPI_Status *status ) -{ - mpi_error(); - return( MPI_ERR_OTHER ); -} - - - - -int MPI_Send( void *buf, - int count, - MPI_Datatype datatype, - int dest, - int tag, - MPI_Comm comm ) -{ - mpi_error(); - return( MPI_ERR_OTHER ); -} - - - - -int MPI_Wait( MPI_Request *request, - MPI_Status *status ) -{ - mpi_error(); - return( MPI_ERR_OTHER ); -} - - - - -int MPI_Init( int *argc, - char ***argv ) -{ - return( MPI_SUCCESS ); -} - - - - -int MPI_Comm_rank( MPI_Comm comm, - int *rank ) -{ - *rank = 0; - return( MPI_SUCCESS ); -} - - - - -int MPI_Comm_size( MPI_Comm comm, - int *size ) -{ - *size = 1; - return( MPI_SUCCESS ); -} - - - - -double MPI_Wtime( void ) -{ - void wtime(); - - double t; - wtime( &t ); - return( t ); -} - - - - -int MPI_Barrier( MPI_Comm comm ) -{ - return( MPI_SUCCESS ); -} - - - - -int MPI_Finalize( void ) -{ - return( MPI_SUCCESS ); -} - - - - -int MPI_Allreduce( void *sendbuf, - void *recvbuf, - int nitems, - MPI_Datatype type, - MPI_Op op, - MPI_Comm comm ) -{ - int i; - if( type == MPI_INT ) - { - int *pd_sendbuf, *pd_recvbuf; - pd_sendbuf = (int *) sendbuf; - pd_recvbuf = (int *) recvbuf; - for( i=0; i mpi_double_precision, mpi_complex, - > mpi_double_complex - parameter (mpi_double_precision = 1, - $ mpi_integer = 2, - $ mpi_byte = 3, - $ mpi_real= 4, - $ mpi_complex = 5, - $ mpi_double_complex = 6) - - integer mpi_any_source - parameter (mpi_any_source = -1) - - integer mpi_err_other - parameter (mpi_err_other = -1) - - double precision mpi_wtime - external mpi_wtime - - integer mpi_status_size - parameter (mpi_status_size=3) diff --git a/examples/smpi/NAS/MPI_dummy/test.f b/examples/smpi/NAS/MPI_dummy/test.f deleted file mode 100644 index 081c73c72f..0000000000 --- a/examples/smpi/NAS/MPI_dummy/test.f +++ /dev/null @@ -1,10 +0,0 @@ - program - implicit none - double precision t, mpi_wtime - external mpi_wtime - t = 0.0 - t = mpi_wtime() - print *, t - t = mpi_wtime() - print *, t - end diff --git a/examples/smpi/NAS/MPI_dummy/wtime.c b/examples/smpi/NAS/MPI_dummy/wtime.c deleted file mode 100644 index 221d2225ae..0000000000 --- a/examples/smpi/NAS/MPI_dummy/wtime.c +++ /dev/null @@ -1,13 +0,0 @@ -#include "wtime.h" -#include - -void wtime(double *t) -{ - static int sec = -1; - struct timeval tv; - gettimeofday(&tv, (void *)0); - if (sec < 0) sec = tv.tv_sec; - *t = (tv.tv_sec - sec) + 1.0e-6*tv.tv_usec; -} - - diff --git a/examples/smpi/NAS/MPI_dummy/wtime.f b/examples/smpi/NAS/MPI_dummy/wtime.f deleted file mode 100644 index a1cfde9aa3..0000000000 --- a/examples/smpi/NAS/MPI_dummy/wtime.f +++ /dev/null @@ -1,12 +0,0 @@ - subroutine wtime(tim) - real*8 tim - dimension tarray(2) - call etime(tarray) - tim = tarray(1) - return - end - - - - - diff --git a/examples/smpi/NAS/MPI_dummy/wtime.h b/examples/smpi/NAS/MPI_dummy/wtime.h deleted file mode 100644 index 12eb0cb0ee..0000000000 --- a/examples/smpi/NAS/MPI_dummy/wtime.h +++ /dev/null @@ -1,12 +0,0 @@ -/* C/Fortran interface is different on different machines. - * You may need to tweak this. - */ - - -#if defined(IBM) -#define wtime wtime -#elif defined(CRAY) -#define wtime WTIME -#else -#define wtime wtime_ -#endif diff --git a/examples/smpi/NAS/MPI_dummy/wtime_sgi64.c b/examples/smpi/NAS/MPI_dummy/wtime_sgi64.c deleted file mode 100644 index d08d50cd34..0000000000 --- a/examples/smpi/NAS/MPI_dummy/wtime_sgi64.c +++ /dev/null @@ -1,74 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -/* The following works on SGI Power Challenge systems */ - -typedef unsigned long iotimer_t; - -unsigned int cycleval; -volatile iotimer_t *iotimer_addr, base_counter; -double resolution; - -/* address_t is an integer type big enough to hold an address */ -typedef unsigned long address_t; - - - -void timer_init() -{ - - int fd; - char *virt_addr; - address_t phys_addr, page_offset, pagemask, pagebase_addr; - - pagemask = getpagesize() - 1; - errno = 0; - phys_addr = syssgi(SGI_QUERY_CYCLECNTR, &cycleval); - if (errno != 0) { - perror("SGI_QUERY_CYCLECNTR"); - exit(1); - } - /* rel_addr = page offset of physical address */ - page_offset = phys_addr & pagemask; - pagebase_addr = phys_addr - page_offset; - fd = open("/dev/mmem", O_RDONLY); - - virt_addr = mmap(0, pagemask, PROT_READ, MAP_PRIVATE, fd, pagebase_addr); - virt_addr = virt_addr + page_offset; - iotimer_addr = (iotimer_t *)virt_addr; - /* cycleval in picoseconds to this gives resolution in seconds */ - resolution = 1.0e-12*cycleval; - base_counter = *iotimer_addr; -} - -void wtime_(double *time) -{ - static int initialized = 0; - volatile iotimer_t counter_value; - if (!initialized) { - timer_init(); - initialized = 1; - } - counter_value = *iotimer_addr - base_counter; - *time = (double)counter_value * resolution; -} - - -void wtime(double *time) -{ - static int initialized = 0; - volatile iotimer_t counter_value; - if (!initialized) { - timer_init(); - initialized = 1; - } - counter_value = *iotimer_addr - base_counter; - *time = (double)counter_value * resolution; -} - - diff --git a/examples/smpi/NAS/Makefile b/examples/smpi/NAS/Makefile index 7f1bee88a4..04524009d1 100644 --- a/examples/smpi/NAS/Makefile +++ b/examples/smpi/NAS/Makefile @@ -1,5 +1,5 @@ SHELL=/bin/sh -CLASS=U +CLASS=S NPROCS=1 SUBTYPE= VERSION= @@ -12,18 +12,11 @@ IS: is is: header cd IS; $(MAKE) NPROCS=$(NPROCS) CLASS=$(CLASS) -IS-trace: is-trace -is-trace: header - cd IS-trace; $(MAKE) NPROCS=$(NPROCS) CLASS=$(CLASS) EP: ep ep: header cd EP; $(MAKE) NPROCS=$(NPROCS) CLASS=$(CLASS) -EP-trace: ep-trace -ep-trace: header - cd EP-trace; $(MAKE) NPROCS=$(NPROCS) CLASS=$(CLASS) - EP-sampling: ep-sampling ep-sampling: header cd EP-sampling; $(MAKE) NPROCS=$(NPROCS) CLASS=$(CLASS) @@ -32,14 +25,6 @@ DT: dt dt: header cd DT; $(MAKE) CLASS=$(CLASS) -DT-trace: dt-trace -dt-trace: header - cd DT-trace; $(MAKE) CLASS=$(CLASS) - -DT-folding: dt-folding -dt-folding: header - cd DT-folding; $(MAKE) CLASS=$(CLASS) - # Awk script courtesy cmg@cray.com, modified by Haoqiang Jin suite: @ awk -f sys/suite.awk SMAKE=$(MAKE) $(SFILE) | $(SHELL) @@ -49,16 +34,12 @@ suite: # are defined) but on a really clean system this will won't work # because those makefiles need config/make.def clean: - - rm -f core - - rm -f *~ */core */*~ */*.o */npbparams.h */*.obj */*.exe - - rm -f MPI_dummy/test MPI_dummy/libmpi.a + - rm -f *~ */*~ */*.o */npbparams.h - rm -f sys/setparams sys/makesuite sys/setparams.h - - rm -f btio.*.out* veryclean: clean - rm -f config/make.def config/suite.def - - rm -f bin/sp.* bin/lu.* bin/mg.* bin/ft.* bin/bt.* bin/is.* - - rm -f bin/ep.* bin/cg.* bin/dt.* + - rm -f bin/is.* bin/ep.* bin/dt.* header: @ sys/print_header diff --git a/examples/smpi/NAS/common/print_results.f b/examples/smpi/NAS/common/print_results.f deleted file mode 100644 index 9feddac482..0000000000 --- a/examples/smpi/NAS/common/print_results.f +++ /dev/null @@ -1,115 +0,0 @@ - - subroutine print_results(name, class, n1, n2, n3, niter, - > nprocs_compiled, nprocs_total, - > t, mops, optype, verified, npbversion, - > compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7) - - implicit none - character*2 name - character*1 class - integer n1, n2, n3, niter, nprocs_compiled, nprocs_total, j - double precision t, mops - character optype*24, size*15 - logical verified - character*(*) npbversion, compiletime, - > cs1, cs2, cs3, cs4, cs5, cs6, cs7 - - write (*, 2) name - 2 format(//, ' ', A2, ' Benchmark Completed.') - - write (*, 3) Class - 3 format(' Class = ', 12x, a12) - -c If this is not a grid-based problem (EP, FT, CG), then -c we only print n1, which contains some measure of the -c problem size. In that case, n2 and n3 are both zero. -c Otherwise, we print the grid size n1xn2xn3 - - if ((n2 .eq. 0) .and. (n3 .eq. 0)) then - if (name(1:2) .eq. 'EP') then - write(size, '(f15.0)' ) 2.d0**n1 - j = 15 - if (size(j:j) .eq. '.') j = j - 1 - write (*,42) size(1:j) - 42 format(' Size = ',9x, a15) - else - write (*,44) n1 - 44 format(' Size = ',12x, i12) - endif - else - write (*, 4) n1,n2,n3 - 4 format(' Size = ',9x, i4,'x',i4,'x',i4) - endif - - write (*, 5) niter - 5 format(' Iterations = ', 12x, i12) - - write (*, 6) t - 6 format(' Time in seconds = ',12x, f12.2) - - write (*,7) nprocs_total - 7 format(' Total processes = ', 12x, i12) - - write (*,8) nprocs_compiled - 8 format(' Compiled procs = ', 12x, i12) - - write (*,9) mops - 9 format(' Mop/s total = ',12x, f12.2) - - write (*,10) mops/float( nprocs_total ) - 10 format(' Mop/s/process = ', 12x, f12.2) - - write(*, 11) optype - 11 format(' Operation type = ', a24) - - if (verified) then - write(*,12) ' SUCCESSFUL' - else - write(*,12) 'UNSUCCESSFUL' - endif - 12 format(' Verification = ', 12x, a) - - write(*,13) npbversion - 13 format(' Version = ', 12x, a12) - - write(*,14) compiletime - 14 format(' Compile date = ', 12x, a12) - - - write (*,121) cs1 - 121 format(/, ' Compile options:', /, - > ' MPIF77 = ', A) - - write (*,122) cs2 - 122 format(' FLINK = ', A) - - write (*,123) cs3 - 123 format(' FMPI_LIB = ', A) - - write (*,124) cs4 - 124 format(' FMPI_INC = ', A) - - write (*,125) cs5 - 125 format(' FFLAGS = ', A) - - write (*,126) cs6 - 126 format(' FLINKFLAGS = ', A) - - write(*, 127) cs7 - 127 format(' RAND = ', A) - - write (*,130) - 130 format(//' Please send the results of this run to:'// - > ' NPB Development Team '/ - > ' Internet: npb@nas.nasa.gov'/ - > ' '/ - > ' If email is not available, send this to:'// - > ' MS T27A-1'/ - > ' NASA Ames Research Center'/ - > ' Moffett Field, CA 94035-1000'// - > ' Fax: 650-604-3957'//) - - - return - end - diff --git a/examples/smpi/NAS/common/randdp.f b/examples/smpi/NAS/common/randdp.f deleted file mode 100644 index 64860d96d0..0000000000 --- a/examples/smpi/NAS/common/randdp.f +++ /dev/null @@ -1,137 +0,0 @@ -c--------------------------------------------------------------------- -c--------------------------------------------------------------------- - - double precision function randlc (x, a) - -c--------------------------------------------------------------------- -c--------------------------------------------------------------------- - -c--------------------------------------------------------------------- -c -c This routine returns a uniform pseudorandom double precision number in the -c range (0, 1) by using the linear congruential generator -c -c x_{k+1} = a x_k (mod 2^46) -c -c where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers -c before repeating. The argument A is the same as 'a' in the above formula, -c and X is the same as x_0. A and X must be odd double precision integers -c in the range (1, 2^46). The returned value RANDLC is normalized to be -c between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain -c the new seed x_1, so that subsequent calls to RANDLC using the same -c arguments will generate a continuous sequence. -c -c This routine should produce the same results on any computer with at least -c 48 mantissa bits in double precision floating point data. On 64 bit -c systems, double precision should be disabled. -c -c David H. Bailey October 26, 1990 -c -c--------------------------------------------------------------------- - - implicit none - - double precision r23,r46,t23,t46,a,x,t1,t2,t3,t4,a1,a2,x1,x2,z - parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23, - > t46 = t23 ** 2) - -c--------------------------------------------------------------------- -c Break A into two parts such that A = 2^23 * A1 + A2. -c--------------------------------------------------------------------- - t1 = r23 * a - a1 = int (t1) - a2 = a - t23 * a1 - -c--------------------------------------------------------------------- -c Break X into two parts such that X = 2^23 * X1 + X2, compute -c Z = A1 * X2 + A2 * X1 (mod 2^23), and then -c X = 2^23 * Z + A2 * X2 (mod 2^46). -c--------------------------------------------------------------------- - t1 = r23 * x - x1 = int (t1) - x2 = x - t23 * x1 - t1 = a1 * x2 + a2 * x1 - t2 = int (r23 * t1) - z = t1 - t23 * t2 - t3 = t23 * z + a2 * x2 - t4 = int (r46 * t3) - x = t3 - t46 * t4 - randlc = r46 * x - - return - end - - - - -c--------------------------------------------------------------------- -c--------------------------------------------------------------------- - - subroutine vranlc (n, x, a, y) - -c--------------------------------------------------------------------- -c--------------------------------------------------------------------- - -c--------------------------------------------------------------------- -c -c This routine generates N uniform pseudorandom double precision numbers in -c the range (0, 1) by using the linear congruential generator -c -c x_{k+1} = a x_k (mod 2^46) -c -c where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers -c before repeating. The argument A is the same as 'a' in the above formula, -c and X is the same as x_0. A and X must be odd double precision integers -c in the range (1, 2^46). The N results are placed in Y and are normalized -c to be between 0 and 1. X is updated to contain the new seed, so that -c subsequent calls to VRANLC using the same arguments will generate a -c continuous sequence. If N is zero, only initialization is performed, and -c the variables X, A and Y are ignored. -c -c This routine is the standard version designed for scalar or RISC systems. -c However, it should produce the same results on any single processor -c computer with at least 48 mantissa bits in double precision floating point -c data. On 64 bit systems, double precision should be disabled. -c -c--------------------------------------------------------------------- - - implicit none - - integer i,n - double precision y,r23,r46,t23,t46,a,x,t1,t2,t3,t4,a1,a2,x1,x2,z - dimension y(*) - parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23, - > t46 = t23 ** 2) - - -c--------------------------------------------------------------------- -c Break A into two parts such that A = 2^23 * A1 + A2. -c--------------------------------------------------------------------- - t1 = r23 * a - a1 = int (t1) - a2 = a - t23 * a1 - -c--------------------------------------------------------------------- -c Generate N results. This loop is not vectorizable. -c--------------------------------------------------------------------- - do i = 1, n - -c--------------------------------------------------------------------- -c Break X into two parts such that X = 2^23 * X1 + X2, compute -c Z = A1 * X2 + A2 * X1 (mod 2^23), and then -c X = 2^23 * Z + A2 * X2 (mod 2^46). -c--------------------------------------------------------------------- - t1 = r23 * x - x1 = int (t1) - x2 = x - t23 * x1 - t1 = a1 * x2 + a2 * x1 - t2 = int (r23 * t1) - z = t1 - t23 * t2 - t3 = t23 * z + a2 * x2 - t4 = int (r46 * t3) - x = t3 - t46 * t4 - y(i) = r46 * x - enddo - - return - end diff --git a/examples/smpi/NAS/common/randdpvec.f b/examples/smpi/NAS/common/randdpvec.f deleted file mode 100644 index c7080717ce..0000000000 --- a/examples/smpi/NAS/common/randdpvec.f +++ /dev/null @@ -1,186 +0,0 @@ -c--------------------------------------------------------------------- - double precision function randlc (x, a) -c--------------------------------------------------------------------- - -c--------------------------------------------------------------------- -c -c This routine returns a uniform pseudorandom double precision number in the -c range (0, 1) by using the linear congruential generator -c -c x_{k+1} = a x_k (mod 2^46) -c -c where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers -c before repeating. The argument A is the same as 'a' in the above formula, -c and X is the same as x_0. A and X must be odd double precision integers -c in the range (1, 2^46). The returned value RANDLC is normalized to be -c between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain -c the new seed x_1, so that subsequent calls to RANDLC using the same -c arguments will generate a continuous sequence. -c -c This routine should produce the same results on any computer with at least -c 48 mantissa bits in double precision floating point data. On 64 bit -c systems, double precision should be disabled. -c -c David H. Bailey October 26, 1990 -c -c--------------------------------------------------------------------- - - implicit none - - double precision r23,r46,t23,t46,a,x,t1,t2,t3,t4,a1,a2,x1,x2,z - parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23, - > t46 = t23 ** 2) - -c--------------------------------------------------------------------- -c Break A into two parts such that A = 2^23 * A1 + A2. -c--------------------------------------------------------------------- - t1 = r23 * a - a1 = int (t1) - a2 = a - t23 * a1 - -c--------------------------------------------------------------------- -c Break X into two parts such that X = 2^23 * X1 + X2, compute -c Z = A1 * X2 + A2 * X1 (mod 2^23), and then -c X = 2^23 * Z + A2 * X2 (mod 2^46). -c--------------------------------------------------------------------- - t1 = r23 * x - x1 = int (t1) - x2 = x - t23 * x1 - - - t1 = a1 * x2 + a2 * x1 - t2 = int (r23 * t1) - z = t1 - t23 * t2 - t3 = t23 * z + a2 * x2 - t4 = int (r46 * t3) - x = t3 - t46 * t4 - randlc = r46 * x - return - end - - - -c--------------------------------------------------------------------- -c--------------------------------------------------------------------- - - subroutine vranlc (n, x, a, y) - -c--------------------------------------------------------------------- -c--------------------------------------------------------------------- - -c--------------------------------------------------------------------- -c This routine generates N uniform pseudorandom double precision numbers in -c the range (0, 1) by using the linear congruential generator -c -c x_{k+1} = a x_k (mod 2^46) -c -c where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers -c before repeating. The argument A is the same as 'a' in the above formula, -c and X is the same as x_0. A and X must be odd double precision integers -c in the range (1, 2^46). The N results are placed in Y and are normalized -c to be between 0 and 1. X is updated to contain the new seed, so that -c subsequent calls to RANDLC using the same arguments will generate a -c continuous sequence. -c -c This routine generates the output sequence in batches of length NV, for -c convenience on vector computers. This routine should produce the same -c results on any computer with at least 48 mantissa bits in double precision -c floating point data. On Cray systems, double precision should be disabled. -c -c David H. Bailey August 30, 1990 -c--------------------------------------------------------------------- - - integer n - double precision x, a, y(*) - - double precision r23, r46, t23, t46 - integer nv - parameter (r23 = 2.d0 ** (-23), r46 = r23 * r23, t23 = 2.d0 ** 23, - > t46 = t23 * t23, nv = 64) - double precision xv(nv), t1, t2, t3, t4, an, a1, a2, x1, x2, yy - integer n1, i, j - external randlc - double precision randlc - -c--------------------------------------------------------------------- -c Compute the first NV elements of the sequence using RANDLC. -c--------------------------------------------------------------------- - t1 = x - n1 = min (n, nv) - - do i = 1, n1 - xv(i) = t46 * randlc (t1, a) - enddo - -c--------------------------------------------------------------------- -c It is not necessary to compute AN, A1 or A2 unless N is greater than NV. -c--------------------------------------------------------------------- - if (n .gt. nv) then - -c--------------------------------------------------------------------- -c Compute AN = AA ^ NV (mod 2^46) using successive calls to RANDLC. -c--------------------------------------------------------------------- - t1 = a - t2 = r46 * a - - do i = 1, nv - 1 - t2 = randlc (t1, a) - enddo - - an = t46 * t2 - -c--------------------------------------------------------------------- -c Break AN into two parts such that AN = 2^23 * A1 + A2. -c--------------------------------------------------------------------- - t1 = r23 * an - a1 = aint (t1) - a2 = an - t23 * a1 - endif - -c--------------------------------------------------------------------- -c Compute N pseudorandom results in batches of size NV. -c--------------------------------------------------------------------- - do j = 0, n - 1, nv - n1 = min (nv, n - j) - -c--------------------------------------------------------------------- -c Compute up to NV results based on the current seed vector XV. -c--------------------------------------------------------------------- - do i = 1, n1 - y(i+j) = r46 * xv(i) - enddo - -c--------------------------------------------------------------------- -c If this is the last pass through the 140 loop, it is not necessary to -c update the XV vector. -c--------------------------------------------------------------------- - if (j + n1 .eq. n) goto 150 - -c--------------------------------------------------------------------- -c Update the XV vector by multiplying each element by AN (mod 2^46). -c--------------------------------------------------------------------- - do i = 1, nv - t1 = r23 * xv(i) - x1 = aint (t1) - x2 = xv(i) - t23 * x1 - t1 = a1 * x2 + a2 * x1 - t2 = aint (r23 * t1) - yy = t1 - t23 * t2 - t3 = t23 * yy + a2 * x2 - t4 = aint (r46 * t3) - xv(i) = t3 - t46 * t4 - enddo - - enddo - -c--------------------------------------------------------------------- -c Save the last seed in X so that subsequent calls to VRANLC will generate -c a continuous sequence. -c--------------------------------------------------------------------- - 150 x = xv(n1) - - return - end - -c----- end of program ------------------------------------------------ - diff --git a/examples/smpi/NAS/common/randi8.f b/examples/smpi/NAS/common/randi8.f deleted file mode 100644 index 21ab8815db..0000000000 --- a/examples/smpi/NAS/common/randi8.f +++ /dev/null @@ -1,79 +0,0 @@ - double precision function randlc(x, a) - -c--------------------------------------------------------------------- -c -c This routine returns a uniform pseudorandom double precision number in the -c range (0, 1) by using the linear congruential generator -c -c x_{k+1} = a x_k (mod 2^46) -c -c where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers -c before repeating. The argument A is the same as 'a' in the above formula, -c and X is the same as x_0. A and X must be odd double precision integers -c in the range (1, 2^46). The returned value RANDLC is normalized to be -c between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain -c the new seed x_1, so that subsequent calls to RANDLC using the same -c arguments will generate a continuous sequence. - - implicit none - double precision x, a - integer*8 i246m1, Lx, La - double precision d2m46 - - parameter(d2m46=0.5d0**46) - - save i246m1 - data i246m1/X'00003FFFFFFFFFFF'/ - - Lx = X - La = A - - Lx = iand(Lx*La,i246m1) - randlc = d2m46*dble(Lx) - x = dble(Lx) - return - end - - -c--------------------------------------------------------------------- -c--------------------------------------------------------------------- - - - SUBROUTINE VRANLC (N, X, A, Y) - implicit none - integer n, i - double precision x, a, y(*) - integer*8 i246m1, Lx, La - double precision d2m46 - -c This doesn't work, because the compiler does the calculation in 32 -c bits and overflows. No standard way (without f90 stuff) to specify -c that the rhs should be done in 64 bit arithmetic. -c parameter(i246m1=2**46-1) - - parameter(d2m46=0.5d0**46) - - save i246m1 - data i246m1/X'00003FFFFFFFFFFF'/ - -c Note that the v6 compiler on an R8000 does something stupid with -c the above. Using the following instead (or various other things) -c makes the calculation run almost 10 times as fast. -c -c save d2m46 -c data d2m46/0.0d0/ -c if (d2m46 .eq. 0.0d0) then -c d2m46 = 0.5d0**46 -c endif - - Lx = X - La = A - do i = 1, N - Lx = iand(Lx*La,i246m1) - y(i) = d2m46*dble(Lx) - end do - x = dble(Lx) - - return - end - diff --git a/examples/smpi/NAS/common/randi8_safe.f b/examples/smpi/NAS/common/randi8_safe.f deleted file mode 100644 index f725b6a1fb..0000000000 --- a/examples/smpi/NAS/common/randi8_safe.f +++ /dev/null @@ -1,64 +0,0 @@ - double precision function randlc(x, a) - -c--------------------------------------------------------------------- -c -c This routine returns a uniform pseudorandom double precision number in the -c range (0, 1) by using the linear congruential generator -c -c x_{k+1} = a x_k (mod 2^46) -c -c where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers -c before repeating. The argument A is the same as 'a' in the above formula, -c and X is the same as x_0. A and X must be odd double precision integers -c in the range (1, 2^46). The returned value RANDLC is normalized to be -c between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain -c the new seed x_1, so that subsequent calls to RANDLC using the same -c arguments will generate a continuous sequence. - - implicit none - double precision x, a - integer*8 Lx, La, a1, a2, x1, x2, xa - double precision d2m46 - parameter(d2m46=0.5d0**46) - - Lx = x - La = A - a1 = ibits(La, 23, 23) - a2 = ibits(La, 0, 23) - x1 = ibits(Lx, 23, 23) - x2 = ibits(Lx, 0, 23) - xa = ishft(ibits(a1*x2+a2*x1, 0, 23), 23) + a2*x2 - Lx = ibits(xa,0, 46) - x = dble(Lx) - randlc = d2m46*x - return - end - - -c--------------------------------------------------------------------- -c--------------------------------------------------------------------- - - - SUBROUTINE VRANLC (N, X, A, Y) - implicit none - integer n, i - double precision x, a, y(*) - integer*8 Lx, La, a1, a2, x1, x2, xa - double precision d2m46 - parameter(d2m46=0.5d0**46) - - Lx = X - La = A - a1 = ibits(La, 23, 23) - a2 = ibits(La, 0, 23) - do i = 1, N - x1 = ibits(Lx, 23, 23) - x2 = ibits(Lx, 0, 23) - xa = ishft(ibits(a1*x2+a2*x1, 0, 23), 23) + a2*x2 - Lx = ibits(xa,0, 46) - y(i) = d2m46*dble(Lx) - end do - x = dble(Lx) - return - end - diff --git a/examples/smpi/NAS/common/timers.f b/examples/smpi/NAS/common/timers.f deleted file mode 100644 index 7a19ccf56f..0000000000 --- a/examples/smpi/NAS/common/timers.f +++ /dev/null @@ -1,78 +0,0 @@ -c--------------------------------------------------------------------- -c--------------------------------------------------------------------- - - subroutine timer_clear(n) - -c--------------------------------------------------------------------- -c--------------------------------------------------------------------- - - implicit none - integer n - - double precision start(64), elapsed(64) - common /tt/ start, elapsed - - elapsed(n) = 0.0 - return - end - - -c--------------------------------------------------------------------- -c--------------------------------------------------------------------- - - subroutine timer_start(n) - -c--------------------------------------------------------------------- -c--------------------------------------------------------------------- - - implicit none - integer n - include 'mpif.h' - double precision start(64), elapsed(64) - common /tt/ start, elapsed - - start(n) = MPI_Wtime() - - return - end - - -c--------------------------------------------------------------------- -c--------------------------------------------------------------------- - - subroutine timer_stop(n) - -c--------------------------------------------------------------------- -c--------------------------------------------------------------------- - - implicit none - integer n - include 'mpif.h' - double precision start(64), elapsed(64) - common /tt/ start, elapsed - double precision t, now - now = MPI_Wtime() - t = now - start(n) - elapsed(n) = elapsed(n) + t - - return - end - - -c--------------------------------------------------------------------- -c--------------------------------------------------------------------- - - double precision function timer_read(n) - -c--------------------------------------------------------------------- -c--------------------------------------------------------------------- - - implicit none - integer n - double precision start(64), elapsed(64) - common /tt/ start, elapsed - - timer_read = elapsed(n) - return - end - diff --git a/examples/smpi/NAS/config/make.dummy b/examples/smpi/NAS/config/make.dummy deleted file mode 100644 index 16b2350667..0000000000 --- a/examples/smpi/NAS/config/make.dummy +++ /dev/null @@ -1,7 +0,0 @@ -FMPI_LIB = -L../MPI_dummy -lmpi -FMPI_INC = -I../MPI_dummy -CMPI_LIB = -L../MPI_dummy -lmpi -CMPI_INC = -I../MPI_dummy -default:: ${PROGRAM} libmpi.a -libmpi.a: - cd ../MPI_dummy; $(MAKE) F77=$(MPIF77) CC=$(MPICC) -- 2.20.1