From: Frederic Suter Date: Mon, 1 Feb 2016 10:45:00 +0000 (+0100) Subject: some cleaning in the NAS examples X-Git-Tag: v3_13~1009 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/202e327eb4f29590a1847fd31db81471021c2a36 some cleaning in the NAS examples --- diff --git a/examples/smpi/NAS/DT-trace/DGraph.c b/examples/smpi/NAS/DT-trace/DGraph.c deleted file mode 100644 index f573786db8..0000000000 --- a/examples/smpi/NAS/DT-trace/DGraph.c +++ /dev/null @@ -1,184 +0,0 @@ -#include -#include -#include - -#include "DGraph.h" - -DGArc *newArc(DGNode *tl,DGNode *hd){ - DGArc *ar=(DGArc *)malloc(sizeof(DGArc)); - ar->tail=tl; - ar->head=hd; - return ar; -} -void arcShow(DGArc *ar){ - DGNode *tl=(DGNode *)ar->tail, - *hd=(DGNode *)ar->head; - fprintf(stderr,"%d. |%s ->%s\n",ar->id,tl->name,hd->name); -} - -DGNode *newNode(char *nm){ - DGNode *nd=(DGNode *)malloc(sizeof(DGNode)); - nd->attribute=0; - nd->color=0; - nd->inDegree=0; - nd->outDegree=0; - nd->maxInDegree=SMALL_BLOCK_SIZE; - nd->maxOutDegree=SMALL_BLOCK_SIZE; - nd->inArc=(DGArc **)malloc(nd->maxInDegree*sizeof(DGArc*)); - nd->outArc=(DGArc **)malloc(nd->maxOutDegree*sizeof(DGArc*)); - nd->name=strdup(nm); - nd->feat=NULL; - return nd; -} -void nodeShow(DGNode* nd){ - fprintf( stderr,"%3d.%s: (%d,%d)\n", - nd->id,nd->name,nd->inDegree,nd->outDegree); -/* - if(nd->verified==1) fprintf(stderr,"%ld.%s\t: usable.",nd->id,nd->name); - else if(nd->verified==0) fprintf(stderr,"%ld.%s\t: unusable.",nd->id,nd->name); - else fprintf(stderr,"%ld.%s\t: notverified.",nd->id,nd->name); -*/ -} - -DGraph* newDGraph(char* nm){ - DGraph *dg=(DGraph *)malloc(sizeof(DGraph)); - dg->numNodes=0; - dg->numArcs=0; - dg->maxNodes=BLOCK_SIZE; - dg->maxArcs=BLOCK_SIZE; - dg->node=(DGNode **)malloc(dg->maxNodes*sizeof(DGNode*)); - dg->arc=(DGArc **)malloc(dg->maxArcs*sizeof(DGArc*)); - dg->name=strdup(nm); - return dg; -} -int AttachNode(DGraph* dg, DGNode* nd) { - int i=0,j,len=0; - DGNode **nds =NULL, *tmpnd=NULL; - DGArc **ar=NULL; - - if (dg->numNodes == dg->maxNodes-1 ) { - dg->maxNodes += BLOCK_SIZE; - nds =(DGNode **) calloc(dg->maxNodes,sizeof(DGNode*)); - memcpy(nds,dg->node,(dg->maxNodes-BLOCK_SIZE)*sizeof(DGNode*)); - free(dg->node); - dg->node=nds; - } - - len = strlen( nd->name); - for (i = 0; i < dg->numNodes; i++) { - tmpnd =dg->node[ i]; - ar=NULL; - if ( strlen( tmpnd->name) != len ) continue; - if ( strncmp( nd->name, tmpnd->name, len) ) continue; - if ( nd->inDegree > 0 ) { - tmpnd->maxInDegree += nd->maxInDegree; - ar =(DGArc **) calloc(tmpnd->maxInDegree,sizeof(DGArc*)); - memcpy(ar,tmpnd->inArc,(tmpnd->inDegree)*sizeof(DGArc*)); - free(tmpnd->inArc); - tmpnd->inArc=ar; - for (j = 0; j < nd->inDegree; j++ ) { - nd->inArc[ j]->head = tmpnd; - } - memcpy( &(tmpnd->inArc[ tmpnd->inDegree]), nd->inArc, nd->inDegree*sizeof( DGArc *)); - tmpnd->inDegree += nd->inDegree; - } - if ( nd->outDegree > 0 ) { - tmpnd->maxOutDegree += nd->maxOutDegree; - ar =(DGArc **) calloc(tmpnd->maxOutDegree,sizeof(DGArc*)); - memcpy(ar,tmpnd->outArc,(tmpnd->outDegree)*sizeof(DGArc*)); - free(tmpnd->outArc); - tmpnd->outArc=ar; - for (j = 0; j < nd->outDegree; j++ ) { - nd->outArc[ j]->tail = tmpnd; - } - memcpy( &(tmpnd->outArc[tmpnd->outDegree]),nd->outArc,nd->outDegree*sizeof( DGArc *)); - tmpnd->outDegree += nd->outDegree; - } - free(nd); - return i; - } - nd->id = dg->numNodes; - dg->node[dg->numNodes] = nd; - dg->numNodes++; -return nd->id; -} -int AttachArc(DGraph *dg,DGArc* nar){ -int arcId = -1; -int i=0,newNumber=0; -DGNode *head = nar->head, - *tail = nar->tail; -DGArc **ars=NULL,*probe=NULL; -/*fprintf(stderr,"AttachArc %ld\n",dg->numArcs); */ - if ( !tail || !head ) return arcId; - if ( dg->numArcs == dg->maxArcs-1 ) { - dg->maxArcs += BLOCK_SIZE; - ars =(DGArc **) calloc(dg->maxArcs,sizeof(DGArc*)); - memcpy(ars,dg->arc,(dg->maxArcs-BLOCK_SIZE)*sizeof(DGArc*)); - free(dg->arc); - dg->arc=ars; - } - for(i = 0; i < tail->outDegree; i++ ) { /* parallel arc */ - probe = tail->outArc[ i]; - if(probe->head == head - && - probe->length == nar->length - ){ - free(nar); - return probe->id; - } - } - - nar->id = dg->numArcs; - arcId=dg->numArcs; - dg->arc[dg->numArcs] = nar; - dg->numArcs++; - - head->inArc[ head->inDegree] = nar; - head->inDegree++; - if ( head->inDegree >= head->maxInDegree ) { - newNumber = head->maxInDegree + SMALL_BLOCK_SIZE; - ars =(DGArc **) calloc(newNumber,sizeof(DGArc*)); - memcpy(ars,head->inArc,(head->inDegree)*sizeof(DGArc*)); - free(head->inArc); - head->inArc=ars; - head->maxInDegree = newNumber; - } - tail->outArc[ tail->outDegree] = nar; - tail->outDegree++; - if(tail->outDegree >= tail->maxOutDegree ) { - newNumber = tail->maxOutDegree + SMALL_BLOCK_SIZE; - ars =(DGArc **) calloc(newNumber,sizeof(DGArc*)); - memcpy(ars,tail->outArc,(tail->outDegree)*sizeof(DGArc*)); - free(tail->outArc); - tail->outArc=ars; - tail->maxOutDegree = newNumber; - } -/*fprintf(stderr,"AttachArc: head->in=%d tail->out=%ld\n",head->inDegree,tail->outDegree);*/ -return arcId; -} -void graphShow(DGraph *dg,int DetailsLevel){ - int i=0,j=0; - fprintf(stderr,"%d.%s: (%d,%d)\n",dg->id,dg->name,dg->numNodes,dg->numArcs); - if ( DetailsLevel < 1) return; - for (i = 0; i < dg->numNodes; i++ ) { - DGNode *focusNode = dg->node[ i]; - if(DetailsLevel >= 2) { - for (j = 0; j < focusNode->inDegree; j++ ) { - fprintf(stderr,"\t "); - nodeShow(focusNode->inArc[ j]->tail); - } - } - nodeShow(focusNode); - if ( DetailsLevel < 2) continue; - for (j = 0; j < focusNode->outDegree; j++ ) { - fprintf(stderr, "\t "); - nodeShow(focusNode->outArc[ j]->head); - } - fprintf(stderr, "---\n"); - } - fprintf(stderr,"----------------------------------------\n"); - if ( DetailsLevel < 3) return; -} - - - diff --git a/examples/smpi/NAS/DT-trace/DGraph.h b/examples/smpi/NAS/DT-trace/DGraph.h deleted file mode 100644 index f38f898b24..0000000000 --- a/examples/smpi/NAS/DT-trace/DGraph.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef _DGRAPH -#define _DGRAPH - -#define BLOCK_SIZE 128 -#define SMALL_BLOCK_SIZE 32 - -typedef struct{ - int id; - void *tail,*head; - int length,width,attribute,maxWidth; -}DGArc; - -typedef struct{ - int maxInDegree,maxOutDegree; - int inDegree,outDegree; - int id; - char *name; - DGArc **inArc,**outArc; - int depth,height,width; - int color,attribute,address,verified; - void *feat; -}DGNode; - -typedef struct{ - int maxNodes,maxArcs; - int id; - char *name; - int numNodes,numArcs; - DGNode **node; - DGArc **arc; -} DGraph; - -DGArc *newArc(DGNode *tl,DGNode *hd); -void arcShow(DGArc *ar); -DGNode *newNode(char *nm); -void nodeShow(DGNode* nd); - -DGraph* newDGraph(char *nm); -int AttachNode(DGraph *dg,DGNode *nd); -int AttachArc(DGraph *dg,DGArc* nar); -void graphShow(DGraph *dg,int DetailsLevel); - -#endif diff --git a/examples/smpi/NAS/DT-trace/Makefile b/examples/smpi/NAS/DT-trace/Makefile deleted file mode 100644 index c727c75051..0000000000 --- a/examples/smpi/NAS/DT-trace/Makefile +++ /dev/null @@ -1,26 +0,0 @@ -SHELL=/bin/sh -BENCHMARK=dt -BENCHMARKU=DT - -include ../config/make.def - -include ../sys/make.common -#Override PROGRAM -DTPROGRAM = $(BINDIR)/$(BENCHMARK)-trace.$(CLASS) - -OBJS = dt.o DGraph.o \ - ${COMMON}/c_print_results.o ${COMMON}/c_timers.o ${COMMON}/c_randdp.o - - -${PROGRAM}: config ${OBJS} - ${CLINK} ${CLINKFLAGS} -o ${DTPROGRAM} ${OBJS} ${CMPI_LIB} - -.c.o: - ${CCOMPILE} $< - -dt.o: dt.c npbparams.h -DGraph.o: DGraph.c DGraph.h - -clean: - - rm -f *.o *~ mputil* - - rm -f dt npbparams.h core diff --git a/examples/smpi/NAS/DT-trace/README b/examples/smpi/NAS/DT-trace/README deleted file mode 100644 index 873e3ae6f2..0000000000 --- a/examples/smpi/NAS/DT-trace/README +++ /dev/null @@ -1,22 +0,0 @@ -Data Traffic benchmark DT is new in the NPB suite -(released as part of NPB3.x-MPI package). ----------------------------------------------------- - -DT is written in C and same executable can run on any number of processors, -provided this number is not less than the number of nodes in the communication -graph. DT benchmark takes one argument: BH, WH, or SH. This argument -specifies the communication graph Black Hole, White Hole, or SHuffle -respectively. The current release contains verification numbers for -CLASSES S, W, A, and B only. Classes C and D are defined, but verification -numbers are not provided in this release. - -The following table summarizes the number of nodes in the communication -graph based on CLASS and graph TYPE. - -CLASS N_Source N_Nodes(BH,WH) N_Nodes(SH) - S 4 5 12 - W 8 11 32 - A 16 21 80 - B 32 43 192 - C 64 85 448 - D 128 171 1024 diff --git a/examples/smpi/NAS/DT-trace/dt.c b/examples/smpi/NAS/DT-trace/dt.c deleted file mode 100644 index 1af5ccead4..0000000000 --- a/examples/smpi/NAS/DT-trace/dt.c +++ /dev/null @@ -1,777 +0,0 @@ -/************************************************************************* - * * - * N A S P A R A L L E L B E N C H M A R K S 3.3 * - * * - * D T * - * * - ************************************************************************* - * * - * This benchmark is part of the NAS Parallel Benchmark 3.3 suite. * - * * - * Permission to use, copy, distribute and modify this software * - * for any purpose with or without fee is hereby granted. We * - * request, however, that all derived work reference the NAS * - * Parallel Benchmarks 3.3. This software is provided "as is" * - * without express or implied warranty. * - * * - * Information on NPB 3.3, including the technical report, the * - * original specifications, source code, results and information * - * on how to submit new results, is available at: * - * * - * http: www.nas.nasa.gov/Software/NPB * - * * - * Send comments or suggestions to npb@nas.nasa.gov * - * Send bug reports to npb-bugs@nas.nasa.gov * - * * - * NAS Parallel Benchmarks Group * - * NASA Ames Research Center * - * Mail Stop: T27A-1 * - * Moffett Field, CA 94035-1000 * - * * - * E-mail: npb@nas.nasa.gov * - * Fax: (650) 604-3957 * - * * - ************************************************************************* - * * - * Author: M. Frumkin * * - * * - *************************************************************************/ - -#include -#include -#include - -#include "mpi.h" -#include "npbparams.h" - -#include "simgrid/instr.h" //TRACE_ - -#ifndef CLASS -#define CLASS 'S' -#define NUM_PROCS 1 -#endif - -//int passed_verification; -extern double randlc( double *X, double *A ); -extern -void c_print_results( char *name, - char class, - int n1, - int n2, - int n3, - int niter, - int nprocs_compiled, - int nprocs_total, - double t, - double mops, - char *optype, - int passed_verification, - char *npbversion, - char *compiletime, - char *mpicc, - char *clink, - char *cmpi_lib, - char *cmpi_inc, - char *cflags, - char *clinkflags ); - -void timer_clear( int n ); -void timer_start( int n ); -void timer_stop( int n ); -double timer_read( int n ); -int timer_on=0,timers_tot=64; - -int verify(char *bmname,double rnm2){ - double verify_value=0.0; - double epsilon=1.0E-8; - char cls=CLASS; - int verified=-1; - if (cls != 'U') { - if(cls=='S') { - if(strstr(bmname,"BH")){ - verify_value=30892725.0; - }else if(strstr(bmname,"WH")){ - verify_value=67349758.0; - }else if(strstr(bmname,"SH")){ - verify_value=58875767.0; - }else{ - fprintf(stderr,"No such benchmark as %s.\n",bmname); - } - verified = 0; - }else if(cls=='W') { - if(strstr(bmname,"BH")){ - verify_value = 4102461.0; - }else if(strstr(bmname,"WH")){ - verify_value = 204280762.0; - }else if(strstr(bmname,"SH")){ - verify_value = 186944764.0; - }else{ - fprintf(stderr,"No such benchmark as %s.\n",bmname); - } - verified = 0; - }else if(cls=='A') { - if(strstr(bmname,"BH")){ - verify_value = 17809491.0; - }else if(strstr(bmname,"WH")){ - verify_value = 1289925229.0; - }else if(strstr(bmname,"SH")){ - verify_value = 610856482.0; - }else{ - fprintf(stderr,"No such benchmark as %s.\n",bmname); - } - verified = 0; - }else if(cls=='B') { - if(strstr(bmname,"BH")){ - verify_value = 4317114.0; - }else if(strstr(bmname,"WH")){ - verify_value = 7877279917.0; - }else if(strstr(bmname,"SH")){ - verify_value = 1836863082.0; - }else{ - fprintf(stderr,"No such benchmark as %s.\n",bmname); - verified = 0; - } - }else if(cls=='C') { - if(strstr(bmname,"BH")){ - verify_value = 0.0; - }else if(strstr(bmname,"WH")){ - verify_value = 0.0; - }else if(strstr(bmname,"SH")){ - verify_value = 0.0; - }else{ - fprintf(stderr,"No such benchmark as %s.\n",bmname); - verified = -1; - } - }else if(cls=='D') { - if(strstr(bmname,"BH")){ - verify_value = 0.0; - }else if(strstr(bmname,"WH")){ - verify_value = 0.0; - }else if(strstr(bmname,"SH")){ - verify_value = 0.0; - }else{ - fprintf(stderr,"No such benchmark as %s.\n",bmname); - } - verified = -1; - }else{ - fprintf(stderr,"No such class as %c.\n",cls); - } - fprintf(stderr," %s L2 Norm = %f\n",bmname,rnm2); - if(verified==-1){ - fprintf(stderr," No verification was performed.\n"); - }else if( rnm2 - verify_value < epsilon && - rnm2 - verify_value > -epsilon) { /* abs here does not work on ALTIX */ - verified = 1; - fprintf(stderr," Deviation = %f\n",(rnm2 - verify_value)); - }else{ - verified = 0; - fprintf(stderr," The correct verification value = %f\n",verify_value); - fprintf(stderr," Got value = %f\n",rnm2); - } - }else{ - verified = -1; - } - return verified; - } - -int ipowMod(int a,long long int n,int md){ - int seed=1,q=a,r=1; - if(n<0){ - fprintf(stderr,"ipowMod: exponent must be nonnegative exp=%lld\n",n); - n=-n; /* temp fix */ -/* return 1; */ - } - if(md<=0){ - fprintf(stderr,"ipowMod: module must be positive mod=%d",md); - return 1; - } - if(n==0) return 1; - while(n>1){ - int n2 = n/2; - if (n2*2==n){ - seed = (q*q)%md; - q=seed; - n = n2; - }else{ - seed = (r*q)%md; - r=seed; - n = n-1; - } - } - seed = (r*q)%md; - return seed; -} - -#include "DGraph.h" -DGraph *buildSH(char cls){ -/* - Nodes of the graph must be topologically sorted - to avoid MPI deadlock. -*/ - DGraph *dg; - int numSources=NUM_SOURCES; /* must be power of 2 */ - int numOfLayers=0,tmpS=numSources>>1; - int firstLayerNode=0; - DGArc *ar=NULL; - DGNode *nd=NULL; - int mask=0x0,ndid=0,ndoff=0; - int i=0,j=0; - char nm[BLOCK_SIZE]; - - sprintf(nm,"DT_SH.%c",cls); - dg=newDGraph(nm); - - while(tmpS>1){ - numOfLayers++; - tmpS>>=1; - } - for(i=0;inode[ndid],nd); - AttachArc(dg,ar); - ndoff+=mask; - ndid=firstLayerNode+ndoff; - ar=newArc(dg->node[ndid],nd); - AttachArc(dg,ar); - } - firstLayerNode+=numSources; - } - mask=0x00000001<node[ndid],nd); - AttachArc(dg,ar); - ndoff+=mask; - ndid=firstLayerNode+ndoff; - ar=newArc(dg->node[ndid],nd); - AttachArc(dg,ar); - } -return dg; -} -DGraph *buildWH(char cls){ -/* - Nodes of the graph must be topologically sorted - to avoid MPI deadlock. -*/ - int i=0,j=0; - int numSources=NUM_SOURCES,maxInDeg=4; - int numLayerNodes=numSources,firstLayerNode=0; - int totComparators=0; - int numPrevLayerNodes=numLayerNodes; - int id=0,sid=0; - DGraph *dg; - DGNode *nd=NULL,*source=NULL,*tmp=NULL,*snd=NULL; - DGArc *ar=NULL; - char nm[BLOCK_SIZE]; - - sprintf(nm,"DT_WH.%c",cls); - dg=newDGraph(nm); - - for(i=0;imaxInDeg){ - numLayerNodes=numLayerNodes/maxInDeg; - if(numLayerNodes*maxInDeg=numPrevLayerNodes) break; - snd=dg->node[firstLayerNode+sid]; - ar=newArc(dg->node[id],snd); - AttachArc(dg,ar); - } - } - firstLayerNode+=numPrevLayerNodes; - numPrevLayerNodes=numLayerNodes; - } - source=newNode("Source"); - AttachNode(dg,source); - for(i=0;inode[firstLayerNode+i]; - ar=newArc(source,nd); - AttachArc(dg,ar); - } - - for(i=0;inumNodes/2;i++){ /* Topological sorting */ - tmp=dg->node[i]; - dg->node[i]=dg->node[dg->numNodes-1-i]; - dg->node[i]->id=i; - dg->node[dg->numNodes-1-i]=tmp; - dg->node[dg->numNodes-1-i]->id=dg->numNodes-1-i; - } -return dg; -} -DGraph *buildBH(char cls){ -/* - Nodes of the graph must be topologically sorted - to avoid MPI deadlock. -*/ - int i=0,j=0; - int numSources=NUM_SOURCES,maxInDeg=4; - int numLayerNodes=numSources,firstLayerNode=0; - DGraph *dg; - DGNode *nd=NULL, *snd=NULL, *sink=NULL; - DGArc *ar=NULL; - int totComparators=0; - int numPrevLayerNodes=numLayerNodes; - int id=0, sid=0; - char nm[BLOCK_SIZE]; - - sprintf(nm,"DT_BH.%c",cls); - dg=newDGraph(nm); - - for(i=0;imaxInDeg){ - numLayerNodes=numLayerNodes/maxInDeg; - if(numLayerNodes*maxInDeg=numPrevLayerNodes) break; - snd=dg->node[firstLayerNode+sid]; - ar=newArc(snd,dg->node[id]); - AttachArc(dg,ar); - } - } - firstLayerNode+=numPrevLayerNodes; - numPrevLayerNodes=numLayerNodes; - } - sink=newNode("Sink"); - AttachNode(dg,sink); - for(i=0;inode[firstLayerNode+i]; - ar=newArc(nd,sink); - AttachArc(dg,ar); - } -return dg; -} - -typedef struct{ - int len; - double* val; -} Arr; -Arr *newArr(int len){ - Arr *arr=(Arr *)SMPI_SHARED_MALLOC(sizeof(Arr)); - arr->len=len; - arr->val=(double *)SMPI_SHARED_MALLOC(len*sizeof(double)); - return arr; -} -void arrShow(Arr* a){ - if(!a) fprintf(stderr,"-- NULL array\n"); - else{ - fprintf(stderr,"-- length=%d\n",a->len); - } -} -double CheckVal(Arr *feat){ - double csum=0.0; - int i=0; - for(i=0;ilen;i++){ - csum+=feat->val[i]*feat->val[i]/feat->len; /* The truncation does not work since - result will be 0 for large len */ - } - return csum; -} -int GetFNumDPar(int* mean, int* stdev){ - *mean=NUM_SAMPLES; - *stdev=STD_DEVIATION; - return 0; -} -int GetFeatureNum(char *mbname,int id){ - double tran=314159265.0; - double A=2*id+1; - double denom=randlc(&tran,&A); - char cval='S'; - int mean=NUM_SAMPLES,stdev=128; - int rtfs=0,len=0; - GetFNumDPar(&mean,&stdev); - rtfs=ipowMod((int)(1/denom)*(int)cval,(long long int) (2*id+1),2*stdev); - if(rtfs<0) rtfs=-rtfs; - len=mean-stdev+rtfs; - return len; -} -Arr* RandomFeatures(char *bmname,int fdim,int id){ - int len=GetFeatureNum(bmname,id)*fdim; - Arr* feat=newArr(len); - int nxg=2,nyg=2,nzg=2,nfg=5; - int nx=421,ny=419,nz=1427,nf=3527; - long long int expon=(len*(id+1))%3141592; - int seedx=ipowMod(nxg,expon,nx), - seedy=ipowMod(nyg,expon,ny), - seedz=ipowMod(nzg,expon,nz), - seedf=ipowMod(nfg,expon,nf); - int i=0; - if(timer_on){ - timer_clear(id+1); - timer_start(id+1); - } - for(i=0;ival[i]=seedx; - feat->val[i+1]=seedy; - feat->val[i+2]=seedz; - feat->val[i+3]=seedf; - } - if(timer_on){ - timer_stop(id+1); - fprintf(stderr,"** RandomFeatures time in node %d = %f\n",id,timer_read(id+1)); - } - return feat; -} -void Resample(Arr *a,int blen){ - long long int i=0,j=0,jlo=0,jhi=0; - double avval=0.0; - double *nval=(double *)SMPI_SHARED_MALLOC(blen*sizeof(double)); - Arr *tmp=newArr(10); - for(i=0;ilen-1;i++){ - jlo=(int)(0.5*(2*i-1)*(blen/a->len)); - jhi=(int)(0.5*(2*i+1)*(blen/a->len)); - - avval=a->val[i]/(jhi-jlo+1); - for(j=jlo;j<=jhi;j++){ - nval[j]+=avval; - } - } - nval[0]=a->val[0]; - nval[blen-1]=a->val[a->len-1]; - SMPI_SHARED_FREE(a->val); - a->val=nval; - a->len=blen; -} -#define fielddim 4 -Arr* WindowFilter(Arr *a, Arr* b,int w){ - int i=0,j=0,k=0; - double rms0=0.0,rms1=0.0,rmsm1=0.0; - double weight=((double) (w+1))/(w+2); - - w+=1; - if(timer_on){ - timer_clear(w); - timer_start(w); - } - if(a->lenlen) Resample(a,b->len); - if(a->len>b->len) Resample(b,a->len); - for(i=fielddim;ilen-fielddim;i+=fielddim){ - rms0=(a->val[i]-b->val[i])*(a->val[i]-b->val[i]) - +(a->val[i+1]-b->val[i+1])*(a->val[i+1]-b->val[i+1]) - +(a->val[i+2]-b->val[i+2])*(a->val[i+2]-b->val[i+2]) - +(a->val[i+3]-b->val[i+3])*(a->val[i+3]-b->val[i+3]); - j=i+fielddim; - rms1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j]) - +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1]) - +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2]) - +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]); - j=i-fielddim; - rmsm1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j]) - +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1]) - +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2]) - +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]); - k=0; - if(rms1val[i]=weight*b->val[i]; - a->val[i+1]=weight*b->val[i+1]; - a->val[i+2]=weight*b->val[i+2]; - a->val[i+3]=weight*b->val[i+3]; - }else if(k==1){ - j=i+fielddim; - a->val[i]=weight*b->val[j]; - a->val[i+1]=weight*b->val[j+1]; - a->val[i+2]=weight*b->val[j+2]; - a->val[i+3]=weight*b->val[j+3]; - }else { /*if(k==-1)*/ - j=i-fielddim; - a->val[i]=weight*b->val[j]; - a->val[i+1]=weight*b->val[j+1]; - a->val[i+2]=weight*b->val[j+2]; - a->val[i+3]=weight*b->val[j+3]; - } - } - if(timer_on){ - timer_stop(w); - fprintf(stderr,"** WindowFilter time in node %d = %f\n",(w-1),timer_read(w)); - } - return a; -} - -int SendResults(DGraph *dg,DGNode *nd,Arr *feat){ - int i=0,tag=0; - DGArc *ar=NULL; - DGNode *head=NULL; - if(!feat) return 0; - TRACE_smpi_set_category ("SendResults"); - for(i=0;ioutDegree;i++){ - ar=nd->outArc[i]; - if(ar->tail!=nd) continue; - head=ar->head; - tag=ar->id; - if(head->address!=nd->address){ - MPI_Send(&feat->len,1,MPI_INT,head->address,tag,MPI_COMM_WORLD); - MPI_Send(feat->val,feat->len,MPI_DOUBLE,head->address,tag,MPI_COMM_WORLD); - } - } - TRACE_smpi_set_category (NULL); - return 1; -} -Arr* CombineStreams(DGraph *dg,DGNode *nd){ - Arr *resfeat=newArr(NUM_SAMPLES*fielddim); - int i=0,len=0,tag=0; - DGArc *ar=NULL; - DGNode *tail=NULL; - MPI_Status status; - Arr *feat=NULL,*featp=NULL; - - if(nd->inDegree==0) return NULL; - for(i=0;iinDegree;i++){ - ar=nd->inArc[i]; - if(ar->head!=nd) continue; - tail=ar->tail; - if(tail->address!=nd->address){ - len=0; - tag=ar->id; - MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status); - feat=newArr(len); - MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status); - resfeat=WindowFilter(resfeat,feat,nd->id); - SMPI_SHARED_FREE(feat); - }else{ - featp=(Arr *)tail->feat; - feat=newArr(featp->len); - memcpy(feat->val,featp->val,featp->len*sizeof(double)); - resfeat=WindowFilter(resfeat,feat,nd->id); - SMPI_SHARED_FREE(feat); - } - } - for(i=0;ilen;i++) resfeat->val[i]=((int)resfeat->val[i])/nd->inDegree; - nd->feat=resfeat; - return nd->feat; -} -double Reduce(Arr *a,int w){ - double retv=0.0; - if(timer_on){ - timer_clear(w); - timer_start(w); - } - retv=(int)(w*CheckVal(a));/* The casting needed for node - and array dependent verifcation */ - if(timer_on){ - timer_stop(w); - fprintf(stderr,"** Reduce time in node %d = %f\n",(w-1),timer_read(w)); - } - return retv; -} - -double ReduceStreams(DGraph *dg,DGNode *nd){ - double csum=0.0; - int i=0,len=0,tag=0; - DGArc *ar=NULL; - DGNode *tail=NULL; - Arr *feat=NULL; - double retv=0.0; - - TRACE_smpi_set_category ("ReduceStreams"); - - for(i=0;iinDegree;i++){ - ar=nd->inArc[i]; - if(ar->head!=nd) continue; - tail=ar->tail; - if(tail->address!=nd->address){ - MPI_Status status; - len=0; - tag=ar->id; - MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status); - feat=newArr(len); - MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status); - csum+=Reduce(feat,(nd->id+1)); - SMPI_SHARED_FREE(feat); - }else{ - csum+=Reduce(tail->feat,(nd->id+1)); - } - } - if(nd->inDegree>0)csum=(((long long int)csum)/nd->inDegree); - retv=(nd->id+1)*csum; - return retv; -} - -int ProcessNodes(DGraph *dg,int me){ - double chksum=0.0; - Arr *feat=NULL; - int i=0,verified=0,tag; - DGNode *nd=NULL; - double rchksum=0.0; - MPI_Status status; - - TRACE_smpi_set_category ("ProcessNodes"); - - - for(i=0;inumNodes;i++){ - nd=dg->node[i]; - if(nd->address!=me) continue; - if(strstr(nd->name,"Source")){ - nd->feat=RandomFeatures(dg->name,fielddim,nd->id); - SendResults(dg,nd,nd->feat); - }else if(strstr(nd->name,"Sink")){ - chksum=ReduceStreams(dg,nd); - tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */ - MPI_Send(&chksum,1,MPI_DOUBLE,0,tag,MPI_COMM_WORLD); - }else{ - feat=CombineStreams(dg,nd); - SendResults(dg,nd,feat); - } - } - - TRACE_smpi_set_category ("ProcessNodes"); - - - if(me==0){ /* Report node */ - rchksum=0.0; - chksum=0.0; - for(i=0;inumNodes;i++){ - nd=dg->node[i]; - if(!strstr(nd->name,"Sink")) continue; - tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */ - MPI_Recv(&rchksum,1,MPI_DOUBLE,nd->address,tag,MPI_COMM_WORLD,&status); - chksum+=rchksum; - } - verified=verify(dg->name,chksum); - } -return verified; -} - -int main(int argc,char **argv ){ - int my_rank,comm_size; - int i; - DGraph *dg=NULL; - int verified=0, featnum=0; - double bytes_sent=2.0,tot_time=0.0; - - - - MPI_Init( &argc, &argv ); - MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); - MPI_Comm_size( MPI_COMM_WORLD, &comm_size ); - TRACE_smpi_set_category ("begin"); - - if(argc!=2|| - ( strncmp(argv[1],"BH",2)!=0 - &&strncmp(argv[1],"WH",2)!=0 - &&strncmp(argv[1],"SH",2)!=0 - ) - ){ - if(my_rank==0){ - fprintf(stderr,"** Usage: mpirun -np N ../bin/dt.S GraphName\n"); - fprintf(stderr,"** Where \n - N is integer number of MPI processes\n"); - fprintf(stderr," - S is the class S, W, or A \n"); - fprintf(stderr," - GraphName is the communication graph name BH, WH, or SH.\n"); - fprintf(stderr," - the number of MPI processes N should not be be less than \n"); - fprintf(stderr," the number of nodes in the graph\n"); - } - MPI_Finalize(); - exit(0); - } - if(strncmp(argv[1],"BH",2)==0){ - dg=buildBH(CLASS); - }else if(strncmp(argv[1],"WH",2)==0){ - dg=buildWH(CLASS); - }else if(strncmp(argv[1],"SH",2)==0){ - dg=buildSH(CLASS); - } - - if(timer_on&&dg->numNodes+1>timers_tot){ - timer_on=0; - if(my_rank==0) - fprintf(stderr,"Not enough timers. Node timeing is off. \n"); - } - if(dg->numNodes>comm_size){ - if(my_rank==0){ - fprintf(stderr,"** The number of MPI processes should not be less than \n"); - fprintf(stderr,"** the number of nodes in the graph\n"); - fprintf(stderr,"** Number of MPI processes = %d\n",comm_size); - fprintf(stderr,"** Number nodes in the graph = %d\n",dg->numNodes); - } - MPI_Finalize(); - exit(0); - } - for(i=0;inumNodes;i++){ - dg->node[i]->address=i; - } - if( my_rank == 0 ){ - printf( "\n\n NAS Parallel Benchmarks 3.3 -- DT Benchmark\n\n" ); - graphShow(dg,0); - timer_clear(0); - timer_start(0); - } - - verified=ProcessNodes(dg,my_rank); - TRACE_smpi_set_category ("end"); - - featnum=NUM_SAMPLES*fielddim; - bytes_sent=featnum*dg->numArcs; - bytes_sent/=1048576; - if(my_rank==0){ - timer_stop(0); - tot_time=timer_read(0); - c_print_results( dg->name, - CLASS, - featnum, - 0, - 0, - dg->numNodes, - 0, - comm_size, - tot_time, - bytes_sent/tot_time, - "bytes transmitted", - verified, - NPBVERSION, - COMPILETIME, - MPICC, - CLINK, - CMPI_LIB, - CMPI_INC, - CFLAGS, - CLINKFLAGS ); - } - MPI_Finalize(); - return 1; -} diff --git a/examples/smpi/NAS/DT/dt.c b/examples/smpi/NAS/DT/dt.c index d210b16424..1a4931704a 100644 --- a/examples/smpi/NAS/DT/dt.c +++ b/examples/smpi/NAS/DT/dt.c @@ -44,6 +44,8 @@ #include "mpi.h" #include "npbparams.h" +#include "simgrid/instr.h" //TRACE_ + #ifndef CLASS #define CLASS 'S' #define NUM_PROCS 1 @@ -537,6 +539,7 @@ int SendResults(DGraph *dg,DGNode *nd,Arr *feat){ DGArc *ar=NULL; DGNode *head=NULL; if(!feat) return 0; + TRACE_smpi_set_category ("SendResults"); for(i=0;ioutDegree;i++){ ar=nd->outArc[i]; if(ar->tail!=nd) continue; @@ -547,6 +550,7 @@ int SendResults(DGraph *dg,DGNode *nd,Arr *feat){ MPI_Send(feat->val,feat->len,MPI_DOUBLE,head->address,tag,MPI_COMM_WORLD); } } + TRACE_smpi_set_category (NULL); return 1; } Arr* CombineStreams(DGraph *dg,DGNode *nd){ @@ -605,6 +609,8 @@ double ReduceStreams(DGraph *dg,DGNode *nd){ Arr *feat=NULL; double retv=0.0; + TRACE_smpi_set_category ("ReduceStreams"); + for(i=0;iinDegree;i++){ ar=nd->inArc[i]; if(ar->head!=nd) continue; @@ -635,6 +641,8 @@ int ProcessNodes(DGraph *dg,int me){ double rchksum=0.0; MPI_Status status; + TRACE_smpi_set_category ("ProcessNodes"); + for(i=0;inumNodes;i++){ nd=dg->node[i]; if(nd->address!=me) continue; @@ -650,6 +658,9 @@ int ProcessNodes(DGraph *dg,int me){ SendResults(dg,nd,feat); } } + + TRACE_smpi_set_category ("ProcessNodes"); + if(me==0){ /* Report node */ rchksum=0.0; chksum=0.0; @@ -675,6 +686,7 @@ int main(int argc,char **argv ){ MPI_Init( &argc, &argv ); MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); MPI_Comm_size( MPI_COMM_WORLD, &comm_size ); + TRACE_smpi_set_category ("begin"); if(argc!=2|| ( strncmp(argv[1],"BH",2)!=0 @@ -726,7 +738,8 @@ int main(int argc,char **argv ){ timer_start(0); } verified=ProcessNodes(dg,my_rank); - + TRACE_smpi_set_category ("end"); + featnum=NUM_SAMPLES*fielddim; bytes_sent=featnum*dg->numArcs; bytes_sent/=1048576; diff --git a/examples/smpi/NAS/EP-trace/Makefile b/examples/smpi/NAS/EP-trace/Makefile deleted file mode 100644 index 6194dcfe97..0000000000 --- a/examples/smpi/NAS/EP-trace/Makefile +++ /dev/null @@ -1,28 +0,0 @@ -SHELL=/bin/sh -BENCHMARK=ep -BENCHMARKU=EP - -include ../config/make.def - -#OBJS = ep-trace.o ${COMMON}/print_results.o ${COMMON}/${RAND}.o ${COMMON}/timers.o -OBJS = ep-trace.o randlc.o - -include ../sys/make.common - -${PROGRAM}: config ${OBJS} -# ${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${FMPI_LIB} - ${CLINK} ${CLINKFLAGS} -o ${PROGRAM} ${OBJS} ${CMPI_LIB} - - -#ep-trace.o: ep-trace.f mpinpb.h npbparams.h -# ${FCOMPILE} ep-trace.f - -ep-trace.o: ep-trace.c randlc.c mpinpb.h npbparams.h - ${CCOMPILE} ep-trace.c - -clean: - - rm -f *.o *~ - - rm -f npbparams.h core - - - diff --git a/examples/smpi/NAS/EP-trace/README b/examples/smpi/NAS/EP-trace/README deleted file mode 100644 index 6eb36571af..0000000000 --- a/examples/smpi/NAS/EP-trace/README +++ /dev/null @@ -1,6 +0,0 @@ -This code implements the random-number generator described in the -NAS Parallel Benchmark document RNR Technical Report RNR-94-007. -The code is "embarrassingly" parallel in that no communication is -required for the generation of the random numbers itself. There is -no special requirement on the number of processors used for running -the benchmark. diff --git a/examples/smpi/NAS/EP-trace/ep-trace.c b/examples/smpi/NAS/EP-trace/ep-trace.c deleted file mode 100644 index 4f606a0e39..0000000000 --- a/examples/smpi/NAS/EP-trace/ep-trace.c +++ /dev/null @@ -1,449 +0,0 @@ -#include -#include -#include -#include - -#include "mpi.h" -#include "npbparams.h" - -#include "simgrid/instr.h" //TRACE_ - -#include "randlc.h" - -#ifndef CLASS -#define CLASS 'S' -#define NUM_PROCS 1 -#endif -#define true 1 -#define false 0 - - -//---NOTE : all the timers function have been modified to -// avoid global timers (privatize these). - // ----------------------- timers --------------------- - void timer_clear(double *onetimer) { - //elapsed[n] = 0.0; - *onetimer = 0.0; - } - - void timer_start(double *onetimer) { - *onetimer = MPI_Wtime(); - } - - void timer_stop(int n,double *elapsed,double *start) { - double t, now; - - now = MPI_Wtime(); - t = now - start[n]; - elapsed[n] += t; - } - - double timer_read(int n, double *elapsed) { /* ok, useless, but jsut to keep function call */ - return(elapsed[n]); - } - /******************************************************************** - ***************** V R A N L C ****************** - ***************** *****************/ - double vranlc(int n, double x, double a, double *y) - { - int i; - long i246m1=0x00003FFFFFFFFFFF; - long LLx, Lx, La; - double d2m46; - -// This doesn't work, because the compiler does the calculation in 32 -// bits and overflows. No standard way (without f90 stuff) to specify -// that the rhs should be done in 64 bit arithmetic. -// parameter(i246m1=2**46-1) - - d2m46=pow(0.5,46); - -// c Note that the v6 compiler on an R8000 does something stupid with -// c the above. Using the following instead (or various other things) -// c makes the calculation run almost 10 times as fast. -// -// c save d2m46 -// c data d2m46/0.0d0/ -// c if (d2m46 .eq. 0.0d0) then -// c d2m46 = 0.5d0**46 -// c endif - - Lx = (long)x; - La = (long)a; - //fprintf(stdout,("================== Vranlc ================"); - //fprintf(stdout,("Before Loop: Lx = " + Lx + ", La = " + La); - LLx = Lx; - for (i=0; i< n; i++) { - Lx = Lx*La & i246m1 ; - LLx = Lx; - y[i] = d2m46 * (double)LLx; - /* - if(i == 0) { - fprintf(stdout,("After loop 0:"); - fprintf(stdout,("Lx = " + Lx + ", La = " + La); - fprintf(stdout,("d2m46 = " + d2m46); - fprintf(stdout,("LLX(Lx) = " + LLX.doubleValue()); - fprintf(stdout,("Y[0]" + y[0]); - } - */ - } - - x = (double)LLx; - /* - fprintf(stdout,("Change: Lx = " + Lx); - fprintf(stdout,("=============End Vranlc ================"); - */ - return x; - } - - - -//-------------- the core (unique function) ----------- - void doTest(int argc, char **argv) { - double dum[3] = {1.,1.,1.}; - double x1, x2, sx, sy, tm, an, tt, gc; - double Mops; - double epsilon=1.0E-8, a = 1220703125., s=271828183.; - double t1, t2, t3, t4; - double sx_verify_value, sy_verify_value, sx_err, sy_err; - -#include "npbparams.h" - int mk=16, - // --> set by make : in npbparams.h - //m=28, // for CLASS=A - //m=30, // for CLASS=B - //npm=2, // NPROCS - mm = m-mk, - nn = (int)(pow(2,mm)), - nk = (int)(pow(2,mk)), - nq=10, - np, - node, - no_nodes, - i, - ik, - kk, - l, - k, nit, no_large_nodes, - np_add, k_offset, j; - int me, nprocs, root=0, dp_type; - int verified, - timers_enabled=true; - char size[500]; // mind the size of the string to represent a big number - - //Use in randlc.. - int KS = 0; - double R23, R46, T23, T46; - - double *qq = (double *) malloc (10000*sizeof(double)); - double *start = (double *) malloc (64*sizeof(double)); - double *elapsed = (double *) malloc (64*sizeof(double)); - - double *x = (double *) malloc (2*nk*sizeof(double)); - double *q = (double *) malloc (nq*sizeof(double)); - - TRACE_smpi_set_category ("start"); - - MPI_Init( &argc, &argv ); - MPI_Comm_size( MPI_COMM_WORLD, &no_nodes); - MPI_Comm_rank( MPI_COMM_WORLD, &node); - -#ifdef USE_MPE - MPE_Init_log(); -#endif - root = 0; - if (node == root ) { - - /* Because the size of the problem is too large to store in a 32-bit - * integer for some classes, we put it into a string (for printing). - * Have to strip off the decimal point put in there by the floating - * point print statement (internal file) - */ - fprintf(stdout," NAS Parallel Benchmarks 3.2 -- EP Benchmark"); - sprintf(size,"%d",pow(2,m+1)); - //size = size.replace('.', ' '); - fprintf(stdout," Number of random numbers generated: %s\n",size); - fprintf(stdout," Number of active processes: %d\n",no_nodes); - - } - verified = false; - - /* c Compute the number of "batches" of random number pairs generated - c per processor. Adjust if the number of processors does not evenly - c divide the total number -*/ - - np = nn / no_nodes; - no_large_nodes = nn % no_nodes; - if (node < no_large_nodes) np_add = 1; - else np_add = 0; - np = np + np_add; - - if (np == 0) { - fprintf(stdout,"Too many nodes: %d %d",no_nodes,nn); - MPI_Abort(MPI_COMM_WORLD,1); - exit(0); - } - -/* c Call the random number generator functions and initialize - c the x-array to reduce the effects of paging on the timings. - c Also, call all mathematical functions that are used. Make - c sure these initializations cannot be eliminated as dead code. -*/ - - //call vranlc(0, dum[1], dum[2], dum[3]); - // Array indexes start at 1 in Fortran, 0 in Java - vranlc(0, dum[0], dum[1], &(dum[2])); - - dum[0] = randlc(&(dum[1]),&(dum[2])); - ///////////////////////////////// - for (i=0;i<2*nk;i++) { - x[i] = -1e99; - } - Mops = log(sqrt(abs(1))); - - /* - c--------------------------------------------------------------------- - c Synchronize before placing time stamp - c--------------------------------------------------------------------- - */ - MPI_Barrier( MPI_COMM_WORLD ); - - - TRACE_smpi_set_category ("ep"); - - timer_clear(&(elapsed[1])); - timer_clear(&(elapsed[2])); - timer_clear(&(elapsed[3])); - timer_start(&(start[1])); - - t1 = a; - //fprintf(stdout,("(ep.f:160) t1 = " + t1); - t1 = vranlc(0, t1, a, x); - //fprintf(stdout,("(ep.f:161) t1 = " + t1); - - -/* c Compute AN = A ^ (2 * NK) (mod 2^46). */ - - t1 = a; - //fprintf(stdout,("(ep.f:165) t1 = " + t1); - for (i=1; i <= mk+1; i++) { - t2 = randlc(&t1, &t1); - //fprintf(stdout,("(ep.f:168)[loop i=" + i +"] t1 = " + t1); - } - an = t1; - //fprintf(stdout,("(ep.f:172) s = " + s); - tt = s; - gc = 0.; - sx = 0.; - sy = 0.; - for (i=0; i < nq ; i++) { - q[i] = 0.; - } - -/* - Each instance of this loop may be performed independently. We compute - the k offsets separately to take into account the fact that some nodes - have more numbers to generate than others -*/ - - if (np_add == 1) - k_offset = node * np -1; - else - k_offset = no_large_nodes*(np+1) + (node-no_large_nodes)*np -1; - - int stop = false; - for(k = 1; k <= np; k++) { - stop = false; - kk = k_offset + k ; - t1 = s; - //fprintf(stdout,("(ep.f:193) t1 = " + t1); - t2 = an; - -// Find starting seed t1 for this kk. - - for (i=1;i<=100 && !stop;i++) { - ik = kk / 2; - //fprintf(stdout,("(ep.f:199) ik = " +ik+", kk = " + kk); - if (2 * ik != kk) { - t3 = randlc(&t1, &t2); - //fprintf(stdout,("(ep.f:200) t1= " +t1 ); - } - if (ik==0) - stop = true; - else { - t3 = randlc(&t2, &t2); - kk = ik; - } - } -// Compute uniform pseudorandom numbers. - - //if (timers_enabled) timer_start(3); - timer_start(&(start[3])); - //call vranlc(2 * nk, t1, a, x) --> t1 and y are modified - - //fprintf(stdout,">>>>>>>>>>>Before vranlc(l.210)<<<<<<<<<<<<<"); - //fprintf(stdout,"2*nk = " + (2*nk)); - //fprintf(stdout,"t1 = " + t1); - //fprintf(stdout,"a = " + a); - //fprintf(stdout,"x[0] = " + x[0]); - //fprintf(stdout,">>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<"); - - t1 = vranlc(2 * nk, t1, a, x); - - //fprintf(stdout,(">>>>>>>>>>>After Enter vranlc (l.210)<<<<<<"); - //fprintf(stdout,("2*nk = " + (2*nk)); - //fprintf(stdout,("t1 = " + t1); - //fprintf(stdout,("a = " + a); - //fprintf(stdout,("x[0] = " + x[0]); - //fprintf(stdout,(">>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<"); - - //if (timers_enabled) timer_stop(3); - timer_stop(3,elapsed,start); - -/* Compute Gaussian deviates by acceptance-rejection method and - * tally counts in concentric square annuli. This loop is not - * vectorizable. - */ - //if (timers_enabled) timer_start(2); - timer_start(&(start[2])); - for(i=1; i<=nk;i++) { - x1 = 2. * x[2*i-2] -1.0; - x2 = 2. * x[2*i-1] - 1.0; - t1 = x1*x1 + x2*x2; - if (t1 <= 1.) { - t2 = sqrt(-2. * log(t1) / t1); - t3 = (x1 * t2); - t4 = (x2 * t2); - l = (int)(abs(t3) > abs(t4) ? abs(t3) : abs(t4)); - q[l] = q[l] + 1.; - sx = sx + t3; - sy = sy + t4; - } - /* - if(i == 1) { - fprintf(stdout,"x1 = " + x1); - fprintf(stdout,"x2 = " + x2); - fprintf(stdout,"t1 = " + t1); - fprintf(stdout,"t2 = " + t2); - fprintf(stdout,"t3 = " + t3); - fprintf(stdout,"t4 = " + t4); - fprintf(stdout,"l = " + l); - fprintf(stdout,"q[l] = " + q[l]); - fprintf(stdout,"sx = " + sx); - fprintf(stdout,"sy = " + sy); - } - */ - } - //if (timers_enabled) timer_stop(2); - timer_stop(2,elapsed,start); - } - - TRACE_smpi_set_category ("finalize"); - - //int MPI_Allreduce(void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) - MPI_Allreduce(&sx, x, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - sx = x[0]; //FIXME : x[0] or x[1] => x[0] because fortran starts with 1 - MPI_Allreduce(&sy, x, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - sy = x[0]; - MPI_Allreduce(q, x, nq, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - - for(i = 0; i < nq; i++) { - q[i] = x[i]; - } - for(i = 0; i < nq; i++) { - gc += q[i]; - } - - timer_stop(1,elapsed,start); - tm = timer_read(1,elapsed); - MPI_Allreduce(&tm, x, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); - tm = x[0]; - - if(node == root) { - nit = 0; - verified = true; - - if(m == 24) { - sx_verify_value = -3.247834652034740E3; - sy_verify_value = -6.958407078382297E3; - } else if(m == 25) { - sx_verify_value = -2.863319731645753E3; - sy_verify_value = -6.320053679109499E3; - } else if(m == 28) { - sx_verify_value = -4.295875165629892E3; - sy_verify_value = -1.580732573678431E4; - } else if(m == 30) { - sx_verify_value = 4.033815542441498E4; - sy_verify_value = -2.660669192809235E4; - } else if(m == 32) { - sx_verify_value = 4.764367927995374E4; - sy_verify_value = -8.084072988043731E4; - } else if(m == 36) { - sx_verify_value = 1.982481200946593E5; - sy_verify_value = -1.020596636361769E5; - } else { - verified = false; - } - - /* - fprintf(stdout,("sx = " + sx); - fprintf(stdout,("sx_verify = " + sx_verify_value); - fprintf(stdout,("sy = " + sy); - fprintf(stdout,("sy_verify = " + sy_verify_value); - */ - if(verified) { - sx_err = abs((sx - sx_verify_value)/sx_verify_value); - sy_err = abs((sy - sy_verify_value)/sy_verify_value); - /* - fprintf(stdout,("sx_err = " + sx_err); - fprintf(stdout,("sy_err = " + sx_err); - fprintf(stdout,("epsilon= " + epsilon); - */ - verified = ((sx_err < epsilon) && (sy_err < epsilon)); - } - - Mops = (pow(2.0, m+1))/tm/1000; - - fprintf(stdout,"EP Benchmark Results:\n"); - fprintf(stdout,"CPU Time=%d\n",tm); - fprintf(stdout,"N = 2^%d\n",m); - fprintf(stdout,"No. Gaussain Pairs =%d\n",gc); - fprintf(stdout,"Sum = %f %ld\n",sx,sy); - fprintf(stdout,"Count:"); - for(i = 0; i < nq; i++) { - fprintf(stdout,"%d\t %ld\n",i,q[i]); - } - - /* - print_results("EP", _class, m+1, 0, 0, nit, npm, no_nodes, tm, Mops, - "Random numbers generated", verified, npbversion, - compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7) */ - fprintf(stdout,"\nEP Benchmark Completed\n"); - fprintf(stdout,"Class = %s\n", _class); - fprintf(stdout,"Size = %s\n", size); - fprintf(stdout,"Iteration = %d\n", nit); - fprintf(stdout,"Time in seconds = %f\n",(tm/1000)); - fprintf(stdout,"Total processes = %d\n",no_nodes); - fprintf(stdout,"Mops/s total = %f\n",Mops); - fprintf(stdout,"Mops/s/process = %f\n", Mops/no_nodes); - fprintf(stdout,"Operation type = Random number generated\n"); - if(verified) { - fprintf(stdout,"Verification = SUCCESSFUL\n"); - } else { - fprintf(stdout,"Verification = UNSUCCESSFUL\n"); - } - fprintf(stdout,"Total time: %f\n",(timer_read(1,elapsed)/1000)); - fprintf(stdout,"Gaussian pairs: %f\n",(timer_read(2,elapsed)/1000)); - fprintf(stdout,"Random numbers: %f\n",(timer_read(3,elapsed)/1000)); - } -#ifdef USE_MPE - MPE_Finish_log(argv[0]); -#endif - - MPI_Finalize(); - } - - int main(int argc, char **argv) { - doTest(argc,argv); - } diff --git a/examples/smpi/NAS/EP-trace/mpinpb.h b/examples/smpi/NAS/EP-trace/mpinpb.h deleted file mode 100644 index 1f13637ae4..0000000000 --- a/examples/smpi/NAS/EP-trace/mpinpb.h +++ /dev/null @@ -1,9 +0,0 @@ - -c--------------------------------------------------------------------- -c--------------------------------------------------------------------- - - include 'mpif.h' - - integer me, nprocs, root, dp_type - common /mpistuff/ me, nprocs, root, dp_type - diff --git a/examples/smpi/NAS/EP-trace/randlc.c b/examples/smpi/NAS/EP-trace/randlc.c deleted file mode 100644 index 4de6c93b88..0000000000 --- a/examples/smpi/NAS/EP-trace/randlc.c +++ /dev/null @@ -1,107 +0,0 @@ - -/* - * FUNCTION RANDLC (X, A) - * - * This routine returns a uniform pseudorandom double precision number in the - * range (0, 1) by using the linear congruential generator - * - * x_{k+1} = a x_k (mod 2^46) - * - * where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers - * before repeating. The argument A is the same as 'a' in the above formula, - * and X is the same as x_0. A and X must be odd double precision integers - * in the range (1, 2^46). The returned value RANDLC is normalized to be - * between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain - * the new seed x_1, so that subsequent calls to RANDLC using the same - * arguments will generate a continuous sequence. - * - * This routine should produce the same results on any computer with at least - * 48 mantissa bits in double precision floating point data. On Cray systems, - * double precision should be disabled. - * - * David H. Bailey October 26, 1990 - * - * IMPLICIT DOUBLE PRECISION (A-H, O-Z) - * SAVE KS, R23, R46, T23, T46 - * DATA KS/0/ - * - * If this is the first call to RANDLC, compute R23 = 2 ^ -23, R46 = 2 ^ -46, - * T23 = 2 ^ 23, and T46 = 2 ^ 46. These are computed in loops, rather than - * by merely using the ** operator, in order to insure that the results are - * exact on all systems. This code assumes that 0.5D0 is represented exactly. - */ - - -/*****************************************************************/ -/************* R A N D L C ************/ -/************* ************/ -/************* portable random number generator ************/ -/*****************************************************************/ - -double randlc( double *X, double *A ) -{ - static int KS=0; - static double R23, R46, T23, T46; - double T1, T2, T3, T4; - double A1; - double A2; - double X1; - double X2; - double Z; - int i, j; - - if (KS == 0) - { - R23 = 1.0; - R46 = 1.0; - T23 = 1.0; - T46 = 1.0; - - for (i=1; i<=23; i++) - { - R23 = 0.50 * R23; - T23 = 2.0 * T23; - } - for (i=1; i<=46; i++) - { - R46 = 0.50 * R46; - T46 = 2.0 * T46; - } - KS = 1; - } - -/* Break A into two parts such that A = 2^23 * A1 + A2 and set X = N. */ - - T1 = R23 * *A; - j = T1; - A1 = j; - A2 = *A - T23 * A1; - -/* Break X into two parts such that X = 2^23 * X1 + X2, compute - Z = A1 * X2 + A2 * X1 (mod 2^23), and then - X = 2^23 * Z + A2 * X2 (mod 2^46). */ - - T1 = R23 * *X; - j = T1; - X1 = j; - X2 = *X - T23 * X1; - T1 = A1 * X2 + A2 * X1; - - j = R23 * T1; - T2 = j; - Z = T1 - T23 * T2; - T3 = T23 * Z + A2 * X2; - j = R46 * T3; - T4 = j; - *X = T3 - T46 * T4; - return(R46 * *X); -} - - - -/*****************************************************************/ -/************ F I N D _ M Y _ S E E D ************/ -/************ ************/ -/************ returns parallel random number seq seed ************/ -/*****************************************************************/ - diff --git a/examples/smpi/NAS/EP-trace/randlc.h b/examples/smpi/NAS/EP-trace/randlc.h deleted file mode 100644 index aff84d341a..0000000000 --- a/examples/smpi/NAS/EP-trace/randlc.h +++ /dev/null @@ -1,3 +0,0 @@ - -double randlc( double *X, double *A ); - diff --git a/examples/smpi/NAS/EP/ep.c b/examples/smpi/NAS/EP/ep.c index 961422af26..5cf9d5fb7f 100644 --- a/examples/smpi/NAS/EP/ep.c +++ b/examples/smpi/NAS/EP/ep.c @@ -6,6 +6,8 @@ #include "mpi.h" #include "npbparams.h" +#include "simgrid/instr.h" //TRACE_ + #include "randlc.h" #ifndef CLASS @@ -140,6 +142,8 @@ double *x = (double *) malloc (2*nk*sizeof(double)); double *q = (double *) malloc (nq*sizeof(double)); + TRACE_smpi_set_category ("start"); + MPI_Init( &argc, &argv ); MPI_Comm_size( MPI_COMM_WORLD, &no_nodes); MPI_Comm_rank( MPI_COMM_WORLD, &node); @@ -205,6 +209,8 @@ */ MPI_Barrier( MPI_COMM_WORLD ); + TRACE_smpi_set_category ("ep"); + timer_clear(&(elapsed[1])); timer_clear(&(elapsed[2])); timer_clear(&(elapsed[3])); @@ -332,6 +338,8 @@ timer_stop(2,elapsed,start); } + TRACE_smpi_set_category ("finalize"); + //int MPI_Allreduce(void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) MPI_Allreduce(&sx, x, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); sx = x[0]; //FIXME : x[0] or x[1] => x[0] because fortran starts with 1 diff --git a/examples/smpi/NAS/IS-trace/Makefile b/examples/smpi/NAS/IS-trace/Makefile deleted file mode 100644 index 4911d0ddd5..0000000000 --- a/examples/smpi/NAS/IS-trace/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -SHELL=/bin/sh -BENCHMARK=is -BENCHMARKU=IS - -include ../config/make.def - -include ../sys/make.common - -OBJS = is-trace.o ${COMMON}/c_print_results.o - - -${PROGRAM}: config ${OBJS} - ${CLINK} ${CLINKFLAGS} -o ${PROGRAM} ${OBJS} ${CMPI_LIB} - -.c.o: - ${CCOMPILE} $< - -is-trace.o: is-trace.c npbparams.h - -clean: - - rm -f *.o *~ mputil* - - rm -f is-trace npbparams.h core diff --git a/examples/smpi/NAS/IS-trace/is-trace.c b/examples/smpi/NAS/IS-trace/is-trace.c deleted file mode 100644 index 57b370bb39..0000000000 --- a/examples/smpi/NAS/IS-trace/is-trace.c +++ /dev/null @@ -1,1154 +0,0 @@ -/************************************************************************* - * * - * N A S P A R A L L E L B E N C H M A R K S 3.3 * - * * - * I S * - * * - ************************************************************************* - * * - * This benchmark is part of the NAS Parallel Benchmark 3.3 suite. * - * It is described in NAS Technical Report 95-020. * - * * - * Permission to use, copy, distribute and modify this software * - * for any purpose with or without fee is hereby granted. We * - * request, however, that all derived work reference the NAS * - * Parallel Benchmarks 3.3. This software is provided "as is" * - * without express or implied warranty. * - * * - * Information on NPB 3.3, including the technical report, the * - * original specifications, source code, results and information * - * on how to submit new results, is available at: * - * * - * http://www.nas.nasa.gov/Software/NPB * - * * - * Send comments or suggestions to npb@nas.nasa.gov * - * Send bug reports to npb-bugs@nas.nasa.gov * - * * - * NAS Parallel Benchmarks Group * - * NASA Ames Research Center * - * Mail Stop: T27A-1 * - * Moffett Field, CA 94035-1000 * - * * - * E-mail: npb@nas.nasa.gov * - * Fax: (650) 604-3957 * - * * - ************************************************************************* - * * - * Author: M. Yarrow * - * H. Jin * - * * - *************************************************************************/ - -#include "mpi.h" -#include "npbparams.h" -#include -#include - -#include "simgrid/instr.h" //TRACE_ - -/******************/ -/* default values */ -/******************/ -#ifndef CLASS -#define CLASS 'S' -#define NUM_PROCS 1 -#endif -#define MIN_PROCS 1 - - -/*************/ -/* CLASS S */ -/*************/ -#if CLASS == 'S' -#define TOTAL_KEYS_LOG_2 16 -#define MAX_KEY_LOG_2 11 -#define NUM_BUCKETS_LOG_2 9 -#endif - - -/*************/ -/* CLASS W */ -/*************/ -#if CLASS == 'W' -#define TOTAL_KEYS_LOG_2 20 -#define MAX_KEY_LOG_2 16 -#define NUM_BUCKETS_LOG_2 10 -#endif - -/*************/ -/* CLASS A */ -/*************/ -#if CLASS == 'A' -#define TOTAL_KEYS_LOG_2 23 -#define MAX_KEY_LOG_2 19 -#define NUM_BUCKETS_LOG_2 10 -#endif - - -/*************/ -/* CLASS B */ -/*************/ -#if CLASS == 'B' -#define TOTAL_KEYS_LOG_2 25 -#define MAX_KEY_LOG_2 21 -#define NUM_BUCKETS_LOG_2 10 -#endif - - -/*************/ -/* CLASS C */ -/*************/ -#if CLASS == 'C' -#define TOTAL_KEYS_LOG_2 27 -#define MAX_KEY_LOG_2 23 -#define NUM_BUCKETS_LOG_2 10 -#endif - - -/*************/ -/* CLASS D */ -/*************/ -#if CLASS == 'D' -#define TOTAL_KEYS_LOG_2 29 -#define MAX_KEY_LOG_2 27 -#define NUM_BUCKETS_LOG_2 10 -#undef MIN_PROCS -#define MIN_PROCS 4 -#endif - - -#define TOTAL_KEYS (1 << TOTAL_KEYS_LOG_2) -#define MAX_KEY (1 << MAX_KEY_LOG_2) -#define NUM_BUCKETS (1 << NUM_BUCKETS_LOG_2) -#define NUM_KEYS (TOTAL_KEYS/NUM_PROCS*MIN_PROCS) - -/*****************************************************************/ -/* On larger number of processors, since the keys are (roughly) */ -/* gaussian distributed, the first and last processor sort keys */ -/* in a large interval, requiring array sizes to be larger. Note */ -/* that for large NUM_PROCS, NUM_KEYS is, however, a small number*/ -/* The required array size also depends on the bucket size used. */ -/* The following values are validated for the 1024-bucket setup. */ -/*****************************************************************/ -#if NUM_PROCS < 256 -#define SIZE_OF_BUFFERS 3*NUM_KEYS/2 -#elif NUM_PROCS < 512 -#define SIZE_OF_BUFFERS 5*NUM_KEYS/2 -#elif NUM_PROCS < 1024 -#define SIZE_OF_BUFFERS 4*NUM_KEYS -#else -#define SIZE_OF_BUFFERS 13*NUM_KEYS/2 -#endif - -/*****************************************************************/ -/* NOTE: THIS CODE CANNOT BE RUN ON ARBITRARILY LARGE NUMBERS OF */ -/* PROCESSORS. THE LARGEST VERIFIED NUMBER IS 1024. INCREASE */ -/* MAX_PROCS AT YOUR PERIL */ -/*****************************************************************/ -#if CLASS == 'S' -#define MAX_PROCS 128 -#else -#define MAX_PROCS 1024 -#endif - -#define MAX_ITERATIONS 10 -#define TEST_ARRAY_SIZE 5 - - -/***********************************/ -/* Enable separate communication, */ -/* computation timing and printout */ -/***********************************/ -/* #define TIMING_ENABLED */ - - -/*************************************/ -/* Typedef: if necessary, change the */ -/* size of int here by changing the */ -/* int type to, say, long */ -/*************************************/ -typedef int INT_TYPE; -typedef long INT_TYPE2; -#define MP_KEY_TYPE MPI_INT - - -typedef struct { - -/********************/ -/* MPI properties: */ -/********************/ -int my_rank, - comm_size; - - -/********************/ -/* Some global info */ -/********************/ -INT_TYPE *key_buff_ptr_global, /* used by full_verify to get */ - total_local_keys, /* copies of rank info */ - total_lesser_keys; - - -int passed_verification; - - - -/************************************/ -/* These are the three main arrays. */ -/* See SIZE_OF_BUFFERS def above */ -/************************************/ -INT_TYPE key_array[SIZE_OF_BUFFERS], - key_buff1[SIZE_OF_BUFFERS], - key_buff2[SIZE_OF_BUFFERS], - bucket_size[NUM_BUCKETS+TEST_ARRAY_SIZE], /* Top 5 elements for */ - bucket_size_totals[NUM_BUCKETS+TEST_ARRAY_SIZE], /* part. ver. vals */ - bucket_ptrs[NUM_BUCKETS], - process_bucket_distrib_ptr1[NUM_BUCKETS+TEST_ARRAY_SIZE], - process_bucket_distrib_ptr2[NUM_BUCKETS+TEST_ARRAY_SIZE]; -int send_count[MAX_PROCS], recv_count[MAX_PROCS], - send_displ[MAX_PROCS], recv_displ[MAX_PROCS]; - - -/**********************/ -/* Partial verif info */ -/**********************/ -INT_TYPE2 test_index_array[TEST_ARRAY_SIZE], - test_rank_array[TEST_ARRAY_SIZE]; - -/**********/ -/* Timers */ -/**********/ -double start[64], elapsed[64]; - -} global_data; - - -const INT_TYPE2 - S_test_index_array[TEST_ARRAY_SIZE] = - {48427,17148,23627,62548,4431}, - S_test_rank_array[TEST_ARRAY_SIZE] = - {0,18,346,64917,65463}, - - W_test_index_array[TEST_ARRAY_SIZE] = - {357773,934767,875723,898999,404505}, - W_test_rank_array[TEST_ARRAY_SIZE] = - {1249,11698,1039987,1043896,1048018}, - - A_test_index_array[TEST_ARRAY_SIZE] = - {2112377,662041,5336171,3642833,4250760}, - A_test_rank_array[TEST_ARRAY_SIZE] = - {104,17523,123928,8288932,8388264}, - - B_test_index_array[TEST_ARRAY_SIZE] = - {41869,812306,5102857,18232239,26860214}, - B_test_rank_array[TEST_ARRAY_SIZE] = - {33422937,10244,59149,33135281,99}, - - C_test_index_array[TEST_ARRAY_SIZE] = - {44172927,72999161,74326391,129606274,21736814}, - C_test_rank_array[TEST_ARRAY_SIZE] = - {61147,882988,266290,133997595,133525895}, - - D_test_index_array[TEST_ARRAY_SIZE] = - {1317351170,995930646,1157283250,1503301535,1453734525}, - D_test_rank_array[TEST_ARRAY_SIZE] = - {1,36538729,1978098519,2145192618,2147425337}; - - - -/***********************/ -/* function prototypes */ -/***********************/ -double randlc( double *X, double *A ); - -void full_verify( global_data* gd ); - -void c_print_results( char *name, - char class, - int n1, - int n2, - int n3, - int niter, - int nprocs_compiled, - int nprocs_total, - double t, - double mops, - char *optype, - int passed_verification, - char *npbversion, - char *compiletime, - char *mpicc, - char *clink, - char *cmpi_lib, - char *cmpi_inc, - char *cflags, - char *clinkflags ); - -void timer_clear(global_data* gd, int n ); -void timer_start(global_data* gd, int n ); -void timer_stop(global_data* gd, int n ); -double timer_read(global_data* gd, int n ); - -void timer_clear(global_data* gd, int n ) { - gd->elapsed[n] = 0.0; -} - -void timer_start(global_data* gd, int n ) { - gd->start[n] = MPI_Wtime(); -} - -void timer_stop(global_data* gd, int n ) { - gd->elapsed[n] += MPI_Wtime() - gd->start[n]; -} - -double timer_read(global_data* gd, int n ) { - return gd->elapsed[n]; -} - - -/* - * FUNCTION RANDLC (X, A) - * - * This routine returns a uniform pseudorandom double precision number in the - * range (0, 1) by using the linear congruential generator - * - * x_{k+1} = a x_k (mod 2^46) - * - * where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers - * before repeating. The argument A is the same as 'a' in the above formula, - * and X is the same as x_0. A and X must be odd double precision integers - * in the range (1, 2^46). The returned value RANDLC is normalized to be - * between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain - * the new seed x_1, so that subsequent calls to RANDLC using the same - * arguments will generate a continuous sequence. - * - * This routine should produce the same results on any computer with at least - * 48 mantissa bits in double precision floating point data. On Cray systems, - * double precision should be disabled. - * - * David H. Bailey October 26, 1990 - * - * IMPLICIT DOUBLE PRECISION (A-H, O-Z) - * SAVE KS, R23, R46, T23, T46 - * DATA KS/0/ - * - * If this is the first call to RANDLC, compute R23 = 2 ^ -23, R46 = 2 ^ -46, - * T23 = 2 ^ 23, and T46 = 2 ^ 46. These are computed in loops, rather than - * by merely using the ** operator, in order to insure that the results are - * exact on all systems. This code assumes that 0.5D0 is represented exactly. - */ - - -/*****************************************************************/ -/************* R A N D L C ************/ -/************* ************/ -/************* portable random number generator ************/ -/*****************************************************************/ - -double randlc( double *X, double *A ) -{ - static int KS=0; - static double R23, R46, T23, T46; - double T1, T2, T3, T4; - double A1; - double A2; - double X1; - double X2; - double Z; - int i, j; - - if (KS == 0) - { - R23 = 1.0; - R46 = 1.0; - T23 = 1.0; - T46 = 1.0; - - for (i=1; i<=23; i++) - { - R23 = 0.50 * R23; - T23 = 2.0 * T23; - } - for (i=1; i<=46; i++) - { - R46 = 0.50 * R46; - T46 = 2.0 * T46; - } - KS = 1; - } - -/* Break A into two parts such that A = 2^23 * A1 + A2 and set X = N. */ - - T1 = R23 * *A; - j = T1; - A1 = j; - A2 = *A - T23 * A1; - -/* Break X into two parts such that X = 2^23 * X1 + X2, compute - Z = A1 * X2 + A2 * X1 (mod 2^23), and then - X = 2^23 * Z + A2 * X2 (mod 2^46). */ - - T1 = R23 * *X; - j = T1; - X1 = j; - X2 = *X - T23 * X1; - T1 = A1 * X2 + A2 * X1; - - j = R23 * T1; - T2 = j; - Z = T1 - T23 * T2; - T3 = T23 * Z + A2 * X2; - j = R46 * T3; - T4 = j; - *X = T3 - T46 * T4; - return(R46 * *X); -} - - - -/*****************************************************************/ -/************ F I N D _ M Y _ S E E D ************/ -/************ ************/ -/************ returns parallel random number seq seed ************/ -/*****************************************************************/ - -/* - * Create a random number sequence of total length nn residing - * on np number of processors. Each processor will therefore have a - * subsequence of length nn/np. This routine returns that random - * number which is the first random number for the subsequence belonging - * to processor rank kn, and which is used as seed for proc kn ran # gen. - */ - -double find_my_seed( int kn, /* my processor rank, 0<=kn<=num procs */ - int np, /* np = num procs */ - long nn, /* total num of ran numbers, all procs */ - double s, /* Ran num seed, for ex.: 314159265.00 */ - double a ) /* Ran num gen mult, try 1220703125.00 */ -{ - - long i; - - double t1,t2,t3,an; - long mq,nq,kk,ik; - - - - nq = nn / np; - - for( mq=0; nq>1; mq++,nq/=2 ) - ; - - t1 = a; - - for( i=1; i<=mq; i++ ) - t2 = randlc( &t1, &t1 ); - - an = t1; - - kk = kn; - t1 = s; - t2 = an; - - for( i=1; i<=100; i++ ) - { - ik = kk / 2; - if( 2 * ik != kk ) - t3 = randlc( &t1, &t2 ); - if( ik == 0 ) - break; - t3 = randlc( &t2, &t2 ); - kk = ik; - } - - return( t1 ); - -} - - - - -/*****************************************************************/ -/************* C R E A T E _ S E Q ************/ -/*****************************************************************/ - -void create_seq( global_data* gd, double seed, double a ) -{ - double x; - int i, k; - - k = MAX_KEY/4; - - for (i=0; ikey_array[i] = k*x; - } -} - - - - -/*****************************************************************/ -/************* F U L L _ V E R I F Y ************/ -/*****************************************************************/ - - -void full_verify( global_data* gd ) -{ - MPI_Status status; - MPI_Request request; - - INT_TYPE i, j; - INT_TYPE k, last_local_key; - - -/* Now, finally, sort the keys: */ - for( i=0; itotal_local_keys; i++ ) - gd->key_array[--gd->key_buff_ptr_global[gd->key_buff2[i]]- - gd->total_lesser_keys] = gd->key_buff2[i]; - last_local_key = (gd->total_local_keys<1)? 0 : (gd->total_local_keys-1); - -/* Send largest key value to next processor */ - if( gd->my_rank > 0 ) - MPI_Irecv( &k, - 1, - MP_KEY_TYPE, - gd->my_rank-1, - 1000, - MPI_COMM_WORLD, - &request ); - if( gd->my_rank < gd->comm_size-1 ) - MPI_Send( &gd->key_array[last_local_key], - 1, - MP_KEY_TYPE, - gd->my_rank+1, - 1000, - MPI_COMM_WORLD ); - if( gd->my_rank > 0 ) - MPI_Wait( &request, &status ); - -/* Confirm that neighbor's greatest key value - is not greater than my least key value */ - j = 0; - if( gd->my_rank > 0 && gd->total_local_keys > 0 ) - if( k > gd->key_array[0] ) - j++; - - -/* Confirm keys correctly sorted: count incorrectly sorted keys, if any */ - for( i=1; itotal_local_keys; i++ ) - if( gd->key_array[i-1] > gd->key_array[i] ) - j++; - - - if( j != 0 ) - { - printf( "Processor %d: Full_verify: number of keys out of sort: %d\n", - gd->my_rank, j ); - } - else - gd->passed_verification++; - - -} - - - - -/*****************************************************************/ -/************* R A N K ****************/ -/*****************************************************************/ - - -void rank( global_data* gd, int iteration ) -{ - - INT_TYPE i, k; - - INT_TYPE shift = MAX_KEY_LOG_2 - NUM_BUCKETS_LOG_2; - INT_TYPE key; - INT_TYPE2 bucket_sum_accumulator, j, m; - INT_TYPE local_bucket_sum_accumulator; - INT_TYPE min_key_val, max_key_val; - INT_TYPE *key_buff_ptr; - - - - -/* Iteration alteration of keys */ - if(gd->my_rank == 0 ) - { - gd->key_array[iteration] = iteration; - gd->key_array[iteration+MAX_ITERATIONS] = MAX_KEY - iteration; - } - - -/* Initialize */ - for( i=0; ibucket_size[i] = 0; - gd->bucket_size_totals[i] = 0; - gd->process_bucket_distrib_ptr1[i] = 0; - gd->process_bucket_distrib_ptr2[i] = 0; - } - - -/* Determine where the partial verify test keys are, load into */ -/* top of array bucket_size */ - for( i=0; itest_index_array[i]/NUM_KEYS) == gd->my_rank ) - gd->bucket_size[NUM_BUCKETS+i] = - gd->key_array[gd->test_index_array[i] % NUM_KEYS]; - - -/* Determine the number of keys in each bucket */ - for( i=0; ibucket_size[gd->key_array[i] >> shift]++; - - -/* Accumulative bucket sizes are the bucket pointers */ - gd->bucket_ptrs[0] = 0; - for( i=1; i< NUM_BUCKETS; i++ ) - gd->bucket_ptrs[i] = gd->bucket_ptrs[i-1] + gd->bucket_size[i-1]; - - -/* Sort into appropriate bucket */ - for( i=0; ikey_array[i]; - gd->key_buff1[gd->bucket_ptrs[key >> shift]++] = key; - } - -#ifdef TIMING_ENABLED - timer_stop(gd, 2 ); - timer_start(gd, 3 ); -#endif - -/* Get the bucket size totals for the entire problem. These - will be used to determine the redistribution of keys */ - MPI_Allreduce( gd->bucket_size, - gd->bucket_size_totals, - NUM_BUCKETS+TEST_ARRAY_SIZE, - MP_KEY_TYPE, - MPI_SUM, - MPI_COMM_WORLD ); - -#ifdef TIMING_ENABLED - timer_stop(gd, 3 ); - timer_start(gd, 2 ); -#endif - -/* Determine Redistibution of keys: accumulate the bucket size totals - till this number surpasses NUM_KEYS (which the average number of keys - per processor). Then all keys in these buckets go to processor 0. - Continue accumulating again until supassing 2*NUM_KEYS. All keys - in these buckets go to processor 1, etc. This algorithm guarantees - that all processors have work ranking; no processors are left idle. - The optimum number of buckets, however, does not result in as high - a degree of load balancing (as even a distribution of keys as is - possible) as is obtained from increasing the number of buckets, but - more buckets results in more computation per processor so that the - optimum number of buckets turns out to be 1024 for machines tested. - Note that process_bucket_distrib_ptr1 and ..._ptr2 hold the bucket - number of first and last bucket which each processor will have after - the redistribution is done. */ - - bucket_sum_accumulator = 0; - local_bucket_sum_accumulator = 0; - gd->send_displ[0] = 0; - gd->process_bucket_distrib_ptr1[0] = 0; - for( i=0, j=0; ibucket_size_totals[i]; - local_bucket_sum_accumulator += gd->bucket_size[i]; - if( bucket_sum_accumulator >= (j+1)*NUM_KEYS ) - { - gd->send_count[j] = local_bucket_sum_accumulator; - if( j != 0 ) - { - gd->send_displ[j] = gd->send_displ[j-1] + gd->send_count[j-1]; - gd->process_bucket_distrib_ptr1[j] = - gd->process_bucket_distrib_ptr2[j-1]+1; - } - gd->process_bucket_distrib_ptr2[j++] = i; - local_bucket_sum_accumulator = 0; - } - } - -/* When NUM_PROCS approaching NUM_BUCKETS, it is highly possible - that the last few processors don't get any buckets. So, we - need to set counts properly in this case to avoid any fallouts. */ - while( j < gd->comm_size ) - { - gd->send_count[j] = 0; - gd->process_bucket_distrib_ptr1[j] = 1; - j++; - } - -#ifdef TIMING_ENABLED - timer_stop(gd, 2 ); - timer_start(gd, 3 ); -#endif - -/* This is the redistribution section: first find out how many keys - each processor will send to every other processor: */ - MPI_Alltoall( gd->send_count, - 1, - MPI_INT, - gd->recv_count, - 1, - MPI_INT, - MPI_COMM_WORLD ); - -/* Determine the receive array displacements for the buckets */ - gd->recv_displ[0] = 0; - for( i=1; icomm_size; i++ ) - gd->recv_displ[i] = gd->recv_displ[i-1] + gd->recv_count[i-1]; - - -/* Now send the keys to respective processors */ - MPI_Alltoallv( gd->key_buff1, - gd->send_count, - gd->send_displ, - MP_KEY_TYPE, - gd->key_buff2, - gd->recv_count, - gd->recv_displ, - MP_KEY_TYPE, - MPI_COMM_WORLD ); - -#ifdef TIMING_ENABLED - timer_stop(gd, 3 ); - timer_start(gd, 2 ); -#endif - -/* The starting and ending bucket numbers on each processor are - multiplied by the interval size of the buckets to obtain the - smallest possible min and greatest possible max value of any - key on each processor */ - min_key_val = gd->process_bucket_distrib_ptr1[gd->my_rank] << shift; - max_key_val = ((gd->process_bucket_distrib_ptr2[gd->my_rank] + 1) << shift)-1; - -/* Clear the work array */ - for( i=0; ikey_buff1[i] = 0; - -/* Determine the total number of keys on all other - processors holding keys of lesser value */ - m = 0; - for( k=0; kmy_rank; k++ ) - for( i= gd->process_bucket_distrib_ptr1[k]; - i<=gd->process_bucket_distrib_ptr2[k]; - i++ ) - m += gd->bucket_size_totals[i]; /* m has total # of lesser keys */ - -/* Determine total number of keys on this processor */ - j = 0; - for( i= gd->process_bucket_distrib_ptr1[gd->my_rank]; - i<=gd->process_bucket_distrib_ptr2[gd->my_rank]; - i++ ) - j += gd->bucket_size_totals[i]; /* j has total # of local keys */ - - -/* Ranking of all keys occurs in this section: */ -/* shift it backwards so no subtractions are necessary in loop */ - key_buff_ptr = gd->key_buff1 - min_key_val; - -/* In this section, the keys themselves are used as their - own indexes to determine how many of each there are: their - individual population */ - for( i=0; ikey_buff2[i]]++; /* Now they have individual key */ - /* population */ - -/* To obtain ranks of each key, successively add the individual key - population, not forgetting the total of lesser keys, m. - NOTE: Since the total of lesser keys would be subtracted later - in verification, it is no longer added to the first key population - here, but still needed during the partial verify test. This is to - ensure that 32-bit key_buff can still be used for class D. */ -/* key_buff_ptr[min_key_val] += m; */ - for( i=min_key_val; ibucket_size_totals[i+NUM_BUCKETS]; /* Keys were hidden here */ - if( min_key_val <= k && k <= max_key_val ) - { - /* Add the total of lesser keys, m, here */ - INT_TYPE2 key_rank = key_buff_ptr[k-1] + m; - int failed = 0; - - switch( CLASS ) - { - case 'S': - if( i <= 2 ) - { - if( key_rank != gd->test_rank_array[i]+iteration ) - failed = 1; - else - gd->passed_verification++; - } - else - { - if( key_rank != gd->test_rank_array[i]-iteration ) - failed = 1; - else - gd->passed_verification++; - } - break; - case 'W': - if( i < 2 ) - { - if( key_rank != gd->test_rank_array[i]+(iteration-2) ) - failed = 1; - else - gd->passed_verification++; - } - else - { - if( key_rank != gd->test_rank_array[i]-iteration ) - failed = 1; - else - gd->passed_verification++; - } - break; - case 'A': - if( i <= 2 ) - { - if( key_rank != gd->test_rank_array[i]+(iteration-1) ) - failed = 1; - else - gd->passed_verification++; - } - else - { - if( key_rank != gd->test_rank_array[i]-(iteration-1) ) - failed = 1; - else - gd->passed_verification++; - } - break; - case 'B': - if( i == 1 || i == 2 || i == 4 ) - { - if( key_rank != gd->test_rank_array[i]+iteration ) - failed = 1; - else - gd->passed_verification++; - } - else - { - if( key_rank != gd->test_rank_array[i]-iteration ) - failed = 1; - else - gd->passed_verification++; - } - break; - case 'C': - if( i <= 2 ) - { - if( key_rank != gd->test_rank_array[i]+iteration ) - failed = 1; - else - gd->passed_verification++; - } - else - { - if( key_rank != gd->test_rank_array[i]-iteration ) - failed = 1; - else - gd->passed_verification++; - } - break; - case 'D': - if( i < 2 ) - { - if( key_rank != gd->test_rank_array[i]+iteration ) - failed = 1; - else - gd->passed_verification++; - } - else - { - if( key_rank != gd->test_rank_array[i]-iteration ) - failed = 1; - else - gd->passed_verification++; - } - break; - } - if( failed == 1 ) - printf( "Failed partial verification: " - "iteration %d, processor %d, test key %d\n", - iteration, gd->my_rank, (int)i ); - } - } - - - - -/* Make copies of rank info for use by full_verify: these variables - in rank are local; making them global slows down the code, probably - since they cannot be made register by compiler */ - - if( iteration == MAX_ITERATIONS ) - { - gd->key_buff_ptr_global = key_buff_ptr; - gd->total_local_keys = j; - gd->total_lesser_keys = 0; /* no longer set to 'm', see note above */ - } - -} - - -/*****************************************************************/ -/************* M A I N ****************/ -/*****************************************************************/ - -int main( int argc, char **argv ) -{ - - int i, iteration, itemp; - - double timecounter, maxtime; - - global_data* gd = malloc(sizeof(global_data)); -/* Initialize MPI */ - MPI_Init( &argc, &argv ); - MPI_Comm_rank( MPI_COMM_WORLD, &gd->my_rank ); - MPI_Comm_size( MPI_COMM_WORLD, &gd->comm_size ); - -/* Initialize the verification arrays if a valid class */ - for( i=0; itest_index_array[i] = S_test_index_array[i]; - gd->test_rank_array[i] = S_test_rank_array[i]; - break; - case 'A': - gd->test_index_array[i] = A_test_index_array[i]; - gd->test_rank_array[i] = A_test_rank_array[i]; - break; - case 'W': - gd->test_index_array[i] = W_test_index_array[i]; - gd->test_rank_array[i] = W_test_rank_array[i]; - break; - case 'B': - gd->test_index_array[i] = B_test_index_array[i]; - gd->test_rank_array[i] = B_test_rank_array[i]; - break; - case 'C': - gd->test_index_array[i] = C_test_index_array[i]; - gd->test_rank_array[i] = C_test_rank_array[i]; - break; - case 'D': - gd->test_index_array[i] = D_test_index_array[i]; - gd->test_rank_array[i] = D_test_rank_array[i]; - break; - }; - - - -/* Printout initial NPB info */ - if( gd->my_rank == 0 ) - { - printf( "\n\n NAS Parallel Benchmarks 3.3 -- IS Benchmark\n\n" ); - printf( " Size: %ld (class %c)\n", (long)TOTAL_KEYS*MIN_PROCS, CLASS ); - printf( " Iterations: %d\n", MAX_ITERATIONS ); - printf( " Number of processes: %d\n",gd->comm_size ); - } - -/* Check that actual and compiled number of processors agree */ - if( gd->comm_size != NUM_PROCS ) - { - if( gd->my_rank == 0 ) - printf( "\n ERROR: compiled for %d processes\n" - " Number of active processes: %d\n" - " Exiting program!\n\n", NUM_PROCS, gd->comm_size ); - MPI_Finalize(); - exit( 1 ); - } - -/* Check to see whether total number of processes is within bounds. - This could in principle be checked in setparams.c, but it is more - convenient to do it here */ - if( gd->comm_size < MIN_PROCS || gd->comm_size > MAX_PROCS) - { - if( gd->my_rank == 0 ) - printf( "\n ERROR: number of processes %d not within range %d-%d" - "\n Exiting program!\n\n", gd->comm_size, MIN_PROCS, MAX_PROCS); - MPI_Finalize(); - exit( 1 ); - } - - -/* Generate random number sequence and subsequent keys on all procs */ - create_seq(gd, find_my_seed( gd->my_rank, - gd->comm_size, - 4*(long)TOTAL_KEYS*MIN_PROCS, - 314159265.00, /* Random number gen seed */ - 1220703125.00 ), /* Random number gen mult */ - 1220703125.00 ); /* Random number gen mult */ - -/* Do one interation for free (i.e., untimed) to guarantee initialization of - all data and code pages and respective tables */ - rank(gd, 1 ); - -/* Start verification counter */ - gd->passed_verification = 0; - - if( gd->my_rank == 0 && CLASS != 'S' ) printf( "\n iteration\n" ); - -/* Initialize timer */ - timer_clear(gd, 0 ); - -/* Initialize separate communication, computation timing */ -#ifdef TIMING_ENABLED - for( i=1; i<=3; i++ ) timer_clear(gd, i ); -#endif - -/* Start timer */ - timer_start(gd, 0 ); - -#ifdef TIMING_ENABLED - timer_start(gd, 1 ); - timer_start(gd, 2 ); -#endif - - char smpi_category[100]; - snprintf (smpi_category, 100, "%d", gd->my_rank); - TRACE_smpi_set_category (smpi_category); - -/* This is the main iteration */ - for( iteration=1; iteration<=MAX_ITERATIONS; iteration++ ) - { - if( gd->my_rank == 0 && CLASS != 'S' ) printf( " %d\n", iteration ); - rank(gd, iteration ); - } - TRACE_smpi_set_category (NULL); - -#ifdef TIMING_ENABLED - timer_stop(gd, 2 ); - timer_stop(gd, 1 ); -#endif - -/* Stop timer, obtain time for processors */ - timer_stop(gd, 0 ); - - timecounter = timer_read(gd, 0 ); - -/* End of timing, obtain maximum time of all processors */ - MPI_Reduce( &timecounter, - &maxtime, - 1, - MPI_DOUBLE, - MPI_MAX, - 0, - MPI_COMM_WORLD ); - -#ifdef TIMING_ENABLED - { - double tmin, tsum, tmax; - - if( my_rank == 0 ) - { - printf( "\ntimer 1/2/3 = total/computation/communication time\n"); - printf( " min avg max\n" ); - } - for( i=1; i<=3; i++ ) - { - timecounter = timer_read(gd, i ); - MPI_Reduce( &timecounter, - &tmin, - 1, - MPI_DOUBLE, - MPI_MIN, - 0, - MPI_COMM_WORLD ); - MPI_Reduce( &timecounter, - &tsum, - 1, - MPI_DOUBLE, - MPI_SUM, - 0, - MPI_COMM_WORLD ); - MPI_Reduce( &timecounter, - &tmax, - 1, - MPI_DOUBLE, - MPI_MAX, - 0, - MPI_COMM_WORLD ); - if( my_rank == 0 ) - printf( "timer %d: %f %f %f\n", - i, tmin, tsum/((double) comm_size), tmax ); - } - if( my_rank == 0 ) - printf( "\n" ); - } -#endif - -/* This tests that keys are in sequence: sorting of last ranked key seq - occurs here, but is an untimed operation */ - full_verify(gd); - - -/* Obtain verification counter sum */ - itemp =gd->passed_verification; - MPI_Reduce( &itemp, - &gd->passed_verification, - 1, - MPI_INT, - MPI_SUM, - 0, - MPI_COMM_WORLD ); - - - -/* The final printout */ - if( gd->my_rank == 0 ) - { - if( gd->passed_verification != 5*MAX_ITERATIONS + gd->comm_size ) - gd->passed_verification = 0; - c_print_results( "IS", - CLASS, - (int)(TOTAL_KEYS), - MIN_PROCS, - 0, - MAX_ITERATIONS, - NUM_PROCS, - gd->comm_size, - maxtime, - ((double) (MAX_ITERATIONS)*TOTAL_KEYS*MIN_PROCS) - /maxtime/1000000., - "keys ranked", - gd->passed_verification, - NPBVERSION, - COMPILETIME, - MPICC, - CLINK, - CMPI_LIB, - CMPI_INC, - CFLAGS, - CLINKFLAGS ); - } - - MPI_Finalize(); - free(gd); - - return 0; - /**************************/ -} /* E N D P R O G R A M */ - /**************************/ diff --git a/examples/smpi/NAS/IS/is.c b/examples/smpi/NAS/IS/is.c index d617726937..57b370bb39 100644 --- a/examples/smpi/NAS/IS/is.c +++ b/examples/smpi/NAS/IS/is.c @@ -44,6 +44,8 @@ #include #include +#include "simgrid/instr.h" //TRACE_ + /******************/ /* default values */ /******************/ @@ -1025,13 +1027,17 @@ int main( int argc, char **argv ) timer_start(gd, 2 ); #endif + char smpi_category[100]; + snprintf (smpi_category, 100, "%d", gd->my_rank); + TRACE_smpi_set_category (smpi_category); + /* This is the main iteration */ for( iteration=1; iteration<=MAX_ITERATIONS; iteration++ ) { if( gd->my_rank == 0 && CLASS != 'S' ) printf( " %d\n", iteration ); rank(gd, iteration ); } - + TRACE_smpi_set_category (NULL); #ifdef TIMING_ENABLED timer_stop(gd, 2 ); diff --git a/examples/smpi/NAS/config/NAS.samples/README b/examples/smpi/NAS/config/NAS.samples/README deleted file mode 100644 index ae535e95cf..0000000000 --- a/examples/smpi/NAS/config/NAS.samples/README +++ /dev/null @@ -1,7 +0,0 @@ -This directory contains examples of make.def files that were used -by the NPB team in testing the benchmarks on different platforms. -They can be used as starting points for make.def files for your -own platform, but you may need to taylor them for best performance -on your installation. A clean template can be found in directory -`config'. -Some examples of suite.def files are also provided. \ No newline at end of file diff --git a/examples/smpi/NAS/config/NAS.samples/make.def.dec_alpha b/examples/smpi/NAS/config/NAS.samples/make.def.dec_alpha deleted file mode 100644 index 44f045354d..0000000000 --- a/examples/smpi/NAS/config/NAS.samples/make.def.dec_alpha +++ /dev/null @@ -1,18 +0,0 @@ -#This is for a DEC Alpha 8400. The code will execute on a -#single processor -#Warning: parallel make does not work properly in general -MPIF77 = f77 -FLINK = f77 -#Optimization -O5 breaks SP; works fine for all other codes -FFLAGS = -O4 - -MPICC = cc -CLINK = cc -CFLAGS = -O5 - -include ../config/make.dummy - -CC = cc -g -BINDIR = ../bin - -RAND = randi8 diff --git a/examples/smpi/NAS/config/NAS.samples/make.def.irix6.2 b/examples/smpi/NAS/config/NAS.samples/make.def.irix6.2 deleted file mode 100644 index f76404755a..0000000000 --- a/examples/smpi/NAS/config/NAS.samples/make.def.irix6.2 +++ /dev/null @@ -1,16 +0,0 @@ -#This is for a generic single-processor SGI workstation -MPIF77 = f77 -FLINK = f77 -FFLAGS = -O3 - -MPICC = cc -CLINK = cc -CFLAGS = -O3 - -include ../config/make.dummy - -CC = cc -g -BINDIR = ../bin - -RAND = randi8 - diff --git a/examples/smpi/NAS/config/NAS.samples/make.def.origin b/examples/smpi/NAS/config/NAS.samples/make.def.origin deleted file mode 100644 index 11c63c975c..0000000000 --- a/examples/smpi/NAS/config/NAS.samples/make.def.origin +++ /dev/null @@ -1,20 +0,0 @@ -# This is for a an SGI Origin 2000 or 3000 with vendor MPI. The Fortran -# record length is specified, so it can be used for the I/O benchmark. -# as well -MPIF77 = f77 -FMPI_LIB = -lmpi -FLINK = f77 -64 -FFLAGS = -O3 -64 - -MPICC = cc -CMPI_LIB = -lmpi -CLINK = cc -CFLAGS = -O3 - -CC = cc -g -BINDIR = ../bin - -RAND = randi8 - -CONVERTFLAG = -DFORTRAN_REC_SIZE=4 - diff --git a/examples/smpi/NAS/config/NAS.samples/make.def.sgi_powerchallenge b/examples/smpi/NAS/config/NAS.samples/make.def.sgi_powerchallenge deleted file mode 100644 index 379726d057..0000000000 --- a/examples/smpi/NAS/config/NAS.samples/make.def.sgi_powerchallenge +++ /dev/null @@ -1,16 +0,0 @@ -# This is for the SGI PowerChallenge Array at NASA Ames. mrf77 and -# mrcc are local scripts that invoke the proper MPI library. -MPIF77 = mrf77 -FLINK = mrf77 -FFLAGS = -O3 -OPT:fold_arith_limit=1204 - -MPICC = mrcc -CLINK = mrcc -CFLAGS = -O3 -OPT:fold_arith_limit=1204 - -CC = cc -g -BINDIR = ../bin - -RAND = randi8 - - diff --git a/examples/smpi/NAS/config/NAS.samples/make.def.sp2_babbage b/examples/smpi/NAS/config/NAS.samples/make.def.sp2_babbage deleted file mode 100644 index 7896d56f7d..0000000000 --- a/examples/smpi/NAS/config/NAS.samples/make.def.sp2_babbage +++ /dev/null @@ -1,17 +0,0 @@ -#This is for the IBM SP2 at Ames; mrf77 and mrcc are local scripts -MPIF77 = mrf77 -FLINK = mrf77 -FFLAGS = -O3 -FLINKFLAGS = -bmaxdata:0x60000000 - -MPICC = mrcc -CLINK = mrcc -CFLAGS = -O3 -CLINKFLAGS = -bmaxdata:0x60000000 - -CC = cc -g - -BINDIR = ../bin - -RAND = randi8 - diff --git a/examples/smpi/NAS/config/NAS.samples/make.def.sun_ultra_sparc b/examples/smpi/NAS/config/NAS.samples/make.def.sun_ultra_sparc deleted file mode 100644 index 420dfde821..0000000000 --- a/examples/smpi/NAS/config/NAS.samples/make.def.sun_ultra_sparc +++ /dev/null @@ -1,30 +0,0 @@ -# This is for a Sun SparcCenter or UltraEnterprise machine -MPIF77 = f77 -FLINK = f77 -FMPI_LIB = -L/lib/solaris/ch_lfshmem -lmpi -FMPI_INC = -I/include -# sparc10,20 SparcCenter{1,2}000 (uname -m returns sun4m) -# and f77 -V returns 4.0 or greater -# FFLAGS = -fast -xtarget=super -xO4 -depend -# Ultra1,2, UltraEnterprise servers (uname -m returns sun4u) -FFLAGS = -fast -xtarget=ultra -xarch=v8plus -xO4 -depend -FLINKFLAGS = -lmopt -lcopt -lsunmath - -MPICC = cc -CLINK = cc -CMPI_LIB = -L/lib/solaris/ch_lfshmem -lmpi -CMPI_INC = -I/include -# sparc10,20 SparcCenter{1,2}000 (uname -m returns sun4m) -# and cc -V returns 4.0 or greater -#CFLAGS = -fast -xtarget=super -xO4 -xdepend -# Ultra1,2, UltraEnterprise servers (uname -m returns sun4u) -CFLAGS = -fast -xtarget=ultra -xarch=v8plus -xO4 -xdepend -CLINKFLAGS = -fast - -CC = cc -g - -BINDIR = ../bin - -# Cannot use randi8 or randi8-safe on a 32-but machine. Use double precision -RAND = randdp - diff --git a/examples/smpi/NAS/config/NAS.samples/make.def.t3d_cosmos b/examples/smpi/NAS/config/NAS.samples/make.def.t3d_cosmos deleted file mode 100644 index d3b3bbf528..0000000000 --- a/examples/smpi/NAS/config/NAS.samples/make.def.t3d_cosmos +++ /dev/null @@ -1,25 +0,0 @@ -#This is for the Cray T3D at the Jet Propulsion Laboratory -MPIF77 = cf77 -FLINK = cf77 -FMPI_LIB = -L/usr/local/mpp/lib -lmpi -FMPI_INC = -I/usr/local/mpp/lib/include/mpp -FFLAGS = -dp -Wf-onoieeedivide -C cray-t3d -#The following flags provide more effective optimization, but may -#cause the random number generator randi8(_safe) to break in EP -#FFLAGS = -dp -Wf-oaggress -Wf-onoieeedivide -C cray-t3d -FLINKFLAGS = -Wl-Drdahead=on -C cray-t3d - -MPICC = cc -CLINK = cc -CMPI_LIB = -L/usr/local/mpp/lib -lmpi -CMPI_INC = -I/usr/local/mpp/lib/include/mpp -CFLAGS = -O3 -Tcray-t3d -CLINKFLAGS = -Tcray-t3d - -CC = cc -g -Tcray-ymp -BINDIR = ../bin - -CONVERTFLAG= -DCONVERTDOUBLE - -RAND = randi8 - diff --git a/examples/smpi/NAS/config/NAS.samples/make.def_sun_mpich b/examples/smpi/NAS/config/NAS.samples/make.def_sun_mpich deleted file mode 100644 index 99b0b69041..0000000000 --- a/examples/smpi/NAS/config/NAS.samples/make.def_sun_mpich +++ /dev/null @@ -1,165 +0,0 @@ -#--------------------------------------------------------------------------- -# -# SITE- AND/OR PLATFORM-SPECIFIC DEFINITIONS. -# -#--------------------------------------------------------------------------- - -#--------------------------------------------------------------------------- -# Items in this file will need to be changed for each platform. -# (Note these definitions are inconsistent with NPB2.1.) -#--------------------------------------------------------------------------- - -#--------------------------------------------------------------------------- -# Parallel Fortran: -# -# For CG, EP, FT, MG, LU, SP and BT, which are in Fortran, the following must -# be defined: -# -# MPIF77 - Fortran compiler -# FFLAGS - Fortran compilation arguments -# FMPI_INC - any -I arguments required for compiling MPI/Fortran -# FLINK - Fortran linker -# FLINKFLAGS - Fortran linker arguments -# FMPI_LIB - any -L and -l arguments required for linking MPI/Fortran -# -# compilations are done with $(MPIF77) $(FMPI_INC) $(FFLAGS) or -# $(MPIF77) $(FFLAGS) -# linking is done with $(FLINK) $(FMPI_LIB) $(FLINKFLAGS) -#--------------------------------------------------------------------------- - -#--------------------------------------------------------------------------- -# This is the fortran compiler used for MPI programs -#--------------------------------------------------------------------------- -MPIF77 = mpif77 -# This links MPI fortran programs; usually the same as ${MPIF77} -FLINK = $(MPIF77) - -#--------------------------------------------------------------------------- -# These macros are passed to the linker to help link with MPI correctly -#--------------------------------------------------------------------------- -FMPI_LIB = - -#--------------------------------------------------------------------------- -# These macros are passed to the compiler to help find 'mpif.h' -#--------------------------------------------------------------------------- -FMPI_INC = - -#--------------------------------------------------------------------------- -# Global *compile time* flags for Fortran programs -#--------------------------------------------------------------------------- -FFLAGS = -fast -# FFLAGS = -g - -#--------------------------------------------------------------------------- -# Global *link time* flags. Flags for increasing maximum executable -# size usually go here. -#--------------------------------------------------------------------------- -FLINKFLAGS = -fast - - -#--------------------------------------------------------------------------- -# Parallel C: -# -# For IS, which is in C, the following must be defined: -# -# MPICC - C compiler -# CFLAGS - C compilation arguments -# CMPI_INC - any -I arguments required for compiling MPI/C -# CLINK - C linker -# CLINKFLAGS - C linker flags -# CMPI_LIB - any -L and -l arguments required for linking MPI/C -# -# compilations are done with $(MPICC) $(CMPI_INC) $(CFLAGS) or -# $(MPICC) $(CFLAGS) -# linking is done with $(CLINK) $(CMPI_LIB) $(CLINKFLAGS) -#--------------------------------------------------------------------------- - -#--------------------------------------------------------------------------- -# This is the C compiler used for MPI programs -#--------------------------------------------------------------------------- -MPICC = mpicc -# This links MPI C programs; usually the same as ${MPICC} -CLINK = $(MPICC) - -#--------------------------------------------------------------------------- -# These macros are passed to the linker to help link with MPI correctly -#--------------------------------------------------------------------------- -CMPI_LIB = - -#--------------------------------------------------------------------------- -# These macros are passed to the compiler to help find 'mpi.h' -#--------------------------------------------------------------------------- -CMPI_INC = - -#--------------------------------------------------------------------------- -# Global *compile time* flags for C programs -#--------------------------------------------------------------------------- -CFLAGS = -fast -# CFLAGS = -g - -#--------------------------------------------------------------------------- -# Global *link time* flags. Flags for increasing maximum executable -# size usually go here. -#--------------------------------------------------------------------------- -CLINKFLAGS = -fast - - -#--------------------------------------------------------------------------- -# MPI dummy library: -# -# Uncomment if you want to use the MPI dummy library supplied by NAS instead -# of the true message-passing library. The include file redefines several of -# the above macros. It also invokes make in subdirectory MPI_dummy. Make -# sure that no spaces or tabs precede include. -#--------------------------------------------------------------------------- -# include ../config/make.dummy - - -#--------------------------------------------------------------------------- -# Utilities C: -# -# This is the C compiler used to compile C utilities. Flags required by -# this compiler go here also; typically there are few flags required; hence -# there are no separate macros provided for such flags. -#--------------------------------------------------------------------------- -CC = cc -g - - -#--------------------------------------------------------------------------- -# Destination of executables, relative to subdirs of the main directory. . -#--------------------------------------------------------------------------- -BINDIR = ../bin - - -#--------------------------------------------------------------------------- -# Some machines (e.g. Crays) have 128-bit DOUBLE PRECISION numbers, which -# is twice the precision required for the NPB suite. A compiler flag -# (e.g. -dp) can usually be used to change DOUBLE PRECISION variables to -# 64 bits, but the MPI library may continue to send 128 bits. Short of -# recompiling MPI, the solution is to use MPI_REAL to send these 64-bit -# numbers, and MPI_COMPLEX to send their complex counterparts. Uncomment -# the following line to enable this substitution. -# -# NOTE: IF THE I/O BENCHMARK IS BEING BUILT, WE USE CONVERTFLAG TO -# SPECIFIY THE FORTRAN RECORD LENGTH UNIT. IT IS A SYSTEM-SPECIFIC -# VALUE (USUALLY 1 OR 4). UNCOMMENT THE SECOND LINE AND SUBSTITUTE -# THE CORRECT VALUE FOR "length". -# IF BOTH 128-BIT DOUBLE PRECISION NUMBERS AND I/O ARE TO BE ENABLED, -# UNCOMMENT THE THIRD LINE AND SUBSTITUTE THE CORRECT VALUE FOR -# "length" -#--------------------------------------------------------------------------- -# CONVERTFLAG = -DCONVERTDOUBLE -CONVERTFLAG = -DFORTRAN_REC_SIZE=1 -# CONVERTFLAG = -DCONVERTDOUBLE -DFORTRAN_REC_SIZE=length - - -#--------------------------------------------------------------------------- -# The variable RAND controls which random number generator -# is used. It is described in detail in Doc/README.install. -# Use "randi8" unless there is a reason to use another one. -# Other allowed values are "randi8_safe", "randdp" and "randdpvec" -#--------------------------------------------------------------------------- -RAND = randi8 -# The following is highly reliable but may be slow: -# RAND = randdp - diff --git a/examples/smpi/NAS/config/NAS.samples/suite.def.bt b/examples/smpi/NAS/config/NAS.samples/suite.def.bt deleted file mode 100644 index f330636aa0..0000000000 --- a/examples/smpi/NAS/config/NAS.samples/suite.def.bt +++ /dev/null @@ -1,37 +0,0 @@ -bt S 1 -bt S 4 -bt S 9 -bt S 16 -bt A 1 -bt A 4 -bt A 9 -bt A 16 -bt A 25 -bt A 36 -bt A 49 -bt A 64 -bt A 81 -bt A 100 -bt A 121 -bt B 1 -bt B 4 -bt B 9 -bt B 16 -bt B 25 -bt B 36 -bt B 49 -bt B 64 -bt B 81 -bt B 100 -bt B 121 -bt C 1 -bt C 4 -bt C 9 -bt C 16 -bt C 25 -bt C 36 -bt C 49 -bt C 64 -bt C 81 -bt C 100 -bt C 121 diff --git a/examples/smpi/NAS/config/NAS.samples/suite.def.cg b/examples/smpi/NAS/config/NAS.samples/suite.def.cg deleted file mode 100644 index 393bc508f6..0000000000 --- a/examples/smpi/NAS/config/NAS.samples/suite.def.cg +++ /dev/null @@ -1,29 +0,0 @@ -cg S 1 -cg S 2 -cg S 4 -cg S 8 -cg S 16 -cg A 1 -cg A 2 -cg A 4 -cg A 8 -cg A 16 -cg A 32 -cg A 64 -cg A 128 -cg B 1 -cg B 2 -cg B 4 -cg B 8 -cg B 16 -cg B 32 -cg B 64 -cg B 128 -cg C 1 -cg C 2 -cg C 4 -cg C 8 -cg C 16 -cg C 32 -cg C 64 -cg C 128 diff --git a/examples/smpi/NAS/config/NAS.samples/suite.def.ft b/examples/smpi/NAS/config/NAS.samples/suite.def.ft deleted file mode 100644 index 6f05189094..0000000000 --- a/examples/smpi/NAS/config/NAS.samples/suite.def.ft +++ /dev/null @@ -1,29 +0,0 @@ -ft S 1 -ft S 2 -ft S 4 -ft S 8 -ft S 16 -ft A 1 -ft A 2 -ft A 4 -ft A 8 -ft A 16 -ft A 32 -ft A 64 -ft A 128 -ft B 1 -ft B 2 -ft B 4 -ft B 8 -ft B 16 -ft B 32 -ft B 64 -ft B 128 -ft C 1 -ft C 2 -ft C 4 -ft C 8 -ft C 16 -ft C 32 -ft C 64 -ft C 128 diff --git a/examples/smpi/NAS/config/NAS.samples/suite.def.lu b/examples/smpi/NAS/config/NAS.samples/suite.def.lu deleted file mode 100644 index 442e0b6842..0000000000 --- a/examples/smpi/NAS/config/NAS.samples/suite.def.lu +++ /dev/null @@ -1,29 +0,0 @@ -lu S 1 -lu S 2 -lu S 4 -lu S 8 -lu S 16 -lu A 1 -lu A 2 -lu A 4 -lu A 8 -lu A 16 -lu A 32 -lu A 64 -lu A 128 -lu B 1 -lu B 2 -lu B 4 -lu B 8 -lu B 16 -lu B 32 -lu B 64 -lu B 128 -lu C 1 -lu C 2 -lu C 4 -lu C 8 -lu C 16 -lu C 32 -lu C 64 -lu C 128 diff --git a/examples/smpi/NAS/config/NAS.samples/suite.def.mg b/examples/smpi/NAS/config/NAS.samples/suite.def.mg deleted file mode 100644 index b5c01d4557..0000000000 --- a/examples/smpi/NAS/config/NAS.samples/suite.def.mg +++ /dev/null @@ -1,29 +0,0 @@ -mg S 1 -mg S 2 -mg S 4 -mg S 8 -mg S 16 -mg A 1 -mg A 2 -mg A 4 -mg A 8 -mg A 16 -mg A 32 -mg A 64 -mg A 128 -mg B 1 -mg B 2 -mg B 4 -mg B 8 -mg B 16 -mg B 32 -mg B 64 -mg B 128 -mg C 1 -mg C 2 -mg C 4 -mg C 8 -mg C 16 -mg C 32 -mg C 64 -mg C 128 diff --git a/examples/smpi/NAS/config/NAS.samples/suite.def.small b/examples/smpi/NAS/config/NAS.samples/suite.def.small deleted file mode 100644 index 5a094042bd..0000000000 --- a/examples/smpi/NAS/config/NAS.samples/suite.def.small +++ /dev/null @@ -1,8 +0,0 @@ -bt S 1 -cg S 1 -ep S 1 -ft S 1 -is S 1 -lu S 1 -mg S 1 -sp S 1 diff --git a/examples/smpi/NAS/config/NAS.samples/suite.def.sp b/examples/smpi/NAS/config/NAS.samples/suite.def.sp deleted file mode 100644 index f8113a256e..0000000000 --- a/examples/smpi/NAS/config/NAS.samples/suite.def.sp +++ /dev/null @@ -1,37 +0,0 @@ -sp S 1 -sp S 4 -sp S 9 -sp S 16 -sp A 1 -sp A 4 -sp A 9 -sp A 16 -sp A 25 -sp A 36 -sp A 49 -sp A 64 -sp A 81 -sp A 100 -sp A 121 -sp B 1 -sp B 4 -sp B 9 -sp B 16 -sp B 25 -sp B 36 -sp B 49 -sp B 64 -sp B 81 -sp B 100 -sp B 121 -sp C 1 -sp C 4 -sp C 9 -sp C 16 -sp C 25 -sp C 36 -sp C 49 -sp C 64 -sp C 81 -sp C 100 -sp C 121 diff --git a/examples/smpi/NAS/config/suite.def.template b/examples/smpi/NAS/config/suite.def.template index aea8b2358e..5e02f30a22 100644 --- a/examples/smpi/NAS/config/suite.def.template +++ b/examples/smpi/NAS/config/suite.def.template @@ -3,8 +3,7 @@ # Typing "make suite" in the main directory will build all the benchmarks # specified in this file. # Each line of this file contains a benchmark name, class, and number -# of nodes. The name is one of "cg", "is", "ep", mg", "ft", "sp", "bt", -# "lu", and "dt". +# of nodes. The name is one of "is", "ep", and "dt" # The class is one of "S", "W", "A", "B", "C", "D", and "E" # (except that no classes C, D and E for DT, and no class E for IS). # The number of nodes must be a legal number for a particular @@ -13,12 +12,6 @@ # Comments start with "#" as the first character on a line. # No blank lines. # The following example builds 1 processor sample sizes of all benchmarks. -ft S 1 -mg S 1 -sp S 1 -lu S 1 -bt S 1 is S 1 ep S 1 -cg S 1 dt S 1