From: pini Date: Thu, 23 Sep 2010 10:01:21 +0000 (+0000) Subject: Added a version of DT with RAM folding. X-Git-Tag: v3_5~616 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/827813f7390e44cb5c35a9d9c6aea722e5d71d6a?ds=sidebyside Added a version of DT with RAM folding. git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/simgrid/simgrid/trunk@8208 48e7efb5-ca39-0410-a469-dd3cf9ba447f --- diff --git a/examples/smpi/NAS/DT-folding/DGraph.c b/examples/smpi/NAS/DT-folding/DGraph.c new file mode 100644 index 0000000000..5d5839df19 --- /dev/null +++ b/examples/smpi/NAS/DT-folding/DGraph.c @@ -0,0 +1,184 @@ +#include +#include +#include + +#include "DGraph.h" + +DGArc *newArc(DGNode *tl,DGNode *hd){ + DGArc *ar=(DGArc *)malloc(sizeof(DGArc)); + ar->tail=tl; + ar->head=hd; + return ar; +} +void arcShow(DGArc *ar){ + DGNode *tl=(DGNode *)ar->tail, + *hd=(DGNode *)ar->head; + fprintf(stderr,"%d. |%s ->%s\n",ar->id,tl->name,hd->name); +} + +DGNode *newNode(char *nm){ + DGNode *nd=(DGNode *)malloc(sizeof(DGNode)); + nd->attribute=0; + nd->color=0; + nd->inDegree=0; + nd->outDegree=0; + nd->maxInDegree=SMALL_BLOCK_SIZE; + nd->maxOutDegree=SMALL_BLOCK_SIZE; + nd->inArc=(DGArc **)malloc(nd->maxInDegree*sizeof(DGArc*)); + nd->outArc=(DGArc **)malloc(nd->maxOutDegree*sizeof(DGArc*)); + nd->name=strdup(nm); + nd->feat=NULL; + return nd; +} +void nodeShow(DGNode* nd){ + fprintf( stderr,"%3d.%s: (%d,%d)\n", + nd->id,nd->name,nd->inDegree,nd->outDegree); +/* + if(nd->verified==1) fprintf(stderr,"%ld.%s\t: usable.",nd->id,nd->name); + else if(nd->verified==0) fprintf(stderr,"%ld.%s\t: unusable.",nd->id,nd->name); + else fprintf(stderr,"%ld.%s\t: notverified.",nd->id,nd->name); +*/ +} + +DGraph* newDGraph(char* nm){ + DGraph *dg=(DGraph *)malloc(sizeof(DGraph)); + dg->numNodes=0; + dg->numArcs=0; + dg->maxNodes=BLOCK_SIZE; + dg->maxArcs=BLOCK_SIZE; + dg->node=(DGNode **)malloc(dg->maxNodes*sizeof(DGNode*)); + dg->arc=(DGArc **)malloc(dg->maxArcs*sizeof(DGArc*)); + dg->name=strdup(nm); + return dg; +} +int AttachNode(DGraph* dg, DGNode* nd) { + int i=0,j,len=0; + DGNode **nds =NULL, *tmpnd=NULL; + DGArc **ar=NULL; + + if (dg->numNodes == dg->maxNodes-1 ) { + dg->maxNodes += BLOCK_SIZE; + nds =(DGNode **) calloc(dg->maxNodes,sizeof(DGNode*)); + memcpy(nds,dg->node,(dg->maxNodes-BLOCK_SIZE)*sizeof(DGNode*)); + free(dg->node); + dg->node=nds; + } + + len = strlen( nd->name); + for (i = 0; i < dg->numNodes; i++) { + tmpnd =dg->node[ i]; + ar=NULL; + if ( strlen( tmpnd->name) != len ) continue; + if ( strncmp( nd->name, tmpnd->name, len) ) continue; + if ( nd->inDegree > 0 ) { + tmpnd->maxInDegree += nd->maxInDegree; + ar =(DGArc **) calloc(tmpnd->maxInDegree,sizeof(DGArc*)); + memcpy(ar,tmpnd->inArc,(tmpnd->inDegree)*sizeof(DGArc*)); + free(tmpnd->inArc); + tmpnd->inArc=ar; + for (j = 0; j < nd->inDegree; j++ ) { + nd->inArc[ j]->head = tmpnd; + } + memcpy( &(tmpnd->inArc[ tmpnd->inDegree]), nd->inArc, nd->inDegree*sizeof( DGArc *)); + tmpnd->inDegree += nd->inDegree; + } + if ( nd->outDegree > 0 ) { + tmpnd->maxOutDegree += nd->maxOutDegree; + ar =(DGArc **) calloc(tmpnd->maxOutDegree,sizeof(DGArc*)); + memcpy(ar,tmpnd->outArc,(tmpnd->outDegree)*sizeof(DGArc*)); + free(tmpnd->outArc); + tmpnd->outArc=ar; + for (j = 0; j < nd->outDegree; j++ ) { + nd->outArc[ j]->tail = tmpnd; + } + memcpy( &(tmpnd->outArc[tmpnd->outDegree]),nd->outArc,nd->outDegree*sizeof( DGArc *)); + tmpnd->outDegree += nd->outDegree; + } + free(nd); + return i; + } + nd->id = dg->numNodes; + dg->node[dg->numNodes] = nd; + dg->numNodes++; +return nd->id; +} +int AttachArc(DGraph *dg,DGArc* nar){ +int arcId = -1; +int i=0,newNumber=0; +DGNode *head = nar->head, + *tail = nar->tail; +DGArc **ars=NULL,*probe=NULL; +/*fprintf(stderr,"AttachArc %ld\n",dg->numArcs); */ + if ( !tail || !head ) return arcId; + if ( dg->numArcs == dg->maxArcs-1 ) { + dg->maxArcs += BLOCK_SIZE; + ars =(DGArc **) calloc(dg->maxArcs,sizeof(DGArc*)); + memcpy(ars,dg->arc,(dg->maxArcs-BLOCK_SIZE)*sizeof(DGArc*)); + free(dg->arc); + dg->arc=ars; + } + for(i = 0; i < tail->outDegree; i++ ) { /* parallel arc */ + probe = tail->outArc[ i]; + if(probe->head == head + && + probe->length == nar->length + ){ + free(nar); + return probe->id; + } + } + + nar->id = dg->numArcs; + arcId=dg->numArcs; + dg->arc[dg->numArcs] = nar; + dg->numArcs++; + + head->inArc[ head->inDegree] = nar; + head->inDegree++; + if ( head->inDegree >= head->maxInDegree ) { + newNumber = head->maxInDegree + SMALL_BLOCK_SIZE; + ars =(DGArc **) calloc(newNumber,sizeof(DGArc*)); + memcpy(ars,head->inArc,(head->inDegree)*sizeof(DGArc*)); + free(head->inArc); + head->inArc=ars; + head->maxInDegree = newNumber; + } + tail->outArc[ tail->outDegree] = nar; + tail->outDegree++; + if(tail->outDegree >= tail->maxOutDegree ) { + newNumber = tail->maxOutDegree + SMALL_BLOCK_SIZE; + ars =(DGArc **) calloc(newNumber,sizeof(DGArc*)); + memcpy(ars,tail->outArc,(tail->outDegree)*sizeof(DGArc*)); + free(tail->outArc); + tail->outArc=ars; + tail->maxOutDegree = newNumber; + } +/*fprintf(stderr,"AttachArc: head->in=%d tail->out=%ld\n",head->inDegree,tail->outDegree);*/ +return arcId; +} +void graphShow(DGraph *dg,int DetailsLevel){ + int i=0,j=0; + fprintf(stderr,"%d.%s: (%d,%d)\n",dg->id,dg->name,dg->numNodes,dg->numArcs); + if ( DetailsLevel < 1) return; + for (i = 0; i < dg->numNodes; i++ ) { + DGNode *focusNode = dg->node[ i]; + if(DetailsLevel >= 2) { + for (j = 0; j < focusNode->inDegree; j++ ) { + fprintf(stderr,"\t "); + nodeShow(focusNode->inArc[ j]->tail); + } + } + nodeShow(focusNode); + if ( DetailsLevel < 2) continue; + for (j = 0; j < focusNode->outDegree; j++ ) { + fprintf(stderr, "\t "); + nodeShow(focusNode->outArc[ j]->head); + } + fprintf(stderr, "---\n"); + } + fprintf(stderr,"----------------------------------------\n"); + if ( DetailsLevel < 3) return; +} + + + diff --git a/examples/smpi/NAS/DT-folding/DGraph.h b/examples/smpi/NAS/DT-folding/DGraph.h new file mode 100644 index 0000000000..f38f898b24 --- /dev/null +++ b/examples/smpi/NAS/DT-folding/DGraph.h @@ -0,0 +1,43 @@ +#ifndef _DGRAPH +#define _DGRAPH + +#define BLOCK_SIZE 128 +#define SMALL_BLOCK_SIZE 32 + +typedef struct{ + int id; + void *tail,*head; + int length,width,attribute,maxWidth; +}DGArc; + +typedef struct{ + int maxInDegree,maxOutDegree; + int inDegree,outDegree; + int id; + char *name; + DGArc **inArc,**outArc; + int depth,height,width; + int color,attribute,address,verified; + void *feat; +}DGNode; + +typedef struct{ + int maxNodes,maxArcs; + int id; + char *name; + int numNodes,numArcs; + DGNode **node; + DGArc **arc; +} DGraph; + +DGArc *newArc(DGNode *tl,DGNode *hd); +void arcShow(DGArc *ar); +DGNode *newNode(char *nm); +void nodeShow(DGNode* nd); + +DGraph* newDGraph(char *nm); +int AttachNode(DGraph *dg,DGNode *nd); +int AttachArc(DGraph *dg,DGArc* nar); +void graphShow(DGraph *dg,int DetailsLevel); + +#endif diff --git a/examples/smpi/NAS/DT-folding/Makefile b/examples/smpi/NAS/DT-folding/Makefile new file mode 100644 index 0000000000..d1b3702916 --- /dev/null +++ b/examples/smpi/NAS/DT-folding/Makefile @@ -0,0 +1,26 @@ +SHELL=/bin/sh +BENCHMARK=dt +BENCHMARKU=DT + +include ../config/make.def + +include ../sys/make.common +#Override PROGRAM +DTPROGRAM = $(BINDIR)/$(BENCHMARK)-folding.$(CLASS) + +OBJS = dt.o DGraph.o \ + ${COMMON}/c_print_results.o ${COMMON}/c_timers.o ${COMMON}/c_randdp.o + + +${PROGRAM}: config ${OBJS} + ${CLINK} ${CLINKFLAGS} -o ${DTPROGRAM} ${OBJS} ${CMPI_LIB} + +.c.o: + ${CCOMPILE} $< + +dt.o: dt.c npbparams.h +DGraph.o: DGraph.c DGraph.h + +clean: + - rm -f *.o *~ mputil* + - rm -f dt npbparams.h core diff --git a/examples/smpi/NAS/DT-folding/README b/examples/smpi/NAS/DT-folding/README new file mode 100644 index 0000000000..873e3ae6f2 --- /dev/null +++ b/examples/smpi/NAS/DT-folding/README @@ -0,0 +1,22 @@ +Data Traffic benchmark DT is new in the NPB suite +(released as part of NPB3.x-MPI package). +---------------------------------------------------- + +DT is written in C and same executable can run on any number of processors, +provided this number is not less than the number of nodes in the communication +graph. DT benchmark takes one argument: BH, WH, or SH. This argument +specifies the communication graph Black Hole, White Hole, or SHuffle +respectively. The current release contains verification numbers for +CLASSES S, W, A, and B only. Classes C and D are defined, but verification +numbers are not provided in this release. + +The following table summarizes the number of nodes in the communication +graph based on CLASS and graph TYPE. + +CLASS N_Source N_Nodes(BH,WH) N_Nodes(SH) + S 4 5 12 + W 8 11 32 + A 16 21 80 + B 32 43 192 + C 64 85 448 + D 128 171 1024 diff --git a/examples/smpi/NAS/DT-folding/dt.c b/examples/smpi/NAS/DT-folding/dt.c new file mode 100644 index 0000000000..d080bde52b --- /dev/null +++ b/examples/smpi/NAS/DT-folding/dt.c @@ -0,0 +1,759 @@ +/************************************************************************* + * * + * N A S P A R A L L E L B E N C H M A R K S 3.3 * + * * + * D T * + * * + ************************************************************************* + * * + * This benchmark is part of the NAS Parallel Benchmark 3.3 suite. * + * * + * Permission to use, copy, distribute and modify this software * + * for any purpose with or without fee is hereby granted. We * + * request, however, that all derived work reference the NAS * + * Parallel Benchmarks 3.3. This software is provided "as is" * + * without express or implied warranty. * + * * + * Information on NPB 3.3, including the technical report, the * + * original specifications, source code, results and information * + * on how to submit new results, is available at: * + * * + * http: www.nas.nasa.gov/Software/NPB * + * * + * Send comments or suggestions to npb@nas.nasa.gov * + * Send bug reports to npb-bugs@nas.nasa.gov * + * * + * NAS Parallel Benchmarks Group * + * NASA Ames Research Center * + * Mail Stop: T27A-1 * + * Moffett Field, CA 94035-1000 * + * * + * E-mail: npb@nas.nasa.gov * + * Fax: (650) 604-3957 * + * * + ************************************************************************* + * * + * Author: M. Frumkin * * + * * + *************************************************************************/ + +#include +#include +#include + +#include "mpi.h" +#include "npbparams.h" + +#ifndef CLASS +#define CLASS 'S' +#define NUM_PROCS 1 +#endif + +//int passed_verification; +extern double randlc( double *X, double *A ); +extern +void c_print_results( char *name, + char class, + int n1, + int n2, + int n3, + int niter, + int nprocs_compiled, + int nprocs_total, + double t, + double mops, + char *optype, + int passed_verification, + char *npbversion, + char *compiletime, + char *mpicc, + char *clink, + char *cmpi_lib, + char *cmpi_inc, + char *cflags, + char *clinkflags ); + +void timer_clear( int n ); +void timer_start( int n ); +void timer_stop( int n ); +double timer_read( int n ); +int timer_on=0,timers_tot=64; + +int verify(char *bmname,double rnm2){ + double verify_value=0.0; + double epsilon=1.0E-8; + char cls=CLASS; + int verified=-1; + if (cls != 'U') { + if(cls=='S') { + if(strstr(bmname,"BH")){ + verify_value=30892725.0; + }else if(strstr(bmname,"WH")){ + verify_value=67349758.0; + }else if(strstr(bmname,"SH")){ + verify_value=58875767.0; + }else{ + fprintf(stderr,"No such benchmark as %s.\n",bmname); + } + verified = 0; + }else if(cls=='W') { + if(strstr(bmname,"BH")){ + verify_value = 4102461.0; + }else if(strstr(bmname,"WH")){ + verify_value = 204280762.0; + }else if(strstr(bmname,"SH")){ + verify_value = 186944764.0; + }else{ + fprintf(stderr,"No such benchmark as %s.\n",bmname); + } + verified = 0; + }else if(cls=='A') { + if(strstr(bmname,"BH")){ + verify_value = 17809491.0; + }else if(strstr(bmname,"WH")){ + verify_value = 1289925229.0; + }else if(strstr(bmname,"SH")){ + verify_value = 610856482.0; + }else{ + fprintf(stderr,"No such benchmark as %s.\n",bmname); + } + verified = 0; + }else if(cls=='B') { + if(strstr(bmname,"BH")){ + verify_value = 4317114.0; + }else if(strstr(bmname,"WH")){ + verify_value = 7877279917.0; + }else if(strstr(bmname,"SH")){ + verify_value = 1836863082.0; + }else{ + fprintf(stderr,"No such benchmark as %s.\n",bmname); + verified = 0; + } + }else if(cls=='C') { + if(strstr(bmname,"BH")){ + verify_value = 0.0; + }else if(strstr(bmname,"WH")){ + verify_value = 0.0; + }else if(strstr(bmname,"SH")){ + verify_value = 0.0; + }else{ + fprintf(stderr,"No such benchmark as %s.\n",bmname); + verified = -1; + } + }else if(cls=='D') { + if(strstr(bmname,"BH")){ + verify_value = 0.0; + }else if(strstr(bmname,"WH")){ + verify_value = 0.0; + }else if(strstr(bmname,"SH")){ + verify_value = 0.0; + }else{ + fprintf(stderr,"No such benchmark as %s.\n",bmname); + } + verified = -1; + }else{ + fprintf(stderr,"No such class as %c.\n",cls); + } + fprintf(stderr," %s L2 Norm = %f\n",bmname,rnm2); + if(verified==-1){ + fprintf(stderr," No verification was performed.\n"); + }else if( rnm2 - verify_value < epsilon && + rnm2 - verify_value > -epsilon) { /* abs here does not work on ALTIX */ + verified = 1; + fprintf(stderr," Deviation = %f\n",(rnm2 - verify_value)); + }else{ + verified = 0; + fprintf(stderr," The correct verification value = %f\n",verify_value); + fprintf(stderr," Got value = %f\n",rnm2); + } + }else{ + verified = -1; + } + return verified; + } + +int ipowMod(int a,long long int n,int md){ + int seed=1,q=a,r=1; + if(n<0){ + fprintf(stderr,"ipowMod: exponent must be nonnegative exp=%lld\n",n); + n=-n; /* temp fix */ +/* return 1; */ + } + if(md<=0){ + fprintf(stderr,"ipowMod: module must be positive mod=%d",md); + return 1; + } + if(n==0) return 1; + while(n>1){ + int n2 = n/2; + if (n2*2==n){ + seed = (q*q)%md; + q=seed; + n = n2; + }else{ + seed = (r*q)%md; + r=seed; + n = n-1; + } + } + seed = (r*q)%md; + return seed; +} + +#include "DGraph.h" +DGraph *buildSH(char cls){ +/* + Nodes of the graph must be topologically sorted + to avoid MPI deadlock. +*/ + DGraph *dg; + int numSources=NUM_SOURCES; /* must be power of 2 */ + int numOfLayers=0,tmpS=numSources>>1; + int firstLayerNode=0; + DGArc *ar=NULL; + DGNode *nd=NULL; + int mask=0x0,ndid=0,ndoff=0; + int i=0,j=0; + char nm[BLOCK_SIZE]; + + sprintf(nm,"DT_SH.%c",cls); + dg=newDGraph(nm); + + while(tmpS>1){ + numOfLayers++; + tmpS>>=1; + } + for(i=0;inode[ndid],nd); + AttachArc(dg,ar); + ndoff+=mask; + ndid=firstLayerNode+ndoff; + ar=newArc(dg->node[ndid],nd); + AttachArc(dg,ar); + } + firstLayerNode+=numSources; + } + mask=0x00000001<node[ndid],nd); + AttachArc(dg,ar); + ndoff+=mask; + ndid=firstLayerNode+ndoff; + ar=newArc(dg->node[ndid],nd); + AttachArc(dg,ar); + } +return dg; +} +DGraph *buildWH(char cls){ +/* + Nodes of the graph must be topologically sorted + to avoid MPI deadlock. +*/ + int i=0,j=0; + int numSources=NUM_SOURCES,maxInDeg=4; + int numLayerNodes=numSources,firstLayerNode=0; + int totComparators=0; + int numPrevLayerNodes=numLayerNodes; + int id=0,sid=0; + DGraph *dg; + DGNode *nd=NULL,*source=NULL,*tmp=NULL,*snd=NULL; + DGArc *ar=NULL; + char nm[BLOCK_SIZE]; + + sprintf(nm,"DT_WH.%c",cls); + dg=newDGraph(nm); + + for(i=0;imaxInDeg){ + numLayerNodes=numLayerNodes/maxInDeg; + if(numLayerNodes*maxInDeg=numPrevLayerNodes) break; + snd=dg->node[firstLayerNode+sid]; + ar=newArc(dg->node[id],snd); + AttachArc(dg,ar); + } + } + firstLayerNode+=numPrevLayerNodes; + numPrevLayerNodes=numLayerNodes; + } + source=newNode("Source"); + AttachNode(dg,source); + for(i=0;inode[firstLayerNode+i]; + ar=newArc(source,nd); + AttachArc(dg,ar); + } + + for(i=0;inumNodes/2;i++){ /* Topological sorting */ + tmp=dg->node[i]; + dg->node[i]=dg->node[dg->numNodes-1-i]; + dg->node[i]->id=i; + dg->node[dg->numNodes-1-i]=tmp; + dg->node[dg->numNodes-1-i]->id=dg->numNodes-1-i; + } +return dg; +} +DGraph *buildBH(char cls){ +/* + Nodes of the graph must be topologically sorted + to avoid MPI deadlock. +*/ + int i=0,j=0; + int numSources=NUM_SOURCES,maxInDeg=4; + int numLayerNodes=numSources,firstLayerNode=0; + DGraph *dg; + DGNode *nd=NULL, *snd=NULL, *sink=NULL; + DGArc *ar=NULL; + int totComparators=0; + int numPrevLayerNodes=numLayerNodes; + int id=0, sid=0; + char nm[BLOCK_SIZE]; + + sprintf(nm,"DT_BH.%c",cls); + dg=newDGraph(nm); + + for(i=0;imaxInDeg){ + numLayerNodes=numLayerNodes/maxInDeg; + if(numLayerNodes*maxInDeg=numPrevLayerNodes) break; + snd=dg->node[firstLayerNode+sid]; + ar=newArc(snd,dg->node[id]); + AttachArc(dg,ar); + } + } + firstLayerNode+=numPrevLayerNodes; + numPrevLayerNodes=numLayerNodes; + } + sink=newNode("Sink"); + AttachNode(dg,sink); + for(i=0;inode[firstLayerNode+i]; + ar=newArc(nd,sink); + AttachArc(dg,ar); + } +return dg; +} + +typedef struct{ + int len; + double* val; +} Arr; +Arr *newArr(int len){ + Arr *arr=(Arr *)SMPI_SHARED_MALLOC(sizeof(Arr)); + arr->len=len; + arr->val=(double *)SMPI_SHARED_MALLOC(len*sizeof(double)); + return arr; +} +void arrShow(Arr* a){ + if(!a) fprintf(stderr,"-- NULL array\n"); + else{ + fprintf(stderr,"-- length=%d\n",a->len); + } +} +double CheckVal(Arr *feat){ + double csum=0.0; + int i=0; + for(i=0;ilen;i++){ + csum+=feat->val[i]*feat->val[i]/feat->len; /* The truncation does not work since + result will be 0 for large len */ + } + return csum; +} +int GetFNumDPar(int* mean, int* stdev){ + *mean=NUM_SAMPLES; + *stdev=STD_DEVIATION; + return 0; +} +int GetFeatureNum(char *mbname,int id){ + double tran=314159265.0; + double A=2*id+1; + double denom=randlc(&tran,&A); + char cval='S'; + int mean=NUM_SAMPLES,stdev=128; + int rtfs=0,len=0; + GetFNumDPar(&mean,&stdev); + rtfs=ipowMod((int)(1/denom)*(int)cval,(long long int) (2*id+1),2*stdev); + if(rtfs<0) rtfs=-rtfs; + len=mean-stdev+rtfs; + return len; +} +Arr* RandomFeatures(char *bmname,int fdim,int id){ + int len=GetFeatureNum(bmname,id)*fdim; + Arr* feat=newArr(len); + int nxg=2,nyg=2,nzg=2,nfg=5; + int nx=421,ny=419,nz=1427,nf=3527; + long long int expon=(len*(id+1))%3141592; + int seedx=ipowMod(nxg,expon,nx), + seedy=ipowMod(nyg,expon,ny), + seedz=ipowMod(nzg,expon,nz), + seedf=ipowMod(nfg,expon,nf); + int i=0; + if(timer_on){ + timer_clear(id+1); + timer_start(id+1); + } + for(i=0;ival[i]=seedx; + feat->val[i+1]=seedy; + feat->val[i+2]=seedz; + feat->val[i+3]=seedf; + } + if(timer_on){ + timer_stop(id+1); + fprintf(stderr,"** RandomFeatures time in node %d = %f\n",id,timer_read(id+1)); + } + return feat; +} +void Resample(Arr *a,int blen){ + long long int i=0,j=0,jlo=0,jhi=0; + double avval=0.0; + double *nval=(double *)SMPI_SHARED_MALLOC(blen*sizeof(double)); + Arr *tmp=newArr(10); + for(i=0;ilen-1;i++){ + jlo=(int)(0.5*(2*i-1)*(blen/a->len)); + jhi=(int)(0.5*(2*i+1)*(blen/a->len)); + + avval=a->val[i]/(jhi-jlo+1); + for(j=jlo;j<=jhi;j++){ + nval[j]+=avval; + } + } + nval[0]=a->val[0]; + nval[blen-1]=a->val[a->len-1]; + SMPI_SHARED_FREE(a->val); + a->val=nval; + a->len=blen; +} +#define fielddim 4 +Arr* WindowFilter(Arr *a, Arr* b,int w){ + int i=0,j=0,k=0; + double rms0=0.0,rms1=0.0,rmsm1=0.0; + double weight=((double) (w+1))/(w+2); + + w+=1; + if(timer_on){ + timer_clear(w); + timer_start(w); + } + if(a->lenlen) Resample(a,b->len); + if(a->len>b->len) Resample(b,a->len); + for(i=fielddim;ilen-fielddim;i+=fielddim){ + rms0=(a->val[i]-b->val[i])*(a->val[i]-b->val[i]) + +(a->val[i+1]-b->val[i+1])*(a->val[i+1]-b->val[i+1]) + +(a->val[i+2]-b->val[i+2])*(a->val[i+2]-b->val[i+2]) + +(a->val[i+3]-b->val[i+3])*(a->val[i+3]-b->val[i+3]); + j=i+fielddim; + rms1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j]) + +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1]) + +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2]) + +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]); + j=i-fielddim; + rmsm1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j]) + +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1]) + +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2]) + +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]); + k=0; + if(rms1val[i]=weight*b->val[i]; + a->val[i+1]=weight*b->val[i+1]; + a->val[i+2]=weight*b->val[i+2]; + a->val[i+3]=weight*b->val[i+3]; + }else if(k==1){ + j=i+fielddim; + a->val[i]=weight*b->val[j]; + a->val[i+1]=weight*b->val[j+1]; + a->val[i+2]=weight*b->val[j+2]; + a->val[i+3]=weight*b->val[j+3]; + }else { /*if(k==-1)*/ + j=i-fielddim; + a->val[i]=weight*b->val[j]; + a->val[i+1]=weight*b->val[j+1]; + a->val[i+2]=weight*b->val[j+2]; + a->val[i+3]=weight*b->val[j+3]; + } + } + if(timer_on){ + timer_stop(w); + fprintf(stderr,"** WindowFilter time in node %d = %f\n",(w-1),timer_read(w)); + } + return a; +} + +int SendResults(DGraph *dg,DGNode *nd,Arr *feat){ + int i=0,tag=0; + DGArc *ar=NULL; + DGNode *head=NULL; + if(!feat) return 0; + for(i=0;ioutDegree;i++){ + ar=nd->outArc[i]; + if(ar->tail!=nd) continue; + head=ar->head; + tag=ar->id; + if(head->address!=nd->address){ + MPI_Send(&feat->len,1,MPI_INT,head->address,tag,MPI_COMM_WORLD); + MPI_Send(feat->val,feat->len,MPI_DOUBLE,head->address,tag,MPI_COMM_WORLD); + } + } + return 1; +} +Arr* CombineStreams(DGraph *dg,DGNode *nd){ + Arr *resfeat=newArr(NUM_SAMPLES*fielddim); + int i=0,len=0,tag=0; + DGArc *ar=NULL; + DGNode *tail=NULL; + MPI_Status status; + Arr *feat=NULL,*featp=NULL; + + if(nd->inDegree==0) return NULL; + for(i=0;iinDegree;i++){ + ar=nd->inArc[i]; + if(ar->head!=nd) continue; + tail=ar->tail; + if(tail->address!=nd->address){ + len=0; + tag=ar->id; + MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status); + feat=newArr(len); + MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status); + resfeat=WindowFilter(resfeat,feat,nd->id); + SMPI_SHARED_FREE(feat); + }else{ + featp=(Arr *)tail->feat; + feat=newArr(featp->len); + memcpy(feat->val,featp->val,featp->len*sizeof(double)); + resfeat=WindowFilter(resfeat,feat,nd->id); + SMPI_SHARED_FREE(feat); + } + } + for(i=0;ilen;i++) resfeat->val[i]=((int)resfeat->val[i])/nd->inDegree; + nd->feat=resfeat; + return nd->feat; +} +double Reduce(Arr *a,int w){ + double retv=0.0; + if(timer_on){ + timer_clear(w); + timer_start(w); + } + retv=(int)(w*CheckVal(a));/* The casting needed for node + and array dependent verifcation */ + if(timer_on){ + timer_stop(w); + fprintf(stderr,"** Reduce time in node %d = %f\n",(w-1),timer_read(w)); + } + return retv; +} + +double ReduceStreams(DGraph *dg,DGNode *nd){ + double csum=0.0; + int i=0,len=0,tag=0; + DGArc *ar=NULL; + DGNode *tail=NULL; + Arr *feat=NULL; + double retv=0.0; + + for(i=0;iinDegree;i++){ + ar=nd->inArc[i]; + if(ar->head!=nd) continue; + tail=ar->tail; + if(tail->address!=nd->address){ + MPI_Status status; + len=0; + tag=ar->id; + MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status); + feat=newArr(len); + MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status); + csum+=Reduce(feat,(nd->id+1)); + SMPI_SHARED_FREE(feat); + }else{ + csum+=Reduce(tail->feat,(nd->id+1)); + } + } + if(nd->inDegree>0)csum=(((long long int)csum)/nd->inDegree); + retv=(nd->id+1)*csum; + return retv; +} + +int ProcessNodes(DGraph *dg,int me){ + double chksum=0.0; + Arr *feat=NULL; + int i=0,verified=0,tag; + DGNode *nd=NULL; + double rchksum=0.0; + MPI_Status status; + + for(i=0;inumNodes;i++){ + nd=dg->node[i]; + if(nd->address!=me) continue; + if(strstr(nd->name,"Source")){ + nd->feat=RandomFeatures(dg->name,fielddim,nd->id); + SendResults(dg,nd,nd->feat); + }else if(strstr(nd->name,"Sink")){ + chksum=ReduceStreams(dg,nd); + tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */ + MPI_Send(&chksum,1,MPI_DOUBLE,0,tag,MPI_COMM_WORLD); + }else{ + feat=CombineStreams(dg,nd); + SendResults(dg,nd,feat); + } + } + if(me==0){ /* Report node */ + rchksum=0.0; + chksum=0.0; + for(i=0;inumNodes;i++){ + nd=dg->node[i]; + if(!strstr(nd->name,"Sink")) continue; + tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */ + MPI_Recv(&rchksum,1,MPI_DOUBLE,nd->address,tag,MPI_COMM_WORLD,&status); + chksum+=rchksum; + } + verified=verify(dg->name,chksum); + } +return verified; +} + +int main(int argc,char **argv ){ + int my_rank,comm_size; + int i; + DGraph *dg=NULL; + int verified=0, featnum=0; + double bytes_sent=2.0,tot_time=0.0; + + MPI_Init( &argc, &argv ); + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + MPI_Comm_size( MPI_COMM_WORLD, &comm_size ); + + if(argc!=2|| + ( strncmp(argv[1],"BH",2)!=0 + &&strncmp(argv[1],"WH",2)!=0 + &&strncmp(argv[1],"SH",2)!=0 + ) + ){ + if(my_rank==0){ + fprintf(stderr,"** Usage: mpirun -np N ../bin/dt.S GraphName\n"); + fprintf(stderr,"** Where \n - N is integer number of MPI processes\n"); + fprintf(stderr," - S is the class S, W, or A \n"); + fprintf(stderr," - GraphName is the communication graph name BH, WH, or SH.\n"); + fprintf(stderr," - the number of MPI processes N should not be be less than \n"); + fprintf(stderr," the number of nodes in the graph\n"); + } + MPI_Finalize(); + exit(0); + } + if(strncmp(argv[1],"BH",2)==0){ + dg=buildBH(CLASS); + }else if(strncmp(argv[1],"WH",2)==0){ + dg=buildWH(CLASS); + }else if(strncmp(argv[1],"SH",2)==0){ + dg=buildSH(CLASS); + } + + if(timer_on&&dg->numNodes+1>timers_tot){ + timer_on=0; + if(my_rank==0) + fprintf(stderr,"Not enough timers. Node timeing is off. \n"); + } + if(dg->numNodes>comm_size){ + if(my_rank==0){ + fprintf(stderr,"** The number of MPI processes should not be less than \n"); + fprintf(stderr,"** the number of nodes in the graph\n"); + fprintf(stderr,"** Number of MPI processes = %d\n",comm_size); + fprintf(stderr,"** Number nodes in the graph = %d\n",dg->numNodes); + } + MPI_Finalize(); + exit(0); + } + for(i=0;inumNodes;i++){ + dg->node[i]->address=i; + } + if( my_rank == 0 ){ + printf( "\n\n NAS Parallel Benchmarks 3.3 -- DT Benchmark\n\n" ); + graphShow(dg,0); + timer_clear(0); + timer_start(0); + } + verified=ProcessNodes(dg,my_rank); + + featnum=NUM_SAMPLES*fielddim; + bytes_sent=featnum*dg->numArcs; + bytes_sent/=1048576; + if(my_rank==0){ + timer_stop(0); + tot_time=timer_read(0); + c_print_results( dg->name, + CLASS, + featnum, + 0, + 0, + dg->numNodes, + 0, + comm_size, + tot_time, + bytes_sent/tot_time, + "bytes transmitted", + verified, + NPBVERSION, + COMPILETIME, + MPICC, + CLINK, + CMPI_LIB, + CMPI_INC, + CFLAGS, + CLINKFLAGS ); + } + MPI_Finalize(); + return 1; +} diff --git a/examples/smpi/NAS/Makefile b/examples/smpi/NAS/Makefile index 0386e2d556..794191c451 100644 --- a/examples/smpi/NAS/Makefile +++ b/examples/smpi/NAS/Makefile @@ -56,6 +56,10 @@ DT-trace: dt-trace dt-trace: header cd DT-trace; $(MAKE) CLASS=$(CLASS) +DT-folding: dt-folding +dt-folding: header + cd DT-folding; $(MAKE) CLASS=$(CLASS) + # Awk script courtesy cmg@cray.com, modified by Haoqiang Jin suite: @ awk -f sys/suite.awk SMAKE=$(MAKE) $(SFILE) | $(SHELL)