From ff6722c4292f04aef9e1417b5698881f1fd414eb Mon Sep 17 00:00:00 2001 From: Frederic Suter Date: Thu, 10 Mar 2016 16:26:31 +0100 Subject: [PATCH 1/1] reindent and only one timer type --- examples/smpi/NAS/DGraph.c | 2 + examples/smpi/NAS/dt.c | 485 ++++++++++++++++----------------- examples/smpi/NAS/ep.c | 56 ++-- examples/smpi/NAS/is.c | 3 +- examples/smpi/NAS/nas_common.c | 16 -- examples/smpi/NAS/nas_common.h | 6 +- 6 files changed, 260 insertions(+), 308 deletions(-) diff --git a/examples/smpi/NAS/DGraph.c b/examples/smpi/NAS/DGraph.c index 6dd0ed353c..530bf3ee94 100644 --- a/examples/smpi/NAS/DGraph.c +++ b/examples/smpi/NAS/DGraph.c @@ -10,6 +10,7 @@ DGArc *newArc(DGNode *tl,DGNode *hd){ ar->head=hd; return ar; } + void arcShow(DGArc *ar){ DGNode *tl=(DGNode *)ar->tail, *hd=(DGNode *)ar->head; @@ -30,6 +31,7 @@ DGNode *newNode(char *nm){ nd->feat=NULL; return nd; } + void nodeShow(DGNode* nd){ fprintf( stderr,"%3d.%s: (%d,%d)\n", nd->id,nd->name,nd->inDegree,nd->outDegree); /* diff --git a/examples/smpi/NAS/dt.c b/examples/smpi/NAS/dt.c index ca3a18a942..06625ea782 100644 --- a/examples/smpi/NAS/dt.c +++ b/examples/smpi/NAS/dt.c @@ -1,51 +1,51 @@ /************************************************************************* * * - * N A S P A R A L L E L B E N C H M A R K S 3.3 * - * * - * D T * - * * + * N A S P A R A L L E L B E N C H M A R K S 3.3 * + * * + * D T * + * * ************************************************************************* - * * - * This benchmark is part of the NAS Parallel Benchmark 3.3 suite. * - * * - * Permission to use, copy, distribute and modify this software * - * for any purpose with or without fee is hereby granted. We * - * request, however, that all derived work reference the NAS * - * Parallel Benchmarks 3.3. This software is provided "as is" * - * without express or implied warranty. * - * * - * Information on NPB 3.3, including the technical report, the * - * original specifications, source code, results and information * - * on how to submit new results, is available at: * - * * - * http: www.nas.nasa.gov/Software/NPB * - * * - * Send comments or suggestions to npb@nas.nasa.gov * - * Send bug reports to npb-bugs@nas.nasa.gov * - * * - * NAS Parallel Benchmarks Group * - * NASA Ames Research Center * - * Mail Stop: T27A-1 * - * Moffett Field, CA 94035-1000 * - * * - * E-mail: npb@nas.nasa.gov * - * Fax: (650) 604-3957 * - * * + * * + * This benchmark is part of the NAS Parallel Benchmark 3.3 suite. * + * * + * Permission to use, copy, distribute and modify this software * + * for any purpose with or without fee is hereby granted. We * + * request, however, that all derived work reference the NAS * + * Parallel Benchmarks 3.3. This software is provided "as is" * + * without express or implied warranty. * + * * + * Information on NPB 3.3, including the technical report, the * + * original specifications, source code, results and information * + * on how to submit new results, is available at: * + * * + * http: www.nas.nasa.gov/Software/NPB * + * * + * Send comments or suggestions to npb@nas.nasa.gov * + * Send bug reports to npb-bugs@nas.nasa.gov * + * * + * NAS Parallel Benchmarks Group * + * NASA Ames Research Center * + * Mail Stop: T27A-1 * + * Moffett Field, CA 94035-1000 * + * * + * E-mail: npb@nas.nasa.gov * + * Fax: (650) 604-3957 * + * * ************************************************************************* - * * - * Author: M. Frumkin * * - * * + * * + * Author: M. Frumkin * * + * * *************************************************************************/ #include #include #include +#include "DGraph.h" #include "smpi/mpi.h" #include "nas_common.h" #include "simgrid/instr.h" //TRACE_ - int timer_on=0,timers_tot=64; double start[64], elapsed[64]; @@ -56,103 +56,102 @@ int deviation; int num_sources; static int verify(char *bmname,double rnm2){ - double verify_value=0.0; - double epsilon=1.0E-8; - int verified=-1; - if (class != 'U') { - if(class=='S') { - if(strstr(bmname,"BH")){ - verify_value=30892725.0; - }else if(strstr(bmname,"WH")){ - verify_value=67349758.0; - }else if(strstr(bmname,"SH")){ - verify_value=58875767.0; - }else{ - fprintf(stderr,"No such benchmark as %s.\n",bmname); - } - verified = 0; - }else if(class=='W') { - if(strstr(bmname,"BH")){ - verify_value = 4102461.0; - }else if(strstr(bmname,"WH")){ - verify_value = 204280762.0; - }else if(strstr(bmname,"SH")){ - verify_value = 186944764.0; - }else{ - fprintf(stderr,"No such benchmark as %s.\n",bmname); - } - verified = 0; - }else if(class=='A') { - if(strstr(bmname,"BH")){ - verify_value = 17809491.0; - }else if(strstr(bmname,"WH")){ - verify_value = 1289925229.0; - }else if(strstr(bmname,"SH")){ - verify_value = 610856482.0; - }else{ - fprintf(stderr,"No such benchmark as %s.\n",bmname); - } + double verify_value=0.0; + double epsilon=1.0E-8; + int verified=-1; + if (class != 'U') { + if(class=='S') { + if(strstr(bmname,"BH")){ + verify_value=30892725.0; + }else if(strstr(bmname,"WH")){ + verify_value=67349758.0; + }else if(strstr(bmname,"SH")){ + verify_value=58875767.0; + }else{ + fprintf(stderr,"No such benchmark as %s.\n",bmname); + } verified = 0; - }else if(class=='B') { - if(strstr(bmname,"BH")){ - verify_value = 4317114.0; - }else if(strstr(bmname,"WH")){ - verify_value = 7877279917.0; - }else if(strstr(bmname,"SH")){ - verify_value = 1836863082.0; - }else{ - fprintf(stderr,"No such benchmark as %s.\n",bmname); - verified = 0; - } - }else if(class=='C') { - if(strstr(bmname,"BH")){ - verify_value = 0.0; - }else if(strstr(bmname,"WH")){ - verify_value = 0.0; - }else if(strstr(bmname,"SH")){ - verify_value = 0.0; - }else{ - fprintf(stderr,"No such benchmark as %s.\n",bmname); - verified = -1; - } - }else if(class=='D') { - if(strstr(bmname,"BH")){ - verify_value = 0.0; - }else if(strstr(bmname,"WH")){ - verify_value = 0.0; - }else if(strstr(bmname,"SH")){ - verify_value = 0.0; - }else{ - fprintf(stderr,"No such benchmark as %s.\n",bmname); - } - verified = -1; - }else{ - fprintf(stderr,"No such class as %c.\n",class); - } - fprintf(stderr," %s L2 Norm = %f\n",bmname,rnm2); - if(verified==-1){ - fprintf(stderr," No verification was performed.\n"); - }else if( rnm2 - verify_value < epsilon && - rnm2 - verify_value > -epsilon) { /* abs here does not work on ALTIX */ + }else if(class=='W') { + if(strstr(bmname,"BH")){ + verify_value = 4102461.0; + }else if(strstr(bmname,"WH")){ + verify_value = 204280762.0; + }else if(strstr(bmname,"SH")){ + verify_value = 186944764.0; + }else{ + fprintf(stderr,"No such benchmark as %s.\n",bmname); + } + verified = 0; + }else if(class=='A') { + if(strstr(bmname,"BH")){ + verify_value = 17809491.0; + }else if(strstr(bmname,"WH")){ + verify_value = 1289925229.0; + }else if(strstr(bmname,"SH")){ + verify_value = 610856482.0; + }else{ + fprintf(stderr,"No such benchmark as %s.\n",bmname); + } + verified = 0; + }else if(class=='B') { + if(strstr(bmname,"BH")){ + verify_value = 4317114.0; + }else if(strstr(bmname,"WH")){ + verify_value = 7877279917.0; + }else if(strstr(bmname,"SH")){ + verify_value = 1836863082.0; + }else{ + fprintf(stderr,"No such benchmark as %s.\n",bmname); + verified = 0; + } + }else if(class=='C') { + if(strstr(bmname,"BH")){ + verify_value = 0.0; + }else if(strstr(bmname,"WH")){ + verify_value = 0.0; + }else if(strstr(bmname,"SH")){ + verify_value = 0.0; + }else{ + fprintf(stderr,"No such benchmark as %s.\n",bmname); + verified = -1; + } + }else if(class=='D') { + if(strstr(bmname,"BH")){ + verify_value = 0.0; + }else if(strstr(bmname,"WH")){ + verify_value = 0.0; + }else if(strstr(bmname,"SH")){ + verify_value = 0.0; + }else{ + fprintf(stderr,"No such benchmark as %s.\n",bmname); + } + verified = -1; + }else{ + fprintf(stderr,"No such class as %c.\n",class); + } + fprintf(stderr," %s L2 Norm = %f\n",bmname,rnm2); + if(verified==-1){ + fprintf(stderr," No verification was performed.\n"); + }else if( rnm2 - verify_value < epsilon && rnm2 - verify_value > -epsilon) { /* abs here does not work on ALTIX */ verified = 1; fprintf(stderr," Deviation = %f\n",(rnm2 - verify_value)); - }else{ - verified = 0; - fprintf(stderr," The correct verification value = %f\n",verify_value); - fprintf(stderr," Got value = %f\n",rnm2); - } }else{ - verified = -1; + verified = 0; + fprintf(stderr," The correct verification value = %f\n",verify_value); + fprintf(stderr," Got value = %f\n",rnm2); } - return verified; + }else{ + verified = -1; } + return verified; +} static int ipowMod(int a,long long int n,int md){ int seed=1,q=a,r=1; if(n<0){ fprintf(stderr,"ipowMod: exponent must be nonnegative exp=%lld\n",n); n=-n; /* temp fix */ -/* return 1; */ + /* return 1; */ } if(md<=0){ fprintf(stderr,"ipowMod: module must be positive mod=%d",md); @@ -162,25 +161,21 @@ static int ipowMod(int a,long long int n,int md){ while(n>1){ int n2 = n/2; if (n2*2==n){ - seed = (q*q)%md; - q=seed; - n = n2; + seed = (q*q)%md; + q=seed; + n = n2; }else{ - seed = (r*q)%md; - r=seed; - n = n-1; + seed = (r*q)%md; + r=seed; + n = n-1; } } seed = (r*q)%md; return seed; } -#include "DGraph.h" static DGraph *buildSH(const char cls){ -/* - Nodes of the graph must be topologically sorted - to avoid MPI deadlock. -*/ +/* Nodes of the graph must be topologically sorted to avoid MPI deadlock. */ DGraph *dg; int numSources=num_sources; /* must be power of 2 */ int numOfLayers=0,tmpS=numSources>>1; @@ -190,18 +185,18 @@ static DGraph *buildSH(const char cls){ int mask=0x0,ndid=0,ndoff=0; int i=0,j=0; char nm[BLOCK_SIZE]; - + sprintf(nm,"DT_SH.%c",cls); dg=newDGraph(nm); while(tmpS>1){ - numOfLayers++; - tmpS>>=1; + numOfLayers++; + tmpS>>=1; } for(i=0;inode[ndid],nd); + ar=newArc(dg->node[ndid],nd); AttachArc(dg,ar); ndoff+=mask; ndid=firstLayerNode+ndoff; - ar=newArc(dg->node[ndid],nd); + ar=newArc(dg->node[ndid],nd); AttachArc(dg,ar); } firstLayerNode+=numSources; @@ -227,15 +222,16 @@ static DGraph *buildSH(const char cls){ AttachNode(dg,nd); ndoff=i&(~mask); ndid=firstLayerNode+ndoff; - ar=newArc(dg->node[ndid],nd); + ar=newArc(dg->node[ndid],nd); AttachArc(dg,ar); ndoff+=mask; ndid=firstLayerNode+ndoff; - ar=newArc(dg->node[ndid],nd); + ar=newArc(dg->node[ndid],nd); AttachArc(dg,ar); } -return dg; + return dg; } + static DGraph *buildWH(const char cls){ /* Nodes of the graph must be topologically sorted to avoid MPI deadlock. */ int i=0,j=0; @@ -269,7 +265,7 @@ static DGraph *buildWH(const char cls){ id=AttachNode(dg,nd); for(j=0;j=numPrevLayerNodes) break; + if(sid>=numPrevLayerNodes) break; snd=dg->node[firstLayerNode+sid]; ar=newArc(dg->node[id],snd); AttachArc(dg,ar); @@ -295,6 +291,7 @@ static DGraph *buildWH(const char cls){ } return dg; } + static DGraph *buildBH(const char cls){ /* Nodes of the graph must be topologically sorted to avoid MPI deadlock.*/ int i=0,j=0; @@ -326,7 +323,7 @@ static DGraph *buildBH(const char cls){ id=AttachNode(dg,nd); for(j=0;j=numPrevLayerNodes) break; + if(sid>=numPrevLayerNodes) break; snd=dg->node[firstLayerNode+sid]; ar=newArc(snd,dg->node[id]); AttachArc(dg,ar); @@ -342,7 +339,7 @@ static DGraph *buildBH(const char cls){ ar=newArc(nd,sink); AttachArc(dg,ar); } -return dg; + return dg; } typedef struct{ @@ -366,8 +363,7 @@ static void arrShow(Arr* a){ static double CheckVal(Arr *feat){ double csum=0.0; - int i=0; - for(i=0;ilen;i++){ + for(int i=0;ilen;i++){ csum+=feat->val[i]*feat->val[i]/feat->len; /* The truncation does not work since result will be 0 for large len */ } return csum; @@ -399,16 +395,13 @@ static Arr* RandomFeatures(char *bmname,int fdim,int id){ int nxg=2,nyg=2,nzg=2,nfg=5; int nx=421,ny=419,nz=1427,nf=3527; long long int expon=(len*(id+1))%3141592; - int seedx=ipowMod(nxg,expon,nx), - seedy=ipowMod(nyg,expon,ny), - seedz=ipowMod(nzg,expon,nz), - seedf=ipowMod(nfg,expon,nf); - int i=0; + int seedx=ipowMod(nxg,expon,nx), seedy=ipowMod(nyg,expon,ny), seedz=ipowMod(nzg,expon,nz),seedf=ipowMod(nfg,expon,nf); + if(timer_on){ timer_clear(id+1); timer_start(id+1); } - for(i=0;ival[i+3]=seedf; } if(timer_on){ - timer_stop(id+1); - fprintf(stderr,"** RandomFeatures time in node %d = %f\n",id,timer_read(id+1)); + timer_stop(id+1); + fprintf(stderr,"** RandomFeatures time in node %d = %f\n",id,timer_read(id+1)); } return feat; } static void Resample(Arr *a,int blen){ - long long int i=0,j=0,jlo=0,jhi=0; - double avval=0.0; - double *nval=(double *)malloc(blen*sizeof(double)); - //double *nval=(double *)SMPI_SHARED_MALLOC(blen*sizeof(double)); - for(i=0;ilen-1;i++){ - jlo=(int)(0.5*(2*i-1)*(blen/a->len)); - jhi=(int)(0.5*(2*i+1)*(blen/a->len)); - - avval=a->val[i]/(jhi-jlo+1); - for(j=jlo;j<=jhi;j++){ - nval[j]+=avval; - } + long long int i=0,j=0,jlo=0,jhi=0; + double avval=0.0; + double *nval=(double *)malloc(blen*sizeof(double)); + //double *nval=(double *)SMPI_SHARED_MALLOC(blen*sizeof(double)); + for(i=0;ilen-1;i++){ + jlo=(int)(0.5*(2*i-1)*(blen/a->len)); + jhi=(int)(0.5*(2*i+1)*(blen/a->len)); + + avval=a->val[i]/(jhi-jlo+1); + for(j=jlo;j<=jhi;j++){ + nval[j]+=avval; } - nval[0]=a->val[0]; - nval[blen-1]=a->val[a->len-1]; - free(a->val); //SMPI_SHARED_FREE(a->val); - a->val=nval; - a->len=blen; + } + nval[0]=a->val[0]; + nval[blen-1]=a->val[a->len-1]; + free(a->val); //SMPI_SHARED_FREE(a->val); + a->val=nval; + a->len=blen; } #define fielddim 4 @@ -461,20 +454,14 @@ static Arr* WindowFilter(Arr *a, Arr* b,int w){ if(a->lenlen) Resample(a,b->len); if(a->len>b->len) Resample(b,a->len); for(i=fielddim;ilen-fielddim;i+=fielddim){ - rms0=(a->val[i]-b->val[i])*(a->val[i]-b->val[i]) - +(a->val[i+1]-b->val[i+1])*(a->val[i+1]-b->val[i+1]) - +(a->val[i+2]-b->val[i+2])*(a->val[i+2]-b->val[i+2]) - +(a->val[i+3]-b->val[i+3])*(a->val[i+3]-b->val[i+3]); + rms0=(a->val[i]-b->val[i])*(a->val[i]-b->val[i]) +(a->val[i+1]-b->val[i+1])*(a->val[i+1]-b->val[i+1]) + +(a->val[i+2]-b->val[i+2])*(a->val[i+2]-b->val[i+2]) +(a->val[i+3]-b->val[i+3])*(a->val[i+3]-b->val[i+3]); j=i+fielddim; - rms1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j]) - +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1]) - +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2]) - +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]); + rms1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j]) +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1]) + +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2]) +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]); j=i-fielddim; - rmsm1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j]) - +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1]) - +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2]) - +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]); + rmsm1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j]) +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1]) + +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2]) +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]); k=0; if(rms1val[i]=weight*b->val[i]; a->val[i+1]=weight*b->val[i+1]; a->val[i+2]=weight*b->val[i+2]; - a->val[i+3]=weight*b->val[i+3]; + a->val[i+3]=weight*b->val[i+3]; }else if(k==1){ j=i+fielddim; a->val[i]=weight*b->val[j]; a->val[i+1]=weight*b->val[j+1]; a->val[i+2]=weight*b->val[j+2]; - a->val[i+3]=weight*b->val[j+3]; + a->val[i+3]=weight*b->val[j+3]; }else { /*if(k==-1)*/ j=i-fielddim; a->val[i]=weight*b->val[j]; a->val[i+1]=weight*b->val[j+1]; a->val[i+2]=weight*b->val[j+2]; - a->val[i+3]=weight*b->val[j+3]; - } + a->val[i+3]=weight*b->val[j+3]; + } } if(timer_on){ timer_stop(w); @@ -527,6 +514,7 @@ static int SendResults(DGraph *dg,DGNode *nd,Arr *feat){ TRACE_smpi_set_category (NULL); return 1; } + static Arr* CombineStreams(DGraph *dg,DGNode *nd){ Arr *resfeat=newArr(num_samples*fielddim); int i=0,len=0,tag=0; @@ -552,7 +540,7 @@ static Arr* CombineStreams(DGraph *dg,DGNode *nd){ featp=(Arr *)tail->feat; feat=newArr(featp->len); memcpy(feat->val,featp->val,featp->len*sizeof(double)); - resfeat=WindowFilter(resfeat,feat,nd->id); + resfeat=WindowFilter(resfeat,feat,nd->id); free(feat);//SMPI_SHARED_FREE(feat); } } @@ -596,10 +584,10 @@ static double ReduceStreams(DGraph *dg,DGNode *nd){ MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status); feat=newArr(len); MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status); - csum+=Reduce(feat,(nd->id+1)); + csum+=Reduce(feat,(nd->id+1)); free(feat);//SMPI_SHARED_FREE(feat); }else{ - csum+=Reduce(tail->feat,(nd->id+1)); + csum+=Reduce(tail->feat,(nd->id+1)); } } if(nd->inDegree>0)csum=(((long long int)csum)/nd->inDegree); @@ -621,7 +609,7 @@ static int ProcessNodes(DGraph *dg,int me){ nd=dg->node[i]; if(nd->address!=me) continue; if(strstr(nd->name,"Source")){ - nd->feat=RandomFeatures(dg->name,fielddim,nd->id); + nd->feat=RandomFeatures(dg->name,fielddim,nd->id); SendResults(dg,nd,nd->feat); }else if(strstr(nd->name,"Sink")){ chksum=ReduceStreams(dg,nd); @@ -641,13 +629,13 @@ static int ProcessNodes(DGraph *dg,int me){ for(i=0;inumNodes;i++){ nd=dg->node[i]; if(!strstr(nd->name,"Sink")) continue; - tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */ + tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */ MPI_Recv(&rchksum,1,MPI_DOUBLE,nd->address,tag,MPI_COMM_WORLD,&status); chksum+=rchksum; } verified=verify(dg->name,chksum); } -return verified; + return verified; } int main(int argc,char **argv ){ @@ -665,7 +653,7 @@ int main(int argc,char **argv ){ get_info(argc, argv, &nprocs, &class); check_info(DT, nprocs, class); - if (class == 'S') { num_samples=1728; deviation=128; num_sources=4; } + if (class == 'S') { num_samples=1728; deviation=128; num_sources=4; } else if (class == 'W') { num_samples=1728*8; deviation=128*2; num_sources=4*2; } else if (class == 'A') { num_samples=1728*64; deviation=128*4; num_sources=4*4; } else if (class == 'B') { num_samples=1728*512; deviation=128*8; num_sources=4*8; } @@ -676,63 +664,62 @@ int main(int argc,char **argv ){ exit(1); } - - if(argc!=2|| ( strncmp(argv[1],"BH",2)!=0 && strncmp(argv[1],"WH",2)!=0 &&strncmp(argv[1],"SH",2)!=0)){ - if(my_rank==0){ - fprintf(stderr,"** Usage: mpirun -np N ../bin/dt.S GraphName\n"); - fprintf(stderr,"** Where \n - N is integer number of MPI processes\n"); - fprintf(stderr," - S is the class S, W, or A \n"); - fprintf(stderr," - GraphName is the communication graph name BH, WH, or SH.\n"); - fprintf(stderr," - the number of MPI processes N should not be be less than \n"); - fprintf(stderr," the number of nodes in the graph\n"); - } - MPI_Finalize(); - exit(0); - } - if(strncmp(argv[1],"BH",2)==0){ - dg=buildBH(class); - }else if(strncmp(argv[1],"WH",2)==0){ - dg=buildWH(class); - }else if(strncmp(argv[1],"SH",2)==0){ - dg=buildSH(class); - } - - if(timer_on&&dg->numNodes+1>timers_tot){ - timer_on=0; - if(my_rank==0) - fprintf(stderr,"Not enough timers. Node timeing is off. \n"); - } - if(dg->numNodes>comm_size){ - if(my_rank==0){ - fprintf(stderr,"** The number of MPI processes should not be less than \n"); - fprintf(stderr,"** the number of nodes in the graph\n"); - fprintf(stderr,"** Number of MPI processes = %d\n",comm_size); - fprintf(stderr,"** Number nodes in the graph = %d\n",dg->numNodes); - } - MPI_Finalize(); - exit(0); - } - for(i=0;inumNodes;i++){ - dg->node[i]->address=i; - } - if( my_rank == 0 ){ - printf( "\n\n NAS Parallel Benchmarks 3.3 -- DT Benchmark\n\n" ); - graphShow(dg,0); - timer_clear(0); - timer_start(0); + if(argc!=2|| ( strncmp(argv[1],"BH",2)!=0 && strncmp(argv[1],"WH",2)!=0 &&strncmp(argv[1],"SH",2)!=0)){ + if(my_rank==0){ + fprintf(stderr,"** Usage: mpirun -np N ../bin/dt.S GraphName\n"); + fprintf(stderr,"** Where \n - N is integer number of MPI processes\n"); + fprintf(stderr," - S is the class S, W, or A \n"); + fprintf(stderr," - GraphName is the communication graph name BH, WH, or SH.\n"); + fprintf(stderr," - the number of MPI processes N should not be be less than \n"); + fprintf(stderr," the number of nodes in the graph\n"); } - verified=ProcessNodes(dg,my_rank); - TRACE_smpi_set_category ("end"); + MPI_Finalize(); + exit(0); + } + if(strncmp(argv[1],"BH",2)==0){ + dg=buildBH(class); + }else if(strncmp(argv[1],"WH",2)==0){ + dg=buildWH(class); + }else if(strncmp(argv[1],"SH",2)==0){ + dg=buildSH(class); + } - featnum=num_samples*fielddim; - bytes_sent=featnum*dg->numArcs; - bytes_sent/=1048576; + if(timer_on&&dg->numNodes+1>timers_tot){ + timer_on=0; + if(my_rank==0) + fprintf(stderr,"Not enough timers. Node timeing is off. \n"); + } + if(dg->numNodes>comm_size){ if(my_rank==0){ - timer_stop(0); - tot_time=timer_read(0); - c_print_results( dg->name, class, featnum, 0, 0, dg->numNodes, 0, comm_size, tot_time, bytes_sent/tot_time, - "bytes transmitted", verified); - } + fprintf(stderr,"** The number of MPI processes should not be less than \n"); + fprintf(stderr,"** the number of nodes in the graph\n"); + fprintf(stderr,"** Number of MPI processes = %d\n",comm_size); + fprintf(stderr,"** Number nodes in the graph = %d\n",dg->numNodes); + } MPI_Finalize(); + exit(0); + } + for(i=0;inumNodes;i++){ + dg->node[i]->address=i; + } + if( my_rank == 0 ){ + printf( "\n\n NAS Parallel Benchmarks 3.3 -- DT Benchmark\n\n" ); + graphShow(dg,0); + timer_clear(0); + timer_start(0); + } + verified=ProcessNodes(dg,my_rank); + TRACE_smpi_set_category ("end"); + + featnum=num_samples*fielddim; + bytes_sent=featnum*dg->numArcs; + bytes_sent/=1048576; + if(my_rank==0){ + timer_stop(0); + tot_time=timer_read(0); + c_print_results( dg->name, class, featnum, 0, 0, dg->numNodes, 0, comm_size, tot_time, bytes_sent/tot_time, + "bytes transmitted", verified); + } + MPI_Finalize(); return 1; } diff --git a/examples/smpi/NAS/ep.c b/examples/smpi/NAS/ep.c index c56cf2043f..33aa77e7bf 100644 --- a/examples/smpi/NAS/ep.c +++ b/examples/smpi/NAS/ep.c @@ -19,26 +19,16 @@ int main(int argc, char **argv) { double epsilon=1.0E-8, a = 1220703125., s=271828183.; double t1, t2, t3, t4; double sx_verify_value, sy_verify_value, sx_err, sy_err; + int timers_enabled = true; int m, mk=16, mm, nn, nk = (int)(pow(2,mk)), nq=10, - np, - node, - no_nodes, - i, - ik, - kk, - l, - k, nit, no_large_nodes, - np_add, k_offset; + np, node, no_nodes, i, ik, kk, l, k, nit, no_large_nodes, np_add, k_offset; int root=0; - int verified; - char size[500]; // mind the size of the string to represent a big number - - double *start = (double *) malloc (64*sizeof(double)); - double *elapsed = (double *) malloc (64*sizeof(double)); + int verified; + char size[500]; // mind the size of the string to represent a big number double *x = (double *) malloc (2*nk*sizeof(double)); double *q = (double *) malloc (nq*sizeof(double)); @@ -70,8 +60,7 @@ int main(int argc, char **argv) { if (node == root ) { /* Because the size of the problem is too large to store in a 32-bit integer for some classes, we put it into a * string (for printing). Have to strip off the decimal point put in there by the floating point print statement - * (internal file) - */ + * (internal file) */ fprintf(stdout," NAS Parallel Benchmarks 3.2 -- EP Benchmark"); sprintf(size,"%zu",(unsigned long)pow(2,m+1)); //size = size.replace('.', ' '); @@ -81,9 +70,7 @@ int main(int argc, char **argv) { verified = false; /* Compute the number of "batches" of random number pairs generated per processor. Adjust if the number of processors - * does not evenly divide the total number - */ - + * does not evenly divide the total number */ np = nn / no_nodes; no_large_nodes = nn % no_nodes; if (node < no_large_nodes) np_add = 1; @@ -109,15 +96,15 @@ int main(int argc, char **argv) { } Mops = log(sqrt(abs(1))); - /* Synchronize before placing time stamp */ + /* Synchronize before placing time stamp */ MPI_Barrier( MPI_COMM_WORLD ); TRACE_smpi_set_category ("ep"); - time_clear(&(elapsed[1])); - time_clear(&(elapsed[2])); - time_clear(&(elapsed[3])); - time_start(&(start[1])); + timer_clear(1); + timer_clear(2); + timer_clear(3); + timer_start(1); t1 = a; //fprintf(stdout,("(ep.f:160) t1 = " + t1); @@ -143,7 +130,6 @@ int main(int argc, char **argv) { /* Each instance of this loop may be performed independently. We compute the k offsets separately to take into account * the fact that some nodes have more numbers to generate than others */ - if (np_add == 1) k_offset = node * np -1; else @@ -175,7 +161,7 @@ int main(int argc, char **argv) { // Compute uniform pseudorandom numbers. //if (timers_enabled) timer_start(3); - time_start(&(start[3])); + timer_start(3); //call vranlc(2 * nk, t1, a, x) --> t1 and y are modified //fprintf(stdout,">>>>>>>>>>>Before vranlc(l.210)<<<<<<<<<<<<<"); @@ -194,12 +180,11 @@ int main(int argc, char **argv) { //fprintf(stdout,(">>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<"); //if (timers_enabled) timer_stop(3); - time_stop(3,elapsed,start); + timer_stop(3); /* Compute Gaussian deviates by acceptance-rejection method and tally counts in concentric square annuli. * This loop is not vectorizable. */ - //if (timers_enabled) timer_start(2); - time_start(&(start[2])); + if (timers_enabled) timer_start(2); for(i=1; i<=nk;i++) { x1 = 2. * x[2*i-2] -1.0; x2 = 2. * x[2*i-1] - 1.0; @@ -228,8 +213,7 @@ int main(int argc, char **argv) { } */ } - //if (timers_enabled) timer_stop(2); - time_stop(2,elapsed,start); + if (timers_enabled) timer_stop(2); } TRACE_smpi_set_category ("finalize"); @@ -247,8 +231,8 @@ int main(int argc, char **argv) { gc += q[i]; } - time_stop(1,elapsed,start); - tm = time_read(1,elapsed); + timer_stop(1); + tm = timer_read(1); MPI_Allreduce(&tm, x, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); tm = x[0]; @@ -308,9 +292,9 @@ int main(int argc, char **argv) { } c_print_results("EP", class, m+1, 0, 0, nit, nprocs, no_nodes, tm, Mops, "Random number generated",verified); - fprintf(stdout,"Total time: %f\n",(time_read(1,elapsed)/1000)); - fprintf(stdout,"Gaussian pairs: %f\n",(time_read(2,elapsed)/1000)); - fprintf(stdout,"Random numbers: %f\n",(time_read(3,elapsed)/1000)); + fprintf(stdout,"Total time: %f\n",(timer_read(1)/1000)); + fprintf(stdout,"Gaussian pairs: %f\n",(timer_read(2)/1000)); + fprintf(stdout,"Random numbers: %f\n",(timer_read(3)/1000)); } MPI_Finalize(); diff --git a/examples/smpi/NAS/is.c b/examples/smpi/NAS/is.c index 68cbb77487..e29b87c4ea 100644 --- a/examples/smpi/NAS/is.c +++ b/examples/smpi/NAS/is.c @@ -53,8 +53,7 @@ int max_key_log_2; int num_bucket_log_2; int min_procs=1; /* NOTE: THIS CODE CANNOT BE RUN ON ARBITRARILY LARGE NUMBERS OF PROCESSORS. THE LARGEST VERIFIED NUMBER IS 1024. - * INCREASE max_procs AT YOUR PERIL - */ + * INCREASE max_procs AT YOUR PERIL */ int max_procs=1024; int total_keys; diff --git a/examples/smpi/NAS/nas_common.c b/examples/smpi/NAS/nas_common.c index 51ac28f479..cda03cc85b 100644 --- a/examples/smpi/NAS/nas_common.c +++ b/examples/smpi/NAS/nas_common.c @@ -78,22 +78,6 @@ void check_info(int type, int nprocs, char class) } } -void time_clear(double *onetimer) { - *onetimer = 0.0; -} - -void time_start(double *onetimer) { - *onetimer = MPI_Wtime(); -} - -void time_stop(int n,double *elapsed,double *start) { - elapsed[n] = MPI_Wtime()- start[n]; -} - -double time_read(int n, double *elapsed) { /* ok, useless, but jsut to keep function call */ - return(elapsed[n]); -} - void timer_clear(int n) { elapsed[n] = 0.0; diff --git a/examples/smpi/NAS/nas_common.h b/examples/smpi/NAS/nas_common.h index 1990c0d1cc..a264f21957 100644 --- a/examples/smpi/NAS/nas_common.h +++ b/examples/smpi/NAS/nas_common.h @@ -14,16 +14,12 @@ enum benchmark_types {IS, DT, EP}; int ilog2(int i); + void timer_clear(int n); void timer_start(int n); void timer_stop(int n); double timer_read(int n); -void time_clear(double *onetimer); -void time_start(double *onetimer); -void time_stop(int n,double *elapsed,double *start); -double time_read(int n, double *elapsed); - double vranlc(int n, double x, double a, double *y); double randlc(double *X, double*A); -- 2.20.1