From: Frederic Suter Date: Thu, 10 Mar 2016 14:12:40 +0000 (+0100) Subject: Merge branch 'master' of git+ssh://scm.gforge.inria.fr//gitroot/simgrid/simgrid X-Git-Tag: v3_13~453 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/bc48db087894fd960073b3120cebf90e6b2f8c02?hp=d73e15ba74e08212c750b27197e964efd759974c Merge branch 'master' of git+ssh://scm.gforge.inria.fr//gitroot/simgrid/simgrid --- diff --git a/.gitignore b/.gitignore index 6a6cfb79c7..ce66e3d2c5 100644 --- a/.gitignore +++ b/.gitignore @@ -247,6 +247,9 @@ examples/smpi/mc/bugged2 examples/smpi/mc/mutual_exclusion examples/smpi/mc/non_deterministic examples/smpi/mc/send_deterministic +examples/smpi/NAS/dt +examples/smpi/NAS/ep +examples/smpi/NAS/is examples/smpi/mvmul examples/smpi/replay_multiple/replay_multiple examples/smpi/replay/one_trace diff --git a/examples/smpi/NAS/CMakeLists.txt b/examples/smpi/NAS/CMakeLists.txt new file mode 100644 index 0000000000..b4de3b0f4c --- /dev/null +++ b/examples/smpi/NAS/CMakeLists.txt @@ -0,0 +1,25 @@ +if(enable_smpi) + if(WIN32) + set(CMAKE_C_FLAGS "-include ${CMAKE_HOME_DIRECTORY}/include/smpi/smpi_main.h") + else() + set(CMAKE_C_COMPILER "${CMAKE_BINARY_DIR}/smpi_script/bin/smpicc") + endif() + + include_directories(BEFORE "${CMAKE_HOME_DIRECTORY}/include/smpi") + add_executable (is is.c nas_common.c) + target_link_libraries(is simgrid m) + add_executable (ep ep.c nas_common.c) + target_link_libraries(ep simgrid m) + add_executable (dt dt.c nas_common.c DGraph.c) + target_link_libraries(dt simgrid m) +endif() + +set(examples_src ${examples_src} ${CMAKE_CURRENT_SOURCE_DIR}/nas_common.h + ${CMAKE_CURRENT_SOURCE_DIR}/nas_common.c + ${CMAKE_CURRENT_SOURCE_DIR}/is.c + ${CMAKE_CURRENT_SOURCE_DIR}/dt.c + ${CMAKE_CURRENT_SOURCE_DIR}/ep.c + ${CMAKE_CURRENT_SOURCE_DIR}/DGraph.c + ${CMAKE_CURRENT_SOURCE_DIR}/DGraph.h + PARENT_SCOPE) +set(txt_files ${txt_files} ${CMAKE_CURRENT_SOURCE_DIR}/README.install PARENT_SCOPE) \ No newline at end of 
file diff --git a/examples/smpi/NAS/DT/DGraph.c b/examples/smpi/NAS/DGraph.c similarity index 82% rename from examples/smpi/NAS/DT/DGraph.c rename to examples/smpi/NAS/DGraph.c index f573786db8..6dd0ed353c 100644 --- a/examples/smpi/NAS/DT/DGraph.c +++ b/examples/smpi/NAS/DGraph.c @@ -31,8 +31,7 @@ DGNode *newNode(char *nm){ return nd; } void nodeShow(DGNode* nd){ - fprintf( stderr,"%3d.%s: (%d,%d)\n", - nd->id,nd->name,nd->inDegree,nd->outDegree); + fprintf( stderr,"%3d.%s: (%d,%d)\n", nd->id,nd->name,nd->inDegree,nd->outDegree); /* if(nd->verified==1) fprintf(stderr,"%ld.%s\t: usable.",nd->id,nd->name); else if(nd->verified==0) fprintf(stderr,"%ld.%s\t: unusable.",nd->id,nd->name); @@ -51,6 +50,7 @@ DGraph* newDGraph(char* nm){ dg->name=strdup(nm); return dg; } + int AttachNode(DGraph* dg, DGNode* nd) { int i=0,j,len=0; DGNode **nds =NULL, *tmpnd=NULL; @@ -58,13 +58,13 @@ int AttachNode(DGraph* dg, DGNode* nd) { if (dg->numNodes == dg->maxNodes-1 ) { dg->maxNodes += BLOCK_SIZE; - nds =(DGNode **) calloc(dg->maxNodes,sizeof(DGNode*)); + nds =(DGNode **) calloc(dg->maxNodes,sizeof(DGNode*)); memcpy(nds,dg->node,(dg->maxNodes-BLOCK_SIZE)*sizeof(DGNode*)); free(dg->node); dg->node=nds; } - len = strlen( nd->name); + len = strlen( nd->name); for (i = 0; i < dg->numNodes; i++) { tmpnd =dg->node[ i]; ar=NULL; @@ -72,7 +72,7 @@ int AttachNode(DGraph* dg, DGNode* nd) { if ( strncmp( nd->name, tmpnd->name, len) ) continue; if ( nd->inDegree > 0 ) { tmpnd->maxInDegree += nd->maxInDegree; - ar =(DGArc **) calloc(tmpnd->maxInDegree,sizeof(DGArc*)); + ar =(DGArc **) calloc(tmpnd->maxInDegree,sizeof(DGArc*)); memcpy(ar,tmpnd->inArc,(tmpnd->inDegree)*sizeof(DGArc*)); free(tmpnd->inArc); tmpnd->inArc=ar; @@ -84,60 +84,58 @@ int AttachNode(DGraph* dg, DGNode* nd) { } if ( nd->outDegree > 0 ) { tmpnd->maxOutDegree += nd->maxOutDegree; - ar =(DGArc **) calloc(tmpnd->maxOutDegree,sizeof(DGArc*)); + ar =(DGArc **) calloc(tmpnd->maxOutDegree,sizeof(DGArc*)); 
memcpy(ar,tmpnd->outArc,(tmpnd->outDegree)*sizeof(DGArc*)); free(tmpnd->outArc); tmpnd->outArc=ar; for (j = 0; j < nd->outDegree; j++ ) { nd->outArc[ j]->tail = tmpnd; - } + } memcpy( &(tmpnd->outArc[tmpnd->outDegree]),nd->outArc,nd->outDegree*sizeof( DGArc *)); tmpnd->outDegree += nd->outDegree; - } + } free(nd); return i; } nd->id = dg->numNodes; dg->node[dg->numNodes] = nd; dg->numNodes++; -return nd->id; + return nd->id; } + int AttachArc(DGraph *dg,DGArc* nar){ -int arcId = -1; -int i=0,newNumber=0; -DGNode *head = nar->head, - *tail = nar->tail; -DGArc **ars=NULL,*probe=NULL; -/*fprintf(stderr,"AttachArc %ld\n",dg->numArcs); */ + int arcId = -1; + int i=0,newNumber=0; + DGNode *head = nar->head, + *tail = nar->tail; + DGArc **ars=NULL,*probe=NULL; + /*fprintf(stderr,"AttachArc %ld\n",dg->numArcs); */ if ( !tail || !head ) return arcId; if ( dg->numArcs == dg->maxArcs-1 ) { dg->maxArcs += BLOCK_SIZE; - ars =(DGArc **) calloc(dg->maxArcs,sizeof(DGArc*)); + ars =(DGArc **) calloc(dg->maxArcs,sizeof(DGArc*)); memcpy(ars,dg->arc,(dg->maxArcs-BLOCK_SIZE)*sizeof(DGArc*)); free(dg->arc); dg->arc=ars; } for(i = 0; i < tail->outDegree; i++ ) { /* parallel arc */ probe = tail->outArc[ i]; - if(probe->head == head - && - probe->length == nar->length - ){ - free(nar); - return probe->id; + if(probe->head == head && probe->length == nar->length){ + free(nar); + return probe->id; } } - + nar->id = dg->numArcs; arcId=dg->numArcs; dg->arc[dg->numArcs] = nar; dg->numArcs++; - + head->inArc[ head->inDegree] = nar; head->inDegree++; if ( head->inDegree >= head->maxInDegree ) { newNumber = head->maxInDegree + SMALL_BLOCK_SIZE; - ars =(DGArc **) calloc(newNumber,sizeof(DGArc*)); + ars =(DGArc **) calloc(newNumber,sizeof(DGArc*)); memcpy(ars,head->inArc,(head->inDegree)*sizeof(DGArc*)); free(head->inArc); head->inArc=ars; @@ -147,15 +145,16 @@ DGArc **ars=NULL,*probe=NULL; tail->outDegree++; if(tail->outDegree >= tail->maxOutDegree ) { newNumber = tail->maxOutDegree + 
SMALL_BLOCK_SIZE; - ars =(DGArc **) calloc(newNumber,sizeof(DGArc*)); + ars =(DGArc **) calloc(newNumber,sizeof(DGArc*)); memcpy(ars,tail->outArc,(tail->outDegree)*sizeof(DGArc*)); free(tail->outArc); tail->outArc=ars; tail->maxOutDegree = newNumber; } /*fprintf(stderr,"AttachArc: head->in=%d tail->out=%ld\n",head->inDegree,tail->outDegree);*/ -return arcId; + return arcId; } + void graphShow(DGraph *dg,int DetailsLevel){ int i=0,j=0; fprintf(stderr,"%d.%s: (%d,%d)\n",dg->id,dg->name,dg->numNodes,dg->numArcs); @@ -164,8 +163,8 @@ void graphShow(DGraph *dg,int DetailsLevel){ DGNode *focusNode = dg->node[ i]; if(DetailsLevel >= 2) { for (j = 0; j < focusNode->inDegree; j++ ) { - fprintf(stderr,"\t "); - nodeShow(focusNode->inArc[ j]->tail); + fprintf(stderr,"\t "); + nodeShow(focusNode->inArc[ j]->tail); } } nodeShow(focusNode); @@ -173,12 +172,9 @@ void graphShow(DGraph *dg,int DetailsLevel){ for (j = 0; j < focusNode->outDegree; j++ ) { fprintf(stderr, "\t "); nodeShow(focusNode->outArc[ j]->head); - } + } fprintf(stderr, "---\n"); } fprintf(stderr,"----------------------------------------\n"); if ( DetailsLevel < 3) return; } - - - diff --git a/examples/smpi/NAS/DT/DGraph.h b/examples/smpi/NAS/DGraph.h similarity index 100% rename from examples/smpi/NAS/DT/DGraph.h rename to examples/smpi/NAS/DGraph.h diff --git a/examples/smpi/NAS/DT/Makefile b/examples/smpi/NAS/DT/Makefile deleted file mode 100644 index cfcfea51f8..0000000000 --- a/examples/smpi/NAS/DT/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -BENCHMARK=dt -include ../config/make.def -include ../sys/make.common - -OBJS = DGraph.o ${COMMON}/c_print_results.o ${COMMON}/c_timers.o ${COMMON}/randdp.o - -${PROGRAM}: config dt.o dt-folding.o ${OBJS} - ${CLINK} ${CLINKFLAGS} -o $(BINDIR)/dt.${CLASS} dt.o ${OBJS} ${CMPI_LIB} - ${CLINK} ${CLINKFLAGS} -o ${BINDIR}/dt-folding.${CLASS} dt-folding.o ${OBJS} ${CMPI_LIB} - -.c.o: - ${CCOMPILE} $< - -dt.o: dt.c npbparams.h -dt-folding.o: dt-folding.c npbparams.h -DGraph.o: 
DGraph.c DGraph.h - -clean: - - rm -f *.o *~ npbparams.h diff --git a/examples/smpi/NAS/DT/README b/examples/smpi/NAS/DT/README deleted file mode 100644 index 873e3ae6f2..0000000000 --- a/examples/smpi/NAS/DT/README +++ /dev/null @@ -1,22 +0,0 @@ -Data Traffic benchmark DT is new in the NPB suite -(released as part of NPB3.x-MPI package). ----------------------------------------------------- - -DT is written in C and same executable can run on any number of processors, -provided this number is not less than the number of nodes in the communication -graph. DT benchmark takes one argument: BH, WH, or SH. This argument -specifies the communication graph Black Hole, White Hole, or SHuffle -respectively. The current release contains verification numbers for -CLASSES S, W, A, and B only. Classes C and D are defined, but verification -numbers are not provided in this release. - -The following table summarizes the number of nodes in the communication -graph based on CLASS and graph TYPE. - -CLASS N_Source N_Nodes(BH,WH) N_Nodes(SH) - S 4 5 12 - W 8 11 32 - A 16 21 80 - B 32 43 192 - C 64 85 448 - D 128 171 1024 diff --git a/examples/smpi/NAS/DT/dt-folding.c b/examples/smpi/NAS/DT/dt-folding.c deleted file mode 100644 index b088fbd5c8..0000000000 --- a/examples/smpi/NAS/DT/dt-folding.c +++ /dev/null @@ -1,759 +0,0 @@ -/************************************************************************* - * * - * N A S P A R A L L E L B E N C H M A R K S 3.3 * - * * - * D T * - * * - ************************************************************************* - * * - * This benchmark is part of the NAS Parallel Benchmark 3.3 suite. * - * * - * Permission to use, copy, distribute and modify this software * - * for any purpose with or without fee is hereby granted. We * - * request, however, that all derived work reference the NAS * - * Parallel Benchmarks 3.3. This software is provided "as is" * - * without express or implied warranty. 
* - * * - * Information on NPB 3.3, including the technical report, the * - * original specifications, source code, results and information * - * on how to submit new results, is available at: * - * * - * http: www.nas.nasa.gov/Software/NPB * - * * - * Send comments or suggestions to npb@nas.nasa.gov * - * Send bug reports to npb-bugs@nas.nasa.gov * - * * - * NAS Parallel Benchmarks Group * - * NASA Ames Research Center * - * Mail Stop: T27A-1 * - * Moffett Field, CA 94035-1000 * - * * - * E-mail: npb@nas.nasa.gov * - * Fax: (650) 604-3957 * - * * - ************************************************************************* - * * - * Author: M. Frumkin * * - * * - *************************************************************************/ - -#include -#include -#include - -#include "mpi.h" -#include "npbparams.h" - -#ifndef CLASS -#define CLASS 'S' -#define NUM_PROCS 1 -#endif - -//int passed_verification; -extern double randlc( double *X, double *A ); -extern -void c_print_results( char *name, - char class, - int n1, - int n2, - int n3, - int niter, - int nprocs_compiled, - int nprocs_total, - double t, - double mops, - char *optype, - int passed_verification, - char *npbversion, - char *compiletime, - char *mpicc, - char *clink, - char *cmpi_lib, - char *cmpi_inc, - char *cflags, - char *clinkflags ); - -void timer_clear( int n ); -void timer_start( int n ); -void timer_stop( int n ); -double timer_read( int n ); -int timer_on=0,timers_tot=64; - -int verify(char *bmname,double rnm2){ - double verify_value=0.0; - double epsilon=1.0E-8; - char cls=CLASS; - int verified=-1; - if (cls != 'U') { - if(cls=='S') { - if(strstr(bmname,"BH")){ - verify_value=30892725.0; - }else if(strstr(bmname,"WH")){ - verify_value=67349758.0; - }else if(strstr(bmname,"SH")){ - verify_value=58875767.0; - }else{ - fprintf(stderr,"No such benchmark as %s.\n",bmname); - } - verified = 0; - }else if(cls=='W') { - if(strstr(bmname,"BH")){ - verify_value = 4102461.0; - }else 
if(strstr(bmname,"WH")){ - verify_value = 204280762.0; - }else if(strstr(bmname,"SH")){ - verify_value = 186944764.0; - }else{ - fprintf(stderr,"No such benchmark as %s.\n",bmname); - } - verified = 0; - }else if(cls=='A') { - if(strstr(bmname,"BH")){ - verify_value = 17809491.0; - }else if(strstr(bmname,"WH")){ - verify_value = 1289925229.0; - }else if(strstr(bmname,"SH")){ - verify_value = 610856482.0; - }else{ - fprintf(stderr,"No such benchmark as %s.\n",bmname); - } - verified = 0; - }else if(cls=='B') { - if(strstr(bmname,"BH")){ - verify_value = 4317114.0; - }else if(strstr(bmname,"WH")){ - verify_value = 7877279917.0; - }else if(strstr(bmname,"SH")){ - verify_value = 1836863082.0; - }else{ - fprintf(stderr,"No such benchmark as %s.\n",bmname); - verified = 0; - } - }else if(cls=='C') { - if(strstr(bmname,"BH")){ - verify_value = 0.0; - }else if(strstr(bmname,"WH")){ - verify_value = 0.0; - }else if(strstr(bmname,"SH")){ - verify_value = 0.0; - }else{ - fprintf(stderr,"No such benchmark as %s.\n",bmname); - verified = -1; - } - }else if(cls=='D') { - if(strstr(bmname,"BH")){ - verify_value = 0.0; - }else if(strstr(bmname,"WH")){ - verify_value = 0.0; - }else if(strstr(bmname,"SH")){ - verify_value = 0.0; - }else{ - fprintf(stderr,"No such benchmark as %s.\n",bmname); - } - verified = -1; - }else{ - fprintf(stderr,"No such class as %c.\n",cls); - } - fprintf(stderr," %s L2 Norm = %f\n",bmname,rnm2); - if(verified==-1){ - fprintf(stderr," No verification was performed.\n"); - }else if( rnm2 - verify_value < epsilon && - rnm2 - verify_value > -epsilon) { /* abs here does not work on ALTIX */ - verified = 1; - fprintf(stderr," Deviation = %f\n",(rnm2 - verify_value)); - }else{ - verified = 0; - fprintf(stderr," The correct verification value = %f\n",verify_value); - fprintf(stderr," Got value = %f\n",rnm2); - } - }else{ - verified = -1; - } - return verified; - } - -int ipowMod(int a,long long int n,int md){ - int seed=1,q=a,r=1; - if(n<0){ - 
fprintf(stderr,"ipowMod: exponent must be nonnegative exp=%lld\n",n); - n=-n; /* temp fix */ -/* return 1; */ - } - if(md<=0){ - fprintf(stderr,"ipowMod: module must be positive mod=%d",md); - return 1; - } - if(n==0) return 1; - while(n>1){ - int n2 = n/2; - if (n2*2==n){ - seed = (q*q)%md; - q=seed; - n = n2; - }else{ - seed = (r*q)%md; - r=seed; - n = n-1; - } - } - seed = (r*q)%md; - return seed; -} - -#include "DGraph.h" -DGraph *buildSH(char cls){ -/* - Nodes of the graph must be topologically sorted - to avoid MPI deadlock. -*/ - DGraph *dg; - int numSources=NUM_SOURCES; /* must be power of 2 */ - int numOfLayers=0,tmpS=numSources>>1; - int firstLayerNode=0; - DGArc *ar=NULL; - DGNode *nd=NULL; - int mask=0x0,ndid=0,ndoff=0; - int i=0,j=0; - char nm[BLOCK_SIZE]; - - sprintf(nm,"DT_SH.%c",cls); - dg=newDGraph(nm); - - while(tmpS>1){ - numOfLayers++; - tmpS>>=1; - } - for(i=0;inode[ndid],nd); - AttachArc(dg,ar); - ndoff+=mask; - ndid=firstLayerNode+ndoff; - ar=newArc(dg->node[ndid],nd); - AttachArc(dg,ar); - } - firstLayerNode+=numSources; - } - mask=0x00000001<node[ndid],nd); - AttachArc(dg,ar); - ndoff+=mask; - ndid=firstLayerNode+ndoff; - ar=newArc(dg->node[ndid],nd); - AttachArc(dg,ar); - } -return dg; -} -DGraph *buildWH(char cls){ -/* - Nodes of the graph must be topologically sorted - to avoid MPI deadlock. 
-*/ - int i=0,j=0; - int numSources=NUM_SOURCES,maxInDeg=4; - int numLayerNodes=numSources,firstLayerNode=0; - int totComparators=0; - int numPrevLayerNodes=numLayerNodes; - int id=0,sid=0; - DGraph *dg; - DGNode *nd=NULL,*source=NULL,*tmp=NULL,*snd=NULL; - DGArc *ar=NULL; - char nm[BLOCK_SIZE]; - - sprintf(nm,"DT_WH.%c",cls); - dg=newDGraph(nm); - - for(i=0;imaxInDeg){ - numLayerNodes=numLayerNodes/maxInDeg; - if(numLayerNodes*maxInDeg=numPrevLayerNodes) break; - snd=dg->node[firstLayerNode+sid]; - ar=newArc(dg->node[id],snd); - AttachArc(dg,ar); - } - } - firstLayerNode+=numPrevLayerNodes; - numPrevLayerNodes=numLayerNodes; - } - source=newNode("Source"); - AttachNode(dg,source); - for(i=0;inode[firstLayerNode+i]; - ar=newArc(source,nd); - AttachArc(dg,ar); - } - - for(i=0;inumNodes/2;i++){ /* Topological sorting */ - tmp=dg->node[i]; - dg->node[i]=dg->node[dg->numNodes-1-i]; - dg->node[i]->id=i; - dg->node[dg->numNodes-1-i]=tmp; - dg->node[dg->numNodes-1-i]->id=dg->numNodes-1-i; - } -return dg; -} -DGraph *buildBH(char cls){ -/* - Nodes of the graph must be topologically sorted - to avoid MPI deadlock. 
-*/ - int i=0,j=0; - int numSources=NUM_SOURCES,maxInDeg=4; - int numLayerNodes=numSources,firstLayerNode=0; - DGraph *dg; - DGNode *nd=NULL, *snd=NULL, *sink=NULL; - DGArc *ar=NULL; - int totComparators=0; - int numPrevLayerNodes=numLayerNodes; - int id=0, sid=0; - char nm[BLOCK_SIZE]; - - sprintf(nm,"DT_BH.%c",cls); - dg=newDGraph(nm); - - for(i=0;imaxInDeg){ - numLayerNodes=numLayerNodes/maxInDeg; - if(numLayerNodes*maxInDeg=numPrevLayerNodes) break; - snd=dg->node[firstLayerNode+sid]; - ar=newArc(snd,dg->node[id]); - AttachArc(dg,ar); - } - } - firstLayerNode+=numPrevLayerNodes; - numPrevLayerNodes=numLayerNodes; - } - sink=newNode("Sink"); - AttachNode(dg,sink); - for(i=0;inode[firstLayerNode+i]; - ar=newArc(nd,sink); - AttachArc(dg,ar); - } -return dg; -} - -typedef struct{ - int len; - double* val; -} Arr; -Arr *newArr(int len){ - Arr *arr=(Arr *)SMPI_SHARED_MALLOC(sizeof(Arr)); - arr->len=len; - arr->val=(double *)SMPI_SHARED_MALLOC(len*sizeof(double)); - return arr; -} -void arrShow(Arr* a){ - if(!a) fprintf(stderr,"-- NULL array\n"); - else{ - fprintf(stderr,"-- length=%d\n",a->len); - } -} -double CheckVal(Arr *feat){ - double csum=0.0; - int i=0; - for(i=0;ilen;i++){ - csum+=feat->val[i]*feat->val[i]/feat->len; /* The truncation does not work since - result will be 0 for large len */ - } - return csum; -} -int GetFNumDPar(int* mean, int* stdev){ - *mean=NUM_SAMPLES; - *stdev=STD_DEVIATION; - return 0; -} -int GetFeatureNum(char *mbname,int id){ - double tran=314159265.0; - double A=2*id+1; - double denom=randlc(&tran,&A); - char cval='S'; - int mean=NUM_SAMPLES,stdev=128; - int rtfs=0,len=0; - GetFNumDPar(&mean,&stdev); - rtfs=ipowMod((int)(1/denom)*(int)cval,(long long int) (2*id+1),2*stdev); - if(rtfs<0) rtfs=-rtfs; - len=mean-stdev+rtfs; - return len; -} -Arr* RandomFeatures(char *bmname,int fdim,int id){ - int len=GetFeatureNum(bmname,id)*fdim; - Arr* feat=newArr(len); - int nxg=2,nyg=2,nzg=2,nfg=5; - int nx=421,ny=419,nz=1427,nf=3527; - long long 
int expon=(len*(id+1))%3141592; - int seedx=ipowMod(nxg,expon,nx), - seedy=ipowMod(nyg,expon,ny), - seedz=ipowMod(nzg,expon,nz), - seedf=ipowMod(nfg,expon,nf); - int i=0; - if(timer_on){ - timer_clear(id+1); - timer_start(id+1); - } - for(i=0;ival[i]=seedx; - feat->val[i+1]=seedy; - feat->val[i+2]=seedz; - feat->val[i+3]=seedf; - } - if(timer_on){ - timer_stop(id+1); - fprintf(stderr,"** RandomFeatures time in node %d = %f\n",id,timer_read(id+1)); - } - return feat; -} -void Resample(Arr *a,int blen){ - long long int i=0,j=0,jlo=0,jhi=0; - double avval=0.0; - double *nval=(double *)SMPI_SHARED_MALLOC(blen*sizeof(double)); - Arr *tmp=newArr(10); - for(i=0;ilen-1;i++){ - jlo=(int)(0.5*(2*i-1)*(blen/a->len)); - jhi=(int)(0.5*(2*i+1)*(blen/a->len)); - - avval=a->val[i]/(jhi-jlo+1); - for(j=jlo;j<=jhi;j++){ - nval[j]+=avval; - } - } - nval[0]=a->val[0]; - nval[blen-1]=a->val[a->len-1]; - SMPI_SHARED_FREE(a->val); - a->val=nval; - a->len=blen; -} -#define fielddim 4 -Arr* WindowFilter(Arr *a, Arr* b,int w){ - int i=0,j=0,k=0; - double rms0=0.0,rms1=0.0,rmsm1=0.0; - double weight=((double) (w+1))/(w+2); - - w+=1; - if(timer_on){ - timer_clear(w); - timer_start(w); - } - if(a->lenlen) Resample(a,b->len); - if(a->len>b->len) Resample(b,a->len); - for(i=fielddim;ilen-fielddim;i+=fielddim){ - rms0=(a->val[i]-b->val[i])*(a->val[i]-b->val[i]) - +(a->val[i+1]-b->val[i+1])*(a->val[i+1]-b->val[i+1]) - +(a->val[i+2]-b->val[i+2])*(a->val[i+2]-b->val[i+2]) - +(a->val[i+3]-b->val[i+3])*(a->val[i+3]-b->val[i+3]); - j=i+fielddim; - rms1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j]) - +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1]) - +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2]) - +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]); - j=i-fielddim; - rmsm1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j]) - +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1]) - +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2]) - +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]); - k=0; - 
if(rms1val[i]=weight*b->val[i]; - a->val[i+1]=weight*b->val[i+1]; - a->val[i+2]=weight*b->val[i+2]; - a->val[i+3]=weight*b->val[i+3]; - }else if(k==1){ - j=i+fielddim; - a->val[i]=weight*b->val[j]; - a->val[i+1]=weight*b->val[j+1]; - a->val[i+2]=weight*b->val[j+2]; - a->val[i+3]=weight*b->val[j+3]; - }else { /*if(k==-1)*/ - j=i-fielddim; - a->val[i]=weight*b->val[j]; - a->val[i+1]=weight*b->val[j+1]; - a->val[i+2]=weight*b->val[j+2]; - a->val[i+3]=weight*b->val[j+3]; - } - } - if(timer_on){ - timer_stop(w); - fprintf(stderr,"** WindowFilter time in node %d = %f\n",(w-1),timer_read(w)); - } - return a; -} - -int SendResults(DGraph *dg,DGNode *nd,Arr *feat){ - int i=0,tag=0; - DGArc *ar=NULL; - DGNode *head=NULL; - if(!feat) return 0; - for(i=0;ioutDegree;i++){ - ar=nd->outArc[i]; - if(ar->tail!=nd) continue; - head=ar->head; - tag=ar->id; - if(head->address!=nd->address){ - MPI_Send(&feat->len,1,MPI_INT,head->address,tag,MPI_COMM_WORLD); - MPI_Send(feat->val,feat->len,MPI_DOUBLE,head->address,tag,MPI_COMM_WORLD); - } - } - return 1; -} -Arr* CombineStreams(DGraph *dg,DGNode *nd){ - Arr *resfeat=newArr(NUM_SAMPLES*fielddim); - int i=0,len=0,tag=0; - DGArc *ar=NULL; - DGNode *tail=NULL; - MPI_Status status; - Arr *feat=NULL,*featp=NULL; - - if(nd->inDegree==0) return NULL; - for(i=0;iinDegree;i++){ - ar=nd->inArc[i]; - if(ar->head!=nd) continue; - tail=ar->tail; - if(tail->address!=nd->address){ - len=0; - tag=ar->id; - MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status); - feat=newArr(len); - MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status); - resfeat=WindowFilter(resfeat,feat,nd->id); - SMPI_SHARED_FREE(feat); - }else{ - featp=(Arr *)tail->feat; - feat=newArr(featp->len); - memcpy(feat->val,featp->val,featp->len*sizeof(double)); - resfeat=WindowFilter(resfeat,feat,nd->id); - SMPI_SHARED_FREE(feat); - } - } - for(i=0;ilen;i++) resfeat->val[i]=((int)resfeat->val[i])/nd->inDegree; - nd->feat=resfeat; - return nd->feat; -} 
-double Reduce(Arr *a,int w){ - double retv=0.0; - if(timer_on){ - timer_clear(w); - timer_start(w); - } - retv=(int)(w*CheckVal(a));/* The casting needed for node - and array dependent verifcation */ - if(timer_on){ - timer_stop(w); - fprintf(stderr,"** Reduce time in node %d = %f\n",(w-1),timer_read(w)); - } - return retv; -} - -double ReduceStreams(DGraph *dg,DGNode *nd){ - double csum=0.0; - int i=0,len=0,tag=0; - DGArc *ar=NULL; - DGNode *tail=NULL; - Arr *feat=NULL; - double retv=0.0; - - for(i=0;iinDegree;i++){ - ar=nd->inArc[i]; - if(ar->head!=nd) continue; - tail=ar->tail; - if(tail->address!=nd->address){ - MPI_Status status; - len=0; - tag=ar->id; - MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status); - feat=newArr(len); - MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status); - csum+=Reduce(feat,(nd->id+1)); - SMPI_SHARED_FREE(feat); - }else{ - csum+=Reduce(tail->feat,(nd->id+1)); - } - } - if(nd->inDegree>0)csum=(((long long int)csum)/nd->inDegree); - retv=(nd->id+1)*csum; - return retv; -} - -int ProcessNodes(DGraph *dg,int me){ - double chksum=0.0; - Arr *feat=NULL; - int i=0,verified=0,tag; - DGNode *nd=NULL; - double rchksum=0.0; - MPI_Status status; - - for(i=0;inumNodes;i++){ - nd=dg->node[i]; - if(nd->address!=me) continue; - if(strstr(nd->name,"Source")){ - nd->feat=RandomFeatures(dg->name,fielddim,nd->id); - SendResults(dg,nd,nd->feat); - }else if(strstr(nd->name,"Sink")){ - chksum=ReduceStreams(dg,nd); - tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */ - MPI_Send(&chksum,1,MPI_DOUBLE,0,tag,MPI_COMM_WORLD); - }else{ - feat=CombineStreams(dg,nd); - SendResults(dg,nd,feat); - } - } - if(me==0){ /* Report node */ - rchksum=0.0; - chksum=0.0; - for(i=0;inumNodes;i++){ - nd=dg->node[i]; - if(!strstr(nd->name,"Sink")) continue; - tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */ - MPI_Recv(&rchksum,1,MPI_DOUBLE,nd->address,tag,MPI_COMM_WORLD,&status); - 
chksum+=rchksum; - } - verified=verify(dg->name,chksum); - } -return verified; -} - -int main(int argc,char **argv ){ - int my_rank,comm_size; - int i; - DGraph *dg=NULL; - int verified=0, featnum=0; - double bytes_sent=2.0,tot_time=0.0; - - MPI_Init( &argc, &argv ); - MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); - MPI_Comm_size( MPI_COMM_WORLD, &comm_size ); - - if(argc!=2|| - ( strncmp(argv[1],"BH",2)!=0 - &&strncmp(argv[1],"WH",2)!=0 - &&strncmp(argv[1],"SH",2)!=0 - ) - ){ - if(my_rank==0){ - fprintf(stderr,"** Usage: mpirun -np N ../bin/dt.S GraphName\n"); - fprintf(stderr,"** Where \n - N is integer number of MPI processes\n"); - fprintf(stderr," - S is the class S, W, or A \n"); - fprintf(stderr," - GraphName is the communication graph name BH, WH, or SH.\n"); - fprintf(stderr," - the number of MPI processes N should not be be less than \n"); - fprintf(stderr," the number of nodes in the graph\n"); - } - MPI_Finalize(); - exit(0); - } - if(strncmp(argv[1],"BH",2)==0){ - dg=buildBH(CLASS); - }else if(strncmp(argv[1],"WH",2)==0){ - dg=buildWH(CLASS); - }else if(strncmp(argv[1],"SH",2)==0){ - dg=buildSH(CLASS); - } - - if(timer_on&&dg->numNodes+1>timers_tot){ - timer_on=0; - if(my_rank==0) - fprintf(stderr,"Not enough timers. Node timeing is off. 
\n"); - } - if(dg->numNodes>comm_size){ - if(my_rank==0){ - fprintf(stderr,"** The number of MPI processes should not be less than \n"); - fprintf(stderr,"** the number of nodes in the graph\n"); - fprintf(stderr,"** Number of MPI processes = %d\n",comm_size); - fprintf(stderr,"** Number nodes in the graph = %d\n",dg->numNodes); - } - MPI_Finalize(); - exit(0); - } - for(i=0;inumNodes;i++){ - dg->node[i]->address=i; - } - if( my_rank == 0 ){ - printf( "\n\n NAS Parallel Benchmarks 3.3 -- DT Benchmark\n\n" ); - graphShow(dg,0); - timer_clear(0); - timer_start(0); - } - verified=ProcessNodes(dg,my_rank); - - featnum=NUM_SAMPLES*fielddim; - bytes_sent=featnum*dg->numArcs; - bytes_sent/=1048576; - if(my_rank==0){ - timer_stop(0); - tot_time=timer_read(0); - c_print_results( dg->name, - CLASS, - featnum, - 0, - 0, - dg->numNodes, - 0, - comm_size, - tot_time, - bytes_sent/tot_time, - "bytes transmitted", - verified, - NPBVERSION, - COMPILETIME, - MPICC, - CLINK, - CMPI_LIB, - CMPI_INC, - CFLAGS, - CLINKFLAGS ); - } - MPI_Finalize(); - return 1; -} diff --git a/examples/smpi/NAS/EP/Makefile b/examples/smpi/NAS/EP/Makefile deleted file mode 100644 index 3d38c277cd..0000000000 --- a/examples/smpi/NAS/EP/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -BENCHMARK=ep -include ../config/make.def -include ../sys/make.common - -${PROGRAM}: config ep.o ep-sampling.o ../common/randdp.o - ${CLINK} ${CLINKFLAGS} -o ${BINDIR}/ep.${CLASS}.${NPROCS} ep.o ../common/randdp.o ${CMPI_LIB} -lm - ${CLINK} ${CLINKFLAGS} -o ${BINDIR}/ep-sampling.${CLASS}.${NPROCS} ep-sampling.o ../common/randdp.o ${CMPI_LIB} -lm - -ep.o: ep.c npbparams.h - ${CCOMPILE} ep.c -ep-sampling.o: ep-sampling.c npbparams.h - ${CCOMPILE} ep-sampling.c - -clean: - - rm -f *.o *~ npbparams.h diff --git a/examples/smpi/NAS/EP/ep-sampling.c b/examples/smpi/NAS/EP/ep-sampling.c deleted file mode 100644 index c5956b6d2e..0000000000 --- a/examples/smpi/NAS/EP/ep-sampling.c +++ /dev/null @@ -1,438 +0,0 @@ -#include -#include -#include 
-#include - -#include "mpi.h" -#include "npbparams.h" - -#ifndef CLASS -#define CLASS 'S' -#define NUM_PROCS 1 -#endif -#define true 1 -#define false 0 - - -//---NOTE : all the timers function have been modified to -// avoid global timers (privatize these). - // ----------------------- timers --------------------- - void timer_clear(double *onetimer) { - //elapsed[n] = 0.0; - *onetimer = 0.0; - } - - void timer_start(double *onetimer) { - *onetimer = MPI_Wtime(); - } - - void timer_stop(int n,double *elapsed,double *start) { - double t, now; - - now = MPI_Wtime(); - t = now - start[n]; - elapsed[n] += t; - } - - double timer_read(int n, double *elapsed) { /* ok, useless, but jsut to keep function call */ - return(elapsed[n]); - } - /******************************************************************** - ***************** V R A N L C ****************** - ***************** *****************/ - double vranlc(int n, double x, double a, double *y) - { - int i; - long i246m1=0x00003FFFFFFFFFFF; - long LLx, Lx, La; - double d2m46; - -// This doesn't work, because the compiler does the calculation in 32 -// bits and overflows. No standard way (without f90 stuff) to specify -// that the rhs should be done in 64 bit arithmetic. -// parameter(i246m1=2**46-1) - - d2m46=pow(0.5,46); - -// c Note that the v6 compiler on an R8000 does something stupid with -// c the above. Using the following instead (or various other things) -// c makes the calculation run almost 10 times as fast. -// -// c save d2m46 -// c data d2m46/0.0d0/ -// c if (d2m46 .eq. 
0.0d0) then -// c d2m46 = 0.5d0**46 -// c endif - - Lx = (long)x; - La = (long)a; - //fprintf(stdout,("================== Vranlc ================"); - //fprintf(stdout,("Before Loop: Lx = " + Lx + ", La = " + La); - LLx = Lx; - for (i=0; i< n; i++) { - Lx = Lx*La & i246m1 ; - LLx = Lx; - y[i] = d2m46 * (double)LLx; - /* - if(i == 0) { - fprintf(stdout,("After loop 0:"); - fprintf(stdout,("Lx = " + Lx + ", La = " + La); - fprintf(stdout,("d2m46 = " + d2m46); - fprintf(stdout,("LLX(Lx) = " + LLX.doubleValue()); - fprintf(stdout,("Y[0]" + y[0]); - } - */ - } - - x = (double)LLx; - /* - fprintf(stdout,("Change: Lx = " + Lx); - fprintf(stdout,("=============End Vranlc ================"); - */ - return x; - } - - - -//-------------- the core (unique function) ----------- - void doTest(int argc, char **argv) { - double dum[3] = {1.,1.,1.}; - double x1, x2, sx, sy, tm, an, tt, gc; - double Mops; - double epsilon=1.0E-8, a = 1220703125., s=271828183.; - double t1, t2, t3, t4; - double sx_verify_value, sy_verify_value, sx_err, sy_err; - -#include "npbparams.h" - int mk=16, - // --> set by make : in npbparams.h - //m=28, // for CLASS=A - //m=30, // for CLASS=B - //npm=2, // NPROCS - mm = m-mk, - nn = (int)(pow(2,mm)), - nk = (int)(pow(2,mk)), - nq=10, - np, - node, - no_nodes, - i, - ik, - kk, - l, - k, nit, no_large_nodes, - np_add, k_offset, j; - int me, nprocs, root=0, dp_type; - int verified, - timers_enabled=true; - char size[500]; // mind the size of the string to represent a big number - - //Use in randlc.. 
- int KS = 0; - double R23, R46, T23, T46; - - double *qq = (double *) malloc (10000*sizeof(double)); - double *start = (double *) malloc (64*sizeof(double)); - double *elapsed = (double *) malloc (64*sizeof(double)); - - double *x = (double *) malloc (2*nk*sizeof(double)); - double *q = (double *) malloc (nq*sizeof(double)); - - MPI_Init( &argc, &argv ); - MPI_Comm_size( MPI_COMM_WORLD, &no_nodes); - MPI_Comm_rank( MPI_COMM_WORLD, &node); - -#ifdef USE_MPE - MPE_Init_log(); -#endif - root = 0; - if (node == root ) { - - /* Because the size of the problem is too large to store in a 32-bit - * integer for some classes, we put it into a string (for printing). - * Have to strip off the decimal point put in there by the floating - * point print statement (internal file) - */ - fprintf(stdout," NAS Parallel Benchmarks 3.2 -- EP Benchmark"); - sprintf(size,"%d",(int) pow(2,m+1)); - //size = size.replace('.', ' '); - fprintf(stdout," Number of random numbers generated: %s\n",size); - fprintf(stdout," Number of active processes: %d\n",no_nodes); - - } - verified = false; - - /* c Compute the number of "batches" of random number pairs generated - c per processor. Adjust if the number of processors does not evenly - c divide the total number -*/ - - np = nn / no_nodes; - no_large_nodes = nn % no_nodes; - if (node < no_large_nodes) np_add = 1; - else np_add = 0; - np = np + np_add; - - if (np == 0) { - fprintf(stdout,"Too many nodes: %d %d",no_nodes,nn); - MPI_Abort(MPI_COMM_WORLD,1); - exit(0); - } - -/* c Call the random number generator functions and initialize - c the x-array to reduce the effects of paging on the timings. - c Also, call all mathematical functions that are used. Make - c sure these initializations cannot be eliminated as dead code. 
-*/ - - //call vranlc(0, dum[1], dum[2], dum[3]); - // Array indexes start at 1 in Fortran, 0 in Java - vranlc(0, dum[0], dum[1], &(dum[2])); - - dum[0] = randlc(&(dum[1]),&(dum[2])); - ///////////////////////////////// - for (i=0;i<2*nk;i++) { - x[i] = -1e99; - } - Mops = log(sqrt(abs(1))); - - /* - c--------------------------------------------------------------------- - c Synchronize before placing time stamp - c--------------------------------------------------------------------- - */ - MPI_Barrier( MPI_COMM_WORLD ); - - timer_clear(&(elapsed[1])); - timer_clear(&(elapsed[2])); - timer_clear(&(elapsed[3])); - timer_start(&(start[1])); - - t1 = a; - //fprintf(stdout,("(ep.f:160) t1 = " + t1); - t1 = vranlc(0, t1, a, x); - //fprintf(stdout,("(ep.f:161) t1 = " + t1); - - -/* c Compute AN = A ^ (2 * NK) (mod 2^46). */ - - t1 = a; - //fprintf(stdout,("(ep.f:165) t1 = " + t1); - for (i=1; i <= mk+1; i++) { - t2 = randlc(&t1, &t1); - //fprintf(stdout,("(ep.f:168)[loop i=" + i +"] t1 = " + t1); - } - an = t1; - //fprintf(stdout,("(ep.f:172) s = " + s); - tt = s; - gc = 0.; - sx = 0.; - sy = 0.; - for (i=0; i < nq ; i++) { - q[i] = 0.; - } - -/* - Each instance of this loop may be performed independently. We compute - the k offsets separately to take into account the fact that some nodes - have more numbers to generate than others -*/ - - if (np_add == 1) - k_offset = node * np -1; - else - k_offset = no_large_nodes*(np+1) + (node-no_large_nodes)*np -1; - - int stop = false; - for(k = 1; k <= np; k++) SMPI_SAMPLE_LOCAL(0.25 * np, 0.03) { - stop = false; - kk = k_offset + k ; - t1 = s; - //fprintf(stdout,("(ep.f:193) t1 = " + t1); - t2 = an; - -// Find starting seed t1 for this kk. 
- - for (i=1;i<=100 && !stop;i++) { - ik = kk / 2; - //fprintf(stdout,("(ep.f:199) ik = " +ik+", kk = " + kk); - if (2 * ik != kk) { - t3 = randlc(&t1, &t2); - //fprintf(stdout,("(ep.f:200) t1= " +t1 ); - } - if (ik==0) - stop = true; - else { - t3 = randlc(&t2, &t2); - kk = ik; - } - } -// Compute uniform pseudorandom numbers. - - //if (timers_enabled) timer_start(3); - timer_start(&(start[3])); - //call vranlc(2 * nk, t1, a, x) --> t1 and y are modified - - //fprintf(stdout,">>>>>>>>>>>Before vranlc(l.210)<<<<<<<<<<<<<"); - //fprintf(stdout,"2*nk = " + (2*nk)); - //fprintf(stdout,"t1 = " + t1); - //fprintf(stdout,"a = " + a); - //fprintf(stdout,"x[0] = " + x[0]); - //fprintf(stdout,">>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<"); - - t1 = vranlc(2 * nk, t1, a, x); - - //fprintf(stdout,(">>>>>>>>>>>After Enter vranlc (l.210)<<<<<<"); - //fprintf(stdout,("2*nk = " + (2*nk)); - //fprintf(stdout,("t1 = " + t1); - //fprintf(stdout,("a = " + a); - //fprintf(stdout,("x[0] = " + x[0]); - //fprintf(stdout,(">>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<"); - - //if (timers_enabled) timer_stop(3); - timer_stop(3,elapsed,start); - -/* Compute Gaussian deviates by acceptance-rejection method and - * tally counts in concentric square annuli. This loop is not - * vectorizable. - */ - //if (timers_enabled) timer_start(2); - timer_start(&(start[2])); - for(i=1; i<=nk;i++) { - x1 = 2. * x[2*i-2] -1.0; - x2 = 2. * x[2*i-1] - 1.0; - t1 = x1*x1 + x2*x2; - if (t1 <= 1.) { - t2 = sqrt(-2. * log(t1) / t1); - t3 = (x1 * t2); - t4 = (x2 * t2); - l = (int)(abs(t3) > abs(t4) ? 
abs(t3) : abs(t4)); - q[l] = q[l] + 1.; - sx = sx + t3; - sy = sy + t4; - } - /* - if(i == 1) { - fprintf(stdout,"x1 = " + x1); - fprintf(stdout,"x2 = " + x2); - fprintf(stdout,"t1 = " + t1); - fprintf(stdout,"t2 = " + t2); - fprintf(stdout,"t3 = " + t3); - fprintf(stdout,"t4 = " + t4); - fprintf(stdout,"l = " + l); - fprintf(stdout,"q[l] = " + q[l]); - fprintf(stdout,"sx = " + sx); - fprintf(stdout,"sy = " + sy); - } - */ - } - //if (timers_enabled) timer_stop(2); - timer_stop(2,elapsed,start); - } - - //int MPI_Allreduce(void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) - MPI_Allreduce(&sx, x, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - sx = x[0]; //FIXME : x[0] or x[1] => x[0] because fortran starts with 1 - MPI_Allreduce(&sy, x, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - sy = x[0]; - MPI_Allreduce(q, x, nq, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - - for(i = 0; i < nq; i++) { - q[i] = x[i]; - } - for(i = 0; i < nq; i++) { - gc += q[i]; - } - - timer_stop(1,elapsed,start); - tm = timer_read(1,elapsed); - MPI_Allreduce(&tm, x, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); - tm = x[0]; - - if(node == root) { - nit = 0; - verified = true; - - if(m == 24) { - sx_verify_value = -3.247834652034740E3; - sy_verify_value = -6.958407078382297E3; - } else if(m == 25) { - sx_verify_value = -2.863319731645753E3; - sy_verify_value = -6.320053679109499E3; - } else if(m == 28) { - sx_verify_value = -4.295875165629892E3; - sy_verify_value = -1.580732573678431E4; - } else if(m == 30) { - sx_verify_value = 4.033815542441498E4; - sy_verify_value = -2.660669192809235E4; - } else if(m == 32) { - sx_verify_value = 4.764367927995374E4; - sy_verify_value = -8.084072988043731E4; - } else if(m == 36) { - sx_verify_value = 1.982481200946593E5; - sy_verify_value = -1.020596636361769E5; - } else { - verified = false; - } - - /* - fprintf(stdout,("sx = " + sx); - fprintf(stdout,("sx_verify = " + sx_verify_value); - fprintf(stdout,("sy = " + sy); - 
fprintf(stdout,("sy_verify = " + sy_verify_value); - */ - if(verified) { - sx_err = abs((sx - sx_verify_value)/sx_verify_value); - sy_err = abs((sy - sy_verify_value)/sy_verify_value); - /* - fprintf(stdout,("sx_err = " + sx_err); - fprintf(stdout,("sy_err = " + sx_err); - fprintf(stdout,("epsilon= " + epsilon); - */ - verified = ((sx_err < epsilon) && (sy_err < epsilon)); - } - - Mops = (pow(2.0, m+1))/tm/1000; - - fprintf(stdout,"EP Benchmark Results:\n"); - fprintf(stdout,"CPU Time=%d\n",(int) tm); - fprintf(stdout,"N = 2^%d\n",m); - fprintf(stdout,"No. Gaussain Pairs =%d\n",(int) gc); - fprintf(stdout,"Sum = %f %ld\n",sx,(long) sy); - fprintf(stdout,"Count:"); - for(i = 0; i < nq; i++) { - fprintf(stdout,"%d\t %ld\n",i,(long) q[i]); - } - - /* - print_results("EP", _class, m+1, 0, 0, nit, npm, no_nodes, tm, Mops, - "Random numbers generated", verified, npbversion, - compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7) */ - fprintf(stdout,"\nEP Benchmark Completed\n"); - fprintf(stdout,"Class = %s\n", _class); - fprintf(stdout,"Size = %s\n", size); - fprintf(stdout,"Iteration = %d\n", nit); - fprintf(stdout,"Time in seconds = %f\n",(tm/1000)); - fprintf(stdout,"Total processes = %d\n",no_nodes); - fprintf(stdout,"Mops/s total = %f\n",Mops); - fprintf(stdout,"Mops/s/process = %f\n", Mops/no_nodes); - fprintf(stdout,"Operation type = Random number generated\n"); - if(verified) { - fprintf(stdout,"Verification = SUCCESSFUL\n"); - } else { - fprintf(stdout,"Verification = UNSUCCESSFUL\n"); - } - fprintf(stdout,"Total time: %f\n",(timer_read(1,elapsed)/1000)); - fprintf(stdout,"Gaussian pairs: %f\n",(timer_read(2,elapsed)/1000)); - fprintf(stdout,"Random numbers: %f\n",(timer_read(3,elapsed)/1000)); - } -#ifdef USE_MPE - MPE_Finish_log(argv[0]); -#endif - - MPI_Finalize(); - } - - int main(int argc, char **argv) { - doTest(argc,argv); - } diff --git a/examples/smpi/NAS/EP/ep.c b/examples/smpi/NAS/EP/ep.c deleted file mode 100644 index 569c2f21df..0000000000 --- 
a/examples/smpi/NAS/EP/ep.c +++ /dev/null @@ -1,445 +0,0 @@ -#include -#include -#include -#include - -#include "mpi.h" -#include "npbparams.h" - -#include "simgrid/instr.h" //TRACE_ - -#ifndef CLASS -#define CLASS 'S' -#define NUM_PROCS 1 -#endif -#define true 1 -#define false 0 - -//---NOTE : all the timers function have been modified to -// avoid global timers (privatize these). - // ----------------------- timers --------------------- - void timer_clear(double *onetimer) { - //elapsed[n] = 0.0; - *onetimer = 0.0; - } - - void timer_start(double *onetimer) { - *onetimer = MPI_Wtime(); - } - - void timer_stop(int n,double *elapsed,double *start) { - double t, now; - - now = MPI_Wtime(); - t = now - start[n]; - elapsed[n] += t; - } - - double timer_read(int n, double *elapsed) { /* ok, useless, but jsut to keep function call */ - return(elapsed[n]); - } - /******************************************************************** - ***************** V R A N L C ****************** - ***************** *****************/ - double vranlc(int n, double x, double a, double *y) - { - int i; - long i246m1=0x00003FFFFFFFFFFF; - long LLx, Lx, La; - double d2m46; - -// This doesn't work, because the compiler does the calculation in 32 -// bits and overflows. No standard way (without f90 stuff) to specify -// that the rhs should be done in 64 bit arithmetic. -// parameter(i246m1=2**46-1) - - d2m46=pow(0.5,46); - -// c Note that the v6 compiler on an R8000 does something stupid with -// c the above. Using the following instead (or various other things) -// c makes the calculation run almost 10 times as fast. -// -// c save d2m46 -// c data d2m46/0.0d0/ -// c if (d2m46 .eq. 
0.0d0) then -// c d2m46 = 0.5d0**46 -// c endif - - Lx = (long)x; - La = (long)a; - //fprintf(stdout,("================== Vranlc ================"); - //fprintf(stdout,("Before Loop: Lx = " + Lx + ", La = " + La); - LLx = Lx; - for (i=0; i< n; i++) { - Lx = Lx*La & i246m1 ; - LLx = Lx; - y[i] = d2m46 * (double)LLx; - /* - if(i == 0) { - fprintf(stdout,("After loop 0:"); - fprintf(stdout,("Lx = " + Lx + ", La = " + La); - fprintf(stdout,("d2m46 = " + d2m46); - fprintf(stdout,("LLX(Lx) = " + LLX.doubleValue()); - fprintf(stdout,("Y[0]" + y[0]); - } - */ - } - - x = (double)LLx; - /* - fprintf(stdout,("Change: Lx = " + Lx); - fprintf(stdout,("=============End Vranlc ================"); - */ - return x; - } - - - -//-------------- the core (unique function) ----------- - void doTest(int argc, char **argv) { - double dum[3] = {1.,1.,1.}; - double x1, x2, sx, sy, tm, an, tt, gc; - double Mops; - double epsilon=1.0E-8, a = 1220703125., s=271828183.; - double t1, t2, t3, t4; - double sx_verify_value, sy_verify_value, sx_err, sy_err; - -#include "npbparams.h" - int mk=16, - // --> set by make : in npbparams.h - //m=28, // for CLASS=A - //m=30, // for CLASS=B - //npm=2, // NPROCS - mm = m-mk, - nn = (int)(pow(2,mm)), - nk = (int)(pow(2,mk)), - nq=10, - np, - node, - no_nodes, - i, - ik, - kk, - l, - k, nit, no_large_nodes, - np_add, k_offset, j; - int me, nprocs, root=0, dp_type; - int verified, - timers_enabled=true; - char size[500]; // mind the size of the string to represent a big number - - //Use in randlc.. 
- int KS = 0; - double R23, R46, T23, T46; - - double *qq = (double *) malloc (10000*sizeof(double)); - double *start = (double *) malloc (64*sizeof(double)); - double *elapsed = (double *) malloc (64*sizeof(double)); - - double *x = (double *) malloc (2*nk*sizeof(double)); - double *q = (double *) malloc (nq*sizeof(double)); - - TRACE_smpi_set_category ("start"); - - MPI_Init( &argc, &argv ); - MPI_Comm_size( MPI_COMM_WORLD, &no_nodes); - MPI_Comm_rank( MPI_COMM_WORLD, &node); - -#ifdef USE_MPE - MPE_Init_log(); -#endif - root = 0; - if (node == root ) { - - /* Because the size of the problem is too large to store in a 32-bit - * integer for some classes, we put it into a string (for printing). - * Have to strip off the decimal point put in there by the floating - * point print statement (internal file) - */ - fprintf(stdout," NAS Parallel Benchmarks 3.2 -- EP Benchmark"); - sprintf(size,"%d",(int)pow(2,m+1)); - //size = size.replace('.', ' '); - fprintf(stdout," Number of random numbers generated: %s\n",size); - fprintf(stdout," Number of active processes: %d\n",no_nodes); - - } - verified = false; - - /* c Compute the number of "batches" of random number pairs generated - c per processor. Adjust if the number of processors does not evenly - c divide the total number -*/ - - np = nn / no_nodes; - no_large_nodes = nn % no_nodes; - if (node < no_large_nodes) np_add = 1; - else np_add = 0; - np = np + np_add; - - if (np == 0) { - fprintf(stdout,"Too many nodes: %d %d",no_nodes,nn); - MPI_Abort(MPI_COMM_WORLD,1); - exit(0); - } - -/* c Call the random number generator functions and initialize - c the x-array to reduce the effects of paging on the timings. - c Also, call all mathematical functions that are used. Make - c sure these initializations cannot be eliminated as dead code. 
-*/ - - //call vranlc(0, dum[1], dum[2], dum[3]); - // Array indexes start at 1 in Fortran, 0 in Java - vranlc(0, dum[0], dum[1], &(dum[2])); - - dum[0] = randlc(&(dum[1]),&(dum[2])); - ///////////////////////////////// - for (i=0;i<2*nk;i++) { - x[i] = -1e99; - } - Mops = log(sqrt(abs(1))); - - /* - c--------------------------------------------------------------------- - c Synchronize before placing time stamp - c--------------------------------------------------------------------- - */ - MPI_Barrier( MPI_COMM_WORLD ); - - TRACE_smpi_set_category ("ep"); - - timer_clear(&(elapsed[1])); - timer_clear(&(elapsed[2])); - timer_clear(&(elapsed[3])); - timer_start(&(start[1])); - - t1 = a; - //fprintf(stdout,("(ep.f:160) t1 = " + t1); - t1 = vranlc(0, t1, a, x); - //fprintf(stdout,("(ep.f:161) t1 = " + t1); - - -/* c Compute AN = A ^ (2 * NK) (mod 2^46). */ - - t1 = a; - //fprintf(stdout,("(ep.f:165) t1 = " + t1); - for (i=1; i <= mk+1; i++) { - t2 = randlc(&t1, &t1); - //fprintf(stdout,("(ep.f:168)[loop i=" + i +"] t1 = " + t1); - } - an = t1; - //fprintf(stdout,("(ep.f:172) s = " + s); - tt = s; - gc = 0.; - sx = 0.; - sy = 0.; - for (i=0; i < nq ; i++) { - q[i] = 0.; - } - -/* - Each instance of this loop may be performed independently. We compute - the k offsets separately to take into account the fact that some nodes - have more numbers to generate than others -*/ - - if (np_add == 1) - k_offset = node * np -1; - else - k_offset = no_large_nodes*(np+1) + (node-no_large_nodes)*np -1; - - int stop = false; - for(k = 1; k <= np; k++) { - stop = false; - kk = k_offset + k ; - t1 = s; - //fprintf(stdout,("(ep.f:193) t1 = " + t1); - t2 = an; - -// Find starting seed t1 for this kk. 
- - for (i=1;i<=100 && !stop;i++) { - ik = kk / 2; - //fprintf(stdout,("(ep.f:199) ik = " +ik+", kk = " + kk); - if (2 * ik != kk) { - t3 = randlc(&t1, &t2); - //fprintf(stdout,("(ep.f:200) t1= " +t1 ); - } - if (ik==0) - stop = true; - else { - t3 = randlc(&t2, &t2); - kk = ik; - } - } -// Compute uniform pseudorandom numbers. - - //if (timers_enabled) timer_start(3); - timer_start(&(start[3])); - //call vranlc(2 * nk, t1, a, x) --> t1 and y are modified - - //fprintf(stdout,">>>>>>>>>>>Before vranlc(l.210)<<<<<<<<<<<<<"); - //fprintf(stdout,"2*nk = " + (2*nk)); - //fprintf(stdout,"t1 = " + t1); - //fprintf(stdout,"a = " + a); - //fprintf(stdout,"x[0] = " + x[0]); - //fprintf(stdout,">>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<"); - - t1 = vranlc(2 * nk, t1, a, x); - - //fprintf(stdout,(">>>>>>>>>>>After Enter vranlc (l.210)<<<<<<"); - //fprintf(stdout,("2*nk = " + (2*nk)); - //fprintf(stdout,("t1 = " + t1); - //fprintf(stdout,("a = " + a); - //fprintf(stdout,("x[0] = " + x[0]); - //fprintf(stdout,(">>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<"); - - //if (timers_enabled) timer_stop(3); - timer_stop(3,elapsed,start); - -/* Compute Gaussian deviates by acceptance-rejection method and - * tally counts in concentric square annuli. This loop is not - * vectorizable. - */ - //if (timers_enabled) timer_start(2); - timer_start(&(start[2])); - for(i=1; i<=nk;i++) { - x1 = 2. * x[2*i-2] -1.0; - x2 = 2. * x[2*i-1] - 1.0; - t1 = x1*x1 + x2*x2; - if (t1 <= 1.) { - t2 = sqrt(-2. * log(t1) / t1); - t3 = (x1 * t2); - t4 = (x2 * t2); - l = (int)(abs(t3) > abs(t4) ? 
abs(t3) : abs(t4)); - q[l] = q[l] + 1.; - sx = sx + t3; - sy = sy + t4; - } - /* - if(i == 1) { - fprintf(stdout,"x1 = " + x1); - fprintf(stdout,"x2 = " + x2); - fprintf(stdout,"t1 = " + t1); - fprintf(stdout,"t2 = " + t2); - fprintf(stdout,"t3 = " + t3); - fprintf(stdout,"t4 = " + t4); - fprintf(stdout,"l = " + l); - fprintf(stdout,"q[l] = " + q[l]); - fprintf(stdout,"sx = " + sx); - fprintf(stdout,"sy = " + sy); - } - */ - } - //if (timers_enabled) timer_stop(2); - timer_stop(2,elapsed,start); - } - - TRACE_smpi_set_category ("finalize"); - - //int MPI_Allreduce(void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) - MPI_Allreduce(&sx, x, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - sx = x[0]; //FIXME : x[0] or x[1] => x[0] because fortran starts with 1 - MPI_Allreduce(&sy, x, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - sy = x[0]; - MPI_Allreduce(q, x, nq, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - - for(i = 0; i < nq; i++) { - q[i] = x[i]; - } - for(i = 0; i < nq; i++) { - gc += q[i]; - } - - timer_stop(1,elapsed,start); - tm = timer_read(1,elapsed); - MPI_Allreduce(&tm, x, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); - tm = x[0]; - - if(node == root) { - nit = 0; - verified = true; - - if(m == 24) { - sx_verify_value = -3.247834652034740E3; - sy_verify_value = -6.958407078382297E3; - } else if(m == 25) { - sx_verify_value = -2.863319731645753E3; - sy_verify_value = -6.320053679109499E3; - } else if(m == 28) { - sx_verify_value = -4.295875165629892E3; - sy_verify_value = -1.580732573678431E4; - } else if(m == 30) { - sx_verify_value = 4.033815542441498E4; - sy_verify_value = -2.660669192809235E4; - } else if(m == 32) { - sx_verify_value = 4.764367927995374E4; - sy_verify_value = -8.084072988043731E4; - } else if(m == 36) { - sx_verify_value = 1.982481200946593E5; - sy_verify_value = -1.020596636361769E5; - } else { - verified = false; - } - - /* - fprintf(stdout,("sx = " + sx); - fprintf(stdout,("sx_verify = " + sx_verify_value); - 
fprintf(stdout,("sy = " + sy); - fprintf(stdout,("sy_verify = " + sy_verify_value); - */ - if(verified) { - sx_err = abs((sx - sx_verify_value)/sx_verify_value); - sy_err = abs((sy - sy_verify_value)/sy_verify_value); - /* - fprintf(stdout,("sx_err = " + sx_err); - fprintf(stdout,("sy_err = " + sx_err); - fprintf(stdout,("epsilon= " + epsilon); - */ - verified = ((sx_err < epsilon) && (sy_err < epsilon)); - } - - Mops = (pow(2.0, m+1))/tm/1000; - - fprintf(stdout,"EP Benchmark Results:\n"); - fprintf(stdout,"CPU Time=%d\n",(int) tm); - fprintf(stdout,"N = 2^%d\n",m); - fprintf(stdout,"No. Gaussain Pairs =%d\n",(int) gc); - fprintf(stdout,"Sum = %f %ld\n",sx,(long) sy); - fprintf(stdout,"Count:"); - for(i = 0; i < nq; i++) { - fprintf(stdout,"%d\t %ld\n",i,(long) q[i]); - } - - /* - print_results("EP", _class, m+1, 0, 0, nit, npm, no_nodes, tm, Mops, - "Random numbers generated", verified, npbversion, - compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7) */ - fprintf(stdout,"\nEP Benchmark Completed\n"); - fprintf(stdout,"Class = %s\n", _class); - fprintf(stdout,"Size = %s\n", size); - fprintf(stdout,"Iteration = %d\n", nit); - fprintf(stdout,"Time in seconds = %f\n",(tm/1000)); - fprintf(stdout,"Total processes = %d\n",no_nodes); - fprintf(stdout,"Mops/s total = %f\n",Mops); - fprintf(stdout,"Mops/s/process = %f\n", Mops/no_nodes); - fprintf(stdout,"Operation type = Random number generated\n"); - if(verified) { - fprintf(stdout,"Verification = SUCCESSFUL\n"); - } else { - fprintf(stdout,"Verification = UNSUCCESSFUL\n"); - } - fprintf(stdout,"Total time: %f\n",(timer_read(1,elapsed)/1000)); - fprintf(stdout,"Gaussian pairs: %f\n",(timer_read(2,elapsed)/1000)); - fprintf(stdout,"Random numbers: %f\n",(timer_read(3,elapsed)/1000)); - } -#ifdef USE_MPE - MPE_Finish_log(argv[0]); -#endif - - MPI_Finalize(); - } - - int main(int argc, char **argv) { - doTest(argc,argv); - } diff --git a/examples/smpi/NAS/IS/Makefile b/examples/smpi/NAS/IS/Makefile deleted file mode 100644 
index bfdc3edbb8..0000000000 --- a/examples/smpi/NAS/IS/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -BENCHMARK=is - -include ../config/make.def -include ../sys/make.common - -OBJS = is.o ${COMMON}/c_print_results.o - -${PROGRAM}: config ${OBJS} - ${CLINK} ${CLINKFLAGS} -o ${PROGRAM} ${OBJS} ${CMPI_LIB} - -.c.o: - ${CCOMPILE} $< - -is.o: is.c npbparams.h - -clean: - - rm -f *.o *~ is npbparams.h diff --git a/examples/smpi/NAS/IS/is.c b/examples/smpi/NAS/IS/is.c deleted file mode 100644 index 57b370bb39..0000000000 --- a/examples/smpi/NAS/IS/is.c +++ /dev/null @@ -1,1154 +0,0 @@ -/************************************************************************* - * * - * N A S P A R A L L E L B E N C H M A R K S 3.3 * - * * - * I S * - * * - ************************************************************************* - * * - * This benchmark is part of the NAS Parallel Benchmark 3.3 suite. * - * It is described in NAS Technical Report 95-020. * - * * - * Permission to use, copy, distribute and modify this software * - * for any purpose with or without fee is hereby granted. We * - * request, however, that all derived work reference the NAS * - * Parallel Benchmarks 3.3. This software is provided "as is" * - * without express or implied warranty. * - * * - * Information on NPB 3.3, including the technical report, the * - * original specifications, source code, results and information * - * on how to submit new results, is available at: * - * * - * http://www.nas.nasa.gov/Software/NPB * - * * - * Send comments or suggestions to npb@nas.nasa.gov * - * Send bug reports to npb-bugs@nas.nasa.gov * - * * - * NAS Parallel Benchmarks Group * - * NASA Ames Research Center * - * Mail Stop: T27A-1 * - * Moffett Field, CA 94035-1000 * - * * - * E-mail: npb@nas.nasa.gov * - * Fax: (650) 604-3957 * - * * - ************************************************************************* - * * - * Author: M. Yarrow * - * H. 
Jin * - * * - *************************************************************************/ - -#include "mpi.h" -#include "npbparams.h" -#include -#include - -#include "simgrid/instr.h" //TRACE_ - -/******************/ -/* default values */ -/******************/ -#ifndef CLASS -#define CLASS 'S' -#define NUM_PROCS 1 -#endif -#define MIN_PROCS 1 - - -/*************/ -/* CLASS S */ -/*************/ -#if CLASS == 'S' -#define TOTAL_KEYS_LOG_2 16 -#define MAX_KEY_LOG_2 11 -#define NUM_BUCKETS_LOG_2 9 -#endif - - -/*************/ -/* CLASS W */ -/*************/ -#if CLASS == 'W' -#define TOTAL_KEYS_LOG_2 20 -#define MAX_KEY_LOG_2 16 -#define NUM_BUCKETS_LOG_2 10 -#endif - -/*************/ -/* CLASS A */ -/*************/ -#if CLASS == 'A' -#define TOTAL_KEYS_LOG_2 23 -#define MAX_KEY_LOG_2 19 -#define NUM_BUCKETS_LOG_2 10 -#endif - - -/*************/ -/* CLASS B */ -/*************/ -#if CLASS == 'B' -#define TOTAL_KEYS_LOG_2 25 -#define MAX_KEY_LOG_2 21 -#define NUM_BUCKETS_LOG_2 10 -#endif - - -/*************/ -/* CLASS C */ -/*************/ -#if CLASS == 'C' -#define TOTAL_KEYS_LOG_2 27 -#define MAX_KEY_LOG_2 23 -#define NUM_BUCKETS_LOG_2 10 -#endif - - -/*************/ -/* CLASS D */ -/*************/ -#if CLASS == 'D' -#define TOTAL_KEYS_LOG_2 29 -#define MAX_KEY_LOG_2 27 -#define NUM_BUCKETS_LOG_2 10 -#undef MIN_PROCS -#define MIN_PROCS 4 -#endif - - -#define TOTAL_KEYS (1 << TOTAL_KEYS_LOG_2) -#define MAX_KEY (1 << MAX_KEY_LOG_2) -#define NUM_BUCKETS (1 << NUM_BUCKETS_LOG_2) -#define NUM_KEYS (TOTAL_KEYS/NUM_PROCS*MIN_PROCS) - -/*****************************************************************/ -/* On larger number of processors, since the keys are (roughly) */ -/* gaussian distributed, the first and last processor sort keys */ -/* in a large interval, requiring array sizes to be larger. Note */ -/* that for large NUM_PROCS, NUM_KEYS is, however, a small number*/ -/* The required array size also depends on the bucket size used. 
*/ -/* The following values are validated for the 1024-bucket setup. */ -/*****************************************************************/ -#if NUM_PROCS < 256 -#define SIZE_OF_BUFFERS 3*NUM_KEYS/2 -#elif NUM_PROCS < 512 -#define SIZE_OF_BUFFERS 5*NUM_KEYS/2 -#elif NUM_PROCS < 1024 -#define SIZE_OF_BUFFERS 4*NUM_KEYS -#else -#define SIZE_OF_BUFFERS 13*NUM_KEYS/2 -#endif - -/*****************************************************************/ -/* NOTE: THIS CODE CANNOT BE RUN ON ARBITRARILY LARGE NUMBERS OF */ -/* PROCESSORS. THE LARGEST VERIFIED NUMBER IS 1024. INCREASE */ -/* MAX_PROCS AT YOUR PERIL */ -/*****************************************************************/ -#if CLASS == 'S' -#define MAX_PROCS 128 -#else -#define MAX_PROCS 1024 -#endif - -#define MAX_ITERATIONS 10 -#define TEST_ARRAY_SIZE 5 - - -/***********************************/ -/* Enable separate communication, */ -/* computation timing and printout */ -/***********************************/ -/* #define TIMING_ENABLED */ - - -/*************************************/ -/* Typedef: if necessary, change the */ -/* size of int here by changing the */ -/* int type to, say, long */ -/*************************************/ -typedef int INT_TYPE; -typedef long INT_TYPE2; -#define MP_KEY_TYPE MPI_INT - - -typedef struct { - -/********************/ -/* MPI properties: */ -/********************/ -int my_rank, - comm_size; - - -/********************/ -/* Some global info */ -/********************/ -INT_TYPE *key_buff_ptr_global, /* used by full_verify to get */ - total_local_keys, /* copies of rank info */ - total_lesser_keys; - - -int passed_verification; - - - -/************************************/ -/* These are the three main arrays. 
*/ -/* See SIZE_OF_BUFFERS def above */ -/************************************/ -INT_TYPE key_array[SIZE_OF_BUFFERS], - key_buff1[SIZE_OF_BUFFERS], - key_buff2[SIZE_OF_BUFFERS], - bucket_size[NUM_BUCKETS+TEST_ARRAY_SIZE], /* Top 5 elements for */ - bucket_size_totals[NUM_BUCKETS+TEST_ARRAY_SIZE], /* part. ver. vals */ - bucket_ptrs[NUM_BUCKETS], - process_bucket_distrib_ptr1[NUM_BUCKETS+TEST_ARRAY_SIZE], - process_bucket_distrib_ptr2[NUM_BUCKETS+TEST_ARRAY_SIZE]; -int send_count[MAX_PROCS], recv_count[MAX_PROCS], - send_displ[MAX_PROCS], recv_displ[MAX_PROCS]; - - -/**********************/ -/* Partial verif info */ -/**********************/ -INT_TYPE2 test_index_array[TEST_ARRAY_SIZE], - test_rank_array[TEST_ARRAY_SIZE]; - -/**********/ -/* Timers */ -/**********/ -double start[64], elapsed[64]; - -} global_data; - - -const INT_TYPE2 - S_test_index_array[TEST_ARRAY_SIZE] = - {48427,17148,23627,62548,4431}, - S_test_rank_array[TEST_ARRAY_SIZE] = - {0,18,346,64917,65463}, - - W_test_index_array[TEST_ARRAY_SIZE] = - {357773,934767,875723,898999,404505}, - W_test_rank_array[TEST_ARRAY_SIZE] = - {1249,11698,1039987,1043896,1048018}, - - A_test_index_array[TEST_ARRAY_SIZE] = - {2112377,662041,5336171,3642833,4250760}, - A_test_rank_array[TEST_ARRAY_SIZE] = - {104,17523,123928,8288932,8388264}, - - B_test_index_array[TEST_ARRAY_SIZE] = - {41869,812306,5102857,18232239,26860214}, - B_test_rank_array[TEST_ARRAY_SIZE] = - {33422937,10244,59149,33135281,99}, - - C_test_index_array[TEST_ARRAY_SIZE] = - {44172927,72999161,74326391,129606274,21736814}, - C_test_rank_array[TEST_ARRAY_SIZE] = - {61147,882988,266290,133997595,133525895}, - - D_test_index_array[TEST_ARRAY_SIZE] = - {1317351170,995930646,1157283250,1503301535,1453734525}, - D_test_rank_array[TEST_ARRAY_SIZE] = - {1,36538729,1978098519,2145192618,2147425337}; - - - -/***********************/ -/* function prototypes */ -/***********************/ -double randlc( double *X, double *A ); - -void full_verify( global_data* 
gd ); - -void c_print_results( char *name, - char class, - int n1, - int n2, - int n3, - int niter, - int nprocs_compiled, - int nprocs_total, - double t, - double mops, - char *optype, - int passed_verification, - char *npbversion, - char *compiletime, - char *mpicc, - char *clink, - char *cmpi_lib, - char *cmpi_inc, - char *cflags, - char *clinkflags ); - -void timer_clear(global_data* gd, int n ); -void timer_start(global_data* gd, int n ); -void timer_stop(global_data* gd, int n ); -double timer_read(global_data* gd, int n ); - -void timer_clear(global_data* gd, int n ) { - gd->elapsed[n] = 0.0; -} - -void timer_start(global_data* gd, int n ) { - gd->start[n] = MPI_Wtime(); -} - -void timer_stop(global_data* gd, int n ) { - gd->elapsed[n] += MPI_Wtime() - gd->start[n]; -} - -double timer_read(global_data* gd, int n ) { - return gd->elapsed[n]; -} - - -/* - * FUNCTION RANDLC (X, A) - * - * This routine returns a uniform pseudorandom double precision number in the - * range (0, 1) by using the linear congruential generator - * - * x_{k+1} = a x_k (mod 2^46) - * - * where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers - * before repeating. The argument A is the same as 'a' in the above formula, - * and X is the same as x_0. A and X must be odd double precision integers - * in the range (1, 2^46). The returned value RANDLC is normalized to be - * between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain - * the new seed x_1, so that subsequent calls to RANDLC using the same - * arguments will generate a continuous sequence. - * - * This routine should produce the same results on any computer with at least - * 48 mantissa bits in double precision floating point data. On Cray systems, - * double precision should be disabled. - * - * David H. 
Bailey October 26, 1990 - * - * IMPLICIT DOUBLE PRECISION (A-H, O-Z) - * SAVE KS, R23, R46, T23, T46 - * DATA KS/0/ - * - * If this is the first call to RANDLC, compute R23 = 2 ^ -23, R46 = 2 ^ -46, - * T23 = 2 ^ 23, and T46 = 2 ^ 46. These are computed in loops, rather than - * by merely using the ** operator, in order to insure that the results are - * exact on all systems. This code assumes that 0.5D0 is represented exactly. - */ - - -/*****************************************************************/ -/************* R A N D L C ************/ -/************* ************/ -/************* portable random number generator ************/ -/*****************************************************************/ - -double randlc( double *X, double *A ) -{ - static int KS=0; - static double R23, R46, T23, T46; - double T1, T2, T3, T4; - double A1; - double A2; - double X1; - double X2; - double Z; - int i, j; - - if (KS == 0) - { - R23 = 1.0; - R46 = 1.0; - T23 = 1.0; - T46 = 1.0; - - for (i=1; i<=23; i++) - { - R23 = 0.50 * R23; - T23 = 2.0 * T23; - } - for (i=1; i<=46; i++) - { - R46 = 0.50 * R46; - T46 = 2.0 * T46; - } - KS = 1; - } - -/* Break A into two parts such that A = 2^23 * A1 + A2 and set X = N. */ - - T1 = R23 * *A; - j = T1; - A1 = j; - A2 = *A - T23 * A1; - -/* Break X into two parts such that X = 2^23 * X1 + X2, compute - Z = A1 * X2 + A2 * X1 (mod 2^23), and then - X = 2^23 * Z + A2 * X2 (mod 2^46). 
*/ - - T1 = R23 * *X; - j = T1; - X1 = j; - X2 = *X - T23 * X1; - T1 = A1 * X2 + A2 * X1; - - j = R23 * T1; - T2 = j; - Z = T1 - T23 * T2; - T3 = T23 * Z + A2 * X2; - j = R46 * T3; - T4 = j; - *X = T3 - T46 * T4; - return(R46 * *X); -} - - - -/*****************************************************************/ -/************ F I N D _ M Y _ S E E D ************/ -/************ ************/ -/************ returns parallel random number seq seed ************/ -/*****************************************************************/ - -/* - * Create a random number sequence of total length nn residing - * on np number of processors. Each processor will therefore have a - * subsequence of length nn/np. This routine returns that random - * number which is the first random number for the subsequence belonging - * to processor rank kn, and which is used as seed for proc kn ran # gen. - */ - -double find_my_seed( int kn, /* my processor rank, 0<=kn<=num procs */ - int np, /* np = num procs */ - long nn, /* total num of ran numbers, all procs */ - double s, /* Ran num seed, for ex.: 314159265.00 */ - double a ) /* Ran num gen mult, try 1220703125.00 */ -{ - - long i; - - double t1,t2,t3,an; - long mq,nq,kk,ik; - - - - nq = nn / np; - - for( mq=0; nq>1; mq++,nq/=2 ) - ; - - t1 = a; - - for( i=1; i<=mq; i++ ) - t2 = randlc( &t1, &t1 ); - - an = t1; - - kk = kn; - t1 = s; - t2 = an; - - for( i=1; i<=100; i++ ) - { - ik = kk / 2; - if( 2 * ik != kk ) - t3 = randlc( &t1, &t2 ); - if( ik == 0 ) - break; - t3 = randlc( &t2, &t2 ); - kk = ik; - } - - return( t1 ); - -} - - - - -/*****************************************************************/ -/************* C R E A T E _ S E Q ************/ -/*****************************************************************/ - -void create_seq( global_data* gd, double seed, double a ) -{ - double x; - int i, k; - - k = MAX_KEY/4; - - for (i=0; ikey_array[i] = k*x; - } -} - - - - -/*****************************************************************/ 
-/************* F U L L _ V E R I F Y ************/ -/*****************************************************************/ - - -void full_verify( global_data* gd ) -{ - MPI_Status status; - MPI_Request request; - - INT_TYPE i, j; - INT_TYPE k, last_local_key; - - -/* Now, finally, sort the keys: */ - for( i=0; itotal_local_keys; i++ ) - gd->key_array[--gd->key_buff_ptr_global[gd->key_buff2[i]]- - gd->total_lesser_keys] = gd->key_buff2[i]; - last_local_key = (gd->total_local_keys<1)? 0 : (gd->total_local_keys-1); - -/* Send largest key value to next processor */ - if( gd->my_rank > 0 ) - MPI_Irecv( &k, - 1, - MP_KEY_TYPE, - gd->my_rank-1, - 1000, - MPI_COMM_WORLD, - &request ); - if( gd->my_rank < gd->comm_size-1 ) - MPI_Send( &gd->key_array[last_local_key], - 1, - MP_KEY_TYPE, - gd->my_rank+1, - 1000, - MPI_COMM_WORLD ); - if( gd->my_rank > 0 ) - MPI_Wait( &request, &status ); - -/* Confirm that neighbor's greatest key value - is not greater than my least key value */ - j = 0; - if( gd->my_rank > 0 && gd->total_local_keys > 0 ) - if( k > gd->key_array[0] ) - j++; - - -/* Confirm keys correctly sorted: count incorrectly sorted keys, if any */ - for( i=1; itotal_local_keys; i++ ) - if( gd->key_array[i-1] > gd->key_array[i] ) - j++; - - - if( j != 0 ) - { - printf( "Processor %d: Full_verify: number of keys out of sort: %d\n", - gd->my_rank, j ); - } - else - gd->passed_verification++; - - -} - - - - -/*****************************************************************/ -/************* R A N K ****************/ -/*****************************************************************/ - - -void rank( global_data* gd, int iteration ) -{ - - INT_TYPE i, k; - - INT_TYPE shift = MAX_KEY_LOG_2 - NUM_BUCKETS_LOG_2; - INT_TYPE key; - INT_TYPE2 bucket_sum_accumulator, j, m; - INT_TYPE local_bucket_sum_accumulator; - INT_TYPE min_key_val, max_key_val; - INT_TYPE *key_buff_ptr; - - - - -/* Iteration alteration of keys */ - if(gd->my_rank == 0 ) - { - gd->key_array[iteration] = iteration; 
- gd->key_array[iteration+MAX_ITERATIONS] = MAX_KEY - iteration; - } - - -/* Initialize */ - for( i=0; ibucket_size[i] = 0; - gd->bucket_size_totals[i] = 0; - gd->process_bucket_distrib_ptr1[i] = 0; - gd->process_bucket_distrib_ptr2[i] = 0; - } - - -/* Determine where the partial verify test keys are, load into */ -/* top of array bucket_size */ - for( i=0; itest_index_array[i]/NUM_KEYS) == gd->my_rank ) - gd->bucket_size[NUM_BUCKETS+i] = - gd->key_array[gd->test_index_array[i] % NUM_KEYS]; - - -/* Determine the number of keys in each bucket */ - for( i=0; ibucket_size[gd->key_array[i] >> shift]++; - - -/* Accumulative bucket sizes are the bucket pointers */ - gd->bucket_ptrs[0] = 0; - for( i=1; i< NUM_BUCKETS; i++ ) - gd->bucket_ptrs[i] = gd->bucket_ptrs[i-1] + gd->bucket_size[i-1]; - - -/* Sort into appropriate bucket */ - for( i=0; ikey_array[i]; - gd->key_buff1[gd->bucket_ptrs[key >> shift]++] = key; - } - -#ifdef TIMING_ENABLED - timer_stop(gd, 2 ); - timer_start(gd, 3 ); -#endif - -/* Get the bucket size totals for the entire problem. These - will be used to determine the redistribution of keys */ - MPI_Allreduce( gd->bucket_size, - gd->bucket_size_totals, - NUM_BUCKETS+TEST_ARRAY_SIZE, - MP_KEY_TYPE, - MPI_SUM, - MPI_COMM_WORLD ); - -#ifdef TIMING_ENABLED - timer_stop(gd, 3 ); - timer_start(gd, 2 ); -#endif - -/* Determine Redistibution of keys: accumulate the bucket size totals - till this number surpasses NUM_KEYS (which the average number of keys - per processor). Then all keys in these buckets go to processor 0. - Continue accumulating again until supassing 2*NUM_KEYS. All keys - in these buckets go to processor 1, etc. This algorithm guarantees - that all processors have work ranking; no processors are left idle. 
- The optimum number of buckets, however, does not result in as high - a degree of load balancing (as even a distribution of keys as is - possible) as is obtained from increasing the number of buckets, but - more buckets results in more computation per processor so that the - optimum number of buckets turns out to be 1024 for machines tested. - Note that process_bucket_distrib_ptr1 and ..._ptr2 hold the bucket - number of first and last bucket which each processor will have after - the redistribution is done. */ - - bucket_sum_accumulator = 0; - local_bucket_sum_accumulator = 0; - gd->send_displ[0] = 0; - gd->process_bucket_distrib_ptr1[0] = 0; - for( i=0, j=0; ibucket_size_totals[i]; - local_bucket_sum_accumulator += gd->bucket_size[i]; - if( bucket_sum_accumulator >= (j+1)*NUM_KEYS ) - { - gd->send_count[j] = local_bucket_sum_accumulator; - if( j != 0 ) - { - gd->send_displ[j] = gd->send_displ[j-1] + gd->send_count[j-1]; - gd->process_bucket_distrib_ptr1[j] = - gd->process_bucket_distrib_ptr2[j-1]+1; - } - gd->process_bucket_distrib_ptr2[j++] = i; - local_bucket_sum_accumulator = 0; - } - } - -/* When NUM_PROCS approaching NUM_BUCKETS, it is highly possible - that the last few processors don't get any buckets. So, we - need to set counts properly in this case to avoid any fallouts. 
*/ - while( j < gd->comm_size ) - { - gd->send_count[j] = 0; - gd->process_bucket_distrib_ptr1[j] = 1; - j++; - } - -#ifdef TIMING_ENABLED - timer_stop(gd, 2 ); - timer_start(gd, 3 ); -#endif - -/* This is the redistribution section: first find out how many keys - each processor will send to every other processor: */ - MPI_Alltoall( gd->send_count, - 1, - MPI_INT, - gd->recv_count, - 1, - MPI_INT, - MPI_COMM_WORLD ); - -/* Determine the receive array displacements for the buckets */ - gd->recv_displ[0] = 0; - for( i=1; icomm_size; i++ ) - gd->recv_displ[i] = gd->recv_displ[i-1] + gd->recv_count[i-1]; - - -/* Now send the keys to respective processors */ - MPI_Alltoallv( gd->key_buff1, - gd->send_count, - gd->send_displ, - MP_KEY_TYPE, - gd->key_buff2, - gd->recv_count, - gd->recv_displ, - MP_KEY_TYPE, - MPI_COMM_WORLD ); - -#ifdef TIMING_ENABLED - timer_stop(gd, 3 ); - timer_start(gd, 2 ); -#endif - -/* The starting and ending bucket numbers on each processor are - multiplied by the interval size of the buckets to obtain the - smallest possible min and greatest possible max value of any - key on each processor */ - min_key_val = gd->process_bucket_distrib_ptr1[gd->my_rank] << shift; - max_key_val = ((gd->process_bucket_distrib_ptr2[gd->my_rank] + 1) << shift)-1; - -/* Clear the work array */ - for( i=0; ikey_buff1[i] = 0; - -/* Determine the total number of keys on all other - processors holding keys of lesser value */ - m = 0; - for( k=0; kmy_rank; k++ ) - for( i= gd->process_bucket_distrib_ptr1[k]; - i<=gd->process_bucket_distrib_ptr2[k]; - i++ ) - m += gd->bucket_size_totals[i]; /* m has total # of lesser keys */ - -/* Determine total number of keys on this processor */ - j = 0; - for( i= gd->process_bucket_distrib_ptr1[gd->my_rank]; - i<=gd->process_bucket_distrib_ptr2[gd->my_rank]; - i++ ) - j += gd->bucket_size_totals[i]; /* j has total # of local keys */ - - -/* Ranking of all keys occurs in this section: */ -/* shift it backwards so no subtractions are 
necessary in loop */ - key_buff_ptr = gd->key_buff1 - min_key_val; - -/* In this section, the keys themselves are used as their - own indexes to determine how many of each there are: their - individual population */ - for( i=0; ikey_buff2[i]]++; /* Now they have individual key */ - /* population */ - -/* To obtain ranks of each key, successively add the individual key - population, not forgetting the total of lesser keys, m. - NOTE: Since the total of lesser keys would be subtracted later - in verification, it is no longer added to the first key population - here, but still needed during the partial verify test. This is to - ensure that 32-bit key_buff can still be used for class D. */ -/* key_buff_ptr[min_key_val] += m; */ - for( i=min_key_val; ibucket_size_totals[i+NUM_BUCKETS]; /* Keys were hidden here */ - if( min_key_val <= k && k <= max_key_val ) - { - /* Add the total of lesser keys, m, here */ - INT_TYPE2 key_rank = key_buff_ptr[k-1] + m; - int failed = 0; - - switch( CLASS ) - { - case 'S': - if( i <= 2 ) - { - if( key_rank != gd->test_rank_array[i]+iteration ) - failed = 1; - else - gd->passed_verification++; - } - else - { - if( key_rank != gd->test_rank_array[i]-iteration ) - failed = 1; - else - gd->passed_verification++; - } - break; - case 'W': - if( i < 2 ) - { - if( key_rank != gd->test_rank_array[i]+(iteration-2) ) - failed = 1; - else - gd->passed_verification++; - } - else - { - if( key_rank != gd->test_rank_array[i]-iteration ) - failed = 1; - else - gd->passed_verification++; - } - break; - case 'A': - if( i <= 2 ) - { - if( key_rank != gd->test_rank_array[i]+(iteration-1) ) - failed = 1; - else - gd->passed_verification++; - } - else - { - if( key_rank != gd->test_rank_array[i]-(iteration-1) ) - failed = 1; - else - gd->passed_verification++; - } - break; - case 'B': - if( i == 1 || i == 2 || i == 4 ) - { - if( key_rank != gd->test_rank_array[i]+iteration ) - failed = 1; - else - gd->passed_verification++; - } - else - { - if( key_rank != 
gd->test_rank_array[i]-iteration ) - failed = 1; - else - gd->passed_verification++; - } - break; - case 'C': - if( i <= 2 ) - { - if( key_rank != gd->test_rank_array[i]+iteration ) - failed = 1; - else - gd->passed_verification++; - } - else - { - if( key_rank != gd->test_rank_array[i]-iteration ) - failed = 1; - else - gd->passed_verification++; - } - break; - case 'D': - if( i < 2 ) - { - if( key_rank != gd->test_rank_array[i]+iteration ) - failed = 1; - else - gd->passed_verification++; - } - else - { - if( key_rank != gd->test_rank_array[i]-iteration ) - failed = 1; - else - gd->passed_verification++; - } - break; - } - if( failed == 1 ) - printf( "Failed partial verification: " - "iteration %d, processor %d, test key %d\n", - iteration, gd->my_rank, (int)i ); - } - } - - - - -/* Make copies of rank info for use by full_verify: these variables - in rank are local; making them global slows down the code, probably - since they cannot be made register by compiler */ - - if( iteration == MAX_ITERATIONS ) - { - gd->key_buff_ptr_global = key_buff_ptr; - gd->total_local_keys = j; - gd->total_lesser_keys = 0; /* no longer set to 'm', see note above */ - } - -} - - -/*****************************************************************/ -/************* M A I N ****************/ -/*****************************************************************/ - -int main( int argc, char **argv ) -{ - - int i, iteration, itemp; - - double timecounter, maxtime; - - global_data* gd = malloc(sizeof(global_data)); -/* Initialize MPI */ - MPI_Init( &argc, &argv ); - MPI_Comm_rank( MPI_COMM_WORLD, &gd->my_rank ); - MPI_Comm_size( MPI_COMM_WORLD, &gd->comm_size ); - -/* Initialize the verification arrays if a valid class */ - for( i=0; itest_index_array[i] = S_test_index_array[i]; - gd->test_rank_array[i] = S_test_rank_array[i]; - break; - case 'A': - gd->test_index_array[i] = A_test_index_array[i]; - gd->test_rank_array[i] = A_test_rank_array[i]; - break; - case 'W': - gd->test_index_array[i] 
= W_test_index_array[i]; - gd->test_rank_array[i] = W_test_rank_array[i]; - break; - case 'B': - gd->test_index_array[i] = B_test_index_array[i]; - gd->test_rank_array[i] = B_test_rank_array[i]; - break; - case 'C': - gd->test_index_array[i] = C_test_index_array[i]; - gd->test_rank_array[i] = C_test_rank_array[i]; - break; - case 'D': - gd->test_index_array[i] = D_test_index_array[i]; - gd->test_rank_array[i] = D_test_rank_array[i]; - break; - }; - - - -/* Printout initial NPB info */ - if( gd->my_rank == 0 ) - { - printf( "\n\n NAS Parallel Benchmarks 3.3 -- IS Benchmark\n\n" ); - printf( " Size: %ld (class %c)\n", (long)TOTAL_KEYS*MIN_PROCS, CLASS ); - printf( " Iterations: %d\n", MAX_ITERATIONS ); - printf( " Number of processes: %d\n",gd->comm_size ); - } - -/* Check that actual and compiled number of processors agree */ - if( gd->comm_size != NUM_PROCS ) - { - if( gd->my_rank == 0 ) - printf( "\n ERROR: compiled for %d processes\n" - " Number of active processes: %d\n" - " Exiting program!\n\n", NUM_PROCS, gd->comm_size ); - MPI_Finalize(); - exit( 1 ); - } - -/* Check to see whether total number of processes is within bounds. 
- This could in principle be checked in setparams.c, but it is more - convenient to do it here */ - if( gd->comm_size < MIN_PROCS || gd->comm_size > MAX_PROCS) - { - if( gd->my_rank == 0 ) - printf( "\n ERROR: number of processes %d not within range %d-%d" - "\n Exiting program!\n\n", gd->comm_size, MIN_PROCS, MAX_PROCS); - MPI_Finalize(); - exit( 1 ); - } - - -/* Generate random number sequence and subsequent keys on all procs */ - create_seq(gd, find_my_seed( gd->my_rank, - gd->comm_size, - 4*(long)TOTAL_KEYS*MIN_PROCS, - 314159265.00, /* Random number gen seed */ - 1220703125.00 ), /* Random number gen mult */ - 1220703125.00 ); /* Random number gen mult */ - -/* Do one interation for free (i.e., untimed) to guarantee initialization of - all data and code pages and respective tables */ - rank(gd, 1 ); - -/* Start verification counter */ - gd->passed_verification = 0; - - if( gd->my_rank == 0 && CLASS != 'S' ) printf( "\n iteration\n" ); - -/* Initialize timer */ - timer_clear(gd, 0 ); - -/* Initialize separate communication, computation timing */ -#ifdef TIMING_ENABLED - for( i=1; i<=3; i++ ) timer_clear(gd, i ); -#endif - -/* Start timer */ - timer_start(gd, 0 ); - -#ifdef TIMING_ENABLED - timer_start(gd, 1 ); - timer_start(gd, 2 ); -#endif - - char smpi_category[100]; - snprintf (smpi_category, 100, "%d", gd->my_rank); - TRACE_smpi_set_category (smpi_category); - -/* This is the main iteration */ - for( iteration=1; iteration<=MAX_ITERATIONS; iteration++ ) - { - if( gd->my_rank == 0 && CLASS != 'S' ) printf( " %d\n", iteration ); - rank(gd, iteration ); - } - TRACE_smpi_set_category (NULL); - -#ifdef TIMING_ENABLED - timer_stop(gd, 2 ); - timer_stop(gd, 1 ); -#endif - -/* Stop timer, obtain time for processors */ - timer_stop(gd, 0 ); - - timecounter = timer_read(gd, 0 ); - -/* End of timing, obtain maximum time of all processors */ - MPI_Reduce( &timecounter, - &maxtime, - 1, - MPI_DOUBLE, - MPI_MAX, - 0, - MPI_COMM_WORLD ); - -#ifdef TIMING_ENABLED - { - 
double tmin, tsum, tmax; - - if( my_rank == 0 ) - { - printf( "\ntimer 1/2/3 = total/computation/communication time\n"); - printf( " min avg max\n" ); - } - for( i=1; i<=3; i++ ) - { - timecounter = timer_read(gd, i ); - MPI_Reduce( &timecounter, - &tmin, - 1, - MPI_DOUBLE, - MPI_MIN, - 0, - MPI_COMM_WORLD ); - MPI_Reduce( &timecounter, - &tsum, - 1, - MPI_DOUBLE, - MPI_SUM, - 0, - MPI_COMM_WORLD ); - MPI_Reduce( &timecounter, - &tmax, - 1, - MPI_DOUBLE, - MPI_MAX, - 0, - MPI_COMM_WORLD ); - if( my_rank == 0 ) - printf( "timer %d: %f %f %f\n", - i, tmin, tsum/((double) comm_size), tmax ); - } - if( my_rank == 0 ) - printf( "\n" ); - } -#endif - -/* This tests that keys are in sequence: sorting of last ranked key seq - occurs here, but is an untimed operation */ - full_verify(gd); - - -/* Obtain verification counter sum */ - itemp =gd->passed_verification; - MPI_Reduce( &itemp, - &gd->passed_verification, - 1, - MPI_INT, - MPI_SUM, - 0, - MPI_COMM_WORLD ); - - - -/* The final printout */ - if( gd->my_rank == 0 ) - { - if( gd->passed_verification != 5*MAX_ITERATIONS + gd->comm_size ) - gd->passed_verification = 0; - c_print_results( "IS", - CLASS, - (int)(TOTAL_KEYS), - MIN_PROCS, - 0, - MAX_ITERATIONS, - NUM_PROCS, - gd->comm_size, - maxtime, - ((double) (MAX_ITERATIONS)*TOTAL_KEYS*MIN_PROCS) - /maxtime/1000000., - "keys ranked", - gd->passed_verification, - NPBVERSION, - COMPILETIME, - MPICC, - CLINK, - CMPI_LIB, - CMPI_INC, - CFLAGS, - CLINKFLAGS ); - } - - MPI_Finalize(); - free(gd); - - return 0; - /**************************/ -} /* E N D P R O G R A M */ - /**************************/ diff --git a/examples/smpi/NAS/Makefile b/examples/smpi/NAS/Makefile deleted file mode 100644 index a15725403d..0000000000 --- a/examples/smpi/NAS/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -SHELL=/bin/sh -CLASS=S -NPROCS=1 -default: - @ sys/print_instructions - -IS: is -is: - cd IS; $(MAKE) NPROCS=$(NPROCS) CLASS=$(CLASS) - -EP: ep -ep: - cd EP; $(MAKE) NPROCS=$(NPROCS) 
CLASS=$(CLASS) - -DT: dt -dt: - cd DT; $(MAKE) CLASS=$(CLASS) - -clean: - - rm -f *~ */*~ */*.o */npbparams.h - - rm -f sys/setparams sys/setparams.h - -veryclean: clean - - rm -f bin/* \ No newline at end of file diff --git a/examples/smpi/NAS/common/c_print_results.c b/examples/smpi/NAS/common/c_print_results.c deleted file mode 100644 index 942c37075a..0000000000 --- a/examples/smpi/NAS/common/c_print_results.c +++ /dev/null @@ -1,53 +0,0 @@ -#include -#include - -void c_print_results(char *name, char class, int n1, int n2, int n3, int niter, int nprocs_compiled, int nprocs_total, - double t, double mops, char *optype, int passed_verification, char *npbversion, char *compiletime, - char *mpicc, char *clink, char *cmpi_lib, char *cmpi_inc, char *cflags, char *clinkflags) -{ - printf( "\n\n %s Benchmark Completed\n", name ); - printf( " Class = %c\n", class ); - - if( n3 == 0 ) { - long nn = n1; - if ( n2 != 0 ) nn *= n2; - printf( " Size = %12ld\n", nn ); /* as in IS */ - } - else - printf( " Size = %3dx %3dx %3d\n", n1,n2,n3 ); - - printf( " Iterations = %12d\n", niter ); - printf( " Time in seconds = %12.2f\n", t ); - printf( " Total processes = %12d\n", nprocs_total ); - - if ( nprocs_compiled != 0 ) - printf( " Compiled procs = %12d\n", nprocs_compiled ); - - printf( " Mop/s total = %12.2f\n", mops ); - printf( " Mop/s/process = %12.2f\n", mops/((float) nprocs_total) ); - printf( " Operation type = %24s\n", optype); - - if( passed_verification ) - printf( " Verification = SUCCESSFUL\n" ); - else - printf( " Verification = UNSUCCESSFUL\n" ); - - printf( " Version = %12s\n", npbversion ); - printf( " Compile date = %12s\n", compiletime ); - printf( "\n Compile options:\n" ); - printf( " MPICC = %s\n", mpicc ); - printf( " CLINK = %s\n", clink ); - printf( " CMPI_LIB = %s\n", cmpi_lib ); - printf( " CMPI_INC = %s\n", cmpi_inc ); - printf( " CFLAGS = %s\n", cflags ); - printf( " CLINKFLAGS = %s\n", clinkflags ); - printf( "\n\n" ); - printf( " Please send the 
results of this run to:\n\n" ); - printf( " NPB Development Team\n" ); - printf( " Internet: npb@nas.nasa.gov\n \n" ); - printf( " If email is not available, send this to:\n\n" ); - printf( " MS T27A-1\n" ); - printf( " NASA Ames Research Center\n" ); - printf( " Moffett Field, CA 94035-1000\n\n" ); - printf( " Fax: 650-604-3957\n\n" ); -} diff --git a/examples/smpi/NAS/common/c_timers.c b/examples/smpi/NAS/common/c_timers.c deleted file mode 100644 index a3af153d29..0000000000 --- a/examples/smpi/NAS/common/c_timers.c +++ /dev/null @@ -1,27 +0,0 @@ -#include "mpi.h" - -double start[64], elapsed[64]; - -void timer_clear( int n ) -{ - elapsed[n] = 0.0; -} - -void timer_start( int n ) -{ - start[n] = MPI_Wtime(); -} - -void timer_stop( int n ) -{ - double t, now; - now = MPI_Wtime(); - t = now - start[n]; - elapsed[n] += t; -} - -double timer_read( int n ) -{ - return( elapsed[n] ); -} - diff --git a/examples/smpi/NAS/common/randdp.c b/examples/smpi/NAS/common/randdp.c deleted file mode 100644 index 554d6b68de..0000000000 --- a/examples/smpi/NAS/common/randdp.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * FUNCTION RANDLC (X, A) - * - * This routine returns a uniform pseudorandom double precision number in the - * range (0, 1) by using the linear congruential generator - * - * x_{k+1} = a x_k (mod 2^46) - * - * where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers - * before repeating. The argument A is the same as 'a' in the above formula, - * and X is the same as x_0. A and X must be odd double precision integers - * in the range (1, 2^46). The returned value RANDLC is normalized to be - * between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain - * the new seed x_1, so that subsequent calls to RANDLC using the same - * arguments will generate a continuous sequence. - * - * This routine should produce the same results on any computer with at least - * 48 mantissa bits in double precision floating point data. 
On Cray systems, - * double precision should be disabled. - * - * David H. Bailey October 26, 1990 - * - * IMPLICIT DOUBLE PRECISION (A-H, O-Z) - * SAVE KS, R23, R46, T23, T46 - * DATA KS/0/ - * - * If this is the first call to RANDLC, compute R23 = 2 ^ -23, R46 = 2 ^ -46, - * T23 = 2 ^ 23, and T46 = 2 ^ 46. These are computed in loops, rather than - * by merely using the ** operator, in order to insure that the results are - * exact on all systems. This code assumes that 0.5D0 is represented exactly. - */ -double randlc(double *X, double*A) -{ - static int KS=0; - static double R23, R46, T23, T46; - double T1, T2, T3, T4; - double A1, A2; - double X1, X2; - double Z; - int i, j; - - if (KS == 0) { - R23 = 1.0; - R46 = 1.0; - T23 = 1.0; - T46 = 1.0; - - for (i=1; i<=23; i++) { - R23 = 0.50 * R23; - T23 = 2.0 * T23; - } - for (i=1; i<=46; i++) { - R46 = 0.50 * R46; - T46 = 2.0 * T46; - } - KS = 1; - } - -/* Break A into two parts such that A = 2^23 * A1 + A2 and set X = N. */ - T1 = R23 * *A; - j = T1; - A1 = j; - A2 = *A - T23 * A1; - -/* Break X into two parts such that X = 2^23 * X1 + X2, compute - Z = A1 * X2 + A2 * X1 (mod 2^23), and then X = 2^23 * Z + A2 * X2 (mod 2^46). */ - T1 = R23 * *X; - j = T1; - X1 = j; - X2 = *X - T23 * X1; - T1 = A1 * X2 + A2 * X1; - - j = R23 * T1; - T2 = j; - Z = T1 - T23 * T2; - T3 = T23 * Z + A2 * X2; - j = R46 * T3; - T4 = j; - *X = T3 - T46 * T4; - return(R46 * *X); -} diff --git a/examples/smpi/NAS/config/make.def b/examples/smpi/NAS/config/make.def deleted file mode 100644 index ad8f454a91..0000000000 --- a/examples/smpi/NAS/config/make.def +++ /dev/null @@ -1,73 +0,0 @@ -#--------------------------------------------------------------------------- -# -# SITE- AND/OR PLATFORM-SPECIFIC DEFINITIONS. -# -#--------------------------------------------------------------------------- - -#--------------------------------------------------------------------------- -# Items in this file will need to be changed for each platform. 
-#--------------------------------------------------------------------------- - -#--------------------------------------------------------------------------- -# Parallel C: -# -# MPICC - C compiler -# CFLAGS - C compilation arguments -# CMPI_INC - any -I arguments required for compiling MPI/C -# CLINK - C linker -# CLINKFLAGS - C linker flags -# CMPI_LIB - any -L and -l arguments required for linking MPI/C -# -# compilations are done with $(MPICC) $(CMPI_INC) $(CFLAGS) or -# $(MPICC) $(CFLAGS) -# linking is done with $(CLINK) $(CMPI_LIB) $(CLINKFLAGS) -#--------------------------------------------------------------------------- - -#--------------------------------------------------------------------------- -# This is the C compiler used for MPI programs -#--------------------------------------------------------------------------- -MPICC = smpicc -# This links MPI C programs; usually the same as ${MPICC} -CLINK = $(MPICC) - -#--------------------------------------------------------------------------- -# These macros are passed to the linker to help link with MPI correctly -#--------------------------------------------------------------------------- -CMPI_LIB = - -#--------------------------------------------------------------------------- -# These macros are passed to the compiler to help find 'mpi.h' -#--------------------------------------------------------------------------- -CMPI_INC = - -#--------------------------------------------------------------------------- -# Global *compile time* flags for C programs -#--------------------------------------------------------------------------- -CFLAGS = -O2 - -#--------------------------------------------------------------------------- -# Global *link time* flags. Flags for increasing maximum executable -# size usually go here. 
-#--------------------------------------------------------------------------- -CLINKFLAGS = -O2 - -#--------------------------------------------------------------------------- -# Utilities C: -# -# This is the C compiler used to compile C utilities. Flags required by -# this compiler go here also; typically there are few flags required; hence -# there are no separate macros provided for such flags. -#--------------------------------------------------------------------------- -CC = gcc -g - -#--------------------------------------------------------------------------- -# Destination of executables, relative to subdirs of the main directory. . -#--------------------------------------------------------------------------- -BINDIR = ../bin - -#--------------------------------------------------------------------------- -# The variable RAND controls which random number generator -# is used. It is described in detail in README.install. -# Use "randi8" unless there is a reason to use another one. 
-#--------------------------------------------------------------------------- -RAND = randi8 diff --git a/examples/smpi/NAS/DT/dt.c b/examples/smpi/NAS/dt.c similarity index 82% rename from examples/smpi/NAS/DT/dt.c rename to examples/smpi/NAS/dt.c index 1a4931704a..ca3a18a942 100644 --- a/examples/smpi/NAS/DT/dt.c +++ b/examples/smpi/NAS/dt.c @@ -41,53 +41,26 @@ #include #include -#include "mpi.h" -#include "npbparams.h" - +#include "smpi/mpi.h" +#include "nas_common.h" #include "simgrid/instr.h" //TRACE_ -#ifndef CLASS -#define CLASS 'S' -#define NUM_PROCS 1 -#endif - -//int passed_verification; -extern double randlc( double *X, double *A ); -extern -void c_print_results( char *name, - char class, - int n1, - int n2, - int n3, - int niter, - int nprocs_compiled, - int nprocs_total, - double t, - double mops, - char *optype, - int passed_verification, - char *npbversion, - char *compiletime, - char *mpicc, - char *clink, - char *cmpi_lib, - char *cmpi_inc, - char *cflags, - char *clinkflags ); - -void timer_clear( int n ); -void timer_start( int n ); -void timer_stop( int n ); -double timer_read( int n ); + int timer_on=0,timers_tot=64; +double start[64], elapsed[64]; -int verify(char *bmname,double rnm2){ +char class; +int nprocs; +int num_samples; +int deviation; +int num_sources; + +static int verify(char *bmname,double rnm2){ double verify_value=0.0; double epsilon=1.0E-8; - char cls=CLASS; int verified=-1; - if (cls != 'U') { - if(cls=='S') { + if (class != 'U') { + if(class=='S') { if(strstr(bmname,"BH")){ verify_value=30892725.0; }else if(strstr(bmname,"WH")){ @@ -98,18 +71,18 @@ int verify(char *bmname,double rnm2){ fprintf(stderr,"No such benchmark as %s.\n",bmname); } verified = 0; - }else if(cls=='W') { + }else if(class=='W') { if(strstr(bmname,"BH")){ verify_value = 4102461.0; }else if(strstr(bmname,"WH")){ - verify_value = 204280762.0; + verify_value = 204280762.0; }else if(strstr(bmname,"SH")){ verify_value = 186944764.0; }else{ fprintf(stderr,"No 
such benchmark as %s.\n",bmname); } verified = 0; - }else if(cls=='A') { + }else if(class=='A') { if(strstr(bmname,"BH")){ verify_value = 17809491.0; }else if(strstr(bmname,"WH")){ @@ -120,7 +93,7 @@ int verify(char *bmname,double rnm2){ fprintf(stderr,"No such benchmark as %s.\n",bmname); } verified = 0; - }else if(cls=='B') { + }else if(class=='B') { if(strstr(bmname,"BH")){ verify_value = 4317114.0; }else if(strstr(bmname,"WH")){ @@ -131,7 +104,7 @@ int verify(char *bmname,double rnm2){ fprintf(stderr,"No such benchmark as %s.\n",bmname); verified = 0; } - }else if(cls=='C') { + }else if(class=='C') { if(strstr(bmname,"BH")){ verify_value = 0.0; }else if(strstr(bmname,"WH")){ @@ -142,7 +115,7 @@ int verify(char *bmname,double rnm2){ fprintf(stderr,"No such benchmark as %s.\n",bmname); verified = -1; } - }else if(cls=='D') { + }else if(class=='D') { if(strstr(bmname,"BH")){ verify_value = 0.0; }else if(strstr(bmname,"WH")){ @@ -154,7 +127,7 @@ int verify(char *bmname,double rnm2){ } verified = -1; }else{ - fprintf(stderr,"No such class as %c.\n",cls); + fprintf(stderr,"No such class as %c.\n",class); } fprintf(stderr," %s L2 Norm = %f\n",bmname,rnm2); if(verified==-1){ @@ -174,7 +147,7 @@ int verify(char *bmname,double rnm2){ return verified; } -int ipowMod(int a,long long int n,int md){ +static int ipowMod(int a,long long int n,int md){ int seed=1,q=a,r=1; if(n<0){ fprintf(stderr,"ipowMod: exponent must be nonnegative exp=%lld\n",n); @@ -203,13 +176,13 @@ int ipowMod(int a,long long int n,int md){ } #include "DGraph.h" -DGraph *buildSH(char cls){ +static DGraph *buildSH(const char cls){ /* Nodes of the graph must be topologically sorted to avoid MPI deadlock. 
*/ DGraph *dg; - int numSources=NUM_SOURCES; /* must be power of 2 */ + int numSources=num_sources; /* must be power of 2 */ int numOfLayers=0,tmpS=numSources>>1; int firstLayerNode=0; DGArc *ar=NULL; @@ -263,13 +236,10 @@ DGraph *buildSH(char cls){ } return dg; } -DGraph *buildWH(char cls){ -/* - Nodes of the graph must be topologically sorted - to avoid MPI deadlock. -*/ +static DGraph *buildWH(const char cls){ +/* Nodes of the graph must be topologically sorted to avoid MPI deadlock. */ int i=0,j=0; - int numSources=NUM_SOURCES,maxInDeg=4; + int numSources=num_sources,maxInDeg=4; int numLayerNodes=numSources,firstLayerNode=0; int totComparators=0; int numPrevLayerNodes=numLayerNodes; @@ -308,7 +278,7 @@ DGraph *buildWH(char cls){ firstLayerNode+=numPrevLayerNodes; numPrevLayerNodes=numLayerNodes; } - source=newNode("Source"); + source=newNode((char*)"Source"); AttachNode(dg,source); for(i=0;inode[firstLayerNode+i]; @@ -325,13 +295,10 @@ DGraph *buildWH(char cls){ } return dg; } -DGraph *buildBH(char cls){ -/* - Nodes of the graph must be topologically sorted - to avoid MPI deadlock. 
-*/ +static DGraph *buildBH(const char cls){ +/* Nodes of the graph must be topologically sorted to avoid MPI deadlock.*/ int i=0,j=0; - int numSources=NUM_SOURCES,maxInDeg=4; + int numSources=num_sources,maxInDeg=4; int numLayerNodes=numSources,firstLayerNode=0; DGraph *dg; DGNode *nd=NULL, *snd=NULL, *sink=NULL; @@ -368,7 +335,7 @@ DGraph *buildBH(char cls){ firstLayerNode+=numPrevLayerNodes; numPrevLayerNodes=numLayerNodes; } - sink=newNode("Sink"); + sink=newNode((char*)"Sink"); AttachNode(dg,sink); for(i=0;inode[firstLayerNode+i]; @@ -382,38 +349,42 @@ typedef struct{ int len; double* val; } Arr; -Arr *newArr(int len){ - Arr *arr=(Arr *)malloc(sizeof(Arr)); + +static Arr *newArr(int len){ + Arr *arr=(Arr *)malloc(sizeof(Arr)); //Arr *arr=(Arr *)SMPI_SHARED_MALLOC(sizeof(Arr)); arr->len=len; - arr->val=(double *)malloc(len*sizeof(double)); + arr->val=(double *)malloc(len*sizeof(double)); //arr->val=(double *)SMPI_SHARED_MALLOC(len*sizeof(double)); return arr; } -void arrShow(Arr* a){ + +static void arrShow(Arr* a){ if(!a) fprintf(stderr,"-- NULL array\n"); else{ fprintf(stderr,"-- length=%d\n",a->len); } } -double CheckVal(Arr *feat){ + +static double CheckVal(Arr *feat){ double csum=0.0; int i=0; for(i=0;ilen;i++){ - csum+=feat->val[i]*feat->val[i]/feat->len; /* The truncation does not work since - result will be 0 for large len */ + csum+=feat->val[i]*feat->val[i]/feat->len; /* The truncation does not work since result will be 0 for large len */ } - return csum; + return csum; } -int GetFNumDPar(int* mean, int* stdev){ - *mean=NUM_SAMPLES; - *stdev=STD_DEVIATION; + +static int GetFNumDPar(int* mean, int* stdev){ + *mean=num_samples; + *stdev=deviation; return 0; } -int GetFeatureNum(char *mbname,int id){ + +static int GetFeatureNum(char *mbname,int id){ double tran=314159265.0; double A=2*id+1; double denom=randlc(&tran,&A); char cval='S'; - int mean=NUM_SAMPLES,stdev=128; + int mean=num_samples,stdev=128; int rtfs=0,len=0; GetFNumDPar(&mean,&stdev); 
rtfs=ipowMod((int)(1/denom)*(int)cval,(long long int) (2*id+1),2*stdev); @@ -421,7 +392,8 @@ int GetFeatureNum(char *mbname,int id){ len=mean-stdev+rtfs; return len; } -Arr* RandomFeatures(char *bmname,int fdim,int id){ + +static Arr* RandomFeatures(char *bmname,int fdim,int id){ int len=GetFeatureNum(bmname,id)*fdim; Arr* feat=newArr(len); int nxg=2,nyg=2,nzg=2,nfg=5; @@ -450,31 +422,33 @@ Arr* RandomFeatures(char *bmname,int fdim,int id){ timer_stop(id+1); fprintf(stderr,"** RandomFeatures time in node %d = %f\n",id,timer_read(id+1)); } - return feat; + return feat; } -void Resample(Arr *a,int blen){ + +static void Resample(Arr *a,int blen){ long long int i=0,j=0,jlo=0,jhi=0; double avval=0.0; double *nval=(double *)malloc(blen*sizeof(double)); - Arr *tmp=newArr(10); + //double *nval=(double *)SMPI_SHARED_MALLOC(blen*sizeof(double)); for(i=0;ilen-1;i++){ jlo=(int)(0.5*(2*i-1)*(blen/a->len)); jhi=(int)(0.5*(2*i+1)*(blen/a->len)); - avval=a->val[i]/(jhi-jlo+1); + avval=a->val[i]/(jhi-jlo+1); for(j=jlo;j<=jhi;j++){ nval[j]+=avval; } } nval[0]=a->val[0]; nval[blen-1]=a->val[a->len-1]; - free(a->val); + free(a->val); //SMPI_SHARED_FREE(a->val); a->val=nval; a->len=blen; } + #define fielddim 4 -Arr* WindowFilter(Arr *a, Arr* b,int w){ +static Arr* WindowFilter(Arr *a, Arr* b,int w){ int i=0,j=0,k=0; double rms0=0.0,rms1=0.0,rmsm1=0.0; double weight=((double) (w+1))/(w+2); @@ -534,7 +508,7 @@ Arr* WindowFilter(Arr *a, Arr* b,int w){ return a; } -int SendResults(DGraph *dg,DGNode *nd,Arr *feat){ +static int SendResults(DGraph *dg,DGNode *nd,Arr *feat){ int i=0,tag=0; DGArc *ar=NULL; DGNode *head=NULL; @@ -553,8 +527,8 @@ int SendResults(DGraph *dg,DGNode *nd,Arr *feat){ TRACE_smpi_set_category (NULL); return 1; } -Arr* CombineStreams(DGraph *dg,DGNode *nd){ - Arr *resfeat=newArr(NUM_SAMPLES*fielddim); +static Arr* CombineStreams(DGraph *dg,DGNode *nd){ + Arr *resfeat=newArr(num_samples*fielddim); int i=0,len=0,tag=0; DGArc *ar=NULL; DGNode *tail=NULL; @@ -573,27 +547,27 
@@ Arr* CombineStreams(DGraph *dg,DGNode *nd){ feat=newArr(len); MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status); resfeat=WindowFilter(resfeat,feat,nd->id); - free(feat); + free(feat);//SMPI_SHARED_FREE(feat); }else{ featp=(Arr *)tail->feat; feat=newArr(featp->len); memcpy(feat->val,featp->val,featp->len*sizeof(double)); resfeat=WindowFilter(resfeat,feat,nd->id); - free(feat); + free(feat);//SMPI_SHARED_FREE(feat); } } for(i=0;ilen;i++) resfeat->val[i]=((int)resfeat->val[i])/nd->inDegree; nd->feat=resfeat; return nd->feat; } -double Reduce(Arr *a,int w){ + +static double Reduce(Arr *a,int w){ double retv=0.0; if(timer_on){ timer_clear(w); timer_start(w); } - retv=(int)(w*CheckVal(a));/* The casting needed for node - and array dependent verifcation */ + retv=(int)(w*CheckVal(a));/* The casting needed for node and array dependent verifcation */ if(timer_on){ timer_stop(w); fprintf(stderr,"** Reduce time in node %d = %f\n",(w-1),timer_read(w)); @@ -601,7 +575,7 @@ double Reduce(Arr *a,int w){ return retv; } -double ReduceStreams(DGraph *dg,DGNode *nd){ +static double ReduceStreams(DGraph *dg,DGNode *nd){ double csum=0.0; int i=0,len=0,tag=0; DGArc *ar=NULL; @@ -623,7 +597,7 @@ double ReduceStreams(DGraph *dg,DGNode *nd){ feat=newArr(len); MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status); csum+=Reduce(feat,(nd->id+1)); - free(feat); + free(feat);//SMPI_SHARED_FREE(feat); }else{ csum+=Reduce(tail->feat,(nd->id+1)); } @@ -633,7 +607,7 @@ double ReduceStreams(DGraph *dg,DGNode *nd){ return retv; } -int ProcessNodes(DGraph *dg,int me){ +static int ProcessNodes(DGraph *dg,int me){ double chksum=0.0; Arr *feat=NULL; int i=0,verified=0,tag; @@ -683,17 +657,27 @@ int main(int argc,char **argv ){ int verified=0, featnum=0; double bytes_sent=2.0,tot_time=0.0; - MPI_Init( &argc, &argv ); - MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); - MPI_Comm_size( MPI_COMM_WORLD, &comm_size ); - TRACE_smpi_set_category ("begin"); 
- - if(argc!=2|| - ( strncmp(argv[1],"BH",2)!=0 - &&strncmp(argv[1],"WH",2)!=0 - &&strncmp(argv[1],"SH",2)!=0 - ) - ){ + MPI_Init( &argc, &argv ); + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + MPI_Comm_size( MPI_COMM_WORLD, &comm_size ); + + TRACE_smpi_set_category ("begin"); + get_info(argc, argv, &nprocs, &class); + check_info(DT, nprocs, class); + + if (class == 'S') { num_samples=1728; deviation=128; num_sources=4; } + else if (class == 'W') { num_samples=1728*8; deviation=128*2; num_sources=4*2; } + else if (class == 'A') { num_samples=1728*64; deviation=128*4; num_sources=4*4; } + else if (class == 'B') { num_samples=1728*512; deviation=128*8; num_sources=4*8; } + else if (class == 'C') { num_samples=1728*4096; deviation=128*16; num_sources=4*16; } + else if (class == 'D') { num_samples=1728*4096*8; deviation=128*32; num_sources=4*32; } + else { + printf("setparams: Internal error: invalid class type %c\n", class); + exit(1); + } + + + if(argc!=2|| ( strncmp(argv[1],"BH",2)!=0 && strncmp(argv[1],"WH",2)!=0 &&strncmp(argv[1],"SH",2)!=0)){ if(my_rank==0){ fprintf(stderr,"** Usage: mpirun -np N ../bin/dt.S GraphName\n"); fprintf(stderr,"** Where \n - N is integer number of MPI processes\n"); @@ -706,11 +690,11 @@ int main(int argc,char **argv ){ exit(0); } if(strncmp(argv[1],"BH",2)==0){ - dg=buildBH(CLASS); + dg=buildBH(class); }else if(strncmp(argv[1],"WH",2)==0){ - dg=buildWH(CLASS); + dg=buildWH(class); }else if(strncmp(argv[1],"SH",2)==0){ - dg=buildSH(CLASS); + dg=buildSH(class); } if(timer_on&&dg->numNodes+1>timers_tot){ @@ -740,32 +724,14 @@ int main(int argc,char **argv ){ verified=ProcessNodes(dg,my_rank); TRACE_smpi_set_category ("end"); - featnum=NUM_SAMPLES*fielddim; + featnum=num_samples*fielddim; bytes_sent=featnum*dg->numArcs; bytes_sent/=1048576; if(my_rank==0){ timer_stop(0); tot_time=timer_read(0); - c_print_results( dg->name, - CLASS, - featnum, - 0, - 0, - dg->numNodes, - 0, - comm_size, - tot_time, - bytes_sent/tot_time, - "bytes 
transmitted", - verified, - NPBVERSION, - COMPILETIME, - MPICC, - CLINK, - CMPI_LIB, - CMPI_INC, - CFLAGS, - CLINKFLAGS ); + c_print_results( dg->name, class, featnum, 0, 0, dg->numNodes, 0, comm_size, tot_time, bytes_sent/tot_time, + "bytes transmitted", verified); } MPI_Finalize(); return 1; diff --git a/examples/smpi/NAS/ep.c b/examples/smpi/NAS/ep.c new file mode 100644 index 0000000000..c56cf2043f --- /dev/null +++ b/examples/smpi/NAS/ep.c @@ -0,0 +1,318 @@ +#include +#include +#include +#include +#include "smpi/mpi.h" +#include "nas_common.h" +#include "simgrid/instr.h" //TRACE_ + +char class; +int nprocs; + +#define true 1 +#define false 0 + +int main(int argc, char **argv) { + double dum[3] = {1.,1.,1.}; + double x1, x2, sx, sy, tm, an, tt, gc; + double Mops; + double epsilon=1.0E-8, a = 1220703125., s=271828183.; + double t1, t2, t3, t4; + double sx_verify_value, sy_verify_value, sx_err, sy_err; + + int m, mk=16, + mm, nn, + nk = (int)(pow(2,mk)), + nq=10, + np, + node, + no_nodes, + i, + ik, + kk, + l, + k, nit, no_large_nodes, + np_add, k_offset; + int root=0; + int verified; + char size[500]; // mind the size of the string to represent a big number + + double *start = (double *) malloc (64*sizeof(double)); + double *elapsed = (double *) malloc (64*sizeof(double)); + + double *x = (double *) malloc (2*nk*sizeof(double)); + double *q = (double *) malloc (nq*sizeof(double)); + + MPI_Init( &argc, &argv ); + MPI_Comm_size( MPI_COMM_WORLD, &no_nodes); + MPI_Comm_rank( MPI_COMM_WORLD, &node); + + TRACE_smpi_set_category ("start"); + + get_info(argc, argv, &nprocs, &class); + check_info(EP, nprocs, class); + + if (class == 'S') { m = 24; } + else if (class == 'W') { m = 25; } + else if (class == 'A') { m = 28; } + else if (class == 'B') { m = 30; } + else if (class == 'C') { m = 32; } + else if (class == 'D') { m = 36; } + else if (class == 'E') { m = 40; } + else { + printf("EP: Internal error: invalid class type %c\n", class); + exit(1); + } + mm = m -mk; + 
nn = (int)(pow(2,mm)), + + root = 0; + if (node == root ) { + /* Because the size of the problem is too large to store in a 32-bit integer for some classes, we put it into a + * string (for printing). Have to strip off the decimal point put in there by the floating point print statement + * (internal file) + */ + fprintf(stdout," NAS Parallel Benchmarks 3.2 -- EP Benchmark"); + sprintf(size,"%zu",(unsigned long)pow(2,m+1)); + //size = size.replace('.', ' '); + fprintf(stdout," Number of random numbers generated: %s\n",size); + fprintf(stdout," Number of active processes: %d\n",no_nodes); + } + verified = false; + + /* Compute the number of "batches" of random number pairs generated per processor. Adjust if the number of processors + * does not evenly divide the total number + */ + + np = nn / no_nodes; + no_large_nodes = nn % no_nodes; + if (node < no_large_nodes) np_add = 1; + else np_add = 0; + np = np + np_add; + + if (np == 0) { + fprintf(stdout,"Too many nodes: %d %d",no_nodes,nn); + MPI_Abort(MPI_COMM_WORLD,1); + exit(0); + } + + /* Call the random number generator functions and initialize the x-array to reduce the effects of paging the timings. + Also, call all mathematical functions that are used. Make sure initializations cannot be eliminated as dead code. */ + + //call vranlc(0, dum[1], dum[2], dum[3]); + // Array indexes start at 1 in Fortran, 0 in Java + vranlc(0, dum[0], dum[1], &(dum[2])); + + dum[0] = randlc(&(dum[1]),&(dum[2])); + for (i=0;i<2*nk;i++) { + x[i] = -1e99; + } + Mops = log(sqrt(abs(1))); + + /* Synchronize before placing time stamp */ + MPI_Barrier( MPI_COMM_WORLD ); + + TRACE_smpi_set_category ("ep"); + + time_clear(&(elapsed[1])); + time_clear(&(elapsed[2])); + time_clear(&(elapsed[3])); + time_start(&(start[1])); + + t1 = a; + //fprintf(stdout,("(ep.f:160) t1 = " + t1); + t1 = vranlc(0, t1, a, x); + //fprintf(stdout,("(ep.f:161) t1 = " + t1); + + /* Compute AN = A ^ (2 * NK) (mod 2^46). 
*/ + t1 = a; + //fprintf(stdout,("(ep.f:165) t1 = " + t1); + for (i=1; i <= mk+1; i++) { + t2 = randlc(&t1, &t1); + //fprintf(stdout,("(ep.f:168)[loop i=" + i +"] t1 = " + t1); + } + an = t1; + //fprintf(stdout,("(ep.f:172) s = " + s); + tt = s; + gc = tt = 0.; + sx = 0.; + sy = 0.; + for (i=0; i < nq ; i++) { + q[i] = 0.; + } + +/* Each instance of this loop may be performed independently. We compute the k offsets separately to take into account + * the fact that some nodes have more numbers to generate than others */ + + if (np_add == 1) + k_offset = node * np -1; + else + k_offset = no_large_nodes*(np+1) + (node-no_large_nodes)*np -1; + + int stop = false; + for(k = 1; k <= np; k++) {// SMPI_SAMPLE_LOCAL(0.25 * np, 0.03) { + stop = false; + kk = k_offset + k ; + t1 = s; + //fprintf(stdout,("(ep.f:193) t1 = " + t1); + t2 = an; + + // Find starting seed t1 for this kk. + for (i=1;i<=100 && !stop;i++) { + ik = kk / 2; + //fprintf(stdout,("(ep.f:199) ik = " +ik+", kk = " + kk); + if (2 * ik != kk) { + t3 = randlc(&t1, &t2); + //fprintf(stdout,("(ep.f:200) t1= " +t1 ); + } + if (ik==0) + stop = true; + else { + t3 = randlc(&t2, &t2); + kk = ik; + } + } + // Compute uniform pseudorandom numbers. 
+ + //if (timers_enabled) timer_start(3); + time_start(&(start[3])); + //call vranlc(2 * nk, t1, a, x) --> t1 and y are modified + + //fprintf(stdout,">>>>>>>>>>>Before vranlc(l.210)<<<<<<<<<<<<<"); + //fprintf(stdout,"2*nk = " + (2*nk)); + //fprintf(stdout,"t1 = " + t1); + //fprintf(stdout,"a = " + a); + //fprintf(stdout,"x[0] = " + x[0]); + //fprintf(stdout,">>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<"); + t1 = vranlc(2 * nk, t1, a, x); + + //fprintf(stdout,(">>>>>>>>>>>After Enter vranlc (l.210)<<<<<<"); + //fprintf(stdout,("2*nk = " + (2*nk)); + //fprintf(stdout,("t1 = " + t1); + //fprintf(stdout,("a = " + a); + //fprintf(stdout,("x[0] = " + x[0]); + //fprintf(stdout,(">>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<"); + + //if (timers_enabled) timer_stop(3); + time_stop(3,elapsed,start); + + /* Compute Gaussian deviates by acceptance-rejection method and tally counts in concentric square annuli. + * This loop is not vectorizable. */ + //if (timers_enabled) timer_start(2); + time_start(&(start[2])); + for(i=1; i<=nk;i++) { + x1 = 2. * x[2*i-2] -1.0; + x2 = 2. * x[2*i-1] - 1.0; + t1 = x1*x1 + x2*x2; + if (t1 <= 1.) { + t2 = sqrt(-2. * log(t1) / t1); + t3 = (x1 * t2); + t4 = (x2 * t2); + l = (int)(abs(t3) > abs(t4) ? 
abs(t3) : abs(t4)); + q[l] = q[l] + 1.; + sx = sx + t3; + sy = sy + t4; + } + /* + if(i == 1) { + fprintf(stdout,"x1 = " + x1); + fprintf(stdout,"x2 = " + x2); + fprintf(stdout,"t1 = " + t1); + fprintf(stdout,"t2 = " + t2); + fprintf(stdout,"t3 = " + t3); + fprintf(stdout,"t4 = " + t4); + fprintf(stdout,"l = " + l); + fprintf(stdout,"q[l] = " + q[l]); + fprintf(stdout,"sx = " + sx); + fprintf(stdout,"sy = " + sy); + } + */ + } + //if (timers_enabled) timer_stop(2); + time_stop(2,elapsed,start); + } + + TRACE_smpi_set_category ("finalize"); + + MPI_Allreduce(&sx, x, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + sx = x[0]; //FIXME : x[0] or x[1] => x[0] because fortran starts with 1 + MPI_Allreduce(&sy, x, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + sy = x[0]; + MPI_Allreduce(q, x, nq, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + for(i = 0; i < nq; i++) { + q[i] = x[i]; + } + for(i = 0; i < nq; i++) { + gc += q[i]; + } + + time_stop(1,elapsed,start); + tm = time_read(1,elapsed); + MPI_Allreduce(&tm, x, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + tm = x[0]; + + if(node == root) { + nit = 0; + verified = true; + + if(m == 24) { + sx_verify_value = -3.247834652034740E3; + sy_verify_value = -6.958407078382297E3; + } else if(m == 25) { + sx_verify_value = -2.863319731645753E3; + sy_verify_value = -6.320053679109499E3; + } else if(m == 28) { + sx_verify_value = -4.295875165629892E3; + sy_verify_value = -1.580732573678431E4; + } else if(m == 30) { + sx_verify_value = 4.033815542441498E4; + sy_verify_value = -2.660669192809235E4; + } else if(m == 32) { + sx_verify_value = 4.764367927995374E4; + sy_verify_value = -8.084072988043731E4; + } else if(m == 36) { + sx_verify_value = 1.982481200946593E5; + sy_verify_value = -1.020596636361769E5; + } else { + verified = false; + } + + /* + fprintf(stdout,("sx = " + sx); + fprintf(stdout,("sx_verify = " + sx_verify_value); + fprintf(stdout,("sy = " + sy); + fprintf(stdout,("sy_verify = " + sy_verify_value); + */ + if(verified) { + sx_err = 
abs((sx - sx_verify_value)/sx_verify_value); + sy_err = abs((sy - sy_verify_value)/sy_verify_value); + /* + fprintf(stdout,("sx_err = " + sx_err); + fprintf(stdout,("sy_err = " + sx_err); + fprintf(stdout,("epsilon= " + epsilon); + */ + verified = ((sx_err < epsilon) && (sy_err < epsilon)); + } + + Mops = (pow(2.0, m+1))/tm/1000; + + fprintf(stdout,"EP Benchmark Results:\n"); + fprintf(stdout,"CPU Time=%d\n",(int) tm); + fprintf(stdout,"N = 2^%d\n",m); + fprintf(stdout,"No. Gaussain Pairs =%d\n",(int) gc); + fprintf(stdout,"Sum = %f %ld\n",sx,(long) sy); + fprintf(stdout,"Count:"); + for(i = 0; i < nq; i++) { + fprintf(stdout,"%d\t %ld\n",i,(long) q[i]); + } + c_print_results("EP", class, m+1, 0, 0, nit, nprocs, no_nodes, tm, Mops, "Random number generated",verified); + + fprintf(stdout,"Total time: %f\n",(time_read(1,elapsed)/1000)); + fprintf(stdout,"Gaussian pairs: %f\n",(time_read(2,elapsed)/1000)); + fprintf(stdout,"Random numbers: %f\n",(time_read(3,elapsed)/1000)); + } + + MPI_Finalize(); + return 0; +} diff --git a/examples/smpi/NAS/is.c b/examples/smpi/NAS/is.c new file mode 100644 index 0000000000..68cbb77487 --- /dev/null +++ b/examples/smpi/NAS/is.c @@ -0,0 +1,637 @@ +/************************************************************************* + * * + * N A S P A R A L L E L B E N C H M A R K S 3.3 * + * * + * I S * + * * + ************************************************************************* + * * + * This benchmark is part of the NAS Parallel Benchmark 3.3 suite. * + * It is described in NAS Technical Report 95-020. * + * * + * Permission to use, copy, distribute and modify this software * + * for any purpose with or without fee is hereby granted. We * + * request, however, that all derived work reference the NAS * + * Parallel Benchmarks 3.3. This software is provided "as is" * + * without express or implied warranty. 
* + * * + * Information on NPB 3.3, including the technical report, the * + * original specifications, source code, results and information * + * on how to submit new results, is available at: * + * * + * http://www.nas.nasa.gov/Software/NPB * + * * + * Send comments or suggestions to npb@nas.nasa.gov * + * Send bug reports to npb-bugs@nas.nasa.gov * + * * + * NAS Parallel Benchmarks Group * + * NASA Ames Research Center * + * Mail Stop: T27A-1 * + * Moffett Field, CA 94035-1000 * + * * + * E-mail: npb@nas.nasa.gov * + * Fax: (650) 604-3957 * + * * + ************************************************************************* + * * + * Author: M. Yarrow * + * H. Jin * + * * + *************************************************************************/ + +#include "smpi/mpi.h" +#include "nas_common.h" +#include +#include + +#include "simgrid/instr.h" //TRACE_ + +char class; +int nprocs; +int total_keys_log2; +int max_key_log_2; +int num_bucket_log_2; +int min_procs=1; +/* NOTE: THIS CODE CANNOT BE RUN ON ARBITRARILY LARGE NUMBERS OF PROCESSORS. THE LARGEST VERIFIED NUMBER IS 1024. + * INCREASE max_procs AT YOUR PERIL + */ +int max_procs=1024; + +int total_keys; +int max_key; +int num_buckets; +int num_keys; +long size_of_buffers; + +#define MAX_ITERATIONS 10 +#define TEST_ARRAY_SIZE 5 + +/* Typedef: if necessary, change the size of int here by changing the int type to, say, long */ +typedef int INT_TYPE; +typedef long INT_TYPE2; +#define MP_KEY_TYPE MPI_INT + +typedef struct { +/* MPI properties: */ +int my_rank, comm_size; +/* Some global info */ +INT_TYPE *key_buff_ptr_global, /* used by full_verify to get */ + total_local_keys, /* copies of rank info */ + total_lesser_keys; + +int passed_verification; +/* These are the three main arrays. See SIZE_OF_BUFFERS def above */ +INT_TYPE *key_array, *key_buff1, *key_buff2, + *bucket_size, /* Top 5 elements for */ + *bucket_size_totals, /* part. ver. 
vals */ + *bucket_ptrs, *process_bucket_distrib_ptr1, *process_bucket_distrib_ptr2; +int send_count[1024], recv_count[1024], send_displ[1024], recv_displ[1024]; + +/* Partial verif info */ +INT_TYPE2 test_index_array[TEST_ARRAY_SIZE], + test_rank_array[TEST_ARRAY_SIZE]; +} global_data; + +const INT_TYPE2 + S_test_index_array[TEST_ARRAY_SIZE] = {48427,17148,23627,62548,4431}, + S_test_rank_array[TEST_ARRAY_SIZE] = {0,18,346,64917,65463}, + W_test_index_array[TEST_ARRAY_SIZE] = {357773,934767,875723,898999,404505}, + W_test_rank_array[TEST_ARRAY_SIZE] = {1249,11698,1039987,1043896,1048018}, + + A_test_index_array[TEST_ARRAY_SIZE] = {2112377,662041,5336171,3642833,4250760}, + A_test_rank_array[TEST_ARRAY_SIZE] = {104,17523,123928,8288932,8388264}, + + B_test_index_array[TEST_ARRAY_SIZE] = {41869,812306,5102857,18232239,26860214}, + B_test_rank_array[TEST_ARRAY_SIZE] = {33422937,10244,59149,33135281,99}, + + C_test_index_array[TEST_ARRAY_SIZE] = {44172927,72999161,74326391,129606274,21736814}, + C_test_rank_array[TEST_ARRAY_SIZE] = {61147,882988,266290,133997595,133525895}, + + D_test_index_array[TEST_ARRAY_SIZE] = {1317351170,995930646,1157283250,1503301535,1453734525}, + D_test_rank_array[TEST_ARRAY_SIZE] = {1,36538729,1978098519,2145192618,2147425337}; + +void full_verify( global_data* gd ); + +/************ returns parallel random number seq seed ************/ +/* + * Create a random number sequence of total length nn residing on np number of processors. Each processor will + * therefore have a subsequence of length nn/np. This routine returns that random number which is the first random + * number for the subsequence belonging to processor rank kn, and which is used as seed for proc kn ran # gen. 
+ */ +static double find_my_seed( int kn, /* my processor rank, 0<=kn<=num procs */ + int np, /* np = num procs */ + long nn, /* total num of ran numbers, all procs */ + double s, /* Ran num seed, for ex.: 314159265.00 */ + double a ) /* Ran num gen mult, try 1220703125.00 */ +{ + long i; + double t1,t2,t3,an; + long mq,nq,kk,ik; + + nq = nn / np; + + for( mq=0; nq>1; mq++,nq/=2); + + t1 = a; + + for( i=1; i<=mq; i++ ) + t2 = randlc( &t1, &t1 ); + + an = t1; + + kk = kn; + t1 = s; + t2 = an; + + for( i=1; i<=100; i++ ){ + ik = kk / 2; + if( 2 * ik != kk ) + t3 = randlc( &t1, &t2 ); + if( ik == 0 ) + break; + t3 = randlc( &t2, &t2 ); + kk = ik; + } + an=t3;//added to silence paranoid compilers + + return t1; +} + +static void create_seq( global_data* gd, double seed, double a ) +{ + double x; + int i, k; + + k = max_key/4; + + for (i=0; ikey_array[i] = k*x; + } +} + +void full_verify( global_data* gd ) +{ + MPI_Status status; + MPI_Request request; + + INT_TYPE i, j; + INT_TYPE k, last_local_key; + +/* Now, finally, sort the keys: */ + for( i=0; itotal_local_keys; i++ ) + gd->key_array[--gd->key_buff_ptr_global[gd->key_buff2[i]]- gd->total_lesser_keys] = gd->key_buff2[i]; + last_local_key = (gd->total_local_keys<1)? 
0 : (gd->total_local_keys-1); + +/* Send largest key value to next processor */ + if( gd->my_rank > 0 ) + MPI_Irecv( &k, 1, MP_KEY_TYPE, gd->my_rank-1, 1000, MPI_COMM_WORLD, &request ); + if( gd->my_rank < gd->comm_size-1 ) + MPI_Send( &gd->key_array[last_local_key], 1, MP_KEY_TYPE, gd->my_rank+1, 1000, MPI_COMM_WORLD ); + if( gd->my_rank > 0 ) + MPI_Wait( &request, &status ); + +/* Confirm that neighbor's greatest key value is not greater than my least key value */ + j = 0; + if( gd->my_rank > 0 && gd->total_local_keys > 0 ) + if( k > gd->key_array[0] ) + j++; + +/* Confirm keys correctly sorted: count incorrectly sorted keys, if any */ + for( i=1; itotal_local_keys; i++ ) + if( gd->key_array[i-1] > gd->key_array[i] ) + j++; + + if( j != 0 ) { + printf( "Processor %d: Full_verify: number of keys out of sort: %d\n", gd->my_rank, j ); + } else + gd->passed_verification++; +} + +static void rank( global_data* gd, int iteration ) +{ + INT_TYPE i, k; + INT_TYPE shift = max_key_log_2 - num_bucket_log_2; + INT_TYPE key; + INT_TYPE2 bucket_sum_accumulator, j, m; + INT_TYPE local_bucket_sum_accumulator; + INT_TYPE min_key_val, max_key_val; + INT_TYPE *key_buff_ptr; + +/* Iteration alteration of keys */ + if(gd->my_rank == 0){ + gd->key_array[iteration] = iteration; + gd->key_array[iteration+MAX_ITERATIONS] = max_key - iteration; + } + +/* Initialize */ + for( i=0; ibucket_size[i] = 0; + gd->bucket_size_totals[i] = 0; + gd->process_bucket_distrib_ptr1[i] = 0; + gd->process_bucket_distrib_ptr2[i] = 0; + } + +/* Determine where the partial verify test keys are, load into top of array bucket_size */ + for( i=0; itest_index_array[i]/num_keys) == gd->my_rank ) + gd->bucket_size[num_buckets+i] = gd->key_array[gd->test_index_array[i] % num_keys]; + +/* Determine the number of keys in each bucket */ + for( i=0; ibucket_size[gd->key_array[i] >> shift]++; + +/* Accumulative bucket sizes are the bucket pointers */ + gd->bucket_ptrs[0] = 0; + for( i=1; i< num_buckets; i++ ) + 
gd->bucket_ptrs[i] = gd->bucket_ptrs[i-1] + gd->bucket_size[i-1]; + +/* Sort into appropriate bucket */ + for( i=0; ikey_array[i]; + gd->key_buff1[gd->bucket_ptrs[key >> shift]++] = key; + } + +/* Get the bucket size totals for the entire problem. These will be used to determine the redistribution of keys */ + MPI_Allreduce(gd->bucket_size, gd->bucket_size_totals, num_buckets+TEST_ARRAY_SIZE, MP_KEY_TYPE, MPI_SUM, + MPI_COMM_WORLD); + +/* Determine Redistibution of keys: accumulate the bucket size totals till this number surpasses num_keys (which the + * average number of keys per processor). Then all keys in these buckets go to processor 0. + Continue accumulating again until supassing 2*num_keys. All keys in these buckets go to processor 1, etc. This + algorithm guarantees that all processors have work ranking; no processors are left idle. + The optimum number of buckets, however, does not result in as high a degree of load balancing (as even a distribution + of keys as is possible) as is obtained from increasing the number of buckets, but more buckets results in more + computation per processor so that the optimum number of buckets turns out to be 1024 for machines tested. + Note that process_bucket_distrib_ptr1 and ..._ptr2 hold the bucket number of first and last bucket which each + processor will have after the redistribution is done. 
+*/ + + bucket_sum_accumulator = 0; + local_bucket_sum_accumulator = 0; + gd->send_displ[0] = 0; + gd->process_bucket_distrib_ptr1[0] = 0; + for( i=0, j=0; ibucket_size_totals[i]; + local_bucket_sum_accumulator += gd->bucket_size[i]; + if( bucket_sum_accumulator >= (j+1)*num_keys ) { + gd->send_count[j] = local_bucket_sum_accumulator; + if( j != 0 ){ + gd->send_displ[j] = gd->send_displ[j-1] + gd->send_count[j-1]; + gd->process_bucket_distrib_ptr1[j] = gd->process_bucket_distrib_ptr2[j-1]+1; + } + gd->process_bucket_distrib_ptr2[j++] = i; + local_bucket_sum_accumulator = 0; + } + } + +/* When nprocs approaching num_buckets, it is highly possible that the last few processors don't get any buckets. + * So, we need to set counts properly in this case to avoid any fallouts. */ + while( j < gd->comm_size ) { + gd->send_count[j] = 0; + gd->process_bucket_distrib_ptr1[j] = 1; + j++; + } + +/* This is the redistribution section: first find out how many keys + each processor will send to every other processor: */ + MPI_Alltoall( gd->send_count, 1, MPI_INT, gd->recv_count, 1, MPI_INT, MPI_COMM_WORLD ); + +/* Determine the receive array displacements for the buckets */ + gd->recv_displ[0] = 0; + for( i=1; icomm_size; i++ ) + gd->recv_displ[i] = gd->recv_displ[i-1] + gd->recv_count[i-1]; + + /* Now send the keys to respective processors */ + MPI_Alltoallv(gd->key_buff1, gd->send_count, gd->send_displ, MP_KEY_TYPE, gd->key_buff2, gd->recv_count, + gd->recv_displ, MP_KEY_TYPE, MPI_COMM_WORLD ); + +/* The starting and ending bucket numbers on each processor are multiplied by the interval size of the buckets to + * obtain the smallest possible min and greatest possible max value of any key on each processor + */ + min_key_val = gd->process_bucket_distrib_ptr1[gd->my_rank] << shift; + max_key_val = ((gd->process_bucket_distrib_ptr2[gd->my_rank] + 1) << shift)-1; + +/* Clear the work array */ + for( i=0; ikey_buff1[i] = 0; + +/* Determine the total number of keys on all other 
processors holding keys of lesser value */ + m = 0; + for( k=0; kmy_rank; k++ ) + for( i= gd->process_bucket_distrib_ptr1[k]; i<=gd->process_bucket_distrib_ptr2[k]; i++ ) + m += gd->bucket_size_totals[i]; /* m has total # of lesser keys */ + +/* Determine total number of keys on this processor */ + j = 0; + for( i= gd->process_bucket_distrib_ptr1[gd->my_rank]; i<=gd->process_bucket_distrib_ptr2[gd->my_rank]; i++ ) + j += gd->bucket_size_totals[i]; /* j has total # of local keys */ + +/* Ranking of all keys occurs in this section: */ +/* shift it backwards so no subtractions are necessary in loop */ + key_buff_ptr = gd->key_buff1 - min_key_val; + +/* In this section, the keys themselves are used as their own indexes to determine how many of each there are: their + individual population */ + for( i=0; ikey_buff2[i]]++; /* Now they have individual key population */ + +/* To obtain ranks of each key, successively add the individual key population, not forgetting the total of lesser + * keys, m. + NOTE: Since the total of lesser keys would be subtracted later in verification, it is no longer added to the first + key population here, but still needed during the partial verify test. This is to ensure that 32-bit key_buff can + still be used for class D. 
*/ +/* key_buff_ptr[min_key_val] += m; */ + for( i=min_key_val; ibucket_size_totals[i+num_buckets]; /* Keys were hidden here */ + if( min_key_val <= k && k <= max_key_val ){ + /* Add the total of lesser keys, m, here */ + INT_TYPE2 key_rank = key_buff_ptr[k-1] + m; + int failed = 0; + + switch( class ){ + case 'S': + if( i <= 2 ) { + if( key_rank != gd->test_rank_array[i]+iteration ) + failed = 1; + else + gd->passed_verification++; + } else { + if( key_rank != gd->test_rank_array[i]-iteration ) + failed = 1; + else + gd->passed_verification++; + } + break; + case 'W': + if( i < 2 ){ + if( key_rank != gd->test_rank_array[i]+(iteration-2) ) + failed = 1; + else + gd->passed_verification++; + } else { + if( key_rank != gd->test_rank_array[i]-iteration ) + failed = 1; + else + gd->passed_verification++; + } + break; + case 'A': + if( i <= 2 ){ + if( key_rank != gd->test_rank_array[i]+(iteration-1) ) + failed = 1; + else + gd->passed_verification++; + } else { + if( key_rank != gd->test_rank_array[i]-(iteration-1) ) + failed = 1; + else + gd->passed_verification++; + } + break; + case 'B': + if( i == 1 || i == 2 || i == 4 ) { + if( key_rank != gd->test_rank_array[i]+iteration ) + failed = 1; + else + gd->passed_verification++; + } else { + if( key_rank != gd->test_rank_array[i]-iteration ) + failed = 1; + else + gd->passed_verification++; + } + break; + case 'C': + if( i <= 2 ){ + if( key_rank != gd->test_rank_array[i]+iteration ) + failed = 1; + else + gd->passed_verification++; + } else { + if( key_rank != gd->test_rank_array[i]-iteration ) + failed = 1; + else + gd->passed_verification++; + } + break; + case 'D': + if( i < 2 ) { + if( key_rank != gd->test_rank_array[i]+iteration ) + failed = 1; + else + gd->passed_verification++; + } else { + if( key_rank != gd->test_rank_array[i]-iteration ) + failed = 1; + else + gd->passed_verification++; + } + break; + } + if( failed == 1 ) + printf( "Failed partial verification: iteration %d, processor %d, test key %d\n", + 
iteration, gd->my_rank, (int)i ); + } + } + +/* Make copies of rank info for use by full_verify: these variables in rank are local; making them global slows down + * the code, probably since they cannot be made register by compiler */ + + if( iteration == MAX_ITERATIONS ) { + gd->key_buff_ptr_global = key_buff_ptr; + gd->total_local_keys = j; + gd->total_lesser_keys = 0; /* no longer set to 'm', see note above */ + } +} + +int main( int argc, char **argv ) +{ + int i, iteration, itemp; + double timecounter, maxtime; + + global_data* gd = malloc(sizeof(global_data)); +/* Initialize MPI */ + MPI_Init( &argc, &argv ); + MPI_Comm_rank( MPI_COMM_WORLD, &gd->my_rank ); + MPI_Comm_size( MPI_COMM_WORLD, &gd->comm_size ); + + get_info(argc, argv, &nprocs, &class); + check_info(IS, nprocs, class); +/* Initialize the verification arrays if a valid class */ + for( i=0; itest_index_array[i] = S_test_index_array[i]; + gd->test_rank_array[i] = S_test_rank_array[i]; + break; + case 'A': + total_keys_log2 = 23; + max_key_log_2 = 19; + num_bucket_log_2 = 10; + gd->test_index_array[i] = A_test_index_array[i]; + gd->test_rank_array[i] = A_test_rank_array[i]; + break; + case 'W': + total_keys_log2 = 20; + max_key_log_2 = 16; + num_bucket_log_2 = 10; + gd->test_index_array[i] = W_test_index_array[i]; + gd->test_rank_array[i] = W_test_rank_array[i]; + break; + case 'B': + total_keys_log2 = 25; + max_key_log_2 = 21; + num_bucket_log_2 = 10; + gd->test_index_array[i] = B_test_index_array[i]; + gd->test_rank_array[i] = B_test_rank_array[i]; + break; + case 'C': + total_keys_log2 = 27; + max_key_log_2 = 23; + num_bucket_log_2 = 10; + gd->test_index_array[i] = C_test_index_array[i]; + gd->test_rank_array[i] = C_test_rank_array[i]; + break; + case 'D': + total_keys_log2 = 29; + max_key_log_2 = 27; + num_bucket_log_2 = 10; + min_procs = 4; + gd->test_index_array[i] = D_test_index_array[i]; + gd->test_rank_array[i] = D_test_rank_array[i]; + break; + }; + + total_keys = (1 << total_keys_log2); + 
max_key = (1 << max_key_log_2); + num_buckets = (1 << num_bucket_log_2); + num_keys = (total_keys/nprocs*min_procs); + + /* On larger number of processors, since the keys are (roughly) gaussian distributed, the first and last processor + * sort keys in a large interval, requiring array sizes to be larger. Note that for large NUM_PROCS, num_keys is, + * however, a small number The required array size also depends on the bucket size used. The following values are + * validated for the 1024-bucket setup. */ + if (nprocs < 256) + size_of_buffers = 3*num_keys/2; + else if (nprocs < 512) + size_of_buffers = 5*num_keys/2; + else if (nprocs < 1024) + size_of_buffers = 4*num_keys/2; + else + size_of_buffers = 13*num_keys/2; + + gd->key_array = (INT_TYPE*)malloc(size_of_buffers*sizeof(INT_TYPE)); + gd->key_buff1 = (INT_TYPE*)malloc(size_of_buffers*sizeof(INT_TYPE)); + gd->key_buff2 = (INT_TYPE*)malloc(size_of_buffers*sizeof(INT_TYPE)); + gd->bucket_size = (INT_TYPE*)malloc((num_buckets+TEST_ARRAY_SIZE)*sizeof(INT_TYPE)); /* Top 5 elements for */ + gd->bucket_size_totals = (INT_TYPE*)malloc((num_buckets+TEST_ARRAY_SIZE)*sizeof(INT_TYPE)); /* part. ver. 
vals */ + gd->bucket_ptrs = (INT_TYPE*)malloc(num_buckets*sizeof(INT_TYPE)); + gd->process_bucket_distrib_ptr1 = (INT_TYPE*)malloc((num_buckets+TEST_ARRAY_SIZE)*sizeof(INT_TYPE)); + gd->process_bucket_distrib_ptr2 = (INT_TYPE*)malloc((num_buckets+TEST_ARRAY_SIZE)*sizeof(INT_TYPE)); +// int send_count[max_procs], recv_count[max_procs], +// send_displ[max_procs], recv_displ[max_procs]; + +/* Printout initial NPB info */ + if( gd->my_rank == 0 ){ + printf( "\n\n NAS Parallel Benchmarks 3.3 -- IS Benchmark\n\n" ); + printf( " Size: %ld (class %c)\n", (long)total_keys*min_procs, class); + printf( " Iterations: %d\n", MAX_ITERATIONS ); + printf( " Number of processes: %d\n",gd->comm_size ); + } + +/* Check that actual and compiled number of processors agree */ + if( gd->comm_size != nprocs) { + if( gd->my_rank == 0 ) + printf( "\n ERROR: compiled for %d processes\n" + " Number of active processes: %d\n" + " Exiting program!\n\n", nprocs, gd->comm_size ); + MPI_Finalize(); + exit( 1 ); + } + +/* Check to see whether total number of processes is within bounds. 
+ This could in principle be checked in setparams.c, but it is more convenient to do it here */ + if( gd->comm_size < min_procs || gd->comm_size > max_procs){ + if( gd->my_rank == 0 ) + printf( "\n ERROR: number of processes %d not within range %d-%d" + "\n Exiting program!\n\n", gd->comm_size, min_procs, max_procs); + MPI_Finalize(); + exit( 1 ); + } + +/* Generate random number sequence and subsequent keys on all procs */ + create_seq(gd, find_my_seed( gd->my_rank, gd->comm_size, 4*(long)total_keys*min_procs, + 314159265.00, /* Random number gen seed */ + 1220703125.00 ), /* Random number gen mult */ + 1220703125.00 ); /* Random number gen mult */ + +/* Do one interation for free (i.e., untimed) to guarantee initialization of + all data and code pages and respective tables */ + rank(gd, 1 ); + +/* Start verification counter */ + gd->passed_verification = 0; + + if( gd->my_rank == 0 && class != 'S' ) printf( "\n iteration\n" ); + +/* Initialize timer */ + timer_clear(0); + +/* Start timer */ + timer_start(0); + + char smpi_category[100]; + snprintf (smpi_category, 100, "%d", gd->my_rank); + TRACE_smpi_set_category (smpi_category); + +/* This is the main iteration */ + for( iteration=1; iteration<=MAX_ITERATIONS; iteration++ ) { + if( gd->my_rank == 0 && class != 'S' ) printf( " %d\n", iteration ); + rank(gd, iteration ); + } + TRACE_smpi_set_category (NULL); + +/* Stop timer, obtain time for processors */ + timer_stop(0); + + timecounter = timer_read(0); + +/* End of timing, obtain maximum time of all processors */ + MPI_Reduce( &timecounter, &maxtime, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD ); + +/* This tests that keys are in sequence: sorting of last ranked key seq occurs here, but is an untimed operation */ + full_verify(gd); + +/* Obtain verification counter sum */ + itemp =gd->passed_verification; + MPI_Reduce( &itemp, &gd->passed_verification, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD ); + +/* The final printout */ + if( gd->my_rank == 0 ) { + if( 
gd->passed_verification != 5*MAX_ITERATIONS + gd->comm_size ) + gd->passed_verification = 0; + c_print_results("IS", class, (int)(total_keys), min_procs, 0, MAX_ITERATIONS, nprocs, gd->comm_size, maxtime, + ((double) (MAX_ITERATIONS)*total_keys*min_procs)/maxtime/1000000., "keys ranked", + gd->passed_verification); + } + + MPI_Finalize(); + free(gd); + + return 0; +} diff --git a/examples/smpi/NAS/nas_common.c b/examples/smpi/NAS/nas_common.c new file mode 100644 index 0000000000..51ac28f479 --- /dev/null +++ b/examples/smpi/NAS/nas_common.c @@ -0,0 +1,268 @@ +/* Copyright (c) 2016. The SimGrid Team. + * All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ +#include "nas_common.h" + +static double start[64], elapsed[64]; + +/* integer log base two. Return error if argument isn't a power of two or is less than or equal to zero */ +int ilog2(int i) +{ + int log2; + int exp2 = 1; + if (i <= 0) return(-1); + + for (log2 = 0; log2 < 20; log2++) { + if (exp2 == i) return(log2); + exp2 *= 2; + } + return(-1); +} + +/* get_info(): Get parameters from command line */ +void get_info(int argc, char *argv[], int *nprocsp, char *classp) +{ + if (argc < 3) { + printf("Usage: %s (%d) nprocs class\n", argv[0], argc); + exit(1); + } + + *nprocsp = atoi(argv[1]); + *classp = *argv[2]; +} + +/* check_info(): Make sure command line data is ok for this benchmark */ +void check_info(int type, int nprocs, char class) +{ + int logprocs; + + /* check number of processors */ + if (nprocs <= 0) { + printf("setparams: Number of processors must be greater than zero\n"); + exit(1); + } + switch(type) { + case IS: + logprocs = ilog2(nprocs); + if (logprocs < 0) { + printf("setparams: Number of processors must be a power of two (1,2,4,...) 
for this benchmark\n"); + exit(1); + } + break; + case EP: + case DT: + break; + default: + /* never should have gotten this far with a bad name */ + printf("setparams: (Internal Error) Benchmark type %d unknown to this program\n", type); + exit(1); + } + + /* check class */ + if (class != 'S' && class != 'W' && class != 'A' && class != 'B' && class != 'C' && class != 'D' && class != 'E') { + printf("setparams: Unknown benchmark class %c\n", class); + printf("setparams: Allowed classes are \"S\", \"W\", and \"A\" through \"E\"\n"); + exit(1); + } + + if (class == 'E' && (type == IS || type == DT)) { + printf("setparams: Benchmark class %c not defined for IS or DT\n", class); + exit(1); + } + + if (class == 'D' && type == IS && nprocs < 4) { + printf("setparams: IS class D size cannot be run on less than 4 processors\n"); + exit(1); + } +} + +void time_clear(double *onetimer) { + *onetimer = 0.0; +} + +void time_start(double *onetimer) { + *onetimer = MPI_Wtime(); +} + +void time_stop(int n,double *elapsed,double *start) { + elapsed[n] = MPI_Wtime()- start[n]; +} + +double time_read(int n, double *elapsed) { /* ok, useless, but just to keep function call */ + return(elapsed[n]); +} + +void timer_clear(int n) +{ + elapsed[n] = 0.0; +} + +void timer_start(int n) +{ + start[n] = MPI_Wtime(); +} + +void timer_stop(int n) +{ + elapsed[n] += MPI_Wtime() - start[n]; +} + +double timer_read(int n) +{ + return elapsed[n]; +} + +double vranlc(int n, double x, double a, double *y) +{ + int i; + long i246m1=0x00003FFFFFFFFFFF; + long LLx, Lx, La; + double d2m46; + +// This doesn't work, because the compiler does the calculation in 32 bits and overflows. No standard way (without +// f90 stuff) to specify that the rhs should be done in 64 bit arithmetic. 
+// parameter(i246m1=2**46-1) + + d2m46=pow(0.5,46); + + Lx = (long)x; + La = (long)a; + //fprintf(stdout,("================== Vranlc ================"); + //fprintf(stdout,("Before Loop: Lx = " + Lx + ", La = " + La); + LLx = Lx; + for (i=0; i< n; i++) { + Lx = Lx*La & i246m1 ; + LLx = Lx; + y[i] = d2m46 * (double)LLx; + /* + if(i == 0) { + fprintf(stdout,("After loop 0:"); + fprintf(stdout,("Lx = " + Lx + ", La = " + La); + fprintf(stdout,("d2m46 = " + d2m46); + fprintf(stdout,("LLX(Lx) = " + LLX.doubleValue()); + fprintf(stdout,("Y[0]" + y[0]); + } + */ + } + + x = (double)LLx; + /* + fprintf(stdout,("Change: Lx = " + Lx); + fprintf(stdout,("=============End Vranlc ================"); + */ + return x; +} + +/* + * FUNCTION RANDLC (X, A) + * + * This routine returns a uniform pseudorandom double precision number in the + * range (0, 1) by using the linear congruential generator + * + * x_{k+1} = a x_k (mod 2^46) + * + * where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers + * before repeating. The argument A is the same as 'a' in the above formula, + * and X is the same as x_0. A and X must be odd double precision integers + * in the range (1, 2^46). The returned value RANDLC is normalized to be + * between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain + * the new seed x_1, so that subsequent calls to RANDLC using the same + * arguments will generate a continuous sequence. + * + * This routine should produce the same results on any computer with at least + * 48 mantissa bits in double precision floating point data. On Cray systems, + * double precision should be disabled. + * + * David H. Bailey October 26, 1990 + * + * IMPLICIT DOUBLE PRECISION (A-H, O-Z) + * SAVE KS, R23, R46, T23, T46 + * DATA KS/0/ + * + * If this is the first call to RANDLC, compute R23 = 2 ^ -23, R46 = 2 ^ -46, + * T23 = 2 ^ 23, and T46 = 2 ^ 46. 
These are computed in loops, rather than + * by merely using the ** operator, in order to insure that the results are + * exact on all systems. This code assumes that 0.5D0 is represented exactly. + */ +double randlc(double *X, double*A) +{ + static int KS=0; + static double R23, R46, T23, T46; + double T1, T2, T3, T4; + double A1, A2; + double X1, X2; + double Z; + int i, j; + + if (KS == 0) { + R23 = 1.0; + R46 = 1.0; + T23 = 1.0; + T46 = 1.0; + + for (i=1; i<=23; i++) { + R23 = 0.50 * R23; + T23 = 2.0 * T23; + } + for (i=1; i<=46; i++) { + R46 = 0.50 * R46; + T46 = 2.0 * T46; + } + KS = 1; + } + +/* Break A into two parts such that A = 2^23 * A1 + A2 and set X = N. */ + T1 = R23 * *A; + j = T1; + A1 = j; + A2 = *A - T23 * A1; + +/* Break X into two parts such that X = 2^23 * X1 + X2, compute + Z = A1 * X2 + A2 * X1 (mod 2^23), and then X = 2^23 * Z + A2 * X2 (mod 2^46). */ + T1 = R23 * *X; + j = T1; + X1 = j; + X2 = *X - T23 * X1; + T1 = A1 * X2 + A2 * X1; + + j = R23 * T1; + T2 = j; + Z = T1 - T23 * T2; + T3 = T23 * Z + A2 * X2; + j = R46 * T3; + T4 = j; + *X = T3 - T46 * T4; + return(R46 * *X); +} + +void c_print_results(const char *name, char class, int n1, int n2, int n3, int niter, int nprocs_compiled, + int nprocs_total, double t, double mops, const char *optype, int passed_verification) +{ + printf( "\n\n %s Benchmark Completed\n", name ); + printf( " Class = %c\n", class ); + + if( n3 == 0 ) { + long nn = n1; + if ( n2 != 0 ) nn *= n2; + printf( " Size = %12ld\n", nn ); /* as in IS */ + } else + printf( " Size = %3dx %3dx %3d\n", n1,n2,n3 ); + + printf( " Iterations = %12d\n", niter ); + printf( " Time in seconds = %12.2f\n", t ); + printf( " Total processes = %12d\n", nprocs_total ); + + if ( nprocs_compiled != 0 ) + printf( " Compiled procs = %12d\n", nprocs_compiled ); + + printf( " Mop/s total = %12.2f\n", mops ); + printf( " Mop/s/process = %12.2f\n", mops/((float) nprocs_total) ); + printf( " Operation type = %24s\n", optype); + + if( 
passed_verification ) + printf( " Verification = SUCCESSFUL\n" ); + else + printf( " Verification = UNSUCCESSFUL\n" ); +} diff --git a/examples/smpi/NAS/nas_common.h b/examples/smpi/NAS/nas_common.h new file mode 100644 index 0000000000..1990c0d1cc --- /dev/null +++ b/examples/smpi/NAS/nas_common.h @@ -0,0 +1,36 @@ +/* Copyright (c) 2016. The SimGrid Team. + * All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ +#ifndef NAS_COMMON_H +#define NAS_COMMON_H + +#include +#include +#include +#include "mpi.h" + +enum benchmark_types {IS, DT, EP}; + +int ilog2(int i); +void timer_clear(int n); +void timer_start(int n); +void timer_stop(int n); +double timer_read(int n); + +void time_clear(double *onetimer); +void time_start(double *onetimer); +void time_stop(int n,double *elapsed,double *start); +double time_read(int n, double *elapsed); + +double vranlc(int n, double x, double a, double *y); +double randlc(double *X, double*A); + +void c_print_results(const char *name, char class, int n1, int n2, int n3, int niter, int nprocs_compiled, + int nprocs_total, double t, double mops, const char *optype, int passed_verification); + +void get_info(int argc, char *argv[], int *nprocsp, char *classp); +void check_info(int type, int nprocs, char class); + +#endif diff --git a/examples/smpi/NAS/sys/Makefile b/examples/smpi/NAS/sys/Makefile deleted file mode 100644 index 35b4166abf..0000000000 --- a/examples/smpi/NAS/sys/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -include ../config/make.def -all: setparams - -# setparams creates an npbparam.h file for each benchmark configuration. 
-# npbparams.h also contains info about how a benchmark was compiled and linked - -setparams: setparams.c ../config/make.def - $(CC) ${CONVERTFLAG} -o setparams setparams.c - -clean: - -rm -f setparams setparams.h npbparams.h *~ *.o - diff --git a/examples/smpi/NAS/sys/README b/examples/smpi/NAS/sys/README deleted file mode 100644 index 0a62dd15af..0000000000 --- a/examples/smpi/NAS/sys/README +++ /dev/null @@ -1,38 +0,0 @@ -This directory contains utilities and files used by the -build process. You should not need to change anything -in this directory. - -Original Files --------------- -setparams.c: - Source for the setparams program. This program is used internally - in the build process to create the file "npbparams.h" for each - benchmark. npbparams.h contains Fortran or C parameters to build a - benchmark for a specific class and number of nodes. The setparams - program is never run directly by a user. Its invocation syntax is - "setparams benchmark-name nprocs class". - It examines the file "npbparams.h" in the current directory. If - the specified parameters are the same as those in the npbparams.h - file, nothing it changed. If the file does not exist or corresponds - to a different class/number of nodes, it is (re)built. - One of the more complicated things in npbparams.h is that it - contains, in a Fortran string, the compiler flags used to build a - benchmark, so that a benchmark can print out how it was compiled. - -make.common - A makefile segment that is included in each individual benchmark - program makefile. It sets up some standard macros (COMPILE, etc) - and makes sure everything is configured correctly (npbparams.h) - -Makefile - Builds setparams - -README - This file. 
- -Created files -------------- - -setparams - See descriptions above - diff --git a/examples/smpi/NAS/sys/make.common b/examples/smpi/NAS/sys/make.common deleted file mode 100644 index 228036707a..0000000000 --- a/examples/smpi/NAS/sys/make.common +++ /dev/null @@ -1,45 +0,0 @@ -PROGRAM = $(BINDIR)/$(BENCHMARK).$(CLASS).$(NPROCS) -CCOMPILE = $(MPICC) -c $(CMPI_INC) $(CFLAGS) - -# Class "U" is used internally by the setparams program to mean -# "unknown". This means that if you don't specify CLASS= -# on the command line, you'll get an error. It would be nice -# to be able to avoid this, but we'd have to get information -# from the setparams back to the make program, which isn't easy. -CLASS=U -NPROCS=1 - -default:: ${PROGRAM} - -# This makes sure the configuration utility setparams -# is up to date. -# Note that this must be run every time, which is why the -# target does not exist and is not created. -# If you create a file called "config" you will break things. -config: - @cd ../sys; ${MAKE} all - ../sys/setparams ${BENCHMARK} ${NPROCS} ${CLASS} ${SUBTYPE} - -COMMON=../common -$${COMMON}/c_randdp.o: ${COMMON}/randdp.c - cd ${COMMON}; ${CCOMPILE} -o c_randdp.o randdp.c - -${COMMON}/c_print_results.o: ${COMMON}/c_print_results.c - cd ${COMMON}; ${CCOMPILE} c_print_results.c - -${COMMON}/c_timers.o: ${COMMON}/c_timers.c - cd ${COMMON}; ${CCOMPILE} c_timers.c - -# Normally setparams updates npbparams.h only if the settings (CLASS/NPROCS) -# have changed. However, we also want to update if the compile options -# may have changed (set in ../config/make.def). -npbparams.h: ../config/make.def - @ echo make.def modified. 
Rebuilding npbparams.h just in case - rm -f npbparams.h - ../sys/setparams ${BENCHMARK} ${NPROCS} ${CLASS} ${SUBTYPE} - -# So that "make benchmark-name" works -${BENCHMARK}: default -${BENCHMARKU}: default - - diff --git a/examples/smpi/NAS/sys/print_instructions b/examples/smpi/NAS/sys/print_instructions deleted file mode 100755 index 6a09f2a123..0000000000 --- a/examples/smpi/NAS/sys/print_instructions +++ /dev/null @@ -1,10 +0,0 @@ -echo '' -echo ' To make a NAS benchmark type ' -echo '' -echo ' make NPROCS= CLASS=' -echo '' -echo ' where is "ep", "dt", or "is" -echo ' is the number of processors' -echo ' is "S", "W", "A", "B", "C", or "D"' -echo '' - diff --git a/examples/smpi/NAS/sys/setparams.c b/examples/smpi/NAS/sys/setparams.c deleted file mode 100644 index a01dab58c7..0000000000 --- a/examples/smpi/NAS/sys/setparams.c +++ /dev/null @@ -1,597 +0,0 @@ -/* - * This utility configures a NPB to be built for a specific number - * of nodes and a specific class. It creates a file "npbparams.h" - * in the source directory. This file keeps state information about - * which size of benchmark is currently being built (so that nothing - * if unnecessarily rebuilt) and defines (through PARAMETER statements) - * the number of nodes and class for which a benchmark is being built. - - * The utility takes 3 arguments: - * setparams benchmark-name nprocs class - * benchmark-name is "ep", "dt", or "is" - * nprocs is the number of processors to run on - * class is the size of the benchmark - * These parameters are checked for the current benchmark. If they - * are invalid, this program prints a message and aborts. - * If the parameters are ok, the current npbsize.h (actually just - * the first line) is read in. If the new parameters are the same as - * the old, nothing is done, but an exit code is returned to force the - * user to specify (otherwise the make procedure succeeds but builds a - * binary of the wrong name). Otherwise the file is rewritten. 
- * Errors write a message (to stdout) and abort. - * - * This program makes use of two extra benchmark "classes" - * class "X" means an invalid specification. It is returned if - * there is an error parsing the config file. - * class "U" is an external specification meaning "unknown class" - * - * Unfortunately everything has to be case sensitive. This is - * because we can always convert lower to upper or v.v. but - * can't feed this information back to the makefile, so typing - * make CLASS=a and make CLASS=A will produce different binaries. - * - * - */ - -#include -#include -#include -#include -#include -#include - -/* - * This is the master version number for this set of NPB benchmarks. It is in an obscure place so people - * won't accidentally change it. - */ - -#define VERSION "3.3" - -/* controls verbose output from setparams */ -/* #define VERBOSE */ - -#define FILENAME "npbparams.h" -#define DESC_LINE "c NPROCS = %d CLASS = %c\n" -#define DEF_CLASS_LINE "#define CLASS '%c'\n" -#define DEF_NUM_PROCS_LINE "#define NUM_PROCS %d\n" -#define FINDENT " " -#define CONTINUE " > " - -void get_info(int argc, char *argv[], int *typep, int *nprocsp, char *classp, int* subtypep); -void check_info(int type, int nprocs, char class); -void read_info(int type, int *nprocsp, char *classp, int *subtypep); -void write_info(int type, int nprocs, char class, int subtype); -void write_ep_info_C(FILE *fp, int nprocs, char class); /* after C translation */ -void write_is_info(FILE *fp, int nprocs, char class); -void write_dt_info(FILE *fp, int nprocs, char class); -void write_compiler_info(int type, FILE *fp); -void check_line(char *line, char *label, char *val); -int check_include_line(char *line, char *filename); -void put_string(FILE *fp, char *name, char *val); -void put_def_string(FILE *fp, char *name, char *val); -void put_def_variable(FILE *fp, char *name, char *val); -int isqrt(int i); -int ilog2(int i); -int ipow2(int i); - -enum benchmark_types {IS, DT, EP}; - -int 
main(int argc, char *argv[]) -{ - int nprocs, nprocs_old, type; - char class, class_old; - int subtype = -1, old_subtype = -1; - - /* Get command line arguments. Make sure they're ok. */ - get_info(argc, argv, &type, &nprocs, &class, &subtype); - if (class != 'U') { -#ifdef VERBOSE - printf("setparams: For benchmark %s: number of processors = %d class = %c\n", - argv[1], nprocs, class); -#endif - check_info(type, nprocs, class); - } - - /* Get old information. */ - read_info(type, &nprocs_old, &class_old, &old_subtype); - if (class != 'U') { - if (class_old != 'X') { -#ifdef VERBOSE - printf("setparams: old settings: number of processors = %d class = %c\n", - nprocs_old, class_old); -#endif - } - } else { - printf("setparams:\n\ - *********************************************************************\n\ - * You must specify NPROCS and CLASS to build this benchmark *\n\ - * For example, to build a class A benchmark for 4 processors, type *\n\ - * make {benchmark-name} NPROCS=4 CLASS=A *\n\ - *********************************************************************\n\n"); - - if (class_old != 'X') { -#ifdef VERBOSE - printf("setparams: Previous settings were CLASS=%c NPROCS=%d\n", - class_old, nprocs_old); -#endif - } - exit(1); /* exit on class==U */ - } - - /* Write out new information if it's different. */ - if (nprocs != nprocs_old || class != class_old || subtype != old_subtype) { -#ifdef VERBOSE - printf("setparams: Writing %s\n", FILENAME); -#endif - write_info(type, nprocs, class, subtype); - } else { -#ifdef VERBOSE - printf("setparams: Settings unchanged. 
%s unmodified\n", FILENAME); -#endif - } - - return 0; -} - -/* get_info(): Get parameters from command line */ -void get_info(int argc, char *argv[], int *typep, int *nprocsp, char *classp, int *subtypep) -{ - if (argc < 4) { - printf("Usage: %s (%d) benchmark-name nprocs class\n", argv[0], argc); - exit(1); - } - - *nprocsp = atoi(argv[2]); - *classp = *argv[3]; - - if (!strcmp(argv[1], "is") || !strcmp(argv[1], "IS")) *typep = IS; - else if (!strcmp(argv[1], "dt") || !strcmp(argv[1], "DT")) *typep = DT; - else if (!strcmp(argv[1], "ep") || !strcmp(argv[1], "EP")) *typep = EP; - else { - printf("setparams: Error: unknown benchmark type %s\n", argv[1]); - exit(1); - } -} - -/* - * check_info(): Make sure command line data is ok for this benchmark - */ - -void check_info(int type, int nprocs, char class) -{ - int rootprocs, logprocs; - - /* check number of processors */ - if (nprocs <= 0) { - printf("setparams: Number of processors must be greater than zero\n"); - exit(1); - } - switch(type) { - case IS: - logprocs = ilog2(nprocs); - if (logprocs < 0) { - printf("setparams: Number of processors must be a power of two (1,2,4,...) 
for this benchmark\n"); - exit(1); - } - break; - - case EP: - case DT: - break; - - default: - /* never should have gotten this far with a bad name */ - printf("setparams: (Internal Error) Benchmark type %d unknown to this program\n", type); - exit(1); - } - - /* check class */ - if (class != 'S' && - class != 'W' && - class != 'A' && - class != 'B' && - class != 'C' && - class != 'D' && - class != 'E') { - printf("setparams: Unknown benchmark class %c\n", class); - printf("setparams: Allowed classes are \"S\", \"W\", and \"A\" through \"E\"\n"); - exit(1); - } - - if (class == 'E' && (type == IS || type == DT)) { - printf("setparams: Benchmark class %c not defined for IS or DT\n", class); - exit(1); - } - - if (class == 'D' && type == IS && nprocs < 4) { - printf("setparams: IS class D size cannot be run on less than 4 processors\n"); - exit(1); - } -} - -/* - * read_info(): Read previous information from file. - * Not an error if file doesn't exist, because this may be the first time we're running. - * Assumes the first two lines of the file is in a special format that we understand (since we wrote it). - */ - -void read_info(int type, int *nprocsp, char *classp, int *subtypep) -{ - int nread = 0; - FILE *fp; - fp = fopen(FILENAME, "r"); - if (fp == NULL) { -#ifdef VERBOSE - printf("setparams: INFO: configuration file %s does not exist (yet)\n", FILENAME); -#endif - goto abort; - } - - /* first two lines of file contains info */ - nread = fscanf(fp, DEF_CLASS_LINE, classp); - nread += fscanf(fp, DEF_NUM_PROCS_LINE, nprocsp); - if (nread != 2) { - printf("setparams: Error line %d parsing config file %s. Ignoring previous settings\n", __LINE__,FILENAME); - goto abort; - } - - fclose(fp); - return; - - abort: - *nprocsp = -1; - *classp = 'X'; - *subtypep = -1; - return; -} - -/* - * write_info(): Write new information to config file. - * First line is in a special format so we can read - * it in again. Then comes a warning. 
The rest is all - * specific to a particular benchmark. - */ - -void write_info(int type, int nprocs, char class, int subtype) -{ - FILE *fp; - char *BT_TYPES[] = {"NONE", "FULL", "SIMPLE", "EPIO", "FORTRAN"}; - - fp = fopen(FILENAME, "w"); - if (fp == NULL) { - printf("setparams: Can't open file %s for writing\n", FILENAME); - exit(1); - } - - fprintf(fp, DEF_CLASS_LINE, class); - fprintf(fp, DEF_NUM_PROCS_LINE, nprocs); - fprintf(fp, "\ -/*\n\ - This file is generated automatically by the setparams utility.\n\ - It sets the number of processors and the class of the NPB\n\ - in this directory. Do not modify it by hand. */\n\ - \n"); - - /* Now do benchmark-specific stuff */ - switch(type) { - case IS: - write_is_info(fp, nprocs, class); - break; - case DT: - write_dt_info(fp, nprocs, class); - break; - case EP: - write_ep_info_C(fp, nprocs, class); - break; - default: - printf("setparams: (Internal error): Unknown benchmark type %d\n", type); - exit(1); - } - write_compiler_info(type, fp); - fclose(fp); - return; -} - -/* write_dt_info(): Write DT specific info to config file */ - -void write_dt_info(FILE *fp, int nprocs, char class) -{ - int num_samples,deviation,num_sources; - if (class == 'S') { num_samples=1728; deviation=128; num_sources=4; } - else if (class == 'W') { num_samples=1728*8; deviation=128*2; num_sources=4*2; } - else if (class == 'A') { num_samples=1728*64; deviation=128*4; num_sources=4*4; } - else if (class == 'B') { num_samples=1728*512; deviation=128*8; num_sources=4*8; } - else if (class == 'C') { num_samples=1728*4096; deviation=128*16; num_sources=4*16; } - else if (class == 'D') { num_samples=1728*4096*8; deviation=128*32; num_sources=4*32; } - else { - printf("setparams: Internal error: invalid class type %c\n", class); - exit(1); - } - fprintf(fp, "#define NUM_SAMPLES %d\n", num_samples); - fprintf(fp, "#define STD_DEVIATION %d\n", deviation); - fprintf(fp, "#define NUM_SOURCES %d\n", num_sources); -} - -/* write_is_info(): Write IS 
specific info to config file */ -void write_is_info(FILE *fp, int nprocs, char class) -{ - if( class != 'S' && class != 'W' && class != 'A' && class != 'B' && class != 'C' && class != 'D' ) - { - printf("setparams: Internal error: invalid class type %c\n", class); - exit(1); - } -} - -/* write_ep_info_C(): Write EP specific info to config file */ -void write_ep_info_C(FILE *fp, int nprocs, char class) -{ - /* easiest way (given the way the benchmark is written) is to specify log of number of grid points in each - * direction m1, m2, m3. nt is the number of iterations - */ - int m; - if (class == 'S') { m = 24; } - else if (class == 'W') { m = 25; } - else if (class == 'A') { m = 28; } - else if (class == 'B') { m = 30; } - else if (class == 'C') { m = 32; } - else if (class == 'D') { m = 36; } - else if (class == 'E') { m = 40; } - else { - printf("setparams: Internal error: invalid class type %c\n", class); - exit(1); - } - - /* number of processors given by "npm" */ - fprintf(fp, "%schar *_class=\"%c\";\n",FINDENT,class); - fprintf(fp, "%sint m=%d;\n", FINDENT,m); - fprintf(fp, "%sint npm=%d;\n", FINDENT,nprocs); -} - -/* - * This is a gross hack to allow the benchmarks to print out how they were compiled. Various other ways - * of doing this have been tried and they all fail on some machine - due to a broken "make" program, or - * F77 limitations, of whatever. Hopefully this will always work because it uses very portable C. Unfortunately - * it relies on parsing the make.def file - YUK. - * If your machine doesn't have or , happy hacking! 
- */ - -#define VERBOSE -#define LL 400 -#include -#define DEFFILE "../config/make.def" -#define DEFAULT_MESSAGE "(none)" -FILE *deffile; -void write_compiler_info(int type, FILE *fp) -{ - char line[LL]; - char compiletime[LL], randfile[LL]; - char mpicc[LL], cflags[LL], clink[LL], clinkflags[LL], - cmpi_lib[LL], cmpi_inc[LL]; - struct tm *tmp; - time_t t; - deffile = fopen(DEFFILE, "r"); - if (deffile == NULL) { - printf("\n\ -setparams: File %s doesn't exist. To build the NAS benchmarks\n\ - you need to create is according to the instructions\n\ - in the README in the main directory and comments in \n\ - the file config/make.def.template\n", DEFFILE); - exit(1); - } - strcpy(randfile, DEFAULT_MESSAGE); - strcpy(mpicc, DEFAULT_MESSAGE); - strcpy(cflags, DEFAULT_MESSAGE); - strcpy(clink, DEFAULT_MESSAGE); - strcpy(clinkflags, DEFAULT_MESSAGE); - strcpy(cmpi_lib, DEFAULT_MESSAGE); - strcpy(cmpi_inc, DEFAULT_MESSAGE); - - while (fgets(line, LL, deffile) != NULL) { - if (*line == '#') continue; - /* yes, this is inefficient. but it's simple! 
*/ - check_line(line, "RAND", randfile); - check_line(line, "MPICC", mpicc); - check_line(line, "CFLAGS", cflags); - check_line(line, "CLINK", clink); - check_line(line, "CLINKFLAGS", clinkflags); - check_line(line, "CMPI_LIB", cmpi_lib); - check_line(line, "CMPI_INC", cmpi_inc); - } - - (void) time(&t); - tmp = localtime(&t); - (void) strftime(compiletime, (size_t)LL, "%d %b %Y", tmp); - - put_def_string(fp, "COMPILETIME", compiletime); - put_def_string(fp, "NPBVERSION", VERSION); - put_def_string(fp, "MPICC", mpicc); - put_def_string(fp, "CFLAGS", cflags); - put_def_string(fp, "CLINK", clink); - put_def_string(fp, "CLINKFLAGS", clinkflags); - put_def_string(fp, "CMPI_LIB", cmpi_lib); - put_def_string(fp, "CMPI_INC", cmpi_inc); -} - -void check_line(char *line, char *label, char *val) -{ - char *original_line; - int n; - original_line = line; - /* compare beginning of line and label */ - while (*label != '\0' && *line == *label) { - line++; label++; - } - /* if *label is not EOS, we must have had a mismatch */ - if (*label != '\0') return; - /* if *line is not a space, actual label is longer than test label */ - if (!isspace(*line) && *line != '=') return ; - /* skip over white space */ - while (isspace(*line)) line++; - /* next char should be '=' */ - if (*line != '=') return; - /* skip over white space */ - while (isspace(*++line)); - /* if EOS, nothing was specified */ - if (*line == '\0') return; - /* finally we've come to the value */ - strcpy(val, line); - /* chop off the newline at the end */ - n = strlen(val)-1; - if (n >= 0 && val[n] == '\n') - val[n--] = '\0'; - if (n >= 0 && val[n] == '\r') - val[n--] = '\0'; - /* treat continuation */ - while (val[n] == '\\' && fgets(original_line, LL, deffile)) { - line = original_line; - while (isspace(*line)) line++; - if (isspace(*original_line)) val[n++] = ' '; - while (*line && *line != '\n' && *line != '\r' && n < LL-1) - val[n++] = *line++; - val[n] = '\0'; - n--; - } -/* if (val[strlen(val) - 1] == '\\') { - 
printf("\n\ -setparams: Error in file make.def. Because of the way in which\n\ - command line arguments are incorporated into the\n\ - executable benchmark, you can't have any continued\n\ - lines in the file make.def, that is, lines ending\n\ - with the character \"\\\". Although it may be ugly, \n\ - you should be able to reformat without continuation\n\ - lines. The offending line is\n\ - %s\n", original_line); - exit(1); - } */ -} - -int check_include_line(char *line, char *filename) -{ - char *include_string = "include"; - /* compare beginning of line and "include" */ - while (*include_string != '\0' && *line == *include_string) { - line++; include_string++; - } - /* if *include_string is not EOS, we must have had a mismatch */ - if (*include_string != '\0') return(0); - /* if *line is not a space, first word is not "include" */ - if (!isspace(*line)) return(0); - /* skip over white space */ - while (isspace(*++line)); - /* if EOS, nothing was specified */ - if (*line == '\0') return(0); - /* next keyword should be name of include file in *filename */ - while (*filename != '\0' && *line == *filename) { - line++; filename++; - } - if (*filename != '\0' || - (*line != ' ' && *line != '\0' && *line !='\n')) return(0); - else return(1); -} - -#define MAXL 46 -void put_string(FILE *fp, char *name, char *val) -{ - int len; - len = strlen(val); - if (len > MAXL) { - val[MAXL] = '\0'; - val[MAXL-1] = '.'; - val[MAXL-2] = '.'; - val[MAXL-3] = '.'; - len = MAXL; - } - fprintf(fp, "%scharacter*%d %s\n", FINDENT, len, name); - fprintf(fp, "%sparameter (%s=\'%s\')\n", FINDENT, name, val); -} - -/* NOTE: is the ... stuff necessary in C? 
*/ -void put_def_string(FILE *fp, char *name, char *val) -{ - int len; - len = strlen(val); - if (len > MAXL) { - val[MAXL] = '\0'; - val[MAXL-1] = '.'; - val[MAXL-2] = '.'; - val[MAXL-3] = '.'; - len = MAXL; - } - fprintf(fp, "#define %s \"%s\"\n", name, val); -} - -void put_def_variable(FILE *fp, char *name, char *val) -{ - int len; - len = strlen(val); - if (len > MAXL) { - val[MAXL] = '\0'; - val[MAXL-1] = '.'; - val[MAXL-2] = '.'; - val[MAXL-3] = '.'; - len = MAXL; - } - fprintf(fp, "#define %s %s\n", name, val); -} - -#if 0 -/* this version allows arbitrarily long lines but some compilers don't like that and they're rarely useful */ - -#define LINELEN 65 -void put_string(FILE *fp, char *name, char *val) -{ - int len, nlines, pos, i; - char line[100]; - len = strlen(val); - nlines = len/LINELEN; - if (nlines*LINELEN < len) nlines++; - fprintf(fp, "%scharacter*%d %s\n", FINDENT, nlines*LINELEN, name); - fprintf(fp, "%sparameter (%s = \n", FINDENT, name); - for (i = 0; i < nlines; i++) { - pos = i*LINELEN; - if (i == 0) fprintf(fp, "%s\'", CONTINUE); - else fprintf(fp, "%s", CONTINUE); - /* number should be same as LINELEN */ - fprintf(fp, "%.65s", val+pos); - if (i == nlines-1) fprintf(fp, "\')\n"); - else fprintf(fp, "\n"); - } -} -#endif - - -/* integer square root. Return error if argument isn't a perfect square or is less than or equal to zero */ -int isqrt(int i) -{ - int root, square; - if (i <= 0) return(-1); - square = 0; - for (root = 1; square <= i; root++) { - square = root*root; - if (square == i) return(root); - } - return(-1); -} - -/* integer log base two. 
Return error is argument isn't a power of two or is less than or equal to zero */ -int ilog2(int i) -{ - int log2; - int exp2 = 1; - if (i <= 0) return(-1); - - for (log2 = 0; log2 < 20; log2++) { - if (exp2 == i) return(log2); - exp2 *= 2; - } - return(-1); -} - -int ipow2(int i) -{ - int pow2 = 1; - if (i < 0) return(-1); - if (i == 0) return(1); - while(i--) pow2 *= 2; - return(pow2); -} diff --git a/tools/cmake/DefinePackages.cmake b/tools/cmake/DefinePackages.cmake index e7033d9fcb..ef11ae1ad3 100644 --- a/tools/cmake/DefinePackages.cmake +++ b/tools/cmake/DefinePackages.cmake @@ -984,6 +984,7 @@ set(CMAKEFILES_TXT examples/simdag/scheduling/CMakeLists.txt examples/smpi/CMakeLists.txt + examples/smpi/NAS/CMakeLists.txt examples/smpi/smpi_msg_masterslave/CMakeLists.txt examples/smpi/replay_multiple/CMakeLists.txt examples/smpi/energy/CMakeLists.txt diff --git a/tools/internal/check_dist_archive.exclude b/tools/internal/check_dist_archive.exclude index 8cecb9f927..7070e3bc58 100644 --- a/tools/internal/check_dist_archive.exclude +++ b/tools/internal/check_dist_archive.exclude @@ -23,8 +23,6 @@ + contrib/.* -+ examples/smpi/NAS/.* - + src/simix/README_attempt_without_stack + src/simix/simix_network\.tla