1 /*************************************************************************
3 * N A S P A R A L L E L B E N C H M A R K S 3.3 *
7 *************************************************************************
9 * This benchmark is part of the NAS Parallel Benchmark 3.3 suite. *
11 * Permission to use, copy, distribute and modify this software *
12 * for any purpose with or without fee is hereby granted. We *
13 * request, however, that all derived work reference the NAS *
14 * Parallel Benchmarks 3.3. This software is provided "as is" *
15 * without express or implied warranty. *
17 * Information on NPB 3.3, including the technical report, the *
18 * original specifications, source code, results and information *
19 * on how to submit new results, is available at: *
21 * http: www.nas.nasa.gov/Software/NPB *
23 * Send comments or suggestions to npb@nas.nasa.gov *
24 * Send bug reports to npb-bugs@nas.nasa.gov *
26 * NAS Parallel Benchmarks Group *
27 * NASA Ames Research Center *
29 * Moffett Field, CA 94035-1000 *
31 * E-mail: npb@nas.nasa.gov *
32 * Fax: (650) 604-3957 *
34 *************************************************************************
36 * Author: M. Frumkin * *
38 *************************************************************************/
45 #include "npbparams.h"
47 #include "simgrid/instr.h" //TRACE_
54 //int passed_verification;
55 extern double randlc( double *X, double *A );
57 void c_print_results( char *name,
68 int passed_verification,
78 void timer_clear( int n );
79 void timer_start( int n );
80 void timer_stop( int n );
81 double timer_read( int n );
82 int timer_on=0,timers_tot=64;
84 int verify(char *bmname,double rnm2){
85 double verify_value=0.0;
86 double epsilon=1.0E-8;
91 if(strstr(bmname,"BH")){
92 verify_value=30892725.0;
93 }else if(strstr(bmname,"WH")){
94 verify_value=67349758.0;
95 }else if(strstr(bmname,"SH")){
96 verify_value=58875767.0;
98 fprintf(stderr,"No such benchmark as %s.\n",bmname);
102 if(strstr(bmname,"BH")){
103 verify_value = 4102461.0;
104 }else if(strstr(bmname,"WH")){
105 verify_value = 204280762.0;
106 }else if(strstr(bmname,"SH")){
107 verify_value = 186944764.0;
109 fprintf(stderr,"No such benchmark as %s.\n",bmname);
113 if(strstr(bmname,"BH")){
114 verify_value = 17809491.0;
115 }else if(strstr(bmname,"WH")){
116 verify_value = 1289925229.0;
117 }else if(strstr(bmname,"SH")){
118 verify_value = 610856482.0;
120 fprintf(stderr,"No such benchmark as %s.\n",bmname);
124 if(strstr(bmname,"BH")){
125 verify_value = 4317114.0;
126 }else if(strstr(bmname,"WH")){
127 verify_value = 7877279917.0;
128 }else if(strstr(bmname,"SH")){
129 verify_value = 1836863082.0;
131 fprintf(stderr,"No such benchmark as %s.\n",bmname);
135 if(strstr(bmname,"BH")){
137 }else if(strstr(bmname,"WH")){
139 }else if(strstr(bmname,"SH")){
142 fprintf(stderr,"No such benchmark as %s.\n",bmname);
146 if(strstr(bmname,"BH")){
148 }else if(strstr(bmname,"WH")){
150 }else if(strstr(bmname,"SH")){
153 fprintf(stderr,"No such benchmark as %s.\n",bmname);
157 fprintf(stderr,"No such class as %c.\n",cls);
159 fprintf(stderr," %s L2 Norm = %f\n",bmname,rnm2);
161 fprintf(stderr," No verification was performed.\n");
162 }else if( rnm2 - verify_value < epsilon &&
163 rnm2 - verify_value > -epsilon) { /* abs here does not work on ALTIX */
165 fprintf(stderr," Deviation = %f\n",(rnm2 - verify_value));
168 fprintf(stderr," The correct verification value = %f\n",verify_value);
169 fprintf(stderr," Got value = %f\n",rnm2);
177 int ipowMod(int a,long long int n,int md){
180 fprintf(stderr,"ipowMod: exponent must be nonnegative exp=%lld\n",n);
185 fprintf(stderr,"ipowMod: module must be positive mod=%d",md);
206 DGraph *buildSH(char cls){
208 Nodes of the graph must be topologically sorted
209 to avoid MPI deadlock.
212 int numSources=NUM_SOURCES; /* must be power of 2 */
213 int numOfLayers=0,tmpS=numSources>>1;
214 int firstLayerNode=0;
217 int mask=0x0,ndid=0,ndoff=0;
221 sprintf(nm,"DT_SH.%c",cls);
228 for(i=0;i<numSources;i++){
229 sprintf(nm,"Source.%d",i);
233 for(j=0;j<numOfLayers;j++){
235 for(i=0;i<numSources;i++){
236 sprintf(nm,"Comparator.%d",(i+j*firstLayerNode));
240 ndid=firstLayerNode+ndoff;
241 ar=newArc(dg->node[ndid],nd);
244 ndid=firstLayerNode+ndoff;
245 ar=newArc(dg->node[ndid],nd);
248 firstLayerNode+=numSources;
250 mask=0x00000001<<numOfLayers;
251 for(i=0;i<numSources;i++){
252 sprintf(nm,"Sink.%d",i);
256 ndid=firstLayerNode+ndoff;
257 ar=newArc(dg->node[ndid],nd);
260 ndid=firstLayerNode+ndoff;
261 ar=newArc(dg->node[ndid],nd);
266 DGraph *buildWH(char cls){
268 Nodes of the graph must be topologically sorted
269 to avoid MPI deadlock.
272 int numSources=NUM_SOURCES,maxInDeg=4;
273 int numLayerNodes=numSources,firstLayerNode=0;
274 int totComparators=0;
275 int numPrevLayerNodes=numLayerNodes;
278 DGNode *nd=NULL,*source=NULL,*tmp=NULL,*snd=NULL;
282 sprintf(nm,"DT_WH.%c",cls);
285 for(i=0;i<numSources;i++){
286 sprintf(nm,"Sink.%d",i);
291 numPrevLayerNodes=numLayerNodes;
292 while(numLayerNodes>maxInDeg){
293 numLayerNodes=numLayerNodes/maxInDeg;
294 if(numLayerNodes*maxInDeg<numPrevLayerNodes)numLayerNodes++;
295 for(i=0;i<numLayerNodes;i++){
296 sprintf(nm,"Comparator.%d",totComparators);
299 id=AttachNode(dg,nd);
300 for(j=0;j<maxInDeg;j++){
302 if(sid>=numPrevLayerNodes) break;
303 snd=dg->node[firstLayerNode+sid];
304 ar=newArc(dg->node[id],snd);
308 firstLayerNode+=numPrevLayerNodes;
309 numPrevLayerNodes=numLayerNodes;
311 source=newNode("Source");
312 AttachNode(dg,source);
313 for(i=0;i<numPrevLayerNodes;i++){
314 nd=dg->node[firstLayerNode+i];
315 ar=newArc(source,nd);
319 for(i=0;i<dg->numNodes/2;i++){ /* Topological sorting */
321 dg->node[i]=dg->node[dg->numNodes-1-i];
323 dg->node[dg->numNodes-1-i]=tmp;
324 dg->node[dg->numNodes-1-i]->id=dg->numNodes-1-i;
328 DGraph *buildBH(char cls){
330 Nodes of the graph must be topologically sorted
331 to avoid MPI deadlock.
334 int numSources=NUM_SOURCES,maxInDeg=4;
335 int numLayerNodes=numSources,firstLayerNode=0;
337 DGNode *nd=NULL, *snd=NULL, *sink=NULL;
339 int totComparators=0;
340 int numPrevLayerNodes=numLayerNodes;
344 sprintf(nm,"DT_BH.%c",cls);
347 for(i=0;i<numSources;i++){
348 sprintf(nm,"Source.%d",i);
352 while(numLayerNodes>maxInDeg){
353 numLayerNodes=numLayerNodes/maxInDeg;
354 if(numLayerNodes*maxInDeg<numPrevLayerNodes)numLayerNodes++;
355 for(i=0;i<numLayerNodes;i++){
356 sprintf(nm,"Comparator.%d",totComparators);
359 id=AttachNode(dg,nd);
360 for(j=0;j<maxInDeg;j++){
362 if(sid>=numPrevLayerNodes) break;
363 snd=dg->node[firstLayerNode+sid];
364 ar=newArc(snd,dg->node[id]);
368 firstLayerNode+=numPrevLayerNodes;
369 numPrevLayerNodes=numLayerNodes;
371 sink=newNode("Sink");
373 for(i=0;i<numPrevLayerNodes;i++){
374 nd=dg->node[firstLayerNode+i];
385 Arr *newArr(int len){
386 Arr *arr=(Arr *)SMPI_SHARED_MALLOC(sizeof(Arr));
388 arr->val=(double *)SMPI_SHARED_MALLOC(len*sizeof(double));
391 void arrShow(Arr* a){
392 if(!a) fprintf(stderr,"-- NULL array\n");
394 fprintf(stderr,"-- length=%d\n",a->len);
397 double CheckVal(Arr *feat){
400 for(i=0;i<feat->len;i++){
401 csum+=feat->val[i]*feat->val[i]/feat->len; /* The truncation does not work since
402 result will be 0 for large len */
406 int GetFNumDPar(int* mean, int* stdev){
408 *stdev=STD_DEVIATION;
411 int GetFeatureNum(char *mbname,int id){
412 double tran=314159265.0;
414 double denom=randlc(&tran,&A);
416 int mean=NUM_SAMPLES,stdev=128;
418 GetFNumDPar(&mean,&stdev);
419 rtfs=ipowMod((int)(1/denom)*(int)cval,(long long int) (2*id+1),2*stdev);
420 if(rtfs<0) rtfs=-rtfs;
424 Arr* RandomFeatures(char *bmname,int fdim,int id){
425 int len=GetFeatureNum(bmname,id)*fdim;
426 Arr* feat=newArr(len);
427 int nxg=2,nyg=2,nzg=2,nfg=5;
428 int nx=421,ny=419,nz=1427,nf=3527;
429 long long int expon=(len*(id+1))%3141592;
430 int seedx=ipowMod(nxg,expon,nx),
431 seedy=ipowMod(nyg,expon,ny),
432 seedz=ipowMod(nzg,expon,nz),
433 seedf=ipowMod(nfg,expon,nf);
439 for(i=0;i<len;i+=fdim){
440 seedx=(seedx*nxg)%nx;
441 seedy=(seedy*nyg)%ny;
442 seedz=(seedz*nzg)%nz;
443 seedf=(seedf*nfg)%nf;
445 feat->val[i+1]=seedy;
446 feat->val[i+2]=seedz;
447 feat->val[i+3]=seedf;
451 fprintf(stderr,"** RandomFeatures time in node %d = %f\n",id,timer_read(id+1));
455 void Resample(Arr *a,int blen){
456 long long int i=0,j=0,jlo=0,jhi=0;
458 double *nval=(double *)SMPI_SHARED_MALLOC(blen*sizeof(double));
460 for(i=0;i<blen;i++) nval[i]=0.0;
461 for(i=1;i<a->len-1;i++){
462 jlo=(int)(0.5*(2*i-1)*(blen/a->len));
463 jhi=(int)(0.5*(2*i+1)*(blen/a->len));
465 avval=a->val[i]/(jhi-jlo+1);
466 for(j=jlo;j<=jhi;j++){
471 nval[blen-1]=a->val[a->len-1];
472 SMPI_SHARED_FREE(a->val);
477 Arr* WindowFilter(Arr *a, Arr* b,int w){
479 double rms0=0.0,rms1=0.0,rmsm1=0.0;
480 double weight=((double) (w+1))/(w+2);
487 if(a->len<b->len) Resample(a,b->len);
488 if(a->len>b->len) Resample(b,a->len);
489 for(i=fielddim;i<a->len-fielddim;i+=fielddim){
490 rms0=(a->val[i]-b->val[i])*(a->val[i]-b->val[i])
491 +(a->val[i+1]-b->val[i+1])*(a->val[i+1]-b->val[i+1])
492 +(a->val[i+2]-b->val[i+2])*(a->val[i+2]-b->val[i+2])
493 +(a->val[i+3]-b->val[i+3])*(a->val[i+3]-b->val[i+3]);
495 rms1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j])
496 +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1])
497 +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2])
498 +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]);
500 rmsm1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j])
501 +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1])
502 +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2])
503 +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]);
512 a->val[i]=weight*b->val[i];
513 a->val[i+1]=weight*b->val[i+1];
514 a->val[i+2]=weight*b->val[i+2];
515 a->val[i+3]=weight*b->val[i+3];
518 a->val[i]=weight*b->val[j];
519 a->val[i+1]=weight*b->val[j+1];
520 a->val[i+2]=weight*b->val[j+2];
521 a->val[i+3]=weight*b->val[j+3];
522 }else { /*if(k==-1)*/
524 a->val[i]=weight*b->val[j];
525 a->val[i+1]=weight*b->val[j+1];
526 a->val[i+2]=weight*b->val[j+2];
527 a->val[i+3]=weight*b->val[j+3];
532 fprintf(stderr,"** WindowFilter time in node %d = %f\n",(w-1),timer_read(w));
537 int SendResults(DGraph *dg,DGNode *nd,Arr *feat){
542 TRACE_smpi_set_category ("SendResults");
543 for(i=0;i<nd->outDegree;i++){
545 if(ar->tail!=nd) continue;
548 if(head->address!=nd->address){
549 MPI_Send(&feat->len,1,MPI_INT,head->address,tag,MPI_COMM_WORLD);
550 MPI_Send(feat->val,feat->len,MPI_DOUBLE,head->address,tag,MPI_COMM_WORLD);
553 TRACE_smpi_set_category (NULL);
556 Arr* CombineStreams(DGraph *dg,DGNode *nd){
557 Arr *resfeat=newArr(NUM_SAMPLES*fielddim);
562 Arr *feat=NULL,*featp=NULL;
564 if(nd->inDegree==0) return NULL;
565 for(i=0;i<nd->inDegree;i++){
567 if(ar->head!=nd) continue;
569 if(tail->address!=nd->address){
572 MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status);
574 MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status);
575 resfeat=WindowFilter(resfeat,feat,nd->id);
576 SMPI_SHARED_FREE(feat);
578 featp=(Arr *)tail->feat;
579 feat=newArr(featp->len);
580 memcpy(feat->val,featp->val,featp->len*sizeof(double));
581 resfeat=WindowFilter(resfeat,feat,nd->id);
582 SMPI_SHARED_FREE(feat);
585 for(i=0;i<resfeat->len;i++) resfeat->val[i]=((int)resfeat->val[i])/nd->inDegree;
589 double Reduce(Arr *a,int w){
595 retv=(int)(w*CheckVal(a));/* The casting needed for node
596 and array dependent verifcation */
599 fprintf(stderr,"** Reduce time in node %d = %f\n",(w-1),timer_read(w));
604 double ReduceStreams(DGraph *dg,DGNode *nd){
612 TRACE_smpi_set_category ("ReduceStreams");
614 for(i=0;i<nd->inDegree;i++){
616 if(ar->head!=nd) continue;
618 if(tail->address!=nd->address){
622 MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status);
624 MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status);
625 csum+=Reduce(feat,(nd->id+1));
626 SMPI_SHARED_FREE(feat);
628 csum+=Reduce(tail->feat,(nd->id+1));
631 if(nd->inDegree>0)csum=(((long long int)csum)/nd->inDegree);
632 retv=(nd->id+1)*csum;
636 int ProcessNodes(DGraph *dg,int me){
639 int i=0,verified=0,tag;
644 TRACE_smpi_set_category ("ProcessNodes");
647 for(i=0;i<dg->numNodes;i++){
649 if(nd->address!=me) continue;
650 if(strstr(nd->name,"Source")){
651 nd->feat=RandomFeatures(dg->name,fielddim,nd->id);
652 SendResults(dg,nd,nd->feat);
653 }else if(strstr(nd->name,"Sink")){
654 chksum=ReduceStreams(dg,nd);
655 tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */
656 MPI_Send(&chksum,1,MPI_DOUBLE,0,tag,MPI_COMM_WORLD);
658 feat=CombineStreams(dg,nd);
659 SendResults(dg,nd,feat);
663 TRACE_smpi_set_category ("ProcessNodes");
666 if(me==0){ /* Report node */
669 for(i=0;i<dg->numNodes;i++){
671 if(!strstr(nd->name,"Sink")) continue;
672 tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */
673 MPI_Recv(&rchksum,1,MPI_DOUBLE,nd->address,tag,MPI_COMM_WORLD,&status);
676 verified=verify(dg->name,chksum);
681 int main(int argc,char **argv ){
682 int my_rank,comm_size;
685 int verified=0, featnum=0;
686 double bytes_sent=2.0,tot_time=0.0;
690 MPI_Init( &argc, &argv );
691 MPI_Comm_rank( MPI_COMM_WORLD, &my_rank );
692 MPI_Comm_size( MPI_COMM_WORLD, &comm_size );
693 TRACE_smpi_set_category ("begin");
696 ( strncmp(argv[1],"BH",2)!=0
697 &&strncmp(argv[1],"WH",2)!=0
698 &&strncmp(argv[1],"SH",2)!=0
702 fprintf(stderr,"** Usage: mpirun -np N ../bin/dt.S GraphName\n");
703 fprintf(stderr,"** Where \n - N is integer number of MPI processes\n");
704 fprintf(stderr," - S is the class S, W, or A \n");
705 fprintf(stderr," - GraphName is the communication graph name BH, WH, or SH.\n");
706 fprintf(stderr," - the number of MPI processes N should not be be less than \n");
707 fprintf(stderr," the number of nodes in the graph\n");
712 if(strncmp(argv[1],"BH",2)==0){
714 }else if(strncmp(argv[1],"WH",2)==0){
716 }else if(strncmp(argv[1],"SH",2)==0){
720 if(timer_on&&dg->numNodes+1>timers_tot){
723 fprintf(stderr,"Not enough timers. Node timeing is off. \n");
725 if(dg->numNodes>comm_size){
727 fprintf(stderr,"** The number of MPI processes should not be less than \n");
728 fprintf(stderr,"** the number of nodes in the graph\n");
729 fprintf(stderr,"** Number of MPI processes = %d\n",comm_size);
730 fprintf(stderr,"** Number nodes in the graph = %d\n",dg->numNodes);
735 for(i=0;i<dg->numNodes;i++){
736 dg->node[i]->address=i;
739 printf( "\n\n NAS Parallel Benchmarks 3.3 -- DT Benchmark\n\n" );
745 verified=ProcessNodes(dg,my_rank);
746 TRACE_smpi_set_category ("end");
748 featnum=NUM_SAMPLES*fielddim;
749 bytes_sent=featnum*dg->numArcs;
753 tot_time=timer_read(0);
754 c_print_results( dg->name,