1 /*************************************************************************
3 * N A S P A R A L L E L B E N C H M A R K S 3.3 *
7 *************************************************************************
9 * This benchmark is part of the NAS Parallel Benchmark 3.3 suite. *
11 * Permission to use, copy, distribute and modify this software *
12 * for any purpose with or without fee is hereby granted. We *
13 * request, however, that all derived work reference the NAS *
14 * Parallel Benchmarks 3.3. This software is provided "as is" *
15 * without express or implied warranty. *
17 * Information on NPB 3.3, including the technical report, the *
18 * original specifications, source code, results and information *
19 * on how to submit new results, is available at: *
21 * http://www.nas.nasa.gov/Software/NPB *
23 * Send comments or suggestions to npb@nas.nasa.gov *
24 * Send bug reports to npb-bugs@nas.nasa.gov *
26 * NAS Parallel Benchmarks Group *
27 * NASA Ames Research Center *
29 * Moffett Field, CA 94035-1000 *
31 * E-mail: npb@nas.nasa.gov *
32 * Fax: (650) 604-3957 *
34 *************************************************************************
36 * Author: M. Frumkin *
38 *************************************************************************/
45 #include "npbparams.h"
52 //int passed_verification;
/* Declared here and defined in another translation unit: takes pointers to
   two doubles and returns a double.  Called in GetFeatureNum as
   randlc(&tran,&A). */
53 extern double randlc( double *X, double *A );
/* Result-reporting routine.  NOTE(review): only two of its parameters are
   visible in this excerpt. */
55 void c_print_results( char *name,
66 int passed_verification,
/* Timer interface: each routine takes an integer timer index n; timer_read
   returns a double that the code in this file prints with %f. */
76 void timer_clear( int n );
77 void timer_start( int n );
78 void timer_stop( int n );
79 double timer_read( int n );
/* timer_on:   flag, off (0) by default; main tests it together with
               timers_tot.
   timers_tot: limit that main compares dg->numNodes+1 against before
               printing "Not enough timers" -- presumably the number of
               timers available; confirm against the timer implementation. */
80 int timer_on=0,timers_tot=64;
/*
 * verify: check the computed value rnm2 against a stored reference value.
 *
 *   bmname - graph name; the reference value is chosen by testing whether
 *            it contains "BH", "WH" or "SH" (strstr).  Any other name is
 *            reported on stderr as "No such benchmark".
 *   rnm2   - value to check; printed as the "L2 Norm".
 *
 * Six groups of BH/WH/SH branches follow; the first four assign reference
 * values.  NOTE(review): the conditions selecting a group are not visible
 * in this excerpt -- presumably the problem class, given the "No such
 * class" message; confirm.
 *
 * The value is accepted when it lies strictly within epsilon (1.0E-8) of
 * the reference.  All diagnostics are written to stderr.
 * NOTE(review): the returned int is set on lines not visible here.
 */
82 int verify(char *bmname,double rnm2){
83 double verify_value=0.0;
84 double epsilon=1.0E-8;
/* First group of reference values. */
89 if(strstr(bmname,"BH")){
90 verify_value=30892725.0;
91 }else if(strstr(bmname,"WH")){
92 verify_value=67349758.0;
93 }else if(strstr(bmname,"SH")){
94 verify_value=58875767.0;
96 fprintf(stderr,"No such benchmark as %s.\n",bmname);
/* Second group of reference values. */
100 if(strstr(bmname,"BH")){
101 verify_value = 4102461.0;
102 }else if(strstr(bmname,"WH")){
103 verify_value = 204280762.0;
104 }else if(strstr(bmname,"SH")){
105 verify_value = 186944764.0;
107 fprintf(stderr,"No such benchmark as %s.\n",bmname);
/* Third group of reference values. */
111 if(strstr(bmname,"BH")){
112 verify_value = 17809491.0;
113 }else if(strstr(bmname,"WH")){
114 verify_value = 1289925229.0;
115 }else if(strstr(bmname,"SH")){
116 verify_value = 610856482.0;
118 fprintf(stderr,"No such benchmark as %s.\n",bmname);
/* Fourth group of reference values. */
122 if(strstr(bmname,"BH")){
123 verify_value = 4317114.0;
124 }else if(strstr(bmname,"WH")){
125 verify_value = 7877279917.0;
126 }else if(strstr(bmname,"SH")){
127 verify_value = 1836863082.0;
129 fprintf(stderr,"No such benchmark as %s.\n",bmname);
/* NOTE(review): no reference value is visible for the two groups below;
   together with the "No verification was performed" message further down,
   presumably these cases are left unverified -- confirm. */
133 if(strstr(bmname,"BH")){
135 }else if(strstr(bmname,"WH")){
137 }else if(strstr(bmname,"SH")){
140 fprintf(stderr,"No such benchmark as %s.\n",bmname);
144 if(strstr(bmname,"BH")){
146 }else if(strstr(bmname,"WH")){
148 }else if(strstr(bmname,"SH")){
151 fprintf(stderr,"No such benchmark as %s.\n",bmname);
155 fprintf(stderr,"No such class as %c.\n",cls);
157 fprintf(stderr," %s L2 Norm = %f\n",bmname,rnm2);
159 fprintf(stderr," No verification was performed.\n");
/* Hand-written |rnm2 - verify_value| < epsilon test (two one-sided
   comparisons instead of a library abs call). */
160 }else if( rnm2 - verify_value < epsilon &&
161 rnm2 - verify_value > -epsilon) { /* abs here does not work on ALTIX */
163 fprintf(stderr," Deviation = %f\n",(rnm2 - verify_value));
/* Mismatch: report both the expected and the obtained value. */
166 fprintf(stderr," The correct verification value = %f\n",verify_value);
167 fprintf(stderr," Got value = %f\n",rnm2);
/*
 * ipowMod: judging by its name and messages, an integer power of a with
 * exponent n taken modulo md.  NOTE(review): the computation itself is on
 * lines not visible in this excerpt; only the argument diagnostics are.
 *
 *   a  - base
 *   n  - exponent; the first message states it must be nonnegative
 *   md - modulus; the second message states it must be positive
 */
175 int ipowMod(int a,long long int n,int md){
/* Diagnostic goes to stderr; what happens afterwards is not visible here. */
178 fprintf(stderr,"ipowMod: exponent must be nonnegative exp=%lld\n",n);
/* NOTE(review): unlike the message above, this format string has no
   trailing newline. */
183 fprintf(stderr,"ipowMod: module must be positive mod=%d",md);
/*
 * buildSH: construct the graph named "DT_SH.<cls>".
 *
 * Visible structure, in creation order:
 *   - NUM_SOURCES nodes named "Source.<i>";
 *   - numOfLayers layers, each of NUM_SOURCES nodes named "Comparator.<k>"
 *     with k = i + j*firstLayerNode;
 *   - NUM_SOURCES nodes named "Sink.<i>".
 * Every comparator and every sink receives two arcs created as
 * newArc(dg->node[ndid],nd) with ndid = firstLayerNode + ndoff.
 * firstLayerNode advances by NUM_SOURCES after each comparator layer, so
 * ndid presumably addresses a node of the preceding layer.
 *
 * NOTE(review): how numOfLayers and ndoff are computed, how mask is used,
 * node creation/attachment and the return statement are all on lines not
 * visible in this excerpt.
 */
204 DGraph *buildSH(char cls){
206 Nodes of the graph must be topologically sorted
207 to avoid MPI deadlock.
210 int numSources=NUM_SOURCES; /* must be power of 2 */
211 int numOfLayers=0,tmpS=numSources>>1;
212 int firstLayerNode=0;
215 int mask=0x0,ndid=0,ndoff=0;
219 sprintf(nm,"DT_SH.%c",cls);
/* Source layer. */
226 for(i=0;i<numSources;i++){
227 sprintf(nm,"Source.%d",i);
/* Comparator layers: j indexes the layer, i the node within it. */
231 for(j=0;j<numOfLayers;j++){
233 for(i=0;i<numSources;i++){
234 sprintf(nm,"Comparator.%d",(i+j*firstLayerNode));
/* Two incoming arcs per comparator; ndoff differs between them (set on
   lines not visible here). */
238 ndid=firstLayerNode+ndoff;
239 ar=newArc(dg->node[ndid],nd);
242 ndid=firstLayerNode+ndoff;
243 ar=newArc(dg->node[ndid],nd);
/* Move the base index on to the layer that was just built. */
246 firstLayerNode+=numSources;
/* Sink layer, wired the same way as a comparator layer. */
248 mask=0x00000001<<numOfLayers;
249 for(i=0;i<numSources;i++){
250 sprintf(nm,"Sink.%d",i);
254 ndid=firstLayerNode+ndoff;
255 ar=newArc(dg->node[ndid],nd);
258 ndid=firstLayerNode+ndoff;
259 ar=newArc(dg->node[ndid],nd);
/*
 * buildWH: construct the graph named "DT_WH.<cls>".
 *
 * Visible construction order:
 *   1. NUM_SOURCES nodes named "Sink.<i>".
 *   2. While the current layer has more than maxInDeg (4) nodes, a new
 *      layer of ceil(previous/maxInDeg) nodes named "Comparator.<n>" is
 *      added.  Each comparator gets up to maxInDeg arcs created as
 *      newArc(dg->node[id],snd), where snd is a node of the previous layer.
 *   3. A single node named "Source" is attached and connected by
 *      newArc(source,nd) to every node of the last layer.
 *   4. The node array is reversed in place and node ids are rewritten to
 *      match the new positions ("Topological sorting").
 *
 * NOTE(review): the computation of sid, the update of totComparators, the
 * attachment of arcs and the return statement are on lines not visible in
 * this excerpt.
 */
264 DGraph *buildWH(char cls){
266 Nodes of the graph must be topologically sorted
267 to avoid MPI deadlock.
270 int numSources=NUM_SOURCES,maxInDeg=4;
271 int numLayerNodes=numSources,firstLayerNode=0;
272 int totComparators=0;
273 int numPrevLayerNodes=numLayerNodes;
276 DGNode *nd=NULL,*source=NULL,*tmp=NULL,*snd=NULL;
280 sprintf(nm,"DT_WH.%c",cls);
/* Sinks are created first; the array is reversed at the end. */
283 for(i=0;i<numSources;i++){
284 sprintf(nm,"Sink.%d",i);
289 numPrevLayerNodes=numLayerNodes;
290 while(numLayerNodes>maxInDeg){
/* Ceiling division: one extra node when the previous layer is not an exact
   multiple of maxInDeg. */
291 numLayerNodes=numLayerNodes/maxInDeg;
292 if(numLayerNodes*maxInDeg<numPrevLayerNodes)numLayerNodes++;
293 for(i=0;i<numLayerNodes;i++){
294 sprintf(nm,"Comparator.%d",totComparators);
297 id=AttachNode(dg,nd);
298 for(j=0;j<maxInDeg;j++){
/* Stop once the previous layer is exhausted (last comparator may have
   fewer than maxInDeg arcs). */
300 if(sid>=numPrevLayerNodes) break;
301 snd=dg->node[firstLayerNode+sid];
302 ar=newArc(dg->node[id],snd);
306 firstLayerNode+=numPrevLayerNodes;
307 numPrevLayerNodes=numLayerNodes;
/* Single source feeding every node of the last layer built. */
309 source=newNode("Source");
310 AttachNode(dg,source);
311 for(i=0;i<numPrevLayerNodes;i++){
312 nd=dg->node[firstLayerNode+i];
313 ar=newArc(source,nd);
/* Swap node i with its mirror position and fix up the stored id. */
317 for(i=0;i<dg->numNodes/2;i++){ /* Topological sorting */
319 dg->node[i]=dg->node[dg->numNodes-1-i];
321 dg->node[dg->numNodes-1-i]=tmp;
322 dg->node[dg->numNodes-1-i]->id=dg->numNodes-1-i;
/*
 * buildBH: construct the graph named "DT_BH.<cls>".
 *
 * Visible construction order:
 *   1. NUM_SOURCES nodes named "Source.<i>".
 *   2. While the current layer has more than maxInDeg (4) nodes, a new
 *      layer of ceil(previous/maxInDeg) nodes named "Comparator.<n>" is
 *      added.  Each comparator gets up to maxInDeg arcs created as
 *      newArc(snd,dg->node[id]), where snd is a node of the previous layer
 *      (argument order is the reverse of buildWH).
 *   3. A single node named "Sink" is created, followed by a loop over the
 *      nodes of the last layer.
 *
 * NOTE(review): the computation of sid, the update of totComparators, the
 * arcs into the sink and the return statement are on lines not visible in
 * this excerpt.
 */
326 DGraph *buildBH(char cls){
328 Nodes of the graph must be topologically sorted
329 to avoid MPI deadlock.
332 int numSources=NUM_SOURCES,maxInDeg=4;
333 int numLayerNodes=numSources,firstLayerNode=0;
335 DGNode *nd=NULL, *snd=NULL, *sink=NULL;
337 int totComparators=0;
338 int numPrevLayerNodes=numLayerNodes;
342 sprintf(nm,"DT_BH.%c",cls);
/* Source layer. */
345 for(i=0;i<numSources;i++){
346 sprintf(nm,"Source.%d",i);
350 while(numLayerNodes>maxInDeg){
/* Ceiling division: one extra node when the previous layer is not an exact
   multiple of maxInDeg. */
351 numLayerNodes=numLayerNodes/maxInDeg;
352 if(numLayerNodes*maxInDeg<numPrevLayerNodes)numLayerNodes++;
353 for(i=0;i<numLayerNodes;i++){
354 sprintf(nm,"Comparator.%d",totComparators);
357 id=AttachNode(dg,nd);
358 for(j=0;j<maxInDeg;j++){
/* Stop once the previous layer is exhausted. */
360 if(sid>=numPrevLayerNodes) break;
361 snd=dg->node[firstLayerNode+sid];
362 ar=newArc(snd,dg->node[id]);
366 firstLayerNode+=numPrevLayerNodes;
367 numPrevLayerNodes=numLayerNodes;
/* Single sink; the loop below visits every node of the last layer. */
369 sink=newNode("Sink");
371 for(i=0;i<numPrevLayerNodes;i++){
372 nd=dg->node[firstLayerNode+i];
/*
 * newArr: allocate an Arr object and a value buffer of len doubles, both
 * through SMPI_SHARED_MALLOC.
 *
 *   len - number of doubles in the value buffer.
 *
 * NOTE(review): no check of either allocation result is visible in this
 * excerpt; the assignment of the length field and the return statement are
 * on lines not shown.
 */
383 Arr *newArr(int len){
384 Arr *arr=(Arr *)SMPI_SHARED_MALLOC(sizeof(Arr));
386 arr->val=(double *)SMPI_SHARED_MALLOC(len*sizeof(double));
/*
 * arrShow: debugging aid that writes a short description of a to stderr:
 * "-- NULL array" when a is a null pointer, and the array length otherwise.
 * NOTE(review): the branch structure between the two prints is on lines
 * not visible in this excerpt.
 */
389 void arrShow(Arr* a){
390 if(!a) fprintf(stderr,"-- NULL array\n");
392 fprintf(stderr,"-- length=%d\n",a->len);
/*
 * CheckVal: accumulate, over all feat->len elements, the square of each
 * value divided by feat->len (i.e. a running mean of squares).
 * NOTE(review): the declaration of csum and the return statement are on
 * lines not visible here; presumably csum is returned -- confirm.
 */
395 double CheckVal(Arr *feat){
398 for(i=0;i<feat->len;i++){
399 csum+=feat->val[i]*feat->val[i]/feat->len; /* The truncation does not work since
400 result will be 0 for large len */
/*
 * GetFNumDPar: report feature-count distribution parameters through the
 * two output pointers.  Visible here: *stdev receives STD_DEVIATION.
 * NOTE(review): the write to *mean and the returned int are on lines not
 * visible in this excerpt.
 */
404 int GetFNumDPar(int* mean, int* stdev){
406 *stdev=STD_DEVIATION;
/*
 * GetFeatureNum: derive a per-node integer from id.
 *
 *   mbname - not referenced on any visible line.
 *   id     - node id; enters the ipowMod exponent as 2*id+1.
 *
 * Visible steps: denom is drawn with randlc from the seed 314159265.0;
 * mean/stdev start at NUM_SAMPLES/128 and are then passed to GetFNumDPar;
 * rtfs = ipowMod(base, 2*id+1, 2*stdev) and is made non-negative.
 * NOTE(review): the definitions of A and cval and the return expression
 * are on lines not visible in this excerpt.
 */
409 int GetFeatureNum(char *mbname,int id){
410 double tran=314159265.0;
412 double denom=randlc(&tran,&A);
414 int mean=NUM_SAMPLES,stdev=128;
416 GetFNumDPar(&mean,&stdev);
/* The (int) cast applies to (1/denom) alone; the product with (int)cval is
   then formed in int arithmetic.  NOTE(review): that product could exceed
   the int range depending on denom and cval -- confirm the value ranges. */
417 rtfs=ipowMod((int)(1/denom)*(int)cval,(long long int) (2*id+1),2*stdev);
418 if(rtfs<0) rtfs=-rtfs;
/*
 * RandomFeatures: create and fill the feature array for a node.
 *
 *   bmname - graph name, forwarded to GetFeatureNum.
 *   fdim   - number of values per feature; also the loop stride.
 *   id     - node id; used in the seed exponent and the timer message.
 *
 * The array holds GetFeatureNum(bmname,id)*fdim doubles.  Four integer
 * sequences are run with multipliers 2,2,2,5 and moduli 421,419,1427,3527,
 * each seeded by ipowMod(multiplier, expon, modulus).  On every iteration
 * each seed is advanced by one multiply-and-reduce step and the results are
 * stored at offsets i+1..i+3 (the store to offset i is on a line not
 * visible here -- presumably seedx).
 * NOTE(review): the fixed offsets up to i+3 assume fdim >= 4.
 */
422 Arr* RandomFeatures(char *bmname,int fdim,int id){
423 int len=GetFeatureNum(bmname,id)*fdim;
424 Arr* feat=newArr(len);
425 int nxg=2,nyg=2,nzg=2,nfg=5;
426 int nx=421,ny=419,nz=1427,nf=3527;
/* NOTE(review): len*(id+1) is evaluated in int before the result is widened
   to long long, so a large product would overflow first. */
427 long long int expon=(len*(id+1))%3141592;
428 int seedx=ipowMod(nxg,expon,nx),
429 seedy=ipowMod(nyg,expon,ny),
430 seedz=ipowMod(nzg,expon,nz),
431 seedf=ipowMod(nfg,expon,nf);
437 for(i=0;i<len;i+=fdim){
438 seedx=(seedx*nxg)%nx;
439 seedy=(seedy*nyg)%ny;
440 seedz=(seedz*nzg)%nz;
441 seedf=(seedf*nfg)%nf;
443 feat->val[i+1]=seedy;
444 feat->val[i+2]=seedz;
445 feat->val[i+3]=seedf;
/* Timer id+1 is the one read for this node's report. */
449 fprintf(stderr,"** RandomFeatures time in node %d = %f\n",id,timer_read(id+1));
/*
 * Resample: rebuild the contents of a in a new buffer of blen doubles.
 *
 *   a    - array to resample; its old value buffer is released at the end.
 *   blen - length of the new buffer.
 *
 * The new buffer is zeroed.  For every interior element i (1 .. len-2) a
 * target index range [jlo, jhi] is computed and avval = a->val[i] divided
 * by the range width is formed for it (the body of the inner loop is on a
 * line not visible here).  The last element is copied unchanged.
 * NOTE(review): installing nval/blen into a is on lines not visible in
 * this excerpt.
 */
453 void Resample(Arr *a,int blen){
454 long long int i=0,j=0,jlo=0,jhi=0;
456 double *nval=(double *)SMPI_SHARED_MALLOC(blen*sizeof(double));
458 for(i=0;i<blen;i++) nval[i]=0.0;
459 for(i=1;i<a->len-1;i++){
/* NOTE(review): blen/a->len is evaluated on its own, before the multiply;
   a->len is printed with %d elsewhere, so this is presumably an integer
   (truncating) division -- confirm that is intended. */
460 jlo=(int)(0.5*(2*i-1)*(blen/a->len));
461 jhi=(int)(0.5*(2*i+1)*(blen/a->len));
/* Share of the source value per target slot in [jlo, jhi]. */
463 avval=a->val[i]/(jhi-jlo+1);
464 for(j=jlo;j<=jhi;j++){
469 nval[blen-1]=a->val[a->len-1];
470 SMPI_SHARED_FREE(a->val);
/*
 * WindowFilter: overwrite the interior of a with weighted values taken
 * from b.
 *
 *   a, b - feature arrays; the shorter one is first resampled to the
 *          length of the longer one.
 *   w    - sets the weight (w+1)/(w+2) and selects the timer that is read
 *          for the report.
 *
 * For each group start i (stride fielddim, first and last group excluded)
 * three sums of squared differences between a and b over four consecutive
 * values are formed: rms0 at i, rms1 and rmsm1 at an index j.  One of three
 * branches then stores weight*b into a[i..i+3], reading b at i or at j.
 * NOTE(review): the assignments to j, the branch conditions (the trailing
 * comment mentions k==-1) and the return statement are on lines not visible
 * in this excerpt; j is presumably i+fielddim and i-fielddim -- confirm.
 * NOTE(review): the fixed offsets up to +3 assume fielddim >= 4.
 */
475 Arr* WindowFilter(Arr *a, Arr* b,int w){
477 double rms0=0.0,rms1=0.0,rmsm1=0.0;
478 double weight=((double) (w+1))/(w+2);
/* Bring both arrays to the same length. */
485 if(a->len<b->len) Resample(a,b->len);
486 if(a->len>b->len) Resample(b,a->len);
487 for(i=fielddim;i<a->len-fielddim;i+=fielddim){
/* Squared distance between a and b at the current group. */
488 rms0=(a->val[i]-b->val[i])*(a->val[i]-b->val[i])
489 +(a->val[i+1]-b->val[i+1])*(a->val[i+1]-b->val[i+1])
490 +(a->val[i+2]-b->val[i+2])*(a->val[i+2]-b->val[i+2])
491 +(a->val[i+3]-b->val[i+3])*(a->val[i+3]-b->val[i+3]);
/* Same measure at index j. */
493 rms1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j])
494 +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1])
495 +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2])
496 +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]);
/* Same measure again; j is presumably reassigned in between. */
498 rmsm1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j])
499 +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1])
500 +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2])
501 +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]);
/* Branch 1: take b at the same position i. */
510 a->val[i]=weight*b->val[i];
511 a->val[i+1]=weight*b->val[i+1];
512 a->val[i+2]=weight*b->val[i+2];
513 a->val[i+3]=weight*b->val[i+3];
/* Branch 2: take b at position j. */
516 a->val[i]=weight*b->val[j];
517 a->val[i+1]=weight*b->val[j+1];
518 a->val[i+2]=weight*b->val[j+2];
519 a->val[i+3]=weight*b->val[j+3];
520 }else { /*if(k==-1)*/
/* Branch 3: take b at position j. */
522 a->val[i]=weight*b->val[j];
523 a->val[i+1]=weight*b->val[j+1];
524 a->val[i+2]=weight*b->val[j+2];
525 a->val[i+3]=weight*b->val[j+3];
530 fprintf(stderr,"** WindowFilter time in node %d = %f\n",(w-1),timer_read(w));
/*
 * SendResults: ship feat along the outgoing arcs of nd.
 *
 *   dg   - graph; not referenced on any visible line.
 *   nd   - sending node.
 *   feat - array to send.
 *
 * For each of nd->outDegree arcs whose tail is nd, and whose head lives at
 * a different address than nd, two blocking sends go to head->address: the
 * length as one MPI_INT, then feat->len doubles.  Heads at the same address
 * get no message.
 * NOTE(review): the lookups of ar and head, the value of tag and the
 * returned int are on lines not visible in this excerpt.
 */
535 int SendResults(DGraph *dg,DGNode *nd,Arr *feat){
540 for(i=0;i<nd->outDegree;i++){
/* Skip arcs that do not start at this node. */
542 if(ar->tail!=nd) continue;
545 if(head->address!=nd->address){
546 MPI_Send(&feat->len,1,MPI_INT,head->address,tag,MPI_COMM_WORLD);
547 MPI_Send(feat->val,feat->len,MPI_DOUBLE,head->address,tag,MPI_COMM_WORLD);
/*
 * CombineStreams: merge the feature arrays arriving on the incoming arcs
 * of nd into one array.
 *
 *   dg - graph; not referenced on any visible line.
 *   nd - receiving node.
 *
 * Returns NULL when nd has no incoming arcs.  Otherwise, for every arc
 * whose head is nd: a tail at another address has its length and values
 * received with MPI_Recv; a tail at the same address has its stored
 * tail->feat copied into a fresh array.  Each array is folded into resfeat
 * with WindowFilter(resfeat,feat,nd->id) and then released.  Finally every
 * element is truncated to int and divided by nd->inDegree.
 * NOTE(review): the lookups of ar and tail, the value of tag, the
 * allocation of feat on the receive path and the return statement are on
 * lines not visible in this excerpt.
 */
552 Arr* CombineStreams(DGraph *dg,DGNode *nd){
553 Arr *resfeat=newArr(NUM_SAMPLES*fielddim);
558 Arr *feat=NULL,*featp=NULL;
/* NOTE(review): resfeat was already allocated above; no release is visible
   before this early return, so it appears to leak here. */
560 if(nd->inDegree==0) return NULL;
561 for(i=0;i<nd->inDegree;i++){
/* Skip arcs that do not end at this node. */
563 if(ar->head!=nd) continue;
565 if(tail->address!=nd->address){
/* Remote tail: length first, then the values. */
568 MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status);
570 MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status);
571 resfeat=WindowFilter(resfeat,feat,nd->id);
/* NOTE(review): only the Arr object is released on visible lines; no
   release of feat->val is shown. */
572 SMPI_SHARED_FREE(feat);
/* Local tail: work on a private copy so tail->feat stays untouched. */
574 featp=(Arr *)tail->feat;
575 feat=newArr(featp->len);
576 memcpy(feat->val,featp->val,featp->len*sizeof(double));
577 resfeat=WindowFilter(resfeat,feat,nd->id);
578 SMPI_SHARED_FREE(feat);
/* Integer division: the value is cast to int before dividing by inDegree. */
581 for(i=0;i<resfeat->len;i++) resfeat->val[i]=((int)resfeat->val[i])/nd->inDegree;
/*
 * Reduce: collapse array a to a single number, w*CheckVal(a) truncated to
 * int.
 *
 *   a - array to reduce.
 *   w - multiplier; also the timer index read for the report, which labels
 *       the node as w-1.
 *
 * NOTE(review): the declaration of retv and the return statement are on
 * lines not visible here; presumably retv is returned -- confirm.
 */
585 double Reduce(Arr *a,int w){
591 retv=(int)(w*CheckVal(a));/* The casting needed for node
592 and array dependent verification */
595 fprintf(stderr,"** Reduce time in node %d = %f\n",(w-1),timer_read(w));
/*
 * ReduceStreams: reduce every array arriving on the incoming arcs of nd
 * and combine the results into one checksum.
 *
 *   dg - graph; not referenced on any visible line.
 *   nd - receiving node.
 *
 * For every arc whose head is nd: a tail at another address has its length
 * and values received with MPI_Recv, reduced with Reduce(feat,nd->id+1) and
 * released; a tail at the same address has tail->feat reduced directly.
 * The sum is truncated to long long and divided by nd->inDegree (when
 * positive), then multiplied by nd->id+1.
 * NOTE(review): the lookups of ar and tail, the value of tag, the
 * allocation of feat and the return statement are on lines not visible in
 * this excerpt.
 */
600 double ReduceStreams(DGraph *dg,DGNode *nd){
608 for(i=0;i<nd->inDegree;i++){
/* Skip arcs that do not end at this node. */
610 if(ar->head!=nd) continue;
612 if(tail->address!=nd->address){
/* Remote tail: length first, then the values. */
616 MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status);
618 MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status);
619 csum+=Reduce(feat,(nd->id+1));
/* NOTE(review): only the Arr object is released on visible lines; no
   release of feat->val is shown. */
620 SMPI_SHARED_FREE(feat);
/* Local tail: reduce its stored array in place. */
622 csum+=Reduce(tail->feat,(nd->id+1));
/* Integer (truncating) average over the incoming arcs. */
625 if(nd->inDegree>0)csum=(((long long int)csum)/nd->inDegree);
626 retv=(nd->id+1)*csum;
/*
 * ProcessNodes: execute the graph nodes assigned to this process and, on
 * process 0, collect and verify the sink checksums.
 *
 *   dg - graph to execute.
 *   me - address of the calling process; only nodes whose address equals
 *        me are processed.
 *
 * Node handling is chosen by substring match on the node name:
 *   "Source" - generate features with RandomFeatures and send them on;
 *   "Sink"   - compute a checksum with ReduceStreams and send it to
 *              process 0;
 *   other    - combine incoming streams with CombineStreams and send the
 *              result on.
 * Process 0 then receives one double per "Sink" node and calls verify.
 * NOTE(review): the assignment of nd inside both loops, how rchksum is
 * folded into chksum before verify, and the return statement are on lines
 * not visible in this excerpt.
 */
630 int ProcessNodes(DGraph *dg,int me){
633 int i=0,verified=0,tag;
638 for(i=0;i<dg->numNodes;i++){
/* Nodes owned by other processes are skipped. */
640 if(nd->address!=me) continue;
641 if(strstr(nd->name,"Source")){
642 nd->feat=RandomFeatures(dg->name,fielddim,nd->id);
643 SendResults(dg,nd,nd->feat);
644 }else if(strstr(nd->name,"Sink")){
645 chksum=ReduceStreams(dg,nd);
646 tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */
647 MPI_Send(&chksum,1,MPI_DOUBLE,0,tag,MPI_COMM_WORLD);
649 feat=CombineStreams(dg,nd);
650 SendResults(dg,nd,feat);
653 if(me==0){ /* Report node */
656 for(i=0;i<dg->numNodes;i++){
/* Only sinks report a checksum. */
658 if(!strstr(nd->name,"Sink")) continue;
/* Same tag formula as on the sending side above. */
659 tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */
660 MPI_Recv(&rchksum,1,MPI_DOUBLE,nd->address,tag,MPI_COMM_WORLD,&status);
663 verified=verify(dg->name,chksum);
668 int main(int argc,char **argv ){
669 int my_rank,comm_size;
672 int verified=0, featnum=0;
673 double bytes_sent=2.0,tot_time=0.0;
675 MPI_Init( &argc, &argv );
676 MPI_Comm_rank( MPI_COMM_WORLD, &my_rank );
677 MPI_Comm_size( MPI_COMM_WORLD, &comm_size );
680 ( strncmp(argv[1],"BH",2)!=0
681 &&strncmp(argv[1],"WH",2)!=0
682 &&strncmp(argv[1],"SH",2)!=0
686 fprintf(stderr,"** Usage: mpirun -np N ../bin/dt.S GraphName\n");
687 fprintf(stderr,"** Where \n - N is integer number of MPI processes\n");
688 fprintf(stderr," - S is the class S, W, or A \n");
689 fprintf(stderr," - GraphName is the communication graph name BH, WH, or SH.\n");
690 fprintf(stderr," - the number of MPI processes N should not be be less than \n");
691 fprintf(stderr," the number of nodes in the graph\n");
696 if(strncmp(argv[1],"BH",2)==0){
698 }else if(strncmp(argv[1],"WH",2)==0){
700 }else if(strncmp(argv[1],"SH",2)==0){
704 if(timer_on&&dg->numNodes+1>timers_tot){
707 fprintf(stderr,"Not enough timers. Node timeing is off. \n");
709 if(dg->numNodes>comm_size){
711 fprintf(stderr,"** The number of MPI processes should not be less than \n");
712 fprintf(stderr,"** the number of nodes in the graph\n");
713 fprintf(stderr,"** Number of MPI processes = %d\n",comm_size);
714 fprintf(stderr,"** Number nodes in the graph = %d\n",dg->numNodes);
719 for(i=0;i<dg->numNodes;i++){
720 dg->node[i]->address=i;
723 printf( "\n\n NAS Parallel Benchmarks 3.3 -- DT Benchmark\n\n" );
728 verified=ProcessNodes(dg,my_rank);
730 featnum=NUM_SAMPLES*fielddim;
731 bytes_sent=featnum*dg->numArcs;
735 tot_time=timer_read(0);
736 c_print_results( dg->name,