Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
b088fbd5c8ca0eaadfad56b5ec72bed7ea0e63a1
[simgrid.git] / examples / smpi / NAS / DT / dt-folding.c
1 /*************************************************************************
2  *                                                                       * 
3  *        N  A  S     P A R A L L E L     B E N C H M A R K S  3.3       *
4  *                                                                       * 
5  *                                  D T           * 
6  *                                                                       * 
7  ************************************************************************* 
8  *                                                                       * 
9  *   This benchmark is part of the NAS Parallel Benchmark 3.3 suite.     *
10  *                                                                       * 
11  *   Permission to use, copy, distribute and modify this software        * 
12  *   for any purpose with or without fee is hereby granted.  We          * 
13  *   request, however, that all derived work reference the NAS           * 
14  *   Parallel Benchmarks 3.3. This software is provided "as is"          *
15  *   without express or implied warranty.                                * 
16  *                                                                       * 
17  *   Information on NPB 3.3, including the technical report, the         *
18  *   original specifications, source code, results and information       * 
19  *   on how to submit new results, is available at:                      * 
20  *                                                                       * 
21  *          http:  www.nas.nasa.gov/Software/NPB                         * 
22  *                                                                       * 
23  *   Send comments or suggestions to  npb@nas.nasa.gov                   * 
24  *   Send bug reports to              npb-bugs@nas.nasa.gov              * 
25  *                                                                       * 
26  *         NAS Parallel Benchmarks Group                                 * 
27  *         NASA Ames Research Center                                     * 
28  *         Mail Stop: T27A-1                                             * 
29  *         Moffett Field, CA   94035-1000                                * 
30  *                                                                       * 
31  *         E-mail:  npb@nas.nasa.gov                                     * 
32  *         Fax:     (650) 604-3957                                       * 
33  *                                                                       * 
34  ************************************************************************* 
35  *                                                                       * 
36  *   Author: M. Frumkin               *             * 
37  *                                                                       * 
38  *************************************************************************/
39
40 #include <stdlib.h>
41 #include <stdio.h>
42 #include <string.h>
43
44 #include "mpi.h"
45 #include "npbparams.h"
46
47 #ifndef CLASS
48 #define CLASS 'S'
49 #define NUM_PROCS            1                 
50 #endif
51
52 //int      passed_verification;
53 extern double randlc( double *X, double *A );
54 extern
55 void c_print_results( char   *name,
56                       char   class,
57                       int    n1, 
58                       int    n2,
59                       int    n3,
60                       int    niter,
61                       int    nprocs_compiled,
62                       int    nprocs_total,
63                       double t,
64                       double mops,
65           char   *optype,
66                       int    passed_verification,
67                       char   *npbversion,
68                       char   *compiletime,
69                       char   *mpicc,
70                       char   *clink,
71                       char   *cmpi_lib,
72                       char   *cmpi_inc,
73                       char   *cflags,
74                       char   *clinkflags );
75           
76 void    timer_clear( int n );
77 void    timer_start( int n );
78 void    timer_stop( int n );
79 double  timer_read( int n );
80 int timer_on=0,timers_tot=64;
81
82 int verify(char *bmname,double rnm2){
83     double verify_value=0.0;
84     double epsilon=1.0E-8;
85     char cls=CLASS;
86     int verified=-1;
87     if (cls != 'U') {
88        if(cls=='S') {
89          if(strstr(bmname,"BH")){
90            verify_value=30892725.0;
91          }else if(strstr(bmname,"WH")){
92            verify_value=67349758.0;
93          }else if(strstr(bmname,"SH")){
94            verify_value=58875767.0;
95          }else{
96            fprintf(stderr,"No such benchmark as %s.\n",bmname);
97          }
98          verified = 0;
99        }else if(cls=='W') {
100          if(strstr(bmname,"BH")){
101        verify_value = 4102461.0;
102          }else if(strstr(bmname,"WH")){
103        verify_value = 204280762.0;
104          }else if(strstr(bmname,"SH")){
105        verify_value = 186944764.0;
106          }else{
107            fprintf(stderr,"No such benchmark as %s.\n",bmname);
108          }
109          verified = 0;
110        }else if(cls=='A') {
111          if(strstr(bmname,"BH")){
112        verify_value = 17809491.0;
113          }else if(strstr(bmname,"WH")){
114        verify_value = 1289925229.0;
115          }else if(strstr(bmname,"SH")){
116        verify_value = 610856482.0;
117          }else{
118            fprintf(stderr,"No such benchmark as %s.\n",bmname);
119          }
120      verified = 0;
121        }else if(cls=='B') {
122          if(strstr(bmname,"BH")){
123        verify_value = 4317114.0;
124          }else if(strstr(bmname,"WH")){
125        verify_value = 7877279917.0;
126          }else if(strstr(bmname,"SH")){
127        verify_value = 1836863082.0;
128          }else{
129            fprintf(stderr,"No such benchmark as %s.\n",bmname);
130        verified = 0;
131          }
132        }else if(cls=='C') {
133          if(strstr(bmname,"BH")){
134        verify_value = 0.0;
135          }else if(strstr(bmname,"WH")){
136        verify_value = 0.0;
137          }else if(strstr(bmname,"SH")){
138        verify_value = 0.0;
139          }else{
140            fprintf(stderr,"No such benchmark as %s.\n",bmname);
141        verified = -1;
142          }
143        }else if(cls=='D') {
144          if(strstr(bmname,"BH")){
145        verify_value = 0.0;
146          }else if(strstr(bmname,"WH")){
147        verify_value = 0.0;
148          }else if(strstr(bmname,"SH")){
149        verify_value = 0.0;
150          }else{
151            fprintf(stderr,"No such benchmark as %s.\n",bmname);
152          }
153          verified = -1;
154        }else{
155          fprintf(stderr,"No such class as %c.\n",cls);
156        }
157        fprintf(stderr," %s L2 Norm = %f\n",bmname,rnm2);
158        if(verified==-1){
159      fprintf(stderr," No verification was performed.\n");
160        }else if( rnm2 - verify_value < epsilon &&
161                  rnm2 - verify_value > -epsilon) {  /* abs here does not work on ALTIX */
162       verified = 1;
163       fprintf(stderr," Deviation = %f\n",(rnm2 - verify_value));
164        }else{
165      verified = 0;
166      fprintf(stderr," The correct verification value = %f\n",verify_value);
167      fprintf(stderr," Got value = %f\n",rnm2);
168        }
169     }else{
170        verified = -1;
171     }
172     return  verified;  
173   }
174
175 int ipowMod(int a,long long int n,int md){ 
176   int seed=1,q=a,r=1;
177   if(n<0){
178     fprintf(stderr,"ipowMod: exponent must be nonnegative exp=%lld\n",n);
179     n=-n; /* temp fix */
180 /*    return 1; */
181   }
182   if(md<=0){
183     fprintf(stderr,"ipowMod: module must be positive mod=%d",md);
184     return 1;
185   }
186   if(n==0) return 1;
187   while(n>1){
188     int n2 = n/2;
189     if (n2*2==n){
190        seed = (q*q)%md;
191        q=seed;
192        n = n2;
193     }else{
194        seed = (r*q)%md;
195        r=seed;
196        n = n-1;
197     }
198   }
199   seed = (r*q)%md;
200   return seed;
201 }
202
203 #include "DGraph.h"
204 DGraph *buildSH(char cls){
205 /*
206   Nodes of the graph must be topologically sorted
207   to avoid MPI deadlock.
208 */
209   DGraph *dg;
210   int numSources=NUM_SOURCES; /* must be power of 2 */
211   int numOfLayers=0,tmpS=numSources>>1;
212   int firstLayerNode=0;
213   DGArc *ar=NULL;
214   DGNode *nd=NULL;
215   int mask=0x0,ndid=0,ndoff=0;
216   int i=0,j=0;
217   char nm[BLOCK_SIZE];
218   
219   sprintf(nm,"DT_SH.%c",cls);
220   dg=newDGraph(nm);
221
222   while(tmpS>1){
223     numOfLayers++;
224     tmpS>>=1;
225   }
226   for(i=0;i<numSources;i++){
227     sprintf(nm,"Source.%d",i);
228     nd=newNode(nm);
229     AttachNode(dg,nd);
230   }
231   for(j=0;j<numOfLayers;j++){
232     mask=0x00000001<<j;
233     for(i=0;i<numSources;i++){
234       sprintf(nm,"Comparator.%d",(i+j*firstLayerNode));
235       nd=newNode(nm);
236       AttachNode(dg,nd);
237       ndoff=i&(~mask);
238       ndid=firstLayerNode+ndoff;
239       ar=newArc(dg->node[ndid],nd);     
240       AttachArc(dg,ar);
241       ndoff+=mask;
242       ndid=firstLayerNode+ndoff;
243       ar=newArc(dg->node[ndid],nd);     
244       AttachArc(dg,ar);
245     }
246     firstLayerNode+=numSources;
247   }
248   mask=0x00000001<<numOfLayers;
249   for(i=0;i<numSources;i++){
250     sprintf(nm,"Sink.%d",i);
251     nd=newNode(nm);
252     AttachNode(dg,nd);
253     ndoff=i&(~mask);
254     ndid=firstLayerNode+ndoff;
255     ar=newArc(dg->node[ndid],nd);     
256     AttachArc(dg,ar);
257     ndoff+=mask;
258     ndid=firstLayerNode+ndoff;
259     ar=newArc(dg->node[ndid],nd);     
260     AttachArc(dg,ar);
261   }
262 return dg;
263 }
264 DGraph *buildWH(char cls){
265 /*
266   Nodes of the graph must be topologically sorted
267   to avoid MPI deadlock.
268 */
269   int i=0,j=0;
270   int numSources=NUM_SOURCES,maxInDeg=4;
271   int numLayerNodes=numSources,firstLayerNode=0;
272   int totComparators=0;
273   int numPrevLayerNodes=numLayerNodes;
274   int id=0,sid=0;
275   DGraph *dg;
276   DGNode *nd=NULL,*source=NULL,*tmp=NULL,*snd=NULL;
277   DGArc *ar=NULL;
278   char nm[BLOCK_SIZE];
279
280   sprintf(nm,"DT_WH.%c",cls);
281   dg=newDGraph(nm);
282
283   for(i=0;i<numSources;i++){
284     sprintf(nm,"Sink.%d",i);
285     nd=newNode(nm);
286     AttachNode(dg,nd);
287   }
288   totComparators=0;
289   numPrevLayerNodes=numLayerNodes;
290   while(numLayerNodes>maxInDeg){
291     numLayerNodes=numLayerNodes/maxInDeg;
292     if(numLayerNodes*maxInDeg<numPrevLayerNodes)numLayerNodes++;
293     for(i=0;i<numLayerNodes;i++){
294       sprintf(nm,"Comparator.%d",totComparators);
295       totComparators++;
296       nd=newNode(nm);
297       id=AttachNode(dg,nd);
298       for(j=0;j<maxInDeg;j++){
299         sid=i*maxInDeg+j;
300   if(sid>=numPrevLayerNodes) break;
301         snd=dg->node[firstLayerNode+sid];
302         ar=newArc(dg->node[id],snd);
303         AttachArc(dg,ar);
304       }
305     }
306     firstLayerNode+=numPrevLayerNodes;
307     numPrevLayerNodes=numLayerNodes;
308   }
309   source=newNode("Source");
310   AttachNode(dg,source);   
311   for(i=0;i<numPrevLayerNodes;i++){
312     nd=dg->node[firstLayerNode+i];
313     ar=newArc(source,nd);
314     AttachArc(dg,ar);
315   }
316
317   for(i=0;i<dg->numNodes/2;i++){  /* Topological sorting */
318     tmp=dg->node[i];
319     dg->node[i]=dg->node[dg->numNodes-1-i];
320     dg->node[i]->id=i;
321     dg->node[dg->numNodes-1-i]=tmp;
322     dg->node[dg->numNodes-1-i]->id=dg->numNodes-1-i;
323   }
324 return dg;
325 }
326 DGraph *buildBH(char cls){
327 /*
328   Nodes of the graph must be topologically sorted
329   to avoid MPI deadlock.
330 */
331   int i=0,j=0;
332   int numSources=NUM_SOURCES,maxInDeg=4;
333   int numLayerNodes=numSources,firstLayerNode=0;
334   DGraph *dg;
335   DGNode *nd=NULL, *snd=NULL, *sink=NULL;
336   DGArc *ar=NULL;
337   int totComparators=0;
338   int numPrevLayerNodes=numLayerNodes;
339   int id=0, sid=0;
340   char nm[BLOCK_SIZE];
341
342   sprintf(nm,"DT_BH.%c",cls);
343   dg=newDGraph(nm);
344
345   for(i=0;i<numSources;i++){
346     sprintf(nm,"Source.%d",i);
347     nd=newNode(nm);
348     AttachNode(dg,nd);
349   }
350   while(numLayerNodes>maxInDeg){
351     numLayerNodes=numLayerNodes/maxInDeg;
352     if(numLayerNodes*maxInDeg<numPrevLayerNodes)numLayerNodes++;
353     for(i=0;i<numLayerNodes;i++){
354       sprintf(nm,"Comparator.%d",totComparators);
355       totComparators++;
356       nd=newNode(nm);
357       id=AttachNode(dg,nd);
358       for(j=0;j<maxInDeg;j++){
359         sid=i*maxInDeg+j;
360   if(sid>=numPrevLayerNodes) break;
361         snd=dg->node[firstLayerNode+sid];
362         ar=newArc(snd,dg->node[id]);
363         AttachArc(dg,ar);
364       }
365     }
366     firstLayerNode+=numPrevLayerNodes;
367     numPrevLayerNodes=numLayerNodes;
368   }
369   sink=newNode("Sink");
370   AttachNode(dg,sink);   
371   for(i=0;i<numPrevLayerNodes;i++){
372     nd=dg->node[firstLayerNode+i];
373     ar=newArc(nd,sink);
374     AttachArc(dg,ar);
375   }
376 return dg;
377 }
378
379 typedef struct{
380   int len;
381   double* val;
382 } Arr;
383 Arr *newArr(int len){
384   Arr *arr=(Arr *)SMPI_SHARED_MALLOC(sizeof(Arr));
385   arr->len=len;
386   arr->val=(double *)SMPI_SHARED_MALLOC(len*sizeof(double));
387   return arr;
388 }
389 void arrShow(Arr* a){
390   if(!a) fprintf(stderr,"-- NULL array\n");
391   else{
392     fprintf(stderr,"-- length=%d\n",a->len);
393   }
394 }
395 double CheckVal(Arr *feat){
396   double csum=0.0;
397   int i=0;
398   for(i=0;i<feat->len;i++){
399     csum+=feat->val[i]*feat->val[i]/feat->len; /* The truncation does not work since 
400                                                   result will be 0 for large len  */
401   }
402    return csum;
403 }
404 int GetFNumDPar(int* mean, int* stdev){
405   *mean=NUM_SAMPLES;
406   *stdev=STD_DEVIATION;
407   return 0;
408 }
409 int GetFeatureNum(char *mbname,int id){
410   double tran=314159265.0;
411   double A=2*id+1;
412   double denom=randlc(&tran,&A);
413   char cval='S';
414   int mean=NUM_SAMPLES,stdev=128;
415   int rtfs=0,len=0;
416   GetFNumDPar(&mean,&stdev);
417   rtfs=ipowMod((int)(1/denom)*(int)cval,(long long int) (2*id+1),2*stdev);
418   if(rtfs<0) rtfs=-rtfs;
419   len=mean-stdev+rtfs;
420   return len;
421 }
422 Arr* RandomFeatures(char *bmname,int fdim,int id){
423   int len=GetFeatureNum(bmname,id)*fdim;
424   Arr* feat=newArr(len);
425   int nxg=2,nyg=2,nzg=2,nfg=5;
426   int nx=421,ny=419,nz=1427,nf=3527;
427   long long int expon=(len*(id+1))%3141592;
428   int seedx=ipowMod(nxg,expon,nx),
429       seedy=ipowMod(nyg,expon,ny),
430       seedz=ipowMod(nzg,expon,nz),
431       seedf=ipowMod(nfg,expon,nf);
432   int i=0;
433   if(timer_on){
434     timer_clear(id+1);
435     timer_start(id+1);
436   }
437   for(i=0;i<len;i+=fdim){
438     seedx=(seedx*nxg)%nx;
439     seedy=(seedy*nyg)%ny;
440     seedz=(seedz*nzg)%nz;
441     seedf=(seedf*nfg)%nf;
442     feat->val[i]=seedx;
443     feat->val[i+1]=seedy;
444     feat->val[i+2]=seedz;
445     feat->val[i+3]=seedf;
446   }
447   if(timer_on){
448     timer_stop(id+1);
449     fprintf(stderr,"** RandomFeatures time in node %d = %f\n",id,timer_read(id+1));
450   }
451   return feat;   
452 }
453 void Resample(Arr *a,int blen){
454     long long int i=0,j=0,jlo=0,jhi=0;
455     double avval=0.0;
456     double *nval=(double *)SMPI_SHARED_MALLOC(blen*sizeof(double));
457     Arr *tmp=newArr(10);
458     for(i=0;i<blen;i++) nval[i]=0.0;
459     for(i=1;i<a->len-1;i++){
460       jlo=(int)(0.5*(2*i-1)*(blen/a->len)); 
461       jhi=(int)(0.5*(2*i+1)*(blen/a->len));
462
463       avval=a->val[i]/(jhi-jlo+1);    
464       for(j=jlo;j<=jhi;j++){
465         nval[j]+=avval;
466       }
467     }
468     nval[0]=a->val[0];
469     nval[blen-1]=a->val[a->len-1];
470     SMPI_SHARED_FREE(a->val);
471     a->val=nval;
472     a->len=blen;
473 }
474 #define fielddim 4
475 Arr* WindowFilter(Arr *a, Arr* b,int w){
476   int i=0,j=0,k=0;
477   double rms0=0.0,rms1=0.0,rmsm1=0.0;
478   double weight=((double) (w+1))/(w+2);
479  
480   w+=1;
481   if(timer_on){
482     timer_clear(w);
483     timer_start(w);
484   }
485   if(a->len<b->len) Resample(a,b->len);
486   if(a->len>b->len) Resample(b,a->len);
487   for(i=fielddim;i<a->len-fielddim;i+=fielddim){
488     rms0=(a->val[i]-b->val[i])*(a->val[i]-b->val[i])
489   +(a->val[i+1]-b->val[i+1])*(a->val[i+1]-b->val[i+1])
490   +(a->val[i+2]-b->val[i+2])*(a->val[i+2]-b->val[i+2])
491   +(a->val[i+3]-b->val[i+3])*(a->val[i+3]-b->val[i+3]);
492     j=i+fielddim;
493     rms1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j])
494       +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1])
495       +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2])
496       +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]);
497     j=i-fielddim;
498     rmsm1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j])
499    +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1])
500    +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2])
501    +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]);
502     k=0;
503     if(rms1<rms0){
504       k=1;
505       rms0=rms1;
506     }
507     if(rmsm1<rms0) k=-1;
508     if(k==0){
509       j=i+fielddim;
510       a->val[i]=weight*b->val[i];
511       a->val[i+1]=weight*b->val[i+1];
512       a->val[i+2]=weight*b->val[i+2];
513       a->val[i+3]=weight*b->val[i+3];  
514     }else if(k==1){
515       j=i+fielddim;
516       a->val[i]=weight*b->val[j];
517       a->val[i+1]=weight*b->val[j+1];
518       a->val[i+2]=weight*b->val[j+2];
519       a->val[i+3]=weight*b->val[j+3];  
520     }else { /*if(k==-1)*/
521       j=i-fielddim;
522       a->val[i]=weight*b->val[j];
523       a->val[i+1]=weight*b->val[j+1];
524       a->val[i+2]=weight*b->val[j+2];
525       a->val[i+3]=weight*b->val[j+3];  
526     }     
527   }
528   if(timer_on){
529     timer_stop(w);
530     fprintf(stderr,"** WindowFilter time in node %d = %f\n",(w-1),timer_read(w));
531   }
532   return a;
533 }
534
535 int SendResults(DGraph *dg,DGNode *nd,Arr *feat){
536   int i=0,tag=0;
537   DGArc *ar=NULL;
538   DGNode *head=NULL;
539   if(!feat) return 0;
540   for(i=0;i<nd->outDegree;i++){
541     ar=nd->outArc[i];
542     if(ar->tail!=nd) continue;
543     head=ar->head;
544     tag=ar->id;
545     if(head->address!=nd->address){
546       MPI_Send(&feat->len,1,MPI_INT,head->address,tag,MPI_COMM_WORLD);
547       MPI_Send(feat->val,feat->len,MPI_DOUBLE,head->address,tag,MPI_COMM_WORLD);
548     }
549   }
550   return 1;
551 }
552 Arr* CombineStreams(DGraph *dg,DGNode *nd){
553   Arr *resfeat=newArr(NUM_SAMPLES*fielddim);
554   int i=0,len=0,tag=0;
555   DGArc *ar=NULL;
556   DGNode *tail=NULL;
557   MPI_Status status;
558   Arr *feat=NULL,*featp=NULL;
559
560   if(nd->inDegree==0) return NULL;
561   for(i=0;i<nd->inDegree;i++){
562     ar=nd->inArc[i];
563     if(ar->head!=nd) continue;
564     tail=ar->tail;
565     if(tail->address!=nd->address){
566       len=0;
567       tag=ar->id;
568       MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status);
569       feat=newArr(len);
570       MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status);
571       resfeat=WindowFilter(resfeat,feat,nd->id);
572       SMPI_SHARED_FREE(feat);
573     }else{
574       featp=(Arr *)tail->feat;
575       feat=newArr(featp->len);
576       memcpy(feat->val,featp->val,featp->len*sizeof(double));
577       resfeat=WindowFilter(resfeat,feat,nd->id);  
578       SMPI_SHARED_FREE(feat);
579     }
580   }
581   for(i=0;i<resfeat->len;i++) resfeat->val[i]=((int)resfeat->val[i])/nd->inDegree;
582   nd->feat=resfeat;
583   return nd->feat;
584 }
585 double Reduce(Arr *a,int w){
586   double retv=0.0;
587   if(timer_on){
588     timer_clear(w);
589     timer_start(w);
590   }
591   retv=(int)(w*CheckVal(a));/* The casting needed for node  
592                                and array dependent verifcation */
593   if(timer_on){
594     timer_stop(w);
595     fprintf(stderr,"** Reduce time in node %d = %f\n",(w-1),timer_read(w));
596   }
597   return retv;
598 }
599
600 double ReduceStreams(DGraph *dg,DGNode *nd){
601   double csum=0.0;
602   int i=0,len=0,tag=0;
603   DGArc *ar=NULL;
604   DGNode *tail=NULL;
605   Arr *feat=NULL;
606   double retv=0.0;
607
608   for(i=0;i<nd->inDegree;i++){
609     ar=nd->inArc[i];
610     if(ar->head!=nd) continue;
611     tail=ar->tail;
612     if(tail->address!=nd->address){
613       MPI_Status status;
614       len=0;
615       tag=ar->id;
616       MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status);
617       feat=newArr(len);
618       MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status);
619       csum+=Reduce(feat,(nd->id+1));  
620       SMPI_SHARED_FREE(feat);
621     }else{
622       csum+=Reduce(tail->feat,(nd->id+1));  
623     }
624   }
625   if(nd->inDegree>0)csum=(((long long int)csum)/nd->inDegree);
626   retv=(nd->id+1)*csum;
627   return retv;
628 }
629
630 int ProcessNodes(DGraph *dg,int me){
631   double chksum=0.0;
632   Arr *feat=NULL;
633   int i=0,verified=0,tag;
634   DGNode *nd=NULL;
635   double rchksum=0.0;
636   MPI_Status status;
637
638   for(i=0;i<dg->numNodes;i++){
639     nd=dg->node[i];
640     if(nd->address!=me) continue;
641     if(strstr(nd->name,"Source")){
642       nd->feat=RandomFeatures(dg->name,fielddim,nd->id); 
643       SendResults(dg,nd,nd->feat);
644     }else if(strstr(nd->name,"Sink")){
645       chksum=ReduceStreams(dg,nd);
646       tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */
647       MPI_Send(&chksum,1,MPI_DOUBLE,0,tag,MPI_COMM_WORLD);
648     }else{
649       feat=CombineStreams(dg,nd);
650       SendResults(dg,nd,feat);
651     }
652   }
653   if(me==0){ /* Report node */
654     rchksum=0.0;
655     chksum=0.0;
656     for(i=0;i<dg->numNodes;i++){
657       nd=dg->node[i];
658       if(!strstr(nd->name,"Sink")) continue;
659        tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */
660       MPI_Recv(&rchksum,1,MPI_DOUBLE,nd->address,tag,MPI_COMM_WORLD,&status);
661       chksum+=rchksum;
662     }
663     verified=verify(dg->name,chksum);
664   }
665 return verified;
666 }
667
668 int main(int argc,char **argv ){
669   int my_rank,comm_size;
670   int i;
671   DGraph *dg=NULL;
672   int verified=0, featnum=0;
673   double bytes_sent=2.0,tot_time=0.0;
674
675     MPI_Init( &argc, &argv );
676     MPI_Comm_rank( MPI_COMM_WORLD, &my_rank );
677     MPI_Comm_size( MPI_COMM_WORLD, &comm_size );
678
679      if(argc!=2||
680                 (  strncmp(argv[1],"BH",2)!=0
681                  &&strncmp(argv[1],"WH",2)!=0
682                  &&strncmp(argv[1],"SH",2)!=0
683                 )
684       ){
685       if(my_rank==0){
686         fprintf(stderr,"** Usage: mpirun -np N ../bin/dt.S GraphName\n");
687         fprintf(stderr,"** Where \n   - N is integer number of MPI processes\n");
688         fprintf(stderr,"   - S is the class S, W, or A \n");
689         fprintf(stderr,"   - GraphName is the communication graph name BH, WH, or SH.\n");
690         fprintf(stderr,"   - the number of MPI processes N should not be be less than \n");
691         fprintf(stderr,"     the number of nodes in the graph\n");
692       }
693       MPI_Finalize();
694       exit(0);
695     } 
696    if(strncmp(argv[1],"BH",2)==0){
697       dg=buildBH(CLASS);
698     }else if(strncmp(argv[1],"WH",2)==0){
699       dg=buildWH(CLASS);
700     }else if(strncmp(argv[1],"SH",2)==0){
701       dg=buildSH(CLASS);
702     }
703
704     if(timer_on&&dg->numNodes+1>timers_tot){
705       timer_on=0;
706       if(my_rank==0)
707         fprintf(stderr,"Not enough timers. Node timeing is off. \n");
708     }
709     if(dg->numNodes>comm_size){
710       if(my_rank==0){
711         fprintf(stderr,"**  The number of MPI processes should not be less than \n");
712         fprintf(stderr,"**  the number of nodes in the graph\n");
713         fprintf(stderr,"**  Number of MPI processes = %d\n",comm_size);
714         fprintf(stderr,"**  Number nodes in the graph = %d\n",dg->numNodes);
715       }
716       MPI_Finalize();
717       exit(0);
718     }
719     for(i=0;i<dg->numNodes;i++){ 
720       dg->node[i]->address=i;
721     }
722     if( my_rank == 0 ){
723       printf( "\n\n NAS Parallel Benchmarks 3.3 -- DT Benchmark\n\n" );
724       graphShow(dg,0);
725       timer_clear(0);
726       timer_start(0);
727     }
728     verified=ProcessNodes(dg,my_rank);
729     
730     featnum=NUM_SAMPLES*fielddim;
731     bytes_sent=featnum*dg->numArcs;
732     bytes_sent/=1048576;
733     if(my_rank==0){
734       timer_stop(0);
735       tot_time=timer_read(0);
736       c_print_results( dg->name,
737                  CLASS,
738                  featnum,
739                  0,
740                  0,
741                  dg->numNodes,
742                  0,
743                  comm_size,
744                  tot_time,
745                  bytes_sent/tot_time,
746                  "bytes transmitted", 
747                  verified,
748                  NPBVERSION,
749                  COMPILETIME,
750                  MPICC,
751                  CLINK,
752                  CMPI_LIB,
753                  CMPI_INC,
754                  CFLAGS,
755                  CLINKFLAGS );
756     }          
757     MPI_Finalize();
758   return 1;
759 }