Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
using smpi_shared_[malloc|free] in DT-trace benchmark (as in dt-folding)
[simgrid.git] / examples / smpi / NAS / DT-trace / dt-trace.c
1 /*************************************************************************
2  *                                                                       * 
3  *        N  A  S     P A R A L L E L     B E N C H M A R K S  3.3       *
4  *                                                                       * 
5  *                                  D T                                  * 
6  *                                                                       * 
7  ************************************************************************* 
8  *                                                                       * 
9  *   This benchmark is part of the NAS Parallel Benchmark 3.3 suite.     *
10  *                                                                       * 
11  *   Permission to use, copy, distribute and modify this software        * 
12  *   for any purpose with or without fee is hereby granted.  We          * 
13  *   request, however, that all derived work reference the NAS           * 
14  *   Parallel Benchmarks 3.3. This software is provided "as is"          *
15  *   without express or implied warranty.                                * 
16  *                                                                       * 
17  *   Information on NPB 3.3, including the technical report, the         *
18  *   original specifications, source code, results and information       * 
19  *   on how to submit new results, is available at:                      * 
20  *                                                                       * 
21  *          http:  www.nas.nasa.gov/Software/NPB                         * 
22  *                                                                       * 
23  *   Send comments or suggestions to  npb@nas.nasa.gov                   * 
24  *   Send bug reports to              npb-bugs@nas.nasa.gov              * 
25  *                                                                       * 
26  *         NAS Parallel Benchmarks Group                                 * 
27  *         NASA Ames Research Center                                     * 
28  *         Mail Stop: T27A-1                                             * 
29  *         Moffett Field, CA   94035-1000                                * 
30  *                                                                       * 
31  *         E-mail:  npb@nas.nasa.gov                                     * 
32  *         Fax:     (650) 604-3957                                       * 
33  *                                                                       * 
34  ************************************************************************* 
35  *                                                                       * 
36  *   Author: M. Frumkin                                                  *                                               * 
37  *                                                                       * 
38  *************************************************************************/
39
40 #include <stdlib.h>
41 #include <stdio.h>
42 #include <string.h>
43
44 #include "mpi.h"
45 #include "npbparams.h"
46
47 #include "instr/instr.h" //TRACE_
48
49 #ifndef CLASS
50 #define CLASS 'S'
51 #define NUM_PROCS            1                 
52 #endif
53
54 //int      passed_verification;
55 extern double randlc( double *X, double *A );
56 extern
57 void c_print_results( char   *name,
58                       char   class,
59                       int    n1, 
60                       int    n2,
61                       int    n3,
62                       int    niter,
63                       int    nprocs_compiled,
64                       int    nprocs_total,
65                       double t,
66                       double mops,
67                       char   *optype,
68                       int    passed_verification,
69                       char   *npbversion,
70                       char   *compiletime,
71                       char   *mpicc,
72                       char   *clink,
73                       char   *cmpi_lib,
74                       char   *cmpi_inc,
75                       char   *cflags,
76                       char   *clinkflags );
77                       
78 void    timer_clear( int n );
79 void    timer_start( int n );
80 void    timer_stop( int n );
81 double  timer_read( int n );
82 int timer_on=0,timers_tot=64;
83
84 int verify(char *bmname,double rnm2){
85     double verify_value=0.0;
86     double epsilon=1.0E-8;
87     char cls=CLASS;
88     int verified=-1;
89     if (cls != 'U') {
90        if(cls=='S') {
91          if(strstr(bmname,"BH")){
92            verify_value=30892725.0;
93          }else if(strstr(bmname,"WH")){
94            verify_value=67349758.0;
95          }else if(strstr(bmname,"SH")){
96            verify_value=58875767.0;
97          }else{
98            fprintf(stderr,"No such benchmark as %s.\n",bmname);
99          }
100          verified = 0;
101        }else if(cls=='W') {
102          if(strstr(bmname,"BH")){
103            verify_value = 4102461.0;
104          }else if(strstr(bmname,"WH")){
105            verify_value = 204280762.0;
106          }else if(strstr(bmname,"SH")){
107            verify_value = 186944764.0;
108          }else{
109            fprintf(stderr,"No such benchmark as %s.\n",bmname);
110          }
111          verified = 0;
112        }else if(cls=='A') {
113          if(strstr(bmname,"BH")){
114            verify_value = 17809491.0;
115          }else if(strstr(bmname,"WH")){
116            verify_value = 1289925229.0;
117          }else if(strstr(bmname,"SH")){
118            verify_value = 610856482.0;
119          }else{
120            fprintf(stderr,"No such benchmark as %s.\n",bmname);
121          }
122          verified = 0;
123        }else if(cls=='B') {
124          if(strstr(bmname,"BH")){
125            verify_value = 4317114.0;
126          }else if(strstr(bmname,"WH")){
127            verify_value = 7877279917.0;
128          }else if(strstr(bmname,"SH")){
129            verify_value = 1836863082.0;
130          }else{
131            fprintf(stderr,"No such benchmark as %s.\n",bmname);
132            verified = 0;
133          }
134        }else if(cls=='C') {
135          if(strstr(bmname,"BH")){
136            verify_value = 0.0;
137          }else if(strstr(bmname,"WH")){
138            verify_value = 0.0;
139          }else if(strstr(bmname,"SH")){
140            verify_value = 0.0;
141          }else{
142            fprintf(stderr,"No such benchmark as %s.\n",bmname);
143            verified = -1;
144          }
145        }else if(cls=='D') {
146          if(strstr(bmname,"BH")){
147            verify_value = 0.0;
148          }else if(strstr(bmname,"WH")){
149            verify_value = 0.0;
150          }else if(strstr(bmname,"SH")){
151            verify_value = 0.0;
152          }else{
153            fprintf(stderr,"No such benchmark as %s.\n",bmname);
154          }
155          verified = -1;
156        }else{
157          fprintf(stderr,"No such class as %c.\n",cls);
158        }
159        fprintf(stderr," %s L2 Norm = %f\n",bmname,rnm2);
160        if(verified==-1){
161          fprintf(stderr," No verification was performed.\n");
162        }else if( rnm2 - verify_value < epsilon &&
163                  rnm2 - verify_value > -epsilon) {  /* abs here does not work on ALTIX */
164           verified = 1;
165           fprintf(stderr," Deviation = %f\n",(rnm2 - verify_value));
166        }else{
167          verified = 0;
168          fprintf(stderr," The correct verification value = %f\n",verify_value);
169          fprintf(stderr," Got value = %f\n",rnm2);
170        }
171     }else{
172        verified = -1;
173     }
174     return  verified;  
175   }
176
177 int ipowMod(int a,long long int n,int md){ 
178   int seed=1,q=a,r=1;
179   if(n<0){
180     fprintf(stderr,"ipowMod: exponent must be nonnegative exp=%lld\n",n);
181     n=-n; /* temp fix */
182 /*    return 1; */
183   }
184   if(md<=0){
185     fprintf(stderr,"ipowMod: module must be positive mod=%d",md);
186     return 1;
187   }
188   if(n==0) return 1;
189   while(n>1){
190     int n2 = n/2;
191     if (n2*2==n){
192        seed = (q*q)%md;
193        q=seed;
194        n = n2;
195     }else{
196        seed = (r*q)%md;
197        r=seed;
198        n = n-1;
199     }
200   }
201   seed = (r*q)%md;
202   return seed;
203 }
204
205 #include "DGraph.h"
206 DGraph *buildSH(char cls){
207 /*
208   Nodes of the graph must be topologically sorted
209   to avoid MPI deadlock.
210 */
211   DGraph *dg;
212   int numSources=NUM_SOURCES; /* must be power of 2 */
213   int numOfLayers=0,tmpS=numSources>>1;
214   int firstLayerNode=0;
215   DGArc *ar=NULL;
216   DGNode *nd=NULL;
217   int mask=0x0,ndid=0,ndoff=0;
218   int i=0,j=0;
219   char nm[BLOCK_SIZE];
220   
221   sprintf(nm,"DT_SH.%c",cls);
222   dg=newDGraph(nm);
223
224   while(tmpS>1){
225     numOfLayers++;
226     tmpS>>=1;
227   }
228   for(i=0;i<numSources;i++){
229     sprintf(nm,"Source.%d",i);
230     nd=newNode(nm);
231     AttachNode(dg,nd);
232   }
233   for(j=0;j<numOfLayers;j++){
234     mask=0x00000001<<j;
235     for(i=0;i<numSources;i++){
236       sprintf(nm,"Comparator.%d",(i+j*firstLayerNode));
237       nd=newNode(nm);
238       AttachNode(dg,nd);
239       ndoff=i&(~mask);
240       ndid=firstLayerNode+ndoff;
241       ar=newArc(dg->node[ndid],nd);     
242       AttachArc(dg,ar);
243       ndoff+=mask;
244       ndid=firstLayerNode+ndoff;
245       ar=newArc(dg->node[ndid],nd);     
246       AttachArc(dg,ar);
247     }
248     firstLayerNode+=numSources;
249   }
250   mask=0x00000001<<numOfLayers;
251   for(i=0;i<numSources;i++){
252     sprintf(nm,"Sink.%d",i);
253     nd=newNode(nm);
254     AttachNode(dg,nd);
255     ndoff=i&(~mask);
256     ndid=firstLayerNode+ndoff;
257     ar=newArc(dg->node[ndid],nd);     
258     AttachArc(dg,ar);
259     ndoff+=mask;
260     ndid=firstLayerNode+ndoff;
261     ar=newArc(dg->node[ndid],nd);     
262     AttachArc(dg,ar);
263   }
264 return dg;
265 }
266 DGraph *buildWH(char cls){
267 /*
268   Nodes of the graph must be topologically sorted
269   to avoid MPI deadlock.
270 */
271   int i=0,j=0;
272   int numSources=NUM_SOURCES,maxInDeg=4;
273   int numLayerNodes=numSources,firstLayerNode=0;
274   int totComparators=0;
275   int numPrevLayerNodes=numLayerNodes;
276   int id=0,sid=0;
277   DGraph *dg;
278   DGNode *nd=NULL,*source=NULL,*tmp=NULL,*snd=NULL;
279   DGArc *ar=NULL;
280   char nm[BLOCK_SIZE];
281
282   sprintf(nm,"DT_WH.%c",cls);
283   dg=newDGraph(nm);
284
285   for(i=0;i<numSources;i++){
286     sprintf(nm,"Sink.%d",i);
287     nd=newNode(nm);
288     AttachNode(dg,nd);
289   }
290   totComparators=0;
291   numPrevLayerNodes=numLayerNodes;
292   while(numLayerNodes>maxInDeg){
293     numLayerNodes=numLayerNodes/maxInDeg;
294     if(numLayerNodes*maxInDeg<numPrevLayerNodes)numLayerNodes++;
295     for(i=0;i<numLayerNodes;i++){
296       sprintf(nm,"Comparator.%d",totComparators);
297       totComparators++;
298       nd=newNode(nm);
299       id=AttachNode(dg,nd);
300       for(j=0;j<maxInDeg;j++){
301         sid=i*maxInDeg+j;
302         if(sid>=numPrevLayerNodes) break;
303         snd=dg->node[firstLayerNode+sid];
304         ar=newArc(dg->node[id],snd);
305         AttachArc(dg,ar);
306       }
307     }
308     firstLayerNode+=numPrevLayerNodes;
309     numPrevLayerNodes=numLayerNodes;
310   }
311   source=newNode("Source");
312   AttachNode(dg,source);   
313   for(i=0;i<numPrevLayerNodes;i++){
314     nd=dg->node[firstLayerNode+i];
315     ar=newArc(source,nd);
316     AttachArc(dg,ar);
317   }
318
319   for(i=0;i<dg->numNodes/2;i++){  /* Topological sorting */
320     tmp=dg->node[i];
321     dg->node[i]=dg->node[dg->numNodes-1-i];
322     dg->node[i]->id=i;
323     dg->node[dg->numNodes-1-i]=tmp;
324     dg->node[dg->numNodes-1-i]->id=dg->numNodes-1-i;
325   }
326 return dg;
327 }
328 DGraph *buildBH(char cls){
329 /*
330   Nodes of the graph must be topologically sorted
331   to avoid MPI deadlock.
332 */
333   int i=0,j=0;
334   int numSources=NUM_SOURCES,maxInDeg=4;
335   int numLayerNodes=numSources,firstLayerNode=0;
336   DGraph *dg;
337   DGNode *nd=NULL, *snd=NULL, *sink=NULL;
338   DGArc *ar=NULL;
339   int totComparators=0;
340   int numPrevLayerNodes=numLayerNodes;
341   int id=0, sid=0;
342   char nm[BLOCK_SIZE];
343
344   sprintf(nm,"DT_BH.%c",cls);
345   dg=newDGraph(nm);
346
347   for(i=0;i<numSources;i++){
348     sprintf(nm,"Source.%d",i);
349     nd=newNode(nm);
350     AttachNode(dg,nd);
351   }
352   while(numLayerNodes>maxInDeg){
353     numLayerNodes=numLayerNodes/maxInDeg;
354     if(numLayerNodes*maxInDeg<numPrevLayerNodes)numLayerNodes++;
355     for(i=0;i<numLayerNodes;i++){
356       sprintf(nm,"Comparator.%d",totComparators);
357       totComparators++;
358       nd=newNode(nm);
359       id=AttachNode(dg,nd);
360       for(j=0;j<maxInDeg;j++){
361         sid=i*maxInDeg+j;
362         if(sid>=numPrevLayerNodes) break;
363         snd=dg->node[firstLayerNode+sid];
364         ar=newArc(snd,dg->node[id]);
365         AttachArc(dg,ar);
366       }
367     }
368     firstLayerNode+=numPrevLayerNodes;
369     numPrevLayerNodes=numLayerNodes;
370   }
371   sink=newNode("Sink");
372   AttachNode(dg,sink);   
373   for(i=0;i<numPrevLayerNodes;i++){
374     nd=dg->node[firstLayerNode+i];
375     ar=newArc(nd,sink);
376     AttachArc(dg,ar);
377   }
378 return dg;
379 }
380
381 typedef struct{
382   int len;
383   double* val;
384 } Arr;
385 Arr *newArr(int len){
386   Arr *arr=(Arr *)SMPI_SHARED_MALLOC(sizeof(Arr));
387   arr->len=len;
388   arr->val=(double *)SMPI_SHARED_MALLOC(len*sizeof(double));
389   return arr;
390 }
391 void arrShow(Arr* a){
392   if(!a) fprintf(stderr,"-- NULL array\n");
393   else{
394     fprintf(stderr,"-- length=%d\n",a->len);
395   }
396 }
397 double CheckVal(Arr *feat){
398   double csum=0.0;
399   int i=0;
400   for(i=0;i<feat->len;i++){
401     csum+=feat->val[i]*feat->val[i]/feat->len; /* The truncation does not work since 
402                                                   result will be 0 for large len  */
403   }
404    return csum;
405 }
406 int GetFNumDPar(int* mean, int* stdev){
407   *mean=NUM_SAMPLES;
408   *stdev=STD_DEVIATION;
409   return 0;
410 }
411 int GetFeatureNum(char *mbname,int id){
412   double tran=314159265.0;
413   double A=2*id+1;
414   double denom=randlc(&tran,&A);
415   char cval='S';
416   int mean=NUM_SAMPLES,stdev=128;
417   int rtfs=0,len=0;
418   GetFNumDPar(&mean,&stdev);
419   rtfs=ipowMod((int)(1/denom)*(int)cval,(long long int) (2*id+1),2*stdev);
420   if(rtfs<0) rtfs=-rtfs;
421   len=mean-stdev+rtfs;
422   return len;
423 }
424 Arr* RandomFeatures(char *bmname,int fdim,int id){
425   int len=GetFeatureNum(bmname,id)*fdim;
426   Arr* feat=newArr(len);
427   int nxg=2,nyg=2,nzg=2,nfg=5;
428   int nx=421,ny=419,nz=1427,nf=3527;
429   long long int expon=(len*(id+1))%3141592;
430   int seedx=ipowMod(nxg,expon,nx),
431       seedy=ipowMod(nyg,expon,ny),
432       seedz=ipowMod(nzg,expon,nz),
433       seedf=ipowMod(nfg,expon,nf);
434   int i=0;
435   if(timer_on){
436     timer_clear(id+1);
437     timer_start(id+1);
438   }
439   for(i=0;i<len;i+=fdim){
440     seedx=(seedx*nxg)%nx;
441     seedy=(seedy*nyg)%ny;
442     seedz=(seedz*nzg)%nz;
443     seedf=(seedf*nfg)%nf;
444     feat->val[i]=seedx;
445     feat->val[i+1]=seedy;
446     feat->val[i+2]=seedz;
447     feat->val[i+3]=seedf;
448   }
449   if(timer_on){
450     timer_stop(id+1);
451     fprintf(stderr,"** RandomFeatures time in node %d = %f\n",id,timer_read(id+1));
452   }
453   return feat;   
454 }
455 void Resample(Arr *a,int blen){
456     long long int i=0,j=0,jlo=0,jhi=0;
457     double avval=0.0;
458     double *nval=(double *)SMPI_SHARED_MALLOC(blen*sizeof(double));
459     Arr *tmp=newArr(10);
460     for(i=0;i<blen;i++) nval[i]=0.0;
461     for(i=1;i<a->len-1;i++){
462       jlo=(int)(0.5*(2*i-1)*(blen/a->len)); 
463       jhi=(int)(0.5*(2*i+1)*(blen/a->len));
464
465       avval=a->val[i]/(jhi-jlo+1);    
466       for(j=jlo;j<=jhi;j++){
467         nval[j]+=avval;
468       }
469     }
470     nval[0]=a->val[0];
471     nval[blen-1]=a->val[a->len-1];
472     SMPI_SHARED_FREE(a->val);
473     a->val=nval;
474     a->len=blen;
475 }
476 #define fielddim 4
477 Arr* WindowFilter(Arr *a, Arr* b,int w){
478   int i=0,j=0,k=0;
479   double rms0=0.0,rms1=0.0,rmsm1=0.0;
480   double weight=((double) (w+1))/(w+2);
481  
482   w+=1;
483   if(timer_on){
484     timer_clear(w);
485     timer_start(w);
486   }
487   if(a->len<b->len) Resample(a,b->len);
488   if(a->len>b->len) Resample(b,a->len);
489   for(i=fielddim;i<a->len-fielddim;i+=fielddim){
490     rms0=(a->val[i]-b->val[i])*(a->val[i]-b->val[i])
491         +(a->val[i+1]-b->val[i+1])*(a->val[i+1]-b->val[i+1])
492         +(a->val[i+2]-b->val[i+2])*(a->val[i+2]-b->val[i+2])
493         +(a->val[i+3]-b->val[i+3])*(a->val[i+3]-b->val[i+3]);
494     j=i+fielddim;
495     rms1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j])
496         +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1])
497         +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2])
498         +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]);
499     j=i-fielddim;
500     rmsm1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j])
501          +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1])
502          +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2])
503          +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]);
504     k=0;
505     if(rms1<rms0){
506       k=1;
507       rms0=rms1;
508     }
509     if(rmsm1<rms0) k=-1;
510     if(k==0){
511       j=i+fielddim;
512       a->val[i]=weight*b->val[i];
513       a->val[i+1]=weight*b->val[i+1];
514       a->val[i+2]=weight*b->val[i+2];
515       a->val[i+3]=weight*b->val[i+3];  
516     }else if(k==1){
517       j=i+fielddim;
518       a->val[i]=weight*b->val[j];
519       a->val[i+1]=weight*b->val[j+1];
520       a->val[i+2]=weight*b->val[j+2];
521       a->val[i+3]=weight*b->val[j+3];  
522     }else { /*if(k==-1)*/
523       j=i-fielddim;
524       a->val[i]=weight*b->val[j];
525       a->val[i+1]=weight*b->val[j+1];
526       a->val[i+2]=weight*b->val[j+2];
527       a->val[i+3]=weight*b->val[j+3];  
528     }      
529   }
530   if(timer_on){
531     timer_stop(w);
532     fprintf(stderr,"** WindowFilter time in node %d = %f\n",(w-1),timer_read(w));
533   }
534   return a;
535 }
536
537 int SendResults(DGraph *dg,DGNode *nd,Arr *feat){
538   int i=0,tag=0;
539   DGArc *ar=NULL;
540   DGNode *head=NULL;
541   if(!feat) return 0;
542   for(i=0;i<nd->outDegree;i++){
543     ar=nd->outArc[i];
544     if(ar->tail!=nd) continue;
545     head=ar->head;
546     tag=ar->id;
547     if(head->address!=nd->address){
548       MPI_Send(&feat->len,1,MPI_INT,head->address,tag,MPI_COMM_WORLD);
549       MPI_Send(feat->val,feat->len,MPI_DOUBLE,head->address,tag,MPI_COMM_WORLD);
550     }
551   }
552   return 1;
553 }
554 Arr* CombineStreams(DGraph *dg,DGNode *nd){
555   Arr *resfeat=newArr(NUM_SAMPLES*fielddim);
556   int i=0,len=0,tag=0;
557   DGArc *ar=NULL;
558   DGNode *tail=NULL;
559   MPI_Status status;
560   Arr *feat=NULL,*featp=NULL;
561
562   if(nd->inDegree==0) return NULL;
563   for(i=0;i<nd->inDegree;i++){
564     ar=nd->inArc[i];
565     if(ar->head!=nd) continue;
566     tail=ar->tail;
567     if(tail->address!=nd->address){
568       len=0;
569       tag=ar->id;
570       MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status);
571       feat=newArr(len);
572       MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status);
573       resfeat=WindowFilter(resfeat,feat,nd->id);
574       SMPI_SHARED_FREE(feat);
575     }else{
576       featp=(Arr *)tail->feat;
577       feat=newArr(featp->len);
578       memcpy(feat->val,featp->val,featp->len*sizeof(double));
579       resfeat=WindowFilter(resfeat,feat,nd->id);  
580       SMPI_SHARED_FREE(feat);
581     }
582   }
583   for(i=0;i<resfeat->len;i++) resfeat->val[i]=((int)resfeat->val[i])/nd->inDegree;
584   nd->feat=resfeat;
585   return nd->feat;
586 }
587 double Reduce(Arr *a,int w){
588   double retv=0.0;
589   if(timer_on){
590     timer_clear(w);
591     timer_start(w);
592   }
593   retv=(int)(w*CheckVal(a));/* The casting needed for node  
594                                and array dependent verifcation */
595   if(timer_on){
596     timer_stop(w);
597     fprintf(stderr,"** Reduce time in node %d = %f\n",(w-1),timer_read(w));
598   }
599   return retv;
600 }
601
602 double ReduceStreams(DGraph *dg,DGNode *nd){
603   double csum=0.0;
604   int i=0,len=0,tag=0;
605   DGArc *ar=NULL;
606   DGNode *tail=NULL;
607   Arr *feat=NULL;
608   double retv=0.0;
609
610   for(i=0;i<nd->inDegree;i++){
611     ar=nd->inArc[i];
612     if(ar->head!=nd) continue;
613     tail=ar->tail;
614     if(tail->address!=nd->address){
615       MPI_Status status;
616       len=0;
617       tag=ar->id;
618       MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status);
619       feat=newArr(len);
620       MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status);
621       csum+=Reduce(feat,(nd->id+1));  
622       SMPI_SHARED_FREE(feat);
623     }else{
624       csum+=Reduce(tail->feat,(nd->id+1));  
625     }
626   }
627   if(nd->inDegree>0)csum=(((long long int)csum)/nd->inDegree);
628   retv=(nd->id+1)*csum;
629   return retv;
630 }
631
632 int ProcessNodes(DGraph *dg,int me){
633   double chksum=0.0;
634   Arr *feat=NULL;
635   int i=0,verified=0,tag;
636   DGNode *nd=NULL;
637   double rchksum=0.0;
638   MPI_Status status;
639
640   for(i=0;i<dg->numNodes;i++){
641     nd=dg->node[i];
642     if(nd->address!=me) continue;
643     if(strstr(nd->name,"Source")){
644       nd->feat=RandomFeatures(dg->name,fielddim,nd->id); 
645       SendResults(dg,nd,nd->feat);
646     }else if(strstr(nd->name,"Sink")){
647       chksum=ReduceStreams(dg,nd);
648       tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */
649       MPI_Send(&chksum,1,MPI_DOUBLE,0,tag,MPI_COMM_WORLD);
650     }else{
651       feat=CombineStreams(dg,nd);
652       SendResults(dg,nd,feat);
653     }
654   }
655   if(me==0){ /* Report node */
656     rchksum=0.0;
657     chksum=0.0;
658     for(i=0;i<dg->numNodes;i++){
659       nd=dg->node[i];
660       if(!strstr(nd->name,"Sink")) continue;
661        tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */
662       MPI_Recv(&rchksum,1,MPI_DOUBLE,nd->address,tag,MPI_COMM_WORLD,&status);
663       chksum+=rchksum;
664     }
665     verified=verify(dg->name,chksum);
666   }
667 return verified;
668 }
669
670 int main(int argc,char **argv ){
671   int my_rank,comm_size;
672   int i;
673   DGraph *dg=NULL;
674   int verified=0, featnum=0;
675   double bytes_sent=2.0,tot_time=0.0;
676
677
678
679     MPI_Init( &argc, &argv );
680     MPI_Comm_rank( MPI_COMM_WORLD, &my_rank );
681     MPI_Comm_size( MPI_COMM_WORLD, &comm_size );
682     TRACE_smpi_set_category ("start");
683
684      if(argc!=2||
685                 (  strncmp(argv[1],"BH",2)!=0
686                  &&strncmp(argv[1],"WH",2)!=0
687                  &&strncmp(argv[1],"SH",2)!=0
688                 )
689       ){
690       if(my_rank==0){
691         fprintf(stderr,"** Usage: mpirun -np N ../bin/dt.S GraphName\n");
692         fprintf(stderr,"** Where \n   - N is integer number of MPI processes\n");
693         fprintf(stderr,"   - S is the class S, W, or A \n");
694         fprintf(stderr,"   - GraphName is the communication graph name BH, WH, or SH.\n");
695         fprintf(stderr,"   - the number of MPI processes N should not be be less than \n");
696         fprintf(stderr,"     the number of nodes in the graph\n");
697       }
698       MPI_Finalize();
699       exit(0);
700     } 
701    if(strncmp(argv[1],"BH",2)==0){
702       dg=buildBH(CLASS);
703     }else if(strncmp(argv[1],"WH",2)==0){
704       dg=buildWH(CLASS);
705     }else if(strncmp(argv[1],"SH",2)==0){
706       dg=buildSH(CLASS);
707     }
708
709     if(timer_on&&dg->numNodes+1>timers_tot){
710       timer_on=0;
711       if(my_rank==0)
712         fprintf(stderr,"Not enough timers. Node timeing is off. \n");
713     }
714     if(dg->numNodes>comm_size){
715       if(my_rank==0){
716         fprintf(stderr,"**  The number of MPI processes should not be less than \n");
717         fprintf(stderr,"**  the number of nodes in the graph\n");
718         fprintf(stderr,"**  Number of MPI processes = %d\n",comm_size);
719         fprintf(stderr,"**  Number nodes in the graph = %d\n",dg->numNodes);
720       }
721       MPI_Finalize();
722       exit(0);
723     }
724     for(i=0;i<dg->numNodes;i++){ 
725       dg->node[i]->address=i;
726     }
727     if( my_rank == 0 ){
728       printf( "\n\n NAS Parallel Benchmarks 3.3 -- DT Benchmark\n\n" );
729       graphShow(dg,0);
730       timer_clear(0);
731       timer_start(0);
732     }
733
734     TRACE_smpi_set_category ("dt");
735     verified=ProcessNodes(dg,my_rank);
736     TRACE_smpi_set_category ("finalize");
737  
738     featnum=NUM_SAMPLES*fielddim;
739     bytes_sent=featnum*dg->numArcs;
740     bytes_sent/=1048576;
741     if(my_rank==0){
742       timer_stop(0);
743       tot_time=timer_read(0);
744       c_print_results( dg->name,
745                        CLASS,
746                        featnum,
747                        0,
748                        0,
749                        dg->numNodes,
750                        0,
751                        comm_size,
752                        tot_time,
753                        bytes_sent/tot_time,
754                        "bytes transmitted", 
755                        verified,
756                        NPBVERSION,
757                        COMPILETIME,
758                        MPICC,
759                        CLINK,
760                        CMPI_LIB,
761                        CMPI_INC,
762                        CFLAGS,
763                        CLINKFLAGS );
764     }          
765     MPI_Finalize();
766   return 1;
767 }