Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Merge branch 'coverity_scan' of github.com:mquinson/simgrid
[simgrid.git] / examples / smpi / NAS / DT / dt.c
1 /*************************************************************************
2  *                                                                       * 
3  *        N  A  S     P A R A L L E L     B E N C H M A R K S  3.3       *
4  *                                                                       * 
5  *                                  D T           * 
6  *                                                                       * 
7  ************************************************************************* 
8  *                                                                       * 
9  *   This benchmark is part of the NAS Parallel Benchmark 3.3 suite.     *
10  *                                                                       * 
11  *   Permission to use, copy, distribute and modify this software        * 
12  *   for any purpose with or without fee is hereby granted.  We          * 
13  *   request, however, that all derived work reference the NAS           * 
14  *   Parallel Benchmarks 3.3. This software is provided "as is"          *
15  *   without express or implied warranty.                                * 
16  *                                                                       * 
17  *   Information on NPB 3.3, including the technical report, the         *
18  *   original specifications, source code, results and information       * 
19  *   on how to submit new results, is available at:                      * 
20  *                                                                       * 
21  *          http:  www.nas.nasa.gov/Software/NPB                         * 
22  *                                                                       * 
23  *   Send comments or suggestions to  npb@nas.nasa.gov                   * 
24  *   Send bug reports to              npb-bugs@nas.nasa.gov              * 
25  *                                                                       * 
26  *         NAS Parallel Benchmarks Group                                 * 
27  *         NASA Ames Research Center                                     * 
28  *         Mail Stop: T27A-1                                             * 
29  *         Moffett Field, CA   94035-1000                                * 
30  *                                                                       * 
31  *         E-mail:  npb@nas.nasa.gov                                     * 
32  *         Fax:     (650) 604-3957                                       * 
33  *                                                                       * 
34  ************************************************************************* 
35  *                                                                       * 
36  *   Author: M. Frumkin               *             * 
37  *                                                                       * 
38  *************************************************************************/
39
40 #include <stdlib.h>
41 #include <stdio.h>
42 #include <string.h>
43
44 #include "mpi.h"
45 #include "npbparams.h"
46
47 #include "simgrid/instr.h" //TRACE_
48
49 #ifndef CLASS
50 #define CLASS 'S'
51 #define NUM_PROCS            1                 
52 #endif
53
54 //int      passed_verification;
55 extern double randlc( double *X, double *A );
56 extern
57 void c_print_results( char   *name,
58                       char   class,
59                       int    n1, 
60                       int    n2,
61                       int    n3,
62                       int    niter,
63                       int    nprocs_compiled,
64                       int    nprocs_total,
65                       double t,
66                       double mops,
67           char   *optype,
68                       int    passed_verification,
69                       char   *npbversion,
70                       char   *compiletime,
71                       char   *mpicc,
72                       char   *clink,
73                       char   *cmpi_lib,
74                       char   *cmpi_inc,
75                       char   *cflags,
76                       char   *clinkflags );
77           
78 void    timer_clear( int n );
79 void    timer_start( int n );
80 void    timer_stop( int n );
81 double  timer_read( int n );
82 int timer_on=0,timers_tot=64;
83
84 int verify(char *bmname,double rnm2){
85     double verify_value=0.0;
86     double epsilon=1.0E-8;
87     char cls=CLASS;
88     int verified=-1;
89     if (cls != 'U') {
90        if(cls=='S') {
91          if(strstr(bmname,"BH")){
92            verify_value=30892725.0;
93          }else if(strstr(bmname,"WH")){
94            verify_value=67349758.0;
95          }else if(strstr(bmname,"SH")){
96            verify_value=58875767.0;
97          }else{
98            fprintf(stderr,"No such benchmark as %s.\n",bmname);
99          }
100          verified = 0;
101        }else if(cls=='W') {
102          if(strstr(bmname,"BH")){
103        verify_value = 4102461.0;
104          }else if(strstr(bmname,"WH")){
105        verify_value = 204280762.0;
106          }else if(strstr(bmname,"SH")){
107        verify_value = 186944764.0;
108          }else{
109            fprintf(stderr,"No such benchmark as %s.\n",bmname);
110          }
111          verified = 0;
112        }else if(cls=='A') {
113          if(strstr(bmname,"BH")){
114        verify_value = 17809491.0;
115          }else if(strstr(bmname,"WH")){
116        verify_value = 1289925229.0;
117          }else if(strstr(bmname,"SH")){
118        verify_value = 610856482.0;
119          }else{
120            fprintf(stderr,"No such benchmark as %s.\n",bmname);
121          }
122      verified = 0;
123        }else if(cls=='B') {
124          if(strstr(bmname,"BH")){
125        verify_value = 4317114.0;
126          }else if(strstr(bmname,"WH")){
127        verify_value = 7877279917.0;
128          }else if(strstr(bmname,"SH")){
129        verify_value = 1836863082.0;
130          }else{
131            fprintf(stderr,"No such benchmark as %s.\n",bmname);
132        verified = 0;
133          }
134        }else if(cls=='C') {
135          if(strstr(bmname,"BH")){
136        verify_value = 0.0;
137          }else if(strstr(bmname,"WH")){
138        verify_value = 0.0;
139          }else if(strstr(bmname,"SH")){
140        verify_value = 0.0;
141          }else{
142            fprintf(stderr,"No such benchmark as %s.\n",bmname);
143        verified = -1;
144          }
145        }else if(cls=='D') {
146          if(strstr(bmname,"BH")){
147        verify_value = 0.0;
148          }else if(strstr(bmname,"WH")){
149        verify_value = 0.0;
150          }else if(strstr(bmname,"SH")){
151        verify_value = 0.0;
152          }else{
153            fprintf(stderr,"No such benchmark as %s.\n",bmname);
154          }
155          verified = -1;
156        }else{
157          fprintf(stderr,"No such class as %c.\n",cls);
158        }
159        fprintf(stderr," %s L2 Norm = %f\n",bmname,rnm2);
160        if(verified==-1){
161      fprintf(stderr," No verification was performed.\n");
162        }else if( rnm2 - verify_value < epsilon &&
163                  rnm2 - verify_value > -epsilon) {  /* abs here does not work on ALTIX */
164       verified = 1;
165       fprintf(stderr," Deviation = %f\n",(rnm2 - verify_value));
166        }else{
167      verified = 0;
168      fprintf(stderr," The correct verification value = %f\n",verify_value);
169      fprintf(stderr," Got value = %f\n",rnm2);
170        }
171     }else{
172        verified = -1;
173     }
174     return  verified;  
175   }
176
177 int ipowMod(int a,long long int n,int md){ 
178   int seed=1,q=a,r=1;
179   if(n<0){
180     fprintf(stderr,"ipowMod: exponent must be nonnegative exp=%lld\n",n);
181     n=-n; /* temp fix */
182 /*    return 1; */
183   }
184   if(md<=0){
185     fprintf(stderr,"ipowMod: module must be positive mod=%d",md);
186     return 1;
187   }
188   if(n==0) return 1;
189   while(n>1){
190     int n2 = n/2;
191     if (n2*2==n){
192        seed = (q*q)%md;
193        q=seed;
194        n = n2;
195     }else{
196        seed = (r*q)%md;
197        r=seed;
198        n = n-1;
199     }
200   }
201   seed = (r*q)%md;
202   return seed;
203 }
204
205 #include "DGraph.h"
206 DGraph *buildSH(char cls){
207 /*
208   Nodes of the graph must be topologically sorted
209   to avoid MPI deadlock.
210 */
211   DGraph *dg;
212   int numSources=NUM_SOURCES; /* must be power of 2 */
213   int numOfLayers=0,tmpS=numSources>>1;
214   int firstLayerNode=0;
215   DGArc *ar=NULL;
216   DGNode *nd=NULL;
217   int mask=0x0,ndid=0,ndoff=0;
218   int i=0,j=0;
219   char nm[BLOCK_SIZE];
220   
221   sprintf(nm,"DT_SH.%c",cls);
222   dg=newDGraph(nm);
223
224   while(tmpS>1){
225     numOfLayers++;
226     tmpS>>=1;
227   }
228   for(i=0;i<numSources;i++){
229     sprintf(nm,"Source.%d",i);
230     nd=newNode(nm);
231     AttachNode(dg,nd);
232   }
233   for(j=0;j<numOfLayers;j++){
234     mask=0x00000001<<j;
235     for(i=0;i<numSources;i++){
236       sprintf(nm,"Comparator.%d",(i+j*firstLayerNode));
237       nd=newNode(nm);
238       AttachNode(dg,nd);
239       ndoff=i&(~mask);
240       ndid=firstLayerNode+ndoff;
241       ar=newArc(dg->node[ndid],nd);     
242       AttachArc(dg,ar);
243       ndoff+=mask;
244       ndid=firstLayerNode+ndoff;
245       ar=newArc(dg->node[ndid],nd);     
246       AttachArc(dg,ar);
247     }
248     firstLayerNode+=numSources;
249   }
250   mask=0x00000001<<numOfLayers;
251   for(i=0;i<numSources;i++){
252     sprintf(nm,"Sink.%d",i);
253     nd=newNode(nm);
254     AttachNode(dg,nd);
255     ndoff=i&(~mask);
256     ndid=firstLayerNode+ndoff;
257     ar=newArc(dg->node[ndid],nd);     
258     AttachArc(dg,ar);
259     ndoff+=mask;
260     ndid=firstLayerNode+ndoff;
261     ar=newArc(dg->node[ndid],nd);     
262     AttachArc(dg,ar);
263   }
264 return dg;
265 }
266 DGraph *buildWH(char cls){
267 /*
268   Nodes of the graph must be topologically sorted
269   to avoid MPI deadlock.
270 */
271   int i=0,j=0;
272   int numSources=NUM_SOURCES,maxInDeg=4;
273   int numLayerNodes=numSources,firstLayerNode=0;
274   int totComparators=0;
275   int numPrevLayerNodes=numLayerNodes;
276   int id=0,sid=0;
277   DGraph *dg;
278   DGNode *nd=NULL,*source=NULL,*tmp=NULL,*snd=NULL;
279   DGArc *ar=NULL;
280   char nm[BLOCK_SIZE];
281
282   sprintf(nm,"DT_WH.%c",cls);
283   dg=newDGraph(nm);
284
285   for(i=0;i<numSources;i++){
286     sprintf(nm,"Sink.%d",i);
287     nd=newNode(nm);
288     AttachNode(dg,nd);
289   }
290   totComparators=0;
291   numPrevLayerNodes=numLayerNodes;
292   while(numLayerNodes>maxInDeg){
293     numLayerNodes=numLayerNodes/maxInDeg;
294     if(numLayerNodes*maxInDeg<numPrevLayerNodes)numLayerNodes++;
295     for(i=0;i<numLayerNodes;i++){
296       sprintf(nm,"Comparator.%d",totComparators);
297       totComparators++;
298       nd=newNode(nm);
299       id=AttachNode(dg,nd);
300       for(j=0;j<maxInDeg;j++){
301         sid=i*maxInDeg+j;
302   if(sid>=numPrevLayerNodes) break;
303         snd=dg->node[firstLayerNode+sid];
304         ar=newArc(dg->node[id],snd);
305         AttachArc(dg,ar);
306       }
307     }
308     firstLayerNode+=numPrevLayerNodes;
309     numPrevLayerNodes=numLayerNodes;
310   }
311   source=newNode("Source");
312   AttachNode(dg,source);   
313   for(i=0;i<numPrevLayerNodes;i++){
314     nd=dg->node[firstLayerNode+i];
315     ar=newArc(source,nd);
316     AttachArc(dg,ar);
317   }
318
319   for(i=0;i<dg->numNodes/2;i++){  /* Topological sorting */
320     tmp=dg->node[i];
321     dg->node[i]=dg->node[dg->numNodes-1-i];
322     dg->node[i]->id=i;
323     dg->node[dg->numNodes-1-i]=tmp;
324     dg->node[dg->numNodes-1-i]->id=dg->numNodes-1-i;
325   }
326 return dg;
327 }
328 DGraph *buildBH(char cls){
329 /*
330   Nodes of the graph must be topologically sorted
331   to avoid MPI deadlock.
332 */
333   int i=0,j=0;
334   int numSources=NUM_SOURCES,maxInDeg=4;
335   int numLayerNodes=numSources,firstLayerNode=0;
336   DGraph *dg;
337   DGNode *nd=NULL, *snd=NULL, *sink=NULL;
338   DGArc *ar=NULL;
339   int totComparators=0;
340   int numPrevLayerNodes=numLayerNodes;
341   int id=0, sid=0;
342   char nm[BLOCK_SIZE];
343
344   sprintf(nm,"DT_BH.%c",cls);
345   dg=newDGraph(nm);
346
347   for(i=0;i<numSources;i++){
348     sprintf(nm,"Source.%d",i);
349     nd=newNode(nm);
350     AttachNode(dg,nd);
351   }
352   while(numLayerNodes>maxInDeg){
353     numLayerNodes=numLayerNodes/maxInDeg;
354     if(numLayerNodes*maxInDeg<numPrevLayerNodes)numLayerNodes++;
355     for(i=0;i<numLayerNodes;i++){
356       sprintf(nm,"Comparator.%d",totComparators);
357       totComparators++;
358       nd=newNode(nm);
359       id=AttachNode(dg,nd);
360       for(j=0;j<maxInDeg;j++){
361         sid=i*maxInDeg+j;
362   if(sid>=numPrevLayerNodes) break;
363         snd=dg->node[firstLayerNode+sid];
364         ar=newArc(snd,dg->node[id]);
365         AttachArc(dg,ar);
366       }
367     }
368     firstLayerNode+=numPrevLayerNodes;
369     numPrevLayerNodes=numLayerNodes;
370   }
371   sink=newNode("Sink");
372   AttachNode(dg,sink);   
373   for(i=0;i<numPrevLayerNodes;i++){
374     nd=dg->node[firstLayerNode+i];
375     ar=newArc(nd,sink);
376     AttachArc(dg,ar);
377   }
378 return dg;
379 }
380
381 typedef struct{
382   int len;
383   double* val;
384 } Arr;
385 Arr *newArr(int len){
386   Arr *arr=(Arr *)malloc(sizeof(Arr));
387   arr->len=len;
388   arr->val=(double *)malloc(len*sizeof(double));
389   return arr;
390 }
391 void arrShow(Arr* a){
392   if(!a) fprintf(stderr,"-- NULL array\n");
393   else{
394     fprintf(stderr,"-- length=%d\n",a->len);
395   }
396 }
397 double CheckVal(Arr *feat){
398   double csum=0.0;
399   int i=0;
400   for(i=0;i<feat->len;i++){
401     csum+=feat->val[i]*feat->val[i]/feat->len; /* The truncation does not work since 
402                                                   result will be 0 for large len  */
403   }
404    return csum;
405 }
406 int GetFNumDPar(int* mean, int* stdev){
407   *mean=NUM_SAMPLES;
408   *stdev=STD_DEVIATION;
409   return 0;
410 }
411 int GetFeatureNum(char *mbname,int id){
412   double tran=314159265.0;
413   double A=2*id+1;
414   double denom=randlc(&tran,&A);
415   char cval='S';
416   int mean=NUM_SAMPLES,stdev=128;
417   int rtfs=0,len=0;
418   GetFNumDPar(&mean,&stdev);
419   rtfs=ipowMod((int)(1/denom)*(int)cval,(long long int) (2*id+1),2*stdev);
420   if(rtfs<0) rtfs=-rtfs;
421   len=mean-stdev+rtfs;
422   return len;
423 }
424 Arr* RandomFeatures(char *bmname,int fdim,int id){
425   int len=GetFeatureNum(bmname,id)*fdim;
426   Arr* feat=newArr(len);
427   int nxg=2,nyg=2,nzg=2,nfg=5;
428   int nx=421,ny=419,nz=1427,nf=3527;
429   long long int expon=(len*(id+1))%3141592;
430   int seedx=ipowMod(nxg,expon,nx),
431       seedy=ipowMod(nyg,expon,ny),
432       seedz=ipowMod(nzg,expon,nz),
433       seedf=ipowMod(nfg,expon,nf);
434   int i=0;
435   if(timer_on){
436     timer_clear(id+1);
437     timer_start(id+1);
438   }
439   for(i=0;i<len;i+=fdim){
440     seedx=(seedx*nxg)%nx;
441     seedy=(seedy*nyg)%ny;
442     seedz=(seedz*nzg)%nz;
443     seedf=(seedf*nfg)%nf;
444     feat->val[i]=seedx;
445     feat->val[i+1]=seedy;
446     feat->val[i+2]=seedz;
447     feat->val[i+3]=seedf;
448   }
449   if(timer_on){
450     timer_stop(id+1);
451     fprintf(stderr,"** RandomFeatures time in node %d = %f\n",id,timer_read(id+1));
452   }
453   return feat;   
454 }
455 void Resample(Arr *a,int blen){
456     long long int i=0,j=0,jlo=0,jhi=0;
457     double avval=0.0;
458     double *nval=(double *)malloc(blen*sizeof(double));
459     Arr *tmp=newArr(10);
460     for(i=0;i<blen;i++) nval[i]=0.0;
461     for(i=1;i<a->len-1;i++){
462       jlo=(int)(0.5*(2*i-1)*(blen/a->len)); 
463       jhi=(int)(0.5*(2*i+1)*(blen/a->len));
464
465       avval=a->val[i]/(jhi-jlo+1);    
466       for(j=jlo;j<=jhi;j++){
467         nval[j]+=avval;
468       }
469     }
470     nval[0]=a->val[0];
471     nval[blen-1]=a->val[a->len-1];
472     free(a->val);
473     a->val=nval;
474     a->len=blen;
475 }
476 #define fielddim 4
477 Arr* WindowFilter(Arr *a, Arr* b,int w){
478   int i=0,j=0,k=0;
479   double rms0=0.0,rms1=0.0,rmsm1=0.0;
480   double weight=((double) (w+1))/(w+2);
481  
482   w+=1;
483   if(timer_on){
484     timer_clear(w);
485     timer_start(w);
486   }
487   if(a->len<b->len) Resample(a,b->len);
488   if(a->len>b->len) Resample(b,a->len);
489   for(i=fielddim;i<a->len-fielddim;i+=fielddim){
490     rms0=(a->val[i]-b->val[i])*(a->val[i]-b->val[i])
491   +(a->val[i+1]-b->val[i+1])*(a->val[i+1]-b->val[i+1])
492   +(a->val[i+2]-b->val[i+2])*(a->val[i+2]-b->val[i+2])
493   +(a->val[i+3]-b->val[i+3])*(a->val[i+3]-b->val[i+3]);
494     j=i+fielddim;
495     rms1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j])
496       +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1])
497       +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2])
498       +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]);
499     j=i-fielddim;
500     rmsm1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j])
501    +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1])
502    +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2])
503    +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]);
504     k=0;
505     if(rms1<rms0){
506       k=1;
507       rms0=rms1;
508     }
509     if(rmsm1<rms0) k=-1;
510     if(k==0){
511       j=i+fielddim;
512       a->val[i]=weight*b->val[i];
513       a->val[i+1]=weight*b->val[i+1];
514       a->val[i+2]=weight*b->val[i+2];
515       a->val[i+3]=weight*b->val[i+3];  
516     }else if(k==1){
517       j=i+fielddim;
518       a->val[i]=weight*b->val[j];
519       a->val[i+1]=weight*b->val[j+1];
520       a->val[i+2]=weight*b->val[j+2];
521       a->val[i+3]=weight*b->val[j+3];  
522     }else { /*if(k==-1)*/
523       j=i-fielddim;
524       a->val[i]=weight*b->val[j];
525       a->val[i+1]=weight*b->val[j+1];
526       a->val[i+2]=weight*b->val[j+2];
527       a->val[i+3]=weight*b->val[j+3];  
528     }     
529   }
530   if(timer_on){
531     timer_stop(w);
532     fprintf(stderr,"** WindowFilter time in node %d = %f\n",(w-1),timer_read(w));
533   }
534   return a;
535 }
536
537 int SendResults(DGraph *dg,DGNode *nd,Arr *feat){
538   int i=0,tag=0;
539   DGArc *ar=NULL;
540   DGNode *head=NULL;
541   if(!feat) return 0;
542   TRACE_smpi_set_category ("SendResults");
543   for(i=0;i<nd->outDegree;i++){
544     ar=nd->outArc[i];
545     if(ar->tail!=nd) continue;
546     head=ar->head;
547     tag=ar->id;
548     if(head->address!=nd->address){
549       MPI_Send(&feat->len,1,MPI_INT,head->address,tag,MPI_COMM_WORLD);
550       MPI_Send(feat->val,feat->len,MPI_DOUBLE,head->address,tag,MPI_COMM_WORLD);
551     }
552   }
553   TRACE_smpi_set_category (NULL);
554   return 1;
555 }
556 Arr* CombineStreams(DGraph *dg,DGNode *nd){
557   Arr *resfeat=newArr(NUM_SAMPLES*fielddim);
558   int i=0,len=0,tag=0;
559   DGArc *ar=NULL;
560   DGNode *tail=NULL;
561   MPI_Status status;
562   Arr *feat=NULL,*featp=NULL;
563
564   if(nd->inDegree==0) return NULL;
565   for(i=0;i<nd->inDegree;i++){
566     ar=nd->inArc[i];
567     if(ar->head!=nd) continue;
568     tail=ar->tail;
569     if(tail->address!=nd->address){
570       len=0;
571       tag=ar->id;
572       MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status);
573       feat=newArr(len);
574       MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status);
575       resfeat=WindowFilter(resfeat,feat,nd->id);
576       free(feat);
577     }else{
578       featp=(Arr *)tail->feat;
579       feat=newArr(featp->len);
580       memcpy(feat->val,featp->val,featp->len*sizeof(double));
581       resfeat=WindowFilter(resfeat,feat,nd->id);  
582       free(feat);
583     }
584   }
585   for(i=0;i<resfeat->len;i++) resfeat->val[i]=((int)resfeat->val[i])/nd->inDegree;
586   nd->feat=resfeat;
587   return nd->feat;
588 }
589 double Reduce(Arr *a,int w){
590   double retv=0.0;
591   if(timer_on){
592     timer_clear(w);
593     timer_start(w);
594   }
595   retv=(int)(w*CheckVal(a));/* The casting needed for node  
596                                and array dependent verifcation */
597   if(timer_on){
598     timer_stop(w);
599     fprintf(stderr,"** Reduce time in node %d = %f\n",(w-1),timer_read(w));
600   }
601   return retv;
602 }
603
604 double ReduceStreams(DGraph *dg,DGNode *nd){
605   double csum=0.0;
606   int i=0,len=0,tag=0;
607   DGArc *ar=NULL;
608   DGNode *tail=NULL;
609   Arr *feat=NULL;
610   double retv=0.0;
611
612   TRACE_smpi_set_category ("ReduceStreams");
613
614   for(i=0;i<nd->inDegree;i++){
615     ar=nd->inArc[i];
616     if(ar->head!=nd) continue;
617     tail=ar->tail;
618     if(tail->address!=nd->address){
619       MPI_Status status;
620       len=0;
621       tag=ar->id;
622       MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status);
623       feat=newArr(len);
624       MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status);
625       csum+=Reduce(feat,(nd->id+1));  
626       free(feat);
627     }else{
628       csum+=Reduce(tail->feat,(nd->id+1));  
629     }
630   }
631   if(nd->inDegree>0)csum=(((long long int)csum)/nd->inDegree);
632   retv=(nd->id+1)*csum;
633   return retv;
634 }
635
636 int ProcessNodes(DGraph *dg,int me){
637   double chksum=0.0;
638   Arr *feat=NULL;
639   int i=0,verified=0,tag;
640   DGNode *nd=NULL;
641   double rchksum=0.0;
642   MPI_Status status;
643
644   TRACE_smpi_set_category ("ProcessNodes");
645
646   for(i=0;i<dg->numNodes;i++){
647     nd=dg->node[i];
648     if(nd->address!=me) continue;
649     if(strstr(nd->name,"Source")){
650       nd->feat=RandomFeatures(dg->name,fielddim,nd->id); 
651       SendResults(dg,nd,nd->feat);
652     }else if(strstr(nd->name,"Sink")){
653       chksum=ReduceStreams(dg,nd);
654       tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */
655       MPI_Send(&chksum,1,MPI_DOUBLE,0,tag,MPI_COMM_WORLD);
656     }else{
657       feat=CombineStreams(dg,nd);
658       SendResults(dg,nd,feat);
659     }
660   }
661
662   TRACE_smpi_set_category ("ProcessNodes");
663
664   if(me==0){ /* Report node */
665     rchksum=0.0;
666     chksum=0.0;
667     for(i=0;i<dg->numNodes;i++){
668       nd=dg->node[i];
669       if(!strstr(nd->name,"Sink")) continue;
670        tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */
671       MPI_Recv(&rchksum,1,MPI_DOUBLE,nd->address,tag,MPI_COMM_WORLD,&status);
672       chksum+=rchksum;
673     }
674     verified=verify(dg->name,chksum);
675   }
676 return verified;
677 }
678
679 int main(int argc,char **argv ){
680   int my_rank,comm_size;
681   int i;
682   DGraph *dg=NULL;
683   int verified=0, featnum=0;
684   double bytes_sent=2.0,tot_time=0.0;
685
686     MPI_Init( &argc, &argv );
687     MPI_Comm_rank( MPI_COMM_WORLD, &my_rank );
688     MPI_Comm_size( MPI_COMM_WORLD, &comm_size );
689     TRACE_smpi_set_category ("begin");
690
691      if(argc!=2||
692                 (  strncmp(argv[1],"BH",2)!=0
693                  &&strncmp(argv[1],"WH",2)!=0
694                  &&strncmp(argv[1],"SH",2)!=0
695                 )
696       ){
697       if(my_rank==0){
698         fprintf(stderr,"** Usage: mpirun -np N ../bin/dt.S GraphName\n");
699         fprintf(stderr,"** Where \n   - N is integer number of MPI processes\n");
700         fprintf(stderr,"   - S is the class S, W, or A \n");
701         fprintf(stderr,"   - GraphName is the communication graph name BH, WH, or SH.\n");
702         fprintf(stderr,"   - the number of MPI processes N should not be be less than \n");
703         fprintf(stderr,"     the number of nodes in the graph\n");
704       }
705       MPI_Finalize();
706       exit(0);
707     } 
708    if(strncmp(argv[1],"BH",2)==0){
709       dg=buildBH(CLASS);
710     }else if(strncmp(argv[1],"WH",2)==0){
711       dg=buildWH(CLASS);
712     }else if(strncmp(argv[1],"SH",2)==0){
713       dg=buildSH(CLASS);
714     }
715
716     if(timer_on&&dg->numNodes+1>timers_tot){
717       timer_on=0;
718       if(my_rank==0)
719         fprintf(stderr,"Not enough timers. Node timeing is off. \n");
720     }
721     if(dg->numNodes>comm_size){
722       if(my_rank==0){
723         fprintf(stderr,"**  The number of MPI processes should not be less than \n");
724         fprintf(stderr,"**  the number of nodes in the graph\n");
725         fprintf(stderr,"**  Number of MPI processes = %d\n",comm_size);
726         fprintf(stderr,"**  Number nodes in the graph = %d\n",dg->numNodes);
727       }
728       MPI_Finalize();
729       exit(0);
730     }
731     for(i=0;i<dg->numNodes;i++){ 
732       dg->node[i]->address=i;
733     }
734     if( my_rank == 0 ){
735       printf( "\n\n NAS Parallel Benchmarks 3.3 -- DT Benchmark\n\n" );
736       graphShow(dg,0);
737       timer_clear(0);
738       timer_start(0);
739     }
740     verified=ProcessNodes(dg,my_rank);
741     TRACE_smpi_set_category ("end");
742
743     featnum=NUM_SAMPLES*fielddim;
744     bytes_sent=featnum*dg->numArcs;
745     bytes_sent/=1048576;
746     if(my_rank==0){
747       timer_stop(0);
748       tot_time=timer_read(0);
749       c_print_results( dg->name,
750                  CLASS,
751                  featnum,
752                  0,
753                  0,
754                  dg->numNodes,
755                  0,
756                  comm_size,
757                  tot_time,
758                  bytes_sent/tot_time,
759                  "bytes transmitted", 
760                  verified,
761                  NPBVERSION,
762                  COMPILETIME,
763                  MPICC,
764                  CLINK,
765                  CMPI_LIB,
766                  CMPI_INC,
767                  CFLAGS,
768                  CLINKFLAGS );
769     }          
770     MPI_Finalize();
771   return 1;
772 }