[simgrid.git] / examples / smpi / NAS / dt.c
/*************************************************************************
 *                                                                       *
 *        N  A  S     P A R A L L E L     B E N C H M A R K S  3.3       *
 *                                                                       *
 *                                  D T                                  *
 *                                                                       *
 *************************************************************************
 *                                                                       *
 *   This benchmark is part of the NAS Parallel Benchmark 3.3 suite.     *
 *                                                                       *
 *   Permission to use, copy, distribute and modify this software        *
 *   for any purpose with or without fee is hereby granted.  We          *
 *   request, however, that all derived work reference the NAS           *
 *   Parallel Benchmarks 3.3. This software is provided "as is"          *
 *   without express or implied warranty.                                *
 *                                                                       *
 *   Information on NPB 3.3, including the technical report, the         *
 *   original specifications, source code, results and information       *
 *   on how to submit new results, is available at:                      *
 *                                                                       *
 *          http://www.nas.nasa.gov/Software/NPB                         *
 *                                                                       *
 *   Send comments or suggestions to  npb@nas.nasa.gov                   *
 *   Send bug reports to              npb-bugs@nas.nasa.gov              *
 *                                                                       *
 *         NAS Parallel Benchmarks Group                                 *
 *         NASA Ames Research Center                                     *
 *         Mail Stop: T27A-1                                             *
 *         Moffett Field, CA   94035-1000                                *
 *                                                                       *
 *         E-mail:  npb@nas.nasa.gov                                     *
 *         Fax:     (650) 604-3957                                       *
 *                                                                       *
 *************************************************************************
 *                                                                       *
 *   Author: M. Frumkin                                                  *
 *                                                                       *
 *************************************************************************/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "smpi/mpi.h"
#include "nas_common.h"
#include "simgrid/instr.h" //TRACE_

int timer_on=0,timers_tot=64;
double start[64], elapsed[64];

char class;
int nprocs;
int num_samples;
int deviation;
int num_sources;

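/* Compare the computed checksum (L2 norm) against the reference value for
 * the current class and graph name.  Returns 1 on successful verification,
 * 0 on mismatch, and -1 when no verification is performed (class 'U', or
 * classes C/D for which no reference values are provided). */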
static int verify(char *bmname,double rnm2){
    double verify_value=0.0;
    double epsilon=1.0E-8;
    int verified=-1;
    if (class != 'U') {
       if(class=='S') {
         if(strstr(bmname,"BH")){
           verify_value=30892725.0;
         }else if(strstr(bmname,"WH")){
           verify_value=67349758.0;
         }else if(strstr(bmname,"SH")){
           verify_value=58875767.0;
         }else{
           fprintf(stderr,"No such benchmark as %s.\n",bmname);
         }
         verified = 0;
       }else if(class=='W') {
         if(strstr(bmname,"BH")){
           verify_value = 4102461.0;
         }else if(strstr(bmname,"WH")){
           verify_value = 204280762.0;
         }else if(strstr(bmname,"SH")){
           verify_value = 186944764.0;
         }else{
           fprintf(stderr,"No such benchmark as %s.\n",bmname);
         }
         verified = 0;
       }else if(class=='A') {
         if(strstr(bmname,"BH")){
           verify_value = 17809491.0;
         }else if(strstr(bmname,"WH")){
           verify_value = 1289925229.0;
         }else if(strstr(bmname,"SH")){
           verify_value = 610856482.0;
         }else{
           fprintf(stderr,"No such benchmark as %s.\n",bmname);
         }
         verified = 0;
       }else if(class=='B') {
         if(strstr(bmname,"BH")){
           verify_value = 4317114.0;
         }else if(strstr(bmname,"WH")){
           verify_value = 7877279917.0;
         }else if(strstr(bmname,"SH")){
           verify_value = 1836863082.0;
         }else{
           fprintf(stderr,"No such benchmark as %s.\n",bmname);
           verified = 0;
         }
       }else if(class=='C') {
         if(strstr(bmname,"BH")){
           verify_value = 0.0;
         }else if(strstr(bmname,"WH")){
           verify_value = 0.0;
         }else if(strstr(bmname,"SH")){
           verify_value = 0.0;
         }else{
           fprintf(stderr,"No such benchmark as %s.\n",bmname);
           verified = -1;
         }
       }else if(class=='D') {
         if(strstr(bmname,"BH")){
           verify_value = 0.0;
         }else if(strstr(bmname,"WH")){
           verify_value = 0.0;
         }else if(strstr(bmname,"SH")){
           verify_value = 0.0;
         }else{
           fprintf(stderr,"No such benchmark as %s.\n",bmname);
         }
         verified = -1;
       }else{
         fprintf(stderr,"No such class as %c.\n",class);
       }
       fprintf(stderr," %s L2 Norm = %f\n",bmname,rnm2);
       if(verified==-1){
         fprintf(stderr," No verification was performed.\n");
       }else if( rnm2 - verify_value < epsilon &&
                 rnm2 - verify_value > -epsilon) {  /* abs here does not work on ALTIX */
         verified = 1;
         fprintf(stderr," Deviation = %f\n",(rnm2 - verify_value));
       }else{
         verified = 0;
         fprintf(stderr," The correct verification value = %f\n",verify_value);
         fprintf(stderr," Got value = %f\n",rnm2);
       }
    }else{
       verified = -1;
    }
    return verified;
}

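/* Modular exponentiation: computes a^n mod md by repeated squaring.
 * Negative exponents are reported and replaced by their absolute value
 * (the "temp fix" below); a non-positive modulus yields 1. */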
static int ipowMod(int a,long long int n,int md){
  int seed=1,q=a,r=1;
  if(n<0){
    fprintf(stderr,"ipowMod: exponent must be nonnegative exp=%lld\n",n);
    n=-n; /* temp fix */
/*    return 1; */
  }
  if(md<=0){
    fprintf(stderr,"ipowMod: modulus must be positive mod=%d\n",md);
    return 1;
  }
  if(n==0) return 1;
  while(n>1){
    int n2 = n/2;
    if (n2*2==n){
       seed = (q*q)%md;
       q=seed;
       n = n2;
    }else{
       seed = (r*q)%md;
       r=seed;
       n = n-1;
    }
  }
  seed = (r*q)%md;
  return seed;
}

#include "DGraph.h"
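
/* Build the DT "SHuffle" (SH) communication graph: num_sources source nodes
 * feed log2(num_sources)-1 layers of comparators wired with a butterfly-style
 * bit-mask pattern, followed by one sink per source.  Nodes are attached in
 * topological order so the MPI sends and receives cannot deadlock. */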
static DGraph *buildSH(const char cls){
/*
  Nodes of the graph must be topologically sorted
  to avoid MPI deadlock.
*/
  DGraph *dg;
  int numSources=num_sources; /* must be power of 2 */
  int numOfLayers=0,tmpS=numSources>>1;
  int firstLayerNode=0;
  DGArc *ar=NULL;
  DGNode *nd=NULL;
  int mask=0x0,ndid=0,ndoff=0;
  int i=0,j=0;
  char nm[BLOCK_SIZE];

  sprintf(nm,"DT_SH.%c",cls);
  dg=newDGraph(nm);

  while(tmpS>1){
    numOfLayers++;
    tmpS>>=1;
  }
  for(i=0;i<numSources;i++){
    sprintf(nm,"Source.%d",i);
    nd=newNode(nm);
    AttachNode(dg,nd);
  }
  for(j=0;j<numOfLayers;j++){
    mask=0x00000001<<j;
    for(i=0;i<numSources;i++){
      sprintf(nm,"Comparator.%d",(i+j*firstLayerNode));
      nd=newNode(nm);
      AttachNode(dg,nd);
      ndoff=i&(~mask);
      ndid=firstLayerNode+ndoff;
      ar=newArc(dg->node[ndid],nd);
      AttachArc(dg,ar);
      ndoff+=mask;
      ndid=firstLayerNode+ndoff;
      ar=newArc(dg->node[ndid],nd);
      AttachArc(dg,ar);
    }
    firstLayerNode+=numSources;
  }
  mask=0x00000001<<numOfLayers;
  for(i=0;i<numSources;i++){
    sprintf(nm,"Sink.%d",i);
    nd=newNode(nm);
    AttachNode(dg,nd);
    ndoff=i&(~mask);
    ndid=firstLayerNode+ndoff;
    ar=newArc(dg->node[ndid],nd);
    AttachArc(dg,ar);
    ndoff+=mask;
    ndid=firstLayerNode+ndoff;
    ar=newArc(dg->node[ndid],nd);
    AttachArc(dg,ar);
  }
  return dg;
}
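
/* Build the DT "White Hole" (WH) graph: a single source fans out through
 * layers of comparators (each with at most maxInDeg outgoing arcs) down to
 * num_sources sinks.  The graph is built from the sinks upward, and the node
 * array is then reversed to obtain a topological order. */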
static DGraph *buildWH(const char cls){
/*  Nodes of the graph must be topologically sorted to avoid MPI deadlock. */
  int i=0,j=0;
  int numSources=num_sources,maxInDeg=4;
  int numLayerNodes=numSources,firstLayerNode=0;
  int totComparators=0;
  int numPrevLayerNodes=numLayerNodes;
  int id=0,sid=0;
  DGraph *dg;
  DGNode *nd=NULL,*source=NULL,*tmp=NULL,*snd=NULL;
  DGArc *ar=NULL;
  char nm[BLOCK_SIZE];

  sprintf(nm,"DT_WH.%c",cls);
  dg=newDGraph(nm);

  for(i=0;i<numSources;i++){
    sprintf(nm,"Sink.%d",i);
    nd=newNode(nm);
    AttachNode(dg,nd);
  }
  totComparators=0;
  numPrevLayerNodes=numLayerNodes;
  while(numLayerNodes>maxInDeg){
    numLayerNodes=numLayerNodes/maxInDeg;
    if(numLayerNodes*maxInDeg<numPrevLayerNodes)numLayerNodes++;
    for(i=0;i<numLayerNodes;i++){
      sprintf(nm,"Comparator.%d",totComparators);
      totComparators++;
      nd=newNode(nm);
      id=AttachNode(dg,nd);
      for(j=0;j<maxInDeg;j++){
        sid=i*maxInDeg+j;
        if(sid>=numPrevLayerNodes) break;
        snd=dg->node[firstLayerNode+sid];
        ar=newArc(dg->node[id],snd);
        AttachArc(dg,ar);
      }
    }
    firstLayerNode+=numPrevLayerNodes;
    numPrevLayerNodes=numLayerNodes;
  }
  source=newNode((char*)"Source");
  AttachNode(dg,source);
  for(i=0;i<numPrevLayerNodes;i++){
    nd=dg->node[firstLayerNode+i];
    ar=newArc(source,nd);
    AttachArc(dg,ar);
  }

  for(i=0;i<dg->numNodes/2;i++){  /* Topological sorting */
    tmp=dg->node[i];
    dg->node[i]=dg->node[dg->numNodes-1-i];
    dg->node[i]->id=i;
    dg->node[dg->numNodes-1-i]=tmp;
    dg->node[dg->numNodes-1-i]->id=dg->numNodes-1-i;
  }
  return dg;
}
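
/* Build the DT "Black Hole" (BH) graph: num_sources source nodes are
 * funneled through layers of comparators (each with at most maxInDeg
 * incoming arcs) into a single sink node.  Construction already proceeds
 * from sources to sink, so the node order is topological as-is. */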
static DGraph *buildBH(const char cls){
/* Nodes of the graph must be topologically sorted to avoid MPI deadlock.*/
  int i=0,j=0;
  int numSources=num_sources,maxInDeg=4;
  int numLayerNodes=numSources,firstLayerNode=0;
  DGraph *dg;
  DGNode *nd=NULL, *snd=NULL, *sink=NULL;
  DGArc *ar=NULL;
  int totComparators=0;
  int numPrevLayerNodes=numLayerNodes;
  int id=0, sid=0;
  char nm[BLOCK_SIZE];

  sprintf(nm,"DT_BH.%c",cls);
  dg=newDGraph(nm);

  for(i=0;i<numSources;i++){
    sprintf(nm,"Source.%d",i);
    nd=newNode(nm);
    AttachNode(dg,nd);
  }
  while(numLayerNodes>maxInDeg){
    numLayerNodes=numLayerNodes/maxInDeg;
    if(numLayerNodes*maxInDeg<numPrevLayerNodes)numLayerNodes++;
    for(i=0;i<numLayerNodes;i++){
      sprintf(nm,"Comparator.%d",totComparators);
      totComparators++;
      nd=newNode(nm);
      id=AttachNode(dg,nd);
      for(j=0;j<maxInDeg;j++){
        sid=i*maxInDeg+j;
        if(sid>=numPrevLayerNodes) break;
        snd=dg->node[firstLayerNode+sid];
        ar=newArc(snd,dg->node[id]);
        AttachArc(dg,ar);
      }
    }
    firstLayerNode+=numPrevLayerNodes;
    numPrevLayerNodes=numLayerNodes;
  }
  sink=newNode((char*)"Sink");
  AttachNode(dg,sink);
  for(i=0;i<numPrevLayerNodes;i++){
    nd=dg->node[firstLayerNode+i];
    ar=newArc(nd,sink);
    AttachArc(dg,ar);
  }
  return dg;
}

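/* A feature array: a heap-allocated buffer of doubles plus its length.
 * This is the unit of data exchanged between graph nodes over MPI. */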
typedef struct{
  int len;
  double* val;
} Arr;

static Arr *newArr(int len){
  Arr *arr=(Arr *)malloc(sizeof(Arr)); //Arr *arr=(Arr *)SMPI_SHARED_MALLOC(sizeof(Arr));
  arr->len=len;
  arr->val=(double *)malloc(len*sizeof(double)); //arr->val=(double *)SMPI_SHARED_MALLOC(len*sizeof(double));
  return arr;
}

static void arrShow(Arr* a){
  if(!a) fprintf(stderr,"-- NULL array\n");
  else{
    fprintf(stderr,"-- length=%d\n",a->len);
  }
}

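/* Checksum of a feature array: the mean of its squared values.  The
 * per-node results are combined in Reduce()/ReduceStreams() into the
 * "L2 Norm" that verify() checks against the reference value. */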
static double CheckVal(Arr *feat){
  double csum=0.0;
  int i=0;
  for(i=0;i<feat->len;i++){
    csum+=feat->val[i]*feat->val[i]/feat->len; /* The truncation does not work since result will be 0 for large len */
  }
  return csum;
}

static int GetFNumDPar(int* mean, int* stdev){
  *mean=num_samples;
  *stdev=deviation;
  return 0;
}

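/* Pseudo-random number of features produced by source node 'id': the class
 * parameters (num_samples, deviation) set the mean and spread, and a
 * deterministic hash of the node id (via randlc/ipowMod) picks the offset,
 * so every run generates the same lengths. */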
static int GetFeatureNum(char *mbname,int id){
  double tran=314159265.0;
  double A=2*id+1;
  double denom=randlc(&tran,&A);
  char cval='S';
  int mean=num_samples,stdev=128;
  int rtfs=0,len=0;
  GetFNumDPar(&mean,&stdev);
  rtfs=ipowMod((int)(1/denom)*(int)cval,(long long int) (2*id+1),2*stdev);
  if(rtfs<0) rtfs=-rtfs;
  len=mean-stdev+rtfs;
  return len;
}

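/* Generate the feature array emitted by a source node: fdim values per
 * feature (fielddim = 4 in this benchmark), each component filled from an
 * independent multiplicative congruential sequence seeded deterministically
 * from the node id.  Per-node generation time is reported when timers are on. */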
static Arr* RandomFeatures(char *bmname,int fdim,int id){
  int len=GetFeatureNum(bmname,id)*fdim;
  Arr* feat=newArr(len);
  int nxg=2,nyg=2,nzg=2,nfg=5;
  int nx=421,ny=419,nz=1427,nf=3527;
  long long int expon=(len*(id+1))%3141592;
  int seedx=ipowMod(nxg,expon,nx),
      seedy=ipowMod(nyg,expon,ny),
      seedz=ipowMod(nzg,expon,nz),
      seedf=ipowMod(nfg,expon,nf);
  int i=0;
  if(timer_on){
    timer_clear(id+1);
    timer_start(id+1);
  }
  for(i=0;i<len;i+=fdim){
    seedx=(seedx*nxg)%nx;
    seedy=(seedy*nyg)%ny;
    seedz=(seedz*nzg)%nz;
    seedf=(seedf*nfg)%nf;
    feat->val[i]=seedx;
    feat->val[i+1]=seedy;
    feat->val[i+2]=seedz;
    feat->val[i+3]=seedf;
  }
  if(timer_on){
    timer_stop(id+1);
    fprintf(stderr,"** RandomFeatures time in node %d = %f\n",id,timer_read(id+1));
  }
  return feat;
}

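/* Stretch a feature array to blen entries: each interior value is spread
 * evenly over the destination interval it maps to, while the first and
 * last entries are copied verbatim.  Used by WindowFilter to bring two
 * arrays of different lengths to a common size before comparing them. */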
static void Resample(Arr *a,int blen){
    long long int i=0,j=0,jlo=0,jhi=0;
    double avval=0.0;
    double *nval=(double *)malloc(blen*sizeof(double));
    //double *nval=(double *)SMPI_SHARED_MALLOC(blen*sizeof(double));
    for(i=0;i<blen;i++) nval[i]=0.0;
    for(i=1;i<a->len-1;i++){
      jlo=(int)(0.5*(2*i-1)*(blen/a->len));
      jhi=(int)(0.5*(2*i+1)*(blen/a->len));

      avval=a->val[i]/(jhi-jlo+1);
      for(j=jlo;j<=jhi;j++){
        nval[j]+=avval;
      }
    }
    nval[0]=a->val[0];
    nval[blen-1]=a->val[a->len-1];
    free(a->val); //SMPI_SHARED_FREE(a->val);
    a->val=nval;
    a->len=blen;
}

#define fielddim 4
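/* "Comparator" work: for every 4-component feature of 'a', look at the
 * aligned, next and previous features of 'b', pick the one with the
 * smallest squared distance, and overwrite the feature of 'a' with that
 * choice scaled by a weight derived from 'w' (the node id).  The result
 * accumulates in place in 'a'; 'w' also selects the per-node timer slot. */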
static Arr* WindowFilter(Arr *a, Arr* b,int w){
  int i=0,j=0,k=0;
  double rms0=0.0,rms1=0.0,rmsm1=0.0;
  double weight=((double) (w+1))/(w+2);

  w+=1;
  if(timer_on){
    timer_clear(w);
    timer_start(w);
  }
  if(a->len<b->len) Resample(a,b->len);
  if(a->len>b->len) Resample(b,a->len);
  for(i=fielddim;i<a->len-fielddim;i+=fielddim){
    rms0=(a->val[i]-b->val[i])*(a->val[i]-b->val[i])
        +(a->val[i+1]-b->val[i+1])*(a->val[i+1]-b->val[i+1])
        +(a->val[i+2]-b->val[i+2])*(a->val[i+2]-b->val[i+2])
        +(a->val[i+3]-b->val[i+3])*(a->val[i+3]-b->val[i+3]);
    j=i+fielddim;
    rms1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j])
        +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1])
        +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2])
        +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]);
    j=i-fielddim;
    rmsm1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j])
         +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1])
         +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2])
         +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]);
    k=0;
    if(rms1<rms0){
      k=1;
      rms0=rms1;
    }
    if(rmsm1<rms0) k=-1;
    if(k==0){
      j=i+fielddim;
      a->val[i]=weight*b->val[i];
      a->val[i+1]=weight*b->val[i+1];
      a->val[i+2]=weight*b->val[i+2];
      a->val[i+3]=weight*b->val[i+3];
    }else if(k==1){
      j=i+fielddim;
      a->val[i]=weight*b->val[j];
      a->val[i+1]=weight*b->val[j+1];
      a->val[i+2]=weight*b->val[j+2];
      a->val[i+3]=weight*b->val[j+3];
    }else { /*if(k==-1)*/
      j=i-fielddim;
      a->val[i]=weight*b->val[j];
      a->val[i+1]=weight*b->val[j+1];
      a->val[i+2]=weight*b->val[j+2];
      a->val[i+3]=weight*b->val[j+3];
    }
  }
  if(timer_on){
    timer_stop(w);
    fprintf(stderr,"** WindowFilter time in node %d = %f\n",(w-1),timer_read(w));
  }
  return a;
}

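/* Forward a node's feature array along all of its outgoing arcs: for each
 * arc whose head lives on another MPI rank, send the length and then the
 * data, using the arc id as the message tag.  Arcs to nodes on the same
 * rank are skipped here; the receiver reads nd->feat directly instead. */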
static int SendResults(DGraph *dg,DGNode *nd,Arr *feat){
  int i=0,tag=0;
  DGArc *ar=NULL;
  DGNode *head=NULL;
  if(!feat) return 0;
  TRACE_smpi_set_category ("SendResults");
  for(i=0;i<nd->outDegree;i++){
    ar=nd->outArc[i];
    if(ar->tail!=nd) continue;
    head=ar->head;
    tag=ar->id;
    if(head->address!=nd->address){
      MPI_Send(&feat->len,1,MPI_INT,head->address,tag,MPI_COMM_WORLD);
      MPI_Send(feat->val,feat->len,MPI_DOUBLE,head->address,tag,MPI_COMM_WORLD);
    }
  }
  TRACE_smpi_set_category (NULL);
  return 1;
}
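
/* Comparator-node work: receive (or copy locally) the feature array coming
 * in on every incoming arc, fold each one into an accumulator with
 * WindowFilter(), average the result over the in-degree and store it as
 * this node's feature array. */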
static Arr* CombineStreams(DGraph *dg,DGNode *nd){
  Arr *resfeat=newArr(num_samples*fielddim);
  int i=0,len=0,tag=0;
  DGArc *ar=NULL;
  DGNode *tail=NULL;
  MPI_Status status;
  Arr *feat=NULL,*featp=NULL;

  if(nd->inDegree==0) return NULL;
  for(i=0;i<nd->inDegree;i++){
    ar=nd->inArc[i];
    if(ar->head!=nd) continue;
    tail=ar->tail;
    if(tail->address!=nd->address){
      len=0;
      tag=ar->id;
      MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status);
      feat=newArr(len);
      MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status);
      resfeat=WindowFilter(resfeat,feat,nd->id);
      free(feat);//SMPI_SHARED_FREE(feat);
    }else{
      featp=(Arr *)tail->feat;
      feat=newArr(featp->len);
      memcpy(feat->val,featp->val,featp->len*sizeof(double));
      resfeat=WindowFilter(resfeat,feat,nd->id);
      free(feat);//SMPI_SHARED_FREE(feat);
    }
  }
  for(i=0;i<resfeat->len;i++) resfeat->val[i]=((int)resfeat->val[i])/nd->inDegree;
  nd->feat=resfeat;
  return nd->feat;
}

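/* Reduce one incoming feature array to a single number: w times its
 * CheckVal() checksum, truncated to an integer so that the value used
 * for verification is reproducible across nodes and array sizes. */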
static double Reduce(Arr *a,int w){
  double retv=0.0;
  if(timer_on){
    timer_clear(w);
    timer_start(w);
  }
  retv=(int)(w*CheckVal(a)); /* The casting is needed for node- and array-dependent verification */
  if(timer_on){
    timer_stop(w);
    fprintf(stderr,"** Reduce time in node %d = %f\n",(w-1),timer_read(w));
  }
  return retv;
}

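/* Sink-node work: receive (or read locally) the feature array on every
 * incoming arc, reduce each one to a partial checksum, average over the
 * in-degree and scale by the node id to form this sink's contribution to
 * the global checksum. */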
static double ReduceStreams(DGraph *dg,DGNode *nd){
  double csum=0.0;
  int i=0,len=0,tag=0;
  DGArc *ar=NULL;
  DGNode *tail=NULL;
  Arr *feat=NULL;
  double retv=0.0;

  TRACE_smpi_set_category ("ReduceStreams");

  for(i=0;i<nd->inDegree;i++){
    ar=nd->inArc[i];
    if(ar->head!=nd) continue;
    tail=ar->tail;
    if(tail->address!=nd->address){
      MPI_Status status;
      len=0;
      tag=ar->id;
      MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status);
      feat=newArr(len);
      MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status);
      csum+=Reduce(feat,(nd->id+1));
      free(feat);//SMPI_SHARED_FREE(feat);
    }else{
      csum+=Reduce(tail->feat,(nd->id+1));
    }
  }
  if(nd->inDegree>0)csum=(((long long int)csum)/nd->inDegree);
  retv=(nd->id+1)*csum;
  return retv;
}

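/* Main per-rank driver: walk the graph in topological order and run the
 * work of every node assigned to this rank -- sources generate and send
 * features, comparators combine and forward them, sinks reduce them and
 * ship their partial checksum to rank 0, which sums the contributions and
 * verifies the result. */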
static int ProcessNodes(DGraph *dg,int me){
  double chksum=0.0;
  Arr *feat=NULL;
  int i=0,verified=0,tag;
  DGNode *nd=NULL;
  double rchksum=0.0;
  MPI_Status status;

  TRACE_smpi_set_category ("ProcessNodes");

  for(i=0;i<dg->numNodes;i++){
    nd=dg->node[i];
    if(nd->address!=me) continue;
    if(strstr(nd->name,"Source")){
      nd->feat=RandomFeatures(dg->name,fielddim,nd->id);
      SendResults(dg,nd,nd->feat);
    }else if(strstr(nd->name,"Sink")){
      chksum=ReduceStreams(dg,nd);
      tag=dg->numArcs+nd->id; /* offset the tags to avoid clashing with arc tags */
      MPI_Send(&chksum,1,MPI_DOUBLE,0,tag,MPI_COMM_WORLD);
    }else{
      feat=CombineStreams(dg,nd);
      SendResults(dg,nd,feat);
    }
  }

  TRACE_smpi_set_category ("ProcessNodes");

  if(me==0){ /* Report node */
    rchksum=0.0;
    chksum=0.0;
    for(i=0;i<dg->numNodes;i++){
      nd=dg->node[i];
      if(!strstr(nd->name,"Sink")) continue;
      tag=dg->numArcs+nd->id; /* offset the tags to avoid clashing with arc tags */
      MPI_Recv(&rchksum,1,MPI_DOUBLE,nd->address,tag,MPI_COMM_WORLD,&status);
      chksum+=rchksum;
    }
    verified=verify(dg->name,chksum);
  }
  return verified;
}

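/* Program entry point: initialize MPI, read the class and process count,
 * derive the class-dependent problem sizes, build the requested graph
 * (BH, WH or SH), map node i to MPI rank i, process the nodes, and let
 * rank 0 print the timing, bandwidth and verification summary. */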
int main(int argc,char **argv ){
  int my_rank,comm_size;
  int i;
  DGraph *dg=NULL;
  int verified=0, featnum=0;
  double bytes_sent=2.0,tot_time=0.0;

  MPI_Init( &argc, &argv );
  MPI_Comm_rank( MPI_COMM_WORLD, &my_rank );
  MPI_Comm_size( MPI_COMM_WORLD, &comm_size );

  TRACE_smpi_set_category ("begin");
  get_info(argc, argv, &nprocs, &class);
  check_info(DT, nprocs, class);

  if      (class == 'S') { num_samples=1728; deviation=128; num_sources=4; }
  else if (class == 'W') { num_samples=1728*8; deviation=128*2; num_sources=4*2; }
  else if (class == 'A') { num_samples=1728*64; deviation=128*4; num_sources=4*4; }
  else if (class == 'B') { num_samples=1728*512; deviation=128*8; num_sources=4*8; }
  else if (class == 'C') { num_samples=1728*4096; deviation=128*16; num_sources=4*16; }
  else if (class == 'D') { num_samples=1728*4096*8; deviation=128*32; num_sources=4*32; }
  else {
    printf("setparams: Internal error: invalid class type %c\n", class);
    exit(1);
  }

  if(argc!=2 || ( strncmp(argv[1],"BH",2)!=0 && strncmp(argv[1],"WH",2)!=0 && strncmp(argv[1],"SH",2)!=0)){
    if(my_rank==0){
      fprintf(stderr,"** Usage: mpirun -np N ../bin/dt.S GraphName\n");
      fprintf(stderr,"** Where \n   - N is integer number of MPI processes\n");
      fprintf(stderr,"   - S is the class S, W, or A \n");
      fprintf(stderr,"   - GraphName is the communication graph name BH, WH, or SH.\n");
      fprintf(stderr,"   - the number of MPI processes N should not be less than \n");
      fprintf(stderr,"     the number of nodes in the graph\n");
    }
    MPI_Finalize();
    exit(0);
  }
  if(strncmp(argv[1],"BH",2)==0){
    dg=buildBH(class);
  }else if(strncmp(argv[1],"WH",2)==0){
    dg=buildWH(class);
  }else if(strncmp(argv[1],"SH",2)==0){
    dg=buildSH(class);
  }

  if(timer_on&&dg->numNodes+1>timers_tot){
    timer_on=0;
    if(my_rank==0)
      fprintf(stderr,"Not enough timers. Node timing is off. \n");
  }
  if(dg->numNodes>comm_size){
    if(my_rank==0){
      fprintf(stderr,"**  The number of MPI processes should not be less than \n");
      fprintf(stderr,"**  the number of nodes in the graph\n");
      fprintf(stderr,"**  Number of MPI processes = %d\n",comm_size);
      fprintf(stderr,"**  Number nodes in the graph = %d\n",dg->numNodes);
    }
    MPI_Finalize();
    exit(0);
  }
  for(i=0;i<dg->numNodes;i++){
    dg->node[i]->address=i;
  }
  if( my_rank == 0 ){
    printf( "\n\n NAS Parallel Benchmarks 3.3 -- DT Benchmark\n\n" );
    graphShow(dg,0);
    timer_clear(0);
    timer_start(0);
  }
  verified=ProcessNodes(dg,my_rank);
  TRACE_smpi_set_category ("end");

  featnum=num_samples*fielddim;
  bytes_sent=featnum*dg->numArcs;
  bytes_sent/=1048576;
  if(my_rank==0){
    timer_stop(0);
    tot_time=timer_read(0);
    c_print_results( dg->name, class, featnum, 0, 0, dg->numNodes, 0, comm_size, tot_time, bytes_sent/tot_time,
                     "bytes transmitted", verified);
  }
  MPI_Finalize();
  return 1;
}