AND – Algorithmique Numérique Distribuée

Public GIT Repository
Don't protect only some paths against NULL
[simgrid.git] / examples / smpi / NAS / dt.c
index ca3a18a..55b29dd 100644 (file)
@@ -1,51 +1,51 @@
 /*************************************************************************
  *                                                                       * 
- *        N  A  S     P A R A L L E L     B E N C H M A R K S  3.3       *
- *                                                                       * 
- *                                  D T           * 
- *                                                                       * 
+ *    N  A  S   P A R A L L E L   B E N C H M A R K S  3.3     *
+ *                                     *
+ *                  D T       *
+ *                                     *
  ************************************************************************* 
- *                                                                       * 
- *   This benchmark is part of the NAS Parallel Benchmark 3.3 suite.     *
- *                                                                       * 
- *   Permission to use, copy, distribute and modify this software        * 
- *   for any purpose with or without fee is hereby granted.  We          * 
- *   request, however, that all derived work reference the NAS           * 
- *   Parallel Benchmarks 3.3. This software is provided "as is"          *
- *   without express or implied warranty.                                * 
- *                                                                       * 
- *   Information on NPB 3.3, including the technical report, the         *
- *   original specifications, source code, results and information       * 
- *   on how to submit new results, is available at:                      * 
- *                                                                       * 
- *          http:  www.nas.nasa.gov/Software/NPB                         * 
- *                                                                       * 
- *   Send comments or suggestions to  npb@nas.nasa.gov                   * 
- *   Send bug reports to              npb-bugs@nas.nasa.gov              * 
- *                                                                       * 
- *         NAS Parallel Benchmarks Group                                 * 
- *         NASA Ames Research Center                                     * 
- *         Mail Stop: T27A-1                                             * 
- *         Moffett Field, CA   94035-1000                                * 
- *                                                                       * 
- *         E-mail:  npb@nas.nasa.gov                                     * 
- *         Fax:     (650) 604-3957                                       * 
- *                                                                       * 
+ *                                     *
+ *   This benchmark is part of the NAS Parallel Benchmark 3.3 suite.   *
+ *                                     *
+ *   Permission to use, copy, distribute and modify this software    *
+ *   for any purpose with or without fee is hereby granted.  We      *
+ *   request, however, that all derived work reference the NAS       *
+ *   Parallel Benchmarks 3.3. This software is provided "as is"      *
+ *   without express or implied warranty.                *
+ *                                     *
+ *   Information on NPB 3.3, including the technical report, the     *
+ *   original specifications, source code, results and information     *
+ *   on how to submit new results, is available at:            *
+ *                                     *
+ *      http:  www.nas.nasa.gov/Software/NPB             *
+ *                                     *
+ *   Send comments or suggestions to  npb@nas.nasa.gov           *
+ *   Send bug reports to        npb-bugs@nas.nasa.gov        *
+ *                                     *
+ *     NAS Parallel Benchmarks Group                 *
+ *     NASA Ames Research Center                   *
+ *     Mail Stop: T27A-1                       *
+ *     Moffett Field, CA   94035-1000                *
+ *                                     *
+ *     E-mail:  npb@nas.nasa.gov                   *
+ *     Fax:   (650) 604-3957                     *
+ *                                     *
  ************************************************************************* 
- *                                                                       * 
- *   Author: M. Frumkin               *             * 
- *                                                                       * 
+ *                                     *
+ *   Author: M. Frumkin         *       *
+ *                                     *
  *************************************************************************/
 
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
 
+#include "DGraph.h"
 #include "smpi/mpi.h"
 #include "nas_common.h"
 #include "simgrid/instr.h" //TRACE_
 
-
 int timer_on=0,timers_tot=64;
 double start[64], elapsed[64];
 
@@ -56,186 +56,168 @@ int deviation;
 int num_sources;
 
 static int verify(char *bmname,double rnm2){
-    double verify_value=0.0;
-    double epsilon=1.0E-8;
-    int verified=-1;
-    if (class != 'U') {
-       if(class=='S') {
-         if(strstr(bmname,"BH")){
-           verify_value=30892725.0;
-         }else if(strstr(bmname,"WH")){
-           verify_value=67349758.0;
-         }else if(strstr(bmname,"SH")){
-           verify_value=58875767.0;
-         }else{
-           fprintf(stderr,"No such benchmark as %s.\n",bmname);
-         }
-         verified = 0;
-       }else if(class=='W') {
-         if(strstr(bmname,"BH")){
-       verify_value = 4102461.0;
-         }else if(strstr(bmname,"WH")){
-                verify_value = 204280762.0;
-         }else if(strstr(bmname,"SH")){
-       verify_value = 186944764.0;
-         }else{
-           fprintf(stderr,"No such benchmark as %s.\n",bmname);
-         }
-         verified = 0;
-       }else if(class=='A') {
-         if(strstr(bmname,"BH")){
-       verify_value = 17809491.0;
-         }else if(strstr(bmname,"WH")){
-       verify_value = 1289925229.0;
-         }else if(strstr(bmname,"SH")){
-       verify_value = 610856482.0;
-         }else{
-           fprintf(stderr,"No such benchmark as %s.\n",bmname);
-         }
+  double verify_value=0.0;
+  double epsilon=1.0E-8;
+  int verified=-1;
+  if (class != 'U') {
+    if(class=='S') {
+     if(strstr(bmname,"BH")){
+       verify_value=30892725.0;
+     }else if(strstr(bmname,"WH")){
+       verify_value=67349758.0;
+     }else if(strstr(bmname,"SH")){
+       verify_value=58875767.0;
+     }else{
+       fprintf(stderr,"No such benchmark as %s.\n",bmname);
+     }
      verified = 0;
-       }else if(class=='B') {
-         if(strstr(bmname,"BH")){
-       verify_value = 4317114.0;
-         }else if(strstr(bmname,"WH")){
-       verify_value = 7877279917.0;
-         }else if(strstr(bmname,"SH")){
-       verify_value = 1836863082.0;
-         }else{
-           fprintf(stderr,"No such benchmark as %s.\n",bmname);
-       verified = 0;
-         }
-       }else if(class=='C') {
-         if(strstr(bmname,"BH")){
-       verify_value = 0.0;
-         }else if(strstr(bmname,"WH")){
-       verify_value = 0.0;
-         }else if(strstr(bmname,"SH")){
-       verify_value = 0.0;
-         }else{
-           fprintf(stderr,"No such benchmark as %s.\n",bmname);
-       verified = -1;
-         }
-       }else if(class=='D') {
-         if(strstr(bmname,"BH")){
-       verify_value = 0.0;
-         }else if(strstr(bmname,"WH")){
-       verify_value = 0.0;
-         }else if(strstr(bmname,"SH")){
-       verify_value = 0.0;
-         }else{
-           fprintf(stderr,"No such benchmark as %s.\n",bmname);
-         }
-         verified = -1;
-       }else{
-         fprintf(stderr,"No such class as %c.\n",class);
-       }
-       fprintf(stderr," %s L2 Norm = %f\n",bmname,rnm2);
-       if(verified==-1){
-     fprintf(stderr," No verification was performed.\n");
-       }else if( rnm2 - verify_value < epsilon &&
-                 rnm2 - verify_value > -epsilon) {  /* abs here does not work on ALTIX */
+    }else if(class=='W') {
+      if(strstr(bmname,"BH")){
+        verify_value = 4102461.0;
+      }else if(strstr(bmname,"WH")){
+        verify_value = 204280762.0;
+      }else if(strstr(bmname,"SH")){
+        verify_value = 186944764.0;
+      }else{
+        fprintf(stderr,"No such benchmark as %s.\n",bmname);
+      }
+      verified = 0;
+    }else if(class=='A') {
+      if(strstr(bmname,"BH")){
+        verify_value = 17809491.0;
+      }else if(strstr(bmname,"WH")){
+        verify_value = 1289925229.0;
+      }else if(strstr(bmname,"SH")){
+        verify_value = 610856482.0;
+      }else{
+        fprintf(stderr,"No such benchmark as %s.\n",bmname);
+      }
+      verified = 0;
+    }else if(class=='B') {
+      if(strstr(bmname,"BH")){
+        verify_value = 4317114.0;
+      }else if(strstr(bmname,"WH")){
+        verify_value = 7877279917.0;
+      }else if(strstr(bmname,"SH")){
+        verify_value = 1836863082.0;
+      }else{
+        fprintf(stderr,"No such benchmark as %s.\n",bmname);
+        verified = 0;
+      }
+    }else if(class=='C' || class == 'D') {
+        verify_value = 0.0;
+    }else{
+      fprintf(stderr,"No such class as %c.\n",class);
+    }
+    fprintf(stderr," %s L2 Norm = %f\n",bmname,rnm2);
+    if(verified==-1){
+      fprintf(stderr," No verification was performed.\n");
+    }else if( rnm2 - verify_value < epsilon && rnm2 - verify_value > -epsilon) {  /* abs here does not work on ALTIX */
       verified = 1;
       fprintf(stderr," Deviation = %f\n",(rnm2 - verify_value));
-       }else{
-     verified = 0;
-     fprintf(stderr," The correct verification value = %f\n",verify_value);
-     fprintf(stderr," Got value = %f\n",rnm2);
-       }
     }else{
-       verified = -1;
+      verified = 0;
+      fprintf(stderr," The correct verification value = %f\n",verify_value);
+      fprintf(stderr," Got value = %f\n",rnm2);
     }
-    return  verified;  
+  }else{
+    verified = -1;
   }
+  return  verified;
+}
 
 static int ipowMod(int a,long long int n,int md){
-  int seed=1,q=a,r=1;
+  int seed=1;
+  int q=a;
+  int r=1;
+  int exp = n;
   if(n<0){
     fprintf(stderr,"ipowMod: exponent must be nonnegative exp=%lld\n",n);
-    n=-n; /* temp fix */
-/*    return 1; */
+    exp=-n; /* temp fix */
   }
   if(md<=0){
     fprintf(stderr,"ipowMod: module must be positive mod=%d",md);
     return 1;
   }
-  if(n==0) return 1;
-  while(n>1){
-    int n2 = n/2;
+  if(n==0)
+    return 1;
+  while(exp>1){
+    int n2 = exp/2;
     if (n2*2==n){
-       seed = (q*q)%md;
-       q=seed;
-       n = n2;
+      seed = (q*q)%md;
+      q=seed;
+      exp = n2;
     }else{
-       seed = (r*q)%md;
-       r=seed;
-       n = n-1;
+      seed = (r*q)%md;
+      r=seed;
+      exp = exp -1;
     }
   }
   seed = (r*q)%md;
   return seed;
 }
 
-#include "DGraph.h"
 static DGraph *buildSH(const char cls){
-/*
-  Nodes of the graph must be topologically sorted
-  to avoid MPI deadlock.
-*/
+/* Nodes of the graph must be topologically sorted to avoid MPI deadlock. */
   DGraph *dg;
-  int numSources=num_sources; /* must be power of 2 */
-  int numOfLayers=0,tmpS=numSources>>1;
+  unsigned int numSources=num_sources; /* must be power of 2 */
+  unsigned int numOfLayers=0;
+  unsigned int tmpS=numSources>>1;
   int firstLayerNode=0;
   DGArc *ar=NULL;
   DGNode *nd=NULL;
-  int mask=0x0,ndid=0,ndoff=0;
-  int i=0,j=0;
+  unsigned int mask=0x0;
+  int ndid=0,ndoff=0;
+  unsigned int i=0;
+  unsigned int j=0;
   char nm[BLOCK_SIZE];
-  
-  sprintf(nm,"DT_SH.%c",cls);
+
+  snprintf(nm,BLOCK_SIZE,"DT_SH.%c",cls);
   dg=newDGraph(nm);
 
   while(tmpS>1){
-    numOfLayers++;
-    tmpS>>=1;
+  numOfLayers++;
+  tmpS>>=1;
   }
   for(i=0;i<numSources;i++){
-    sprintf(nm,"Source.%d",i);
-    nd=newNode(nm);
-    AttachNode(dg,nd);
+  snprintf(nm,BLOCK_SIZE,"Source.%d",i);
+  nd=newNode(nm);
+  AttachNode(dg,nd);
   }
   for(j=0;j<numOfLayers;j++){
     mask=0x00000001<<j;
     for(i=0;i<numSources;i++){
-      sprintf(nm,"Comparator.%d",(i+j*firstLayerNode));
+      snprintf(nm,BLOCK_SIZE,"Comparator.%d",(i+j*firstLayerNode));
       nd=newNode(nm);
       AttachNode(dg,nd);
       ndoff=i&(~mask);
       ndid=firstLayerNode+ndoff;
-      ar=newArc(dg->node[ndid],nd);     
+      ar=newArc(dg->node[ndid],nd);
       AttachArc(dg,ar);
       ndoff+=mask;
       ndid=firstLayerNode+ndoff;
-      ar=newArc(dg->node[ndid],nd);     
+      ar=newArc(dg->node[ndid],nd);
       AttachArc(dg,ar);
     }
     firstLayerNode+=numSources;
   }
   mask=0x00000001<<numOfLayers;
   for(i=0;i<numSources;i++){
-    sprintf(nm,"Sink.%d",i);
+    snprintf(nm,BLOCK_SIZE,"Sink.%d",i);
     nd=newNode(nm);
     AttachNode(dg,nd);
     ndoff=i&(~mask);
     ndid=firstLayerNode+ndoff;
-    ar=newArc(dg->node[ndid],nd);     
+    ar=newArc(dg->node[ndid],nd);
     AttachArc(dg,ar);
     ndoff+=mask;
     ndid=firstLayerNode+ndoff;
-    ar=newArc(dg->node[ndid],nd);     
+    ar=newArc(dg->node[ndid],nd);
     AttachArc(dg,ar);
   }
-return dg;
+  return dg;
 }
+
 static DGraph *buildWH(const char cls){
 /*  Nodes of the graph must be topologically sorted to avoid MPI deadlock. */
   int i=0,j=0;
@@ -249,11 +231,11 @@ static DGraph *buildWH(const char cls){
   DGArc *ar=NULL;
   char nm[BLOCK_SIZE];
 
-  sprintf(nm,"DT_WH.%c",cls);
+  snprintf(nm,BLOCK_SIZE,"DT_WH.%c",cls);
   dg=newDGraph(nm);
 
   for(i=0;i<numSources;i++){
-    sprintf(nm,"Sink.%d",i);
+    snprintf(nm,BLOCK_SIZE,"Sink.%d",i);
     nd=newNode(nm);
     AttachNode(dg,nd);
   }
@@ -261,15 +243,17 @@ static DGraph *buildWH(const char cls){
   numPrevLayerNodes=numLayerNodes;
   while(numLayerNodes>maxInDeg){
     numLayerNodes=numLayerNodes/maxInDeg;
-    if(numLayerNodes*maxInDeg<numPrevLayerNodes)numLayerNodes++;
+    if(numLayerNodes*maxInDeg<numPrevLayerNodes)
+      numLayerNodes++;
     for(i=0;i<numLayerNodes;i++){
-      sprintf(nm,"Comparator.%d",totComparators);
+      snprintf(nm,BLOCK_SIZE,"Comparator.%d",totComparators);
       totComparators++;
       nd=newNode(nm);
       id=AttachNode(dg,nd);
       for(j=0;j<maxInDeg;j++){
         sid=i*maxInDeg+j;
-  if(sid>=numPrevLayerNodes) break;
+        if(sid>=numPrevLayerNodes)
+          break;
         snd=dg->node[firstLayerNode+sid];
         ar=newArc(dg->node[id],snd);
         AttachArc(dg,ar);
@@ -295,6 +279,7 @@ static DGraph *buildWH(const char cls){
   }
 return dg;
 }
+
 static DGraph *buildBH(const char cls){
 /* Nodes of the graph must be topologically sorted to avoid MPI deadlock.*/
   int i=0,j=0;
@@ -308,25 +293,27 @@ static DGraph *buildBH(const char cls){
   int id=0, sid=0;
   char nm[BLOCK_SIZE];
 
-  sprintf(nm,"DT_BH.%c",cls);
+  snprintf(nm,BLOCK_SIZE,"DT_BH.%c",cls);
   dg=newDGraph(nm);
 
   for(i=0;i<numSources;i++){
-    sprintf(nm,"Source.%d",i);
+    snprintf(nm,BLOCK_SIZE,"Source.%d",i);
     nd=newNode(nm);
     AttachNode(dg,nd);
   }
   while(numLayerNodes>maxInDeg){
     numLayerNodes=numLayerNodes/maxInDeg;
-    if(numLayerNodes*maxInDeg<numPrevLayerNodes)numLayerNodes++;
+    if(numLayerNodes*maxInDeg<numPrevLayerNodes)
+      numLayerNodes++;
     for(i=0;i<numLayerNodes;i++){
-      sprintf(nm,"Comparator.%d",totComparators);
+      snprintf(nm,BLOCK_SIZE,"Comparator.%d",totComparators);
       totComparators++;
       nd=newNode(nm);
       id=AttachNode(dg,nd);
       for(j=0;j<maxInDeg;j++){
         sid=i*maxInDeg+j;
-  if(sid>=numPrevLayerNodes) break;
+        if(sid>=numPrevLayerNodes)
+          break;
         snd=dg->node[firstLayerNode+sid];
         ar=newArc(snd,dg->node[id]);
         AttachArc(dg,ar);
@@ -342,7 +329,7 @@ static DGraph *buildBH(const char cls){
     ar=newArc(nd,sink);
     AttachArc(dg,ar);
   }
-return dg;
+  return dg;
 }
 
 typedef struct{
@@ -357,17 +344,9 @@ static Arr *newArr(int len){
   return arr;
 }
 
-static void arrShow(Arr* a){
-  if(!a) fprintf(stderr,"-- NULL array\n");
-  else{
-    fprintf(stderr,"-- length=%d\n",a->len);
-  }
-}
-
 static double CheckVal(Arr *feat){
   double csum=0.0;
-  int i=0;
-  for(i=0;i<feat->len;i++){
+  for(int i=0;i<feat->len;i++){
     csum+=feat->val[i]*feat->val[i]/feat->len; /* The truncation does not work since result will be 0 for large len  */
   }
   return csum;
@@ -399,16 +378,13 @@ static Arr* RandomFeatures(char *bmname,int fdim,int id){
   int nxg=2,nyg=2,nzg=2,nfg=5;
   int nx=421,ny=419,nz=1427,nf=3527;
   long long int expon=(len*(id+1))%3141592;
-  int seedx=ipowMod(nxg,expon,nx),
-      seedy=ipowMod(nyg,expon,ny),
-      seedz=ipowMod(nzg,expon,nz),
-      seedf=ipowMod(nfg,expon,nf);
-  int i=0;
+  int seedx=ipowMod(nxg,expon,nx), seedy=ipowMod(nyg,expon,ny), seedz=ipowMod(nzg,expon,nz),seedf=ipowMod(nfg,expon,nf);
+
   if(timer_on){
     timer_clear(id+1);
     timer_start(id+1);
   }
-  for(i=0;i<len;i+=fdim){
+  for(int i=0;i<len;i+=fdim){
     seedx=(seedx*nxg)%nx;
     seedy=(seedy*nyg)%ny;
     seedz=(seedz*nzg)%nz;
@@ -419,32 +395,32 @@ static Arr* RandomFeatures(char *bmname,int fdim,int id){
     feat->val[i+3]=seedf;
   }
   if(timer_on){
-    timer_stop(id+1);
-    fprintf(stderr,"** RandomFeatures time in node %d = %f\n",id,timer_read(id+1));
+  timer_stop(id+1);
+  fprintf(stderr,"** RandomFeatures time in node %d = %f\n",id,timer_read(id+1));
   }
   return feat;
 }
 
 static void Resample(Arr *a,int blen){
-    long long int i=0,j=0,jlo=0,jhi=0;
-    double avval=0.0;
-    double *nval=(double *)malloc(blen*sizeof(double));
-    //double *nval=(double *)SMPI_SHARED_MALLOC(blen*sizeof(double));
-    for(i=0;i<blen;i++) nval[i]=0.0;
-    for(i=1;i<a->len-1;i++){
-      jlo=(int)(0.5*(2*i-1)*(blen/a->len)); 
-      jhi=(int)(0.5*(2*i+1)*(blen/a->len));
-
-      avval=a->val[i]/(jhi-jlo+1);
-      for(j=jlo;j<=jhi;j++){
-        nval[j]+=avval;
-      }
+  long long int i=0,j=0,jlo=0,jhi=0;
+  double avval=0.0;
+  double *nval=(double *)malloc(blen*sizeof(double));
+  //double *nval=(double *)SMPI_SHARED_MALLOC(blen*sizeof(double));
+  for(i=0;i<blen;i++) nval[i]=0.0;
+  for(i=1;i<a->len-1;i++){
+    jlo=(int)(0.5*(2*i-1)*(blen/a->len));
+    jhi=(int)(0.5*(2*i+1)*(blen/a->len));
+
+    avval=a->val[i]/(jhi-jlo+1);
+    for(j=jlo;j<=jhi;j++){
+    nval[j]+=avval;
     }
-    nval[0]=a->val[0];
-    nval[blen-1]=a->val[a->len-1];
-    free(a->val); //SMPI_SHARED_FREE(a->val);
-    a->val=nval;
-    a->len=blen;
+  }
+  nval[0]=a->val[0];
+  nval[blen-1]=a->val[a->len-1];
+  free(a->val); //SMPI_SHARED_FREE(a->val);
+  a->val=nval;
+  a->len=blen;
 }
 
 #define fielddim 4
@@ -461,20 +437,14 @@ static Arr* WindowFilter(Arr *a, Arr* b,int w){
   if(a->len<b->len) Resample(a,b->len);
   if(a->len>b->len) Resample(b,a->len);
   for(i=fielddim;i<a->len-fielddim;i+=fielddim){
-    rms0=(a->val[i]-b->val[i])*(a->val[i]-b->val[i])
-  +(a->val[i+1]-b->val[i+1])*(a->val[i+1]-b->val[i+1])
-  +(a->val[i+2]-b->val[i+2])*(a->val[i+2]-b->val[i+2])
-  +(a->val[i+3]-b->val[i+3])*(a->val[i+3]-b->val[i+3]);
+    rms0=(a->val[i]-b->val[i])*(a->val[i]-b->val[i]) +(a->val[i+1]-b->val[i+1])*(a->val[i+1]-b->val[i+1])
+          +(a->val[i+2]-b->val[i+2])*(a->val[i+2]-b->val[i+2]) +(a->val[i+3]-b->val[i+3])*(a->val[i+3]-b->val[i+3]);
     j=i+fielddim;
-    rms1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j])
-      +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1])
-      +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2])
-      +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]);
+    rms1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j]) +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1])
+          +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2]) +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]);
     j=i-fielddim;
-    rmsm1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j])
-   +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1])
-   +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2])
-   +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]);
+    rmsm1=(a->val[j]-b->val[j])*(a->val[j]-b->val[j]) +(a->val[j+1]-b->val[j+1])*(a->val[j+1]-b->val[j+1])
+           +(a->val[j+2]-b->val[j+2])*(a->val[j+2]-b->val[j+2]) +(a->val[j+3]-b->val[j+3])*(a->val[j+3]-b->val[j+3]);
     k=0;
     if(rms1<rms0){
       k=1;
@@ -486,20 +456,20 @@ static Arr* WindowFilter(Arr *a, Arr* b,int w){
       a->val[i]=weight*b->val[i];
       a->val[i+1]=weight*b->val[i+1];
       a->val[i+2]=weight*b->val[i+2];
-      a->val[i+3]=weight*b->val[i+3];  
+      a->val[i+3]=weight*b->val[i+3];
     }else if(k==1){
       j=i+fielddim;
       a->val[i]=weight*b->val[j];
       a->val[i+1]=weight*b->val[j+1];
       a->val[i+2]=weight*b->val[j+2];
-      a->val[i+3]=weight*b->val[j+3];  
+      a->val[i+3]=weight*b->val[j+3];
     }else { /*if(k==-1)*/
       j=i-fielddim;
       a->val[i]=weight*b->val[j];
       a->val[i+1]=weight*b->val[j+1];
       a->val[i+2]=weight*b->val[j+2];
-      a->val[i+3]=weight*b->val[j+3];  
-    }     
+      a->val[i+3]=weight*b->val[j+3];
+    }
   }
   if(timer_on){
     timer_stop(w);
@@ -512,21 +482,24 @@ static int SendResults(DGraph *dg,DGNode *nd,Arr *feat){
   int i=0,tag=0;
   DGArc *ar=NULL;
   DGNode *head=NULL;
-  if(!feat) return 0;
+  if(feat == 0)
+    return 0;
   TRACE_smpi_set_category ("SendResults");
   for(i=0;i<nd->outDegree;i++){
     ar=nd->outArc[i];
-    if(ar->tail!=nd) continue;
-    head=ar->head;
-    tag=ar->id;
-    if(head->address!=nd->address){
-      MPI_Send(&feat->len,1,MPI_INT,head->address,tag,MPI_COMM_WORLD);
-      MPI_Send(feat->val,feat->len,MPI_DOUBLE,head->address,tag,MPI_COMM_WORLD);
+    if(ar->tail ==nd){
+      head=ar->head;
+      tag=ar->id;
+      if(head->address!=nd->address){
+        MPI_Send(&feat->len,1,MPI_INT,head->address,tag,MPI_COMM_WORLD);
+        MPI_Send(feat->val,feat->len,MPI_DOUBLE,head->address,tag,MPI_COMM_WORLD);
+      }
     }
   }
   TRACE_smpi_set_category (NULL);
   return 1;
 }
+
 static Arr* CombineStreams(DGraph *dg,DGNode *nd){
   Arr *resfeat=newArr(num_samples*fielddim);
   int i=0,len=0,tag=0;
@@ -538,25 +511,27 @@ static Arr* CombineStreams(DGraph *dg,DGNode *nd){
   if(nd->inDegree==0) return NULL;
   for(i=0;i<nd->inDegree;i++){
     ar=nd->inArc[i];
-    if(ar->head!=nd) continue;
-    tail=ar->tail;
-    if(tail->address!=nd->address){
-      len=0;
-      tag=ar->id;
-      MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status);
-      feat=newArr(len);
-      MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status);
-      resfeat=WindowFilter(resfeat,feat,nd->id);
-      free(feat);//SMPI_SHARED_FREE(feat);
-    }else{
-      featp=(Arr *)tail->feat;
-      feat=newArr(featp->len);
-      memcpy(feat->val,featp->val,featp->len*sizeof(double));
-      resfeat=WindowFilter(resfeat,feat,nd->id);  
-      free(feat);//SMPI_SHARED_FREE(feat);
+    if(ar->head == nd){
+      tail=ar->tail;
+      if(tail->address!=nd->address){
+        len=0;
+        tag=ar->id;
+        MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status);
+        feat=newArr(len);
+        MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status);
+        resfeat=WindowFilter(resfeat,feat,nd->id);
+        free(feat);//SMPI_SHARED_FREE(feat);
+      }else{
+        featp=(Arr *)tail->feat;
+        feat=newArr(featp->len);
+        memcpy(feat->val,featp->val,featp->len*sizeof(double));
+        resfeat=WindowFilter(resfeat,feat,nd->id);
+        free(feat);//SMPI_SHARED_FREE(feat);
+      }
     }
   }
-  for(i=0;i<resfeat->len;i++) resfeat->val[i]=((int)resfeat->val[i])/nd->inDegree;
+  for(i=0;i<resfeat->len;i++)
+    resfeat->val[i]=((int)resfeat->val[i])/nd->inDegree;
   nd->feat=resfeat;
   return nd->feat;
 }
@@ -596,10 +571,10 @@ static double ReduceStreams(DGraph *dg,DGNode *nd){
       MPI_Recv(&len,1,MPI_INT,tail->address,tag,MPI_COMM_WORLD,&status);
       feat=newArr(len);
       MPI_Recv(feat->val,feat->len,MPI_DOUBLE,tail->address,tag,MPI_COMM_WORLD,&status);
-      csum+=Reduce(feat,(nd->id+1));  
+      csum+=Reduce(feat,(nd->id+1));
       free(feat);//SMPI_SHARED_FREE(feat);
     }else{
-      csum+=Reduce(tail->feat,(nd->id+1));  
+      csum+=Reduce(tail->feat,(nd->id+1));
     }
   }
   if(nd->inDegree>0)csum=(((long long int)csum)/nd->inDegree);
@@ -621,7 +596,7 @@ static int ProcessNodes(DGraph *dg,int me){
     nd=dg->node[i];
     if(nd->address!=me) continue;
     if(strstr(nd->name,"Source")){
-      nd->feat=RandomFeatures(dg->name,fielddim,nd->id); 
+      nd->feat=RandomFeatures(dg->name,fielddim,nd->id);
       SendResults(dg,nd,nd->feat);
     }else if(strstr(nd->name,"Sink")){
       chksum=ReduceStreams(dg,nd);
@@ -641,19 +616,19 @@ static int ProcessNodes(DGraph *dg,int me){
     for(i=0;i<dg->numNodes;i++){
       nd=dg->node[i];
       if(!strstr(nd->name,"Sink")) continue;
-       tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */
+      tag=dg->numArcs+nd->id; /* make these to avoid clash with arc tags */
       MPI_Recv(&rchksum,1,MPI_DOUBLE,nd->address,tag,MPI_COMM_WORLD,&status);
       chksum+=rchksum;
     }
     verified=verify(dg->name,chksum);
   }
-return verified;
+  return verified;
 }
 
 int main(int argc,char **argv ){
   int my_rank,comm_size;
   int i;
-  DGraph *dg=NULL;
+  DGraph *dg = NULL;
   int verified=0, featnum=0;
   double bytes_sent=2.0,tot_time=0.0;
 
@@ -665,7 +640,7 @@ int main(int argc,char **argv ){
   get_info(argc, argv, &nprocs, &class);
   check_info(DT, nprocs, class);
 
-  if      (class == 'S') { num_samples=1728; deviation=128; num_sources=4; }
+  if    (class == 'S') { num_samples=1728; deviation=128; num_sources=4; }
   else if (class == 'W') { num_samples=1728*8; deviation=128*2; num_sources=4*2; }
   else if (class == 'A') { num_samples=1728*64; deviation=128*4; num_sources=4*4; }
   else if (class == 'B') { num_samples=1728*512; deviation=128*8; num_sources=4*8; }
@@ -676,63 +651,63 @@ int main(int argc,char **argv ){
     exit(1);
   }
 
-
-     if(argc!=2|| (  strncmp(argv[1],"BH",2)!=0 && strncmp(argv[1],"WH",2)!=0 &&strncmp(argv[1],"SH",2)!=0)){
-      if(my_rank==0){
-        fprintf(stderr,"** Usage: mpirun -np N ../bin/dt.S GraphName\n");
-        fprintf(stderr,"** Where \n   - N is integer number of MPI processes\n");
-        fprintf(stderr,"   - S is the class S, W, or A \n");
-        fprintf(stderr,"   - GraphName is the communication graph name BH, WH, or SH.\n");
-        fprintf(stderr,"   - the number of MPI processes N should not be be less than \n");
-        fprintf(stderr,"     the number of nodes in the graph\n");
-      }
-      MPI_Finalize();
-      exit(0);
-    } 
-   if(strncmp(argv[1],"BH",2)==0){
-      dg=buildBH(class);
-    }else if(strncmp(argv[1],"WH",2)==0){
-      dg=buildWH(class);
-    }else if(strncmp(argv[1],"SH",2)==0){
-      dg=buildSH(class);
+  if(argc!=4 || (strncmp(argv[3],"BH",2)!=0 && strncmp(argv[3],"WH",2)!=0 && strncmp(argv[3],"SH",2)!=0)){
+    if(my_rank==0){
+    fprintf(stderr,"** Usage: mpirun -np N ../bin/dt.S GraphName\n");
+    fprintf(stderr,"** Where \n   - N is integer number of MPI processes\n");
+    fprintf(stderr,"   - S is the class S, W, or A \n");
+    fprintf(stderr,"   - GraphName is the communication graph name BH, WH, or SH.\n");
+    fprintf(stderr,"   - the number of MPI processes N should not be be less than \n");
+    fprintf(stderr,"   the number of nodes in the graph\n");
     }
+    MPI_Finalize();
+    exit(0);
+  }
 
-    if(timer_on&&dg->numNodes+1>timers_tot){
-      timer_on=0;
-      if(my_rank==0)
-        fprintf(stderr,"Not enough timers. Node timeing is off. \n");
-    }
-    if(dg->numNodes>comm_size){
-      if(my_rank==0){
-        fprintf(stderr,"**  The number of MPI processes should not be less than \n");
-        fprintf(stderr,"**  the number of nodes in the graph\n");
-        fprintf(stderr,"**  Number of MPI processes = %d\n",comm_size);
-        fprintf(stderr,"**  Number nodes in the graph = %d\n",dg->numNodes);
-      }
-      MPI_Finalize();
-      exit(0);
-    }
-    for(i=0;i<dg->numNodes;i++){ 
-      dg->node[i]->address=i;
-    }
-    if( my_rank == 0 ){
-      printf( "\n\n NAS Parallel Benchmarks 3.3 -- DT Benchmark\n\n" );
-      graphShow(dg,0);
-      timer_clear(0);
-      timer_start(0);
-    }
-    verified=ProcessNodes(dg,my_rank);
-    TRACE_smpi_set_category ("end");
+  if(strncmp(argv[3],"BH",2)==0){
+    dg=buildBH(class);
+  }else if(strncmp(argv[3],"WH",2)==0){
+    dg=buildWH(class);
+  }else if(strncmp(argv[3],"SH",2)==0){
+    dg=buildSH(class);
+  }
 
-    featnum=num_samples*fielddim;
-    bytes_sent=featnum*dg->numArcs;
-    bytes_sent/=1048576;
+  if(timer_on != 0 && dg->numNodes+1>timers_tot){
+    timer_on=0;
+    if(my_rank==0)
+    fprintf(stderr,"Not enough timers. Node timeing is off. \n");
+  }
+  if(dg->numNodes>comm_size){
     if(my_rank==0){
-      timer_stop(0);
-      tot_time=timer_read(0);
-      c_print_results( dg->name, class, featnum, 0, 0, dg->numNodes, 0, comm_size, tot_time, bytes_sent/tot_time,
-                 "bytes transmitted", verified);
-    }          
+    fprintf(stderr,"**  The number of MPI processes should not be less than \n");
+    fprintf(stderr,"**  the number of nodes in the graph\n");
+    fprintf(stderr,"**  Number of MPI processes = %d\n",comm_size);
+    fprintf(stderr,"**  Number nodes in the graph = %d\n",dg->numNodes);
+    }
     MPI_Finalize();
+    exit(0);
+  }
+  for(i=0; i<dg->numNodes; i++){
+    dg->node[i]->address=i;
+  }
+  if( my_rank == 0 ){
+    printf( "\n\n NAS Parallel Benchmarks 3.3 -- DT Benchmark\n\n" );
+    graphShow(dg,0);
+    timer_clear(0);
+    timer_start(0);
+  }
+  verified=ProcessNodes(dg,my_rank);
+  TRACE_smpi_set_category ("end");
+
+  featnum=num_samples*fielddim;
+  bytes_sent=featnum*dg->numArcs;
+  bytes_sent/=1048576;
+  if(my_rank==0){
+    timer_stop(0);
+    tot_time=timer_read(0);
+    c_print_results( dg->name, class, featnum, 0, 0, dg->numNodes, 0, comm_size, tot_time, bytes_sent/tot_time,
+         "bytes transmitted", verified);
+  }
+  MPI_Finalize();
   return 1;
 }