From 594e990878254062bcbb022525407d24d2591653 Mon Sep 17 00:00:00 2001
From: Frederic Suter <frederic.suter@cc.in2p3.fr>
Date: Mon, 1 Feb 2016 12:55:13 +0100
Subject: [PATCH] further cleaning in NAS  + Fortran files are not used (and
 not smpified)  + Don't need an MPI_dummy implementation, we have SMPI  + have
 DT and DT-folding in the same dir to avoid replication of    DGraph.*

---
 examples/smpi/NAS/DT-folding/DGraph.c         | 184 ----------
 examples/smpi/NAS/DT-folding/DGraph.h         |  43 ---
 examples/smpi/NAS/DT-folding/Makefile         |  26 --
 examples/smpi/NAS/DT-folding/README           |  22 --
 examples/smpi/NAS/DT/Makefile                 |  12 +-
 .../NAS/{DT-folding/dt.c => DT/dt-folding.c}  |   0
 examples/smpi/NAS/EP/Makefile                 |   6 -
 examples/smpi/NAS/EP/ep.f                     | 316 ------------------
 examples/smpi/NAS/MPI_dummy/Makefile          |  38 ---
 examples/smpi/NAS/MPI_dummy/README            |  52 ---
 examples/smpi/NAS/MPI_dummy/mpi.h             | 112 -------
 examples/smpi/NAS/MPI_dummy/mpi_dummy.c       | 265 ---------------
 examples/smpi/NAS/MPI_dummy/mpi_dummy.f       | 309 -----------------
 examples/smpi/NAS/MPI_dummy/mpif.h            |  27 --
 examples/smpi/NAS/MPI_dummy/test.f            |  10 -
 examples/smpi/NAS/MPI_dummy/wtime.c           |  13 -
 examples/smpi/NAS/MPI_dummy/wtime.f           |  12 -
 examples/smpi/NAS/MPI_dummy/wtime.h           |  12 -
 examples/smpi/NAS/MPI_dummy/wtime_sgi64.c     |  74 ----
 examples/smpi/NAS/Makefile                    |  25 +-
 examples/smpi/NAS/common/print_results.f      | 115 -------
 examples/smpi/NAS/common/randdp.f             | 137 --------
 examples/smpi/NAS/common/randdpvec.f          | 186 -----------
 examples/smpi/NAS/common/randi8.f             |  79 -----
 examples/smpi/NAS/common/randi8_safe.f        |  64 ----
 examples/smpi/NAS/common/timers.f             |  78 -----
 examples/smpi/NAS/config/make.dummy           |   7 -
 27 files changed, 11 insertions(+), 2213 deletions(-)
 delete mode 100644 examples/smpi/NAS/DT-folding/DGraph.c
 delete mode 100644 examples/smpi/NAS/DT-folding/DGraph.h
 delete mode 100644 examples/smpi/NAS/DT-folding/Makefile
 delete mode 100644 examples/smpi/NAS/DT-folding/README
 rename examples/smpi/NAS/{DT-folding/dt.c => DT/dt-folding.c} (100%)
 delete mode 100644 examples/smpi/NAS/EP/ep.f
 delete mode 100644 examples/smpi/NAS/MPI_dummy/Makefile
 delete mode 100644 examples/smpi/NAS/MPI_dummy/README
 delete mode 100644 examples/smpi/NAS/MPI_dummy/mpi.h
 delete mode 100644 examples/smpi/NAS/MPI_dummy/mpi_dummy.c
 delete mode 100644 examples/smpi/NAS/MPI_dummy/mpi_dummy.f
 delete mode 100644 examples/smpi/NAS/MPI_dummy/mpif.h
 delete mode 100644 examples/smpi/NAS/MPI_dummy/test.f
 delete mode 100644 examples/smpi/NAS/MPI_dummy/wtime.c
 delete mode 100644 examples/smpi/NAS/MPI_dummy/wtime.f
 delete mode 100644 examples/smpi/NAS/MPI_dummy/wtime.h
 delete mode 100644 examples/smpi/NAS/MPI_dummy/wtime_sgi64.c
 delete mode 100644 examples/smpi/NAS/common/print_results.f
 delete mode 100644 examples/smpi/NAS/common/randdp.f
 delete mode 100644 examples/smpi/NAS/common/randdpvec.f
 delete mode 100644 examples/smpi/NAS/common/randi8.f
 delete mode 100644 examples/smpi/NAS/common/randi8_safe.f
 delete mode 100644 examples/smpi/NAS/common/timers.f
 delete mode 100644 examples/smpi/NAS/config/make.dummy

diff --git a/examples/smpi/NAS/DT-folding/DGraph.c b/examples/smpi/NAS/DT-folding/DGraph.c
deleted file mode 100644
index f573786db8..0000000000
--- a/examples/smpi/NAS/DT-folding/DGraph.c
+++ /dev/null
@@ -1,184 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "DGraph.h"
-
-DGArc *newArc(DGNode *tl,DGNode *hd){
-  DGArc *ar=(DGArc *)malloc(sizeof(DGArc));
-  ar->tail=tl;
-  ar->head=hd;
-  return ar;
-}
-void arcShow(DGArc *ar){
-  DGNode *tl=(DGNode *)ar->tail,
-         *hd=(DGNode *)ar->head;
-  fprintf(stderr,"%d. |%s ->%s\n",ar->id,tl->name,hd->name);
-}
-
-DGNode *newNode(char *nm){
-  DGNode *nd=(DGNode *)malloc(sizeof(DGNode));
-  nd->attribute=0;
-  nd->color=0;
-  nd->inDegree=0;
-  nd->outDegree=0;
-  nd->maxInDegree=SMALL_BLOCK_SIZE;
-  nd->maxOutDegree=SMALL_BLOCK_SIZE;
-  nd->inArc=(DGArc **)malloc(nd->maxInDegree*sizeof(DGArc*));
-  nd->outArc=(DGArc **)malloc(nd->maxOutDegree*sizeof(DGArc*));
-  nd->name=strdup(nm);
-  nd->feat=NULL;
-  return nd;
-}
-void nodeShow(DGNode* nd){
-  fprintf( stderr,"%3d.%s: (%d,%d)\n",
-             nd->id,nd->name,nd->inDegree,nd->outDegree);
-/*
-  if(nd->verified==1) fprintf(stderr,"%ld.%s\t: usable.",nd->id,nd->name);
-  else if(nd->verified==0)  fprintf(stderr,"%ld.%s\t: unusable.",nd->id,nd->name);
-  else  fprintf(stderr,"%ld.%s\t: notverified.",nd->id,nd->name);   
-*/
-}
-
-DGraph* newDGraph(char* nm){
-  DGraph *dg=(DGraph *)malloc(sizeof(DGraph));
-  dg->numNodes=0;
-  dg->numArcs=0;
-  dg->maxNodes=BLOCK_SIZE;
-  dg->maxArcs=BLOCK_SIZE;
-  dg->node=(DGNode **)malloc(dg->maxNodes*sizeof(DGNode*));
-  dg->arc=(DGArc **)malloc(dg->maxArcs*sizeof(DGArc*));
-  dg->name=strdup(nm);
-  return dg;
-}
-int AttachNode(DGraph* dg, DGNode* nd) {
-  int i=0,j,len=0;
-  DGNode **nds =NULL, *tmpnd=NULL;
-  DGArc **ar=NULL;
-
-  if (dg->numNodes == dg->maxNodes-1 ) {
-    dg->maxNodes += BLOCK_SIZE;
-          nds =(DGNode **) calloc(dg->maxNodes,sizeof(DGNode*));
-    memcpy(nds,dg->node,(dg->maxNodes-BLOCK_SIZE)*sizeof(DGNode*));
-    free(dg->node);
-    dg->node=nds;
-  }
-
-        len = strlen( nd->name);
-  for (i = 0; i < dg->numNodes; i++) {
-    tmpnd =dg->node[ i];
-    ar=NULL;
-    if ( strlen( tmpnd->name) != len ) continue;
-    if ( strncmp( nd->name, tmpnd->name, len) ) continue;
-    if ( nd->inDegree > 0 ) {
-      tmpnd->maxInDegree += nd->maxInDegree;
-            ar =(DGArc **) calloc(tmpnd->maxInDegree,sizeof(DGArc*));
-      memcpy(ar,tmpnd->inArc,(tmpnd->inDegree)*sizeof(DGArc*));
-      free(tmpnd->inArc);
-      tmpnd->inArc=ar;
-      for (j = 0; j < nd->inDegree; j++ ) {
-        nd->inArc[ j]->head = tmpnd;
-      }
-      memcpy( &(tmpnd->inArc[ tmpnd->inDegree]), nd->inArc, nd->inDegree*sizeof( DGArc *));
-      tmpnd->inDegree += nd->inDegree;
-    }   
-    if ( nd->outDegree > 0 ) {
-      tmpnd->maxOutDegree += nd->maxOutDegree;
-            ar =(DGArc **) calloc(tmpnd->maxOutDegree,sizeof(DGArc*));
-      memcpy(ar,tmpnd->outArc,(tmpnd->outDegree)*sizeof(DGArc*));
-      free(tmpnd->outArc);
-      tmpnd->outArc=ar;
-      for (j = 0; j < nd->outDegree; j++ ) {
-        nd->outArc[ j]->tail = tmpnd;
-      }      
-      memcpy( &(tmpnd->outArc[tmpnd->outDegree]),nd->outArc,nd->outDegree*sizeof( DGArc *));
-      tmpnd->outDegree += nd->outDegree;
-    } 
-    free(nd); 
-    return i;
-  }
-  nd->id = dg->numNodes;
-  dg->node[dg->numNodes] = nd;
-  dg->numNodes++;
-return nd->id;
-}
-int AttachArc(DGraph *dg,DGArc* nar){
-int  arcId = -1;
-int i=0,newNumber=0;
-DGNode  *head = nar->head,
-  *tail = nar->tail; 
-DGArc **ars=NULL,*probe=NULL;
-/*fprintf(stderr,"AttachArc %ld\n",dg->numArcs); */
-  if ( !tail || !head ) return arcId;
-  if ( dg->numArcs == dg->maxArcs-1 ) {
-    dg->maxArcs += BLOCK_SIZE;
-          ars =(DGArc **) calloc(dg->maxArcs,sizeof(DGArc*));
-    memcpy(ars,dg->arc,(dg->maxArcs-BLOCK_SIZE)*sizeof(DGArc*));
-    free(dg->arc);
-    dg->arc=ars;
-  }
-  for(i = 0; i < tail->outDegree; i++ ) { /* parallel arc */
-    probe = tail->outArc[ i];
-    if(probe->head == head
-       &&
-       probe->length == nar->length
-            ){
-            free(nar);
-      return probe->id;   
-    }
-  }
-  
-  nar->id = dg->numArcs;
-  arcId=dg->numArcs;
-  dg->arc[dg->numArcs] = nar;
-  dg->numArcs++;
-  
-  head->inArc[ head->inDegree] = nar;
-  head->inDegree++;
-  if ( head->inDegree >= head->maxInDegree ) {
-    newNumber = head->maxInDegree + SMALL_BLOCK_SIZE;
-          ars =(DGArc **) calloc(newNumber,sizeof(DGArc*));
-    memcpy(ars,head->inArc,(head->inDegree)*sizeof(DGArc*));
-    free(head->inArc);
-    head->inArc=ars;
-    head->maxInDegree = newNumber;
-  }
-  tail->outArc[ tail->outDegree] = nar;
-  tail->outDegree++;
-  if(tail->outDegree >= tail->maxOutDegree ) {
-    newNumber = tail->maxOutDegree + SMALL_BLOCK_SIZE;
-          ars =(DGArc **) calloc(newNumber,sizeof(DGArc*));
-    memcpy(ars,tail->outArc,(tail->outDegree)*sizeof(DGArc*));
-    free(tail->outArc);
-    tail->outArc=ars;
-    tail->maxOutDegree = newNumber;
-  }
-/*fprintf(stderr,"AttachArc: head->in=%d tail->out=%ld\n",head->inDegree,tail->outDegree);*/
-return arcId;
-}
-void graphShow(DGraph *dg,int DetailsLevel){
-  int i=0,j=0;
-  fprintf(stderr,"%d.%s: (%d,%d)\n",dg->id,dg->name,dg->numNodes,dg->numArcs);
-  if ( DetailsLevel < 1) return;
-  for (i = 0; i < dg->numNodes; i++ ) {
-    DGNode *focusNode = dg->node[ i];
-    if(DetailsLevel >= 2) {
-      for (j = 0; j < focusNode->inDegree; j++ ) {
-  fprintf(stderr,"\t ");
-  nodeShow(focusNode->inArc[ j]->tail);
-      }
-    }
-    nodeShow(focusNode);
-    if ( DetailsLevel < 2) continue;
-    for (j = 0; j < focusNode->outDegree; j++ ) {
-      fprintf(stderr, "\t ");
-      nodeShow(focusNode->outArc[ j]->head);
-    }  
-    fprintf(stderr, "---\n");
-  }
-  fprintf(stderr,"----------------------------------------\n");
-  if ( DetailsLevel < 3) return;
-}
-
-
-
diff --git a/examples/smpi/NAS/DT-folding/DGraph.h b/examples/smpi/NAS/DT-folding/DGraph.h
deleted file mode 100644
index f38f898b24..0000000000
--- a/examples/smpi/NAS/DT-folding/DGraph.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifndef _DGRAPH
-#define _DGRAPH
-
-#define BLOCK_SIZE  128
-#define SMALL_BLOCK_SIZE 32
-
-typedef struct{
-  int id;
-  void *tail,*head;
-  int length,width,attribute,maxWidth;
-}DGArc;
-
-typedef struct{
-  int maxInDegree,maxOutDegree;
-  int inDegree,outDegree;
-  int id;
-  char *name;
-  DGArc **inArc,**outArc;
-  int depth,height,width;
-  int color,attribute,address,verified;
-  void *feat;
-}DGNode;
-
-typedef struct{
-  int maxNodes,maxArcs;
-  int id;
-  char *name;
-  int numNodes,numArcs;
-  DGNode **node;
-  DGArc **arc;
-} DGraph;
-
-DGArc *newArc(DGNode *tl,DGNode *hd);
-void arcShow(DGArc *ar);
-DGNode *newNode(char *nm);
-void nodeShow(DGNode* nd);
-
-DGraph* newDGraph(char *nm);
-int AttachNode(DGraph *dg,DGNode *nd);
-int AttachArc(DGraph *dg,DGArc* nar);
-void graphShow(DGraph *dg,int DetailsLevel);
-
-#endif
diff --git a/examples/smpi/NAS/DT-folding/Makefile b/examples/smpi/NAS/DT-folding/Makefile
deleted file mode 100644
index d1b3702916..0000000000
--- a/examples/smpi/NAS/DT-folding/Makefile
+++ /dev/null
@@ -1,26 +0,0 @@
-SHELL=/bin/sh
-BENCHMARK=dt
-BENCHMARKU=DT
-
-include ../config/make.def
-
-include ../sys/make.common
-#Override PROGRAM
-DTPROGRAM  = $(BINDIR)/$(BENCHMARK)-folding.$(CLASS)
-
-OBJS = dt.o DGraph.o \
-	${COMMON}/c_print_results.o ${COMMON}/c_timers.o ${COMMON}/c_randdp.o
-
-
-${PROGRAM}: config ${OBJS}
-	${CLINK} ${CLINKFLAGS} -o ${DTPROGRAM} ${OBJS} ${CMPI_LIB}
-
-.c.o:
-	${CCOMPILE} $<
-
-dt.o:             dt.c  npbparams.h
-DGraph.o:	DGraph.c DGraph.h
-
-clean:
-	- rm -f *.o *~ mputil*
-	- rm -f dt npbparams.h core
diff --git a/examples/smpi/NAS/DT-folding/README b/examples/smpi/NAS/DT-folding/README
deleted file mode 100644
index 873e3ae6f2..0000000000
--- a/examples/smpi/NAS/DT-folding/README
+++ /dev/null
@@ -1,22 +0,0 @@
-Data Traffic benchmark DT is new in the NPB suite 
-(released as part of NPB3.x-MPI package).
-----------------------------------------------------
-
-DT is written in C and same executable can run on any number of processors,
-provided this number is not less than the number of nodes in the communication
-graph.  DT benchmark takes one argument: BH, WH, or SH. This argument 
-specifies the communication graph Black Hole, White Hole, or SHuffle 
-respectively. The current release contains verification numbers for 
-CLASSES S, W, A, and B only.  Classes C and D are defined, but verification 
-numbers are not provided in this release.
-
-The following table summarizes the number of nodes in the communication
-graph based on CLASS and graph TYPE.
-
-CLASS  N_Source N_Nodes(BH,WH) N_Nodes(SH)
- S      4        5              12
- W      8        11             32
- A      16       21             80
- B      32       43             192
- C      64       85             448
- D      128      171            1024
diff --git a/examples/smpi/NAS/DT/Makefile b/examples/smpi/NAS/DT/Makefile
index 28d9502fe2..f21a8a64e4 100644
--- a/examples/smpi/NAS/DT/Makefile
+++ b/examples/smpi/NAS/DT/Makefile
@@ -1,6 +1,5 @@
 SHELL=/bin/sh
 BENCHMARK=dt
-BENCHMARKU=DT
 
 include ../config/make.def
 
@@ -8,19 +7,24 @@ include ../sys/make.common
 #Override PROGRAM
 DTPROGRAM  = $(BINDIR)/$(BENCHMARK).$(CLASS)
 
-OBJS = dt.o DGraph.o \
+OBJS = dt.o  DGraph.o \
+	${COMMON}/c_print_results.o ${COMMON}/c_timers.o ${COMMON}/c_randdp.o
+
+OBJS-F = dt-folding.o DGraph.o \
 	${COMMON}/c_print_results.o ${COMMON}/c_timers.o ${COMMON}/c_randdp.o
 
 
-${PROGRAM}: config ${OBJS}
+${PROGRAM}: config ${OBJS} ${OBJS-F}
 	${CLINK} ${CLINKFLAGS} -o ${DTPROGRAM} ${OBJS} ${CMPI_LIB}
+	${CLINK} ${CLINKFLAGS} -o ${DTPROGRAM}-folding ${OBJS-F} ${CMPI_LIB}
 
 .c.o:
 	${CCOMPILE} $<
 
 dt.o:             dt.c  npbparams.h
+dt-folding.o:     dt-folding.c  npbparams.h
 DGraph.o:	DGraph.c DGraph.h
 
 clean:
 	- rm -f *.o *~ mputil*
-	- rm -f dt npbparams.h core
+	- rm -f dt dt-folding npbparams.h
diff --git a/examples/smpi/NAS/DT-folding/dt.c b/examples/smpi/NAS/DT/dt-folding.c
similarity index 100%
rename from examples/smpi/NAS/DT-folding/dt.c
rename to examples/smpi/NAS/DT/dt-folding.c
diff --git a/examples/smpi/NAS/EP/Makefile b/examples/smpi/NAS/EP/Makefile
index 2014329288..217f57df69 100644
--- a/examples/smpi/NAS/EP/Makefile
+++ b/examples/smpi/NAS/EP/Makefile
@@ -4,19 +4,13 @@ BENCHMARKU=EP
 
 include ../config/make.def
 
-#OBJS = ep.o ${COMMON}/print_results.o ${COMMON}/${RAND}.o ${COMMON}/timers.o
 OBJS = ep.o randlc.o
 
 include ../sys/make.common
 
 ${PROGRAM}: config ${OBJS}
-#	${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${FMPI_LIB}
 	${CLINK} ${CLINKFLAGS} -o ${PROGRAM} ${OBJS} ${CMPI_LIB}
 
-
-#ep.o:		ep.f  mpinpb.h npbparams.h
-#	${FCOMPILE} ep.f
-
 ep.o:	ep.c randlc.c mpinpb.h npbparams.h
 	${CCOMPILE} ep.c
 
diff --git a/examples/smpi/NAS/EP/ep.f b/examples/smpi/NAS/EP/ep.f
deleted file mode 100644
index ca7cc24620..0000000000
--- a/examples/smpi/NAS/EP/ep.f
+++ /dev/null
@@ -1,316 +0,0 @@
-!-------------------------------------------------------------------------!
-!                                                                         !
-!        N  A  S     P A R A L L E L     B E N C H M A R K S  3.3         !
-!                                                                         !
-!                                   E P                                   !
-!                                                                         !
-!-------------------------------------------------------------------------!
-!                                                                         !
-!    This benchmark is part of the NAS Parallel Benchmark 3.3 suite.      !
-!    It is described in NAS Technical Reports 95-020 and 02-007           !
-!                                                                         !
-!    Permission to use, copy, distribute and modify this software         !
-!    for any purpose with or without fee is hereby granted.  We           !
-!    request, however, that all derived work reference the NAS            !
-!    Parallel Benchmarks 3.3. This software is provided "as is"           !
-!    without express or implied warranty.                                 !
-!                                                                         !
-!    Information on NPB 3.3, including the technical report, the          !
-!    original specifications, source code, results and information        !
-!    on how to submit new results, is available at:                       !
-!                                                                         !
-!           http://www.nas.nasa.gov/Software/NPB/                         !
-!                                                                         !
-!    Send comments or suggestions to  npb@nas.nasa.gov                    !
-!                                                                         !
-!          NAS Parallel Benchmarks Group                                  !
-!          NASA Ames Research Center                                      !
-!          Mail Stop: T27A-1                                              !
-!          Moffett Field, CA   94035-1000                                 !
-!                                                                         !
-!          E-mail:  npb@nas.nasa.gov                                      !
-!          Fax:     (650) 604-3957                                        !
-!                                                                         !
-!-------------------------------------------------------------------------!
-
-
-c---------------------------------------------------------------------
-c
-c Authors: P. O. Frederickson 
-c          D. H. Bailey
-c          A. C. Woo
-c          R. F. Van der Wijngaart
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-      program EMBAR
-c---------------------------------------------------------------------
-C
-c   This is the MPI version of the APP Benchmark 1,
-c   the "embarassingly parallel" benchmark.
-c
-c
-c   M is the Log_2 of the number of complex pairs of uniform (0, 1) random
-c   numbers.  MK is the Log_2 of the size of each batch of uniform random
-c   numbers.  MK can be set for convenience on a given system, since it does
-c   not affect the results.
-
-      implicit none
-
-      include 'npbparams.h'
-      include 'mpinpb.h'
-
-      double precision Mops, epsilon, a, s, t1, t2, t3, t4, x, x1, 
-     >                 x2, q, sx, sy, tm, an, tt, gc, dum(3),
-     >                 timer_read
-      double precision sx_verify_value, sy_verify_value, sx_err, sy_err
-      integer          mk, mm, nn, nk, nq, np, ierr, node, no_nodes, 
-     >                 i, ik, kk, l, k, nit, ierrcode, no_large_nodes,
-     >                 np_add, k_offset, j
-      logical          verified, timers_enabled
-      parameter       (timers_enabled = .false.)
-      external         randlc, timer_read
-      double precision randlc, qq
-      character*15     size
-
-      parameter (mk = 16, mm = m - mk, nn = 2 ** mm,
-     >           nk = 2 ** mk, nq = 10, epsilon=1.d-8,
-     >           a = 1220703125.d0, s = 271828183.d0)
-
-      common/storage/ x(2*nk), q(0:nq-1), qq(10000)
-      data             dum /1.d0, 1.d0, 1.d0/
-
-      call mpi_init(ierr)
-      call mpi_comm_rank(MPI_COMM_WORLD,node,ierr)
-      call mpi_comm_size(MPI_COMM_WORLD,no_nodes,ierr)
-
-      root = 0
-
-      if (.not. convertdouble) then
-         dp_type = MPI_DOUBLE_PRECISION
-      else
-         dp_type = MPI_REAL
-      endif
-
-      if (node.eq.root)  then
-
-c   Because the size of the problem is too large to store in a 32-bit
-c   integer for some classes, we put it into a string (for printing).
-c   Have to strip off the decimal point put in there by the floating
-c   point print statement (internal file)
-
-          write(*, 1000)
-          write(size, '(f15.0)' ) 2.d0**(m+1)
-          j = 15
-          if (size(j:j) .eq. '.') j = j - 1
-          write (*,1001) size(1:j)
-          write(*, 1003) no_nodes
-
- 1000 format(/,' NAS Parallel Benchmarks 3.3 -- EP Benchmark',/)
- 1001     format(' Number of random numbers generated: ', a15)
- 1003     format(' Number of active processes:         ', 2x, i13, /)
-
-      endif
-
-      verified = .false.
-
-c   Compute the number of "batches" of random number pairs generated 
-c   per processor. Adjust if the number of processors does not evenly 
-c   divide the total number
-
-      np = nn / no_nodes
-      no_large_nodes = mod(nn, no_nodes)
-      if (node .lt. no_large_nodes) then
-         np_add = 1
-      else
-         np_add = 0
-      endif
-      np = np + np_add
-
-      if (np .eq. 0) then
-         write (6, 1) no_nodes, nn
- 1       format ('Too many nodes:',2i6)
-         call mpi_abort(MPI_COMM_WORLD,ierrcode,ierr)
-         stop
-      endif
-
-c   Call the random number generator functions and initialize
-c   the x-array to reduce the effects of paging on the timings.
-c   Also, call all mathematical functions that are used. Make
-c   sure these initializations cannot be eliminated as dead code.
-
-      call vranlc(0, dum(1), dum(2), dum(3))
-      dum(1) = randlc(dum(2), dum(3))
-      do 5    i = 1, 2*nk
-         x(i) = -1.d99
- 5    continue
-      Mops = log(sqrt(abs(max(1.d0,1.d0))))
-
-c---------------------------------------------------------------------
-c      Synchronize before placing time stamp
-c---------------------------------------------------------------------
-      call mpi_barrier(MPI_COMM_WORLD, ierr)
-      
-      call timer_clear(1)
-      call timer_clear(2)
-      call timer_clear(3)
-      call timer_start(1)
-
-      t1 = a
-      call vranlc(0, t1, a, x)
-
-c   Compute AN = A ^ (2 * NK) (mod 2^46).
-
-      t1 = a
-
-      do 100 i = 1, mk + 1
-         t2 = randlc(t1, t1)
- 100  continue
-
-      an = t1
-      tt = s
-      gc = 0.d0
-      sx = 0.d0
-      sy = 0.d0
-
-      do 110 i = 0, nq - 1
-         q(i) = 0.d0
- 110  continue
-
-c   Each instance of this loop may be performed independently. We compute
-c   the k offsets separately to take into account the fact that some nodes
-c   have more numbers to generate than others
-
-      if (np_add .eq. 1) then
-         k_offset = node * np -1
-      else
-         k_offset = no_large_nodes*(np+1) + (node-no_large_nodes)*np -1
-      endif
-
-      do 150 k = 1, np
-         kk = k_offset + k 
-         t1 = s
-         t2 = an
-
-c        Find starting seed t1 for this kk.
-
-         do 120 i = 1, 100
-            ik = kk / 2
-            if (2 * ik .ne. kk) t3 = randlc(t1, t2)
-            if (ik .eq. 0) goto 130
-            t3 = randlc(t2, t2)
-            kk = ik
- 120     continue
-
-c        Compute uniform pseudorandom numbers.
- 130     continue
-
-         if (timers_enabled) call timer_start(3)
-         call vranlc(2 * nk, t1, a, x)
-         if (timers_enabled) call timer_stop(3)
-
-c        Compute Gaussian deviates by acceptance-rejection method and 
-c        tally counts in concentric square annuli.  This loop is not 
-c        vectorizable. 
-
-         if (timers_enabled) call timer_start(2)
-
-         do 140 i = 1, nk
-            x1 = 2.d0 * x(2*i-1) - 1.d0
-            x2 = 2.d0 * x(2*i) - 1.d0
-            t1 = x1 ** 2 + x2 ** 2
-            if (t1 .le. 1.d0) then
-               t2   = sqrt(-2.d0 * log(t1) / t1)
-               t3   = (x1 * t2)
-               t4   = (x2 * t2)
-               l    = max(abs(t3), abs(t4))
-               q(l) = q(l) + 1.d0
-               sx   = sx + t3
-               sy   = sy + t4
-            endif
- 140     continue
-
-         if (timers_enabled) call timer_stop(2)
-
- 150  continue
-
-      call mpi_allreduce(sx, x, 1, dp_type,
-     >                   MPI_SUM, MPI_COMM_WORLD, ierr)
-      sx = x(1)
-      call mpi_allreduce(sy, x, 1, dp_type,
-     >                   MPI_SUM, MPI_COMM_WORLD, ierr)
-      sy = x(1)
-      call mpi_allreduce(q, x, nq, dp_type,
-     >                   MPI_SUM, MPI_COMM_WORLD, ierr)
-
-      do i = 1, nq
-         q(i-1) = x(i)
-      enddo
-
-      do 160 i = 0, nq - 1
-        gc = gc + q(i)
- 160  continue
-
-      call timer_stop(1)
-      tm  = timer_read(1)
-
-      call mpi_allreduce(tm, x, 1, dp_type,
-     >                   MPI_MAX, MPI_COMM_WORLD, ierr)
-      tm = x(1)
-
-      if (node.eq.root) then
-         nit=0
-         verified = .true.
-         if (m.eq.24) then
-            sx_verify_value = -3.247834652034740D+3
-            sy_verify_value = -6.958407078382297D+3
-         elseif (m.eq.25) then
-            sx_verify_value = -2.863319731645753D+3
-            sy_verify_value = -6.320053679109499D+3
-         elseif (m.eq.28) then
-            sx_verify_value = -4.295875165629892D+3
-            sy_verify_value = -1.580732573678431D+4
-         elseif (m.eq.30) then
-            sx_verify_value =  4.033815542441498D+4
-            sy_verify_value = -2.660669192809235D+4
-         elseif (m.eq.32) then
-            sx_verify_value =  4.764367927995374D+4
-            sy_verify_value = -8.084072988043731D+4
-         elseif (m.eq.36) then
-            sx_verify_value =  1.982481200946593D+5
-            sy_verify_value = -1.020596636361769D+5
-         elseif (m.eq.40) then
-            sx_verify_value = -5.319717441530D+05
-            sy_verify_value = -3.688834557731D+05
-         else
-            verified = .false.
-         endif
-         if (verified) then
-            sx_err = abs((sx - sx_verify_value)/sx_verify_value)
-            sy_err = abs((sy - sy_verify_value)/sy_verify_value)
-            verified = ((sx_err.le.epsilon) .and. (sy_err.le.epsilon))
-         endif
-         Mops = 2.d0**(m+1)/tm/1000000.d0
-
-         write (6,11) tm, m, gc, sx, sy, (i, q(i), i = 0, nq - 1)
- 11      format ('EP Benchmark Results:'//'CPU Time =',f10.4/'N = 2^',
-     >           i5/'No. Gaussian Pairs =',f15.0/'Sums = ',1p,2d25.15/
-     >           'Counts:'/(i3,0p,f15.0))
-
-         call print_results('EP', class, m+1, 0, 0, nit, npm, 
-     >                      no_nodes, tm, Mops, 
-     >                      'Random numbers generated', 
-     >                      verified, npbversion, compiletime, cs1,
-     >                      cs2, cs3, cs4, cs5, cs6, cs7)
-
-      endif
-
-      if (timers_enabled .and. (node .eq. root)) then
-          print *, 'Total time:     ', timer_read(1)
-          print *, 'Gaussian pairs: ', timer_read(2)
-          print *, 'Random numbers: ', timer_read(3)
-      endif
-
-      call mpi_finalize(ierr)
-
-      end
diff --git a/examples/smpi/NAS/MPI_dummy/Makefile b/examples/smpi/NAS/MPI_dummy/Makefile
deleted file mode 100644
index 86288d7a1d..0000000000
--- a/examples/smpi/NAS/MPI_dummy/Makefile
+++ /dev/null
@@ -1,38 +0,0 @@
-# Makefile for MPI dummy library. 
-# Must be edited for a specific machine. Does NOT read in 
-# the make.def file of NPB 2.3
-F77 = f77
-CC = cc
-AR = ar
-
-# Enable if either Cray or IBM: (no such flag for most machines: see wtime.h)
-# MACHINE	=	-DCRAY
-# MACHINE	=	-DIBM
-
-libmpi.a: mpi_dummy.o mpi_dummy_c.o wtime.o
-	$(AR) r libmpi.a mpi_dummy.o mpi_dummy_c.o wtime.o
-
-mpi_dummy.o: mpi_dummy.f mpif.h
-	$(F77) -c mpi_dummy.f
-# For a Cray C90, try:
-#	cf77 -dp -c mpi_dummy.f
-# For an IBM 590, try:
-#	xlf -c mpi_dummy.f
-
-mpi_dummy_c.o: mpi_dummy.c mpi.h
-	$(CC) -c ${MACHINE} -o mpi_dummy_c.o mpi_dummy.c
-
-wtime.o: wtime.c
-# For most machines or CRAY or IBM
-	$(CC) -c ${MACHINE} wtime.c
-# For a precise timer on an SGI Power Challenge, try:
-#	$(CC) -o wtime.o -c wtime_sgi64.c
-
-test: test.f
-	$(F77) -o test -I. test.f -L. -lmpi
-
-
-
-clean: 
-	- rm -f *~ *.o
-	- rm -f test libmpi.a
diff --git a/examples/smpi/NAS/MPI_dummy/README b/examples/smpi/NAS/MPI_dummy/README
deleted file mode 100644
index 9096a0b350..0000000000
--- a/examples/smpi/NAS/MPI_dummy/README
+++ /dev/null
@@ -1,52 +0,0 @@
-###########################################
-# NAS Parallel Benchmarks 2&3             #
-# MPI/F77/C                               #
-# Revision 3.3                            #
-# NASA Ames Research Center               #
-# npb@nas.nasa.gov                        #
-# http://www.nas.nasa.gov/Software/NPB/   #
-###########################################
-
-MPI Dummy Library
-
-
-The MPI dummy library is supplied as a convenience for people who do
-not have an MPI library but would like to try running on one processor
-anyway. The NPB 2.x/3.x benchmarks are designed so that they do not
-actually try to do any message passing when run on one node. The MPI
-dummy library is just that - a set of dummy MPI routines which don't
-do anything, but allow you to link the benchmarks. Actually they do a
-few things, but nothing important. Note that the dummy library is 
-sufficient only for the NPB 2.x/3.x benchmarks. It probably won't be
-useful for anything else because it implements only a handful of
-functions. 
-
-Because the dummy library is just an extra goody, and since we don't
-have an infinite amount of time, it may be a bit trickier to configure
-than the rest of the benchmarks. You need to:
-
-1. Find out how C and Fortran interact on your machine. On most machines, 
-the fortran functon foo(x) is declared in C as foo_(xp) where xp is 
-a pointer, not a value. On IBMs, it's just foo(xp). On Cray C90s, its
-FOO(xp). You can define CRAY or IBM to get these, or you need to
-edit wtime.c if you've got something else. 
-
-2. Edit the Makefile to compile mpi_dummy.f and wtime.c correctly
-for your machine (including -DCRAY or -DIBM if necessary). 
-
-3. The substitute MPI timer gives wall clock time, not CPU time. 
-If you're running on a timeshared machine, you may want to 
-use a CPU timer. Edit the function mpi_wtime() in mpi_dummy.f
-to change this timer. (NOTE: for official benchmark results, 
-ONLY wall clock times are valid. Using a CPU timer is ok 
-if you want to get things running, but don't report any results
-measured with a CPU timer. )
-
-TROUBLESHOOTING
-
-o Compiling or linking of the benchmark aborts because the dummy MPI
-  header file or the dummy MPI library cannot be found.
-  - the file make.dummy in subdirectory config relies on the use
-    of the -I"path" and -L"path" -l"library" constructs to pass
-    information to the compilers and linkers. Edit this file to conform
-    to your system.
diff --git a/examples/smpi/NAS/MPI_dummy/mpi.h b/examples/smpi/NAS/MPI_dummy/mpi.h
deleted file mode 100644
index 70eb3138b4..0000000000
--- a/examples/smpi/NAS/MPI_dummy/mpi.h
+++ /dev/null
@@ -1,112 +0,0 @@
-#define MPI_DOUBLE          1
-#define MPI_INT             2
-#define MPI_BYTE            3
-#define MPI_FLOAT           4
-#define MPI_LONG            5
-
-#define MPI_COMM_WORLD      0
-
-#define MPI_MAX             1
-#define MPI_SUM             2
-#define MPI_MIN             3
-
-#define MPI_SUCCESS         0
-#define MPI_ANY_SOURCE     -1
-#define MPI_ERR_OTHER      -1
-#define MPI_STATUS_SIZE     3
-
-
-/* 
-   Status object.  It is the only user-visible MPI data-structure 
-   The "count" field is PRIVATE; use MPI_Get_count to access it. 
- */
-typedef struct { 
-    int count;
-    int MPI_SOURCE;
-    int MPI_TAG;
-    int MPI_ERROR;
-} MPI_Status;
-
-
-/* MPI request objects */
-typedef int MPI_Request;
-
-/* MPI datatype */
-typedef int MPI_Datatype;
-
-/* MPI comm */
-typedef int MPI_Comm;
-
-/* MPI operation */
-typedef int MPI_Op;
-
-
-
-/* Prototypes: */
-void  mpi_error( void );
-
-int   MPI_Irecv( void         *buf,
-                 int          count,
-                 MPI_Datatype datatype,
-                 int          source,
-                 int          tag,
-                 MPI_Comm     comm,
-                 MPI_Request  *request );
-
-int   MPI_Send( void         *buf,
-                int          count,
-                MPI_Datatype datatype,
-                int          dest,
-                int          tag,
-                MPI_Comm     comm );
-
-int   MPI_Wait( MPI_Request *request,
-                MPI_Status  *status );
-
-int   MPI_Init( int  *argc,
-                char ***argv );
-
-int   MPI_Comm_rank( MPI_Comm comm, 
-                     int      *rank );
-
-int   MPI_Comm_size( MPI_Comm comm, 
-                     int      *size );
-
-double MPI_Wtime( void );
-
-int  MPI_Barrier( MPI_Comm comm );
-
-int  MPI_Finalize( void );
-
-int  MPI_Allreduce( void         *sendbuf,
-                    void         *recvbuf,
-                    int          nitems,
-                    MPI_Datatype type,
-                    MPI_Op       op,
-                    MPI_Comm     comm );
-
-int  MPI_Reduce( void         *sendbuf,
-                 void         *recvbuf,
-                 int          nitems,
-                 MPI_Datatype type,
-                 MPI_Op       op,
-                 int          root,
-                 MPI_Comm     comm );
-
-int  MPI_Alltoall( void         *sendbuf,
-                   int          sendcount,
-                   MPI_Datatype sendtype,
-                   void         *recvbuf,
-                   int          recvcount,
-                   MPI_Datatype recvtype,
-                   MPI_Comm     comm );
-
-int  MPI_Alltoallv( void         *sendbuf,
-                    int          *sendcounts,
-                    int          *senddispl,
-                    MPI_Datatype sendtype,
-                    void         *recvbuf,
-                    int          *recvcounts,
-                    int          *recvdispl,
-                    MPI_Datatype recvtype,
-                    MPI_Comm     comm );
diff --git a/examples/smpi/NAS/MPI_dummy/mpi_dummy.c b/examples/smpi/NAS/MPI_dummy/mpi_dummy.c
deleted file mode 100644
index 4cb7f06e63..0000000000
--- a/examples/smpi/NAS/MPI_dummy/mpi_dummy.c
+++ /dev/null
@@ -1,265 +0,0 @@
-#include <stdlib.h>
-#include "mpi.h"
-#include "wtime.h"
-
-void  mpi_error( void )
-{
-    printf( "mpi_error called\n" );
-    abort();
-}
-
-
-
-
-int   MPI_Irecv( void         *buf,
-                 int          count,
-                 MPI_Datatype datatype,
-                 int          source,
-                 int          tag,
-                 MPI_Comm     comm,
-                 MPI_Request  *request )
-{
-    mpi_error();
-    return( MPI_ERR_OTHER );
-}
-
-
-
-
-int   MPI_Recv( void         *buf,
-                int          count,
-                MPI_Datatype datatype,
-                int          source,
-                int          tag,
-                MPI_Comm     comm,
-                MPI_Status   *status )
-{
-    mpi_error();
-    return( MPI_ERR_OTHER );
-}
-
-
-
-
-int   MPI_Send( void         *buf,
-                int          count,
-                MPI_Datatype datatype,
-                int          dest,
-                int          tag,
-                MPI_Comm     comm )
-{
-    mpi_error();
-    return( MPI_ERR_OTHER );
-}
-
-
-
-
-int   MPI_Wait( MPI_Request *request,
-                MPI_Status  *status )
-{
-    mpi_error();
-    return( MPI_ERR_OTHER );
-}
-
-
-
-
-int   MPI_Init( int  *argc,
-                char ***argv )
-{
-    return( MPI_SUCCESS );
-}
-
-
-
-
-int   MPI_Comm_rank( MPI_Comm comm, 
-                     int      *rank )
-{
-    *rank = 0;
-    return( MPI_SUCCESS );
-}
-
-
-
-
-int   MPI_Comm_size( MPI_Comm comm, 
-                     int      *size )
-{
-    *size = 1;
-    return( MPI_SUCCESS );
-}
-
-
-
-
-double MPI_Wtime( void )
-{
-    void wtime();
-
-    double t;
-    wtime( &t );
-    return( t );
-}
-
-
-
-
-int  MPI_Barrier( MPI_Comm comm )
-{
-    return( MPI_SUCCESS );
-}
-
-
-
-
-int  MPI_Finalize( void )
-{
-    return( MPI_SUCCESS );
-}
-
-
-
-
-int  MPI_Allreduce( void         *sendbuf,
-                    void         *recvbuf,
-                    int          nitems,
-                    MPI_Datatype type,
-                    MPI_Op       op,
-                    MPI_Comm     comm )
-{
-    int i;
-    if( type == MPI_INT )
-    {
-        int *pd_sendbuf, *pd_recvbuf;
-        pd_sendbuf = (int *) sendbuf;    
-        pd_recvbuf = (int *) recvbuf;    
-        for( i=0; i<nitems; i++ )
-            *(pd_recvbuf+i) = *(pd_sendbuf+i);
-    }
-    if( type == MPI_LONG )
-    {
-        long *pd_sendbuf, *pd_recvbuf;
-        pd_sendbuf = (long *) sendbuf;    
-        pd_recvbuf = (long *) recvbuf;    
-        for( i=0; i<nitems; i++ )
-            *(pd_recvbuf+i) = *(pd_sendbuf+i);
-    }
-    if( type == MPI_DOUBLE )
-    {
-        double *pd_sendbuf, *pd_recvbuf;
-        pd_sendbuf = (double *) sendbuf;    
-        pd_recvbuf = (double *) recvbuf;    
-        for( i=0; i<nitems; i++ )
-            *(pd_recvbuf+i) = *(pd_sendbuf+i);
-    }
-    return( MPI_SUCCESS );
-}
-  
-
-
-
-int  MPI_Reduce( void         *sendbuf,
-                 void         *recvbuf,
-                 int          nitems,
-                 MPI_Datatype type,
-                 MPI_Op       op,
-                 int          root,
-                 MPI_Comm     comm )
-{
-    int i;
-    if( type == MPI_INT )
-    {
-        int *pi_sendbuf, *pi_recvbuf;
-        pi_sendbuf = (int *) sendbuf;    
-        pi_recvbuf = (int *) recvbuf;    
-        for( i=0; i<nitems; i++ )
-            *(pi_recvbuf+i) = *(pi_sendbuf+i);
-    }
-    if( type == MPI_LONG )
-    {
-        long *pi_sendbuf, *pi_recvbuf;
-        pi_sendbuf = (long *) sendbuf;    
-        pi_recvbuf = (long *) recvbuf;    
-        for( i=0; i<nitems; i++ )
-            *(pi_recvbuf+i) = *(pi_sendbuf+i);
-    }
-    if( type == MPI_DOUBLE )
-    {
-        double *pd_sendbuf, *pd_recvbuf;
-        pd_sendbuf = (double *) sendbuf;    
-        pd_recvbuf = (double *) recvbuf;    
-        for( i=0; i<nitems; i++ )
-            *(pd_recvbuf+i) = *(pd_sendbuf+i);
-    }
-    return( MPI_SUCCESS );
-}
-  
-
-
-
-int  MPI_Alltoall( void         *sendbuf,
-                   int          sendcount,
-                   MPI_Datatype sendtype,
-                   void         *recvbuf,
-                   int          recvcount,
-                   MPI_Datatype recvtype,
-                   MPI_Comm     comm )
-{
-    int i;
-    if( recvtype == MPI_INT )
-    {
-        int *pd_sendbuf, *pd_recvbuf;
-        pd_sendbuf = (int *) sendbuf;    
-        pd_recvbuf = (int *) recvbuf;    
-        for( i=0; i<sendcount; i++ )
-            *(pd_recvbuf+i) = *(pd_sendbuf+i);
-    }
-    if( recvtype == MPI_LONG )
-    {
-        long *pd_sendbuf, *pd_recvbuf;
-        pd_sendbuf = (long *) sendbuf;    
-        pd_recvbuf = (long *) recvbuf;    
-        for( i=0; i<sendcount; i++ )
-            *(pd_recvbuf+i) = *(pd_sendbuf+i);
-    }
-    return( MPI_SUCCESS );
-}
-  
-
-
-
-int  MPI_Alltoallv( void         *sendbuf,
-                    int          *sendcounts,
-                    int          *senddispl,
-                    MPI_Datatype sendtype,
-                    void         *recvbuf,
-                    int          *recvcounts,
-                    int          *recvdispl,
-                    MPI_Datatype recvtype,
-                    MPI_Comm     comm )
-{
-    int i;
-    if( recvtype == MPI_INT )
-    {
-        int *pd_sendbuf, *pd_recvbuf;
-        pd_sendbuf = (int *) sendbuf;    
-        pd_recvbuf = (int *) recvbuf;    
-        for( i=0; i<sendcounts[0]; i++ )
-            *(pd_recvbuf+i+recvdispl[0]) = *(pd_sendbuf+i+senddispl[0]);
-    }
-    if( recvtype == MPI_LONG )
-    {
-        long *pd_sendbuf, *pd_recvbuf;
-        pd_sendbuf = (long *) sendbuf;    
-        pd_recvbuf = (long *) recvbuf;    
-        for( i=0; i<sendcounts[0]; i++ )
-            *(pd_recvbuf+i+recvdispl[0]) = *(pd_sendbuf+i+senddispl[0]);
-    }
-    return( MPI_SUCCESS );
-}
-  
-
-
-
diff --git a/examples/smpi/NAS/MPI_dummy/mpi_dummy.f b/examples/smpi/NAS/MPI_dummy/mpi_dummy.f
deleted file mode 100644
index 2550aa3452..0000000000
--- a/examples/smpi/NAS/MPI_dummy/mpi_dummy.f
+++ /dev/null
@@ -1,309 +0,0 @@
-      subroutine mpi_isend(buf,count,datatype,source,
-     & tag,comm,request,ierror)
-      integer buf(*), count,datatype,source,tag,comm,
-     & request,ierror
-      call mpi_error()
-      return
-      end  
-
-      subroutine mpi_irecv(buf,count,datatype,source,
-     & tag,comm,request,ierror)
-      integer buf(*), count,datatype,source,tag,comm,
-     & request,ierror
-      call mpi_error()
-      return
-      end
-
-      subroutine mpi_send(buf,count,datatype,dest,tag,comm,ierror)
-      integer buf(*), count,datatype,dest,tag,comm,ierror
-      call mpi_error()
-      return
-      end
-      
-      subroutine mpi_recv(buf,count,datatype,source,
-     & tag,comm,status,ierror)
-      integer buf(*), count,datatype,source,tag,comm,
-     & status(*),ierror
-      call mpi_error()
-      return
-      end
-
-      subroutine mpi_comm_split(comm,color,key,newcomm,ierror)
-      integer comm,color,key,newcomm,ierror
-      return
-      end
-
-      subroutine mpi_comm_rank(comm, rank,ierr)
-      implicit none
-      integer comm, rank,ierr
-      rank = 0
-      return
-      end
-
-      subroutine mpi_comm_size(comm, size, ierr)
-      implicit none
-      integer comm, size, ierr
-      size = 1
-      return
-      end
-
-      double precision function mpi_wtime()
-      implicit none
-      double precision t
-c This function must measure wall clock time, not CPU time. 
-c Since there is no portable timer in Fortran (77)
-c we call a routine compiled in C (though the C source may have
-c to be tweaked). 
-      call wtime(t)
-c The following is not ok for "official" results because it reports
-c CPU time not wall clock time. It may be useful for developing/testing
-c on timeshared Crays, though. 
-c     call second(t)
-
-      mpi_wtime = t
-
-      return
-      end
-
-
-c may be valid to call this in single processor case
-      subroutine mpi_barrier(comm,ierror)
-      return
-      end
-
-c may be valid to call this in single processor case
-      subroutine mpi_bcast(buf, nitems, type, root, comm, ierr)
-      implicit none
-      integer buf(*), nitems, type, root, comm, ierr
-      return
-      end
-
-      subroutine mpi_comm_dup(oldcomm, newcomm,ierror)
-      integer oldcomm, newcomm,ierror
-      newcomm= oldcomm
-      return
-      end
-
-      subroutine mpi_error()
-      print *, 'mpi_error called'
-      stop
-      end 
-
-      subroutine mpi_abort(comm, errcode, ierr)
-      implicit none
-      integer comm, errcode, ierr
-      print *, 'mpi_abort called'
-      stop
-      end
-
-      subroutine mpi_finalize(ierr)
-      return
-      end
-
-      subroutine mpi_init(ierr)
-      return
-      end
-
-
-c assume double precision, which is all SP uses 
-      subroutine mpi_reduce(inbuf, outbuf, nitems, 
-     $                      type, op, root, comm, ierr)
-      implicit none
-      include 'mpif.h'
-      integer nitems, type, op, root, comm, ierr
-      double precision inbuf(*), outbuf(*)
-
-      if (type .eq. mpi_double_precision) then
-         call mpi_reduce_dp(inbuf, outbuf, nitems, 
-     $                      type, op, root, comm, ierr)
-      else if (type .eq.  mpi_double_complex) then
-         call mpi_reduce_dc(inbuf, outbuf, nitems, 
-     $                      type, op, root, comm, ierr)
-      else if (type .eq.  mpi_complex) then
-         call mpi_reduce_complex(inbuf, outbuf, nitems, 
-     $                      type, op, root, comm, ierr)
-      else if (type .eq.  mpi_real) then
-         call mpi_reduce_real(inbuf, outbuf, nitems, 
-     $                      type, op, root, comm, ierr)
-      else if (type .eq.  mpi_integer) then
-         call mpi_reduce_int(inbuf, outbuf, nitems, 
-     $                      type, op, root, comm, ierr)
-      else 
-         print *, 'mpi_reduce: unknown type ', type
-      end if
-      return
-      end
-
-
-      subroutine mpi_reduce_real(inbuf, outbuf, nitems, 
-     $                      type, op, root, comm, ierr)
-      implicit none
-      integer nitems, type, op, root, comm, ierr, i
-      real inbuf(*), outbuf(*)
-      do i = 1, nitems
-         outbuf(i) = inbuf(i)
-      end do
-      
-      return
-      end
-
-      subroutine mpi_reduce_dp(inbuf, outbuf, nitems, 
-     $                      type, op, root, comm, ierr)
-      implicit none
-      integer nitems, type, op, root, comm, ierr, i
-      double precision inbuf(*), outbuf(*)
-      do i = 1, nitems
-         outbuf(i) = inbuf(i)
-      end do
-      
-      return
-      end
-
-      subroutine mpi_reduce_dc(inbuf, outbuf, nitems, 
-     $                      type, op, root, comm, ierr)
-      implicit none
-      integer nitems, type, op, root, comm, ierr, i
-      double complex inbuf(*), outbuf(*)
-      do i = 1, nitems
-         outbuf(i) = inbuf(i)
-      end do
-      
-      return
-      end
-
-
-      subroutine mpi_reduce_complex(inbuf, outbuf, nitems, 
-     $                      type, op, root, comm, ierr)
-      implicit none
-      integer nitems, type, op, root, comm, ierr, i
-      complex inbuf(*), outbuf(*)
-      do i = 1, nitems
-         outbuf(i) = inbuf(i)
-      end do
-      
-      return
-      end
-
-      subroutine mpi_reduce_int(inbuf, outbuf, nitems, 
-     $                      type, op, root, comm, ierr)
-      implicit none
-      integer nitems, type, op, root, comm, ierr, i
-      integer inbuf(*), outbuf(*)
-      do i = 1, nitems
-         outbuf(i) = inbuf(i)
-      end do
-      
-      return
-      end
-
-      subroutine mpi_allreduce(inbuf, outbuf, nitems, 
-     $                      type, op, comm, ierr)
-      implicit none
-      integer nitems, type, op, comm, ierr
-      double precision inbuf(*), outbuf(*)
-
-      call mpi_reduce(inbuf, outbuf, nitems, 
-     $                      type, op, 0, comm, ierr)
-      return
-      end
-
-      subroutine mpi_alltoall(inbuf, nitems, type, outbuf, nitems_dum, 
-     $                        type_dum, comm, ierr)
-      implicit none
-      include 'mpif.h'
-      integer nitems, type, comm, ierr, nitems_dum, type_dum
-      double precision inbuf(*), outbuf(*)
-      if (type .eq. mpi_double_precision) then
-         call mpi_alltoall_dp(inbuf, outbuf, nitems, 
-     $                      type, comm, ierr)
-      else if (type .eq.  mpi_double_complex) then
-         call mpi_alltoall_dc(inbuf, outbuf, nitems, 
-     $                      type, comm, ierr)
-      else if (type .eq.  mpi_complex) then
-         call mpi_alltoall_complex(inbuf, outbuf, nitems, 
-     $                      type, comm, ierr)
-      else if (type .eq.  mpi_real) then
-         call mpi_alltoall_real(inbuf, outbuf, nitems, 
-     $                      type, comm, ierr)
-      else if (type .eq.  mpi_integer) then
-         call mpi_alltoall_int(inbuf, outbuf, nitems, 
-     $                      type, comm, ierr)
-      else 
-         print *, 'mpi_alltoall: unknown type ', type
-      end if
-      return
-      end
-
-      subroutine mpi_alltoall_dc(inbuf, outbuf, nitems, 
-     $                           type, comm, ierr)
-      implicit none
-      integer nitems, type, comm, ierr, i
-      double complex inbuf(*), outbuf(*)
-      do i = 1, nitems
-         outbuf(i) = inbuf(i)
-      end do
-      
-      return
-      end
-
-
-      subroutine mpi_alltoall_complex(inbuf, outbuf, nitems, 
-     $                           type, comm, ierr)
-      implicit none
-      integer nitems, type, comm, ierr, i
-      double complex inbuf(*), outbuf(*)
-      do i = 1, nitems
-         outbuf(i) = inbuf(i)
-      end do
-      
-      return
-      end
-
-      subroutine mpi_alltoall_dp(inbuf, outbuf, nitems, 
-     $                           type, comm, ierr)
-      implicit none
-      integer nitems, type, comm, ierr, i
-      double precision inbuf(*), outbuf(*)
-      do i = 1, nitems
-         outbuf(i) = inbuf(i)
-      end do
-      
-      return
-      end
-
-      subroutine mpi_alltoall_real(inbuf, outbuf, nitems, 
-     $                             type, comm, ierr)
-      implicit none
-      integer nitems, type, comm, ierr, i
-      real inbuf(*), outbuf(*)
-      do i = 1, nitems
-         outbuf(i) = inbuf(i)
-      end do
-      
-      return
-      end
-
-      subroutine mpi_alltoall_int(inbuf, outbuf, nitems, 
-     $                            type, comm, ierr)
-      implicit none
-      integer nitems, type, comm, ierr, i
-      integer inbuf(*), outbuf(*)
-      do i = 1, nitems
-         outbuf(i) = inbuf(i)
-      end do
-      
-      return
-      end
-
-      subroutine mpi_wait(request,status,ierror)
-      integer request,status,ierror
-      call mpi_error()
-      return
-      end
-
-      subroutine mpi_waitall(count,requests,status,ierror)
-      integer count,requests(*),status(*),ierror
-      call mpi_error()
-      return
-      end
-
diff --git a/examples/smpi/NAS/MPI_dummy/mpif.h b/examples/smpi/NAS/MPI_dummy/mpif.h
deleted file mode 100644
index 92686aa361..0000000000
--- a/examples/smpi/NAS/MPI_dummy/mpif.h
+++ /dev/null
@@ -1,27 +0,0 @@
-      integer mpi_comm_world
-      parameter (mpi_comm_world = 0)
-
-      integer mpi_max, mpi_min, mpi_sum
-      parameter (mpi_max = 1, mpi_sum = 2, mpi_min = 3)
-
-      integer mpi_byte, mpi_integer, mpi_real,
-     >                  mpi_double_precision,  mpi_complex,
-     >                  mpi_double_complex
-      parameter (mpi_double_precision = 1,
-     $           mpi_integer = 2, 
-     $           mpi_byte = 3, 
-     $           mpi_real= 4, 
-     $           mpi_complex = 5,
-     $           mpi_double_complex = 6)
-
-      integer mpi_any_source
-      parameter (mpi_any_source = -1)
-
-      integer mpi_err_other
-      parameter (mpi_err_other = -1)
-
-      double precision mpi_wtime
-      external mpi_wtime
-
-      integer mpi_status_size
-      parameter (mpi_status_size=3)
diff --git a/examples/smpi/NAS/MPI_dummy/test.f b/examples/smpi/NAS/MPI_dummy/test.f
deleted file mode 100644
index 081c73c72f..0000000000
--- a/examples/smpi/NAS/MPI_dummy/test.f
+++ /dev/null
@@ -1,10 +0,0 @@
-      program
-      implicit none
-      double precision t, mpi_wtime
-      external mpi_wtime
-      t = 0.0
-      t = mpi_wtime()
-      print *, t
-      t = mpi_wtime()
-      print *, t
-      end
diff --git a/examples/smpi/NAS/MPI_dummy/wtime.c b/examples/smpi/NAS/MPI_dummy/wtime.c
deleted file mode 100644
index 221d2225ae..0000000000
--- a/examples/smpi/NAS/MPI_dummy/wtime.c
+++ /dev/null
@@ -1,13 +0,0 @@
-#include "wtime.h"
-#include <sys/time.h>
-
-void wtime(double *t)
-{
-  static int sec = -1;
-  struct timeval tv;
-  gettimeofday(&tv, (void *)0);
-  if (sec < 0) sec = tv.tv_sec;
-  *t = (tv.tv_sec - sec) + 1.0e-6*tv.tv_usec;
-}
-
-    
diff --git a/examples/smpi/NAS/MPI_dummy/wtime.f b/examples/smpi/NAS/MPI_dummy/wtime.f
deleted file mode 100644
index a1cfde9aa3..0000000000
--- a/examples/smpi/NAS/MPI_dummy/wtime.f
+++ /dev/null
@@ -1,12 +0,0 @@
-      subroutine wtime(tim)
-      real*8 tim
-      dimension tarray(2)
-      call etime(tarray)
-      tim = tarray(1)
-      return
-      end
-
-
-
-
-
diff --git a/examples/smpi/NAS/MPI_dummy/wtime.h b/examples/smpi/NAS/MPI_dummy/wtime.h
deleted file mode 100644
index 12eb0cb0ee..0000000000
--- a/examples/smpi/NAS/MPI_dummy/wtime.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* C/Fortran interface is different on different machines. 
- * You may need to tweak this.
- */
-
-
-#if defined(IBM)
-#define wtime wtime
-#elif defined(CRAY)
-#define wtime WTIME
-#else
-#define wtime wtime_
-#endif
diff --git a/examples/smpi/NAS/MPI_dummy/wtime_sgi64.c b/examples/smpi/NAS/MPI_dummy/wtime_sgi64.c
deleted file mode 100644
index d08d50cd34..0000000000
--- a/examples/smpi/NAS/MPI_dummy/wtime_sgi64.c
+++ /dev/null
@@ -1,74 +0,0 @@
-#include <sys/types.h>
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <sys/syssgi.h>
-#include <sys/immu.h>
-#include <errno.h>
-#include <stdio.h>
-
-/* The following works on SGI Power Challenge systems */
-
-typedef unsigned long iotimer_t;
-
-unsigned int cycleval;
-volatile iotimer_t *iotimer_addr, base_counter;
-double resolution;
-
-/* address_t is an integer type big enough to hold an address */
-typedef unsigned long address_t;
-
-
-
-void timer_init() 
-{
-  
-  int fd;
-  char *virt_addr;
-  address_t phys_addr, page_offset, pagemask, pagebase_addr;
-  
-  pagemask = getpagesize() - 1;
-  errno = 0;
-  phys_addr = syssgi(SGI_QUERY_CYCLECNTR, &cycleval);
-  if (errno != 0) {
-    perror("SGI_QUERY_CYCLECNTR");
-    exit(1);
-  }
-  /* rel_addr = page offset of physical address */
-  page_offset = phys_addr & pagemask;
-  pagebase_addr = phys_addr - page_offset;
-  fd = open("/dev/mmem", O_RDONLY);
-
-  virt_addr = mmap(0, pagemask, PROT_READ, MAP_PRIVATE, fd, pagebase_addr);
-  virt_addr = virt_addr + page_offset;
-  iotimer_addr = (iotimer_t *)virt_addr;
-  /* cycleval in picoseconds to this gives resolution in seconds */
-  resolution = 1.0e-12*cycleval; 
-  base_counter = *iotimer_addr;
-}
-
-void wtime_(double *time) 
-{
-  static int initialized = 0;
-  volatile iotimer_t counter_value;
-  if (!initialized) { 
-    timer_init();
-    initialized = 1;
-  }
-  counter_value = *iotimer_addr - base_counter;
-  *time = (double)counter_value * resolution;
-}
-
-
-void wtime(double *time) 
-{
-  static int initialized = 0;
-  volatile iotimer_t counter_value;
-  if (!initialized) { 
-    timer_init();
-    initialized = 1;
-  }
-  counter_value = *iotimer_addr - base_counter;
-  *time = (double)counter_value * resolution;
-}
-
-
diff --git a/examples/smpi/NAS/Makefile b/examples/smpi/NAS/Makefile
index 7f1bee88a4..04524009d1 100644
--- a/examples/smpi/NAS/Makefile
+++ b/examples/smpi/NAS/Makefile
@@ -1,5 +1,5 @@
 SHELL=/bin/sh
-CLASS=U
+CLASS=S
 NPROCS=1
 SUBTYPE=
 VERSION=
@@ -12,18 +12,11 @@ IS: is
 is: header
 	cd IS; $(MAKE) NPROCS=$(NPROCS) CLASS=$(CLASS)
 
-IS-trace: is-trace
-is-trace: header
-	cd IS-trace; $(MAKE) NPROCS=$(NPROCS) CLASS=$(CLASS)
 
 EP: ep
 ep: header
 	cd EP; $(MAKE) NPROCS=$(NPROCS) CLASS=$(CLASS)
 
-EP-trace: ep-trace
-ep-trace: header
-	cd EP-trace; $(MAKE) NPROCS=$(NPROCS) CLASS=$(CLASS)
-
 EP-sampling: ep-sampling
 ep-sampling: header
 	cd EP-sampling; $(MAKE) NPROCS=$(NPROCS) CLASS=$(CLASS)
@@ -32,14 +25,6 @@ DT: dt
 dt: header
 	cd DT; $(MAKE) CLASS=$(CLASS)
 
-DT-trace: dt-trace
-dt-trace: header
-	cd DT-trace; $(MAKE) CLASS=$(CLASS)
-
-DT-folding: dt-folding
-dt-folding: header
-	cd DT-folding; $(MAKE) CLASS=$(CLASS)
-
 # Awk script courtesy cmg@cray.com, modified by Haoqiang Jin
 suite:
 	@ awk -f sys/suite.awk SMAKE=$(MAKE) $(SFILE) | $(SHELL)
@@ -49,16 +34,12 @@ suite:
 # are defined) but on a really clean system this will won't work
 # because those makefiles need config/make.def
 clean:
-	- rm -f core 
-	- rm -f *~ */core */*~ */*.o */npbparams.h */*.obj */*.exe
-	- rm -f MPI_dummy/test MPI_dummy/libmpi.a
+	- rm -f *~  */*~ */*.o */npbparams.h 
 	- rm -f sys/setparams sys/makesuite sys/setparams.h
-	- rm -f btio.*.out*
 
 veryclean: clean
 	- rm -f config/make.def config/suite.def 
-	- rm -f bin/sp.* bin/lu.* bin/mg.* bin/ft.* bin/bt.* bin/is.* 
-	- rm -f bin/ep.* bin/cg.* bin/dt.*
+	- rm -f bin/is.* bin/ep.* bin/dt.*
 
 header:
 	@ sys/print_header
diff --git a/examples/smpi/NAS/common/print_results.f b/examples/smpi/NAS/common/print_results.f
deleted file mode 100644
index 9feddac482..0000000000
--- a/examples/smpi/NAS/common/print_results.f
+++ /dev/null
@@ -1,115 +0,0 @@
-
-      subroutine print_results(name, class, n1, n2, n3, niter, 
-     >               nprocs_compiled, nprocs_total,
-     >               t, mops, optype, verified, npbversion, 
-     >               compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7)
-      
-      implicit none
-      character*2 name
-      character*1 class
-      integer n1, n2, n3, niter, nprocs_compiled, nprocs_total, j
-      double precision t, mops
-      character optype*24, size*15
-      logical verified
-      character*(*) npbversion, compiletime, 
-     >              cs1, cs2, cs3, cs4, cs5, cs6, cs7
-
-         write (*, 2) name 
- 2       format(//, ' ', A2, ' Benchmark Completed.')
-
-         write (*, 3) Class
- 3       format(' Class           = ', 12x, a12)
-
-c   If this is not a grid-based problem (EP, FT, CG), then
-c   we only print n1, which contains some measure of the
-c   problem size. In that case, n2 and n3 are both zero.
-c   Otherwise, we print the grid size n1xn2xn3
-
-         if ((n2 .eq. 0) .and. (n3 .eq. 0)) then
-            if (name(1:2) .eq. 'EP') then
-               write(size, '(f15.0)' ) 2.d0**n1
-               j = 15
-               if (size(j:j) .eq. '.') j = j - 1
-               write (*,42) size(1:j)
- 42            format(' Size            = ',9x, a15)
-            else
-               write (*,44) n1
- 44            format(' Size            = ',12x, i12)
-            endif
-         else
-            write (*, 4) n1,n2,n3
- 4          format(' Size            =  ',9x, i4,'x',i4,'x',i4)
-         endif
-
-         write (*, 5) niter
- 5       format(' Iterations      = ', 12x, i12)
-         
-         write (*, 6) t
- 6       format(' Time in seconds = ',12x, f12.2)
-         
-         write (*,7) nprocs_total
- 7       format(' Total processes = ', 12x, i12)
-         
-         write (*,8) nprocs_compiled
- 8       format(' Compiled procs  = ', 12x, i12)
-
-         write (*,9) mops
- 9       format(' Mop/s total     = ',12x, f12.2)
-
-         write (*,10) mops/float( nprocs_total )
- 10      format(' Mop/s/process   = ', 12x, f12.2)        
-         
-         write(*, 11) optype
- 11      format(' Operation type  = ', a24)
-
-         if (verified) then 
-            write(*,12) '  SUCCESSFUL'
-         else
-            write(*,12) 'UNSUCCESSFUL'
-         endif
- 12      format(' Verification    = ', 12x, a)
-
-         write(*,13) npbversion
- 13      format(' Version         = ', 12x, a12)
-
-         write(*,14) compiletime
- 14      format(' Compile date    = ', 12x, a12)
-
-
-         write (*,121) cs1
- 121     format(/, ' Compile options:', /, 
-     >          '    MPIF77       = ', A)
-
-         write (*,122) cs2
- 122     format('    FLINK        = ', A)
-
-         write (*,123) cs3
- 123     format('    FMPI_LIB     = ', A)
-
-         write (*,124) cs4
- 124     format('    FMPI_INC     = ', A)
-
-         write (*,125) cs5
- 125     format('    FFLAGS       = ', A)
-
-         write (*,126) cs6
- 126     format('    FLINKFLAGS   = ', A)
-
-         write(*, 127) cs7
- 127     format('    RAND         = ', A)
-        
-         write (*,130)
- 130     format(//' Please send the results of this run to:'//
-     >            ' NPB Development Team '/
-     >            ' Internet: npb@nas.nasa.gov'/
-     >            ' '/
-     >            ' If email is not available, send this to:'//
-     >            ' MS T27A-1'/
-     >            ' NASA Ames Research Center'/
-     >            ' Moffett Field, CA  94035-1000'//
-     >            ' Fax: 650-604-3957'//)
-
-
-         return
-         end
-
diff --git a/examples/smpi/NAS/common/randdp.f b/examples/smpi/NAS/common/randdp.f
deleted file mode 100644
index 64860d96d0..0000000000
--- a/examples/smpi/NAS/common/randdp.f
+++ /dev/null
@@ -1,137 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      double precision function randlc (x, a)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c
-c   This routine returns a uniform pseudorandom double precision number in the
-c   range (0, 1) by using the linear congruential generator
-c
-c   x_{k+1} = a x_k  (mod 2^46)
-c
-c   where 0 < x_k < 2^46 and 0 < a < 2^46.  This scheme generates 2^44 numbers
-c   before repeating.  The argument A is the same as 'a' in the above formula,
-c   and X is the same as x_0.  A and X must be odd double precision integers
-c   in the range (1, 2^46).  The returned value RANDLC is normalized to be
-c   between 0 and 1, i.e. RANDLC = 2^(-46) * x_1.  X is updated to contain
-c   the new seed x_1, so that subsequent calls to RANDLC using the same
-c   arguments will generate a continuous sequence.
-c
-c   This routine should produce the same results on any computer with at least
-c   48 mantissa bits in double precision floating point data.  On 64 bit
-c   systems, double precision should be disabled.
-c
-c   David H. Bailey     October 26, 1990
-c
-c---------------------------------------------------------------------
-
-      implicit none
-
-      double precision r23,r46,t23,t46,a,x,t1,t2,t3,t4,a1,a2,x1,x2,z
-      parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
-     >  t46 = t23 ** 2)
-
-c---------------------------------------------------------------------
-c   Break A into two parts such that A = 2^23 * A1 + A2.
-c---------------------------------------------------------------------
-      t1 = r23 * a
-      a1 = int (t1)
-      a2 = a - t23 * a1
-
-c---------------------------------------------------------------------
-c   Break X into two parts such that X = 2^23 * X1 + X2, compute
-c   Z = A1 * X2 + A2 * X1  (mod 2^23), and then
-c   X = 2^23 * Z + A2 * X2  (mod 2^46).
-c---------------------------------------------------------------------
-      t1 = r23 * x
-      x1 = int (t1)
-      x2 = x - t23 * x1
-      t1 = a1 * x2 + a2 * x1
-      t2 = int (r23 * t1)
-      z = t1 - t23 * t2
-      t3 = t23 * z + a2 * x2
-      t4 = int (r46 * t3)
-      x = t3 - t46 * t4
-      randlc = r46 * x
-
-      return
-      end
-
-
-
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine vranlc (n, x, a, y)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c
-c   This routine generates N uniform pseudorandom double precision numbers in
-c   the range (0, 1) by using the linear congruential generator
-c
-c   x_{k+1} = a x_k  (mod 2^46)
-c
-c   where 0 < x_k < 2^46 and 0 < a < 2^46.  This scheme generates 2^44 numbers
-c   before repeating.  The argument A is the same as 'a' in the above formula,
-c   and X is the same as x_0.  A and X must be odd double precision integers
-c   in the range (1, 2^46).  The N results are placed in Y and are normalized
-c   to be between 0 and 1.  X is updated to contain the new seed, so that
-c   subsequent calls to VRANLC using the same arguments will generate a
-c   continuous sequence.  If N is zero, only initialization is performed, and
-c   the variables X, A and Y are ignored.
-c
-c   This routine is the standard version designed for scalar or RISC systems.
-c   However, it should produce the same results on any single processor
-c   computer with at least 48 mantissa bits in double precision floating point
-c   data.  On 64 bit systems, double precision should be disabled.
-c
-c---------------------------------------------------------------------
-
-      implicit none
-
-      integer i,n
-      double precision y,r23,r46,t23,t46,a,x,t1,t2,t3,t4,a1,a2,x1,x2,z
-      dimension y(*)
-      parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
-     >  t46 = t23 ** 2)
-
-
-c---------------------------------------------------------------------
-c   Break A into two parts such that A = 2^23 * A1 + A2.
-c---------------------------------------------------------------------
-      t1 = r23 * a
-      a1 = int (t1)
-      a2 = a - t23 * a1
-
-c---------------------------------------------------------------------
-c   Generate N results.   This loop is not vectorizable.
-c---------------------------------------------------------------------
-      do i = 1, n
-
-c---------------------------------------------------------------------
-c   Break X into two parts such that X = 2^23 * X1 + X2, compute
-c   Z = A1 * X2 + A2 * X1  (mod 2^23), and then
-c   X = 2^23 * Z + A2 * X2  (mod 2^46).
-c---------------------------------------------------------------------
-        t1 = r23 * x
-        x1 = int (t1)
-        x2 = x - t23 * x1
-        t1 = a1 * x2 + a2 * x1
-        t2 = int (r23 * t1)
-        z = t1 - t23 * t2
-        t3 = t23 * z + a2 * x2
-        t4 = int (r46 * t3)
-        x = t3 - t46 * t4
-        y(i) = r46 * x
-      enddo
-
-      return
-      end
diff --git a/examples/smpi/NAS/common/randdpvec.f b/examples/smpi/NAS/common/randdpvec.f
deleted file mode 100644
index c7080717ce..0000000000
--- a/examples/smpi/NAS/common/randdpvec.f
+++ /dev/null
@@ -1,186 +0,0 @@
-c---------------------------------------------------------------------
-      double precision function randlc (x, a)
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c
-c   This routine returns a uniform pseudorandom double precision number in the
-c   range (0, 1) by using the linear congruential generator
-c
-c   x_{k+1} = a x_k  (mod 2^46)
-c
-c   where 0 < x_k < 2^46 and 0 < a < 2^46.  This scheme generates 2^44 numbers
-c   before repeating.  The argument A is the same as 'a' in the above formula,
-c   and X is the same as x_0.  A and X must be odd double precision integers
-c   in the range (1, 2^46).  The returned value RANDLC is normalized to be
-c   between 0 and 1, i.e. RANDLC = 2^(-46) * x_1.  X is updated to contain
-c   the new seed x_1, so that subsequent calls to RANDLC using the same
-c   arguments will generate a continuous sequence.
-c
-c   This routine should produce the same results on any computer with at least
-c   48 mantissa bits in double precision floating point data.  On 64 bit
-c   systems, double precision should be disabled.
-c
-c   David H. Bailey     October 26, 1990
-c
-c---------------------------------------------------------------------
-
-      implicit none
-
-      double precision r23,r46,t23,t46,a,x,t1,t2,t3,t4,a1,a2,x1,x2,z
-      parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
-     >  t46 = t23 ** 2)
-
-c---------------------------------------------------------------------
-c   Break A into two parts such that A = 2^23 * A1 + A2.
-c---------------------------------------------------------------------
-      t1 = r23 * a
-      a1 = int (t1)
-      a2 = a - t23 * a1
-
-c---------------------------------------------------------------------
-c   Break X into two parts such that X = 2^23 * X1 + X2, compute
-c   Z = A1 * X2 + A2 * X1  (mod 2^23), and then
-c   X = 2^23 * Z + A2 * X2  (mod 2^46).
-c---------------------------------------------------------------------
-      t1 = r23 * x
-      x1 = int (t1)
-      x2 = x - t23 * x1
-
-
-      t1 = a1 * x2 + a2 * x1
-      t2 = int (r23 * t1)
-      z = t1 - t23 * t2
-      t3 = t23 * z + a2 * x2
-      t4 = int (r46 * t3)
-      x = t3 - t46 * t4
-      randlc = r46 * x
-      return
-      end
-
-
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine vranlc (n, x, a, y)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-c---------------------------------------------------------------------
-c   This routine generates N uniform pseudorandom double precision numbers in
-c   the range (0, 1) by using the linear congruential generator
-c   
-c   x_{k+1} = a x_k  (mod 2^46)
-c   
-c   where 0 < x_k < 2^46 and 0 < a < 2^46.  This scheme generates 2^44 numbers
-c   before repeating.  The argument A is the same as 'a' in the above formula,
-c   and X is the same as x_0.  A and X must be odd double precision integers
-c   in the range (1, 2^46).  The N results are placed in Y and are normalized
-c   to be between 0 and 1.  X is updated to contain the new seed, so that
-c   subsequent calls to RANDLC using the same arguments will generate a
-c   continuous sequence.
-c   
-c   This routine generates the output sequence in batches of length NV, for
-c   convenience on vector computers.  This routine should produce the same
-c   results on any computer with at least 48 mantissa bits in double precision
-c   floating point data.  On Cray systems, double precision should be disabled.
-c   
-c   David H. Bailey    August 30, 1990
-c---------------------------------------------------------------------
-
-      integer n
-      double precision x, a, y(*)
-      
-      double precision r23, r46, t23, t46
-      integer nv
-      parameter (r23 = 2.d0 ** (-23), r46 = r23 * r23, t23 = 2.d0 ** 23,
-     >     t46 = t23 * t23, nv = 64)
-      double precision  xv(nv), t1, t2, t3, t4, an, a1, a2, x1, x2, yy
-      integer n1, i, j
-      external randlc
-      double precision randlc
-
-c---------------------------------------------------------------------
-c     Compute the first NV elements of the sequence using RANDLC.
-c---------------------------------------------------------------------
-      t1 = x
-      n1 = min (n, nv)
-
-      do  i = 1, n1
-         xv(i) = t46 * randlc (t1, a)
-      enddo
-
-c---------------------------------------------------------------------
-c     It is not necessary to compute AN, A1 or A2 unless N is greater than NV.
-c---------------------------------------------------------------------
-      if (n .gt. nv) then
-
-c---------------------------------------------------------------------
-c     Compute AN = AA ^ NV (mod 2^46) using successive calls to RANDLC.
-c---------------------------------------------------------------------
-         t1 = a
-         t2 = r46 * a
-
-         do  i = 1, nv - 1
-            t2 = randlc (t1, a)
-         enddo
-
-         an = t46 * t2
-
-c---------------------------------------------------------------------
-c     Break AN into two parts such that AN = 2^23 * A1 + A2.
-c---------------------------------------------------------------------
-         t1 = r23 * an
-         a1 = aint (t1)
-         a2 = an - t23 * a1
-      endif
-
-c---------------------------------------------------------------------
-c     Compute N pseudorandom results in batches of size NV.
-c---------------------------------------------------------------------
-      do  j = 0, n - 1, nv
-         n1 = min (nv, n - j)
-
-c---------------------------------------------------------------------
-c     Compute up to NV results based on the current seed vector XV.
-c---------------------------------------------------------------------
-         do  i = 1, n1
-            y(i+j) = r46 * xv(i)
-         enddo
-
-c---------------------------------------------------------------------
-c     If this is the last pass through the 140 loop, it is not necessary to
-c     update the XV vector.
-c---------------------------------------------------------------------
-         if (j + n1 .eq. n) goto 150
-
-c---------------------------------------------------------------------
-c     Update the XV vector by multiplying each element by AN (mod 2^46).
-c---------------------------------------------------------------------
-         do  i = 1, nv
-            t1 = r23 * xv(i)
-            x1 = aint (t1)
-            x2 = xv(i) - t23 * x1
-            t1 = a1 * x2 + a2 * x1
-            t2 = aint (r23 * t1)
-            yy = t1 - t23 * t2
-            t3 = t23 * yy + a2 * x2
-            t4 = aint (r46 * t3)
-            xv(i) = t3 - t46 * t4
-         enddo
-
-      enddo
-
-c---------------------------------------------------------------------
-c     Save the last seed in X so that subsequent calls to VRANLC will generate
-c     a continuous sequence.
-c---------------------------------------------------------------------
- 150  x = xv(n1)
-
-      return
-      end
-
-c----- end of program ------------------------------------------------
-
diff --git a/examples/smpi/NAS/common/randi8.f b/examples/smpi/NAS/common/randi8.f
deleted file mode 100644
index 21ab8815db..0000000000
--- a/examples/smpi/NAS/common/randi8.f
+++ /dev/null
@@ -1,79 +0,0 @@
-      double precision function randlc(x, a)
-
-c---------------------------------------------------------------------
-c
-c   This routine returns a uniform pseudorandom double precision number in the
-c   range (0, 1) by using the linear congruential generator
-c
-c   x_{k+1} = a x_k  (mod 2^46)
-c
-c   where 0 < x_k < 2^46 and 0 < a < 2^46.  This scheme generates 2^44 numbers
-c   before repeating.  The argument A is the same as 'a' in the above formula,
-c   and X is the same as x_0.  A and X must be odd double precision integers
-c   in the range (1, 2^46).  The returned value RANDLC is normalized to be
-c   between 0 and 1, i.e. RANDLC = 2^(-46) * x_1.  X is updated to contain
-c   the new seed x_1, so that subsequent calls to RANDLC using the same
-c   arguments will generate a continuous sequence.
-
-      implicit none
-      double precision x, a
-      integer*8 i246m1, Lx, La
-      double precision d2m46
-
-      parameter(d2m46=0.5d0**46)
-
-      save i246m1
-      data i246m1/X'00003FFFFFFFFFFF'/
-
-      Lx = X
-      La = A
-
-      Lx   = iand(Lx*La,i246m1)
-      randlc = d2m46*dble(Lx)
-      x    = dble(Lx)
-      return
-      end
-
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-
-      SUBROUTINE VRANLC (N, X, A, Y)
-      implicit none
-      integer n, i
-      double precision x, a, y(*)
-      integer*8 i246m1, Lx, La
-      double precision d2m46
-
-c This doesn't work, because the compiler does the calculation in 32
-c bits and overflows. No standard way (without f90 stuff) to specify
-c that the rhs should be done in 64 bit arithmetic. 
-c      parameter(i246m1=2**46-1)
-
-      parameter(d2m46=0.5d0**46)
-
-      save i246m1
-      data i246m1/X'00003FFFFFFFFFFF'/
-
-c Note that the v6 compiler on an R8000 does something stupid with
-c the above. Using the following instead (or various other things)
-c makes the calculation run almost 10 times as fast. 
-c 
-c      save d2m46
-c      data d2m46/0.0d0/
-c      if (d2m46 .eq. 0.0d0) then
-c         d2m46 = 0.5d0**46
-c      endif
-
-      Lx = X
-      La = A
-      do i = 1, N
-         Lx   = iand(Lx*La,i246m1)
-         y(i) = d2m46*dble(Lx)
-      end do
-      x    = dble(Lx)
-
-      return
-      end
-
diff --git a/examples/smpi/NAS/common/randi8_safe.f b/examples/smpi/NAS/common/randi8_safe.f
deleted file mode 100644
index f725b6a1fb..0000000000
--- a/examples/smpi/NAS/common/randi8_safe.f
+++ /dev/null
@@ -1,64 +0,0 @@
-      double precision function randlc(x, a)
-
-c---------------------------------------------------------------------
-c
-c   This routine returns a uniform pseudorandom double precision number in the
-c   range (0, 1) by using the linear congruential generator
-c
-c   x_{k+1} = a x_k  (mod 2^46)
-c
-c   where 0 < x_k < 2^46 and 0 < a < 2^46.  This scheme generates 2^44 numbers
-c   before repeating.  The argument A is the same as 'a' in the above formula,
-c   and X is the same as x_0.  A and X must be odd double precision integers
-c   in the range (1, 2^46).  The returned value RANDLC is normalized to be
-c   between 0 and 1, i.e. RANDLC = 2^(-46) * x_1.  X is updated to contain
-c   the new seed x_1, so that subsequent calls to RANDLC using the same
-c   arguments will generate a continuous sequence.
-
-      implicit none
-      double precision x, a
-      integer*8 Lx, La, a1, a2, x1, x2, xa
-      double precision d2m46
-      parameter(d2m46=0.5d0**46)
-
-      Lx = x
-      La = A
-      a1 = ibits(La, 23, 23)
-      a2 = ibits(La, 0, 23)
-      x1 = ibits(Lx, 23, 23)
-      x2 = ibits(Lx, 0, 23)
-      xa = ishft(ibits(a1*x2+a2*x1, 0, 23), 23) + a2*x2
-      Lx   = ibits(xa,0, 46)
-      x    = dble(Lx)
-      randlc = d2m46*x
-      return
-      end
-
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-
-      SUBROUTINE VRANLC (N, X, A, Y)
-      implicit none
-      integer n, i
-      double precision x, a, y(*)
-      integer*8 Lx, La, a1, a2, x1, x2, xa
-      double precision d2m46
-      parameter(d2m46=0.5d0**46)
-
-      Lx = X
-      La = A
-      a1 = ibits(La, 23, 23)
-      a2 = ibits(La, 0, 23)
-      do i = 1, N
-         x1 = ibits(Lx, 23, 23)
-         x2 = ibits(Lx, 0, 23)
-         xa = ishft(ibits(a1*x2+a2*x1, 0, 23), 23) + a2*x2
-         Lx   = ibits(xa,0, 46)
-         y(i) = d2m46*dble(Lx)
-      end do
-      x = dble(Lx)
-      return
-      end
-
diff --git a/examples/smpi/NAS/common/timers.f b/examples/smpi/NAS/common/timers.f
deleted file mode 100644
index 7a19ccf56f..0000000000
--- a/examples/smpi/NAS/common/timers.f
+++ /dev/null
@@ -1,78 +0,0 @@
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-      
-      subroutine timer_clear(n)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      implicit none
-      integer n
-      
-      double precision start(64), elapsed(64)
-      common /tt/ start, elapsed
-
-      elapsed(n) = 0.0
-      return
-      end
-
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine timer_start(n)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      implicit none
-      integer n
-      include 'mpif.h'
-      double precision start(64), elapsed(64)
-      common /tt/ start, elapsed
-
-      start(n) = MPI_Wtime()
-
-      return
-      end
-      
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      subroutine timer_stop(n)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      implicit none
-      integer n
-      include 'mpif.h'
-      double precision start(64), elapsed(64)
-      common /tt/ start, elapsed
-      double precision t, now
-      now = MPI_Wtime()
-      t = now - start(n)
-      elapsed(n) = elapsed(n) + t
-
-      return
-      end
-
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      double precision function timer_read(n)
-
-c---------------------------------------------------------------------
-c---------------------------------------------------------------------
-
-      implicit none
-      integer n
-      double precision start(64), elapsed(64)
-      common /tt/ start, elapsed
-      
-      timer_read = elapsed(n)
-      return
-      end
-
diff --git a/examples/smpi/NAS/config/make.dummy b/examples/smpi/NAS/config/make.dummy
deleted file mode 100644
index 16b2350667..0000000000
--- a/examples/smpi/NAS/config/make.dummy
+++ /dev/null
@@ -1,7 +0,0 @@
-FMPI_LIB  = -L../MPI_dummy -lmpi
-FMPI_INC  = -I../MPI_dummy
-CMPI_LIB  = -L../MPI_dummy -lmpi
-CMPI_INC  = -I../MPI_dummy
-default:: ${PROGRAM} libmpi.a
-libmpi.a: 
-	cd ../MPI_dummy; $(MAKE) F77=$(MPIF77) CC=$(MPICC)
-- 
2.20.1