From aac4ba41ea27fe8168bebed009abebd7e5c3f006 Mon Sep 17 00:00:00 2001
From: degomme <augustin.degomme@unibas.ch>
Date: Sat, 11 Feb 2017 21:48:07 +0100
Subject: [PATCH] Update perf

---
 .../smpi/mpich3-test/perf/CMakeLists.txt      |   4 +-
 teshsuite/smpi/mpich3-test/perf/allredtrace.c | 160 ++--
 teshsuite/smpi/mpich3-test/perf/commcreatep.c |  93 +--
 teshsuite/smpi/mpich3-test/perf/dtpack.c      | 764 +++++++++---------
 teshsuite/smpi/mpich3-test/perf/indexperf.c   | 194 ++---
 teshsuite/smpi/mpich3-test/perf/manyrma.c     | 552 ++++++-------
 teshsuite/smpi/mpich3-test/perf/nestvec.c     | 200 ++---
 teshsuite/smpi/mpich3-test/perf/nestvec2.c    | 241 +++---
 .../smpi/mpich3-test/perf/non_zero_root.c     | 105 +--
 teshsuite/smpi/mpich3-test/perf/sendrecvl.c   | 371 +++++----
 teshsuite/smpi/mpich3-test/perf/testlist      |   8 +-
 teshsuite/smpi/mpich3-test/perf/timer.c       |  48 +-
 .../smpi/mpich3-test/perf/transp-datatype.c   | 124 ++-
 teshsuite/smpi/mpich3-test/perf/twovec.c      |  49 +-
 14 files changed, 1471 insertions(+), 1442 deletions(-)

diff --git a/teshsuite/smpi/mpich3-test/perf/CMakeLists.txt b/teshsuite/smpi/mpich3-test/perf/CMakeLists.txt
index afeca4ccd5..9bdedeb313 100644
--- a/teshsuite/smpi/mpich3-test/perf/CMakeLists.txt
+++ b/teshsuite/smpi/mpich3-test/perf/CMakeLists.txt
@@ -9,8 +9,8 @@ if(enable_smpi AND enable_smpi_MPICH3_testsuite)
   include_directories(BEFORE "${CMAKE_HOME_DIRECTORY}/include/smpi")
   include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../include/")
 
-  foreach(file commcreatep non_zero_root sendrecvl timer transp-datatype twovec)
-    #not compiled files  dtpack indexperf manyrma nestvec2 nestvec
+  foreach(file commcreatep non_zero_root sendrecvl timer transp-datatype twovec dtpack indexperf  nestvec2 nestvec)
+    #not compiled files manyrma
     add_executable(${file} ${file}.c)
     target_link_libraries(${file} simgrid mtest_c)
   endforeach()
diff --git a/teshsuite/smpi/mpich3-test/perf/allredtrace.c b/teshsuite/smpi/mpich3-test/perf/allredtrace.c
index 070271be19..8fb077bfbf 100644
--- a/teshsuite/smpi/mpich3-test/perf/allredtrace.c
+++ b/teshsuite/smpi/mpich3-test/perf/allredtrace.c
@@ -5,13 +5,13 @@
  */
 
 /*
- * This code is intended to test the trace overhead when using an 
+ * This code is intended to test the trace overhead when using an
  * MPI tracing package.  To perform the test, follow these steps:
  *
  * 1) Run with the versbose mode selected to determine the delay argument
  *    to use in subsequent tests:
  *      mpiexec -n 4096 allredtrace -v
- *    Assume that the computed delay count is 6237; that value is used in 
+ *    Assume that the computed delay count is 6237; that value is used in
  *    the following.
  *
  * 2) Run with an explicit delay count, without tracing enabled:
@@ -20,7 +20,7 @@
  * 3) Build allredtrace with tracing enabled, then run:
  *      mpiexec -n 4096 allredtrace -delaycount 6237
  *
- * Compare the total times.  The tracing version should take slightly 
+ * Compare the total times.  The tracing version should take slightly
  * longer but no more than, for example, 15%.
  */
 #include "mpi.h"
@@ -30,126 +30,132 @@
 
 static int verbose = 0;
 static int lCount = 0;
-void Delay( int );
-void SetupDelay( double );
+void Delay(int);
+void SetupDelay(double);
 
-int main( int argc, char *argv[] )
+int main(int argc, char *argv[])
 {
     double usecPerCall = 100;
     double t, t1, tsum;
     int i, nLoop = 100;
     int rank;
 
-    MPI_Init( &argc, &argv );
-    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
-
-    /* Process arguments.  We allow the delay count to be set from the 
-       command line to ensure reproducibility*/
-    for (i=1; i<argc; i++) {
-	if (strcmp( argv[i], "-delaycount" ) == 0) {
-	    i++;
-	    lCount = atoi( argv[i] );
-	}
-	else if (strcmp( argv[i], "-v" ) == 0) {
-	    verbose = 1;
-	}
-	else {
-	    fprintf( stderr, "Unrecognized argument %s\n", argv[i] );
-	    exit(1);
-	}
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+    /* Process arguments.  We allow the delay count to be set from the
+     * command line to ensure reproducibility */
+    for (i = 1; i < argc; i++) {
+        if (strcmp(argv[i], "-delaycount") == 0) {
+            /* -delaycount may be the last argument: never read argv[argc] (NULL); 0 means "calibrate" */
+            lCount = (++i < argc) ? atoi(argv[i]) : 0;
+        }
+        else if (strcmp(argv[i], "-v") == 0) {
+            verbose = 1;
+        }
+        else {
+            fprintf(stderr, "Unrecognized argument %s\n", argv[i]);
+            exit(1);
+        }
     }
 
     if (lCount == 0) {
-	SetupDelay( usecPerCall );
+        SetupDelay(usecPerCall);
     }
-    
-    MPI_Barrier( MPI_COMM_WORLD );
+    t1 = 0.0;   /* Allreduce send buffer below: give it a defined value before it is read */
+    MPI_Barrier(MPI_COMM_WORLD);
 
     t = MPI_Wtime();
-    for (i=0; i<nLoop; i++) {
-	MPI_Allreduce( &t1, &tsum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );
-	Delay( lCount );
+    for (i = 0; i < nLoop; i++) {
+        MPI_Allreduce(&t1, &tsum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+        Delay(lCount);
     }
     t = MPI_Wtime() - t;
-    MPI_Barrier( MPI_COMM_WORLD );
+    MPI_Barrier(MPI_COMM_WORLD);
     if (rank == 0) {
-	printf( "For delay count %d, time is %e\n", lCount, t );
+        printf("For delay count %d, time is %e\n", lCount, t);
     }
-    
-    MPI_Barrier( MPI_COMM_WORLD );
+
+    MPI_Barrier(MPI_COMM_WORLD);
 
     MPI_Finalize();
-    
+
     return 0;
 }
 
-void SetupDelay( double usec )
+void SetupDelay(double usec)
 {
     double t, tick;
     double sec = 1.0e-6 * usec;
     int nLoop, i, direction;
-    
+
 
     /* Compute the number of times to run the tests to get an accurate
-       number given the timer resolution. */
+     * number given the timer resolution. */
     nLoop = 1;
     tick = 100 * MPI_Wtick();
     do {
-	nLoop = 2 * nLoop;
-	t = MPI_Wtime();
-	for (i=0; i<nLoop; i++) {
-	    MPI_Wtime();
-	}
-	t = MPI_Wtime() - t;
+        nLoop = 2 * nLoop;
+        t = MPI_Wtime();
+        for (i = 0; i < nLoop; i++) {
+            MPI_Wtime();
+        }
+        t = MPI_Wtime() - t;
     }
-    while ( t < tick && nLoop < 100000 );
+    while (t < tick && nLoop < 100000);
+
+    if (verbose)
+        printf("nLoop = %d\n", nLoop);
 
-    if (verbose) printf( "nLoop = %d\n", nLoop );
-    
     /* Start with an estimated count */
     lCount = 128;
     direction = 0;
     while (1) {
-	t = MPI_Wtime();
-	for (i=0; i<nLoop; i++) {
-	    Delay( lCount );
-	}
-	t = MPI_Wtime() - t;
-	t = t / nLoop;
-	if (verbose) printf( "lCount = %d, time = %e\n", lCount, t );
-	if (t > 10 * tick) nLoop = nLoop / 2;
-	
-	/* Compare measured delay */
-	if (t > 2*sec) {
-	    lCount = lCount / 2;
-	    if (direction == 1) break;
-	    direction = -1;
-	}
-	else if (t < sec / 2) {
-	    lCount = lCount * 2;
-	    if (direction == -1) break;
-	    direction = 1;
-	}
-	else if (t < sec) {
-	    /* sec/2 <= t < sec , so estimate the lCount to hit sec */
-	    lCount = (sec/t) * lCount;
-	}
-	else 
-	    break;
+        t = MPI_Wtime();
+        for (i = 0; i < nLoop; i++) {
+            Delay(lCount);
+        }
+        t = MPI_Wtime() - t;
+        t = t / nLoop;
+        if (verbose)
+            printf("lCount = %d, time = %e\n", lCount, t);
+        if (t > 10 * tick)
+            nLoop = nLoop / 2;
+
+        /* Compare measured delay */
+        if (t > 2 * sec) {
+            lCount = lCount / 2;
+            if (direction == 1)
+                break;
+            direction = -1;
+        }
+        else if (t < sec / 2) {
+            lCount = lCount * 2;
+            if (direction == -1)
+                break;
+            direction = 1;
+        }
+        else if (t < sec) {
+            /* sec/2 <= t < sec , so estimate the lCount to hit sec */
+            lCount = (sec / t) * lCount;
+        }
+        else
+            break;
     }
 
-    if (verbose) printf( "lCount = %d, t = %e\n", lCount, t );
+    if (verbose)
+        printf("lCount = %d, t = %e\n", lCount, t);
 
     /* Should coordinate with the other processes - take the max? */
 }
 
-double delayCounter = 0;
-void Delay( int count )
+volatile double delayCounter = 0;
+void Delay(int count)
 {
     int i;
 
     delayCounter = 0.0;
-    for (i=0; i<count; i++) {
-	delayCounter += 2.73;
+    for (i = 0; i < count; i++) {
+        delayCounter += 2.73;
     }
 }
diff --git a/teshsuite/smpi/mpich3-test/perf/commcreatep.c b/teshsuite/smpi/mpich3-test/perf/commcreatep.c
index 975dc8951f..e8d1f63e2f 100644
--- a/teshsuite/smpi/mpich3-test/perf/commcreatep.c
+++ b/teshsuite/smpi/mpich3-test/perf/commcreatep.c
@@ -12,68 +12,69 @@
 #define MAX_LOG_WSIZE 31
 #define MAX_LOOP 20
 
-int main( int argc, char *argv[] )
+int main(int argc, char *argv[])
 {
     MPI_Group gworld, g;
-    MPI_Comm  comm, newcomm[MAX_LOOP];
-    int       wsize, wrank, range[1][3], errs=0;
-    double    t[MAX_LOG_WSIZE], tf;
-    int       maxi, i, k, ts, gsize[MAX_LOG_WSIZE];
+    MPI_Comm comm, newcomm[MAX_LOOP];
+    int wsize, wrank, range[1][3], errs = 0;
+    double t[MAX_LOG_WSIZE], tf;
+    int maxi, i, k, ts, gsize[MAX_LOG_WSIZE];
 
-    MTest_Init( &argc, &argv );
+    MTest_Init(&argc, &argv);
 
-    MPI_Comm_size( MPI_COMM_WORLD, &wsize );
-    MPI_Comm_rank( MPI_COMM_WORLD, &wrank );
+    MPI_Comm_size(MPI_COMM_WORLD, &wsize);
+    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
 
     if (wrank == 0)
-	MTestPrintfMsg( 1, "size\ttime\n" );
+        MTestPrintfMsg(1, "size\ttime\n");
 
-    MPI_Comm_group( MPI_COMM_WORLD, &gworld );
+    MPI_Comm_group(MPI_COMM_WORLD, &gworld);
     ts = 1;
     comm = MPI_COMM_WORLD;
-    for (i=0; ts<=wsize; i++, ts = ts + ts) {
-	/* Create some groups with at most ts members */
-	range[0][0] = ts-1;
-	range[0][1] = 0;
-	range[0][2] = -1;
-	MPI_Group_range_incl( gworld, 1, range, &g );
-	
-	MPI_Barrier( MPI_COMM_WORLD );
-	tf       = MPI_Wtime();
-	for (k=0; k<MAX_LOOP; k++) 
-	    MPI_Comm_create( comm, g, &newcomm[k] );
-	tf     = MPI_Wtime() - tf;
-	MPI_Allreduce( &tf, &t[i], 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD );
-	t[i] = t[i] / MAX_LOOP;
-	gsize[i] = ts;
-	if (wrank == 0)
-	    MTestPrintfMsg( 1, "%d\t%e\n", ts, t[i] );
-	MPI_Group_free( &g );
-	if (newcomm[0] != MPI_COMM_NULL) 
-	    for (k=0; k<MAX_LOOP; k++) 
-		MPI_Comm_free( &newcomm[k] );
+    for (i = 0; ts <= wsize; i++, ts = ts + ts) {
+        /* Create some groups with at most ts members */
+        range[0][0] = ts - 1;
+        range[0][1] = 0;
+        range[0][2] = -1;
+        MPI_Group_range_incl(gworld, 1, range, &g);
+
+        MPI_Barrier(MPI_COMM_WORLD);
+        tf = MPI_Wtime();
+        for (k = 0; k < MAX_LOOP; k++)
+            MPI_Comm_create(comm, g, &newcomm[k]);
+        tf = MPI_Wtime() - tf;
+        MPI_Allreduce(&tf, &t[i], 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+        t[i] = t[i] / MAX_LOOP;
+        gsize[i] = ts;
+        if (wrank == 0)
+            MTestPrintfMsg(1, "%d\t%e\n", ts, t[i]);
+        MPI_Group_free(&g);
+        if (newcomm[0] != MPI_COMM_NULL)
+            for (k = 0; k < MAX_LOOP; k++)
+                MPI_Comm_free(&newcomm[k]);
     }
-    MPI_Group_free( &gworld );
-    maxi = i-1;
+    MPI_Group_free(&gworld);
+    maxi = i - 1;
 
     /* The cost should be linear or at worst ts*log(ts).
-       We can check this in a number of ways.  
+     * We can check this in a number of ways.
      */
     if (wrank == 0) {
-	for (i=4; i<=maxi; i++) {
-	    double rdiff;
-	    if (t[i] > 0) {
-		rdiff = (t[i] - t[i-1]) / t[i];
-		if (rdiff >= 4) {
-		    errs++;
-		    fprintf( stderr, "Relative difference between group of size %d and %d is %e exceeds 4\n", 
-			     gsize[i-1], gsize[i], rdiff );
-		}
-	    }
-	}
+        for (i = 4; i <= maxi; i++) {
+            double rdiff;
+            if (t[i] > 0) {
+                rdiff = (t[i] - t[i - 1]) / t[i];
+                if (rdiff >= 4) {
+                    errs++;
+                    fprintf(stderr,
+                            "Relative difference between group of size %d and %d is %e exceeds 4\n",
+                            gsize[i - 1], gsize[i], rdiff);
+                }
+            }
+        }
     }
 
-    MTest_Finalize( errs );
+    MTest_Finalize(errs);
 
     MPI_Finalize();
 
diff --git a/teshsuite/smpi/mpich3-test/perf/dtpack.c b/teshsuite/smpi/mpich3-test/perf/dtpack.c
index a31a55dc20..8b2e05318e 100644
--- a/teshsuite/smpi/mpich3-test/perf/dtpack.c
+++ b/teshsuite/smpi/mpich3-test/perf/dtpack.c
@@ -4,13 +4,13 @@
  *      See COPYRIGHT in top-level directory.
  */
 /*
- * This code may be used to test the performance of some of the 
+ * This code may be used to test the performance of some of the
  * noncontiguous datatype operations, including vector and indexed
- * pack and unpack operations.  To simplify the use of this code for 
+ * pack and unpack operations.  To simplify the use of this code for
  * tuning an MPI implementation, it uses no communication, just the
  * MPI_Pack and MPI_Unpack routines.  In addition, the individual tests are
  * in separate routines, making it easier to compare the compiler-generated
- * code for the user (manual) pack/unpack with the code used by 
+ * code for the user (manual) pack/unpack with the code used by
  * the MPI implementation.  Further, to be fair to the MPI implementation,
  * the routines are passed the source and destination buffers; this ensures
  * that the compiler can't optimize for statically allocated buffers.
@@ -33,441 +33,455 @@ static int verbose = 0;
 double mean(double *list, int count);
 double mean(double *list, int count)
 {
-	double retval;
-	int i;
+    double retval;
+    int i;
 
-	retval = 0;
-	for (i = 0; i < count; i++)
-		retval += list[i];
-	retval /= count;
+    retval = 0;
+    for (i = 0; i < count; i++)
+        retval += list[i];
+    retval /= count;
 
-	return retval;
+    return retval;
 }
 
 double noise(double *list, int count);
 double noise(double *list, int count)
 {
-	double *margin, retval;
-	int i;
+    double *margin, retval;
+    int i;
 
-	if (!(margin = malloc(count * sizeof(double)))) {
-		printf("Unable to allocate memory\n");
-		return -1;
-	}
+    if (!(margin = malloc(count * sizeof(double)))) {
+        printf("Unable to allocate memory\n");
+        return -1;
+    }
 
-	for (i = 0; i < count; i++)
-		margin[i] = list[i] / mean(list, count);
+    for (i = 0; i < count; i++)
+        margin[i] = list[i] / mean(list, count);
 
-	retval = 0;
-	for (i = 0; i < count; i++) {
-		retval += ((margin[i] - 1) * (margin[i] - 1));
-	}
-	retval /= count;
-	if (retval < 0) retval = -retval;
+    retval = 0;
+    for (i = 0; i < count; i++)
+        retval += ((margin[i] - 1) * (margin[i] - 1));
+    free(margin);               /* scratch array is finished with; it used to leak on every call */
+    retval /= count;
+    if (retval < 0)
+        retval = -retval;
 
-	return retval;
+    return retval;
 }
 
 /* Here are the tests */
 
 /* Test packing a vector of individual doubles */
 /* We don't use restrict in the function args because assignments between
-   restrict pointers is not valid in C and some compilers, such as the 
+   restrict pointers is not valid in C and some compilers, such as the
    IBM xlc compilers, flag that use as an error.*/
-int TestVecPackDouble( int n, int stride, 
-		       double *avgTimeUser, double *avgTimeMPI,
-		       double *dest, const double *src );
-int TestVecPackDouble( int n, int stride, 
-		       double *avgTimeUser, double *avgTimeMPI,
-		       double *dest, const double *src )
+int TestVecPackDouble(int n, int stride,
+                      double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src);
+int TestVecPackDouble(int n, int stride,
+                      double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src)
 {
-	double *restrict d_dest;
-	const double *restrict d_src;
-	register int i, j;
-	int          rep, position;
-	double       t1, t2, t[NTRIALS];
-	MPI_Datatype vectype;
-
-	/* User code */
-	if (verbose) printf("TestVecPackDouble (USER): ");
-	for (j = 0; j < NTRIALS; j++) {
-		t1 = MPI_Wtime();
-		for (rep=0; rep<N_REPS; rep++) {
-			i = n;
-			d_dest = dest;
-			d_src  = src;
-			while (i--) {
-				*d_dest++ = *d_src;
-				d_src += stride;
-			}
-		}
-		t2 = MPI_Wtime() - t1;
-		t[j] = t2;
-		if (verbose) printf("%.3f ", t[j]);
-	}
-	if (verbose) printf("[%.3f]\n", noise(t, NTRIALS));
-	/* If there is too much noise, discard the test */
-	if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
-		*avgTimeUser = 0;
-		*avgTimeMPI = 0;
-		if (verbose)
-			printf("Too much noise; discarding measurement\n");
-		return 0;
-	}
-	*avgTimeUser = mean(t, NTRIALS) / N_REPS;
-
-	/* MPI Vector code */
-	MPI_Type_vector( n, 1, stride, MPI_DOUBLE, &vectype );
-	MPI_Type_commit( &vectype );
-
-	if (verbose) printf("TestVecPackDouble (MPI): ");
-	for (j = 0; j < NTRIALS; j++) {
-		t1 = MPI_Wtime();
-		for (rep=0; rep<N_REPS; rep++) {
-			position = 0;
-			MPI_Pack( (void *)src, 1, vectype, dest, n*sizeof(double),
-				  &position, MPI_COMM_SELF );
-		}
-		t2 = MPI_Wtime() - t1;
-		t[j] = t2;
-		if (verbose) printf("%.3f ", t[j]);
-	}
-	if (verbose) printf("[%.3f]\n", noise(t, NTRIALS));
-	/* If there is too much noise, discard the test */
-	if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
-		*avgTimeUser = 0;
-		*avgTimeMPI = 0;
-		if (verbose)
-			printf("Too much noise; discarding measurement\n");
-	}
-	else {
-	    *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
-	}
-
-	MPI_Type_free( &vectype );
-
-	return 0;
+    double *restrict d_dest;
+    const double *restrict d_src;
+    register int i, j;
+    int rep, position;
+    double t1, t2, t[NTRIALS];
+    MPI_Datatype vectype;
+
+    /* User code */
+    if (verbose)
+        printf("TestVecPackDouble (USER): ");
+    for (j = 0; j < NTRIALS; j++) {
+        t1 = MPI_Wtime();
+        for (rep = 0; rep < N_REPS; rep++) {
+            i = n;
+            d_dest = dest;
+            d_src = src;
+            while (i--) {
+                *d_dest++ = *d_src;
+                d_src += stride;
+            }
+        }
+        t2 = MPI_Wtime() - t1;
+        t[j] = t2;
+        if (verbose)
+            printf("%.3f ", t[j]);
+    }
+    if (verbose)
+        printf("[%.3f]\n", noise(t, NTRIALS));
+    /* If there is too much noise, discard the test */
+    if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
+        *avgTimeUser = 0;
+        *avgTimeMPI = 0;
+        if (verbose)
+            printf("Too much noise; discarding measurement\n");
+        return 0;
+    }
+    *avgTimeUser = mean(t, NTRIALS) / N_REPS;
+
+    /* MPI Vector code */
+    MPI_Type_vector(n, 1, stride, MPI_DOUBLE, &vectype);
+    MPI_Type_commit(&vectype);
+
+    if (verbose)
+        printf("TestVecPackDouble (MPI): ");
+    for (j = 0; j < NTRIALS; j++) {
+        t1 = MPI_Wtime();
+        for (rep = 0; rep < N_REPS; rep++) {
+            position = 0;
+            MPI_Pack((void *) src, 1, vectype, dest, n * sizeof(double), &position, MPI_COMM_SELF);
+        }
+        t2 = MPI_Wtime() - t1;
+        t[j] = t2;
+        if (verbose)
+            printf("%.3f ", t[j]);
+    }
+    if (verbose)
+        printf("[%.3f]\n", noise(t, NTRIALS));
+    /* If there is too much noise, discard the test */
+    if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
+        *avgTimeUser = 0;
+        *avgTimeMPI = 0;
+        if (verbose)
+            printf("Too much noise; discarding measurement\n");
+    }
+    else {
+        *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
+    }
+
+    MPI_Type_free(&vectype);
+
+    return 0;
 }
 
 /* Test unpacking a vector of individual doubles */
 /* See above for why restrict is not used in the function args */
-int TestVecUnPackDouble( int n, int stride, 
-		       double *avgTimeUser, double *avgTimeMPI,
-		       double *dest, const double *src );
-int TestVecUnPackDouble( int n, int stride, 
-		       double *avgTimeUser, double *avgTimeMPI,
-		       double *dest, const double *src )
+int TestVecUnPackDouble(int n, int stride,
+                        double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src);
+int TestVecUnPackDouble(int n, int stride,
+                        double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src)
 {
-	double *restrict d_dest;
-	const double *restrict d_src;
-	register int i, j;
-	int          rep, position;
-	double       t1, t2, t[NTRIALS];
-	MPI_Datatype vectype;
-
-	/* User code */
-	if (verbose) printf("TestVecUnPackDouble (USER): ");
-	for (j = 0; j < NTRIALS; j++) {
-		t1 = MPI_Wtime();
-		for (rep=0; rep<N_REPS; rep++) {
-			i = n;
-			d_dest = dest;
-			d_src  = src;
-			while (i--) {
-				*d_dest = *d_src++;
-				d_dest += stride;
-			}
-		}
-		t2 = MPI_Wtime() - t1;
-		t[j] = t2;
-		if (verbose) printf("%.3f ", t[j]);
-	}
-	if (verbose) printf("[%.3f]\n", noise(t, NTRIALS));
-	/* If there is too much noise, discard the test */
-	if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
-		*avgTimeUser = 0;
-		*avgTimeMPI = 0;
-		if (verbose)
-			printf("Too much noise; discarding measurement\n");
-		return 0;
-	}
-	*avgTimeUser = mean(t, NTRIALS) / N_REPS;
-    
-	/* MPI Vector code */
-	MPI_Type_vector( n, 1, stride, MPI_DOUBLE, &vectype );
-	MPI_Type_commit( &vectype );
-
-	if (verbose) printf("TestVecUnPackDouble (MPI): ");
-	for (j = 0; j < NTRIALS; j++) {
-		t1 = MPI_Wtime();
-		for (rep=0; rep<N_REPS; rep++) {
-			position = 0;
-			MPI_Unpack( (void *)src, n*sizeof(double), 
-				    &position, dest, 1, vectype, MPI_COMM_SELF );
-		}
-		t2 = MPI_Wtime() - t1;
-		t[j] = t2;
-		if (verbose) printf("%.3f ", t[j]);
-	}
-	if (verbose) printf("[%.3f]\n", noise(t, NTRIALS));
-	/* If there is too much noise, discard the test */
-	if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
-		*avgTimeUser = 0;
-		*avgTimeMPI = 0;
-		if (verbose)
-			printf("Too much noise; discarding measurement\n");
-	}
-	else {
-	    *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
-	}
-
-	MPI_Type_free( &vectype );
-
-	return 0;
+    double *restrict d_dest;
+    const double *restrict d_src;
+    register int i, j;
+    int rep, position;
+    double t1, t2, t[NTRIALS];
+    MPI_Datatype vectype;
+
+    /* User code */
+    if (verbose)
+        printf("TestVecUnPackDouble (USER): ");
+    for (j = 0; j < NTRIALS; j++) {
+        t1 = MPI_Wtime();
+        for (rep = 0; rep < N_REPS; rep++) {
+            i = n;
+            d_dest = dest;
+            d_src = src;
+            while (i--) {
+                *d_dest = *d_src++;
+                d_dest += stride;
+            }
+        }
+        t2 = MPI_Wtime() - t1;
+        t[j] = t2;
+        if (verbose)
+            printf("%.3f ", t[j]);
+    }
+    if (verbose)
+        printf("[%.3f]\n", noise(t, NTRIALS));
+    /* If there is too much noise, discard the test */
+    if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
+        *avgTimeUser = 0;
+        *avgTimeMPI = 0;
+        if (verbose)
+            printf("Too much noise; discarding measurement\n");
+        return 0;
+    }
+    *avgTimeUser = mean(t, NTRIALS) / N_REPS;
+
+    /* MPI Vector code */
+    MPI_Type_vector(n, 1, stride, MPI_DOUBLE, &vectype);
+    MPI_Type_commit(&vectype);
+
+    if (verbose)
+        printf("TestVecUnPackDouble (MPI): ");
+    for (j = 0; j < NTRIALS; j++) {
+        t1 = MPI_Wtime();
+        for (rep = 0; rep < N_REPS; rep++) {
+            position = 0;
+            MPI_Unpack((void *) src, n * sizeof(double),
+                       &position, dest, 1, vectype, MPI_COMM_SELF);
+        }
+        t2 = MPI_Wtime() - t1;
+        t[j] = t2;
+        if (verbose)
+            printf("%.3f ", t[j]);
+    }
+    if (verbose)
+        printf("[%.3f]\n", noise(t, NTRIALS));
+    /* If there is too much noise, discard the test */
+    if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
+        *avgTimeUser = 0;
+        *avgTimeMPI = 0;
+        if (verbose)
+            printf("Too much noise; discarding measurement\n");
+    }
+    else {
+        *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
+    }
+
+    MPI_Type_free(&vectype);
+
+    return 0;
 }
 
 /* Test packing a vector of 2-individual doubles */
 /* See above for why restrict is not used in the function args */
-int TestVecPack2Double( int n, int stride, 
-			double *avgTimeUser, double *avgTimeMPI,
-			double *dest, const double *src );
-int TestVecPack2Double( int n, int stride, 
-			double *avgTimeUser, double *avgTimeMPI,
-			double *dest, const double *src )
+int TestVecPack2Double(int n, int stride,
+                       double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src);
+int TestVecPack2Double(int n, int stride,
+                       double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src)
 {
-	double *restrict d_dest;
-	const double *restrict d_src;
-	register int i, j;
-	int          rep, position;
-	double       t1, t2, t[NTRIALS];
-	MPI_Datatype vectype;
-
-	/* User code */
-	if (verbose) printf("TestVecPack2Double (USER): ");
-	for (j = 0; j < NTRIALS; j++) {
-		t1 = MPI_Wtime();
-		for (rep=0; rep<N_REPS; rep++) {
-			i = n;
-			d_dest = dest;
-			d_src  = src;
-			while (i--) {
-				*d_dest++ = d_src[0];
-				*d_dest++ = d_src[1];
-				d_src += stride;
-			}
-		}
-		t2 = MPI_Wtime() - t1;
-		t[j] = t2;
-		if (verbose) printf("%.3f ", t[j]);
-	}
-	if (verbose) printf("[%.3f]\n", noise(t, NTRIALS));
-	/* If there is too much noise, discard the test */
-	if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
-		*avgTimeUser = 0;
-		*avgTimeMPI = 0;
-		if (verbose)
-			printf("Too much noise; discarding measurement\n");
-		return 0;
-	}
-	*avgTimeUser = mean(t, NTRIALS) / N_REPS;
-    
-	/* MPI Vector code */
-	MPI_Type_vector( n, 2, stride, MPI_DOUBLE, &vectype );
-	MPI_Type_commit( &vectype );
-    
-	if (verbose) printf("TestVecPack2Double (MPI): ");
-	for (j = 0; j < NTRIALS; j++) {
-		t1 = MPI_Wtime();
-		for (rep=0; rep<N_REPS; rep++) {
-			position = 0;
-			MPI_Pack( (void *)src, 1, vectype, dest, 2*n*sizeof(double),
-				  &position, MPI_COMM_SELF );
-		}
-		t2 = MPI_Wtime() - t1;
-		t[j] = t2;
-		if (verbose) printf("%.3f ", t[j]);
-	}
-	if (verbose) printf("[%.3f]\n", noise(t, NTRIALS));
-	/* If there is too much noise, discard the test */
-	if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
-		*avgTimeUser = 0;
-		*avgTimeMPI = 0;
-		if (verbose)
-			printf("Too much noise; discarding measurement\n");
-	}
-	else {
-	    *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
-	}
-	MPI_Type_free( &vectype );
-
-	return 0;
+    double *restrict d_dest;
+    const double *restrict d_src;
+    register int i, j;
+    int rep, position;
+    double t1, t2, t[NTRIALS];
+    MPI_Datatype vectype;
+
+    /* User code */
+    if (verbose)
+        printf("TestVecPack2Double (USER): ");
+    for (j = 0; j < NTRIALS; j++) {
+        t1 = MPI_Wtime();
+        for (rep = 0; rep < N_REPS; rep++) {
+            i = n;
+            d_dest = dest;
+            d_src = src;
+            while (i--) {
+                *d_dest++ = d_src[0];
+                *d_dest++ = d_src[1];
+                d_src += stride;
+            }
+        }
+        t2 = MPI_Wtime() - t1;
+        t[j] = t2;
+        if (verbose)
+            printf("%.3f ", t[j]);
+    }
+    if (verbose)
+        printf("[%.3f]\n", noise(t, NTRIALS));
+    /* If there is too much noise, discard the test */
+    if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
+        *avgTimeUser = 0;
+        *avgTimeMPI = 0;
+        if (verbose)
+            printf("Too much noise; discarding measurement\n");
+        return 0;
+    }
+    *avgTimeUser = mean(t, NTRIALS) / N_REPS;
+
+    /* MPI Vector code */
+    MPI_Type_vector(n, 2, stride, MPI_DOUBLE, &vectype);
+    MPI_Type_commit(&vectype);
+
+    if (verbose)
+        printf("TestVecPack2Double (MPI): ");
+    for (j = 0; j < NTRIALS; j++) {
+        t1 = MPI_Wtime();
+        for (rep = 0; rep < N_REPS; rep++) {
+            position = 0;
+            MPI_Pack((void *) src, 1, vectype, dest, 2 * n * sizeof(double),
+                     &position, MPI_COMM_SELF);
+        }
+        t2 = MPI_Wtime() - t1;
+        t[j] = t2;
+        if (verbose)
+            printf("%.3f ", t[j]);
+    }
+    if (verbose)
+        printf("[%.3f]\n", noise(t, NTRIALS));
+    /* If there is too much noise, discard the test */
+    if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
+        *avgTimeUser = 0;
+        *avgTimeMPI = 0;
+        if (verbose)
+            printf("Too much noise; discarding measurement\n");
+    }
+    else {
+        *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
+    }
+    MPI_Type_free(&vectype);
+
+    return 0;
 }
 
 /* This creates an indexed type that is like a vector (for simplicity
-   of construction).  There is a possibility that the MPI implementation 
+   of construction).  There is a possibility that the MPI implementation
    will recognize and simplify this (e.g., in MPI_Type_commit); if so,
-   let us know and we'll add a version that is not as regular 
+   let us know and we'll add a version that is not as regular
 */
 /* See above for why restrict is not used in the function args */
-int TestIndexPackDouble( int n, int stride, 
-			 double *avgTimeUser, double *avgTimeMPI,
-			 double *dest, const double *src );
-int TestIndexPackDouble( int n, int stride, 
-			 double *avgTimeUser, double *avgTimeMPI,
-			 double *dest, const double *src )
+int TestIndexPackDouble(int n, int stride,
+                        double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src);
+int TestIndexPackDouble(int n, int stride,
+                        double *avgTimeUser, double *avgTimeMPI, double *dest, const double *src)
 {
-	double *restrict d_dest;
-	const double *restrict d_src;
-	register int i, j;
-	int          rep, position;
-	int          *restrict displs = 0;
-	double       t1, t2, t[NTRIALS];
-	MPI_Datatype indextype;
-
-	displs = (int *)malloc( n * sizeof(int) );
-	for (i=0; i<n; i++) displs[i] = i * stride;
-
-	/* User code */
-	if (verbose) printf("TestIndexPackDouble (USER): ");
-	for (j = 0; j < NTRIALS; j++) {
-		t1 = MPI_Wtime();
-		for (rep=0; rep<N_REPS; rep++) {
-			i = n;
-			d_dest = dest;
-			d_src  = src;
-			for (i=0; i<n; i++) {
-				*d_dest++ = d_src[displs[i]];
-			}
-		}
-		t2 = MPI_Wtime() - t1;
-		t[j] = t2;
-		if (verbose) printf("%.3f ", t[j]);
-	}
-	if (verbose) printf("[%.3f]\n", noise(t, NTRIALS));
-	/* If there is too much noise, discard the test */
-	if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
-		*avgTimeUser = 0;
-		*avgTimeMPI = 0;
-		if (verbose)
-			printf("Too much noise; discarding measurement\n");
-		return 0;
-	}
-	*avgTimeUser = mean(t, NTRIALS) / N_REPS;
-    
-	/* MPI Index code */
-	MPI_Type_create_indexed_block( n, 1, displs, MPI_DOUBLE, &indextype );
-	MPI_Type_commit( &indextype );
-
-	free( displs );
-    
-	if (verbose) printf("TestIndexPackDouble (MPI): ");
-	for (j = 0; j < NTRIALS; j++) {
-		t1 = MPI_Wtime();
-		for (rep=0; rep<N_REPS; rep++) {
-			position = 0;
-			MPI_Pack( (void *)src, 1, indextype, dest, n*sizeof(double),
-				  &position, MPI_COMM_SELF );
-		}
-		t2 = MPI_Wtime() - t1;
-		t[j] = t2;
-		if (verbose) printf("%.3f ", t[j]);
-	}
-	if (verbose) printf("[%.3f]\n", noise(t, NTRIALS));
-	/* If there is too much noise, discard the test */
-	if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
-		*avgTimeUser = 0;
-		*avgTimeMPI = 0;
-		if (verbose)
-			printf("Too much noise; discarding measurement\n");
-	}
-	else {
-	    *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
-	}
-	MPI_Type_free( &indextype );
-
-	return 0;
+    double *restrict d_dest;
+    const double *restrict d_src;
+    register int i, j;
+    int rep, position;
+    int *restrict displs = 0;
+    double t1, t2, t[NTRIALS];
+    MPI_Datatype indextype;
+
+    displs = (int *) malloc(n * sizeof(int));
+    for (i = 0; i < n; i++)
+        displs[i] = i * stride;
+
+    /* User code */
+    if (verbose)
+        printf("TestIndexPackDouble (USER): ");
+    for (j = 0; j < NTRIALS; j++) {
+        t1 = MPI_Wtime();
+        for (rep = 0; rep < N_REPS; rep++) {
+            d_dest = dest;
+            d_src = src;
+            for (i = 0; i < n; i++) {
+                *d_dest++ = d_src[displs[i]];
+            }
+        }
+        t2 = MPI_Wtime() - t1;
+        t[j] = t2;
+        if (verbose)
+            printf("%.3f ", t[j]);
+    }
+    if (verbose)
+        printf("[%.3f]\n", noise(t, NTRIALS));
+    /* If there is too much noise, discard the test */
+    if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
+        *avgTimeUser = 0;
+        *avgTimeMPI = 0;
+        if (verbose)
+            printf("Too much noise; discarding measurement\n");
+        free(displs);           /* this early exit skips the free() further down */
+        return 0;
+    }
+    *avgTimeUser = mean(t, NTRIALS) / N_REPS;
+
+    /* MPI Index code */
+    MPI_Type_create_indexed_block(n, 1, displs, MPI_DOUBLE, &indextype);
+    MPI_Type_commit(&indextype);
+
+    free(displs);
+
+    if (verbose)
+        printf("TestIndexPackDouble (MPI): ");
+    for (j = 0; j < NTRIALS; j++) {
+        t1 = MPI_Wtime();
+        for (rep = 0; rep < N_REPS; rep++) {
+            position = 0;
+            MPI_Pack((void *) src, 1, indextype, dest, n * sizeof(double),
+                     &position, MPI_COMM_SELF);
+        }
+        t2 = MPI_Wtime() - t1;
+        t[j] = t2;
+        if (verbose)
+            printf("%.3f ", t[j]);
+    }
+    if (verbose)
+        printf("[%.3f]\n", noise(t, NTRIALS));
+    /* If there is too much noise, discard the test */
+    if (noise(t, NTRIALS) > VARIANCE_THRESHOLD) {
+        *avgTimeUser = 0;
+        *avgTimeMPI = 0;
+        if (verbose)
+            printf("Too much noise; discarding measurement\n");
+    }
+    else {
+        *avgTimeMPI = mean(t, NTRIALS) / N_REPS;
+    }
+    MPI_Type_free(&indextype);
+
+    return 0;
 }
 
-int Report( const char *name, const char *packname, 
-	    double avgTimeMPI, double avgTimeUser );
-int Report( const char *name, const char *packname, 
-	    double avgTimeMPI, double avgTimeUser )
+int Report(const char *name, const char *packname, double avgTimeMPI, double avgTimeUser);
+int Report(const char *name, const char *packname, double avgTimeMPI, double avgTimeUser)
 {
-	double diffTime, maxTime;
-	int errs=0;
-
-	/* Move this into a common routine */
-	diffTime = avgTimeMPI - avgTimeUser;
-	if (diffTime < 0) diffTime = - diffTime;
-	if (avgTimeMPI > avgTimeUser) maxTime = avgTimeMPI;
-	else                          maxTime = avgTimeUser;
-
-	if (verbose) {
-		printf( "%-30s:\t%g\t%g\t(%g%%)\n", name, 
-			avgTimeMPI, avgTimeUser,
-			100 * (diffTime / maxTime) );
-		fflush(stdout);
-	}
-	if (avgTimeMPI > avgTimeUser && (diffTime > THRESHOLD * maxTime)) {
-		errs++;
-		printf( "%s:\tMPI %s code is too slow: MPI %g\t User %g\n",
-			name, packname, avgTimeMPI, avgTimeUser );
-	}
-
-	return errs;
+    double diffTime, maxTime;   /* |MPI - user| and the larger of the two timings */
+    int errs = 0;               /* set to 1 when the MPI path is reported as too slow */
+
+    /* Relative gap between the MPI and hand-written timings (TODO: move this into a common routine) */
+    diffTime = avgTimeMPI - avgTimeUser;
+    if (diffTime < 0)
+        diffTime = -diffTime;
+    if (avgTimeMPI > avgTimeUser)
+        maxTime = avgTimeMPI;
+    else
+        maxTime = avgTimeUser;
+
+    if (verbose) {
+        printf("%-30s:\t%g\t%g\t(%g%%)\n", name, avgTimeMPI, avgTimeUser,
+               (maxTime != 0) ? 100 * (diffTime / maxTime) : 0.0);  /* both timings are 0 after a discarded run */
+        fflush(stdout);
+    }
+    if (avgTimeMPI > avgTimeUser && (diffTime > THRESHOLD * maxTime)) {
+        errs++;
+        printf("%s:\tMPI %s code is too slow: MPI %g\t User %g\n",
+               name, packname, avgTimeMPI, avgTimeUser);
+    }
+
+    return errs;
 }
 
 /* Finally, here's the main program */
-int main( int argc, char *argv[] )
+int main(int argc, char *argv[])
 {
-    int n, stride, err, errs = 0;
+    int n, stride, errs = 0;
     void *dest, *src;
     double avgTimeUser, avgTimeMPI;
 
-    MPI_Init( &argc, &argv );
-    if (getenv("MPITEST_VERBOSE")) verbose = 1;
+    MPI_Init(&argc, &argv);
+    if (getenv("MPITEST_VERBOSE"))
+        verbose = 1;
 
-    n      = 30000;
+    n = 30000;
     stride = 4;
-    dest = (void *)malloc( n * sizeof(double) );
-    src  = (void *)malloc( n * ((1+stride)*sizeof(double)) );
+    dest = (void *) malloc(n * sizeof(double));
+    src = (void *) malloc(n * ((1 + stride) * sizeof(double)));
     /* Touch the source and destination arrays */
-    memset( src, 0, n * (1+stride)*sizeof(double) );
-    memset( dest, 0, n * sizeof(double) );
+    memset(src, 0, n * (1 + stride) * sizeof(double));
+    memset(dest, 0, n * sizeof(double));
 
-    err = TestVecPackDouble( n, stride, &avgTimeUser, &avgTimeMPI,
-			     dest, src );
-    errs += Report( "VecPackDouble", "Pack", avgTimeMPI, avgTimeUser );
+    TestVecPackDouble(n, stride, &avgTimeUser, &avgTimeMPI, dest, src);
+    errs += Report("VecPackDouble", "Pack", avgTimeMPI, avgTimeUser);
 
-    err = TestVecUnPackDouble( n, stride, &avgTimeUser, &avgTimeMPI,
-			       src, dest );
-    errs += Report( "VecUnPackDouble", "Unpack", avgTimeMPI, avgTimeUser );
+    TestVecUnPackDouble(n, stride, &avgTimeUser, &avgTimeMPI, src, dest);
+    errs += Report("VecUnPackDouble", "Unpack", avgTimeMPI, avgTimeUser);
 
-    err = TestIndexPackDouble( n, stride, &avgTimeUser, &avgTimeMPI,
-			     dest, src );
-    errs += Report( "VecIndexDouble", "Pack", avgTimeMPI, avgTimeUser );
+    TestIndexPackDouble(n, stride, &avgTimeUser, &avgTimeMPI, dest, src);
+    errs += Report("VecIndexDouble", "Pack", avgTimeMPI, avgTimeUser);
 
     free(dest);
     free(src);
-    
-    dest = (void *)malloc( 2*n * sizeof(double) );
-    src  = (void *)malloc( (1 + n) * ((1+stride)*sizeof(double)) );
-    memset( dest, 0, 2*n * sizeof(double) );
-    memset( src, 0, (1+n) * (1+stride)*sizeof(double) );
-    err = TestVecPack2Double( n, stride, &avgTimeUser, &avgTimeMPI,
-			      dest, src );
-    errs += Report( "VecPack2Double", "Pack", avgTimeMPI, avgTimeUser );
+
+    dest = (void *) malloc(2 * n * sizeof(double));
+    src = (void *) malloc((1 + n) * ((1 + stride) * sizeof(double)));
+    memset(dest, 0, 2 * n * sizeof(double));
+    memset(src, 0, (1 + n) * (1 + stride) * sizeof(double));
+    TestVecPack2Double(n, stride, &avgTimeUser, &avgTimeMPI, dest, src);
+    errs += Report("VecPack2Double", "Pack", avgTimeMPI, avgTimeUser);
 
     free(dest);
     free(src);
-    
+
 
 
     if (errs == 0) {
-	printf( " No Errors\n" );
+        printf(" No Errors\n");
     }
     else {
-	printf( " Found %d performance problems\n", errs );
+        printf(" Found %d performance problems\n", errs);
     }
 
     fflush(stdout);
diff --git a/teshsuite/smpi/mpich3-test/perf/indexperf.c b/teshsuite/smpi/mpich3-test/perf/indexperf.c
index e05463bc89..38c3822533 100644
--- a/teshsuite/smpi/mpich3-test/perf/indexperf.c
+++ b/teshsuite/smpi/mpich3-test/perf/indexperf.c
@@ -23,163 +23,163 @@
 
 static int verbose = 0;
 
-int main( int argc, char **argv )
+int main(int argc, char **argv)
 {
-    double       *inbuf, *outbuf, *outbuf2;
-    MPI_Aint     lb, extent;
-    int          *index_displacement;
-    int          icount, errs=0;
-    int          i, packsize, position, inbufsize;
+    double *inbuf, *outbuf, *outbuf2;
+    MPI_Aint lb, extent;
+    int *index_displacement;
+    int icount, errs = 0;
+    int i, packsize, position, inbufsize;
     MPI_Datatype itype1, stype1;
-    double       t0, t1;
-    double       tpack, tspack, tmanual;
-    int          ntry;
+    double t0, t1;
+    double tpack, tspack, tmanual;
+    int ntry;
 
-    MPI_Init( &argc, &argv );
+    MPI_Init(&argc, &argv);
 
     icount = 2014;
 
     /* Create a simple block indexed datatype */
-    index_displacement = (int *)malloc( icount * sizeof(int) );
+    index_displacement = (int *) malloc(icount * sizeof(int));
     if (!index_displacement) {
-	fprintf( stderr, "Unable to allocated index array of size %d\n",
-		 icount );
-	MPI_Abort( MPI_COMM_WORLD, 1 );
+        fprintf(stderr, "Unable to allocated index array of size %d\n", icount);
+        MPI_Abort(MPI_COMM_WORLD, 1);
     }
 
-    for (i=0; i<icount; i++) {
-	index_displacement[i] = (i * 3 + (i%3));
+    for (i = 0; i < icount; i++) {
+        index_displacement[i] = (i * 3 + (i % 3));
     }
 
-    MPI_Type_create_indexed_block( icount, 1, index_displacement, MPI_DOUBLE, 
-				   &itype1 );
-    MPI_Type_commit( &itype1 );
-    
+    MPI_Type_create_indexed_block(icount, 1, index_displacement, MPI_DOUBLE, &itype1);
+    MPI_Type_commit(&itype1);
+
 #if defined(MPICH) && defined(PRINT_DATATYPE_INTERNALS)
     /* To use MPIDU_Datatype_debug to print the datatype internals,
-       you must configure MPICH with --enable-g=log */
+     * you must configure MPICH with --enable-g=log */
     if (verbose) {
-	printf( "Block index datatype:\n" );
-	MPIDU_Datatype_debug( itype1, 10 );
+        printf("Block index datatype:\n");
+        MPIDU_Datatype_debug(itype1, 10);
     }
 #endif
-    MPI_Type_get_extent( itype1, &lb, &extent );
+    MPI_Type_get_extent(itype1, &lb, &extent);
 
-    MPI_Pack_size( 1, itype1, MPI_COMM_WORLD, &packsize );
+    MPI_Pack_size(1, itype1, MPI_COMM_WORLD, &packsize);
 
     inbufsize = extent / sizeof(double);
 
-    inbuf   = (double *)malloc( extent );
-    outbuf  = (double *)malloc( packsize );
-    outbuf2 = (double *)malloc( icount * sizeof(double) );
+    inbuf = (double *) malloc(extent);
+    outbuf = (double *) malloc(packsize);
+    outbuf2 = (double *) malloc(icount * sizeof(double));
     if (!inbuf) {
-	fprintf( stderr, "Unable to allocate %ld for inbuf\n", (long)extent );
-	MPI_Abort( MPI_COMM_WORLD, 1 );
+        fprintf(stderr, "Unable to allocate %ld for inbuf\n", (long) extent);
+        MPI_Abort(MPI_COMM_WORLD, 1);
     }
     if (!outbuf) {
-	fprintf( stderr, "Unable to allocate %ld for outbuf\n", (long)packsize );
-	MPI_Abort( MPI_COMM_WORLD, 1 );
+        fprintf(stderr, "Unable to allocate %ld for outbuf\n", (long) packsize);
+        MPI_Abort(MPI_COMM_WORLD, 1);
     }
     if (!outbuf2) {
-	fprintf( stderr, "Unable to allocate %ld for outbuf2\n", (long)packsize );
-	MPI_Abort( MPI_COMM_WORLD, 1 );
+        fprintf(stderr, "Unable to allocate %ld for outbuf2\n", (long) (icount * sizeof(double)));
+        MPI_Abort(MPI_COMM_WORLD, 1);
     }
-    for (i=0; i<inbufsize; i++) {
-	inbuf[i] = (double)i;
+    for (i = 0; i < inbufsize; i++) {
+        inbuf[i] = (double) i;
     }
     position = 0;
     /* Warm up the code and data */
-    MPI_Pack( inbuf, 1, itype1, outbuf, packsize, &position, MPI_COMM_WORLD );
+    MPI_Pack(inbuf, 1, itype1, outbuf, packsize, &position, MPI_COMM_WORLD);
 
     tpack = 1e12;
     for (ntry = 0; ntry < 5; ntry++) {
-	position = 0;
-	t0 = MPI_Wtime();
-	MPI_Pack( inbuf, 1, itype1, outbuf, packsize, &position, 
-		  MPI_COMM_WORLD );
-	t1 = MPI_Wtime() - t0;
-	if (t1 < tpack) tpack = t1;
+        position = 0;
+        t0 = MPI_Wtime();
+        MPI_Pack(inbuf, 1, itype1, outbuf, packsize, &position, MPI_COMM_WORLD);
+        t1 = MPI_Wtime() - t0;
+        if (t1 < tpack)
+            tpack = t1;
     }
 
-    { int one = 1; MPI_Aint displ = (MPI_Aint) inbuf;
-    MPI_Type_create_struct( 1, &one, &displ, &itype1, &stype1 );
-    MPI_Type_commit( &stype1 );
+    {
+        int one = 1;
+        MPI_Aint displ = (MPI_Aint) inbuf;
+        MPI_Type_create_struct(1, &one, &displ, &itype1, &stype1);
+        MPI_Type_commit(&stype1);
     }
 
     position = 0;
     /* Warm up the code and data */
-    MPI_Pack( MPI_BOTTOM, 1, stype1, outbuf, packsize, &position, MPI_COMM_WORLD );
+    MPI_Pack(MPI_BOTTOM, 1, stype1, outbuf, packsize, &position, MPI_COMM_WORLD);
 
     tspack = 1e12;
     for (ntry = 0; ntry < 5; ntry++) {
-	position = 0;
-	t0 = MPI_Wtime();
-	MPI_Pack( MPI_BOTTOM, 1, stype1, outbuf, packsize, &position, 
-		  MPI_COMM_WORLD );
-	t1 = MPI_Wtime() - t0;
-	if (t1 < tspack) tspack = t1;
+        position = 0;
+        t0 = MPI_Wtime();
+        MPI_Pack(MPI_BOTTOM, 1, stype1, outbuf, packsize, &position, MPI_COMM_WORLD);
+        t1 = MPI_Wtime() - t0;
+        if (t1 < tspack)
+            tspack = t1;
     }
 
-    /* 
-       Simple manual pack (without explicitly unrolling the index block)
-    */
+    /*
+     * Simple manual pack (without explicitly unrolling the index block)
+     */
     tmanual = 1e12;
     for (ntry = 0; ntry < 5; ntry++) {
-	const double *ppe = (const double *)inbuf;
-	const int    *id  = (const int *)index_displacement;
-	int k, j;
-	t0 = MPI_Wtime();
-	position = 0;
-	for (i=0; i<icount; i++) { 
-	    outbuf2[position++] = ppe[id[i]];
-	}
-	t1 = MPI_Wtime() - t0;
-	if (t1 < tmanual) tmanual = t1;
-	/* Check on correctness */
+        const double *ppe = (const double *) inbuf;
+        const int *id = (const int *) index_displacement;
+        t0 = MPI_Wtime();
+        position = 0;
+        for (i = 0; i < icount; i++) {
+            outbuf2[position++] = ppe[id[i]];
+        }
+        t1 = MPI_Wtime() - t0;
+        if (t1 < tmanual)
+            tmanual = t1;
+        /* Check on correctness */
 #ifdef PACK_IS_NATIVE
-	if (memcmp( outbuf, outbuf2, position ) != 0) {
-	    printf( "Panic - pack buffers differ\n" );
-	}
+        if (memcmp(outbuf, outbuf2, position * sizeof(double)) != 0) {  /* position counts doubles */
+            printf("Panic - pack buffers differ\n");
+        }
 #endif
     }
 
     if (verbose) {
-	printf( "Bytes packed = %d\n", position );
-	printf( "MPI_Pack time = %e, manual pack time = %e\n", 
-		tpack, tmanual );
-	printf( "Pack with struct = %e\n", tspack );
+        printf("Bytes packed = %d\n", (int) (position * sizeof(double)));  /* position is an element count */
+        printf("MPI_Pack time = %e, manual pack time = %e\n", tpack, tmanual);
+        printf("Pack with struct = %e\n", tspack);
     }
 
-    /* The threshold here permits the MPI datatype to perform at up to 
-       only one half the performance of simple user code.  Note that the
-       example code above may be made faster through careful use of const, 
-       restrict, and unrolling if the compiler doesn't already do that. */
+    /* The threshold here permits the MPI datatype to perform at up to
+     * only one half the performance of simple user code.  Note that the
+     * example code above may be made faster through careful use of const,
+     * restrict, and unrolling if the compiler doesn't already do that. */
     if (2 * tmanual < tpack) {
-	errs++;
-	printf( "MPI_Pack (block index) time = %e, manual pack time = %e\n", tpack, tmanual );
-	printf( "MPI_Pack time should be less than 2 times the manual time\n" );
-	printf( "For most informative results, be sure to compile this test with optimization\n" );
+        errs++;
+        printf("MPI_Pack (block index) time = %e, manual pack time = %e\n", tpack, tmanual);
+        printf("MPI_Pack time should be less than 2 times the manual time\n");
+        printf("For most informative results, be sure to compile this test with optimization\n");
     }
     if (2 * tmanual < tspack) {
-	errs++;
-	printf( "MPI_Pack (struct of block index)) time = %e, manual pack time = %e\n", tspack, tmanual );
-	printf( "MPI_Pack time should be less than 2 times the manual time\n" );
-	printf( "For most informative results, be sure to compile this test with optimization\n" );
+        errs++;
+        printf("MPI_Pack (struct of block index)) time = %e, manual pack time = %e\n", tspack,
+               tmanual);
+        printf("MPI_Pack time should be less than 2 times the manual time\n");
+        printf("For most informative results, be sure to compile this test with optimization\n");
     }
     if (errs) {
-        printf( " Found %d errors\n", errs );
+        printf(" Found %d errors\n", errs);
     }
     else {
-        printf( " No Errors\n" );
-    } 
-
-    MPI_Type_free( &itype1 );
-    MPI_Type_free( &stype1 );
-    
-    free( inbuf );
-    free( outbuf );
-    free( outbuf2 );
-    free( index_displacement );
+        printf(" No Errors\n");
+    }
+
+    MPI_Type_free(&itype1);
+    MPI_Type_free(&stype1);
+
+    free(inbuf);
+    free(outbuf);
+    free(outbuf2);
+    free(index_displacement);
 
     MPI_Finalize();
     return 0;
diff --git a/teshsuite/smpi/mpich3-test/perf/manyrma.c b/teshsuite/smpi/mpich3-test/perf/manyrma.c
index 618581d07f..c7eb0be4a2 100644
--- a/teshsuite/smpi/mpich3-test/perf/manyrma.c
+++ b/teshsuite/smpi/mpich3-test/perf/manyrma.c
@@ -4,11 +4,11 @@
  *      See COPYRIGHT in top-level directory.
  */
 
-/* This test measures the performance of many rma operations to a single 
+/* This test measures the performance of many rma operations to a single
    target process.
    It uses a number of operations (put or accumulate) to different
-   locations in the target window 
-   This is one of the ways that RMA may be used, and is used in the 
+   locations in the target window
+   This is one of the ways that RMA may be used, and is used in the
    reference implementation of the graph500 benchmark.
 */
 #include "mpi.h"
@@ -20,9 +20,10 @@
 #define MAX_RMA_SIZE 16
 #define MAX_RUNS 10
 
-typedef enum { SYNC_NONE=0, 
-	       SYNC_ALL=-1, SYNC_FENCE=1, SYNC_LOCK=2, SYNC_PSCW=4 } sync_t;
-typedef enum { RMA_NONE=0, RMA_ALL=-1, RMA_PUT=1, RMA_ACC=2, RMA_GET=4 } rma_t;
+typedef enum { SYNC_NONE = 0,
+    SYNC_ALL = -1, SYNC_FENCE = 1, SYNC_LOCK = 2, SYNC_PSCW = 4
+} sync_t;
+typedef enum { RMA_NONE = 0, RMA_ALL = -1, RMA_PUT = 1, RMA_ACC = 2, RMA_GET = 4 } rma_t;
 /* Note GET not yet implemented */
 sync_t syncChoice = SYNC_ALL;
 rma_t rmaChoice = RMA_ALL;
@@ -35,361 +36,370 @@ static int verbose = 1;
 static int barrierSync = 0;
 static double tickThreshold = 0.0;
 
-void PrintResults( int cnt, timing t[] );
-void RunAccFence( MPI_Win win, int destRank, int cnt, int sz, timing t[] );
-void RunAccLock( MPI_Win win, int destRank, int cnt, int sz, timing t[] );
-void RunPutFence( MPI_Win win, int destRank, int cnt, int sz, timing t[] );
-void RunPutLock( MPI_Win win, int destRank, int cnt, int sz, timing t[] );
-void RunAccPSCW( MPI_Win win, int destRank, int cnt, int sz, 
-		 MPI_Group exposureGroup, MPI_Group accessGroup, timing t[] );
-void RunPutPSCW( MPI_Win win, int destRank, int cnt, int sz, 
-		 MPI_Group exposureGroup, MPI_Group accessGroup, timing t[] );
-
-int main( int argc, char *argv[] )
+void PrintResults(int cnt, timing t[]);
+void RunAccFence(MPI_Win win, int destRank, int cnt, int sz, timing t[]);
+void RunAccLock(MPI_Win win, int destRank, int cnt, int sz, timing t[]);
+void RunPutFence(MPI_Win win, int destRank, int cnt, int sz, timing t[]);
+void RunPutLock(MPI_Win win, int destRank, int cnt, int sz, timing t[]);
+void RunAccPSCW(MPI_Win win, int destRank, int cnt, int sz,
+                MPI_Group exposureGroup, MPI_Group accessGroup, timing t[]);
+void RunPutPSCW(MPI_Win win, int destRank, int cnt, int sz,
+                MPI_Group exposureGroup, MPI_Group accessGroup, timing t[]);
+
+int main(int argc, char *argv[])
 {
-    int arraysize, i, cnt, sz, maxCount=MAX_COUNT, *arraybuffer;
+    int arraysize, i, cnt, sz, maxCount = MAX_COUNT, *arraybuffer;
     int wrank, wsize, destRank, srcRank;
     MPI_Win win;
     MPI_Group wgroup, accessGroup, exposureGroup;
     timing t[MAX_RUNS];
-    int    maxSz = MAX_RMA_SIZE;
+    int maxSz = MAX_RMA_SIZE;
 
-    MPI_Init( &argc, &argv );
+    MPI_Init(&argc, &argv);
 
     /* Determine clock accuracy */
     tickThreshold = 10.0 * MPI_Wtick();
-    MPI_Allreduce( MPI_IN_PLACE, &tickThreshold, 1, MPI_DOUBLE, MPI_MAX, 
-		   MPI_COMM_WORLD );
-
-    for (i=1; i<argc; i++) {
-	if (strcmp( argv[i], "-put" ) == 0) {
-	    if (rmaChoice == RMA_ALL) rmaChoice = RMA_NONE;
-	    rmaChoice  |= RMA_PUT;
-	}
-	else if (strcmp( argv[i], "-acc" ) == 0) {
-	    if (rmaChoice == RMA_ALL) rmaChoice = RMA_NONE;
-	    rmaChoice  |= RMA_ACC;
-	}
-	else if (strcmp( argv[i], "-fence" ) == 0) {
-	    if (syncChoice == SYNC_ALL) syncChoice = SYNC_NONE;
-	    syncChoice |= SYNC_FENCE;
-	}
-	else if (strcmp( argv[i], "-lock" ) == 0) {
-	    if (syncChoice == SYNC_ALL) syncChoice = SYNC_NONE;
-	    syncChoice |= SYNC_LOCK;
-	}
-	else if (strcmp( argv[i], "-pscw" ) == 0) {
-	    if (syncChoice == SYNC_ALL) syncChoice = SYNC_NONE;
-	    syncChoice |= SYNC_PSCW;
-	}
-	else if (strcmp( argv[i], "-maxsz" ) == 0) {
-	    i++;
-	    maxSz = atoi( argv[i] );
-	}
-	else if (strcmp( argv[i], "-maxcount" ) == 0) {
-	    i++;
-	    maxCount = atoi( argv[i] );
-	}
-	else if (strcmp( argv[i], "-barrier" ) == 0) {
-	    barrierSync = 1;
-	}
-	else {
-	    fprintf( stderr, "Unrecognized argument %s\n", argv[i] );
-	    fprintf( stderr, "%s [ -put ] [ -acc ] [ -lock ] [ -fence ] [ -pscw ] [ -barrier ]  [ -maxsz msgsize ]\n", argv[0] );
-	    MPI_Abort( MPI_COMM_WORLD, 1 );
-	}
+    MPI_Allreduce(MPI_IN_PLACE, &tickThreshold, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+
+    for (i = 1; i < argc; i++) {
+        if (strcmp(argv[i], "-put") == 0) {
+            if (rmaChoice == RMA_ALL)
+                rmaChoice = RMA_NONE;
+            rmaChoice |= RMA_PUT;
+        }
+        else if (strcmp(argv[i], "-acc") == 0) {
+            if (rmaChoice == RMA_ALL)
+                rmaChoice = RMA_NONE;
+            rmaChoice |= RMA_ACC;
+        }
+        else if (strcmp(argv[i], "-fence") == 0) {
+            if (syncChoice == SYNC_ALL)
+                syncChoice = SYNC_NONE;
+            syncChoice |= SYNC_FENCE;
+        }
+        else if (strcmp(argv[i], "-lock") == 0) {
+            if (syncChoice == SYNC_ALL)
+                syncChoice = SYNC_NONE;
+            syncChoice |= SYNC_LOCK;
+        }
+        else if (strcmp(argv[i], "-pscw") == 0) {
+            if (syncChoice == SYNC_ALL)
+                syncChoice = SYNC_NONE;
+            syncChoice |= SYNC_PSCW;
+        }
+        else if (strcmp(argv[i], "-maxsz") == 0) {
+            if (++i < argc)     /* option given last: keep the default, never atoi(argv[argc]) */
+                maxSz = atoi(argv[i]);
+        }
+        else if (strcmp(argv[i], "-maxcount") == 0) {
+            if (++i < argc)     /* option given last: keep the default, never atoi(argv[argc]) */
+                maxCount = atoi(argv[i]);
+        }
+        else if (strcmp(argv[i], "-barrier") == 0) {
+            barrierSync = 1;
+        }
+        else {
+            fprintf(stderr, "Unrecognized argument %s\n", argv[i]);
+            fprintf(stderr,     /* usage: every option accepted by the parsing loop */
+                    "%s [ -put ] [ -acc ] [ -lock ] [ -fence ] [ -pscw ] [ -barrier ]  [ -maxsz msgsize ] [ -maxcount count ]\n",
+                    argv[0]);
+            MPI_Abort(MPI_COMM_WORLD, 1);
+        }
     }
-    
-    MPI_Comm_rank( MPI_COMM_WORLD, &wrank );
-    MPI_Comm_size( MPI_COMM_WORLD, &wsize );
+
+    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
+    MPI_Comm_size(MPI_COMM_WORLD, &wsize);
     destRank = wrank + 1;
-    while (destRank >= wsize) destRank = destRank - wsize;
+    while (destRank >= wsize)
+        destRank = destRank - wsize;
     srcRank = wrank - 1;
-    if (srcRank < 0) srcRank += wsize;
+    if (srcRank < 0)
+        srcRank += wsize;
 
     /* Create groups for PSCW */
-    MPI_Comm_group( MPI_COMM_WORLD, &wgroup );
-    MPI_Group_incl( wgroup, 1, &destRank, &accessGroup );
-    MPI_Group_incl( wgroup, 1, &srcRank, &exposureGroup );
-    MPI_Group_free( &wgroup );
+    MPI_Comm_group(MPI_COMM_WORLD, &wgroup);
+    MPI_Group_incl(wgroup, 1, &destRank, &accessGroup);
+    MPI_Group_incl(wgroup, 1, &srcRank, &exposureGroup);
+    MPI_Group_free(&wgroup);
 
     arraysize = maxSz * MAX_COUNT;
-    arraybuffer = (int*)malloc( arraysize * sizeof(int) );
+    arraybuffer = (int *) malloc(arraysize * sizeof(int));
     if (!arraybuffer) {
-	fprintf( stderr, "Unable to allocate %d words\n", arraysize );
-	MPI_Abort( MPI_COMM_WORLD, 1 );
+        fprintf(stderr, "Unable to allocate %d words\n", arraysize);
+        MPI_Abort(MPI_COMM_WORLD, 1);
     }
 
-    MPI_Win_create( arraybuffer, arraysize*sizeof(int), (int)sizeof(int),
-		    MPI_INFO_NULL, MPI_COMM_WORLD, &win );
+    MPI_Win_create(arraybuffer, arraysize * sizeof(int), (int) sizeof(int),
+                   MPI_INFO_NULL, MPI_COMM_WORLD, &win);
 
     /* FIXME: we need a test on performance consistency.
-       The test needs to have both a relative growth limit and
-       an absolute limit.
-    */
+     * The test needs to have both a relative growth limit and
+     * an absolute limit.
+     */
 
     if (maxCount > MAX_COUNT) {
-	fprintf( stderr, "MaxCount must not exceed %d\n", MAX_COUNT );
-	MPI_Abort( MPI_COMM_WORLD, 1 );
+        fprintf(stderr, "MaxCount must not exceed %d\n", MAX_COUNT);
+        MPI_Abort(MPI_COMM_WORLD, 1);
     }
 
     if ((syncChoice & SYNC_FENCE) && (rmaChoice & RMA_ACC)) {
-	for (sz=1; sz<=maxSz; sz = sz + sz) {
-	    if (wrank == 0) 
-		printf( "Accumulate with fence, %d elements\n", sz );
-	    cnt = 1;
-	    while (cnt <= maxCount) {
-		RunAccFence( win, destRank, cnt, sz, t );
-		if (wrank == 0) {
-		    PrintResults( cnt, t );
-		}
-		cnt = 2 * cnt;
-	    }
-	}
+        for (sz = 1; sz <= maxSz; sz = sz + sz) {
+            if (wrank == 0)
+                printf("Accumulate with fence, %d elements\n", sz);
+            cnt = 1;
+            while (cnt <= maxCount) {
+                RunAccFence(win, destRank, cnt, sz, t);
+                if (wrank == 0) {
+                    PrintResults(cnt, t);
+                }
+                cnt = 2 * cnt;
+            }
+        }
     }
 
     if ((syncChoice & SYNC_LOCK) && (rmaChoice & RMA_ACC)) {
-	for (sz=1; sz<=maxSz; sz = sz + sz) {
-	    if (wrank == 0) 
-		printf( "Accumulate with lock, %d elements\n", sz );
-	    cnt = 1;
-	    while (cnt <= maxCount) {
-		RunAccLock( win, destRank, cnt, sz, t );
-		if (wrank == 0) {
-		    PrintResults( cnt, t );
-		}
-		cnt = 2 * cnt;
-	    }
-	}
+        for (sz = 1; sz <= maxSz; sz = sz + sz) {
+            if (wrank == 0)
+                printf("Accumulate with lock, %d elements\n", sz);
+            cnt = 1;
+            while (cnt <= maxCount) {
+                RunAccLock(win, destRank, cnt, sz, t);
+                if (wrank == 0) {
+                    PrintResults(cnt, t);
+                }
+                cnt = 2 * cnt;
+            }
+        }
     }
 
     if ((syncChoice & SYNC_FENCE) && (rmaChoice & RMA_PUT)) {
-	for (sz=1; sz<=maxSz; sz = sz + sz) {
-	    if (wrank == 0) 
-		printf( "Put with fence, %d elements\n", sz );
-	    cnt = 1;
-	    while (cnt <= maxCount) {
-		RunPutFence( win, destRank, cnt, sz, t );
-		if (wrank == 0) {
-		    PrintResults( cnt, t );
-		}
-		cnt = 2 * cnt;
-	    }
-	}
+        for (sz = 1; sz <= maxSz; sz = sz + sz) {
+            if (wrank == 0)
+                printf("Put with fence, %d elements\n", sz);
+            cnt = 1;
+            while (cnt <= maxCount) {
+                RunPutFence(win, destRank, cnt, sz, t);
+                if (wrank == 0) {
+                    PrintResults(cnt, t);
+                }
+                cnt = 2 * cnt;
+            }
+        }
     }
 
     if ((syncChoice & SYNC_LOCK) && (rmaChoice & RMA_PUT)) {
-	for (sz=1; sz<=maxSz; sz = sz + sz) {
-	    if (wrank == 0) 
-		printf( "Put with lock, %d elements\n", sz );
-	    cnt = 1;
-	    while (cnt <= maxCount) {
-		RunPutLock( win, destRank, cnt, sz, t );
-		if (wrank == 0) {
-		    PrintResults( cnt, t );
-		}
-		cnt = 2 * cnt;
-	    }
-	}
+        for (sz = 1; sz <= maxSz; sz = sz + sz) {
+            if (wrank == 0)
+                printf("Put with lock, %d elements\n", sz);
+            cnt = 1;
+            while (cnt <= maxCount) {
+                RunPutLock(win, destRank, cnt, sz, t);
+                if (wrank == 0) {
+                    PrintResults(cnt, t);
+                }
+                cnt = 2 * cnt;
+            }
+        }
     }
 
     if ((syncChoice & SYNC_PSCW) && (rmaChoice & RMA_PUT)) {
-	for (sz=1; sz<=maxSz; sz = sz + sz) {
-	    if (wrank == 0) 
-		printf( "Put with pscw, %d elements\n", sz );
-	    cnt = 1;
-	    while (cnt <= maxCount) {
-		RunPutPSCW( win, destRank, cnt, sz, 
-			    exposureGroup, accessGroup, t );
-		if (wrank == 0) {
-		    PrintResults( cnt, t );
-		}
-		cnt = 2 * cnt;
-	    }
-	}
+        for (sz = 1; sz <= maxSz; sz = sz + sz) {
+            if (wrank == 0)
+                printf("Put with pscw, %d elements\n", sz);
+            cnt = 1;
+            while (cnt <= maxCount) {
+                RunPutPSCW(win, destRank, cnt, sz, exposureGroup, accessGroup, t);
+                if (wrank == 0) {
+                    PrintResults(cnt, t);
+                }
+                cnt = 2 * cnt;
+            }
+        }
     }
 
     if ((syncChoice & SYNC_PSCW) && (rmaChoice & RMA_ACC)) {
-	for (sz=1; sz<=maxSz; sz = sz + sz) {
-	    if (wrank == 0) 
-		printf( "Accumulate with pscw, %d elements\n", sz );
-	    cnt = 1;
-	    while (cnt <= maxCount) {
-		RunAccPSCW( win, destRank, cnt, sz, 
-			    exposureGroup, accessGroup, t );
-		if (wrank == 0) {
-		    PrintResults( cnt, t );
-		}
-		cnt = 2 * cnt;
-	    }
-	}
+        for (sz = 1; sz <= maxSz; sz = sz + sz) {
+            if (wrank == 0)
+                printf("Accumulate with pscw, %d elements\n", sz);
+            cnt = 1;
+            while (cnt <= maxCount) {
+                RunAccPSCW(win, destRank, cnt, sz, exposureGroup, accessGroup, t);
+                if (wrank == 0) {
+                    PrintResults(cnt, t);
+                }
+                cnt = 2 * cnt;
+            }
+        }
     }
 
-    MPI_Win_free( &win );
+    MPI_Win_free(&win);
+    free(arraybuffer);          /* window memory was malloc'ed by this program; release it once the window is gone */
+    MPI_Group_free(&accessGroup);
+    MPI_Group_free(&exposureGroup);
 
-    MPI_Group_free( &accessGroup );
-    MPI_Group_free( &exposureGroup );
-    
     MPI_Finalize();
     return 0;
 }
 
 
-void RunAccFence( MPI_Win win, int destRank, int cnt, int sz, timing t[] )
+void RunAccFence(MPI_Win win, int destRank, int cnt, int sz, timing t[])
 {
     int k, i, j, one = 1;
 
-    for (k=0; k<MAX_RUNS; k++) {
-	MPI_Barrier( MPI_COMM_WORLD );
-	MPI_Win_fence( 0, win );
-	j = 0;
-	t[k].startOp = MPI_Wtime();
-	for (i=0; i<cnt; i++) {
-	    MPI_Accumulate( &one, sz, MPI_INT, destRank, 
-			    j, sz, MPI_INT, MPI_SUM, win );
-	    j += sz;
-	}
-	t[k].endOp = MPI_Wtime();
-	if (barrierSync) MPI_Barrier( MPI_COMM_WORLD );
-	MPI_Win_fence( 0, win );
-	t[k].endSync = MPI_Wtime();
+    for (k = 0; k < MAX_RUNS; k++) {
+        MPI_Barrier(MPI_COMM_WORLD);
+        MPI_Win_fence(0, win);
+        j = 0;
+        t[k].startOp = MPI_Wtime();
+        for (i = 0; i < cnt; i++) {
+            MPI_Accumulate(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
+            j += sz;
+        }
+        t[k].endOp = MPI_Wtime();
+        if (barrierSync)
+            MPI_Barrier(MPI_COMM_WORLD);
+        MPI_Win_fence(0, win);
+        t[k].endSync = MPI_Wtime();
     }
 }
 
-void RunAccLock( MPI_Win win, int destRank, int cnt, int sz, timing t[] )
+void RunAccLock(MPI_Win win, int destRank, int cnt, int sz, timing t[])
 {
     int k, i, j, one = 1;
 
-    for (k=0; k<MAX_RUNS; k++) {
-	MPI_Barrier( MPI_COMM_WORLD );
-	MPI_Win_lock( MPI_LOCK_SHARED, destRank, 0, win );
-	j = 0;
-	t[k].startOp = MPI_Wtime();
-	for (i=0; i<cnt; i++) {
-	    MPI_Accumulate( &one, sz, MPI_INT, destRank, 
-			    j, sz, MPI_INT, MPI_SUM, win );
-	    j += sz;
-	}
-	t[k].endOp = MPI_Wtime();
-	if (barrierSync) MPI_Barrier( MPI_COMM_WORLD );
-	MPI_Win_unlock( destRank, win );
-	t[k].endSync = MPI_Wtime();
+    for (k = 0; k < MAX_RUNS; k++) {
+        MPI_Barrier(MPI_COMM_WORLD);
+        MPI_Win_lock(MPI_LOCK_SHARED, destRank, 0, win);
+        j = 0;
+        t[k].startOp = MPI_Wtime();
+        for (i = 0; i < cnt; i++) {
+            MPI_Accumulate(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
+            j += sz;
+        }
+        t[k].endOp = MPI_Wtime();
+        if (barrierSync)
+            MPI_Barrier(MPI_COMM_WORLD);
+        MPI_Win_unlock(destRank, win);
+        t[k].endSync = MPI_Wtime();
     }
 }
 
-void RunPutFence( MPI_Win win, int destRank, int cnt, int sz, timing t[] )
+void RunPutFence(MPI_Win win, int destRank, int cnt, int sz, timing t[])
 {
     int k, i, j, one = 1;
 
-    for (k=0; k<MAX_RUNS; k++) {
-	MPI_Barrier( MPI_COMM_WORLD );
-	MPI_Win_fence( 0, win );
-	j = 0;
-	t[k].startOp = MPI_Wtime();
-	for (i=0; i<cnt; i++) {
-	    MPI_Put( &one, sz, MPI_INT, destRank, 
-			    j, sz, MPI_INT, win );
-	    j += sz;
-	}
-	t[k].endOp = MPI_Wtime();
-	if (barrierSync) MPI_Barrier( MPI_COMM_WORLD );
-	MPI_Win_fence( 0, win );
-	t[k].endSync = MPI_Wtime();
+    for (k = 0; k < MAX_RUNS; k++) {
+        MPI_Barrier(MPI_COMM_WORLD);
+        MPI_Win_fence(0, win);
+        j = 0;
+        t[k].startOp = MPI_Wtime();
+        for (i = 0; i < cnt; i++) {
+            MPI_Put(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
+            j += sz;
+        }
+        t[k].endOp = MPI_Wtime();
+        if (barrierSync)
+            MPI_Barrier(MPI_COMM_WORLD);
+        MPI_Win_fence(0, win);
+        t[k].endSync = MPI_Wtime();
     }
 }
 
-void RunPutLock( MPI_Win win, int destRank, int cnt, int sz, timing t[] )
+void RunPutLock(MPI_Win win, int destRank, int cnt, int sz, timing t[])
 {
     int k, i, j, one = 1;
 
-    for (k=0; k<MAX_RUNS; k++) {
-	MPI_Barrier( MPI_COMM_WORLD );
-	MPI_Win_lock( MPI_LOCK_SHARED, destRank, 0, win );
-	j = 0;
-	t[k].startOp = MPI_Wtime();
-	for (i=0; i<cnt; i++) {
-	    MPI_Put( &one, sz, MPI_INT, destRank, j, sz, MPI_INT, win );
-	    j += sz;
-	}
-	t[k].endOp = MPI_Wtime();
-	if (barrierSync) MPI_Barrier( MPI_COMM_WORLD );
-	MPI_Win_unlock( destRank, win );
-	t[k].endSync = MPI_Wtime();
+    for (k = 0; k < MAX_RUNS; k++) {
+        MPI_Barrier(MPI_COMM_WORLD);
+        MPI_Win_lock(MPI_LOCK_SHARED, destRank, 0, win);
+        j = 0;
+        t[k].startOp = MPI_Wtime();
+        for (i = 0; i < cnt; i++) {
+            MPI_Put(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
+            j += sz;
+        }
+        t[k].endOp = MPI_Wtime();
+        if (barrierSync)
+            MPI_Barrier(MPI_COMM_WORLD);
+        MPI_Win_unlock(destRank, win);
+        t[k].endSync = MPI_Wtime();
     }
 }
 
-void RunPutPSCW( MPI_Win win, int destRank, int cnt, int sz, 
-		 MPI_Group exposureGroup, MPI_Group accessGroup, timing t[] )
+void RunPutPSCW(MPI_Win win, int destRank, int cnt, int sz,
+                MPI_Group exposureGroup, MPI_Group accessGroup, timing t[])
 {
     int k, i, j, one = 1;
 
-    for (k=0; k<MAX_RUNS; k++) {
-	MPI_Barrier( MPI_COMM_WORLD );
-	MPI_Win_post( exposureGroup, 0, win );
-	MPI_Win_start( accessGroup, 0, win );
-	j = 0;
-	t[k].startOp = MPI_Wtime();
-	for (i=0; i<cnt; i++) {
-	    MPI_Put( &one, sz, MPI_INT, destRank, j, sz, MPI_INT, win );
-	    j += sz;
-	}
-	t[k].endOp = MPI_Wtime();
-	if (barrierSync) MPI_Barrier( MPI_COMM_WORLD );
-	MPI_Win_complete( win );
-	MPI_Win_wait( win );
-	t[k].endSync = MPI_Wtime();
+    for (k = 0; k < MAX_RUNS; k++) {
+        MPI_Barrier(MPI_COMM_WORLD);
+        MPI_Win_post(exposureGroup, 0, win);
+        MPI_Win_start(accessGroup, 0, win);
+        j = 0;
+        t[k].startOp = MPI_Wtime();
+        for (i = 0; i < cnt; i++) {
+            MPI_Put(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
+            j += sz;
+        }
+        t[k].endOp = MPI_Wtime();
+        if (barrierSync)
+            MPI_Barrier(MPI_COMM_WORLD);
+        MPI_Win_complete(win);
+        MPI_Win_wait(win);
+        t[k].endSync = MPI_Wtime();
     }
 }
 
-void RunAccPSCW( MPI_Win win, int destRank, int cnt, int sz, 
-		 MPI_Group exposureGroup, MPI_Group accessGroup, timing t[] )
+void RunAccPSCW(MPI_Win win, int destRank, int cnt, int sz,
+                MPI_Group exposureGroup, MPI_Group accessGroup, timing t[])
 {
     int k, i, j, one = 1;
 
-    for (k=0; k<MAX_RUNS; k++) {
-	MPI_Barrier( MPI_COMM_WORLD );
-	MPI_Win_post( exposureGroup, 0, win );
-	MPI_Win_start( accessGroup, 0, win );
-	j = 0;
-	t[k].startOp = MPI_Wtime();
-	for (i=0; i<cnt; i++) {
-	    MPI_Accumulate( &one, sz, MPI_INT, destRank, 
-			    j, sz, MPI_INT, MPI_SUM, win );
-	    j += sz;
-	}
-	t[k].endOp = MPI_Wtime();
-	if (barrierSync) MPI_Barrier( MPI_COMM_WORLD );
-	MPI_Win_complete( win );
-	MPI_Win_wait( win );
-	t[k].endSync = MPI_Wtime();
+    for (k = 0; k < MAX_RUNS; k++) {
+        MPI_Barrier(MPI_COMM_WORLD);
+        MPI_Win_post(exposureGroup, 0, win);
+        MPI_Win_start(accessGroup, 0, win);
+        j = 0;
+        t[k].startOp = MPI_Wtime();
+        for (i = 0; i < cnt; i++) {
+            MPI_Accumulate(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
+            j += sz;
+        }
+        t[k].endOp = MPI_Wtime();
+        if (barrierSync)
+            MPI_Barrier(MPI_COMM_WORLD);
+        MPI_Win_complete(win);
+        MPI_Win_wait(win);
+        t[k].endSync = MPI_Wtime();
     }
 }
 
-void PrintResults( int cnt, timing t[] )
+void PrintResults(int cnt, timing t[])
 {
     int k;
-    double d1=0, d2=0;
+    double d1 = 0, d2 = 0;
     double minD1 = 1e10, minD2 = 1e10;
     double tOp, tSync;
-    for (k=0; k<MAX_RUNS; k++) {
-	tOp   = t[k].endOp - t[k].startOp;
-	tSync = t[k].endSync - t[k].endOp;
-	d1    += tOp;
-	d2    += tSync;
-	if (tOp < minD1)   minD1 = tOp;
-	if (tSync < minD2) minD2 = tSync;
+    for (k = 0; k < MAX_RUNS; k++) {
+        tOp = t[k].endOp - t[k].startOp;
+        tSync = t[k].endSync - t[k].endOp;
+        d1 += tOp;
+        d2 += tSync;
+        if (tOp < minD1)
+            minD1 = tOp;
+        if (tSync < minD2)
+            minD2 = tSync;
     }
     if (verbose) {
-	long rate = 0;
-	/* Use the minimum times because they are more stable - if timing
-	   accuracy is an issue, use the min over multiple trials */
-	d1 = minD1;
-	d2 = minD2;
-	/* d1 = d1 / MAX_RUNS; d2 = d2 / MAX_RUNS); */
-	if (d2 > 0) rate = (long)(cnt) / d2;
-	/* count, op, sync, op/each, sync/each, rate */
-	printf( "%d\t%e\t%e\t%e\t%e\t%ld\n", cnt, 
-		d1, d2, 
-		d1 / cnt, d2 / cnt, rate );
+        long rate = 0;
+        /* Use the minimum times because they are more stable - if timing
+         * accuracy is an issue, use the min over multiple trials */
+        d1 = minD1;
+        d2 = minD2;
+        /* Mean instead of min: d1 = d1 / MAX_RUNS; d2 = d2 / MAX_RUNS; */
+        if (d2 > 0)
+            rate = (long) (cnt) / d2;
+        /* count, op, sync, op/each, sync/each, rate */
+        printf("%d\t%e\t%e\t%e\t%e\t%ld\n", cnt, d1, d2, d1 / cnt, d2 / cnt, rate);
     }
 }
diff --git a/teshsuite/smpi/mpich3-test/perf/nestvec.c b/teshsuite/smpi/mpich3-test/perf/nestvec.c
index 494f847f20..6ee12f5e65 100644
--- a/teshsuite/smpi/mpich3-test/perf/nestvec.c
+++ b/teshsuite/smpi/mpich3-test/perf/nestvec.c
@@ -30,161 +30,161 @@
 
 static int verbose = 0;
 
-int main( int argc, char **argv )
+int main(int argc, char **argv)
 {
-    int          vcount = 16, vblock = vcount*vcount/2, vstride=2*vcount*vblock;
-    int          v2stride, typesize, packsize, i, position, errs = 0;
-    char         *inbuf, *outbuf, *outbuf2;
+    int vcount = 16, vblock = vcount * vcount / 2, vstride = 2 * vcount * vblock;
+    int v2stride, typesize, packsize, i, position, errs = 0;
+    char *inbuf, *outbuf, *outbuf2;
     MPI_Datatype ft1type, ft2type, ft3type;
     MPI_Datatype ftopttype;
-    MPI_Aint     lb, extent;
-    double       t0, t1;
-    double       tpack, tmanual, tpackopt;
-    int          ntry;
-
-    MPI_Init( &argc, &argv );
-    
-    MPI_Type_contiguous( 6, MPI_FLOAT, &ft1type );
-    MPI_Type_size( ft1type, &typesize );
+    MPI_Aint lb, extent;
+    double t0, t1;
+    double tpack, tmanual, tpackopt;
+    int ntry;
+
+    MPI_Init(&argc, &argv);
+
+    MPI_Type_contiguous(6, MPI_FLOAT, &ft1type);
+    MPI_Type_size(ft1type, &typesize);
     v2stride = vcount * vcount * vcount * vcount * typesize;
-    MPI_Type_vector( vcount, vblock, vstride, ft1type, &ft2type );
-    MPI_Type_create_hvector( 2, 1, v2stride, ft2type, &ft3type );
-    MPI_Type_commit( &ft3type );
-    MPI_Type_free( &ft1type );
-    MPI_Type_free( &ft2type );
+    MPI_Type_vector(vcount, vblock, vstride, ft1type, &ft2type);
+    MPI_Type_create_hvector(2, 1, v2stride, ft2type, &ft3type);
+    MPI_Type_commit(&ft3type);
+    MPI_Type_free(&ft1type);
+    MPI_Type_free(&ft2type);
 #if defined(MPICH) && defined(PRINT_DATATYPE_INTERNALS)
     /* To use MPIDU_Datatype_debug to print the datatype internals,
-       you must configure MPICH with --enable-g=log */
+     * you must configure MPICH with --enable-g=log */
     if (verbose) {
-	printf( "Original datatype:\n" );
-	MPIDU_Datatype_debug( ft3type, 10 );
+        printf("Original datatype:\n");
+        MPIDU_Datatype_debug(ft3type, 10);
     }
 #endif
     /* The same type, but without using the contiguous type */
-    MPI_Type_vector( vcount, 6*vblock, 6*vstride, MPI_FLOAT, &ft2type );
-    MPI_Type_create_hvector( 2, 1, v2stride, ft2type, &ftopttype );
-    MPI_Type_commit( &ftopttype );
-    MPI_Type_free( &ft2type );
+    MPI_Type_vector(vcount, 6 * vblock, 6 * vstride, MPI_FLOAT, &ft2type);
+    MPI_Type_create_hvector(2, 1, v2stride, ft2type, &ftopttype);
+    MPI_Type_commit(&ftopttype);
+    MPI_Type_free(&ft2type);
 #if defined(MPICH) && defined(PRINT_DATATYPE_INTERNALS)
     if (verbose) {
-	printf( "\n\nMerged datatype:\n" );
-	MPIDU_Datatype_debug( ftopttype, 10 );
+        printf("\n\nMerged datatype:\n");
+        MPIDU_Datatype_debug(ftopttype, 10);
     }
 #endif
 
-    MPI_Type_get_extent( ft3type, &lb, &extent );
-    MPI_Type_size( ft3type, &typesize );
+    MPI_Type_get_extent(ft3type, &lb, &extent);
+    MPI_Type_size(ft3type, &typesize);
 
-    MPI_Pack_size( 1, ft3type, MPI_COMM_WORLD, &packsize );
+    MPI_Pack_size(1, ft3type, MPI_COMM_WORLD, &packsize);
 
-    inbuf   = (char *)malloc( extent );
-    outbuf  = (char *)malloc( packsize );
-    outbuf2 = (char *)malloc( packsize );
+    inbuf = (char *) malloc(extent);
+    outbuf = (char *) malloc(packsize);
+    outbuf2 = (char *) malloc(packsize);
     if (!inbuf) {
-	fprintf( stderr, "Unable to allocate %ld for inbuf\n", (long)extent );
-	MPI_Abort( MPI_COMM_WORLD, 1 );
+        fprintf(stderr, "Unable to allocate %ld for inbuf\n", (long) extent);
+        MPI_Abort(MPI_COMM_WORLD, 1);
     }
     if (!outbuf) {
-	fprintf( stderr, "Unable to allocate %ld for outbuf\n", (long)packsize );
-	MPI_Abort( MPI_COMM_WORLD, 1 );
+        fprintf(stderr, "Unable to allocate %ld for outbuf\n", (long) packsize);
+        MPI_Abort(MPI_COMM_WORLD, 1);
     }
     if (!outbuf2) {
-	fprintf( stderr, "Unable to allocate %ld for outbuf2\n", (long)packsize );
-	MPI_Abort( MPI_COMM_WORLD, 1 );
+        fprintf(stderr, "Unable to allocate %ld for outbuf2\n", (long) packsize);
+        MPI_Abort(MPI_COMM_WORLD, 1);
     }
-    for (i=0; i<extent; i++) {
-	inbuf[i] = i & 0x7f;
+    for (i = 0; i < extent; i++) {
+        inbuf[i] = i & 0x7f;
     }
     position = 0;
     /* Warm up the code and data */
-    MPI_Pack( inbuf, 1, ft3type, outbuf, packsize, &position, MPI_COMM_WORLD );
+    MPI_Pack(inbuf, 1, ft3type, outbuf, packsize, &position, MPI_COMM_WORLD);
 
     /* Pack using the vector of vector of contiguous */
     tpack = 1e12;
     for (ntry = 0; ntry < 5; ntry++) {
-	position = 0;
-	t0 = MPI_Wtime();
-	MPI_Pack( inbuf, 1, ft3type, outbuf, packsize, &position, 
-		  MPI_COMM_WORLD );
-	t1 = MPI_Wtime() - t0;
-	if (t1 < tpack) tpack = t1;
+        position = 0;
+        t0 = MPI_Wtime();
+        MPI_Pack(inbuf, 1, ft3type, outbuf, packsize, &position, MPI_COMM_WORLD);
+        t1 = MPI_Wtime() - t0;
+        if (t1 < tpack)
+            tpack = t1;
     }
-    MPI_Type_free( &ft3type );
+    MPI_Type_free(&ft3type);
 
     /* Pack using vector of vector with big blocks (same type map) */
     tpackopt = 1e12;
     for (ntry = 0; ntry < 5; ntry++) {
-	position = 0;
-	t0 = MPI_Wtime();
-	MPI_Pack( inbuf, 1, ftopttype, outbuf, packsize, &position, 
-		  MPI_COMM_WORLD );
-	t1 = MPI_Wtime() - t0;
-	if (t1 < tpackopt) tpackopt = t1;
+        position = 0;
+        t0 = MPI_Wtime();
+        MPI_Pack(inbuf, 1, ftopttype, outbuf, packsize, &position, MPI_COMM_WORLD);
+        t1 = MPI_Wtime() - t0;
+        if (t1 < tpackopt)
+            tpackopt = t1;
     }
-    MPI_Type_free( &ftopttype );
+    MPI_Type_free(&ftopttype);
 
     /* User (manual) packing code.
-       Note that we exploit the fact that the vector type contains vblock 
-       instances of a contiguous type of size 24, or equivalently a 
-       single block of 24*vblock bytes.
-    */
+     * Note that we exploit the fact that the vector type contains vblock
+     * instances of a contiguous type of size 24, or equivalently a
+     * single block of 24*vblock bytes.
+     */
     tmanual = 1e12;
     for (ntry = 0; ntry < 5; ntry++) {
-	const char *ppe = (const char *)inbuf;
-	int k, j;
-	t0 = MPI_Wtime();
-	position = 0;
-	for (k=0; k<2; k++) {  /* hvector count; blocksize is 1 */
-	    const char *ptr = ppe;
-	    for (j=0; j<vcount; j++) { /* vector count */
-		memcpy( outbuf2 + position, ptr, 24*vblock );
-		ptr      += vstride * 24;
-		position += 24*vblock;
-	    }
-	    ppe += v2stride;
-	}
-	t1 = MPI_Wtime() - t0;
-	if (t1 < tmanual) tmanual = t1;
-
-	/* Check on correctness */
+        const char *ppe = (const char *) inbuf;
+        int k, j;
+        t0 = MPI_Wtime();
+        position = 0;
+        for (k = 0; k < 2; k++) {       /* hvector count; blocksize is 1 */
+            const char *ptr = ppe;
+            for (j = 0; j < vcount; j++) {      /* vector count */
+                memcpy(outbuf2 + position, ptr, 24 * vblock);
+                ptr += vstride * 24;
+                position += 24 * vblock;
+            }
+            ppe += v2stride;
+        }
+        t1 = MPI_Wtime() - t0;
+        if (t1 < tmanual)
+            tmanual = t1;
+
+        /* Check on correctness */
 #ifdef PACK_IS_NATIVE
-	if (memcmp( outbuf, outbuf2, position ) != 0) {
-	    printf( "Panic - pack buffers differ\n" );
-	}
+        if (memcmp(outbuf, outbuf2, position) != 0) {
+            printf("Panic - pack buffers differ\n");
+        }
 #endif
     }
 
     if (verbose) {
-	printf( "Bytes packed = %d\n", position );
-	printf( "MPI_Pack time = %e, opt version = %e, manual pack time = %e\n", 
-		tpack, tpackopt, tmanual );
+        printf("Bytes packed = %d\n", position);
+        printf("MPI_Pack time = %e, opt version = %e, manual pack time = %e\n",
+               tpack, tpackopt, tmanual);
     }
 
     /* A factor of 4 is extremely generous, especially since the test suite
-       no longer builds any of the tests with optimization */
+     * no longer builds any of the tests with optimization */
     if (4 * tmanual < tpack) {
-	errs++;
-	printf( "MPI_Pack time = %e, manual pack time = %e\n", tpack, tmanual );
-	printf( "MPI_Pack time should be less than 4 times the manual time\n" );
-	printf( "For most informative results, be sure to compile this test with optimization\n" );
+        errs++;
+        printf("MPI_Pack time = %e, manual pack time = %e\n", tpack, tmanual);
+        printf("MPI_Pack time should be less than 4 times the manual time\n");
+        printf("For most informative results, be sure to compile this test with optimization\n");
     }
     if (4 * tmanual < tpackopt) {
-	errs++;
-	printf( "MPI_Pack with opt = %e, manual pack time = %e\n", tpackopt, 
-		tmanual );
-	printf( "MPI_Pack time should be less than 4 times the manual time\n" );
-	printf( "For most informative results, be sure to compile this test with optimization\n" );
+        errs++;
+        printf("MPI_Pack with opt = %e, manual pack time = %e\n", tpackopt, tmanual);
+        printf("MPI_Pack time should be less than 4 times the manual time\n");
+        printf("For most informative results, be sure to compile this test with optimization\n");
     }
     if (errs) {
-        printf( " Found %d errors\n", errs );
+        printf(" Found %d errors\n", errs);
     }
     else {
-        printf( " No Errors\n" );
-    } 
-    
-    free( inbuf );
-    free( outbuf );
-    free( outbuf2 );
+        printf(" No Errors\n");
+    }
+
+    free(inbuf);
+    free(outbuf);
+    free(outbuf2);
 
     MPI_Finalize();
     return 0;
diff --git a/teshsuite/smpi/mpich3-test/perf/nestvec2.c b/teshsuite/smpi/mpich3-test/perf/nestvec2.c
index c54400f7d4..c32854f75e 100644
--- a/teshsuite/smpi/mpich3-test/perf/nestvec2.c
+++ b/teshsuite/smpi/mpich3-test/perf/nestvec2.c
@@ -30,182 +30,181 @@
 
 static int verbose = 0;
 
-int main( int argc, char **argv )
+int main(int argc, char **argv)
 {
-    int          vcount, vstride;
-    int32_t      counts[2];
-    int          v2stride, typesize, packsize, i, position, errs = 0;
-    double       *outbuf, *outbuf2;
-    double       *vsource;
+    int vcount, vstride;
+    int32_t counts[2];
+    int packsize, i, position, errs = 0;
+    double *outbuf, *outbuf2;
+    double *vsource;
     MPI_Datatype vtype, stype;
-    MPI_Aint     lb, extent;
-    double       t0, t1;
-    double       tspack, tvpack, tmanual;
-    int          ntry;
-    int          blocklengths[2];
-    MPI_Aint     displacements[2];
+    double t0, t1;
+    double tspack, tvpack, tmanual;
+    int ntry;
+    int blocklengths[2];
+    MPI_Aint displacements[2];
     MPI_Datatype typesArray[2];
 
-    MPI_Init( &argc, &argv );
-    
-    /* Create a struct consisting of a two 32-bit ints, followed by a 
-       vector of stride 3 but count 128k (less than a few MB of data area) */
-    vcount  = 128000;
+    MPI_Init(&argc, &argv);
+
+    /* Create a struct consisting of two 32-bit ints, followed by a
+     * vector of stride 3 but count 128k (less than a few MB of data area) */
+    vcount = 128000;
     vstride = 3;
-    MPI_Type_vector( vcount, 1, vstride, MPI_DOUBLE, &vtype );
+    MPI_Type_vector(vcount, 1, vstride, MPI_DOUBLE, &vtype);
 
-    vsource = (double *)malloc( (vcount + 1) * (vstride + 1) * sizeof(double) );
+    vsource = (double *) malloc((vcount + 1) * (vstride + 1) * sizeof(double));
     if (!vsource) {
-	fprintf( stderr, "Unable to allocate vsource\n" );
-	MPI_Abort( MPI_COMM_WORLD, 1 );
+        fprintf(stderr, "Unable to allocate vsource\n");
+        MPI_Abort(MPI_COMM_WORLD, 1);
     }
-    for (i=0; i<vcount*vstride; i++) {
-	vsource[i] = i;
+    for (i = 0; i < vcount * vstride; i++) {
+        vsource[i] = i;
     }
-    blocklengths[0] = 2; MPI_Get_address( &counts[0], &displacements[0] );
-    blocklengths[1] = 1; MPI_Get_address( vsource, &displacements[1] );
+    blocklengths[0] = 2;
+    MPI_Get_address(&counts[0], &displacements[0]);
+    blocklengths[1] = 1;
+    MPI_Get_address(vsource, &displacements[1]);
     if (verbose) {
-	printf( "%p = %p?\n", vsource, (void *)displacements[1] );
+        printf("%p = %p?\n", vsource, (void *) displacements[1]);
     }
     typesArray[0] = MPI_INT32_T;
     typesArray[1] = vtype;
-    MPI_Type_create_struct( 2, blocklengths, displacements, typesArray, 
-			    &stype );
-    MPI_Type_commit( &stype );
-    MPI_Type_commit( &vtype );
+    MPI_Type_create_struct(2, blocklengths, displacements, typesArray, &stype);
+    MPI_Type_commit(&stype);
+    MPI_Type_commit(&vtype);
 
 #if defined(MPICH) && defined(PRINT_DATATYPE_INTERNALS)
     /* To use MPIDU_Datatype_debug to print the datatype internals,
-       you must configure MPICH with --enable-g=log */
+     * you must configure MPICH with --enable-g=log */
     if (verbose) {
-	printf( "Original struct datatype:\n" );
-	MPIDU_Datatype_debug( stype, 10 );
+        printf("Original struct datatype:\n");
+        MPIDU_Datatype_debug(stype, 10);
     }
 #endif
 
-    MPI_Pack_size( 1, stype, MPI_COMM_WORLD, &packsize );
-    outbuf  = (double *)malloc( packsize );
-    outbuf2 = (double *)malloc( packsize );
+    MPI_Pack_size(1, stype, MPI_COMM_WORLD, &packsize);
+    outbuf = (double *) malloc(packsize);
+    outbuf2 = (double *) malloc(packsize);
     if (!outbuf) {
-	fprintf( stderr, "Unable to allocate %ld for outbuf\n", (long)packsize );
-	MPI_Abort( MPI_COMM_WORLD, 1 );
+        fprintf(stderr, "Unable to allocate %ld for outbuf\n", (long) packsize);
+        MPI_Abort(MPI_COMM_WORLD, 1);
     }
     if (!outbuf2) {
-	fprintf( stderr, "Unable to allocate %ld for outbuf2\n", (long)packsize );
-	MPI_Abort( MPI_COMM_WORLD, 1 );
+        fprintf(stderr, "Unable to allocate %ld for outbuf2\n", (long) packsize);
+        MPI_Abort(MPI_COMM_WORLD, 1);
     }
     position = 0;
     /* Warm up the code and data */
-    MPI_Pack( MPI_BOTTOM, 1, stype, outbuf, packsize, &position, 
-	      MPI_COMM_WORLD );
+    MPI_Pack(MPI_BOTTOM, 1, stype, outbuf, packsize, &position, MPI_COMM_WORLD);
 
     tspack = 1e12;
     for (ntry = 0; ntry < 5; ntry++) {
-	position = 0;
-	t0 = MPI_Wtime();
-	MPI_Pack( MPI_BOTTOM, 1, stype, outbuf, packsize, &position, 
-		  MPI_COMM_WORLD );
-	t1 = MPI_Wtime() - t0;
-	if (t1 < tspack) tspack = t1;
+        position = 0;
+        t0 = MPI_Wtime();
+        MPI_Pack(MPI_BOTTOM, 1, stype, outbuf, packsize, &position, MPI_COMM_WORLD);
+        t1 = MPI_Wtime() - t0;
+        if (t1 < tspack)
+            tspack = t1;
     }
-    MPI_Type_free( &stype );
+    MPI_Type_free(&stype);
 
     /* An equivalent packing, using the 2 ints and the vector separately */
     tvpack = 1e12;
     for (ntry = 0; ntry < 5; ntry++) {
-	position = 0;
-	t0 = MPI_Wtime();
-	MPI_Pack( counts, 2, MPI_INT32_T, outbuf, packsize, &position, 
-		  MPI_COMM_WORLD );
-	MPI_Pack( vsource, 1, vtype, outbuf, packsize, &position, 
-		  MPI_COMM_WORLD );
-	t1 = MPI_Wtime() - t0;
-	if (t1 < tvpack) tvpack = t1;
+        position = 0;
+        t0 = MPI_Wtime();
+        MPI_Pack(counts, 2, MPI_INT32_T, outbuf, packsize, &position, MPI_COMM_WORLD);
+        MPI_Pack(vsource, 1, vtype, outbuf, packsize, &position, MPI_COMM_WORLD);
+        t1 = MPI_Wtime() - t0;
+        if (t1 < tvpack)
+            tvpack = t1;
     }
-    MPI_Type_free( &vtype );
+    MPI_Type_free(&vtype);
 
-    /* Note that we exploit the fact that the vector type contains vblock 
-       instances of a contiguous type of size 24, or a single block of 24*vblock
-       bytes.
-    */
+    /* Manual packing: the two 32-bit counts are stored in the first
+     * double-sized slot of outbuf2, followed by vcount doubles gathered
+     * from vsource at a stride of vstride elements.
+     */
     tmanual = 1e12;
     for (ntry = 0; ntry < 5; ntry++) {
-	const double * restrict ppe = (const double *)vsource;
-	double * restrict ppo = outbuf2;
-	int j;
-	t0 = MPI_Wtime();
-	position = 0;
-	*(int32_t *)ppo          = counts[0];
-	*( ((int32_t *)ppo) + 1) = counts[1];
-	ppo++;
-	/* Some hand optimization because this file is not normally 
-	   compiled with optimization by the test suite */
-	j = vcount;
-	while (j) {
-	    *ppo++ = *ppe;
-	    ppe += vstride;
-	    *ppo++ = *ppe;
-	    ppe += vstride;
-	    *ppo++ = *ppe;
-	    ppe += vstride;
-	    *ppo++ = *ppe;
-	    ppe += vstride;
-	    j -= 4;
-	}
-	position += (1 + vcount);
-	position *= sizeof(double);
-	t1 = MPI_Wtime() - t0;
-	if (t1 < tmanual) tmanual = t1;
-
-	/* Check on correctness */
+        const double *restrict ppe = (const double *) vsource;
+        double *restrict ppo = outbuf2;
+        int j;
+        t0 = MPI_Wtime();
+        position = 0;
+        *(int32_t *) ppo = counts[0];
+        *(((int32_t *) ppo) + 1) = counts[1];
+        ppo++;
+        /* Some hand optimization because this file is not normally
+         * compiled with optimization by the test suite */
+        j = vcount;
+        while (j) {
+            *ppo++ = *ppe;
+            ppe += vstride;
+            *ppo++ = *ppe;
+            ppe += vstride;
+            *ppo++ = *ppe;
+            ppe += vstride;
+            *ppo++ = *ppe;
+            ppe += vstride;
+            j -= 4;
+        }
+        position += (1 + vcount);
+        position *= sizeof(double);
+        t1 = MPI_Wtime() - t0;
+        if (t1 < tmanual)
+            tmanual = t1;
+
+        /* Check on correctness */
 #ifdef PACK_IS_NATIVE
-	if (memcmp( outbuf, outbuf2, position ) != 0) {
-	    printf( "Panic(manual) - pack buffers differ\n" );
-	    for (j=0; j<8; j++) {
-		printf( "%d: %llx\t%llx\n", j, (long long unsigned)outbuf[j], 
-			(long long unsigned)outbuf2[j] );
-	    }
-	}
+        if (memcmp(outbuf, outbuf2, position) != 0) {
+            printf("Panic(manual) - pack buffers differ\n");
+            for (j = 0; j < 8; j++) {
+                printf("%d: %llx\t%llx\n", j, (long long unsigned) outbuf[j],
+                       (long long unsigned) outbuf2[j]);
+            }
+        }
 #endif
     }
 
     if (verbose) {
-	printf( "Bytes packed = %d\n", position );
-	printf( "MPI_Pack time = %e (struct), = %e (vector), manual pack time = %e\n", 
-		tspack, tvpack, tmanual );
+        printf("Bytes packed = %d\n", position);
+        printf("MPI_Pack time = %e (struct), = %e (vector), manual pack time = %e\n",
+               tspack, tvpack, tmanual);
     }
 
     if (4 * tmanual < tspack) {
-	errs++;
-	printf( "MPI_Pack time using struct with vector = %e, manual pack time = %e\n", tspack, tmanual )
-;
-	printf( "MPI_Pack time should be less than 4 times the manual time\n" );
-	printf( "For most informative results, be sure to compile this test with optimization\n" );
+        errs++;
+        printf("MPI_Pack time using struct with vector = %e, manual pack time = %e\n", tspack,
+               tmanual);
+        printf("MPI_Pack time should be less than 4 times the manual time\n");
+        printf("For most informative results, be sure to compile this test with optimization\n");
     }
     if (4 * tmanual < tvpack) {
-	errs++;
-	printf( "MPI_Pack using vector = %e, manual pack time = %e\n", tvpack, 
-		tmanual );
-	printf( "MPI_Pack time should be less than 4 times the manual time\n" );
-	printf( "For most informative results, be sure to compile this test with optimization\n" );
+        errs++;
+        printf("MPI_Pack using vector = %e, manual pack time = %e\n", tvpack, tmanual);
+        printf("MPI_Pack time should be less than 4 times the manual time\n");
+        printf("For most informative results, be sure to compile this test with optimization\n");
     }
     if (4 * tvpack < tspack) {
-	errs++;
-	printf( "MPI_Pack using a vector = %e, using a struct with vector = %e\n", tvpack, tspack );
-	printf( "MPI_Pack time using vector should be about the same as the struct containing the vector\n" );
-	printf( "For most informative results, be sure to compile this test with optimization\n" );
+        errs++;
+        printf("MPI_Pack using a vector = %e, using a struct with vector = %e\n", tvpack, tspack);
+        printf
+            ("MPI_Pack time using vector should be about the same as the struct containing the vector\n");
+        printf("For most informative results, be sure to compile this test with optimization\n");
     }
 
     if (errs) {
-        printf( " Found %d errors\n", errs );
+        printf(" Found %d errors\n", errs);
     }
     else {
-        printf( " No Errors\n" );
-    } 
-    
-    free( vsource );
-    free( outbuf );
-    free( outbuf2 );
+        printf(" No Errors\n");
+    }
+
+    free(vsource);
+    free(outbuf);
+    free(outbuf2);
 
     MPI_Finalize();
     return 0;
diff --git a/teshsuite/smpi/mpich3-test/perf/non_zero_root.c b/teshsuite/smpi/mpich3-test/perf/non_zero_root.c
index 0be3f88b07..553c2db536 100644
--- a/teshsuite/smpi/mpich3-test/perf/non_zero_root.c
+++ b/teshsuite/smpi/mpich3-test/perf/non_zero_root.c
@@ -15,64 +15,67 @@
 
 static int verbose = 0;
 
-int main(int argc, char* argv[])
+int main(int argc, char *argv[])
 {
-	char *sbuf, *rbuf;
-	int i, j;
-	double t1, t2, t, ts;
-	int rank, size;
-	MPI_Status status;
+    char *sbuf, *rbuf;
+    int i, j;
+    double t1, t2, t, ts;
+    int rank, size;
+    MPI_Status status;
 
-	MPI_Init(&argc,&argv);
-	MPI_Comm_rank(MPI_COMM_WORLD,&rank);
-	MPI_Comm_size(MPI_COMM_WORLD, &size);
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-	if (getenv("MPITEST_VERBOSE")) verbose = 1;
+    if (getenv("MPITEST_VERBOSE"))
+        verbose = 1;
 
-	/* Allocate memory regions to communicate */
-	sbuf = (char*) malloc(SIZE);
-	rbuf = (char*) malloc(size * SIZE);
+    /* Allocate memory regions to communicate */
+    sbuf = (char *) malloc(SIZE);
+    rbuf = (char *) malloc(size * SIZE);
 
-	/* Touch the buffers to make sure they are allocated */
-	for (i = 0; i < SIZE; i++) sbuf[i] = '0';
-	for (i = 0; i < SIZE * size; i++) rbuf[i] = '0';
+    /* Touch the buffers to make sure they are allocated */
+    for (i = 0; i < SIZE; i++)
+        sbuf[i] = '0';
+    for (i = 0; i < SIZE * size; i++)
+        rbuf[i] = '0';
 
-	/* Time when rank 0 gathers the data */
-	MPI_Barrier(MPI_COMM_WORLD);
-	t1 = MPI_Wtime();
-	for (i = 0; i < ITER; i++) {
-		MPI_Gather(sbuf, SIZE, MPI_BYTE, rbuf, SIZE, MPI_BYTE, 0, MPI_COMM_WORLD);
-		MPI_Barrier(MPI_COMM_WORLD);
-	}
-	t2 = MPI_Wtime();
-	t = (t2-t1)/ITER;
+    /* Time when rank 0 gathers the data */
+    MPI_Barrier(MPI_COMM_WORLD);
+    t1 = MPI_Wtime();
+    for (i = 0; i < ITER; i++) {
+        MPI_Gather(sbuf, SIZE, MPI_BYTE, rbuf, SIZE, MPI_BYTE, 0, MPI_COMM_WORLD);
+        MPI_Barrier(MPI_COMM_WORLD);
+    }
+    t2 = MPI_Wtime();
+    t = (t2 - t1) / ITER;
 
-	/* Time when rank 1 gathers the data */
-	MPI_Barrier(MPI_COMM_WORLD);
-	t1 = MPI_Wtime();
-	for (j = 0; j < ITER; j++) {
-		MPI_Gather(sbuf, SIZE, MPI_BYTE, rbuf, SIZE, MPI_BYTE, 1, MPI_COMM_WORLD);
-		MPI_Barrier(MPI_COMM_WORLD);
-	}
-	t2 = MPI_Wtime();
-	ts = (t2-t1)/ITER;
+    /* Time when rank 1 gathers the data */
+    MPI_Barrier(MPI_COMM_WORLD);
+    t1 = MPI_Wtime();
+    for (j = 0; j < ITER; j++) {
+        MPI_Gather(sbuf, SIZE, MPI_BYTE, rbuf, SIZE, MPI_BYTE, 1, MPI_COMM_WORLD);
+        MPI_Barrier(MPI_COMM_WORLD);
+    }
+    t2 = MPI_Wtime();
+    ts = (t2 - t1) / ITER;
 
-	if (rank == 1)
-		MPI_Send(&ts, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
-	if (rank == 0)
-		MPI_Recv(&ts, 1, MPI_DOUBLE, 1, 0, MPI_COMM_WORLD, &status);
+    if (rank == 1)
+        MPI_Send(&ts, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
+    if (rank == 0)
+        MPI_Recv(&ts, 1, MPI_DOUBLE, 1, 0, MPI_COMM_WORLD, &status);
 
-	/* Print out the results */
-	if (!rank) {
-		if ((ts / t) > (1 + ERROR_MARGIN)) { /* If the difference is more than 10%, it's an error */
-			printf("%.3f\t%.3f\n", 1000000.0 * ts, 1000000.0 * t);
-			printf("Too much difference in performance\n");
-		}
-		else printf(" No Errors\n");
-	}
-	
-	MPI_Finalize();
-    free(sbuf);
-    free(rbuf);
-	return 0;
+    /* Print out the results */
+    if (!rank) {
+        if ((ts / t) > (1 + ERROR_MARGIN)) {    /* If the difference is more than 10%, it's an error */
+            printf("%.3f\t%.3f\n", 1000000.0 * ts, 1000000.0 * t);
+            printf("Too much difference in performance\n");
+        }
+        else
+            printf(" No Errors\n");
+    }
+
+    MPI_Finalize();
+
+    return 0;
 }
diff --git a/teshsuite/smpi/mpich3-test/perf/sendrecvl.c b/teshsuite/smpi/mpich3-test/perf/sendrecvl.c
index 1a54a70a63..74703d63ca 100644
--- a/teshsuite/smpi/mpich3-test/perf/sendrecvl.c
+++ b/teshsuite/smpi/mpich3-test/perf/sendrecvl.c
@@ -14,11 +14,11 @@
 #include <stdlib.h>
 
 #define MAXTESTS 32
-#define ERROR_MARGIN 1.0 /* FIXME: This number is pretty much randomly chosen */
+#define ERROR_MARGIN 1.0        /* FIXME: This number is pretty much randomly chosen */
 
 static int verbose = 0;
 
-int main( int argc, char *argv[] )
+int main(int argc, char *argv[])
 {
     int wsize, wrank, partner, len, maxlen, k, reps, repsleft;
     double t1;
@@ -26,239 +26,234 @@ int main( int argc, char *argv[] )
     char *rbuf, *sbuf;
     double times[3][MAXTESTS];
 
-    MPI_Init( &argc, &argv );
-    if (getenv("MPITEST_VERBOSE")) verbose = 1;
+    MPI_Init(&argc, &argv);
+    if (getenv("MPITEST_VERBOSE"))
+        verbose = 1;
+
+    MPI_Comm_size(MPI_COMM_WORLD, &wsize);
+    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
 
-    MPI_Comm_size( MPI_COMM_WORLD, &wsize );
-    MPI_Comm_rank( MPI_COMM_WORLD, &wrank );
-    
     if (wsize < 2) {
-	fprintf( stderr, "This program requires at least 2 processes\n" );
-	MPI_Abort( MPI_COMM_WORLD, 1 );
+        fprintf(stderr, "This program requires at least 2 processes\n");
+        MPI_Abort(MPI_COMM_WORLD, 1);
     }
     /* Set partner based on whether rank is odd or even */
     if (wrank & 0x1) {
-	partner = wrank - 1;
+        partner = wrank - 1;
     }
     else if (wrank < wsize - 1) {
-	partner = wrank + 1;
+        partner = wrank + 1;
     }
-    else 
-	/* Handle wsize odd */
-	partner = MPI_PROC_NULL;
+    else
+        /* Handle wsize odd */
+        partner = MPI_PROC_NULL;
 
     /* Allocate and initialize buffers */
-    maxlen = 1024*1024;
-    rbuf = (char *)malloc( maxlen );
-    sbuf = (char *)malloc( maxlen );
+    maxlen = 1024 * 1024;
+    rbuf = (char *) malloc(maxlen);
+    sbuf = (char *) malloc(maxlen);
     if (!rbuf || !sbuf) {
-	fprintf( stderr, "Could not allocate %d byte buffers\n", maxlen );
-	MPI_Abort( MPI_COMM_WORLD, 2 );
+        fprintf(stderr, "Could not allocate %d byte buffers\n", maxlen);
+        MPI_Abort(MPI_COMM_WORLD, 2);
     }
-    for (k=0; k<maxlen; k++) {
-	rbuf[k] = 0;
-	sbuf[k] = 0;
+    for (k = 0; k < maxlen; k++) {
+        rbuf[k] = 0;
+        sbuf[k] = 0;
     }
-    
-    MPI_Barrier( MPI_COMM_WORLD );
+
+    MPI_Barrier(MPI_COMM_WORLD);
 
     /* Test Irecv and send, head to head */
     if (wrank == 0 && verbose) {
-	printf( "Irecv-send\n" );
-	printf( "len\ttime    \trate\n" );
+        printf("Irecv-send\n");
+        printf("len\ttime    \trate\n");
     }
 
     /* Send powers of 2 bytes */
     len = 1;
-    for (k=0; k<20; k++) {
-	/* We use a simple linear form for the number of tests to 
-	   reduce the impact of the granularity of the timer */
-	reps     = 50-k;
-	repsleft = reps;
-	/* Make sure that both processes are ready to start */
-	MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, partner, 0, 
-		      MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, 
-		      MPI_STATUS_IGNORE );
-	t1 = MPI_Wtime();
-	while (repsleft--) {
-	    MPI_Irecv( rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, &rreq );
-	    MPI_Send( sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD );
-	    MPI_Wait( &rreq, MPI_STATUS_IGNORE );
-	}
-	t1 = MPI_Wtime() - t1;
-	times[0][k] = t1 / reps;
-	if (wrank == 0) {
-	    t1 = t1 / reps;
-	    if (t1 > 0) {
-		t1   = t1 * 1.e6;
-		if (verbose) 
-		    printf( "%d\t%g\t%g\n", len, t1, len/t1 );
-	    }
-	    else {
-		t1   = t1 * 1.e6;
-		if (verbose)
-		    printf( "%d\t%g\tINF\n", len, t1 );
-	    }
-	    if (verbose)
-		fflush( stdout );
-	}
+    for (k = 0; k < 20; k++) {
+        /* We use a simple linear form for the number of tests to
+         * reduce the impact of the granularity of the timer */
+        reps = 50 - k;
+        repsleft = reps;
+        /* Make sure that both processes are ready to start */
+        MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
+                     MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+        t1 = MPI_Wtime();
+        while (repsleft--) {
+            MPI_Irecv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, &rreq);
+            MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD);
+            MPI_Wait(&rreq, MPI_STATUS_IGNORE);
+        }
+        t1 = MPI_Wtime() - t1;
+        times[0][k] = t1 / reps;
+        if (wrank == 0) {
+            t1 = t1 / reps;
+            if (t1 > 0) {
+                t1 = t1 * 1.e6;
+                if (verbose)
+                    printf("%d\t%g\t%g\n", len, t1, len / t1);
+            }
+            else {
+                t1 = t1 * 1.e6;
+                if (verbose)
+                    printf("%d\t%g\tINF\n", len, t1);
+            }
+            if (verbose)
+                fflush(stdout);
+        }
 
-	len *= 2;
+        len *= 2;
     }
 
-    MPI_Barrier( MPI_COMM_WORLD );
+    MPI_Barrier(MPI_COMM_WORLD);
 
     /* Test Sendrecv, head to head */
     if (wrank == 0 && verbose) {
-	printf( "Sendrecv\n" );
-	printf( "len\ttime (usec)\trate (MB/s)\n" );
+        printf("Sendrecv\n");
+        printf("len\ttime (usec)\trate (MB/s)\n");
     }
 
     /* Send powers of 2 bytes */
     len = 1;
-    for (k=0; k<20; k++) {
-	/* We use a simple linear form for the number of tests to 
-	   reduce the impact of the granularity of the timer */
-	reps     = 50-k;
-	repsleft = reps;
-	/* Make sure that both processes are ready to start */
-	MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, partner, 0, 
-		      MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, 
-		      MPI_STATUS_IGNORE );
-	t1 = MPI_Wtime();
-	while (repsleft--) {
-	    MPI_Sendrecv( sbuf, len, MPI_BYTE, partner, k, 
-			  rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD,
-			  MPI_STATUS_IGNORE );
-	}
-	t1 = MPI_Wtime() - t1;
-	times[1][k] = t1 / reps;
-	if (wrank == 0) {
-	    t1 = t1 / reps;
-	    if (t1 > 0) {
-		t1   = t1 * 1.e6;
-		if (verbose)
-		    printf( "%d\t%g\t%g\n", len, t1, len/t1 );
-	    }
-	    else {
-		t1   = t1 * 1.e6;
-		if (verbose)
-		    printf( "%d\t%g\tINF\n", len, t1 );
-	    }
-	    if (verbose)
-		fflush( stdout );
-	}
+    for (k = 0; k < 20; k++) {
+        /* We use a simple linear form for the number of tests to
+         * reduce the impact of the granularity of the timer */
+        reps = 50 - k;
+        repsleft = reps;
+        /* Make sure that both processes are ready to start */
+        MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
+                     MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+        t1 = MPI_Wtime();
+        while (repsleft--) {
+            MPI_Sendrecv(sbuf, len, MPI_BYTE, partner, k,
+                         rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+        }
+        t1 = MPI_Wtime() - t1;
+        times[1][k] = t1 / reps;
+        if (wrank == 0) {
+            t1 = t1 / reps;
+            if (t1 > 0) {
+                t1 = t1 * 1.e6;
+                if (verbose)
+                    printf("%d\t%g\t%g\n", len, t1, len / t1);
+            }
+            else {
+                t1 = t1 * 1.e6;
+                if (verbose)
+                    printf("%d\t%g\tINF\n", len, t1);
+            }
+            if (verbose)
+                fflush(stdout);
+        }
 
-	len *= 2;
+        len *= 2;
     }
 
-    MPI_Barrier( MPI_COMM_WORLD );
+    MPI_Barrier(MPI_COMM_WORLD);
 
     /* Test Send/recv, ping-pong */
     if (wrank == 0 && verbose) {
-	printf( "Pingpong\n" );
-	printf( "len\ttime (usec)\trate (MB/s)\n" );
+        printf("Pingpong\n");
+        printf("len\ttime (usec)\trate (MB/s)\n");
     }
 
     /* Send powers of 2 bytes */
     len = 1;
-    for (k=0; k<20; k++) {
-	/* We use a simple linear form for the number of tests to 
-	   reduce the impact of the granularity of the timer */
-	reps     = 50-k;
-	repsleft = reps;
-	/* Make sure that both processes are ready to start */
-	MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, partner, 0, 
-		      MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, 
-		      MPI_STATUS_IGNORE );
-	t1 = MPI_Wtime();
-	while (repsleft--) {
-	    if (wrank & 0x1) {
-		MPI_Send( sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD );
-		MPI_Recv( rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, 
-			  MPI_STATUS_IGNORE );
-	    }
-	    else {
-		MPI_Recv( rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, 
-			  MPI_STATUS_IGNORE );
-		MPI_Send( sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD );
-	    }
-	}
-	t1 = MPI_Wtime() - t1;
-	times[2][k] = t1 / reps;
-	if (wrank == 0) {
-	    t1 = t1 / reps;
-	    if (t1 > 0) {
-		t1   = t1 * 1.e6;
-		if (verbose)
-		    printf( "%d\t%g\t%g\n", len, t1, len/t1 );
-	    }
-	    else {
-		t1   = t1 * 1.e6;
-		if (verbose)
-		    printf( "%d\t%g\tINF\n", len, t1 );
-	    }
-	    if (verbose)
-		fflush( stdout );
-	}
+    for (k = 0; k < 20; k++) {
+        /* We use a simple linear form for the number of tests to
+         * reduce the impact of the granularity of the timer */
+        reps = 50 - k;
+        repsleft = reps;
+        /* Make sure that both processes are ready to start */
+        MPI_Sendrecv(MPI_BOTTOM, 0, MPI_BYTE, partner, 0,
+                     MPI_BOTTOM, 0, MPI_BYTE, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+        t1 = MPI_Wtime();
+        while (repsleft--) {
+            if (wrank & 0x1) {
+                MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD);
+                MPI_Recv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            }
+            else {
+                MPI_Recv(rbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+                MPI_Send(sbuf, len, MPI_BYTE, partner, k, MPI_COMM_WORLD);
+            }
+        }
+        t1 = MPI_Wtime() - t1;
+        times[2][k] = t1 / reps;
+        if (wrank == 0) {
+            t1 = t1 / reps;
+            if (t1 > 0) {
+                t1 = t1 * 1.e6;
+                if (verbose)
+                    printf("%d\t%g\t%g\n", len, t1, len / t1);
+            }
+            else {
+                t1 = t1 * 1.e6;
+                if (verbose)
+                    printf("%d\t%g\tINF\n", len, t1);
+            }
+            if (verbose)
+                fflush(stdout);
+        }
 
-	len *= 2;
+        len *= 2;
     }
-    
-    
+
+
     /* At this point, we could optionally analyze the results and report
-       success or failure based on some criteria, such as near monotone
-       increases in bandwidth.  This test was created because of a 
-       fall-off in performance noted in the ch3:sock device:channel */
+     * success or failure based on some criteria, such as near monotone
+     * increases in bandwidth.  This test was created because of a
+     * fall-off in performance noted in the ch3:sock device:channel */
 
     if (wrank == 0) {
-	int nPerfErrors = 0;
-	len = 1;
-	for (k=0; k<20; k++) {
-	    double T0,T1,T2;
-	    T0 = times[0][k] * 1.e6;
-	    T1 = times[1][k] * 1.e6;
-	    T2 = times[2][k] * 1.e6;
-	    if (verbose)
-		printf( "%d\t%12.2f\t%12.2f\t%12.2f\n", len, T0, T1, T2 );
-	    /* Lets look at long messages only */
-	    if (k > 10) {
-		double T0Old, T1Old, T2Old;
-		T0Old = times[0][k-1] * 1.0e6;
-		T1Old = times[1][k-1] * 1.0e6;
-		T2Old = times[2][k-1] * 1.0e6;
-		if (T0 > (2+ERROR_MARGIN) * T0Old) {
-		    nPerfErrors++;
-		    if (verbose)
-			printf( "Irecv-Send:\t%d\t%12.2f\t%12.2f\n", len, T0Old, T0 );
-		}
-		if (T1 > (2+ERROR_MARGIN) * T1Old) {
-		    nPerfErrors++;
-		    if (verbose)
-			printf( "Sendrecv:\t%d\t%12.2f\t%12.2f\n", len, T1Old, T1 );
-		}
-		if (T2 > (2+ERROR_MARGIN) * T2Old) {
-		    nPerfErrors++;
-		    if (verbose)
-			printf( "Pingpong:\t%d\t%12.2f\t%12.2f\n", len, T2Old, T2 );
-		}
-	    }
-	    len *= 2;
-	}
-	if (nPerfErrors > 8) { 
-	    /* Allow for 1-2 errors for eager-rendezvous shifting
-	     * point and cache effects. There should be a better way
-	     * of doing this. */
-	    printf( " Found %d performance errors\n", nPerfErrors );
-	}
-	else {
-	    printf( " No Errors\n" );
-	}
-	fflush( stdout );
+        int nPerfErrors = 0;
+        len = 1;
+        for (k = 0; k < 20; k++) {
+            double T0, T1, T2;
+            T0 = times[0][k] * 1.e6;
+            T1 = times[1][k] * 1.e6;
+            T2 = times[2][k] * 1.e6;
+            if (verbose)
+                printf("%d\t%12.2f\t%12.2f\t%12.2f\n", len, T0, T1, T2);
+            /* Let's look at long messages only */
+            if (k > 10) {
+                double T0Old, T1Old, T2Old;
+                T0Old = times[0][k - 1] * 1.0e6;
+                T1Old = times[1][k - 1] * 1.0e6;
+                T2Old = times[2][k - 1] * 1.0e6;
+                if (T0 > (2 + ERROR_MARGIN) * T0Old) {
+                    nPerfErrors++;
+                    if (verbose)
+                        printf("Irecv-Send:\t%d\t%12.2f\t%12.2f\n", len, T0Old, T0);
+                }
+                if (T1 > (2 + ERROR_MARGIN) * T1Old) {
+                    nPerfErrors++;
+                    if (verbose)
+                        printf("Sendrecv:\t%d\t%12.2f\t%12.2f\n", len, T1Old, T1);
+                }
+                if (T2 > (2 + ERROR_MARGIN) * T2Old) {
+                    nPerfErrors++;
+                    if (verbose)
+                        printf("Pingpong:\t%d\t%12.2f\t%12.2f\n", len, T2Old, T2);
+                }
+            }
+            len *= 2;
+        }
+        if (nPerfErrors > 8) {
+            /* Tolerate up to 8 errors, to allow for the eager-rendezvous
+             * shifting point and cache effects. There should be a better
+             * way of doing this. */
+            printf(" Found %d performance errors\n", nPerfErrors);
+        }
+        else {
+            printf(" No Errors\n");
+        }
+        fflush(stdout);
     }
 
-    free( sbuf );
-    free( rbuf );
+    free(sbuf);
+    free(rbuf);
 
     MPI_Finalize();
 
diff --git a/teshsuite/smpi/mpich3-test/perf/testlist b/teshsuite/smpi/mpich3-test/perf/testlist
index 096a5bb51c..03ddbe37f2 100644
--- a/teshsuite/smpi/mpich3-test/perf/testlist
+++ b/teshsuite/smpi/mpich3-test/perf/testlist
@@ -2,10 +2,10 @@ transp-datatype 2
 sendrecvl 2
 twovec 1  xfail=ticket1788
 #Need MPI_Pack
-#dtpack 1  xfail=ticket1789
-#nestvec 1  xfail=ticket1788
-#nestvec2 1  xfail=ticket1788
-#indexperf 1  xfail=ticket1788
+dtpack 1  xfail=ticket1789
+nestvec 1  xfail=ticket1788
+nestvec2 1  xfail=ticket1788
+indexperf 1  xfail=ticket1788
 non_zero_root 4
 timer 1
 # The commcreatep test looks at how communicator creation scales with group
diff --git a/teshsuite/smpi/mpich3-test/perf/timer.c b/teshsuite/smpi/mpich3-test/perf/timer.c
index 1a778c4493..4a77d18b34 100644
--- a/teshsuite/smpi/mpich3-test/perf/timer.c
+++ b/teshsuite/smpi/mpich3-test/perf/timer.c
@@ -4,7 +4,7 @@
  *      See COPYRIGHT in top-level directory.
  */
 
-/* 
+/*
  * Check that the timer produces monotone nondecreasing times and that
  * the Tick is reasonable
  */
@@ -17,51 +17,53 @@ static int verbose = 0;
 
 #define MAX_TIMER_TEST 5000
 
-int main(int argc, char* argv[])
+int main(int argc, char *argv[])
 {
     double t1[MAX_TIMER_TEST], tick[MAX_TIMER_TEST], tickval;
     double minDiff, maxDiff, diff;
     int i, nZeros = 0;
     int errs = 0;
 
-    MTest_Init(&argc,&argv);
+    MTest_Init(&argc, &argv);
 
-    for (i=0; i<MAX_TIMER_TEST; i++) {
-	t1[i] = MPI_Wtime();
+    for (i = 0; i < MAX_TIMER_TEST; i++) {
+        t1[i] = MPI_Wtime();
     }
 
-    for (i=0; i<MAX_TIMER_TEST; i++) {
-	tick[i] = MPI_Wtick();
+    for (i = 0; i < MAX_TIMER_TEST; i++) {
+        tick[i] = MPI_Wtick();
     }
 
     /* Look at the values */
     /* Look at the tick */
     tickval = MPI_Wtick();
-    for (i=0; i<MAX_TIMER_TEST; i++) {
-	if (tickval != tick[i]) {
-	    fprintf( stderr, "Nonconstant value for MPI_Wtick: %e != %e\n",
-		     tickval, tick[i] );
-	    errs ++;
-	}
+    for (i = 0; i < MAX_TIMER_TEST; i++) {
+        if (tickval != tick[i]) {
+            fprintf(stderr, "Nonconstant value for MPI_Wtick: %e != %e\n", tickval, tick[i]);
+            errs++;
+        }
     }
 
     /* Look at the timer */
     minDiff = 1.e20;
     maxDiff = -1.0;
-    nZeros  = 0;
-    for (i=1; i<MAX_TIMER_TEST; i++) {
-	diff = t1[i] - t1[i-1];
-	if (diff == 0.0) nZeros++;
-	else if (diff < minDiff) minDiff = diff;
-	if (diff > maxDiff) maxDiff = diff;
+    nZeros = 0;
+    for (i = 1; i < MAX_TIMER_TEST; i++) {
+        diff = t1[i] - t1[i - 1];
+        if (diff == 0.0)
+            nZeros++;
+        else if (diff < minDiff)
+            minDiff = diff;
+        if (diff > maxDiff)
+            maxDiff = diff;
     }
 
     /* Are the time diff values and tick values consistent */
     if (verbose) {
-	printf( "Tick = %e, timer range = [%e,%e]\n", tickval, minDiff, 
-		maxDiff );
-	if (nZeros) printf( "Wtime difference was 0 %d times\n", nZeros );
-    }    
+        printf("Tick = %e, timer range = [%e,%e]\n", tickval, minDiff, maxDiff);
+        if (nZeros)
+            printf("Wtime difference was 0 %d times\n", nZeros);
+    }
 
     MTest_Finalize(errs);
     MPI_Finalize();
diff --git a/teshsuite/smpi/mpich3-test/perf/transp-datatype.c b/teshsuite/smpi/mpich3-test/perf/transp-datatype.c
index b7ebc278b3..430831b6d0 100644
--- a/teshsuite/smpi/mpich3-test/perf/transp-datatype.c
+++ b/teshsuite/smpi/mpich3-test/perf/transp-datatype.c
@@ -21,11 +21,11 @@
 #define SIZE 100
 #define ITER 100
 
-int main(int argc, char* argv[])
+int main(int argc, char *argv[])
 {
     int i, j, k;
-    static double a[SIZE][SIZE],b[SIZE][SIZE];
-    double t1,t2,t,ts,tst;
+    static double a[SIZE][SIZE], b[SIZE][SIZE];
+    double t1, t2, t, ts, tst;
     double temp;
     int myrank, mysize, errs = 0;
     MPI_Status status;
@@ -33,94 +33,92 @@ int main(int argc, char* argv[])
 
     MPI_Datatype col, xpose;
 
-    MTest_Init( &argc, &argv );
-    MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
-    MPI_Comm_size( MPI_COMM_WORLD, &mysize );
+    MTest_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+    MPI_Comm_size(MPI_COMM_WORLD, &mysize);
     if (mysize != 2) {
-	fprintf( stderr, "This test must be run with 2 processes\n" );
-	MPI_Abort( MPI_COMM_WORLD, 1 );
+        fprintf(stderr, "This test must be run with 2 processes\n");
+        MPI_Abort(MPI_COMM_WORLD, 1);
     }
 
     MPI_Type_extent(MPI_DOUBLE, &sizeofreal);
- 
+
     MPI_Type_vector(SIZE, 1, SIZE, MPI_DOUBLE, &col);
     MPI_Type_hvector(SIZE, 1, sizeofreal, col, &xpose);
     MPI_Type_commit(&xpose);
 
     /* Preset the arrays so that they're in memory */
-    for (i=0; i<SIZE; i++)
-	for (j=0; j<SIZE; j++) {
-	    a[i][j]=0;
-	    b[i][j]=0;
-	    }
-    a[SIZE-1][0] = 1;
+    for (i = 0; i < SIZE; i++)
+        for (j = 0; j < SIZE; j++) {
+            a[i][j] = 0;
+            b[i][j] = 0;
+        }
+    a[SIZE - 1][0] = 1;
 
     /* Time the transpose example */
     MPI_Barrier(MPI_COMM_WORLD);
-    t1=MPI_Wtime();
-    for(i=0;i< ITER; i++)
-	{
-	    if(myrank==0)
-		MPI_Send(&a[0][0],SIZE*SIZE,MPI_DOUBLE,1,0,MPI_COMM_WORLD);
-	    else 
-		MPI_Recv(&b[0][0],1,xpose,0,0,MPI_COMM_WORLD,&status);
-	}
-    t2=MPI_Wtime();
-    t=(t2-t1)/ITER;
+    t1 = MPI_Wtime();
+    for (i = 0; i < ITER; i++) {
+        if (myrank == 0)
+            MPI_Send(&a[0][0], SIZE * SIZE, MPI_DOUBLE, 1, 0, MPI_COMM_WORLD);
+        else
+            MPI_Recv(&b[0][0], 1, xpose, 0, 0, MPI_COMM_WORLD, &status);
+    }
+    t2 = MPI_Wtime();
+    t = (t2 - t1) / ITER;
 
     /* Time sending the same amount of data, but without the transpose */
     MPI_Barrier(MPI_COMM_WORLD);
-    t1=MPI_Wtime();
-    for(i=0; i< ITER; i++){
-	if(myrank==0)
-	    {
-		MPI_Send(&a[0][0],sizeof(a),MPI_BYTE,1,0,MPI_COMM_WORLD);
-	    }
-	else {
-		MPI_Recv(&b[0][0],sizeof(b),MPI_BYTE,0,0,MPI_COMM_WORLD,&status);
-	}
+    t1 = MPI_Wtime();
+    for (i = 0; i < ITER; i++) {
+        if (myrank == 0) {
+            MPI_Send(&a[0][0], sizeof(a), MPI_BYTE, 1, 0, MPI_COMM_WORLD);
+        }
+        else {
+            MPI_Recv(&b[0][0], sizeof(b), MPI_BYTE, 0, 0, MPI_COMM_WORLD, &status);
+        }
     }
-    t2=MPI_Wtime();
-    ts=(t2-t1)/ITER;
+    t2 = MPI_Wtime();
+    ts = (t2 - t1) / ITER;
 
     /* Time sending the same amount of data, with the transpose done
-       as a separate step */
+     * as a separate step */
     MPI_Barrier(MPI_COMM_WORLD);
-    t1=MPI_Wtime();
-    for(k=0; k< ITER; k++){
-	if(myrank==0)
-	    {
-		MPI_Send(&a[0][0],sizeof(a),MPI_BYTE,1,0,MPI_COMM_WORLD);
-	    }
-	else {
-		MPI_Recv(&b[0][0],sizeof(b),MPI_BYTE,0,0,MPI_COMM_WORLD,&status);
-		for(i=0;i<SIZE;i++)
-		    for(j=i;j<SIZE;j++) {
-			temp=b[j][i];
-			b[j][i]=b[i][j];
-			b[i][j]=temp;
-		}
-	}
+    t1 = MPI_Wtime();
+    for (k = 0; k < ITER; k++) {
+        if (myrank == 0) {
+            MPI_Send(&a[0][0], sizeof(a), MPI_BYTE, 1, 0, MPI_COMM_WORLD);
+        }
+        else {
+            MPI_Recv(&b[0][0], sizeof(b), MPI_BYTE, 0, 0, MPI_COMM_WORLD, &status);
+            for (i = 0; i < SIZE; i++)
+                for (j = i; j < SIZE; j++) {
+                    temp = b[j][i];
+                    b[j][i] = b[i][j];
+                    b[i][j] = temp;
+                }
+        }
     }
-    t2=MPI_Wtime();
-    tst=(t2-t1)/ITER;
+    t2 = MPI_Wtime();
+    tst = (t2 - t1) / ITER;
 
     /* Print out the results */
     if (myrank == 1) {
-	/* if t and tst are too different, then there is a performance
-	   problem in the handling of the datatypes */
-	
-	if (t > 2 * tst) {
-	    errs ++;
-	    fprintf( stderr, "Transpose time with datatypes is more than twice time without datatypes\n" );
-	    fprintf( stderr, "%f\t%f\t%f\n", t, ts, tst );
-	}
+        /* if t and tst are too different, then there is a performance
+         * problem in the handling of the datatypes */
+
+        if (t > 2 * tst) {
+            errs++;
+            fprintf(stderr,
+                    "Transpose time with datatypes is more than twice time without datatypes\n");
+            fprintf(stderr, "%f\t%f\t%f\n", t, ts, tst);
+        }
     }
 
     MPI_Type_free(&col);
     MPI_Type_free(&xpose);
 
-    MTest_Finalize( errs );
+    MTest_Finalize(errs);
     MPI_Finalize();
     return 0;
 }
diff --git a/teshsuite/smpi/mpich3-test/perf/twovec.c b/teshsuite/smpi/mpich3-test/perf/twovec.c
index 653e8ced7b..e99e18ec17 100644
--- a/teshsuite/smpi/mpich3-test/perf/twovec.c
+++ b/teshsuite/smpi/mpich3-test/perf/twovec.c
@@ -21,7 +21,7 @@
  */
 
 #define SKIP 4
-#define NUM_SIZES 15
+#define NUM_SIZES 16
 #define FRACTION 1.0
 
 /* Don't make the number of loops too high; we create so many
@@ -39,7 +39,7 @@ int main(int argc, char *argv[])
     MPI_Init(&argc, &argv);
 
     tmean = 0;
-    size  = 1;
+    size = 1;
     for (i = -SKIP; i < NUM_SIZES; i++) {
         nrows = ncols = size;
 
@@ -55,8 +55,8 @@ int main(int argc, char *argv[])
             t[i] = MPI_Wtime() - ttmp;
             if (t[i] < 100 * MPI_Wtick()) {
                 /* Time is too inaccurate to use.  Set to zero.
-                   Consider increasing the LOOPS value to make this
-                   time large enough */
+                 * Consider increasing the LOOPS value to make this
+                 * time large enough */
                 t[i] = 0;
             }
             tmean += t[i];
@@ -73,31 +73,32 @@ int main(int argc, char *argv[])
     tmean /= NUM_SIZES;
 
     /* Now, analyze the times to see that they do not grow too fast
-       as a function of size.  As that is a vague criteria, we do the
-       following as a simple test:
-          Compute the mean of the first half and the second half of the
-          data
-          Compare the two means
-          If the mean of the second half is more than FRACTION times the
-          mean of the first half, then the time may be growing too fast.
+     * as a function of size.  As that is a vague criterion, we do the
+     * following as a simple test:
+     * Compute the mean of the first half and the second half of the
+     * data
+     * Compare the two means
+     * If the mean of the second half is more than FRACTION times the
+     * mean of the first half, then the time may be growing too fast.
      */
     tMeanLower = tMeanHigher = 0;
-    for (i=0; i<NUM_SIZES/2; i++)
+    for (i = 0; i < NUM_SIZES / 2; i++)
         tMeanLower += t[i];
-    tMeanLower /= (NUM_SIZES/2);
-    for (i=NUM_SIZES/2; i<NUM_SIZES; i++)
+    tMeanLower /= (NUM_SIZES / 2);
+    for (i = NUM_SIZES / 2; i < NUM_SIZES; i++)
         tMeanHigher += t[i];
-    tMeanHigher /= (NUM_SIZES - NUM_SIZES/2);
+    tMeanHigher /= (NUM_SIZES - NUM_SIZES / 2);
     /* A large value (even 1 or greater) is a good choice for
-       FRACTION here - the goal is to detect significant growth in
-       execution time as the size increases, and there is no MPI
-       standard requirement here to meet.
-
-       If the times were too small, then the test also passes - the
-       goal is to find implementation problems that lead to excessive
-       time in these routines.
-    */
-    if (tMeanLower > 0 && tMeanHigher > (1 + FRACTION) * tMeanLower) errs++;
+     * FRACTION here - the goal is to detect significant growth in
+     * execution time as the size increases, and there is no MPI
+     * standard requirement here to meet.
+     *
+     * If the times were too small, then the test also passes - the
+     * goal is to find implementation problems that lead to excessive
+     * time in these routines.
+     */
+    if (tMeanLower > 0 && tMeanHigher > (1 + FRACTION) * tMeanLower)
+        errs++;
 
     if (errs) {
         fprintf(stderr, "too much difference in performance: ");
-- 
2.20.1