A
lgorithmique
N
umérique
D
istribuée
Public GIT Repository
projects
/
simgrid.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
| inline |
side by side
New actions for the time independent trace replay framework:
[simgrid.git]
/
examples
/
smpi
/
NAS
/
EP-sampling
/
ep.c
diff --git
a/examples/smpi/NAS/EP-sampling/ep.c
b/examples/smpi/NAS/EP-sampling/ep.c
index
71e0f89
..
04b0c1b
100644
(file)
--- a/
examples/smpi/NAS/EP-sampling/ep.c
+++ b/
examples/smpi/NAS/EP-sampling/ep.c
@@
-46,7
+46,7
@@
{
int i;
long i246m1=0x00003FFFFFFFFFFF;
-
long LLx, Lx, La;
+ long LLx, Lx, La;
double d2m46;
// This doesn't work, because the compiler does the calculation in 32
@@
-70,21
+70,21
@@
La = (long)a;
//fprintf(stdout,("================== Vranlc ================");
//fprintf(stdout,("Before Loop: Lx = " + Lx + ", La = " + La);
-
LLx = Lx;
-
for (i=0; i< n; i++) {
-
Lx = Lx*La & i246m1 ;
-
LLx = Lx;
-
y[i] = d2m46 * (double)LLx;
-
/*
-
if(i == 0) {
-
fprintf(stdout,("After loop 0:");
-
fprintf(stdout,("Lx = " + Lx + ", La = " + La);
-
fprintf(stdout,("d2m46 = " + d2m46);
-
fprintf(stdout,("LLX(Lx) = " + LLX.doubleValue());
-
fprintf(stdout,("Y[0]" + y[0]);
-
}
-
*/
-
}
+ LLx = Lx;
+ for (i=0; i< n; i++) {
+ Lx = Lx*La & i246m1 ;
+ LLx = Lx;
+ y[i] = d2m46 * (double)LLx;
+ /*
+ if(i == 0) {
+ fprintf(stdout,("After loop 0:");
+ fprintf(stdout,("Lx = " + Lx + ", La = " + La);
+ fprintf(stdout,("d2m46 = " + d2m46);
+ fprintf(stdout,("LLX(Lx) = " + LLX.doubleValue());
+ fprintf(stdout,("Y[0]" + y[0]);
+ }
+ */
+ }
x = (double)LLx;
/*
@@
-98,75
+98,75
@@
//-------------- the core (unique function) -----------
void doTest(int argc, char **argv) {
-
double dum[3] = {1.,1.,1.};
-
double x1, x2, sx, sy, tm, an, tt, gc;
-
double Mops;
-
double epsilon=1.0E-8, a = 1220703125., s=271828183.;
-
double t1, t2, t3, t4;
-
double sx_verify_value, sy_verify_value, sx_err, sy_err;
+ double dum[3] = {1.,1.,1.};
+ double x1, x2, sx, sy, tm, an, tt, gc;
+ double Mops;
+ double epsilon=1.0E-8, a = 1220703125., s=271828183.;
+ double t1, t2, t3, t4;
+ double sx_verify_value, sy_verify_value, sx_err, sy_err;
#include "npbparams.h"
-
int mk=16,
-
// --> set by make : in npbparams.h
-
//m=28, // for CLASS=A
-
//m=30, // for CLASS=B
-
//npm=2, // NPROCS
-
mm = m-mk,
-
nn = (int)(pow(2,mm)),
-
nk = (int)(pow(2,mk)),
-
nq=10,
-
np,
-
node,
-
no_nodes,
-
i,
-
ik,
-
kk,
-
l,
-
k, nit, no_large_nodes,
-
np_add, k_offset, j;
-
int me, nprocs, root=0, dp_type;
-
int verified,
-
timers_enabled=true;
-
char size[500]; // mind the size of the string to represent a big number
-
-
//Use in randlc..
-
int KS = 0;
-
double R23, R46, T23, T46;
-
-
double *qq = (double *) malloc (10000*sizeof(double));
-
double *start = (double *) malloc (64*sizeof(double));
-
double *elapsed = (double *) malloc (64*sizeof(double));
-
-
double *x = (double *) malloc (2*nk*sizeof(double));
-
double *q = (double *) malloc (nq*sizeof(double));
-
-
MPI_Init( &argc, &argv );
-
MPI_Comm_size( MPI_COMM_WORLD, &no_nodes);
-
MPI_Comm_rank( MPI_COMM_WORLD, &node);
+ int mk=16,
+ // --> set by make : in npbparams.h
+ //m=28, // for CLASS=A
+ //m=30, // for CLASS=B
+ //npm=2, // NPROCS
+ mm = m-mk,
+ nn = (int)(pow(2,mm)),
+ nk = (int)(pow(2,mk)),
+ nq=10,
+ np,
+ node,
+ no_nodes,
+ i,
+ ik,
+ kk,
+ l,
+ k, nit, no_large_nodes,
+ np_add, k_offset, j;
+ int me, nprocs, root=0, dp_type;
+ int verified,
+ timers_enabled=true;
+ char size[500]; // mind the size of the string to represent a big number
+
+ //Use in randlc..
+ int KS = 0;
+ double R23, R46, T23, T46;
+
+ double *qq = (double *) malloc (10000*sizeof(double));
+ double *start = (double *) malloc (64*sizeof(double));
+ double *elapsed = (double *) malloc (64*sizeof(double));
+
+ double *x = (double *) malloc (2*nk*sizeof(double));
+ double *q = (double *) malloc (nq*sizeof(double));
+
+ MPI_Init( &argc, &argv );
+ MPI_Comm_size( MPI_COMM_WORLD, &no_nodes);
+ MPI_Comm_rank( MPI_COMM_WORLD, &node);
#ifdef USE_MPE
MPE_Init_log();
#endif
-
root = 0;
-
if (node == root ) {
-
-
/* Because the size of the problem is too large to store in a 32-bit
-
* integer for some classes, we put it into a string (for printing).
-
* Have to strip off the decimal point put in there by the floating
-
* point print statement (internal file)
-
*/
-
fprintf(stdout," NAS Parallel Benchmarks 3.2 -- EP Benchmark");
-
sprintf(size,"%d",pow(2,m+1));
-
//size = size.replace('.', ' ');
-
fprintf(stdout," Number of random numbers generated: %s\n",size);
-
fprintf(stdout," Number of active processes: %d\n",no_nodes);
-
-
}
-
verified = false;
-
-
/* c Compute the number of "batches" of random number pairs generated
-
c per processor. Adjust if the number of processors does not evenly
-
c divide the total number
+ root = 0;
+ if (node == root ) {
+
+ /* Because the size of the problem is too large to store in a 32-bit
+ * integer for some classes, we put it into a string (for printing).
+ * Have to strip off the decimal point put in there by the floating
+ * point print statement (internal file)
+ */
+ fprintf(stdout," NAS Parallel Benchmarks 3.2 -- EP Benchmark");
+ sprintf(size,"%d",pow(2,m+1));
+ //size = size.replace('.', ' ');
+ fprintf(stdout," Number of random numbers generated: %s\n",size);
+ fprintf(stdout," Number of active processes: %d\n",no_nodes);
+
+ }
+ verified = false;
+
+ /* c Compute the number of "batches" of random number pairs generated
+ c per processor. Adjust if the number of processors does not evenly
+ c divide the total number
*/
np = nn / no_nodes;
@@
-187,22
+187,22
@@
c sure these initializations cannot be eliminated as dead code.
*/
-
//call vranlc(0, dum[1], dum[2], dum[3]);
-
// Array indexes start at 1 in Fortran, 0 in Java
-
vranlc(0, dum[0], dum[1], &(dum[2]));
-
-
dum[0] = randlc(&(dum[1]),&(dum[2]));
-
/////////////////////////////////
-
for (i=0;i<2*nk;i++) {
-
x[i] = -1e99;
-
}
-
Mops = log(sqrt(abs(1)));
-
-
/*
-
c---------------------------------------------------------------------
-
c Synchronize before placing time stamp
-
c---------------------------------------------------------------------
-
*/
+ //call vranlc(0, dum[1], dum[2], dum[3]);
+ // Array indexes start at 1 in Fortran, 0 in Java
+ vranlc(0, dum[0], dum[1], &(dum[2]));
+
+ dum[0] = randlc(&(dum[1]),&(dum[2]));
+ /////////////////////////////////
+ for (i=0;i<2*nk;i++) {
+ x[i] = -1e99;
+ }
+ Mops = log(sqrt(abs(1)));
+
+ /*
+ c---------------------------------------------------------------------
+ c Synchronize before placing time stamp
+ c---------------------------------------------------------------------
+ */
MPI_Barrier( MPI_COMM_WORLD );
timer_clear(&(elapsed[1]));
@@
-211,21
+211,21
@@
timer_start(&(start[1]));
t1 = a;
-
//fprintf(stdout,("(ep.f:160) t1 = " + t1);
+ //fprintf(stdout,("(ep.f:160) t1 = " + t1);
t1 = vranlc(0, t1, a, x);
-
//fprintf(stdout,("(ep.f:161) t1 = " + t1);
-
+ //fprintf(stdout,("(ep.f:161) t1 = " + t1);
+
/* c Compute AN = A ^ (2 * NK) (mod 2^46). */
t1 = a;
-
//fprintf(stdout,("(ep.f:165) t1 = " + t1);
+ //fprintf(stdout,("(ep.f:165) t1 = " + t1);
for (i=1; i <= mk+1; i++) {
t2 = randlc(&t1, &t1);
-
//fprintf(stdout,("(ep.f:168)[loop i=" + i +"] t1 = " + t1);
+ //fprintf(stdout,("(ep.f:168)[loop i=" + i +"] t1 = " + t1);
}
an = t1;
-
//fprintf(stdout,("(ep.f:172) s = " + s);
+ //fprintf(stdout,("(ep.f:172) s = " + s);
tt = s;
gc = 0.;
sx = 0.;
@@
-257,7
+257,7
@@
for (i=1;i<=100 && !stop;i++) {
ik = kk / 2;
-
//fprintf(stdout,("(ep.f:199) ik = " +ik+", kk = " + kk);
+ //fprintf(stdout,("(ep.f:199) ik = " +ik+", kk = " + kk);
if (2 * ik != kk) {
t3 = randlc(&t1, &t2);
//fprintf(stdout,("(ep.f:200) t1= " +t1 );
@@
-272,34
+272,34
@@
// Compute uniform pseudorandom numbers.
//if (timers_enabled) timer_start(3);
-
timer_start(&(start[3]));
+ timer_start(&(start[3]));
//call vranlc(2 * nk, t1, a, x) --> t1 and y are modified
-
//fprintf(stdout,">>>>>>>>>>>Before vranlc(l.210)<<<<<<<<<<<<<");
-
//fprintf(stdout,"2*nk = " + (2*nk));
-
//fprintf(stdout,"t1 = " + t1);
-
//fprintf(stdout,"a = " + a);
-
//fprintf(stdout,"x[0] = " + x[0]);
-
//fprintf(stdout,">>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<");
+ //fprintf(stdout,">>>>>>>>>>>Before vranlc(l.210)<<<<<<<<<<<<<");
+ //fprintf(stdout,"2*nk = " + (2*nk));
+ //fprintf(stdout,"t1 = " + t1);
+ //fprintf(stdout,"a = " + a);
+ //fprintf(stdout,"x[0] = " + x[0]);
+ //fprintf(stdout,">>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<");
-
t1 = vranlc(2 * nk, t1, a, x);
-
-
//fprintf(stdout,(">>>>>>>>>>>After Enter vranlc (l.210)<<<<<<");
-
//fprintf(stdout,("2*nk = " + (2*nk));
-
//fprintf(stdout,("t1 = " + t1);
-
//fprintf(stdout,("a = " + a);
-
//fprintf(stdout,("x[0] = " + x[0]);
-
//fprintf(stdout,(">>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<");
+ t1 = vranlc(2 * nk, t1, a, x);
+
+ //fprintf(stdout,(">>>>>>>>>>>After Enter vranlc (l.210)<<<<<<");
+ //fprintf(stdout,("2*nk = " + (2*nk));
+ //fprintf(stdout,("t1 = " + t1);
+ //fprintf(stdout,("a = " + a);
+ //fprintf(stdout,("x[0] = " + x[0]);
+ //fprintf(stdout,(">>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<");
//if (timers_enabled) timer_stop(3);
-
timer_stop(3,elapsed,start);
+ timer_stop(3,elapsed,start);
/* Compute Gaussian deviates by acceptance-rejection method and
* tally counts in concentric square annuli. This loop is not
* vectorizable.
*/
//if (timers_enabled) timer_start(2);
-
timer_start(&(start[2]));
+ timer_start(&(start[2]));
for(i=1; i<=nk;i++) {
x1 = 2. * x[2*i-2] -1.0;
x2 = 2. * x[2*i-1] - 1.0;
@@
-313,8
+313,8
@@
sx = sx + t3;
sy = sy + t4;
}
-
/*
-
if(i == 1) {
+ /*
+ if(i == 1) {
fprintf(stdout,"x1 = " + x1);
fprintf(stdout,"x2 = " + x2);
fprintf(stdout,"t1 = " + t1);
@@
-325,109
+325,109
@@
fprintf(stdout,"q[l] = " + q[l]);
fprintf(stdout,"sx = " + sx);
fprintf(stdout,"sy = " + sy);
-
}
-
*/
+ }
+ */
}
//if (timers_enabled) timer_stop(2);
-
timer_stop(2,elapsed,start);
+ timer_stop(2,elapsed,start);
}
//int MPI_Allreduce(void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm)
-
MPI_Allreduce(&sx, x, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-
sx = x[0]; //FIXME : x[0] or x[1] => x[0] because fortran starts with 1
+ MPI_Allreduce(&sx, x, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+ sx = x[0]; //FIXME : x[0] or x[1] => x[0] because fortran starts with 1
MPI_Allreduce(&sy, x, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
sy = x[0];
MPI_Allreduce(q, x, nq, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
for(i = 0; i < nq; i++) {
-
q[i] = x[i];
-
}
-
for(i = 0; i < nq; i++) {
-
gc += q[i];
-
}
+ q[i] = x[i];
+ }
+ for(i = 0; i < nq; i++) {
+ gc += q[i];
+ }
-
timer_stop(1,elapsed,start);
+ timer_stop(1,elapsed,start);
tm = timer_read(1,elapsed);
-
MPI_Allreduce(&tm, x, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
-
tm = x[0];
-
-
if(node == root) {
-
nit = 0;
-
verified = true;
-
-
if(m == 24) {
-
sx_verify_value = -3.247834652034740E3;
-
sy_verify_value = -6.958407078382297E3;
-
} else if(m == 25) {
-
sx_verify_value = -2.863319731645753E3;
-
sy_verify_value = -6.320053679109499E3;
-
} else if(m == 28) {
-
sx_verify_value = -4.295875165629892E3;
-
sy_verify_value = -1.580732573678431E4;
-
} else if(m == 30) {
-
sx_verify_value = 4.033815542441498E4;
-
sy_verify_value = -2.660669192809235E4;
-
} else if(m == 32) {
-
sx_verify_value = 4.764367927995374E4;
-
sy_verify_value = -8.084072988043731E4;
-
} else if(m == 36) {
-
sx_verify_value = 1.982481200946593E5;
-
sy_verify_value = -1.020596636361769E5;
-
} else {
-
verified = false;
-
}
-
-
/*
-
fprintf(stdout,("sx = " + sx);
-
fprintf(stdout,("sx_verify = " + sx_verify_value);
-
fprintf(stdout,("sy = " + sy);
-
fprintf(stdout,("sy_verify = " + sy_verify_value);
-
*/
-
if(verified) {
-
sx_err = abs((sx - sx_verify_value)/sx_verify_value);
-
sy_err = abs((sy - sy_verify_value)/sy_verify_value);
-
/*
-
fprintf(stdout,("sx_err = " + sx_err);
-
fprintf(stdout,("sy_err = " + sx_err);
-
fprintf(stdout,("epsilon= " + epsilon);
-
*/
-
verified = ((sx_err < epsilon) && (sy_err < epsilon));
-
}
-
-
Mops = (pow(2.0, m+1))/tm/1000;
-
-
fprintf(stdout,"EP Benchmark Results:\n");
-
fprintf(stdout,"CPU Time=%d\n",tm);
-
fprintf(stdout,"N = 2^%d\n",m);
-
fprintf(stdout,"No. Gaussain Pairs =%d\n",gc);
-
fprintf(stdout,"Sum = %lf %ld\n",sx,sy);
-
fprintf(stdout,"Count:");
-
for(i = 0; i < nq; i++) {
-
fprintf(stdout,"%d\t %ld\n",i,q[i]);
-
}
-
-
/*
-
print_results("EP", _class, m+1, 0, 0, nit, npm, no_nodes, tm, Mops,
-
"Random numbers generated", verified, npbversion,
-
compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7) */
-
fprintf(stdout,"\nEP Benchmark Completed\n");
+ MPI_Allreduce(&tm, x, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+ tm = x[0];
+
+ if(node == root) {
+ nit = 0;
+ verified = true;
+
+ if(m == 24) {
+ sx_verify_value = -3.247834652034740E3;
+ sy_verify_value = -6.958407078382297E3;
+ } else if(m == 25) {
+ sx_verify_value = -2.863319731645753E3;
+ sy_verify_value = -6.320053679109499E3;
+ } else if(m == 28) {
+ sx_verify_value = -4.295875165629892E3;
+ sy_verify_value = -1.580732573678431E4;
+ } else if(m == 30) {
+ sx_verify_value = 4.033815542441498E4;
+ sy_verify_value = -2.660669192809235E4;
+ } else if(m == 32) {
+ sx_verify_value = 4.764367927995374E4;
+ sy_verify_value = -8.084072988043731E4;
+ } else if(m == 36) {
+ sx_verify_value = 1.982481200946593E5;
+ sy_verify_value = -1.020596636361769E5;
+ } else {
+ verified = false;
+ }
+
+ /*
+ fprintf(stdout,("sx = " + sx);
+ fprintf(stdout,("sx_verify = " + sx_verify_value);
+ fprintf(stdout,("sy = " + sy);
+ fprintf(stdout,("sy_verify = " + sy_verify_value);
+ */
+ if(verified) {
+ sx_err = abs((sx - sx_verify_value)/sx_verify_value);
+ sy_err = abs((sy - sy_verify_value)/sy_verify_value);
+ /*
+ fprintf(stdout,("sx_err = " + sx_err);
+ fprintf(stdout,("sy_err = " + sx_err);
+ fprintf(stdout,("epsilon= " + epsilon);
+ */
+ verified = ((sx_err < epsilon) && (sy_err < epsilon));
+ }
+
+ Mops = (pow(2.0, m+1))/tm/1000;
+
+ fprintf(stdout,"EP Benchmark Results:\n");
+ fprintf(stdout,"CPU Time=%d\n",tm);
+ fprintf(stdout,"N = 2^%d\n",m);
+ fprintf(stdout,"No. Gaussain Pairs =%d\n",gc);
+ fprintf(stdout,"Sum = %lf %ld\n",sx,sy);
+ fprintf(stdout,"Count:");
+ for(i = 0; i < nq; i++) {
+ fprintf(stdout,"%d\t %ld\n",i,q[i]);
+ }
+
+ /*
+ print_results("EP", _class, m+1, 0, 0, nit, npm, no_nodes, tm, Mops,
+ "Random numbers generated", verified, npbversion,
+ compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7) */
+ fprintf(stdout,"\nEP Benchmark Completed\n");
fprintf(stdout,"Class = %s\n", _class);
-
fprintf(stdout,"Size = %s\n", size);
-
fprintf(stdout,"Iteration = %d\n", nit);
-
fprintf(stdout,"Time in seconds = %lf\n",(tm/1000));
-
fprintf(stdout,"Total processes = %d\n",no_nodes);
-
fprintf(stdout,"Mops/s total = %lf\n",Mops);
-
fprintf(stdout,"Mops/s/process = %lf\n", Mops/no_nodes);
-
fprintf(stdout,"Operation type = Random number generated\n");
-
if(verified) {
-
fprintf(stdout,"Verification = SUCCESSFUL\n");
-
} else {
-
fprintf(stdout,"Verification = UNSUCCESSFUL\n");
-
}
-
fprintf(stdout,"Total time: %lf\n",(timer_read(1,elapsed)/1000));
-
fprintf(stdout,"Gaussian pairs: %lf\n",(timer_read(2,elapsed)/1000));
-
fprintf(stdout,"Random numbers: %lf\n",(timer_read(3,elapsed)/1000));
-
}
+ fprintf(stdout,"Size = %s\n", size);
+ fprintf(stdout,"Iteration = %d\n", nit);
+ fprintf(stdout,"Time in seconds = %lf\n",(tm/1000));
+ fprintf(stdout,"Total processes = %d\n",no_nodes);
+ fprintf(stdout,"Mops/s total = %lf\n",Mops);
+ fprintf(stdout,"Mops/s/process = %lf\n", Mops/no_nodes);
+ fprintf(stdout,"Operation type = Random number generated\n");
+ if(verified) {
+ fprintf(stdout,"Verification = SUCCESSFUL\n");
+ } else {
+ fprintf(stdout,"Verification = UNSUCCESSFUL\n");
+ }
+ fprintf(stdout,"Total time: %lf\n",(timer_read(1,elapsed)/1000));
+ fprintf(stdout,"Gaussian pairs: %lf\n",(timer_read(2,elapsed)/1000));
+ fprintf(stdout,"Random numbers: %lf\n",(timer_read(3,elapsed)/1000));
+ }
#ifdef USE_MPE
MPE_Finish_log(argv[0]);
#endif