X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/bc48db087894fd960073b3120cebf90e6b2f8c02..befbbbe1fbb31663a8f91e24ce12df271cf4ae79:/examples/smpi/NAS/ep.c diff --git a/examples/smpi/NAS/ep.c b/examples/smpi/NAS/ep.c index c56cf2043f..24257dc1b2 100644 --- a/examples/smpi/NAS/ep.c +++ b/examples/smpi/NAS/ep.c @@ -9,39 +9,28 @@ char class; int nprocs; -#define true 1 -#define false 0 +#define TRUE 1 +#define FALSE 0 int main(int argc, char **argv) { double dum[3] = {1.,1.,1.}; - double x1, x2, sx, sy, tm, an, tt, gc; + double x1, x2, sx, sy, tm, an, gc; + XBT_ATTRIB_UNUSED double tt; double Mops; double epsilon=1.0E-8, a = 1220703125., s=271828183.; double t1, t2, t3, t4; double sx_verify_value, sy_verify_value, sx_err, sy_err; - int m, mk=16, - mm, nn, - nk = (int)(pow(2,mk)), - nq=10, - np, - node, - no_nodes, - i, - ik, - kk, - l, - k, nit, no_large_nodes, - np_add, k_offset; - int root=0; - int verified; - char size[500]; // mind the size of the string to represent a big number - - double *start = (double *) malloc (64*sizeof(double)); - double *elapsed = (double *) malloc (64*sizeof(double)); - - double *x = (double *) malloc (2*nk*sizeof(double)); - double *q = (double *) malloc (nq*sizeof(double)); + int m; + int mk=16; + int nk = (int)(pow(2,mk)), + nq=10, + np, node, no_nodes, i, ik, kk, l, k, nit, no_large_nodes, np_add, k_offset; + int verified; + char size[500]; // mind the size of the string to represent a big number + + double *x = (double *) SMPI_SHARED_MALLOC (2*nk*sizeof(double)); + double *q = (double *) SMPI_SHARED_MALLOC (nq*sizeof(double)); MPI_Init( &argc, &argv ); MPI_Comm_size( MPI_COMM_WORLD, &no_nodes); @@ -52,42 +41,48 @@ int main(int argc, char **argv) { get_info(argc, argv, &nprocs, &class); check_info(EP, nprocs, class); - if (class == 'S') { m = 24; } - else if (class == 'W') { m = 25; } - else if (class == 'A') { m = 28; } - else if (class == 'B') { m = 30; } - else if (class == 'C') { m = 32; } - else if (class == 'D') { m = 36; } - else if (class == 'E') { m = 40; } + if (class == 'S') + { m = 24; } + else if (class == 'W') + { m = 25; } + else if (class == 'A') + { m = 28; } + else if (class == 'B') + { m = 30; } + else if (class == 'C') + { m = 32; } + else if (class == 'D') + { m = 36; } + else if (class == 'E') + { m = 40; } else { printf("EP: Internal error: invalid class type %c\n", class); exit(1); } - mm = m -mk; - nn = (int)(pow(2,mm)), - root = 0; + int mm = m -mk; + int nn = (int)(pow(2,mm)); + + int root = 0; if (node == root ) { /* Because the size of the problem is too large to store in a 32-bit integer for some classes, we put it into a * string (for printing). Have to strip off the decimal point put in there by the floating point print statement - * (internal file) - */ + * (internal file) */ fprintf(stdout," NAS Parallel Benchmarks 3.2 -- EP Benchmark"); - sprintf(size,"%zu",(unsigned long)pow(2,m+1)); - //size = size.replace('.', ' '); + snprintf(size,500,"%lu",(unsigned long)pow(2,m+1)); fprintf(stdout," Number of random numbers generated: %s\n",size); fprintf(stdout," Number of active processes: %d\n",no_nodes); } - verified = false; + verified = FALSE; /* Compute the number of "batches" of random number pairs generated per processor. Adjust if the number of processors - * does not evenly divide the total number - */ - + * does not evenly divide the total number */ np = nn / no_nodes; no_large_nodes = nn % no_nodes; - if (node < no_large_nodes) np_add = 1; - else np_add = 0; + if (node < no_large_nodes) + np_add = 1; + else + np_add = 0; np = np + np_add; if (np == 0) { @@ -98,9 +93,6 @@ int main(int argc, char **argv) { /* Call the random number generator functions and initialize the x-array to reduce the effects of paging the timings. Also, call all mathematical functions that are used. Make sure initializations cannot be eliminated as dead code. */ - - //call vranlc(0, dum[1], dum[2], dum[3]); - // Array indexes start at 1 in Fortran, 0 in Java vranlc(0, dum[0], dum[1], &(dum[2])); dum[0] = randlc(&(dum[1]),&(dum[2])); @@ -109,32 +101,28 @@ int main(int argc, char **argv) { } Mops = log(sqrt(abs(1))); - /* Synchronize before placing time stamp */ + /* Synchronize before placing time stamp */ MPI_Barrier( MPI_COMM_WORLD ); TRACE_smpi_set_category ("ep"); - time_clear(&(elapsed[1])); - time_clear(&(elapsed[2])); - time_clear(&(elapsed[3])); - time_start(&(start[1])); + timer_clear(1); + timer_clear(2); + timer_clear(3); + timer_start(1); t1 = a; - //fprintf(stdout,("(ep.f:160) t1 = " + t1); t1 = vranlc(0, t1, a, x); - //fprintf(stdout,("(ep.f:161) t1 = " + t1); /* Compute AN = A ^ (2 * NK) (mod 2^46). */ t1 = a; - //fprintf(stdout,("(ep.f:165) t1 = " + t1); for (i=1; i <= mk+1; i++) { t2 = randlc(&t1, &t1); - //fprintf(stdout,("(ep.f:168)[loop i=" + i +"] t1 = " + t1); } an = t1; - //fprintf(stdout,("(ep.f:172) s = " + s); tt = s; - gc = tt = 0.; + gc = 0; + tt = 0.; sx = 0.; sy = 0.; for (i=0; i < nq ; i++) { @@ -143,30 +131,26 @@ int main(int argc, char **argv) { /* Each instance of this loop may be performed independently. We compute the k offsets separately to take into account * the fact that some nodes have more numbers to generate than others */ - if (np_add == 1) k_offset = node * np -1; else k_offset = no_large_nodes*(np+1) + (node-no_large_nodes)*np -1; - int stop = false; - for(k = 1; k <= np; k++) {// SMPI_SAMPLE_LOCAL(0.25 * np, 0.03) { - stop = false; + int stop = FALSE; + for(k = 1; k <= np; k++) { SMPI_SAMPLE_GLOBAL(0.25 * np, 0.03) { + stop = FALSE; kk = k_offset + k ; t1 = s; - //fprintf(stdout,("(ep.f:193) t1 = " + t1); t2 = an; // Find starting seed t1 for this kk. for (i=1;i<=100 && !stop;i++) { ik = kk / 2; - //fprintf(stdout,("(ep.f:199) ik = " +ik+", kk = " + kk); if (2 * ik != kk) { t3 = randlc(&t1, &t2); - //fprintf(stdout,("(ep.f:200) t1= " +t1 ); } if (ik==0) - stop = true; + stop = TRUE; else { t3 = randlc(&t2, &t2); kk = ik; @@ -174,32 +158,16 @@ int main(int argc, char **argv) { } // Compute uniform pseudorandom numbers. - //if (timers_enabled) timer_start(3); - time_start(&(start[3])); - //call vranlc(2 * nk, t1, a, x) --> t1 and y are modified + timer_start(3); - //fprintf(stdout,">>>>>>>>>>>Before vranlc(l.210)<<<<<<<<<<<<<"); - //fprintf(stdout,"2*nk = " + (2*nk)); - //fprintf(stdout,"t1 = " + t1); - //fprintf(stdout,"a = " + a); - //fprintf(stdout,"x[0] = " + x[0]); - //fprintf(stdout,">>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<"); t1 = vranlc(2 * nk, t1, a, x); - //fprintf(stdout,(">>>>>>>>>>>After Enter vranlc (l.210)<<<<<<"); - //fprintf(stdout,("2*nk = " + (2*nk)); - //fprintf(stdout,("t1 = " + t1); - //fprintf(stdout,("a = " + a); - //fprintf(stdout,("x[0] = " + x[0]); - //fprintf(stdout,(">>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<"); - - //if (timers_enabled) timer_stop(3); - time_stop(3,elapsed,start); + timer_stop(3); /* Compute Gaussian deviates by acceptance-rejection method and tally counts in concentric square annuli. * This loop is not vectorizable. */ - //if (timers_enabled) timer_start(2); - time_start(&(start[2])); + timer_start(2); + for(i=1; i<=nk;i++) { x1 = 2. * x[2*i-2] -1.0; x2 = 2. * x[2*i-1] - 1.0; @@ -208,34 +176,19 @@ int main(int argc, char **argv) { t2 = sqrt(-2. * log(t1) / t1); t3 = (x1 * t2); t4 = (x2 * t2); - l = (int)(abs(t3) > abs(t4) ? abs(t3) : abs(t4)); + l = (int)(fabs(t3) > fabs(t4) ? fabs(t3) : fabs(t4)); q[l] = q[l] + 1.; sx = sx + t3; sy = sy + t4; } - /* - if(i == 1) { - fprintf(stdout,"x1 = " + x1); - fprintf(stdout,"x2 = " + x2); - fprintf(stdout,"t1 = " + t1); - fprintf(stdout,"t2 = " + t2); - fprintf(stdout,"t3 = " + t3); - fprintf(stdout,"t4 = " + t4); - fprintf(stdout,"l = " + l); - fprintf(stdout,"q[l] = " + q[l]); - fprintf(stdout,"sx = " + sx); - fprintf(stdout,"sy = " + sy); - } - */ } - //if (timers_enabled) timer_stop(2); - time_stop(2,elapsed,start); - } + timer_stop(2); + } } TRACE_smpi_set_category ("finalize"); MPI_Allreduce(&sx, x, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - sx = x[0]; //FIXME : x[0] or x[1] => x[0] because fortran starts with 1 + sx = x[0]; MPI_Allreduce(&sy, x, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); sy = x[0]; MPI_Allreduce(q, x, nq, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); @@ -247,14 +200,16 @@ int main(int argc, char **argv) { gc += q[i]; } - time_stop(1,elapsed,start); - tm = time_read(1,elapsed); + timer_stop(1); + tm = timer_read(1); MPI_Allreduce(&tm, x, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); tm = x[0]; + SMPI_SHARED_FREE(x); + if(node == root) { nit = 0; - verified = true; + verified = TRUE; if(m == 24) { sx_verify_value = -3.247834652034740E3; @@ -275,23 +230,12 @@ int main(int argc, char **argv) { sx_verify_value = 1.982481200946593E5; sy_verify_value = -1.020596636361769E5; } else { - verified = false; + verified = FALSE; } - /* - fprintf(stdout,("sx = " + sx); - fprintf(stdout,("sx_verify = " + sx_verify_value); - fprintf(stdout,("sy = " + sy); - fprintf(stdout,("sy_verify = " + sy_verify_value); - */ if(verified) { - sx_err = abs((sx - sx_verify_value)/sx_verify_value); - sy_err = abs((sy - sy_verify_value)/sy_verify_value); - /* - fprintf(stdout,("sx_err = " + sx_err); - fprintf(stdout,("sy_err = " + sx_err); - fprintf(stdout,("epsilon= " + epsilon); - */ + sx_err = fabs((sx - sx_verify_value)/sx_verify_value); + sy_err = fabs((sy - sy_verify_value)/sy_verify_value); verified = ((sx_err < epsilon) && (sy_err < epsilon)); } @@ -308,11 +252,13 @@ int main(int argc, char **argv) { } c_print_results("EP", class, m+1, 0, 0, nit, nprocs, no_nodes, tm, Mops, "Random number generated",verified); - fprintf(stdout,"Total time: %f\n",(time_read(1,elapsed)/1000)); - fprintf(stdout,"Gaussian pairs: %f\n",(time_read(2,elapsed)/1000)); - fprintf(stdout,"Random numbers: %f\n",(time_read(3,elapsed)/1000)); + fprintf(stdout,"Total time: %f\n",(timer_read(1)/1000)); + fprintf(stdout,"Gaussian pairs: %f\n",(timer_read(2)/1000)); + fprintf(stdout,"Random numbers: %f\n",(timer_read(3)/1000)); } + SMPI_SHARED_FREE(q); + MPI_Finalize(); return 0; }