4 Modified 3/3/97 by David M. Doolin (dmd) doolin@cs.utk.edu
5 Fixed error in matgen() method. Added some comments.
7 Modified 1/22/97 by Paul McMahan mcmahan@cs.utk.edu
8 Added more MacOS options to form.
10 Optimized by Jonathan Hardwick (jch@cs.cmu.edu), 3/28/96
11 Compare to Linkpack.java.
12 Optimizations performed:
13 - added "final" modifier to performance-critical methods.
14 - changed lines of the form "a[i] = a[i] + x" to "a[i] += x".
15 - minimized array references using common subexpression elimination.
16 - eliminated unused variables.
17 - undid an unrolled loop.
18 - added temporary 1D arrays to hold frequently-used columns of 2D arrays.
19 - wrote my own abs() method
20 See http://www.cs.cmu.edu/~jch/java/linpack.html for more details.
23 Ported to Java by Reed Wade (wade@cs.utk.edu) 2/96
24 built using JDK 1.0 on solaris
25 using "javac -O Linpack.java"
28 Translated to C by Bonnie Toy 5/88
29 (modified on 2/25/94 to fix a problem with daxpy for
30 unequal increments or equal increments not equal to 1.
36 * Class allowing to retrieve the computing power of a
37 * computer; this power is represented by MFlops
39 * @author Bonnie Toy, Reed Wade
46 * Empty default constructor.
50 final double abs (double d) {
51 return (d >= 0) ? d : -d;
54 double second_orig = -1;
58 if (second_orig==-1) {
59 second_orig = System.currentTimeMillis();
61 return (System.currentTimeMillis() - second_orig)/1000;
65 * Estimate the computing power of the local computer
66 * and return as result the obtained MFlops.
67 * @return The MFlops of the computer
69 public double getMFlops()
71 double mflops_result = 0.0;
72 double residn_result = 0.0;
73 double time_result = 0.0;
74 double eps_result = 0.0;
76 double a[][] = new double[1000][1001];
77 double b[] = new double[1000];
78 double x[] = new double[1000];
79 double ops,total,norma,normx;
82 int ipvt[] = new int[1000];
84 //double mflops_result;
85 //double residn_result;
92 ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);
94 norma = matgen(a,lda,n,b);
97 dgesl(a,lda,n,ipvt,b,0);
98 total = second() - time;
100 for (i = 0; i < n; i++) {
103 norma = matgen(a,lda,n,b);
104 for (i = 0; i < n; i++) {
107 dmxpy(n,b,n,lda,x,a);
110 for (i = 0; i < n; i++) {
111 resid = (resid > abs(b[i])) ? resid : abs(b[i]);
112 normx = (normx > abs(x[i])) ? normx : abs(x[i]);
115 eps_result = epslon((double)1.0);
118 residn_result = resid/( n*norma*normx*eps_result );
120 mflops_result = ops/(1.0e6*total);
122 return ("Mflops/s: " + mflops_result +
123 " Time: " + time_result + " secs" +
124 " Norm Res: " + residn_result +
125 " Precision: " + eps_result);
127 residn_result = resid/( n*norma*normx*eps_result );
128 residn_result += 0.005; // for rounding
129 residn_result = (int)(residn_result*100);
130 residn_result /= 100;
133 time_result += 0.005; // for rounding
134 time_result = (int)(time_result*100);
137 mflops_result = ops/(1.0e6*total);
138 mflops_result += 0.0005; // for rounding
139 mflops_result = (int)(mflops_result*1000);
140 mflops_result /= 1000;
142 // System.out.println("Mflops/s: " + mflops_result +
143 // " Time: " + time_result + " secs" +
144 // " Norm Res: " + residn_result +
145 // " Precision: " + eps_result);
147 return mflops_result ;
152 final double matgen (double a[][], int lda, int n, double b[])
159 /* Next two for() statements switched. Solver wants
160 matrix in column order. --dmd 3/3/97
162 for (i = 0; i < n; i++) {
163 for (j = 0; j < n; j++) {
164 init = 3125*init % 65536;
165 a[j][i] = (init - 32768.0)/16384.0;
166 norma = (a[j][i] > norma) ? a[j][i] : norma;
169 for (i = 0; i < n; i++) {
172 for (j = 0; j < n; j++) {
173 for (i = 0; i < n; i++) {
184 dgefa factors a double precision matrix by gaussian elimination.
186 dgefa is usually called by dgeco, but it can be called
187 directly with a saving in time if rcond is not needed.
188 (time for dgeco) = (1 + 9/n)*(time for dgefa) .
192 a double precision[n][lda]
193 the matrix to be factored.
196 the leading dimension of the array a .
199 the order of the matrix a .
203 a an upper triangular matrix and the multipliers
204 which were used to obtain it.
205 the factorization can be written a = l*u where
206 l is a product of permutation and unit lower
207 triangular matrices and u is upper triangular.
210 an integer vector of pivot indices.
214 = k if u[k][k] .eq. 0.0 . this is not an error
215 condition for this subroutine, but it does
216 indicate that dgesl or dgedi will divide by zero
217 if called. use rcond in dgeco for a reliable
218 indication of singularity.
220 linpack. this version dated 08/14/78.
221 cleve moler, university of new mexico, argonne national lab.
225 blas daxpy,dscal,idamax
227 final int dgefa( double a[][], int lda, int n, int ipvt[])
229 double[] col_k, col_j;
234 // gaussian elimination with partial pivoting
239 for (k = 0; k < nm1; k++) {
243 // find l = pivot index
245 l = idamax(n-k,col_k,k,1) + k;
248 // zero pivot implies this column already triangularized
252 // interchange if necessary
260 // compute multipliers
263 dscal(n-(kp1),t,col_k,kp1,1);
265 // row elimination with column indexing
267 for (j = kp1; j < n; j++) {
274 daxpy(n-(kp1),t,col_k,kp1,1,
284 if (a[(n-1)][(n-1)] == 0) info = n-1;
292 dgesl solves the double precision system
293 a * x = b or trans(a) * x = b
294 using the factors computed by dgeco or dgefa.
298 a double precision[n][lda]
299 the output from dgeco or dgefa.
302 the leading dimension of the array a .
305 the order of the matrix a .
308 the pivot vector from dgeco or dgefa.
310 b double precision[n]
311 the right hand side vector.
314 = 0 to solve a*x = b ,
315 = nonzero to solve trans(a)*x = b where
316 trans(a) is the transpose.
320 b the solution vector x .
324 a division by zero will occur if the input factor contains a
325 zero on the diagonal. technically this indicates singularity
326 but it is often caused by improper arguments or improper
327 setting of lda . it will not occur if the subroutines are
328 called correctly and if dgeco has set rcond .gt. 0.0
329 or dgefa has set info .eq. 0 .
331 to compute inverse(a) * c where c is a matrix
333 dgeco(a,lda,n,ipvt,rcond,z)
334 if (!rcond is too small){
336 dgesl(a,lda,n,ipvt,c[j][0],0);
339 linpack. this version dated 08/14/78 .
340 cleve moler, university of new mexico, argonne national lab.
346 final void dgesl( double a[][], int lda, int n, int ipvt[], double b[], int job)
354 // job = 0 , solve a * x = b. first solve l*y = b
357 for (k = 0; k < nm1; k++) {
365 daxpy(n-(kp1),t,a[k],kp1,1,b,kp1,1);
371 for (kb = 0; kb < n; kb++) {
375 daxpy(k,t,a[k],0,1,b,0,1);
380 // job = nonzero, solve trans(a) * x = b. first solve trans(u)*y = b
382 for (k = 0; k < n; k++) {
383 t = ddot(k,a[k],0,1,b,0,1);
384 b[k] = (b[k] - t)/a[k][k];
387 // now solve trans(l)*x = y
390 for (kb = 1; kb < nm1; kb++) {
393 b[k] += ddot(n-(kp1),a[k],kp1,1,b,kp1,1);
408 constant times a vector plus a vector.
409 jack dongarra, linpack, 3/11/78.
411 final void daxpy( int n, double da, double dx[], int dx_off, int incx,
412 double dy[], int dy_off, int incy)
416 if ((n > 0) && (da != 0)) {
417 if (incx != 1 || incy != 1) {
419 // code for unequal increments or equal increments not equal to 1
423 if (incx < 0) ix = (-n+1)*incx;
424 if (incy < 0) iy = (-n+1)*incy;
425 for (i = 0;i < n; i++) {
426 dy[iy +dy_off] += da*dx[ix +dx_off];
433 // code for both increments equal to 1
435 for (i=0; i < n; i++)
436 dy[i +dy_off] += da*dx[i +dx_off];
444 forms the dot product of two vectors.
445 jack dongarra, linpack, 3/11/78.
447 final double ddot( int n, double dx[], int dx_off, int incx, double dy[],
448 int dy_off, int incy)
457 if (incx != 1 || incy != 1) {
459 // code for unequal increments or equal increments not equal to 1
463 if (incx < 0) ix = (-n+1)*incx;
464 if (incy < 0) iy = (-n+1)*incy;
465 for (i = 0;i < n; i++) {
466 dtemp += dx[ix +dx_off]*dy[iy +dy_off];
472 // code for both increments equal to 1
475 dtemp += dx[i +dx_off]*dy[i +dy_off];
484 scales a vector by a constant.
485 jack dongarra, linpack, 3/11/78.
487 final void dscal( int n, double da, double dx[], int dx_off, int incx)
494 // code for increment not equal to 1
497 for (i = 0; i < nincx; i += incx)
501 // code for increment equal to 1
503 for (i = 0; i < n; i++)
512 finds the index of element having max. absolute value.
513 jack dongarra, linpack, 3/11/78.
515 final int idamax( int n, double dx[], int dx_off, int incx)
524 } else if (incx != 1) {
526 // code for increment not equal to 1
528 dmax = abs(dx[0 +dx_off]);
530 for (i = 1; i < n; i++) {
531 dtemp = abs(dx[ix + dx_off]);
540 // code for increment equal to 1
543 dmax = abs(dx[0 +dx_off]);
544 for (i = 1; i < n; i++) {
545 dtemp = abs(dx[i + dx_off]);
558 estimate unit roundoff in quantities of size x.
560 this program should function properly on all systems
561 satisfying the following two assumptions,
562 1. the base used in representing dfloating point
563 numbers is not a power of three.
564 2. the quantity a in statement 10 is represented to
565 the accuracy used in dfloating point variables
566 that are stored in memory.
567 the statement number 10 and the go to 10 are intended to
568 force optimizing compilers to generate code satisfying
570 under these assumptions, it should be true that,
571 a is not exactly equal to four-thirds,
572 b has a zero for its last bit or digit,
573 c is not exactly equal to one,
574 eps measures the separation of 1.0 from
575 the next larger dfloating point number.
576 the developers of eispack would appreciate being informed
577 about any systems where these assumptions do not hold.
579 *****************************************************************
580 this routine is one of the auxiliary routines used by eispack iii
581 to avoid machine dependencies.
582 *****************************************************************
584 this version dated 4/6/83.
586 final double epslon (double x)
604 multiply matrix m times vector x and add the result to vector y.
608 n1 integer, number of elements in vector y, and number of rows in
611 y double [n1], vector of length n1 to which is added
614 n2 integer, number of elements in vector x, and number of columns
617 ldm integer, leading dimension of array m
619 x double [n2], vector of length n2
621 m double [ldm][n2], matrix of n1 rows and n2 columns
623 final void dmxpy ( int n1, double y[], int n2, int ldm, double x[], double m[][])
627 // cleanup odd vector
628 for (j = 0; j < n2; j++) {
629 for (i = 0; i < n1; i++) {
630 y[i] += x[j]*m[j][i];