4 Modified 3/3/97 by David M. Doolin (dmd) doolin@cs.utk.edu
5 Fixed error in matgen() method. Added some comments.
7 Modified 1/22/97 by Paul McMahan mcmahan@cs.utk.edu
8 Added more MacOS options to form.
10 Optimized by Jonathan Hardwick (jch@cs.cmu.edu), 3/28/96
11 Compare to Linkpack.java.
12 Optimizations performed:
13 - added "final" modifier to performance-critical methods.
14 - changed lines of the form "a[i] = a[i] + x" to "a[i] += x".
15 - minimized array references using common subexpression elimination.
16 - eliminated unused variables.
17 - undid an unrolled loop.
18 - added temporary 1D arrays to hold frequently-used columns of 2D arrays.
19 - wrote my own abs() method
20 See http://www.cs.cmu.edu/~jch/java/linpack.html for more details.
23 Ported to Java by Reed Wade (wade@cs.utk.edu) 2/96
24 built using JDK 1.0 on solaris
25 using "javac -O Linpack.java"
28 Translated to C by Bonnie Toy 5/88
29 (modified on 2/25/94 to fix a problem with daxpy for
30 unequal increments or equal increments not equal to 1.
36 * Class allowing to retrieve the computing power of a
37 * computer; this power is represented by MFlops
39 * @author Bonnie Toy, Reed Wade
46 * Empty default constructor.
50 final double abs (double d) {
51 return (d >= 0) ? d : -d;
54 double second_orig = -1;
58 if (second_orig==-1) {
59 second_orig = System.currentTimeMillis();
61 return (System.currentTimeMillis() - second_orig)/1000;
65 * Estimate the computing power of the local computer
66 * and return as result the obtained MFlops.
67 * @return The MFlops of the computer
69 public double getMFlops()
71 double mflops_result = 0.0;
72 double residn_result = 0.0;
73 double time_result = 0.0;
74 double eps_result = 0.0;
76 double a[][] = new double[1000][1001];
77 double b[] = new double[1000];
78 double x[] = new double[1000];
79 double ops,total,norma,normx;
82 int ipvt[] = new int[1000];
87 ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);
89 norma = matgen(a,lda,n,b);
92 dgesl(a,lda,n,ipvt,b,0);
93 total = second() - time;
95 for (i = 0; i < n; i++) {
98 norma = matgen(a,lda,n,b);
99 for (i = 0; i < n; i++) {
102 dmxpy(n,b,n,lda,x,a);
105 for (i = 0; i < n; i++) {
106 resid = (resid > abs(b[i])) ? resid : abs(b[i]);
107 normx = (normx > abs(x[i])) ? normx : abs(x[i]);
110 eps_result = epslon((double)1.0);
112 return ("Mflops/s: " + mflops_result +
113 " Time: " + time_result + " secs" +
114 " Norm Res: " + residn_result +
115 " Precision: " + eps_result);
117 residn_result = resid/( n*norma*normx*eps_result );
118 residn_result += 0.005; // for rounding
119 residn_result = (int)(residn_result*100);
120 residn_result /= 100;
123 time_result += 0.005; // for rounding
124 time_result = (int)(time_result*100);
127 mflops_result = ops/(1.0e6*total);
128 mflops_result += 0.0005; // for rounding
129 mflops_result = (int)(mflops_result*1000);
130 mflops_result /= 1000;
132 return mflops_result ;
137 final double matgen (double a[][], int lda, int n, double b[])
144 /* Next two for() statements switched. Solver wants
145 matrix in column order. --dmd 3/3/97
147 for (i = 0; i < n; i++) {
148 for (j = 0; j < n; j++) {
149 init = 3125*init % 65536;
150 a[j][i] = (init - 32768.0)/16384.0;
151 norma = (a[j][i] > norma) ? a[j][i] : norma;
154 for (i = 0; i < n; i++) {
157 for (j = 0; j < n; j++) {
158 for (i = 0; i < n; i++) {
169 dgefa factors a double precision matrix by gaussian elimination.
171 dgefa is usually called by dgeco, but it can be called
172 directly with a saving in time if rcond is not needed.
173 (time for dgeco) = (1 + 9/n)*(time for dgefa) .
177 a double precision[n][lda]
178 the matrix to be factored.
181 the leading dimension of the array a .
184 the order of the matrix a .
188 a an upper triangular matrix and the multipliers
189 which were used to obtain it.
190 the factorization can be written a = l*u where
191 l is a product of permutation and unit lower
192 triangular matrices and u is upper triangular.
195 an integer vector of pivot indices.
199 = k if u[k][k] .eq. 0.0 . this is not an error
200 condition for this subroutine, but it does
201 indicate that dgesl or dgedi will divide by zero
202 if called. use rcond in dgeco for a reliable
203 indication of singularity.
205 linpack. this version dated 08/14/78.
206 cleve moler, university of new mexico, argonne national lab.
210 blas daxpy,dscal,idamax
212 final int dgefa( double a[][], int lda, int n, int ipvt[])
214 double[] col_k, col_j;
219 // gaussian elimination with partial pivoting
224 for (k = 0; k < nm1; k++) {
228 // find l = pivot index
230 l = idamax(n-k,col_k,k,1) + k;
233 // zero pivot implies this column already triangularized
237 // interchange if necessary
245 // compute multipliers
248 dscal(n-(kp1),t,col_k,kp1,1);
250 // row elimination with column indexing
252 for (j = kp1; j < n; j++) {
259 daxpy(n-(kp1),t,col_k,kp1,1,
269 if (a[(n-1)][(n-1)] == 0) info = n-1;
277 dgesl solves the double precision system
278 a * x = b or trans(a) * x = b
279 using the factors computed by dgeco or dgefa.
283 a double precision[n][lda]
284 the output from dgeco or dgefa.
287 the leading dimension of the array a .
290 the order of the matrix a .
293 the pivot vector from dgeco or dgefa.
295 b double precision[n]
296 the right hand side vector.
299 = 0 to solve a*x = b ,
300 = nonzero to solve trans(a)*x = b where
301 trans(a) is the transpose.
305 b the solution vector x .
309 a division by zero will occur if the input factor contains a
310 zero on the diagonal. technically this indicates singularity
311 but it is often caused by improper arguments or improper
312 setting of lda . it will not occur if the subroutines are
313 called correctly and if dgeco has set rcond .gt. 0.0
314 or dgefa has set info .eq. 0 .
316 to compute inverse(a) * c where c is a matrix
318 dgeco(a,lda,n,ipvt,rcond,z)
319 if (!rcond is too small){
321 dgesl(a,lda,n,ipvt,c[j][0],0);
324 linpack. this version dated 08/14/78 .
325 cleve moler, university of new mexico, argonne national lab.
331 final void dgesl( double a[][], int lda, int n, int ipvt[], double b[], int job)
339 // job = 0 , solve a * x = b. first solve l*y = b
342 for (k = 0; k < nm1; k++) {
350 daxpy(n-(kp1),t,a[k],kp1,1,b,kp1,1);
356 for (kb = 0; kb < n; kb++) {
360 daxpy(k,t,a[k],0,1,b,0,1);
365 // job = nonzero, solve trans(a) * x = b. first solve trans(u)*y = b
367 for (k = 0; k < n; k++) {
368 t = ddot(k,a[k],0,1,b,0,1);
369 b[k] = (b[k] - t)/a[k][k];
372 // now solve trans(l)*x = y
375 for (kb = 1; kb < nm1; kb++) {
378 b[k] += ddot(n-(kp1),a[k],kp1,1,b,kp1,1);
393 constant times a vector plus a vector.
394 jack dongarra, linpack, 3/11/78.
396 final void daxpy( int n, double da, double dx[], int dx_off, int incx,
397 double dy[], int dy_off, int incy)
401 if ((n > 0) && (da != 0)) {
402 if (incx != 1 || incy != 1) {
404 // code for unequal increments or equal increments not equal to 1
408 if (incx < 0) ix = (-n+1)*incx;
409 if (incy < 0) iy = (-n+1)*incy;
410 for (i = 0;i < n; i++) {
411 dy[iy +dy_off] += da*dx[ix +dx_off];
418 // code for both increments equal to 1
420 for (i=0; i < n; i++)
421 dy[i +dy_off] += da*dx[i +dx_off];
429 forms the dot product of two vectors.
430 jack dongarra, linpack, 3/11/78.
432 final double ddot( int n, double dx[], int dx_off, int incx, double dy[],
433 int dy_off, int incy)
442 if (incx != 1 || incy != 1) {
444 // code for unequal increments or equal increments not equal to 1
448 if (incx < 0) ix = (-n+1)*incx;
449 if (incy < 0) iy = (-n+1)*incy;
450 for (i = 0;i < n; i++) {
451 dtemp += dx[ix +dx_off]*dy[iy +dy_off];
457 // code for both increments equal to 1
460 dtemp += dx[i +dx_off]*dy[i +dy_off];
469 scales a vector by a constant.
470 jack dongarra, linpack, 3/11/78.
472 final void dscal( int n, double da, double dx[], int dx_off, int incx)
479 // code for increment not equal to 1
482 for (i = 0; i < nincx; i += incx)
486 // code for increment equal to 1
488 for (i = 0; i < n; i++)
497 finds the index of element having max. absolute value.
498 jack dongarra, linpack, 3/11/78.
500 final int idamax( int n, double dx[], int dx_off, int incx)
509 } else if (incx != 1) {
511 // code for increment not equal to 1
513 dmax = abs(dx[0 +dx_off]);
515 for (i = 1; i < n; i++) {
516 dtemp = abs(dx[ix + dx_off]);
525 // code for increment equal to 1
528 dmax = abs(dx[0 +dx_off]);
529 for (i = 1; i < n; i++) {
530 dtemp = abs(dx[i + dx_off]);
543 estimate unit roundoff in quantities of size x.
545 this program should function properly on all systems
546 satisfying the following two assumptions,
547 1. the base used in representing dfloating point
548 numbers is not a power of three.
549 2. the quantity a in statement 10 is represented to
550 the accuracy used in dfloating point variables
551 that are stored in memory.
552 the statement number 10 and the go to 10 are intended to
553 force optimizing compilers to generate code satisfying
555 under these assumptions, it should be true that,
556 a is not exactly equal to four-thirds,
557 b has a zero for its last bit or digit,
558 c is not exactly equal to one,
559 eps measures the separation of 1.0 from
560 the next larger dfloating point number.
561 the developers of eispack would appreciate being informed
562 about any systems where these assumptions do not hold.
564 *****************************************************************
565 this routine is one of the auxiliary routines used by eispack iii
566 to avoid machine dependencies.
567 *****************************************************************
569 this version dated 4/6/83.
571 final double epslon (double x)
589 multiply matrix m times vector x and add the result to vector y.
593 n1 integer, number of elements in vector y, and number of rows in
596 y double [n1], vector of length n1 to which is added
599 n2 integer, number of elements in vector x, and number of columns
602 ldm integer, leading dimension of array m
604 x double [n2], vector of length n2
606 m double [ldm][n2], matrix of n1 rows and n2 columns
608 final void dmxpy ( int n1, double y[], int n2, int ldm, double x[], double m[][])
612 // cleanup odd vector
613 for (j = 0; j < n2; j++) {
614 for (i = 0; i < n1; i++) {
615 y[i] += x[j]*m[j][i];