4 Modified 3/3/97 by David M. Doolin (dmd) doolin@cs.utk.edu
5 Fixed error in matgen() method. Added some comments.
7 Modified 1/22/97 by Paul McMahan mcmahan@cs.utk.edu
8 Added more MacOS options to form.
10 Optimized by Jonathan Hardwick (jch@cs.cmu.edu), 3/28/96
11 Compare to Linkpack.java.
12 Optimizations performed:
13 - added "final" modifier to performance-critical methods.
14 - changed lines of the form "a[i] = a[i] + x" to "a[i] += x".
15 - minimized array references using common subexpression elimination.
16 - eliminated unused variables.
17 - undid an unrolled loop.
18 - added temporary 1D arrays to hold frequently-used columns of 2D arrays.
19 - wrote my own abs() method
20 See http://www.cs.cmu.edu/~jch/java/linpack.html for more details.
23 Ported to Java by Reed Wade (wade@cs.utk.edu) 2/96
24 built using JDK 1.0 on solaris
25 using "javac -O Linpack.java"
28 Translated to C by Bonnie Toy 5/88
29 (modified on 2/25/94 to fix a problem with daxpy for
30 unequal increments or equal increments not equal to 1.
36 public class Linpack {
38 final double abs (double d) {
39 return (d >= 0) ? d : -d;
42 double second_orig = -1;
46 if (second_orig==-1) {
47 second_orig = System.currentTimeMillis();
49 return (System.currentTimeMillis() - second_orig)/1000;
52 public double getMFlops()
54 double mflops_result = 0.0;
55 double residn_result = 0.0;
56 double time_result = 0.0;
57 double eps_result = 0.0;
59 double a[][] = new double[1000][1001];
60 double b[] = new double[1000];
61 double x[] = new double[1000];
62 double ops,total,norma,normx;
65 int ipvt[] = new int[1000];
67 //double mflops_result;
68 //double residn_result;
75 ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);
77 norma = matgen(a,lda,n,b);
80 dgesl(a,lda,n,ipvt,b,0);
81 total = second() - time;
83 for (i = 0; i < n; i++) {
86 norma = matgen(a,lda,n,b);
87 for (i = 0; i < n; i++) {
93 for (i = 0; i < n; i++) {
94 resid = (resid > abs(b[i])) ? resid : abs(b[i]);
95 normx = (normx > abs(x[i])) ? normx : abs(x[i]);
98 eps_result = epslon((double)1.0);
101 residn_result = resid/( n*norma*normx*eps_result );
103 mflops_result = ops/(1.0e6*total);
105 return ("Mflops/s: " + mflops_result +
106 " Time: " + time_result + " secs" +
107 " Norm Res: " + residn_result +
108 " Precision: " + eps_result);
110 residn_result = resid/( n*norma*normx*eps_result );
111 residn_result += 0.005; // for rounding
112 residn_result = (int)(residn_result*100);
113 residn_result /= 100;
116 time_result += 0.005; // for rounding
117 time_result = (int)(time_result*100);
120 mflops_result = ops/(1.0e6*total);
121 mflops_result += 0.0005; // for rounding
122 mflops_result = (int)(mflops_result*1000);
123 mflops_result /= 1000;
125 // System.out.println("Mflops/s: " + mflops_result +
126 // " Time: " + time_result + " secs" +
127 // " Norm Res: " + residn_result +
128 // " Precision: " + eps_result);
130 return mflops_result ;
135 final double matgen (double a[][], int lda, int n, double b[])
142 /* Next two for() statements switched. Solver wants
143 matrix in column order. --dmd 3/3/97
145 for (i = 0; i < n; i++) {
146 for (j = 0; j < n; j++) {
147 init = 3125*init % 65536;
148 a[j][i] = (init - 32768.0)/16384.0;
149 norma = (a[j][i] > norma) ? a[j][i] : norma;
152 for (i = 0; i < n; i++) {
155 for (j = 0; j < n; j++) {
156 for (i = 0; i < n; i++) {
167 dgefa factors a double precision matrix by gaussian elimination.
169 dgefa is usually called by dgeco, but it can be called
170 directly with a saving in time if rcond is not needed.
171 (time for dgeco) = (1 + 9/n)*(time for dgefa) .
175 a double precision[n][lda]
176 the matrix to be factored.
179 the leading dimension of the array a .
182 the order of the matrix a .
186 a an upper triangular matrix and the multipliers
187 which were used to obtain it.
188 the factorization can be written a = l*u where
189 l is a product of permutation and unit lower
190 triangular matrices and u is upper triangular.
193 an integer vector of pivot indices.
197 = k if u[k][k] .eq. 0.0 . this is not an error
198 condition for this subroutine, but it does
199 indicate that dgesl or dgedi will divide by zero
200 if called. use rcond in dgeco for a reliable
201 indication of singularity.
203 linpack. this version dated 08/14/78.
204 cleve moler, university of new mexico, argonne national lab.
208 blas daxpy,dscal,idamax
210 final int dgefa( double a[][], int lda, int n, int ipvt[])
212 double[] col_k, col_j;
217 // gaussian elimination with partial pivoting
222 for (k = 0; k < nm1; k++) {
226 // find l = pivot index
228 l = idamax(n-k,col_k,k,1) + k;
231 // zero pivot implies this column already triangularized
235 // interchange if necessary
243 // compute multipliers
246 dscal(n-(kp1),t,col_k,kp1,1);
248 // row elimination with column indexing
250 for (j = kp1; j < n; j++) {
257 daxpy(n-(kp1),t,col_k,kp1,1,
267 if (a[(n-1)][(n-1)] == 0) info = n-1;
275 dgesl solves the double precision system
276 a * x = b or trans(a) * x = b
277 using the factors computed by dgeco or dgefa.
281 a double precision[n][lda]
282 the output from dgeco or dgefa.
285 the leading dimension of the array a .
288 the order of the matrix a .
291 the pivot vector from dgeco or dgefa.
293 b double precision[n]
294 the right hand side vector.
297 = 0 to solve a*x = b ,
298 = nonzero to solve trans(a)*x = b where
299 trans(a) is the transpose.
303 b the solution vector x .
307 a division by zero will occur if the input factor contains a
308 zero on the diagonal. technically this indicates singularity
309 but it is often caused by improper arguments or improper
310 setting of lda . it will not occur if the subroutines are
311 called correctly and if dgeco has set rcond .gt. 0.0
312 or dgefa has set info .eq. 0 .
314 to compute inverse(a) * c where c is a matrix
316 dgeco(a,lda,n,ipvt,rcond,z)
317 if (!rcond is too small){
319 dgesl(a,lda,n,ipvt,c[j][0],0);
322 linpack. this version dated 08/14/78 .
323 cleve moler, university of new mexico, argonne national lab.
329 final void dgesl( double a[][], int lda, int n, int ipvt[], double b[], int job)
337 // job = 0 , solve a * x = b. first solve l*y = b
340 for (k = 0; k < nm1; k++) {
348 daxpy(n-(kp1),t,a[k],kp1,1,b,kp1,1);
354 for (kb = 0; kb < n; kb++) {
358 daxpy(k,t,a[k],0,1,b,0,1);
363 // job = nonzero, solve trans(a) * x = b. first solve trans(u)*y = b
365 for (k = 0; k < n; k++) {
366 t = ddot(k,a[k],0,1,b,0,1);
367 b[k] = (b[k] - t)/a[k][k];
370 // now solve trans(l)*x = y
373 for (kb = 1; kb < nm1; kb++) {
376 b[k] += ddot(n-(kp1),a[k],kp1,1,b,kp1,1);
391 constant times a vector plus a vector.
392 jack dongarra, linpack, 3/11/78.
394 final void daxpy( int n, double da, double dx[], int dx_off, int incx,
395 double dy[], int dy_off, int incy)
399 if ((n > 0) && (da != 0)) {
400 if (incx != 1 || incy != 1) {
402 // code for unequal increments or equal increments not equal to 1
406 if (incx < 0) ix = (-n+1)*incx;
407 if (incy < 0) iy = (-n+1)*incy;
408 for (i = 0;i < n; i++) {
409 dy[iy +dy_off] += da*dx[ix +dx_off];
416 // code for both increments equal to 1
418 for (i=0; i < n; i++)
419 dy[i +dy_off] += da*dx[i +dx_off];
427 forms the dot product of two vectors.
428 jack dongarra, linpack, 3/11/78.
430 final double ddot( int n, double dx[], int dx_off, int incx, double dy[],
431 int dy_off, int incy)
440 if (incx != 1 || incy != 1) {
442 // code for unequal increments or equal increments not equal to 1
446 if (incx < 0) ix = (-n+1)*incx;
447 if (incy < 0) iy = (-n+1)*incy;
448 for (i = 0;i < n; i++) {
449 dtemp += dx[ix +dx_off]*dy[iy +dy_off];
455 // code for both increments equal to 1
458 dtemp += dx[i +dx_off]*dy[i +dy_off];
467 scales a vector by a constant.
468 jack dongarra, linpack, 3/11/78.
470 final void dscal( int n, double da, double dx[], int dx_off, int incx)
477 // code for increment not equal to 1
480 for (i = 0; i < nincx; i += incx)
484 // code for increment equal to 1
486 for (i = 0; i < n; i++)
495 finds the index of element having max. absolute value.
496 jack dongarra, linpack, 3/11/78.
498 final int idamax( int n, double dx[], int dx_off, int incx)
507 } else if (incx != 1) {
509 // code for increment not equal to 1
511 dmax = abs(dx[0 +dx_off]);
513 for (i = 1; i < n; i++) {
514 dtemp = abs(dx[ix + dx_off]);
523 // code for increment equal to 1
526 dmax = abs(dx[0 +dx_off]);
527 for (i = 1; i < n; i++) {
528 dtemp = abs(dx[i + dx_off]);
541 estimate unit roundoff in quantities of size x.
543 this program should function properly on all systems
544 satisfying the following two assumptions,
545 1. the base used in representing dfloating point
546 numbers is not a power of three.
547 2. the quantity a in statement 10 is represented to
548 the accuracy used in dfloating point variables
549 that are stored in memory.
550 the statement number 10 and the go to 10 are intended to
551 force optimizing compilers to generate code satisfying
553 under these assumptions, it should be true that,
554 a is not exactly equal to four-thirds,
555 b has a zero for its last bit or digit,
556 c is not exactly equal to one,
557 eps measures the separation of 1.0 from
558 the next larger dfloating point number.
559 the developers of eispack would appreciate being informed
560 about any systems where these assumptions do not hold.
562 *****************************************************************
563 this routine is one of the auxiliary routines used by eispack iii
564 to avoid machine dependencies.
565 *****************************************************************
567 this version dated 4/6/83.
569 final double epslon (double x)
587 multiply matrix m times vector x and add the result to vector y.
591 n1 integer, number of elements in vector y, and number of rows in
594 y double [n1], vector of length n1 to which is added
597 n2 integer, number of elements in vector x, and number of columns
600 ldm integer, leading dimension of array m
602 x double [n2], vector of length n2
604 m double [ldm][n2], matrix of n1 rows and n2 columns
606 final void dmxpy ( int n1, double y[], int n2, int ldm, double x[], double m[][])
610 // cleanup odd vector
611 for (j = 0; j < n2; j++) {
612 for (i = 0; i < n1; i++) {
613 y[i] += x[j]*m[j][i];