src/and/Mapping/Linpack.java

   1 package and.Mapping;
   2 /*
   3
   4 Modified 3/3/97 by David M. Doolin (dmd) doolin@cs.utk.edu
   5 Fixed error in matgen() method. Added some comments.
   6
   7 Modified 1/22/97 by Paul McMahan mcmahan@cs.utk.edu
   8 Added more MacOS options to form.
   9
  10 Optimized by Jonathan Hardwick (jch@cs.cmu.edu), 3/28/96
  11 Compare to Linkpack.java.
  12 Optimizations performed:
  13  - added "final" modifier to performance-critical methods.
  14  - changed lines of the form "a[i] = a[i] + x" to "a[i] += x".
  15  - minimized array references using common subexpression elimination.
  16  - eliminated unused variables.
  17  - undid an unrolled loop.
  18  - added temporary 1D arrays to hold frequently-used columns of 2D arrays.
  19  - wrote my own abs() method
  20 See http://www.cs.cmu.edu/~jch/java/linpack.html for more details.
  21
  22
  23 Ported to Java by Reed Wade  (wade@cs.utk.edu) 2/96
  24 built using JDK 1.0 on solaris
  25 using "javac -O Linpack.java"
  26
  27
  28 Translated to C by Bonnie Toy 5/88
  29   (modified on 2/25/94  to fix a problem with daxpy  for
  30    unequal increments or equal increments not equal to 1.
  31      Jack Dongarra)
  32
  33 */
  34
  35
  36 public class Linpack {
  37
  38   final double abs (double d) {
  39     return (d >= 0) ? d : -d;
  40   }
  41
  42   double second_orig = -1;
  43
  44   double second()
  45   {
  46     if (second_orig==-1) {
  47       second_orig = System.currentTimeMillis();
  48     }
  49     return (System.currentTimeMillis() - second_orig)/1000;
  50   }
  51
  52   public double getMFlops()
  53   {
  54     double mflops_result = 0.0;
  55     double residn_result = 0.0;
  56     double time_result = 0.0;
  57     double eps_result = 0.0;
  58
  59     double a[][] = new double[1000][1001];
  60     double b[] = new double[1000];
  61     double x[] = new double[1000];
  62     double ops,total,norma,normx;
  63     double resid,time;
  64     int n,i,lda;
  65     int ipvt[] = new int[1000];
  66
  67     //double mflops_result;
  68     //double residn_result;
  69     //double time_result;
  70     //double eps_result;
  71
  72     lda = 1001;
  73     n = 500;
  74
  75     ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);
  76
  77     norma = matgen(a,lda,n,b);
  78     time = second();
  79     dgefa(a,lda,n,ipvt);
  80     dgesl(a,lda,n,ipvt,b,0);
  81     total = second() - time;
  82
  83     for (i = 0; i < n; i++) {
  84       x[i] = b[i];
  85     }
  86     norma = matgen(a,lda,n,b);
  87     for (i = 0; i < n; i++) {
  88       b[i] = -b[i];
  89     }
  90     dmxpy(n,b,n,lda,x,a);
  91     resid = 0.0;
  92     normx = 0.0;
  93     for (i = 0; i < n; i++) {
  94       resid = (resid > abs(b[i])) ? resid : abs(b[i]);
  95       normx = (normx > abs(x[i])) ? normx : abs(x[i]);
  96     }
  97
  98     eps_result = epslon((double)1.0);
  99 /*
 100
 101     residn_result = resid/( n*norma*normx*eps_result );
 102     time_result = total;
 103     mflops_result = ops/(1.0e6*total);
 104
 105     return ("Mflops/s: " + mflops_result +
 106             "  Time: " + time_result + " secs" +
 107             "  Norm Res: " + residn_result +
 108             "  Precision: " + eps_result);
 109 */
 110         residn_result = resid/( n*norma*normx*eps_result );
 111         residn_result += 0.005; // for rounding
 112         residn_result = (int)(residn_result*100);
 113         residn_result /= 100;
 114
 115         time_result = total;
 116         time_result += 0.005; // for rounding
 117         time_result = (int)(time_result*100);
 118         time_result /= 100;
 119
 120         mflops_result = ops/(1.0e6*total);
 121         mflops_result += 0.0005; // for rounding
 122         mflops_result = (int)(mflops_result*1000);
 123         mflops_result /= 1000;
 124
 125 //    System.out.println("Mflops/s: " + mflops_result +
 126 //          "  Time: " + time_result + " secs" +
 127 //          "  Norm Res: " + residn_result +
 128 //          "  Precision: " + eps_result);
 129
 130         return mflops_result ;
 131   }
 132
 133
 134
 135   final double matgen (double a[][], int lda, int n, double b[])
 136   {
 137     double norma;
 138     int init, i, j;
 139
 140     init = 1325;
 141     norma = 0.0;
 142 /*  Next two for() statements switched.  Solver wants
 143 matrix in column order. --dmd 3/3/97
 144 */
 145       for (i = 0; i < n; i++) {
 146     for (j = 0; j < n; j++) {
 147         init = 3125*init % 65536;
 148         a[j][i] = (init - 32768.0)/16384.0;
 149         norma = (a[j][i] > norma) ? a[j][i] : norma;
 150       }
 151     }
 152     for (i = 0; i < n; i++) {
 153       b[i] = 0.0;
 154     }
 155     for (j = 0; j < n; j++) {
 156       for (i = 0; i < n; i++) {
 157         b[i] += a[j][i];
 158       }
 159     }
 160
 161     return norma;
 162   }
 163
 164
 165
 166   /*
 167     dgefa factors a double precision matrix by gaussian elimination.
 168
 169     dgefa is usually called by dgeco, but it can be called
 170     directly with a saving in time if  rcond  is not needed.
 171     (time for dgeco) = (1 + 9/n)*(time for dgefa) .
 172
 173     on entry
 174
 175     a       double precision[n][lda]
 176     the matrix to be factored.
 177
 178     lda     integer
 179     the leading dimension of the array  a .
 180
 181     n       integer
 182     the order of the matrix  a .
 183
 184     on return
 185
 186     a       an upper triangular matrix and the multipliers
 187     which were used to obtain it.
 188     the factorization can be written  a = l*u  where
 189     l  is a product of permutation and unit lower
 190     triangular matrices and  u  is upper triangular.
 191
 192     ipvt    integer[n]
 193     an integer vector of pivot indices.
 194
 195     info    integer
 196     = 0  normal value.
 197     = k  if  u[k][k] .eq. 0.0 .  this is not an error
 198     condition for this subroutine, but it does
 199     indicate that dgesl or dgedi will divide by zero
 200     if called.  use  rcond  in dgeco for a reliable
 201     indication of singularity.
 202
 203     linpack. this version dated 08/14/78.
 204     cleve moler, university of new mexico, argonne national lab.
 205
 206     functions
 207
 208     blas daxpy,dscal,idamax
 209   */
 210   final int dgefa( double a[][], int lda, int n, int ipvt[])
 211   {
 212     double[] col_k, col_j;
 213     double t;
 214     int j,k,kp1,l,nm1;
 215     int info;
 216
 217     // gaussian elimination with partial pivoting
 218
 219     info = 0;
 220     nm1 = n - 1;
 221     if (nm1 >=  0) {
 222       for (k = 0; k < nm1; k++) {
 223         col_k = a[k];
 224         kp1 = k + 1;
 225
 226         // find l = pivot index
 227
 228         l = idamax(n-k,col_k,k,1) + k;
 229         ipvt[k] = l;
 230
 231         // zero pivot implies this column already triangularized
 232
 233         if (col_k[l] != 0) {
 234
 235           // interchange if necessary
 236
 237           if (l != k) {
 238             t = col_k[l];
 239             col_k[l] = col_k[k];
 240             col_k[k] = t;
 241           }
 242
 243           // compute multipliers
 244
 245           t = -1.0/col_k[k];
 246           dscal(n-(kp1),t,col_k,kp1,1);
 247
 248           // row elimination with column indexing
 249
 250           for (j = kp1; j < n; j++) {
 251             col_j = a[j];
 252             t = col_j[l];
 253             if (l != k) {
 254               col_j[l] = col_j[k];
 255               col_j[k] = t;
 256             }
 257             daxpy(n-(kp1),t,col_k,kp1,1,
 258                   col_j,kp1,1);
 259           }
 260         }
 261         else {
 262           info = k;
 263         }
 264       }
 265     }
 266     ipvt[n-1] = n-1;
 267     if (a[(n-1)][(n-1)] == 0) info = n-1;
 268
 269     return info;
 270   }
 271
 272
 273
 274   /*
 275     dgesl solves the double precision system
 276     a * x = b  or  trans(a) * x = b
 277     using the factors computed by dgeco or dgefa.
 278
 279     on entry
 280
 281     a       double precision[n][lda]
 282     the output from dgeco or dgefa.
 283
 284     lda     integer
 285     the leading dimension of the array  a .
 286
 287     n       integer
 288     the order of the matrix  a .
 289
 290     ipvt    integer[n]
 291     the pivot vector from dgeco or dgefa.
 292
 293     b       double precision[n]
 294     the right hand side vector.
 295
 296     job     integer
 297     = 0         to solve  a*x = b ,
 298     = nonzero   to solve  trans(a)*x = b  where
 299     trans(a)  is the transpose.
 300
 301     on return
 302
 303     b       the solution vector  x .
 304
 305     error condition
 306
 307     a division by zero will occur if the input factor contains a
 308     zero on the diagonal.  technically this indicates singularity
 309     but it is often caused by improper arguments or improper
 310     setting of lda .  it will not occur if the subroutines are
 311     called correctly and if dgeco has set rcond .gt. 0.0
 312     or dgefa has set info .eq. 0 .
 313
 314     to compute  inverse(a) * c  where  c  is a matrix
 315     with  p  columns
 316     dgeco(a,lda,n,ipvt,rcond,z)
 317     if (!rcond is too small){
 318     for (j=0,j<p,j++)
 319     dgesl(a,lda,n,ipvt,c[j][0],0);
 320     }
 321
 322     linpack. this version dated 08/14/78 .
 323     cleve moler, university of new mexico, argonne national lab.
 324
 325     functions
 326
 327     blas daxpy,ddot
 328   */
 329   final void dgesl( double a[][], int lda, int n, int ipvt[], double b[], int job)
 330   {
 331     double t;
 332     int k,kb,l,nm1,kp1;
 333
 334     nm1 = n - 1;
 335     if (job == 0) {
 336
 337       // job = 0 , solve  a * x = b.  first solve  l*y = b
 338
 339       if (nm1 >= 1) {
 340         for (k = 0; k < nm1; k++) {
 341           l = ipvt[k];
 342           t = b[l];
 343           if (l != k){
 344             b[l] = b[k];
 345             b[k] = t;
 346           }
 347           kp1 = k + 1;
 348           daxpy(n-(kp1),t,a[k],kp1,1,b,kp1,1);
 349         }
 350       }
 351
 352       // now solve  u*x = y
 353
 354       for (kb = 0; kb < n; kb++) {
 355         k = n - (kb + 1);
 356         b[k] /= a[k][k];
 357         t = -b[k];
 358         daxpy(k,t,a[k],0,1,b,0,1);
 359       }
 360     }
 361     else {
 362
 363       // job = nonzero, solve  trans(a) * x = b.  first solve  trans(u)*y = b
 364
 365       for (k = 0; k < n; k++) {
 366         t = ddot(k,a[k],0,1,b,0,1);
 367         b[k] = (b[k] - t)/a[k][k];
 368       }
 369
 370       // now solve trans(l)*x = y
 371
 372       if (nm1 >= 1) {
 373         for (kb = 1; kb < nm1; kb++) {
 374           k = n - (kb+1);
 375           kp1 = k + 1;
 376           b[k] += ddot(n-(kp1),a[k],kp1,1,b,kp1,1);
 377           l = ipvt[k];
 378           if (l != k) {
 379             t = b[l];
 380             b[l] = b[k];
 381             b[k] = t;
 382           }
 383         }
 384       }
 385     }
 386   }
 387
 388
 389
 390   /*
 391     constant times a vector plus a vector.
 392     jack dongarra, linpack, 3/11/78.
 393   */
 394   final void daxpy( int n, double da, double dx[], int dx_off, int incx,
 395               double dy[], int dy_off, int incy)
 396   {
 397     int i,ix,iy;
 398
 399     if ((n > 0) && (da != 0)) {
 400       if (incx != 1 || incy != 1) {
 401
 402         // code for unequal increments or equal increments not equal to 1
 403
 404         ix = 0;
 405         iy = 0;
 406         if (incx < 0) ix = (-n+1)*incx;
 407         if (incy < 0) iy = (-n+1)*incy;
 408         for (i = 0;i < n; i++) {
 409           dy[iy +dy_off] += da*dx[ix +dx_off];
 410           ix += incx;
 411           iy += incy;
 412         }
 413         return;
 414       } else {
 415
 416         // code for both increments equal to 1
 417
 418         for (i=0; i < n; i++)
 419           dy[i +dy_off] += da*dx[i +dx_off];
 420       }
 421     }
 422   }
 423
 424
 425
 426   /*
 427     forms the dot product of two vectors.
 428     jack dongarra, linpack, 3/11/78.
 429   */
 430   final double ddot( int n, double dx[], int dx_off, int incx, double dy[],
 431                int dy_off, int incy)
 432   {
 433     double dtemp;
 434     int i,ix,iy;
 435
 436     dtemp = 0;
 437
 438     if (n > 0) {
 439
 440       if (incx != 1 || incy != 1) {
 441
 442         // code for unequal increments or equal increments not equal to 1
 443
 444         ix = 0;
 445         iy = 0;
 446         if (incx < 0) ix = (-n+1)*incx;
 447         if (incy < 0) iy = (-n+1)*incy;
 448         for (i = 0;i < n; i++) {
 449           dtemp += dx[ix +dx_off]*dy[iy +dy_off];
 450           ix += incx;
 451           iy += incy;
 452         }
 453       } else {
 454
 455         // code for both increments equal to 1
 456
 457         for (i=0;i < n; i++)
 458           dtemp += dx[i +dx_off]*dy[i +dy_off];
 459       }
 460     }
 461     return(dtemp);
 462   }
 463
 464
 465
 466   /*
 467     scales a vector by a constant.
 468     jack dongarra, linpack, 3/11/78.
 469   */
 470   final void dscal( int n, double da, double dx[], int dx_off, int incx)
 471   {
 472     int i,nincx;
 473
 474     if (n > 0) {
 475       if (incx != 1) {
 476
 477         // code for increment not equal to 1
 478
 479         nincx = n*incx;
 480         for (i = 0; i < nincx; i += incx)
 481           dx[i +dx_off] *= da;
 482       } else {
 483
 484         // code for increment equal to 1
 485
 486         for (i = 0; i < n; i++)
 487           dx[i +dx_off] *= da;
 488       }
 489     }
 490   }
 491
 492
 493
 494   /*
 495     finds the index of element having max. absolute value.
 496     jack dongarra, linpack, 3/11/78.
 497   */
 498   final int idamax( int n, double dx[], int dx_off, int incx)
 499   {
 500     double dmax, dtemp;
 501     int i, ix, itemp=0;
 502
 503     if (n < 1) {
 504       itemp = -1;
 505     } else if (n ==1) {
 506       itemp = 0;
 507     } else if (incx != 1) {
 508
 509       // code for increment not equal to 1
 510
 511       dmax = abs(dx[0 +dx_off]);
 512       ix = 1 + incx;
 513       for (i = 1; i < n; i++) {
 514         dtemp = abs(dx[ix + dx_off]);
 515         if (dtemp > dmax)  {
 516           itemp = i;
 517           dmax = dtemp;
 518         }
 519         ix += incx;
 520       }
 521     } else {
 522
 523       // code for increment equal to 1
 524
 525       itemp = 0;
 526       dmax = abs(dx[0 +dx_off]);
 527       for (i = 1; i < n; i++) {
 528         dtemp = abs(dx[i + dx_off]);
 529         if (dtemp > dmax) {
 530           itemp = i;
 531           dmax = dtemp;
 532         }
 533       }
 534     }
 535     return (itemp);
 536   }
 537
 538
 539
 540   /*
 541     estimate unit roundoff in quantities of size x.
 542
 543     this program should function properly on all systems
 544     satisfying the following two assumptions,
 545     1.  the base used in representing dfloating point
 546     numbers is not a power of three.
 547     2.  the quantity  a  in statement 10 is represented to
 548     the accuracy used in dfloating point variables
 549     that are stored in memory.
 550     the statement number 10 and the go to 10 are intended to
 551     force optimizing compilers to generate code satisfying
 552     assumption 2.
 553     under these assumptions, it should be true that,
 554     a  is not exactly equal to four-thirds,
 555     b  has a zero for its last bit or digit,
 556     c  is not exactly equal to one,
 557     eps  measures the separation of 1.0 from
 558     the next larger dfloating point number.
 559     the developers of eispack would appreciate being informed
 560     about any systems where these assumptions do not hold.
 561
 562     *****************************************************************
 563     this routine is one of the auxiliary routines used by eispack iii
 564     to avoid machine dependencies.
 565     *****************************************************************
 566
 567     this version dated 4/6/83.
 568   */
 569   final double epslon (double x)
 570   {
 571     double a,b,c,eps;
 572
 573     a = 4.0e0/3.0e0;
 574     eps = 0;
 575     while (eps == 0) {
 576       b = a - 1.0;
 577       c = b + b + b;
 578       eps = abs(c-1.0);
 579     }
 580     return(eps*abs(x));
 581   }
 582
 583
 584
 585   /*
 586     purpose:
 587     multiply matrix m times vector x and add the result to vector y.
 588
 589     parameters:
 590
 591     n1 integer, number of elements in vector y, and number of rows in
 592     matrix m
 593
 594     y double [n1], vector of length n1 to which is added
 595     the product m*x
 596
 597     n2 integer, number of elements in vector x, and number of columns
 598     in matrix m
 599
 600     ldm integer, leading dimension of array m
 601
 602     x double [n2], vector of length n2
 603
 604     m double [ldm][n2], matrix of n1 rows and n2 columns
 605   */
 606   final void dmxpy ( int n1, double y[], int n2, int ldm, double x[], double m[][])
 607   {
 608     int j,i;
 609
 610     // cleanup odd vector
 611     for (j = 0; j < n2; j++) {
 612       for (i = 0; i < n1; i++) {
 613         y[i] += x[j]*m[j][i];
 614       }
 615     }
 616   }
 617
 618 }