src/and/Mapping/Linpack.java

   1 package and.Mapping;
   2 /*
   3
   4 Modified 3/3/97 by David M. Doolin (dmd) doolin@cs.utk.edu
   5 Fixed error in matgen() method. Added some comments.
   6
   7 Modified 1/22/97 by Paul McMahan mcmahan@cs.utk.edu
   8 Added more MacOS options to form.
   9
  10 Optimized by Jonathan Hardwick (jch@cs.cmu.edu), 3/28/96
  11 Compare to Linkpack.java.
  12 Optimizations performed:
  13  - added "final" modifier to performance-critical methods.
  14  - changed lines of the form "a[i] = a[i] + x" to "a[i] += x".
  15  - minimized array references using common subexpression elimination.
  16  - eliminated unused variables.
  17  - undid an unrolled loop.
  18  - added temporary 1D arrays to hold frequently-used columns of 2D arrays.
  19  - wrote my own abs() method
  20 See http://www.cs.cmu.edu/~jch/java/linpack.html for more details.
  21
  22
  23 Ported to Java by Reed Wade  (wade@cs.utk.edu) 2/96
  24 built using JDK 1.0 on solaris
  25 using "javac -O Linpack.java"
  26
  27
  28 Translated to C by Bonnie Toy 5/88
  29   (modified on 2/25/94  to fix a problem with daxpy  for
  30    unequal increments or equal increments not equal to 1.
  31      Jack Dongarra)
  32
  33 */
  34
  35 /**
  36  * Class allowing to retrieve the computing power of a
  37  * computer; this power is represented by MFlops
  38  *
  39  * @author Bonnie Toy, Reed Wade
  40  */
  41
  42 public class Linpack
  43 {
  44
  45         /**
  46          * Empty default constructor.
  47          */
  48         public Linpack(){}
  49
  50         final double abs (double d) {
  51                 return (d >= 0) ? d : -d;
  52         }
  53
  54         double second_orig = -1;
  55
  56         double second()
  57         {
  58                 if (second_orig==-1) {
  59                         second_orig = System.currentTimeMillis();
  60                 }
  61                 return (System.currentTimeMillis() - second_orig)/1000;
  62         }
  63
  64         /**
  65          * Estimate the computing power of the local computer
  66          * and return as result the obtained MFlops.
  67          * @return The MFlops of the computer
  68          */
  69         public double getMFlops()
  70         {
  71                 double mflops_result = 0.0;
  72                 double residn_result = 0.0;
  73                 double time_result = 0.0;
  74                 double eps_result = 0.0;
  75
  76                 double a[][] = new double[1000][1001];
  77                 double b[] = new double[1000];
  78                 double x[] = new double[1000];
  79                 double ops,total,norma,normx;
  80                 double resid,time;
  81                 int n,i,lda;
  82                 int ipvt[] = new int[1000];
  83
  84                 //double mflops_result;
  85                 //double residn_result;
  86                 //double time_result;
  87                 //double eps_result;
  88
  89                 lda = 1001;
  90                 n = 500;
  91
  92                 ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);
  93
  94                 norma = matgen(a,lda,n,b);
  95                 time = second();
  96                 dgefa(a,lda,n,ipvt);
  97                 dgesl(a,lda,n,ipvt,b,0);
  98                 total = second() - time;
  99
 100                 for (i = 0; i < n; i++) {
 101                         x[i] = b[i];
 102                 }
 103                 norma = matgen(a,lda,n,b);
 104                 for (i = 0; i < n; i++) {
 105                         b[i] = -b[i];
 106                 }
 107                 dmxpy(n,b,n,lda,x,a);
 108                 resid = 0.0;
 109                 normx = 0.0;
 110                 for (i = 0; i < n; i++) {
 111                         resid = (resid > abs(b[i])) ? resid : abs(b[i]);
 112                         normx = (normx > abs(x[i])) ? normx : abs(x[i]);
 113                 }
 114
 115                 eps_result = epslon((double)1.0);
 116                 /*
 117
 118     residn_result = resid/( n*norma*normx*eps_result );
 119     time_result = total;
 120     mflops_result = ops/(1.0e6*total);
 121
 122     return ("Mflops/s: " + mflops_result +
 123             "  Time: " + time_result + " secs" +
 124             "  Norm Res: " + residn_result +
 125             "  Precision: " + eps_result);
 126                  */
 127                 residn_result = resid/( n*norma*normx*eps_result );
 128                 residn_result += 0.005; // for rounding
 129                 residn_result = (int)(residn_result*100);
 130                 residn_result /= 100;
 131
 132                 time_result = total;
 133                 time_result += 0.005; // for rounding
 134                 time_result = (int)(time_result*100);
 135                 time_result /= 100;
 136
 137                 mflops_result = ops/(1.0e6*total);
 138                 mflops_result += 0.0005; // for rounding
 139                 mflops_result = (int)(mflops_result*1000);
 140                 mflops_result /= 1000;
 141
 142                 //    System.out.println("Mflops/s: " + mflops_result +
 143                 //          "  Time: " + time_result + " secs" +
 144                 //          "  Norm Res: " + residn_result +
 145                 //          "  Precision: " + eps_result);
 146
 147                 return mflops_result ;
 148         }
 149
 150
 151
 152         final double matgen (double a[][], int lda, int n, double b[])
 153         {
 154                 double norma;
 155                 int init, i, j;
 156
 157                 init = 1325;
 158                 norma = 0.0;
 159                 /*  Next two for() statements switched.  Solver wants
 160 matrix in column order. --dmd 3/3/97
 161                  */
 162                 for (i = 0; i < n; i++) {
 163                         for (j = 0; j < n; j++) {
 164                                 init = 3125*init % 65536;
 165                                 a[j][i] = (init - 32768.0)/16384.0;
 166                                 norma = (a[j][i] > norma) ? a[j][i] : norma;
 167                         }
 168                 }
 169                 for (i = 0; i < n; i++) {
 170                         b[i] = 0.0;
 171                 }
 172                 for (j = 0; j < n; j++) {
 173                         for (i = 0; i < n; i++) {
 174                                 b[i] += a[j][i];
 175                         }
 176                 }
 177
 178                 return norma;
 179         }
 180
 181
 182
 183         /*
 184     dgefa factors a double precision matrix by gaussian elimination.
 185
 186     dgefa is usually called by dgeco, but it can be called
 187     directly with a saving in time if  rcond  is not needed.
 188     (time for dgeco) = (1 + 9/n)*(time for dgefa) .
 189
 190     on entry
 191
 192     a       double precision[n][lda]
 193     the matrix to be factored.
 194
 195     lda     integer
 196     the leading dimension of the array  a .
 197
 198     n       integer
 199     the order of the matrix  a .
 200
 201     on return
 202
 203     a       an upper triangular matrix and the multipliers
 204     which were used to obtain it.
 205     the factorization can be written  a = l*u  where
 206     l  is a product of permutation and unit lower
 207     triangular matrices and  u  is upper triangular.
 208
 209     ipvt    integer[n]
 210     an integer vector of pivot indices.
 211
 212     info    integer
 213     = 0  normal value.
 214     = k  if  u[k][k] .eq. 0.0 .  this is not an error
 215     condition for this subroutine, but it does
 216     indicate that dgesl or dgedi will divide by zero
 217     if called.  use  rcond  in dgeco for a reliable
 218     indication of singularity.
 219
 220     linpack. this version dated 08/14/78.
 221     cleve moler, university of new mexico, argonne national lab.
 222
 223     functions
 224
 225     blas daxpy,dscal,idamax
 226          */
 227         final int dgefa( double a[][], int lda, int n, int ipvt[])
 228         {
 229                 double[] col_k, col_j;
 230                 double t;
 231                 int j,k,kp1,l,nm1;
 232                 int info;
 233
 234                 // gaussian elimination with partial pivoting
 235
 236                 info = 0;
 237                 nm1 = n - 1;
 238                 if (nm1 >=  0) {
 239                         for (k = 0; k < nm1; k++) {
 240                                 col_k = a[k];
 241                                 kp1 = k + 1;
 242
 243                                 // find l = pivot index
 244
 245                                 l = idamax(n-k,col_k,k,1) + k;
 246                                 ipvt[k] = l;
 247
 248                                 // zero pivot implies this column already triangularized
 249
 250                                 if (col_k[l] != 0) {
 251
 252                                         // interchange if necessary
 253
 254                                         if (l != k) {
 255                                                 t = col_k[l];
 256                                                 col_k[l] = col_k[k];
 257                                                 col_k[k] = t;
 258                                         }
 259
 260                                         // compute multipliers
 261
 262                                         t = -1.0/col_k[k];
 263                                         dscal(n-(kp1),t,col_k,kp1,1);
 264
 265                                         // row elimination with column indexing
 266
 267                                         for (j = kp1; j < n; j++) {
 268                                                 col_j = a[j];
 269                                                 t = col_j[l];
 270                                                 if (l != k) {
 271                                                         col_j[l] = col_j[k];
 272                                                         col_j[k] = t;
 273                                                 }
 274                                                 daxpy(n-(kp1),t,col_k,kp1,1,
 275                                                                 col_j,kp1,1);
 276                                         }
 277                                 }
 278                                 else {
 279                                         info = k;
 280                                 }
 281                         }
 282                 }
 283                 ipvt[n-1] = n-1;
 284                 if (a[(n-1)][(n-1)] == 0) info = n-1;
 285
 286                 return info;
 287         }
 288
 289
 290
 291         /*
 292     dgesl solves the double precision system
 293     a * x = b  or  trans(a) * x = b
 294     using the factors computed by dgeco or dgefa.
 295
 296     on entry
 297
 298     a       double precision[n][lda]
 299     the output from dgeco or dgefa.
 300
 301     lda     integer
 302     the leading dimension of the array  a .
 303
 304     n       integer
 305     the order of the matrix  a .
 306
 307     ipvt    integer[n]
 308     the pivot vector from dgeco or dgefa.
 309
 310     b       double precision[n]
 311     the right hand side vector.
 312
 313     job     integer
 314     = 0         to solve  a*x = b ,
 315     = nonzero   to solve  trans(a)*x = b  where
 316     trans(a)  is the transpose.
 317
 318     on return
 319
 320     b       the solution vector  x .
 321
 322     error condition
 323
 324     a division by zero will occur if the input factor contains a
 325     zero on the diagonal.  technically this indicates singularity
 326     but it is often caused by improper arguments or improper
 327     setting of lda .  it will not occur if the subroutines are
 328     called correctly and if dgeco has set rcond .gt. 0.0
 329     or dgefa has set info .eq. 0 .
 330
 331     to compute  inverse(a) * c  where  c  is a matrix
 332     with  p  columns
 333     dgeco(a,lda,n,ipvt,rcond,z)
 334     if (!rcond is too small){
 335     for (j=0,j<p,j++)
 336     dgesl(a,lda,n,ipvt,c[j][0],0);
 337     }
 338
 339     linpack. this version dated 08/14/78 .
 340     cleve moler, university of new mexico, argonne national lab.
 341
 342     functions
 343
 344     blas daxpy,ddot
 345          */
 346         final void dgesl( double a[][], int lda, int n, int ipvt[], double b[], int job)
 347         {
 348                 double t;
 349                 int k,kb,l,nm1,kp1;
 350
 351                 nm1 = n - 1;
 352                 if (job == 0) {
 353
 354                         // job = 0 , solve  a * x = b.  first solve  l*y = b
 355
 356                         if (nm1 >= 1) {
 357                                 for (k = 0; k < nm1; k++) {
 358                                         l = ipvt[k];
 359                                         t = b[l];
 360                                         if (l != k){
 361                                                 b[l] = b[k];
 362                                                 b[k] = t;
 363                                         }
 364                                         kp1 = k + 1;
 365                                         daxpy(n-(kp1),t,a[k],kp1,1,b,kp1,1);
 366                                 }
 367                         }
 368
 369                         // now solve  u*x = y
 370
 371                         for (kb = 0; kb < n; kb++) {
 372                                 k = n - (kb + 1);
 373                                 b[k] /= a[k][k];
 374                                 t = -b[k];
 375                                 daxpy(k,t,a[k],0,1,b,0,1);
 376                         }
 377                 }
 378                 else {
 379
 380                         // job = nonzero, solve  trans(a) * x = b.  first solve  trans(u)*y = b
 381
 382                         for (k = 0; k < n; k++) {
 383                                 t = ddot(k,a[k],0,1,b,0,1);
 384                                 b[k] = (b[k] - t)/a[k][k];
 385                         }
 386
 387                         // now solve trans(l)*x = y
 388
 389                         if (nm1 >= 1) {
 390                                 for (kb = 1; kb < nm1; kb++) {
 391                                         k = n - (kb+1);
 392                                         kp1 = k + 1;
 393                                         b[k] += ddot(n-(kp1),a[k],kp1,1,b,kp1,1);
 394                                         l = ipvt[k];
 395                                         if (l != k) {
 396                                                 t = b[l];
 397                                                 b[l] = b[k];
 398                                                 b[k] = t;
 399                                         }
 400                                 }
 401                         }
 402                 }
 403         }
 404
 405
 406
 407         /*
 408     constant times a vector plus a vector.
 409     jack dongarra, linpack, 3/11/78.
 410          */
 411         final void daxpy( int n, double da, double dx[], int dx_off, int incx,
 412                         double dy[], int dy_off, int incy)
 413         {
 414                 int i,ix,iy;
 415
 416                 if ((n > 0) && (da != 0)) {
 417                         if (incx != 1 || incy != 1) {
 418
 419                                 // code for unequal increments or equal increments not equal to 1
 420
 421                                 ix = 0;
 422                                 iy = 0;
 423                                 if (incx < 0) ix = (-n+1)*incx;
 424                                 if (incy < 0) iy = (-n+1)*incy;
 425                                 for (i = 0;i < n; i++) {
 426                                         dy[iy +dy_off] += da*dx[ix +dx_off];
 427                                         ix += incx;
 428                                         iy += incy;
 429                                 }
 430                                 return;
 431                         } else {
 432
 433                                 // code for both increments equal to 1
 434
 435                                 for (i=0; i < n; i++)
 436                                         dy[i +dy_off] += da*dx[i +dx_off];
 437                         }
 438                 }
 439         }
 440
 441
 442
 443         /*
 444     forms the dot product of two vectors.
 445     jack dongarra, linpack, 3/11/78.
 446          */
 447         final double ddot( int n, double dx[], int dx_off, int incx, double dy[],
 448                         int dy_off, int incy)
 449         {
 450                 double dtemp;
 451                 int i,ix,iy;
 452
 453                 dtemp = 0;
 454
 455                 if (n > 0) {
 456
 457                         if (incx != 1 || incy != 1) {
 458
 459                                 // code for unequal increments or equal increments not equal to 1
 460
 461                                 ix = 0;
 462                                 iy = 0;
 463                                 if (incx < 0) ix = (-n+1)*incx;
 464                                 if (incy < 0) iy = (-n+1)*incy;
 465                                 for (i = 0;i < n; i++) {
 466                                         dtemp += dx[ix +dx_off]*dy[iy +dy_off];
 467                                         ix += incx;
 468                                         iy += incy;
 469                                 }
 470                         } else {
 471
 472                                 // code for both increments equal to 1
 473
 474                                 for (i=0;i < n; i++)
 475                                         dtemp += dx[i +dx_off]*dy[i +dy_off];
 476                         }
 477                 }
 478                 return(dtemp);
 479         }
 480
 481
 482
 483         /*
 484     scales a vector by a constant.
 485     jack dongarra, linpack, 3/11/78.
 486          */
 487         final void dscal( int n, double da, double dx[], int dx_off, int incx)
 488         {
 489                 int i,nincx;
 490
 491                 if (n > 0) {
 492                         if (incx != 1) {
 493
 494                                 // code for increment not equal to 1
 495
 496                                 nincx = n*incx;
 497                                 for (i = 0; i < nincx; i += incx)
 498                                         dx[i +dx_off] *= da;
 499                         } else {
 500
 501                                 // code for increment equal to 1
 502
 503                                 for (i = 0; i < n; i++)
 504                                         dx[i +dx_off] *= da;
 505                         }
 506                 }
 507         }
 508
 509
 510
 511         /*
 512     finds the index of element having max. absolute value.
 513     jack dongarra, linpack, 3/11/78.
 514          */
 515         final int idamax( int n, double dx[], int dx_off, int incx)
 516         {
 517                 double dmax, dtemp;
 518                 int i, ix, itemp=0;
 519
 520                 if (n < 1) {
 521                         itemp = -1;
 522                 } else if (n ==1) {
 523                         itemp = 0;
 524                 } else if (incx != 1) {
 525
 526                         // code for increment not equal to 1
 527
 528                         dmax = abs(dx[0 +dx_off]);
 529                         ix = 1 + incx;
 530                         for (i = 1; i < n; i++) {
 531                                 dtemp = abs(dx[ix + dx_off]);
 532                                 if (dtemp > dmax)  {
 533                                         itemp = i;
 534                                         dmax = dtemp;
 535                                 }
 536                                 ix += incx;
 537                         }
 538                 } else {
 539
 540                         // code for increment equal to 1
 541
 542                         itemp = 0;
 543                         dmax = abs(dx[0 +dx_off]);
 544                         for (i = 1; i < n; i++) {
 545                                 dtemp = abs(dx[i + dx_off]);
 546                                 if (dtemp > dmax) {
 547                                         itemp = i;
 548                                         dmax = dtemp;
 549                                 }
 550                         }
 551                 }
 552                 return (itemp);
 553         }
 554
 555
 556
 557         /*
 558     estimate unit roundoff in quantities of size x.
 559
 560     this program should function properly on all systems
 561     satisfying the following two assumptions,
 562     1.  the base used in representing dfloating point
 563     numbers is not a power of three.
 564     2.  the quantity  a  in statement 10 is represented to
 565     the accuracy used in dfloating point variables
 566     that are stored in memory.
 567     the statement number 10 and the go to 10 are intended to
 568     force optimizing compilers to generate code satisfying
 569     assumption 2.
 570     under these assumptions, it should be true that,
 571     a  is not exactly equal to four-thirds,
 572     b  has a zero for its last bit or digit,
 573     c  is not exactly equal to one,
 574     eps  measures the separation of 1.0 from
 575     the next larger dfloating point number.
 576     the developers of eispack would appreciate being informed
 577     about any systems where these assumptions do not hold.
 578
 579          *****************************************************************
 580     this routine is one of the auxiliary routines used by eispack iii
 581     to avoid machine dependencies.
 582          *****************************************************************
 583
 584     this version dated 4/6/83.
 585          */
 586         final double epslon (double x)
 587         {
 588                 double a,b,c,eps;
 589
 590                 a = 4.0e0/3.0e0;
 591                 eps = 0;
 592                 while (eps == 0) {
 593                         b = a - 1.0;
 594                         c = b + b + b;
 595                         eps = abs(c-1.0);
 596                 }
 597                 return(eps*abs(x));
 598         }
 599
 600
 601
 602         /*
 603     purpose:
 604     multiply matrix m times vector x and add the result to vector y.
 605
 606     parameters:
 607
 608     n1 integer, number of elements in vector y, and number of rows in
 609     matrix m
 610
 611     y double [n1], vector of length n1 to which is added
 612     the product m*x
 613
 614     n2 integer, number of elements in vector x, and number of columns
 615     in matrix m
 616
 617     ldm integer, leading dimension of array m
 618
 619     x double [n2], vector of length n2
 620
 621     m double [ldm][n2], matrix of n1 rows and n2 columns
 622          */
 623         final void dmxpy ( int n1, double y[], int n2, int ldm, double x[], double m[][])
 624         {
 625                 int j,i;
 626
 627                 // cleanup odd vector
 628                 for (j = 0; j < n2; j++) {
 629                         for (i = 0; i < n1; i++) {
 630                                 y[i] += x[j]*m[j][i];
 631                         }
 632                 }
 633         }
 634
 635 }