src/and/Mapping/Linpack.java

   1 package and.Mapping;
   2 /*
   3
   4 Modified 3/3/97 by David M. Doolin (dmd) doolin@cs.utk.edu
   5 Fixed error in matgen() method. Added some comments.
   6
   7 Modified 1/22/97 by Paul McMahan mcmahan@cs.utk.edu
   8 Added more MacOS options to form.
   9
  10 Optimized by Jonathan Hardwick (jch@cs.cmu.edu), 3/28/96
  11 Compare to Linkpack.java.
  12 Optimizations performed:
  13  - added "final" modifier to performance-critical methods.
  14  - changed lines of the form "a[i] = a[i] + x" to "a[i] += x".
  15  - minimized array references using common subexpression elimination.
  16  - eliminated unused variables.
  17  - undid an unrolled loop.
  18  - added temporary 1D arrays to hold frequently-used columns of 2D arrays.
  19  - wrote my own abs() method
  20 See http://www.cs.cmu.edu/~jch/java/linpack.html for more details.
  21
  22
  23 Ported to Java by Reed Wade  (wade@cs.utk.edu) 2/96
  24 built using JDK 1.0 on solaris
  25 using "javac -O Linpack.java"
  26
  27
  28 Translated to C by Bonnie Toy 5/88
  29   (modified on 2/25/94  to fix a problem with daxpy  for
  30    unequal increments or equal increments not equal to 1.
  31      Jack Dongarra)
  32
  33 */
  34
  35 /**
  36  * Class allowing to retrieve the computing power of a
  37  * computer; this power is represented by MFlops
  38  *
  39  * @author Bonnie Toy, Reed Wade
  40  */
  41
  42 public class Linpack
  43 {
  44
  45         /**
  46          * Empty default constructor.
  47          */
  48         public Linpack(){}
  49
  50         final double abs (double d) {
  51                 return (d >= 0) ? d : -d;
  52         }
  53
  54         double second_orig = -1;
  55
  56         double second()
  57         {
  58                 if (second_orig==-1) {
  59                         second_orig = System.currentTimeMillis();
  60                 }
  61                 return (System.currentTimeMillis() - second_orig)/1000;
  62         }
  63
  64         /**
  65          * Estimate the computing power of the local computer
  66          * and return as result the obtained MFlops.
  67          * @return The MFlops of the computer
  68          */
  69         public double getMFlops()
  70         {
  71                 double mflops_result = 0.0;
  72                 double residn_result = 0.0;
  73                 double time_result = 0.0;
  74                 double eps_result = 0.0;
  75
  76                 double a[][] = new double[1000][1001];
  77                 double b[] = new double[1000];
  78                 double x[] = new double[1000];
  79                 double ops,total,norma,normx;
  80                 double resid,time;
  81                 int n,i,lda;
  82                 int ipvt[] = new int[1000];
  83
  84                 lda = 1001;
  85                 n = 500;
  86
  87                 ops = (2.0e0*(n*n*n))/3.0 + 2.0*(n*n);
  88
  89                 norma = matgen(a,lda,n,b);
  90                 time = second();
  91                 dgefa(a,lda,n,ipvt);
  92                 dgesl(a,lda,n,ipvt,b,0);
  93                 total = second() - time;
  94
  95                 for (i = 0; i < n; i++) {
  96                         x[i] = b[i];
  97                 }
  98                 norma = matgen(a,lda,n,b);
  99                 for (i = 0; i < n; i++) {
 100                         b[i] = -b[i];
 101                 }
 102                 dmxpy(n,b,n,lda,x,a);
 103                 resid = 0.0;
 104                 normx = 0.0;
 105                 for (i = 0; i < n; i++) {
 106                         resid = (resid > abs(b[i])) ? resid : abs(b[i]);
 107                         normx = (normx > abs(x[i])) ? normx : abs(x[i]);
 108                 }
 109
 110                 eps_result = epslon((double)1.0);
 111                 /*
 112            return ("Mflops/s: " + mflops_result +
 113                "  Time: " + time_result + " secs" +
 114                "  Norm Res: " + residn_result +
 115                "  Precision: " + eps_result);
 116                  */
 117                 residn_result = resid/( n*norma*normx*eps_result );
 118                 residn_result += 0.005; // for rounding
 119                 residn_result = (int)(residn_result*100);
 120                 residn_result /= 100;
 121
 122                 time_result = total;
 123                 time_result += 0.005; // for rounding
 124                 time_result = (int)(time_result*100);
 125                 time_result /= 100;
 126
 127                 mflops_result = ops/(1.0e6*total);
 128                 mflops_result += 0.0005; // for rounding
 129                 mflops_result = (int)(mflops_result*1000);
 130                 mflops_result /= 1000;
 131
 132                 return mflops_result ;
 133         }
 134
 135
 136
 137         final double matgen (double a[][], int lda, int n, double b[])
 138         {
 139                 double norma;
 140                 int init, i, j;
 141
 142                 init = 1325;
 143                 norma = 0.0;
 144                 /*  Next two for() statements switched.  Solver wants
 145                 matrix in column order. --dmd 3/3/97
 146                  */
 147                 for (i = 0; i < n; i++) {
 148                         for (j = 0; j < n; j++) {
 149                                 init = 3125*init % 65536;
 150                                 a[j][i] = (init - 32768.0)/16384.0;
 151                                 norma = (a[j][i] > norma) ? a[j][i] : norma;
 152                         }
 153                 }
 154                 for (i = 0; i < n; i++) {
 155                         b[i] = 0.0;
 156                 }
 157                 for (j = 0; j < n; j++) {
 158                         for (i = 0; i < n; i++) {
 159                                 b[i] += a[j][i];
 160                         }
 161                 }
 162
 163                 return norma;
 164         }
 165
 166
 167
 168         /*
 169     dgefa factors a double precision matrix by gaussian elimination.
 170
 171     dgefa is usually called by dgeco, but it can be called
 172     directly with a saving in time if  rcond  is not needed.
 173     (time for dgeco) = (1 + 9/n)*(time for dgefa) .
 174
 175     on entry
 176
 177     a       double precision[n][lda]
 178     the matrix to be factored.
 179
 180     lda     integer
 181     the leading dimension of the array  a .
 182
 183     n       integer
 184     the order of the matrix  a .
 185
 186     on return
 187
 188     a       an upper triangular matrix and the multipliers
 189     which were used to obtain it.
 190     the factorization can be written  a = l*u  where
 191     l  is a product of permutation and unit lower
 192     triangular matrices and  u  is upper triangular.
 193
 194     ipvt    integer[n]
 195     an integer vector of pivot indices.
 196
 197     info    integer
 198     = 0  normal value.
 199     = k  if  u[k][k] .eq. 0.0 .  this is not an error
 200     condition for this subroutine, but it does
 201     indicate that dgesl or dgedi will divide by zero
 202     if called.  use  rcond  in dgeco for a reliable
 203     indication of singularity.
 204
 205     linpack. this version dated 08/14/78.
 206     cleve moler, university of new mexico, argonne national lab.
 207
 208     functions
 209
 210     blas daxpy,dscal,idamax
 211          */
 212         final int dgefa( double a[][], int lda, int n, int ipvt[])
 213         {
 214                 double[] col_k, col_j;
 215                 double t;
 216                 int j,k,kp1,l,nm1;
 217                 int info;
 218
 219                 // gaussian elimination with partial pivoting
 220
 221                 info = 0;
 222                 nm1 = n - 1;
 223                 if (nm1 >=  0) {
 224                         for (k = 0; k < nm1; k++) {
 225                                 col_k = a[k];
 226                                 kp1 = k + 1;
 227
 228                                 // find l = pivot index
 229
 230                                 l = idamax(n-k,col_k,k,1) + k;
 231                                 ipvt[k] = l;
 232
 233                                 // zero pivot implies this column already triangularized
 234
 235                                 if (col_k[l] != 0) {
 236
 237                                         // interchange if necessary
 238
 239                                         if (l != k) {
 240                                                 t = col_k[l];
 241                                                 col_k[l] = col_k[k];
 242                                                 col_k[k] = t;
 243                                         }
 244
 245                                         // compute multipliers
 246
 247                                         t = -1.0/col_k[k];
 248                                         dscal(n-(kp1),t,col_k,kp1,1);
 249
 250                                         // row elimination with column indexing
 251
 252                                         for (j = kp1; j < n; j++) {
 253                                                 col_j = a[j];
 254                                                 t = col_j[l];
 255                                                 if (l != k) {
 256                                                         col_j[l] = col_j[k];
 257                                                         col_j[k] = t;
 258                                                 }
 259                                                 daxpy(n-(kp1),t,col_k,kp1,1,
 260                                                                 col_j,kp1,1);
 261                                         }
 262                                 }
 263                                 else {
 264                                         info = k;
 265                                 }
 266                         }
 267                 }
 268                 ipvt[n-1] = n-1;
 269                 if (a[(n-1)][(n-1)] == 0) info = n-1;
 270
 271                 return info;
 272         }
 273
 274
 275
 276         /*
 277     dgesl solves the double precision system
 278     a * x = b  or  trans(a) * x = b
 279     using the factors computed by dgeco or dgefa.
 280
 281     on entry
 282
 283     a       double precision[n][lda]
 284     the output from dgeco or dgefa.
 285
 286     lda     integer
 287     the leading dimension of the array  a .
 288
 289     n       integer
 290     the order of the matrix  a .
 291
 292     ipvt    integer[n]
 293     the pivot vector from dgeco or dgefa.
 294
 295     b       double precision[n]
 296     the right hand side vector.
 297
 298     job     integer
 299     = 0         to solve  a*x = b ,
 300     = nonzero   to solve  trans(a)*x = b  where
 301     trans(a)  is the transpose.
 302
 303     on return
 304
 305     b       the solution vector  x .
 306
 307     error condition
 308
 309     a division by zero will occur if the input factor contains a
 310     zero on the diagonal.  technically this indicates singularity
 311     but it is often caused by improper arguments or improper
 312     setting of lda .  it will not occur if the subroutines are
 313     called correctly and if dgeco has set rcond .gt. 0.0
 314     or dgefa has set info .eq. 0 .
 315
 316     to compute  inverse(a) * c  where  c  is a matrix
 317     with  p  columns
 318     dgeco(a,lda,n,ipvt,rcond,z)
 319     if (!rcond is too small){
 320     for (j=0,j<p,j++)
 321     dgesl(a,lda,n,ipvt,c[j][0],0);
 322     }
 323
 324     linpack. this version dated 08/14/78 .
 325     cleve moler, university of new mexico, argonne national lab.
 326
 327     functions
 328
 329     blas daxpy,ddot
 330          */
 331         final void dgesl( double a[][], int lda, int n, int ipvt[], double b[], int job)
 332         {
 333                 double t;
 334                 int k,kb,l,nm1,kp1;
 335
 336                 nm1 = n - 1;
 337                 if (job == 0) {
 338
 339                         // job = 0 , solve  a * x = b.  first solve  l*y = b
 340
 341                         if (nm1 >= 1) {
 342                                 for (k = 0; k < nm1; k++) {
 343                                         l = ipvt[k];
 344                                         t = b[l];
 345                                         if (l != k){
 346                                                 b[l] = b[k];
 347                                                 b[k] = t;
 348                                         }
 349                                         kp1 = k + 1;
 350                                         daxpy(n-(kp1),t,a[k],kp1,1,b,kp1,1);
 351                                 }
 352                         }
 353
 354                         // now solve  u*x = y
 355
 356                         for (kb = 0; kb < n; kb++) {
 357                                 k = n - (kb + 1);
 358                                 b[k] /= a[k][k];
 359                                 t = -b[k];
 360                                 daxpy(k,t,a[k],0,1,b,0,1);
 361                         }
 362                 }
 363                 else {
 364
 365                         // job = nonzero, solve  trans(a) * x = b.  first solve  trans(u)*y = b
 366
 367                         for (k = 0; k < n; k++) {
 368                                 t = ddot(k,a[k],0,1,b,0,1);
 369                                 b[k] = (b[k] - t)/a[k][k];
 370                         }
 371
 372                         // now solve trans(l)*x = y
 373
 374                         if (nm1 >= 1) {
 375                                 for (kb = 1; kb < nm1; kb++) {
 376                                         k = n - (kb+1);
 377                                         kp1 = k + 1;
 378                                         b[k] += ddot(n-(kp1),a[k],kp1,1,b,kp1,1);
 379                                         l = ipvt[k];
 380                                         if (l != k) {
 381                                                 t = b[l];
 382                                                 b[l] = b[k];
 383                                                 b[k] = t;
 384                                         }
 385                                 }
 386                         }
 387                 }
 388         }
 389
 390
 391
 392         /*
 393     constant times a vector plus a vector.
 394     jack dongarra, linpack, 3/11/78.
 395          */
 396         final void daxpy( int n, double da, double dx[], int dx_off, int incx,
 397                         double dy[], int dy_off, int incy)
 398         {
 399                 int i,ix,iy;
 400
 401                 if ((n > 0) && (da != 0)) {
 402                         if (incx != 1 || incy != 1) {
 403
 404                                 // code for unequal increments or equal increments not equal to 1
 405
 406                                 ix = 0;
 407                                 iy = 0;
 408                                 if (incx < 0) ix = (-n+1)*incx;
 409                                 if (incy < 0) iy = (-n+1)*incy;
 410                                 for (i = 0;i < n; i++) {
 411                                         dy[iy +dy_off] += da*dx[ix +dx_off];
 412                                         ix += incx;
 413                                         iy += incy;
 414                                 }
 415                                 return;
 416                         } else {
 417
 418                                 // code for both increments equal to 1
 419
 420                                 for (i=0; i < n; i++)
 421                                         dy[i +dy_off] += da*dx[i +dx_off];
 422                         }
 423                 }
 424         }
 425
 426
 427
 428         /*
 429     forms the dot product of two vectors.
 430     jack dongarra, linpack, 3/11/78.
 431          */
 432         final double ddot( int n, double dx[], int dx_off, int incx, double dy[],
 433                         int dy_off, int incy)
 434         {
 435                 double dtemp;
 436                 int i,ix,iy;
 437
 438                 dtemp = 0;
 439
 440                 if (n > 0) {
 441
 442                         if (incx != 1 || incy != 1) {
 443
 444                                 // code for unequal increments or equal increments not equal to 1
 445
 446                                 ix = 0;
 447                                 iy = 0;
 448                                 if (incx < 0) ix = (-n+1)*incx;
 449                                 if (incy < 0) iy = (-n+1)*incy;
 450                                 for (i = 0;i < n; i++) {
 451                                         dtemp += dx[ix +dx_off]*dy[iy +dy_off];
 452                                         ix += incx;
 453                                         iy += incy;
 454                                 }
 455                         } else {
 456
 457                                 // code for both increments equal to 1
 458
 459                                 for (i=0;i < n; i++)
 460                                         dtemp += dx[i +dx_off]*dy[i +dy_off];
 461                         }
 462                 }
 463                 return(dtemp);
 464         }
 465
 466
 467
 468         /*
 469     scales a vector by a constant.
 470     jack dongarra, linpack, 3/11/78.
 471          */
 472         final void dscal( int n, double da, double dx[], int dx_off, int incx)
 473         {
 474                 int i,nincx;
 475
 476                 if (n > 0) {
 477                         if (incx != 1) {
 478
 479                                 // code for increment not equal to 1
 480
 481                                 nincx = n*incx;
 482                                 for (i = 0; i < nincx; i += incx)
 483                                         dx[i +dx_off] *= da;
 484                         } else {
 485
 486                                 // code for increment equal to 1
 487
 488                                 for (i = 0; i < n; i++)
 489                                         dx[i +dx_off] *= da;
 490                         }
 491                 }
 492         }
 493
 494
 495
 496         /*
 497     finds the index of element having max. absolute value.
 498     jack dongarra, linpack, 3/11/78.
 499          */
 500         final int idamax( int n, double dx[], int dx_off, int incx)
 501         {
 502                 double dmax, dtemp;
 503                 int i, ix, itemp=0;
 504
 505                 if (n < 1) {
 506                         itemp = -1;
 507                 } else if (n ==1) {
 508                         itemp = 0;
 509                 } else if (incx != 1) {
 510
 511                         // code for increment not equal to 1
 512
 513                         dmax = abs(dx[0 +dx_off]);
 514                         ix = 1 + incx;
 515                         for (i = 1; i < n; i++) {
 516                                 dtemp = abs(dx[ix + dx_off]);
 517                                 if (dtemp > dmax)  {
 518                                         itemp = i;
 519                                         dmax = dtemp;
 520                                 }
 521                                 ix += incx;
 522                         }
 523                 } else {
 524
 525                         // code for increment equal to 1
 526
 527                         itemp = 0;
 528                         dmax = abs(dx[0 +dx_off]);
 529                         for (i = 1; i < n; i++) {
 530                                 dtemp = abs(dx[i + dx_off]);
 531                                 if (dtemp > dmax) {
 532                                         itemp = i;
 533                                         dmax = dtemp;
 534                                 }
 535                         }
 536                 }
 537                 return (itemp);
 538         }
 539
 540
 541
 542         /*
 543     estimate unit roundoff in quantities of size x.
 544
 545     this program should function properly on all systems
 546     satisfying the following two assumptions,
 547     1.  the base used in representing dfloating point
 548     numbers is not a power of three.
 549     2.  the quantity  a  in statement 10 is represented to
 550     the accuracy used in dfloating point variables
 551     that are stored in memory.
 552     the statement number 10 and the go to 10 are intended to
 553     force optimizing compilers to generate code satisfying
 554     assumption 2.
 555     under these assumptions, it should be true that,
 556     a  is not exactly equal to four-thirds,
 557     b  has a zero for its last bit or digit,
 558     c  is not exactly equal to one,
 559     eps  measures the separation of 1.0 from
 560     the next larger dfloating point number.
 561     the developers of eispack would appreciate being informed
 562     about any systems where these assumptions do not hold.
 563
 564          *****************************************************************
 565     this routine is one of the auxiliary routines used by eispack iii
 566     to avoid machine dependencies.
 567          *****************************************************************
 568
 569     this version dated 4/6/83.
 570          */
 571         final double epslon (double x)
 572         {
 573                 double a,b,c,eps;
 574
 575                 a = 4.0e0/3.0e0;
 576                 eps = 0;
 577                 while (eps == 0) {
 578                         b = a - 1.0;
 579                         c = b + b + b;
 580                         eps = abs(c-1.0);
 581                 }
 582                 return(eps*abs(x));
 583         }
 584
 585
 586
 587         /*
 588     purpose:
 589     multiply matrix m times vector x and add the result to vector y.
 590
 591     parameters:
 592
 593     n1 integer, number of elements in vector y, and number of rows in
 594     matrix m
 595
 596     y double [n1], vector of length n1 to which is added
 597     the product m*x
 598
 599     n2 integer, number of elements in vector x, and number of columns
 600     in matrix m
 601
 602     ldm integer, leading dimension of array m
 603
 604     x double [n2], vector of length n2
 605
 606     m double [ldm][n2], matrix of n1 rows and n2 columns
 607          */
 608         final void dmxpy ( int n1, double y[], int n2, int ldm, double x[], double m[][])
 609         {
 610                 int j,i;
 611
 612                 // cleanup odd vector
 613                 for (j = 0; j < n2; j++) {
 614                         for (i = 0; i < n1; i++) {
 615                                 y[i] += x[j]*m[j][i];
 616                         }
 617                 }
 618         }
 619
 620 }