src/nws_portability/Forecast/predictor.c

   1 /* $Id$ */
   2
   3 #include "config_portability.h"
   4 #include <stdio.h>      /* fprintf() fflush() */
   5 #include "strutil.h"    /* SAFESTRCPY() */
   6 #include "predictor.h"
   7
   8 /* These should be in a header file, median.h */
   9 extern double Median();
  10 extern double TrimmedMedian();
  11 extern void AdjustedMean();
  12 extern void AdjustedMedian();
  13 extern char *DefineHistogram();
  14 extern char *DefineHistogramPred();
  15 extern void PrintHistogram();
  16 extern void ModalDev();
  17
  18 /* not yet
  19 char *Histogram;
  20 char *MeanPred;
  21 char *MedianPred;
  22 char *MiddlePred;
  23 int Histodim;
  24 int Histostates;
  25 double Histomax;
  26 double Histomin;
  27 */
  28
  29 double Onedev_mse;
  30 double Onedev_mpe;
  31 double Onedev_modal;
  32 double Twodev_mse;
  33 double Twodev_mpe;
  34 double Twodev_modal;
  35 double Devcounts;
  36
  37 double Lifetime_mse[LIFETIMES];
  38 double Lifetime_mpe[LIFETIMES];
  39 int Lifetime_count = 0;
  40 extern int Median_sorted;
  41
  42 const char *Names[PREDS];
  43 #define NAME(i) (Names[i])
  44
  45 void
  46 PrintPredData(tp,where)
  47 struct thruput_pred *tp;
  48 int where;
  49 {
  50         struct value_el *data = tp->data;
  51         int prev = MODMINUS(where,1,HISTORYSIZE);
  52         int i;
  53
  54         fprintf(stdout,"%d %3.4f ",(int)data[where].time, data[where].value);
  55
  56         for(i=0; i < PREDS; i++)
  57         {
  58                 fprintf(stdout,"| %3.4f %3.4f %3.4f ",PRED(i,data[prev]),
  59                         TPMSE(tp,i)/TPCOUNT(tp,i),
  60                         TPMAE(tp,i)/TPCOUNT(tp,i));
  61         }
  62         fprintf(stdout,"| %d %d | ",MedWIN(6,data[prev]),MeanWIN(7,data[prev]));
  63         for(i=STATIC_PREDS; i < SECOND_ORDER_PREDS; i++)
  64         {
  65                 fprintf(stdout,"%d ",TPPREDNUM(tp,i));
  66         }
  67         fprintf(stdout,"| ");
  68         for(i=SECOND_ORDER_PREDS; i < PREDS; i++)
  69         {
  70                 fprintf(stdout,"%d ",TPPREDNUM(tp,i));
  71         }
  72         fprintf(stdout,"\n");
  73         fflush(stdout);
  74
  75         return;
  76 }
  77
  78
  79 void
  80 ThruputPrediction(tp)
  81 struct thruput_pred *tp;
  82 {
  83          int tail = tp->tail;
  84          int head = tp->head;
  85          int i;
  86          int prev;
  87          double error;
  88          double sq_error;
  89          struct value_el *data = tp->data;
  90          double value;
  91          double run_mean = tp->run_mean;
  92          double total = tp->total;
  93          double tot_sum;
  94          double min_run_p_error = MAXDOUBLE;
  95          int min_run_p_i = 0;
  96          double min_run_sq_error = MAXDOUBLE;
  97          int min_run_sq_i = 0;
  98          double prev_pred;
  99          double absolute_error;
 100          int last_error;
 101          double min_win_sq_error = MAXDOUBLE;
 102          double min_win_p_error = MAXDOUBLE;
 103          int min_win_sq_i = 0;
 104          int min_win_p_i = 0;
 105          double min_tot_sq_error = MAXDOUBLE;
 106          double min_tot_p_error = MAXDOUBLE;
 107          int min_tot_sq_i = 0;
 108          int min_tot_p_i = 0;
 109          double win_error;
 110          int error_window;
 111          int datasize;
 112
 113          datasize = MODMINUS(head,tail,HISTORYSIZE) - 1;
 114          if(ERR_WIN > datasize)
 115                 error_window = datasize;
 116          else
 117                 error_window = ERR_WIN;
 118
 119          last_error = MODMINUS(head,error_window,HISTORYSIZE);
 120
 121
 122
 123         value = data[head].value;
 124         prev = MODMINUS(head,1,HISTORYSIZE);
 125 #ifdef GROT
 126         /*
 127          * was the value within one deviation of the total prediction?
 128          */
 129         avg_err = TPMAE(tp,TOT_MSE_PRED)/TPCOUNT(tp,TOT_MSE_PRED);
 130         if((value > (PRED(TOT_MSE_PRED,data[prev])-avg_err)) &&
 131            (value < (PRED(TOT_MSE_PRED,data[prev])+avg_err)))
 132         {
 133                 Onedev_mse++;
 134         }
 135         if((value > (PRED(TOT_MSE_PRED,data[prev])-2*avg_err)) &&
 136            (value < (PRED(TOT_MSE_PRED,data[prev])+2*avg_err)))
 137         {
 138                 Twodev_mse++;
 139         }
 140
 141         avg_err = TPMAE(tp,TOT_MAE_PRED)/TPCOUNT(tp,TOT_MAE_PRED);
 142         if((value > (PRED(TOT_MAE_PRED,data[prev])-avg_err)) &&
 143            (value < (PRED(TOT_MAE_PRED,data[prev])+avg_err)))
 144         {
 145                 Onedev_mpe++;
 146         }
 147         if((value > (PRED(TOT_MAE_PRED,data[prev])-2*avg_err)) &&
 148            (value < (PRED(TOT_MAE_PRED,data[prev])+2*avg_err)))
 149         {
 150                 Twodev_mpe++;
 151         }
 152
 153         ModalDev(Histogram,MedianPred,1.0,&hi,&lo);
 154         if((value > lo) && (value < hi))
 155                 Onedev_modal++;
 156         ModalDev(Histogram,MedianPred,2.0,&hi,&lo);
 157         if((value > lo) && (value < hi))
 158                 Twodev_modal++;
 159
 160         Devcounts++;
 161 #endif
 162
 163         /*
 164          * calculate the error our last prediction made
 165          * now that we have the actual value
 166          */
 167         for(i=0; i < PREDS; i++)
 168         {
 169
 170                 error = data[head].value - PRED(i,data[prev]);
 171                 if(TPCOUNT(tp,i) < MIN_DATA_HISTORY)
 172                         error = 0.0;
 173                 sq_error = error * error;
 174
 175                 SQERR(i,data[head]) = SQERR(i,data[prev])+sq_error;
 176                 TPMSE(tp,i) += sq_error;
 177                 if(error < 0.0)
 178                         error = -1.0 * error;
 179                 absolute_error = error;
 180                 ERR(i,data[head]) = ERR(i,data[prev])+absolute_error;
 181                 TPMAE(tp,i) += absolute_error;
 182                 TPCOUNT(tp,i) += 1.0;
 183
 184         }
 185
 186 #ifdef GROT
 187         /*
 188          * now calculate MSE values over different lifetimes
 189          */
 190         if(datasize > LIFETIMES)
 191         {
 192                 curr = prev;
 193                 for(i=0; i < LIFETIMES; i++)
 194                 {
 195                         error = data[head].value -
 196                                 PRED(TOT_MSE_PRED,data[curr]);
 197                         Lifetime_mse[i] += (error * error);
 198                         error = data[head].value -
 199                                 PRED(TOT_MAE_PRED,data[curr]);
 200                         if(error < 0.0)
 201                                 error = -1.0 * error;
 202                         Lifetime_mpe[i] += (error / data[head].value);
 203                         curr = MODMINUS(curr,1,HISTORYSIZE);
 204                 }
 205                 Lifetime_count++;
 206         }
 207 #endif
 208
 209
 210
 211
 212
 213         /*
 214          * use the current running mean
 215          */
 216         NAME(0) = "Last value";
 217         PRED(0,data[head]) = value;
 218
 219
 220         /*
 221          * use the current value as a prediction of the next value
 222          */
 223         tot_sum = run_mean * total;
 224         tot_sum += data[head].value;
 225         total++;
 226         run_mean = tot_sum/total;
 227         tp->run_mean = run_mean;
 228         tp->total = total;
 229         NAME(1) = "Running Mean";
 230         PRED(1,data[head]) = run_mean;
 231
 232         /*
 233          * use the Van Jacobson method
 234          */
 235         prev_pred = PRED(2,data[prev]);
 236         NAME(2) = "Van Jacobson";
 237         PRED(2,data[head]) = prev_pred + GAIN*(value - prev_pred);
 238
 239         /*
 240          * tell the median routines that the array is no longer valid
 241          */
 242         Median_sorted = 0;
 243         NAME(3) = "Median";
 244         PRED(3,data[head]) = Median(tp,MED_N);
 245         NAME(4) = "Trimmed Median";
 246         PRED(4,data[head]) = TrimmedMedian(tp,MED_N,MED_ALPHA);
 247
 248         /*
 249          * Now use TrimmedMean to give a sliding window mean
 250          */
 251         NAME(5) = "Sliding Window Mean";
 252         PRED(5,data[head]) = TrimmedMedian(tp,MED_N,0.0);
 253         NAME(6) = "Adjusted Median";
 254         AdjustedMedian(tp,6,WIN_MIN,WIN_MAX);
 255         NAME(7) = "Adjusted Mean";
 256         AdjustedMean(tp,7,WIN_MIN,WIN_MAX);
 257
 258         for(i=0; i < STATIC_PREDS; i++)
 259         {
 260                 if((TPMSE(tp,i)/TPCOUNT(tp,i)) < min_run_sq_error)
 261                 {
 262                         min_run_sq_error = TPMSE(tp,i)/TPCOUNT(tp,i);
 263                         min_run_sq_i = i;
 264                 }
 265
 266                 if((TPMAE(tp,i)/TPCOUNT(tp,i)) < min_run_p_error)
 267                 {
 268                         min_run_p_error = TPMAE(tp,i)/TPCOUNT(tp,i);
 269                         min_run_p_i = i;
 270                 }
 271
 272                 win_error =
 273                         (SQERR(i,data[head]) - SQERR(i,data[last_error])) /
 274                                 (double)error_window;
 275                 if(win_error < min_win_sq_error)
 276                 {
 277                         min_win_sq_error = win_error;
 278                         min_win_sq_i = i;
 279                 }
 280
 281                 win_error =
 282                         (ERR(i,data[head]) - ERR(i,data[last_error])) /
 283                                 (double)error_window;
 284                 if(win_error < min_win_p_error)
 285                 {
 286                         min_win_p_error = win_error;
 287                         min_win_p_i = i;
 288                 }
 289
 290         }
 291
 292
 293         NAME(MSE_PRED) = "Min MSE Pred";
 294         PRED(MSE_PRED,data[head]) = PRED(min_run_sq_i,data[head]);
 295         TPPREDNUM(tp,MSE_PRED) = min_run_sq_i;
 296         NAME(MAE_PRED) = "Min MAE Pred";
 297         PRED(MAE_PRED,data[head]) = PRED(min_run_p_i,data[head]);
 298         TPPREDNUM(tp,MAE_PRED) = min_run_p_i;
 299         NAME(WIN_MSE_PRED) = "Win MSE Pred";
 300         PRED(WIN_MSE_PRED,data[head]) = PRED(min_win_sq_i,data[head]);
 301         TPPREDNUM(tp,WIN_MSE_PRED) = min_win_sq_i;
 302         NAME(WIN_MAE_PRED) = "Win MAE Pred";
 303         PRED(WIN_MAE_PRED,data[head]) = PRED(min_win_p_i,data[head]);
 304         TPPREDNUM(tp,WIN_MAE_PRED) = min_win_p_i;
 305
 306         for(i=STATIC_PREDS; i < SECOND_ORDER_PREDS; i++)
 307         {
 308                 if((TPMSE(tp,i)/TPCOUNT(tp,i)) < min_tot_sq_error)
 309                 {
 310                         min_tot_sq_error = TPMSE(tp,i)/TPCOUNT(tp,i);
 311                         min_tot_sq_i = TPPREDNUM(tp,i);
 312                 }
 313                 if((TPMAE(tp,i)/TPCOUNT(tp,i)) < min_tot_p_error)
 314                 {
 315                         min_tot_p_error = TPMAE(tp,i)/TPCOUNT(tp,i);
 316                         min_tot_p_i = TPPREDNUM(tp,i);
 317                 }
 318         }
 319         NAME(TOT_MSE_PRED) = "Total MSE Pred";
 320         PRED(TOT_MSE_PRED,data[head]) = PRED(min_tot_sq_i,data[head]);
 321         TPPREDNUM(tp,TOT_MSE_PRED) = min_tot_sq_i;
 322         NAME(TOT_MAE_PRED) = "Total MAE Pred";
 323         PRED(TOT_MAE_PRED,data[head]) = PRED(min_tot_p_i,data[head]);
 324         TPPREDNUM(tp,TOT_MAE_PRED) = min_tot_p_i;
 325
 326 #ifdef PRINTPRED
 327         ModalDev(Histogram,MedianPred,2.0,&hi,&lo);
 328         fprintf(stdout,"%d %10.5f\n",data[head].time,
 329                 PRED(TOT_MAE_PRED,data[prev]));
 330
 331         fflush(stdout);
 332 #endif
 333
 334         return;
 335
 336 }
 337
 338 void
 339 InitThruputPrediction(struct thruput_pred *tp)
 340 {
 341         int i;
 342
 343         tp->head = 0;
 344         tp->tail = 0;
 345
 346 /* ** NOT yet as we don't quite know what min and max values to use
 347
 348         Histogram = DefineHistogram(MODES,
 349                         DIM,
 350                         Histomin,
 351                         Histomax);
 352         MeanPred = DefineHistogramPred(Histogram);
 353         MedianPred = DefineHistogramPred(Histogram);
 354         MiddlePred = DefineHistogramPred(Histogram);
 355 */
 356
 357
 358         tp->mean = 0.0;
 359         tp->run_mean = 0.0;
 360         tp->total = 0.0;
 361         memset(tp->data,0,sizeof(tp->data));
 362
 363         for(i=0; i < PREDS; i++)
 364         {
 365                 TPMSE(tp,i) = 0.0;
 366                 TPCOUNT(tp,i) = 0.0;
 367         }
 368
 369         return;
 370 }
 371
 372 /* not sure if exptime is right, I just wanted to avoid namespace collisions */
 373 void
 374 UpdateThruputPrediction(struct thruput_pred *tp,float value, time_t exptime)
 375 {
 376
 377         int head = tp->head;
 378         int tail = tp->tail;
 379         struct value_el *data = tp->data;
 380         int i;
 381         int data_size = MODMINUS(head,tail,HISTORYSIZE);
 382
 383         tp->data[tp->head].time = exptime;
 384         tp->data[tp->head].value = value;
 385
 386         /*
 387          * a negative tail value says that we haven't filled the
 388          * pipe yet.  See if this does it.
 389          */
 390         if(data_size > MIN_DATA_HISTORY)
 391         {
 392                 ThruputPrediction(tp);
 393
 394         }
 395         else
 396         {
 397                 for(i=0; i < PREDS; i++)
 398                 {
 399                         PRED(i,data[head]) = tp->data[head].value;
 400                         TPMSE(tp,i) = 0.0;
 401                         TPMAE(tp,i) = 0.0;
 402                         ERR(i,data[head]) = 0.0;
 403                         SQERR(i,data[head]) = 0.0;
 404                         MedWIN(i,data[head]) = WIN_INIT;
 405                         MeanWIN(i,data[head]) = WIN_INIT;
 406                 }
 407                 TPPREDNUM(tp,TOT_MSE_PRED) = 0;
 408                 TPPREDNUM(tp,TOT_MAE_PRED) = 0;
 409         }
 410
 411
 412
 413         tp->head = MODPLUS(head,1,HISTORYSIZE);
 414
 415         /*
 416          * if we have incremented head forward to that it is sitting on
 417          * top of tail, then we need to bump tail.  The entire buffer is
 418          * being used to fill the window.
 419          */
 420         if(tp->head == tp->tail)
 421                 tp->tail = MODPLUS(tp->tail,1,HISTORYSIZE);
 422
 423         return;
 424 }
 425
 426 #ifdef STANDALONE
 427
 428 #define PRED_ARGS "f:d:s:l:h:"
 429
 430 #include "format.h"
 431
 432 CalculateThruputPredictors(fname,tp)
 433 char *fname;
 434 struct thruput_pred *tp;
 435 {
 436         FILE *fd;
 437         char line_buf[255];
 438         char *cp;
 439         time_t ltime;
 440         float value;
 441         int i;
 442
 443
 444         fd = fopen(fname,"r");
 445
 446         if(fd == NULL)
 447         {
 448                 fprintf(stderr,
 449                         "CalculateThruputPredictors: unable to open file %s\n",
 450                                 fname);
 451                 fflush(stderr);
 452                 exit(1);
 453         }
 454
 455         while(fgets(line_buf,255,fd))
 456         {
 457                 cp = line_buf;
 458
 459                 while(isspace(*cp))
 460                         cp++;
 461
 462                 for(i=1; i < TSFIELD; i++)
 463                 {
 464                         while(!isspace(*cp))
 465                                 cp++;
 466
 467                         while(isspace(*cp))
 468                                 cp++;
 469                 }
 470
 471                 ltime = atoi(cp);
 472
 473                 cp = line_buf;
 474
 475                 for(i=1; i < VALFIELD; i++)
 476                 {
 477                         while(!isspace(*cp))
 478                                 cp++;
 479
 480                         while(isspace(*cp))
 481                                 cp++;
 482                 }
 483
 484                 value = atof(cp);
 485
 486                 if(value != 0.0)
 487                         UpdateThruputPrediction(tp,value,ltime);
 488
 489         }
 490
 491         fclose(fd);
 492 }
 493
 494
 495 struct thruput_pred Tp;
 496
 497 main(argc,argv)
 498 int argc;
 499 char *argv[];
 500 {
 501
 502         FILE *fd;
 503         int i;
 504         int j;
 505         double temp_mse;
 506         double temp_mpe;
 507         char fname[255];
 508         char c;
 509
 510
 511         if(argc < 2)
 512         {
 513                 fprintf(stderr,"usage: testpred filename\n");
 514                 fflush(stderr);
 515                 exit(1);
 516         }
 517
 518         fname[0] = 0;
 519         Histodim = DIM;
 520         Histostates = STATES;
 521         Histomax = HMAX;
 522         Histomin = HMIN;
 523
 524         while((c = getopt(argc,argv,PRED_ARGS)) != EOF)
 525         {
 526                 switch(c)
 527                 {
 528                         case 'f':
 529                                 SAFESTRCPY(fname,optarg);
 530                                 break;
 531                         case 'd':
 532                                 Histodim = atoi(optarg);
 533                                 break;
 534                         case 's':
 535                                 Histostates = atoi(optarg);
 536                                 break;
 537                         case 'l':
 538                                 Histomin = atof(optarg);
 539                                 break;
 540                         case 'h':
 541                                 Histomax = atof(optarg);
 542                                 break;
 543                 }
 544         }
 545
 546         if(fname[0] == 0)
 547         {
 548                 fprintf(stderr,"usage: predictor -f fname [-d,s,l,h]\n");
 549                 fflush(stderr);
 550                 exit(1);
 551         }
 552
 553         InitThruputPrediction(&Tp);
 554
 555 #if !defined(PRINTPRED)
 556         fprintf(stdout,"Calculating throughput predictors...");
 557         fflush(stdout);
 558 #endif
 559
 560         CalculateThruputPredictors(fname,&Tp);
 561
 562         fprintf(stdout,"done.\n");
 563         fflush(stdout);
 564
 565         for(i=0; i < PREDS; i++)
 566         {
 567
 568                 fprintf(stdout,"Predictor %d, MSE: %3.4f MAE: %3.4f, %s\n",
 569                         i,TPMSE(&Tp,i)/TPCOUNT(&Tp,i),
 570                                 TPMAE(&Tp,i)/TPCOUNT(&Tp,i),
 571                                 NAME(i));
 572         }
 573
 574         fprintf(stdout,"MSE within 1 dev: %3.2f, 2 dev: %3.2f\n",
 575                         Onedev_mse/Devcounts,Twodev_mse/Devcounts);
 576         fprintf(stdout,"MAE within 1 dev: %3.2f, 2 dev: %3.2f\n",
 577                         Onedev_mpe/Devcounts,Twodev_mpe/Devcounts);
 578         fprintf(stdout,"Modal within 1 dev: %3.2f, 2 dev: %3.2f\n",
 579                         Onedev_modal/Devcounts,Twodev_modal/Devcounts);
 580
 581
 582 #ifdef GROT
 583         fprintf(stdout,"\nlook ahead  MSE  MAE\n");
 584         for(i=0; i < LIFETIMES; i++)
 585         {
 586                 temp_mse = 0.0;
 587                 temp_mpe = 0.0;
 588                 for(j=0; j <= i; j++)
 589                 {
 590                         temp_mse += (Lifetime_mse[j]/(double)Lifetime_count);
 591                         temp_mpe += (Lifetime_mpe[j]/(double)Lifetime_count);
 592                 }
 593                 fprintf(stdout,"%d %f %f\n",i,
 594                                 temp_mse/(double)j,
 595                                 temp_mpe/(double)j);
 596         }
 597 #endif
 598
 599         fprintf(stdout,"Histogram\n");
 600         PrintHistogram(Histogram);
 601         fprintf(stdout,"\n");
 602
 603         exit(0);
 604
 605
 606 }
 607 #endif