Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
1299c08ed8dd7269a063a9a036357c67c6e1d2db
[simgrid.git] / src / nws_portability / Forecast / predictor.c
1 /* $Id$ */
2
3 #include "config_portability.h"
4 #include <stdio.h>      /* fprintf() fflush() */
5 #include "strutil.h"    /* SAFESTRCPY() */
6 #include "predictor.h"
7
8 /* These should be in a header file, median.h */
9 extern double Median();
10 extern double TrimmedMedian();
11 extern void AdjustedMean();
12 extern void AdjustedMedian();
13 extern char *DefineHistogram();
14 extern char *DefineHistogramPred();
15 extern void PrintHistogram();
16 extern void ModalDev();
17
18 /* not yet
19 char *Histogram;
20 char *MeanPred;
21 char *MedianPred;
22 char *MiddlePred;
23 int Histodim;
24 int Histostates;
25 double Histomax;
26 double Histomin;
27 */
28
29 double Onedev_mse;
30 double Onedev_mpe;
31 double Onedev_modal;
32 double Twodev_mse;
33 double Twodev_mpe;
34 double Twodev_modal;
35 double Devcounts;
36
37 double Lifetime_mse[LIFETIMES];
38 double Lifetime_mpe[LIFETIMES];
39 int Lifetime_count = 0;
40 extern int Median_sorted;
41
42 const char *Names[PREDS];
43 #define NAME(i) (Names[i])
44
45 void
46 PrintPredData(tp,where)
47 struct thruput_pred *tp;
48 int where;
49 {
50         struct value_el *data = tp->data;
51         int prev = MODMINUS(where,1,HISTORYSIZE);
52         int i;
53
54         fprintf(stdout,"%d %3.4f ",(int)data[where].time, data[where].value);
55
56         for(i=0; i < PREDS; i++)
57         {
58                 fprintf(stdout,"| %3.4f %3.4f %3.4f ",PRED(i,data[prev]),
59                         TPMSE(tp,i)/TPCOUNT(tp,i),
60                         TPMAE(tp,i)/TPCOUNT(tp,i));
61         }
62         fprintf(stdout,"| %d %d | ",MedWIN(6,data[prev]),MeanWIN(7,data[prev]));
63         for(i=STATIC_PREDS; i < SECOND_ORDER_PREDS; i++)
64         {
65                 fprintf(stdout,"%d ",TPPREDNUM(tp,i));
66         }
67         fprintf(stdout,"| ");
68         for(i=SECOND_ORDER_PREDS; i < PREDS; i++)
69         {
70                 fprintf(stdout,"%d ",TPPREDNUM(tp,i));
71         }
72         fprintf(stdout,"\n");
73         fflush(stdout);
74
75         return;
76 }
77
78
79 void
80 ThruputPrediction(tp)
81 struct thruput_pred *tp;
82 {
83          int tail = tp->tail;
84          int head = tp->head;
85          int i;
86          int prev;
87          double error;
88          double sq_error;
89          struct value_el *data = tp->data;
90          double value;
91          double run_mean = tp->run_mean;
92          double total = tp->total;
93          double tot_sum;
94          double min_run_p_error = MAXDOUBLE;
95          int min_run_p_i = 0;
96          double min_run_sq_error = MAXDOUBLE;
97          int min_run_sq_i = 0;
98          double prev_pred;
99          double absolute_error;
100          int last_error;
101          double min_win_sq_error = MAXDOUBLE;
102          double min_win_p_error = MAXDOUBLE;
103          int min_win_sq_i = 0;
104          int min_win_p_i = 0;
105          double min_tot_sq_error = MAXDOUBLE;
106          double min_tot_p_error = MAXDOUBLE;
107          int min_tot_sq_i = 0;
108          int min_tot_p_i = 0;
109          double win_error;
110          int error_window;
111          int datasize;
112
113          datasize = MODMINUS(head,tail,HISTORYSIZE) - 1;
114          if(ERR_WIN > datasize)
115                 error_window = datasize;
116          else
117                 error_window = ERR_WIN;
118
119          last_error = MODMINUS(head,error_window,HISTORYSIZE);
120
121
122
123         value = data[head].value;
124         prev = MODMINUS(head,1,HISTORYSIZE);
125 #ifdef GROT
126         /*
127          * was the value within one deviation of the total prediction?
128          */
129         avg_err = TPMAE(tp,TOT_MSE_PRED)/TPCOUNT(tp,TOT_MSE_PRED);
130         if((value > (PRED(TOT_MSE_PRED,data[prev])-avg_err)) &&
131            (value < (PRED(TOT_MSE_PRED,data[prev])+avg_err)))
132         {
133                 Onedev_mse++;
134         }
135         if((value > (PRED(TOT_MSE_PRED,data[prev])-2*avg_err)) &&
136            (value < (PRED(TOT_MSE_PRED,data[prev])+2*avg_err)))
137         {
138                 Twodev_mse++;
139         }
140
141         avg_err = TPMAE(tp,TOT_MAE_PRED)/TPCOUNT(tp,TOT_MAE_PRED);
142         if((value > (PRED(TOT_MAE_PRED,data[prev])-avg_err)) &&
143            (value < (PRED(TOT_MAE_PRED,data[prev])+avg_err)))
144         {
145                 Onedev_mpe++;
146         }
147         if((value > (PRED(TOT_MAE_PRED,data[prev])-2*avg_err)) &&
148            (value < (PRED(TOT_MAE_PRED,data[prev])+2*avg_err)))
149         {
150                 Twodev_mpe++;
151         }
152
153         ModalDev(Histogram,MedianPred,1.0,&hi,&lo);
154         if((value > lo) && (value < hi))
155                 Onedev_modal++;
156         ModalDev(Histogram,MedianPred,2.0,&hi,&lo);
157         if((value > lo) && (value < hi))
158                 Twodev_modal++;
159
160         Devcounts++;
161 #endif
162
163         /*
164          * calculate the error our last prediction made
165          * now that we have the actual value
166          */
167         for(i=0; i < PREDS; i++)
168         {
169
170                 error = data[head].value - PRED(i,data[prev]);
171                 if(TPCOUNT(tp,i) < MIN_DATA_HISTORY)
172                         error = 0.0;
173                 sq_error = error * error;
174
175                 SQERR(i,data[head]) = SQERR(i,data[prev])+sq_error;
176                 TPMSE(tp,i) += sq_error;
177                 if(error < 0.0)
178                         error = -1.0 * error;
179                 absolute_error = error;
180                 ERR(i,data[head]) = ERR(i,data[prev])+absolute_error;
181                 TPMAE(tp,i) += absolute_error;
182                 TPCOUNT(tp,i) += 1.0;
183
184         }
185
186 #ifdef GROT
187         /*
188          * now calculate MSE values over different lifetimes
189          */
190         if(datasize > LIFETIMES)
191         {
192                 curr = prev;
193                 for(i=0; i < LIFETIMES; i++)
194                 {
195                         error = data[head].value - 
196                                 PRED(TOT_MSE_PRED,data[curr]);
197                         Lifetime_mse[i] += (error * error); 
198                         error = data[head].value - 
199                                 PRED(TOT_MAE_PRED,data[curr]);
200                         if(error < 0.0)
201                                 error = -1.0 * error;
202                         Lifetime_mpe[i] += (error / data[head].value);
203                         curr = MODMINUS(curr,1,HISTORYSIZE);
204                 }
205                 Lifetime_count++;
206         }
207 #endif
208
209
210
211
212
213         /*
214          * use the current running mean
215          */
216         NAME(0) = "Last value";
217         PRED(0,data[head]) = value;
218
219
220         /*
221          * use the current value as a prediction of the next value
222          */
223         tot_sum = run_mean * total;
224         tot_sum += data[head].value;
225         total++;
226         run_mean = tot_sum/total;
227         tp->run_mean = run_mean;
228         tp->total = total;
229         NAME(1) = "Running Mean";
230         PRED(1,data[head]) = run_mean;
231
232         /*
233          * use the Van Jacobson method
234          */
235         prev_pred = PRED(2,data[prev]);
236         NAME(2) = "Van Jacobson";
237         PRED(2,data[head]) = prev_pred + GAIN*(value - prev_pred);
238
239         /*
240          * tell the median routines that the array is no longer valid
241          */
242         Median_sorted = 0;
243         NAME(3) = "Median";
244         PRED(3,data[head]) = Median(tp,MED_N);
245         NAME(4) = "Trimmed Median";
246         PRED(4,data[head]) = TrimmedMedian(tp,MED_N,MED_ALPHA);
247
248         /*
249          * Now use TrimmedMean to give a sliding window mean
250          */
251         NAME(5) = "Sliding Window Mean";
252         PRED(5,data[head]) = TrimmedMedian(tp,MED_N,0.0);
253         NAME(6) = "Adjusted Median";
254         AdjustedMedian(tp,6,WIN_MIN,WIN_MAX);
255         NAME(7) = "Adjusted Mean";
256         AdjustedMean(tp,7,WIN_MIN,WIN_MAX);
257
258         for(i=0; i < STATIC_PREDS; i++)
259         {
260                 if((TPMSE(tp,i)/TPCOUNT(tp,i)) < min_run_sq_error)
261                 {
262                         min_run_sq_error = TPMSE(tp,i)/TPCOUNT(tp,i);
263                         min_run_sq_i = i;
264                 }
265
266                 if((TPMAE(tp,i)/TPCOUNT(tp,i)) < min_run_p_error)
267                 {
268                         min_run_p_error = TPMAE(tp,i)/TPCOUNT(tp,i);
269                         min_run_p_i = i;
270                 }
271
272                 win_error = 
273                         (SQERR(i,data[head]) - SQERR(i,data[last_error])) /
274                                 (double)error_window;
275                 if(win_error < min_win_sq_error)
276                 {
277                         min_win_sq_error = win_error;
278                         min_win_sq_i = i;
279                 }
280
281                 win_error = 
282                         (ERR(i,data[head]) - ERR(i,data[last_error])) /
283                                 (double)error_window;
284                 if(win_error < min_win_p_error)
285                 {
286                         min_win_p_error = win_error;
287                         min_win_p_i = i;
288                 }
289
290         }
291
292
293         NAME(MSE_PRED) = "Min MSE Pred";
294         PRED(MSE_PRED,data[head]) = PRED(min_run_sq_i,data[head]);
295         TPPREDNUM(tp,MSE_PRED) = min_run_sq_i;
296         NAME(MAE_PRED) = "Min MAE Pred";
297         PRED(MAE_PRED,data[head]) = PRED(min_run_p_i,data[head]);
298         TPPREDNUM(tp,MAE_PRED) = min_run_p_i;
299         NAME(WIN_MSE_PRED) = "Win MSE Pred";
300         PRED(WIN_MSE_PRED,data[head]) = PRED(min_win_sq_i,data[head]);
301         TPPREDNUM(tp,WIN_MSE_PRED) = min_win_sq_i;
302         NAME(WIN_MAE_PRED) = "Win MAE Pred";
303         PRED(WIN_MAE_PRED,data[head]) = PRED(min_win_p_i,data[head]);
304         TPPREDNUM(tp,WIN_MAE_PRED) = min_win_p_i;
305
306         for(i=STATIC_PREDS; i < SECOND_ORDER_PREDS; i++)
307         {
308                 if((TPMSE(tp,i)/TPCOUNT(tp,i)) < min_tot_sq_error)
309                 {
310                         min_tot_sq_error = TPMSE(tp,i)/TPCOUNT(tp,i);
311                         min_tot_sq_i = TPPREDNUM(tp,i);
312                 }
313                 if((TPMAE(tp,i)/TPCOUNT(tp,i)) < min_tot_p_error)
314                 {
315                         min_tot_p_error = TPMAE(tp,i)/TPCOUNT(tp,i);
316                         min_tot_p_i = TPPREDNUM(tp,i);
317                 }
318         }
319         NAME(TOT_MSE_PRED) = "Total MSE Pred";
320         PRED(TOT_MSE_PRED,data[head]) = PRED(min_tot_sq_i,data[head]);
321         TPPREDNUM(tp,TOT_MSE_PRED) = min_tot_sq_i;
322         NAME(TOT_MAE_PRED) = "Total MAE Pred";
323         PRED(TOT_MAE_PRED,data[head]) = PRED(min_tot_p_i,data[head]);
324         TPPREDNUM(tp,TOT_MAE_PRED) = min_tot_p_i;
325
326 #ifdef PRINTPRED
327         ModalDev(Histogram,MedianPred,2.0,&hi,&lo);
328         fprintf(stdout,"%d %10.5f\n",data[head].time,
329                 PRED(TOT_MAE_PRED,data[prev]));
330         
331         fflush(stdout);
332 #endif
333
334         return;
335
336 }
337
338 void
339 InitThruputPrediction(struct thruput_pred *tp)
340 {
341         int i;
342
343         tp->head = 0;
344         tp->tail = 0;
345
346 /* ** NOT yet as we don't quite know what min and max values to use
347
348         Histogram = DefineHistogram(MODES,
349                         DIM,
350                         Histomin,
351                         Histomax);
352         MeanPred = DefineHistogramPred(Histogram);
353         MedianPred = DefineHistogramPred(Histogram);
354         MiddlePred = DefineHistogramPred(Histogram);
355 */
356
357
358         tp->mean = 0.0;
359         tp->run_mean = 0.0;
360         tp->total = 0.0;
361         memset(tp->data,0,sizeof(tp->data));
362
363         for(i=0; i < PREDS; i++)
364         {
365                 TPMSE(tp,i) = 0.0;
366                 TPCOUNT(tp,i) = 0.0;
367         }
368
369         return;
370 }
371
372 /* not sure if exptime is right, I just wanted to avoid namespace collisions */
373 void
374 UpdateThruputPrediction(struct thruput_pred *tp,float value, time_t exptime)
375 {
376
377         int head = tp->head;
378         int tail = tp->tail;
379         struct value_el *data = tp->data;
380         int i;
381         int data_size = MODMINUS(head,tail,HISTORYSIZE);
382
383         tp->data[tp->head].time = exptime;
384         tp->data[tp->head].value = value;
385
386         /*
387          * a negative tail value says that we haven't filled the
388          * pipe yet.  See if this does it.
389          */
390         if(data_size > MIN_DATA_HISTORY)
391         {
392                 ThruputPrediction(tp);
393
394         }
395         else
396         {
397                 for(i=0; i < PREDS; i++)
398                 {
399                         PRED(i,data[head]) = tp->data[head].value;
400                         TPMSE(tp,i) = 0.0;
401                         TPMAE(tp,i) = 0.0;
402                         ERR(i,data[head]) = 0.0;
403                         SQERR(i,data[head]) = 0.0;
404                         MedWIN(i,data[head]) = WIN_INIT;
405                         MeanWIN(i,data[head]) = WIN_INIT;
406                 }
407                 TPPREDNUM(tp,TOT_MSE_PRED) = 0;
408                 TPPREDNUM(tp,TOT_MAE_PRED) = 0;
409         }
410
411
412
413         tp->head = MODPLUS(head,1,HISTORYSIZE);
414
415         /*
416          * if we have incremented head forward to that it is sitting on
417          * top of tail, then we need to bump tail.  The entire buffer is
418          * being used to fill the window.
419          */
420         if(tp->head == tp->tail)
421                 tp->tail = MODPLUS(tp->tail,1,HISTORYSIZE);
422
423         return;
424 }
425
426 #ifdef STANDALONE
427
428 #define PRED_ARGS "f:d:s:l:h:"
429
430 #include "format.h"
431
432 CalculateThruputPredictors(fname,tp)
433 char *fname;
434 struct thruput_pred *tp;
435 {
436         FILE *fd;
437         char line_buf[255];
438         char *cp;
439         time_t ltime;
440         float value;
441         int i;
442
443
444         fd = fopen(fname,"r");
445
446         if(fd == NULL)
447         {
448                 fprintf(stderr,
449                         "CalculateThruputPredictors: unable to open file %s\n",
450                                 fname);
451                 fflush(stderr);
452                 exit(1);
453         }
454
455         while(fgets(line_buf,255,fd))
456         {
457                 cp = line_buf;
458
459                 while(isspace(*cp))
460                         cp++;
461
462                 for(i=1; i < TSFIELD; i++)
463                 {
464                         while(!isspace(*cp))
465                                 cp++;
466
467                         while(isspace(*cp))
468                                 cp++;
469                 }
470
471                 ltime = atoi(cp);
472
473                 cp = line_buf;
474
475                 for(i=1; i < VALFIELD; i++)
476                 {
477                         while(!isspace(*cp))
478                                 cp++;
479
480                         while(isspace(*cp))
481                                 cp++;
482                 }
483
484                 value = atof(cp);
485
486                 if(value != 0.0)
487                         UpdateThruputPrediction(tp,value,ltime);
488
489         }
490
491         fclose(fd);
492 }
493
494
495 struct thruput_pred Tp;
496
497 main(argc,argv)
498 int argc;
499 char *argv[];
500 {
501
502         FILE *fd;
503         int i;
504         int j;
505         double temp_mse;
506         double temp_mpe;
507         char fname[255];
508         char c;
509
510
511         if(argc < 2)
512         {
513                 fprintf(stderr,"usage: testpred filename\n");
514                 fflush(stderr);
515                 exit(1);
516         }
517
518         fname[0] = 0;
519         Histodim = DIM;
520         Histostates = STATES;
521         Histomax = HMAX;
522         Histomin = HMIN;
523
524         while((c = getopt(argc,argv,PRED_ARGS)) != EOF)
525         {
526                 switch(c)
527                 {
528                         case 'f':
529                                 SAFESTRCPY(fname,optarg);
530                                 break;
531                         case 'd':
532                                 Histodim = atoi(optarg);
533                                 break;
534                         case 's':
535                                 Histostates = atoi(optarg);
536                                 break;
537                         case 'l':
538                                 Histomin = atof(optarg);
539                                 break;
540                         case 'h':
541                                 Histomax = atof(optarg);
542                                 break;
543                 }
544         }
545
546         if(fname[0] == 0)
547         {
548                 fprintf(stderr,"usage: predictor -f fname [-d,s,l,h]\n");
549                 fflush(stderr);
550                 exit(1);
551         }
552
553         InitThruputPrediction(&Tp);
554                                 
555 #if !defined(PRINTPRED)
556         fprintf(stdout,"Calculating throughput predictors...");
557         fflush(stdout);
558 #endif
559
560         CalculateThruputPredictors(fname,&Tp);
561
562         fprintf(stdout,"done.\n");
563         fflush(stdout);
564
565         for(i=0; i < PREDS; i++)
566         {
567
568                 fprintf(stdout,"Predictor %d, MSE: %3.4f MAE: %3.4f, %s\n",
569                         i,TPMSE(&Tp,i)/TPCOUNT(&Tp,i),
570                                 TPMAE(&Tp,i)/TPCOUNT(&Tp,i),
571                                 NAME(i));
572         }
573
574         fprintf(stdout,"MSE within 1 dev: %3.2f, 2 dev: %3.2f\n",
575                         Onedev_mse/Devcounts,Twodev_mse/Devcounts);
576         fprintf(stdout,"MAE within 1 dev: %3.2f, 2 dev: %3.2f\n",
577                         Onedev_mpe/Devcounts,Twodev_mpe/Devcounts);
578         fprintf(stdout,"Modal within 1 dev: %3.2f, 2 dev: %3.2f\n",
579                         Onedev_modal/Devcounts,Twodev_modal/Devcounts);
580
581
582 #ifdef GROT
583         fprintf(stdout,"\nlook ahead  MSE  MAE\n");
584         for(i=0; i < LIFETIMES; i++)
585         {
586                 temp_mse = 0.0;
587                 temp_mpe = 0.0;
588                 for(j=0; j <= i; j++)
589                 {
590                         temp_mse += (Lifetime_mse[j]/(double)Lifetime_count);
591                         temp_mpe += (Lifetime_mpe[j]/(double)Lifetime_count);
592                 }
593                 fprintf(stdout,"%d %f %f\n",i,
594                                 temp_mse/(double)j,
595                                 temp_mpe/(double)j);
596         }
597 #endif
598
599         fprintf(stdout,"Histogram\n");
600         PrintHistogram(Histogram);
601         fprintf(stdout,"\n");
602
603         exit(0);
604
605         
606 }
607 #endif