7 if len(sys.argv) != 2 and len(sys.argv) != 4:
8 print("Usage : {} datafile".format(sys.argv[0]))
9 print("or : {} datafile p1 p2".format(sys.argv[0]))
10 print("where : p1 < p2 belongs to sizes in datafiles")
13 if len(sys.argv) == 4:
17 ##-----------------------------------------
18 ## avg : return average of a list of values
19 ## param l list of values
20 ##-----------------------------------------
27 ##-------------------------------------------------
29 ## param X first data vector (..x_i..)
30 ## param Y second data vector (..y_i..)
31 ## S_XY = 1/n \Sum_{i=1}^n (x_i - avg(x)) * (y_i - avg(y))
32 ##--------------------------------------------------
35 assert(len(X)==len(Y))
36 n=len(X) # n=len(X)=len(Y)
41 S_XY = S_XY + ((X[i]-avg_X)*(Y[i]-avg_Y))
46 ##----------------------------------
47 ## variance : variance
48 ## param X data vector ( ..x_i.. )
49 ## (S_X)^2 = (Sum ( x_i - avg(x) )^2 ) / n
50 ##----------------------------------
56 S_X2 = S_X2 + ((X[i] - avg_X)**2)
60 ##-----------------------------------------------------------------------------------------------
61 ## correl_split_weighted : compute regression on each segment and
62 ## return the weighted sum of correlation coefficients
63 ## param X first data vector (..x_i..)
64 ## param Y second data vector (..y_i..)
65 ## param segments list of pairs (i,j) where i and j are the indices in X of the first and last values of a segment (both included)
66 ## return (C,[(a1,b1,X[i1],X[j1]), (a2,b2,X[i2],X[j2]), ....])
67 ## where i1,j1 is the first segment, c1 the correlation coef on this segment, n1=j1-i1 the segment length
68 ## i2,j2 is the second segment, c2 the correlation coef on this segment, n2=j2-i2 the segment length
70 ## ak,bk the regression line coeffs (y = ak*x + bk) of segment k, and C=(n1*c1+n2*c2+...)/(n1+n2+...)
71 ##-----------------------------------------------------------------------------------------------
72 def correl_split_weighted( X , Y , segments ):
73 # expects segments = [(0,i1),(i1,i2),(i2,len-1)] (consecutive segments share their boundary index)
75 interv = list(); # regr. line coeffs and range
78 for (start,stop) in segments:
79 sum_nb_val = sum_nb_val + stop - start;
82 S_XY= cov( X [start:stop+1], Y [start:stop+1] )
83 S_X2 = variance( X [start:stop+1] )
84 S_Y2 = variance( Y [start:stop+1] ) # to compute correlation
87 c = S_XY/(sqrt(S_X2)*sqrt(S_Y2))
88 a = S_XY/S_X2 # regr line coeffs
89 b= avg ( Y[start:stop+1] ) - a * avg( X[start:stop+1] )
90 print(" range [%d,%d] corr=%f, coeff det=%f [a=%f, b=%f]" % (X[start],X[stop],c,c**2,a, b))
91 correl.append( (c, stop-start) ); # store correl. coef + number of values (segment length)
92 interv.append( (a,b, X[start],X[stop]) );
95 glob_corr = glob_corr + (l/sum_nb_val)*c # weighted product of correlation
96 print('-- %f * %f' % (c,l/sum_nb_val))
98 print("-> glob_corr={}\n".format(glob_corr))
99 return (glob_corr,interv);
104 ##-----------------------------------------------------------------------------------------------
105 ## correl_split : compute regression on each segment and
106 ## return the product of the correlation coefficients
107 ## param X first data vector (..x_i..)
108 ## param Y second data vector (..y_i..)
109 ## param segments list of pairs (i,j) where i and j are the indices in X of the first and last values of a segment (both included)
110 ## return (C,[(a1,b1,X[i1],X[j1]), (a2,b2,X[i2],X[j2]), ....])
111 ## where i1,j1 is the first segment, c1 the correlation coef and a1,b1 the regression line coeffs on this segment,
112 ## i2,j2 is the second segment, c2 the correlation coef and a2,b2 the regression line coeffs on this segment,
115 ##-----------------------------------------------------------------------------------------------
116 def correl_split( X , Y , segments ):
117 # expects segments = [(0,i1),(i1,i2),(i2,len-1)] (consecutive segments share their boundary index)
119 interv = list(); # regr. line coeffs and range
121 for (start,stop) in segments:
124 S_XY= cov( X [start:stop+1], Y [start:stop+1] )
125 S_X2 = variance( X [start:stop+1] )
126 S_Y2 = variance( Y [start:stop+1] ) # to compute correlation
129 c = S_XY/(sqrt(S_X2)*sqrt(S_Y2))
130 a = S_XY/S_X2 # regr line coeffs
131 b= avg ( Y[start:stop+1] ) - a * avg( X[start:stop+1] )
132 print(" range [%d,%d] corr=%f, coeff det=%f [a=%f, b=%f]" % (X[start],X[stop],c,c**2,a, b))
133 correl.append( (c, stop-start) ); # store correl. coef + number of values (segment length)
134 interv.append( (a,b, X[start],X[stop]) );
137 glob_corr = glob_corr * c # product of correlation coeffs
138 print("-> glob_corr=%f\n" % glob_corr)
139 return (glob_corr,interv);
143 ##-----------------------------------------------------------------------------------------------
145 ##-----------------------------------------------------------------------------------------------
148 skampidat = open(sys.argv[1], "r")
151 ## read data from skampi logs.
155 for line in skampidat:
157 if line[0] != '#' and len(l) >= 3: # is it a comment ?
160 #count= 8388608 8388608 144916.1 7.6 32 144916.1 143262.0
161 #("%s %d %d %f %f %d %f %f\n" % (countlbl, count, countn, time, stddev, iter, mini, maxi)
162 readdata.append( (int(l[1]),float(l[3])) );
165 ## The data may not be sorted, so sort it by message size before processing.
166 sorteddata = sorted( readdata, key=lambda pair: pair[0])
167 sizes,timings = zip(*sorteddata);
170 ##----------------------- search for best break points-----------------
172 ## p1=2048 -> p1inx=11 delta=3 -> [8;14]
173 ## 8 : segments[(0,7),(8,13),(13,..)]
175 ## p2=65536 -> p2inx=16 delta=3 -> [13;19]
177 if len(sys.argv) == 4:
179 p1inx = sizes.index( p1 );
180 p2inx = sizes.index( p2 );
185 ## tweak parameters here to extend/reduce search
186 search_p1 = 30 # number of values to search +/- around p1
187 search_p2 = 65 # number of values to search +/- around p2
190 lb1 = max(1, p1inx-search_p1)
191 ub1 = min(p1inx+search_p1,search_p1, p2inx);
192 lb2 = max(p1inx,p2inx-search_p2) # breakpoint +/- delta
193 ub2 = min(p2inx+search_p2,len(sizes)-1);
195 print("** evaluating over \n");
196 print("interv1:\t %d <--- %d ---> %d" % (sizes[lb1],p1,sizes[ub1]))
197 print("rank: \t (%d)<---(%d)--->(%d)\n" % (lb1,p1inx,ub1))
198 print("interv2:\t\t %d <--- %d ---> %d" % (sizes[lb2],p2,sizes[ub2]))
199 print("rank: \t\t(%d)<---(%d)--->(%d)\n" % (lb2,p2inx,ub2))
200 for i in range(lb1,ub1+1):
201 for j in range(lb2,ub2+1):
202 if i<j: # segments must not overlap
203 if i+1 >=min_seg_size and j-i+1 >= min_seg_size and len(sizes)-1-j >= min_seg_size : # not too small segments
204 print("** i=%d,j=%d" % (i,j))
205 segments = [(0,i),(i,j),(j,len(sizes)-1)]
206 (glob_cor, interv) = correl_split( sizes, timings, segments)
207 if ( glob_cor > max_glob_corr):
208 max_glob_corr = glob_cor
211 print("#-------------------- result summary ---------------------------------------------------------------------\n");
212 for (a,b,i,j) in max_interv:
213 print("** OPT: [%d .. %d] correl coef prod=%f slope: %f x + %f" % (i,j,max_glob_corr,a,b))
215 print("#-------------------- cut here the gnuplot code -----------------------------------------------------------\n");
216 preamble='set output "regr.eps"\n\
217 set terminal postscript eps color\n\
219 set xlabel "Each message size in bytes"\n\
220 set ylabel "Time in us"\n\
226 print('plot "%s" u 3:4:($5) with errorbars title "skampi traces %s",\\' % (sys.argv[1],sys.argv[1]));
227 for (a,b,i,j) in max_interv:
228 print('"%s" u (%d<=$3 && $3<=%d? $3:0/0):(%f*($3)+%f) w linespoints title "regress. %s-%s bytes",\\' % (sys.argv[1],i,j,a,b,i,j))
230 print("#-------------------- /cut here the gnuplot code -----------------------------------------------------------\n");
234 print('\n** Linear regression on %d values **\n' % (nblines))
235 print('\n sizes=',sizes,'\n\n')
236 avg_sizes = avg( sizes )
237 avg_timings = avg( timings )
238 print("avg_timings=%f, avg_sizes=%f, nblines=%d\n" % (avg_timings,avg_sizes,nblines))
240 S_XY= cov( sizes, timings )
241 S_X2 = variance( sizes )
242 S_Y2 = variance( timings ) # to compute correlation
245 correl = S_XY/(sqrt(S_X2)*sqrt(S_Y2)) # corealation coeff (Bravais-Pearson)
248 b= avg_timings - a * avg_sizes
249 print("[S_XY=%f, S_X2=%f]\n[correlation=%f, coeff det=%f]\n[a=%f, b=%f]\n" % (S_XY, S_X2, correl,correl**2,a, b))