From: Luka Stanisic Date: Thu, 20 Mar 2014 08:17:26 +0000 (+0100) Subject: Improving benchmarking code X-Git-Tag: v3_11~207^2~3 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/fc752c6b19d1d989dc1505fa7cc74721bac6bf26 Improving benchmarking code --- diff --git a/contrib/benchmarking_code_block/Rdhist.R b/contrib/benchmarking_code_block/Rdhist.R new file mode 100644 index 0000000000..76ff210833 --- /dev/null +++ b/contrib/benchmarking_code_block/Rdhist.R @@ -0,0 +1,313 @@ +#------------------------------------------------------------------------------- +# Copyright (c) 2012 University of Illinois, NCSA. +# All rights reserved. This program and the accompanying materials +# are made available under the terms of the +# University of Illinois/NCSA Open Source License +# which accompanies this distribution, and is available at +# http://opensource.ncsa.illinois.edu/license.html +#------------------------------------------------------------------------------- +##--------------------------------------------------------------------------------------------------# +##' Variable-width (dagonally cut) histogram +##' +##' +##' When constructing a histogram, it is common to make all bars the same width. +##' One could also choose to make them all have the same area. +##' These two options have complementary strengths and weaknesses; the equal-width histogram oversmooths in regions of high density, and is poor at identifying sharp peaks; the equal-area histogram oversmooths in regions of low density, and so does not identify outliers. +##' We describe a compromise approach which avoids both of these defects. We regard the histogram as an exploratory device, rather than as an estimate of a density. +##' @name dhist +##' @title Diagonally Cut Histogram +##' @param x is a numeric vector (the data) +##' @param a is the scaling factor, default is 5 * IQR +##' @param nbins is the number of bins, default is assigned by the Stuges method +##' @param rx is the range used for the left of the left-most bin to the right of the right-most bin +##' @param eps used to set artificial bound on min width / max height of bins as described in Denby and Mallows (2009) on page 24. +##' @param xlab is label for the x axis +##' @param plot = TRUE produces the plot, FALSE returns the heights, breaks and counts +##' @param lab.spikes = TRUE labels the \% of data in the spikes +##' @return list with two elements, heights of length n and breaks of length n+1 indicating the heights and break points of the histogram bars. +##' @author Lorraine Denby, Colin Mallows +##' @references Lorraine Denby, Colin Mallows. Journal of Computational and Graphical Statistics. March 1, 2009, 18(1): 21-31. doi:10.1198/jcgs.2009.0002. +dhist <- function(x, a=5*iqr(x), + nbins=nclass.Sturges(x), rx = range(x,na.rm = TRUE), + eps=.15, xlab = "x", plot = TRUE,lab.spikes = TRUE) +{ + + if(is.character(nbins)) + nbins <- switch(casefold(nbins), + sturges = nclass.Sturges(x), + fd = nclass.FD(x), + scott = nclass.scott(x), + stop("Nclass method not recognized")) + else if(is.function(nbins)) + nbins <- nbins(x) + + x <- sort(x[!is.na(x)]) + if(a == 0) + a <- diff(range(x))/100000000 + if(a != 0 & a != Inf) { + n <- length(x) + h <- (rx[2] + a - rx[1])/nbins + ybr <- rx[1] + h * (0:nbins) + yupper <- x + (a * (1:n))/n + # upper and lower corners in the ecdf + ylower <- yupper - a/n + # + cmtx <- cbind(cut(yupper, breaks = ybr), cut(yupper, breaks = + ybr, left.include = TRUE), cut(ylower, breaks = ybr), + cut(ylower, breaks = ybr, left.include = TRUE)) + cmtx[1, 3] <- cmtx[1, 4] <- 1 + # to replace NAs when default r is used + cmtx[n, 1] <- cmtx[n, 2] <- nbins + # + #checksum <- apply(cmtx, 1, sum) %% 4 + checksum <- (cmtx[, 1] + cmtx[, 2] + cmtx[, 3] + cmtx[, 4]) %% + 4 + # will be 2 for obs. that straddle two bins + straddlers <- (1:n)[checksum == 2] + # to allow for zero counts + if(length(straddlers) > 0) { + counts <- table(c(1:nbins, cmtx[ - straddlers, 1])) + } else { + counts <- table(c(1:nbins, cmtx[, 1])) + } + counts <- counts - 1 + # + if(length(straddlers) > 0) { + for(i in straddlers) { + binno <- cmtx[i, 1] + theta <- ((yupper[i] - ybr[binno]) * n)/a + counts[binno - 1] <- counts[binno - 1] + ( + 1 - theta) + counts[binno] <- counts[binno] + theta + } + } + xbr <- ybr + xbr[-1] <- ybr[-1] - (a * cumsum(counts))/n + spike<-eps*diff(rx)/nbins + flag.vec<-c(diff(xbr)1) { + xbr.new<-xbr + counts.new<-counts + diff.xbr<-abs(diff(xbr)) + amt.spike<-diff.xbr[length(diff.xbr)] + for (i in rev(2:length(diff.xbr))) { + if (diff.xbr[i-1] <= spike&diff.xbr[i] <= spike & + !is.na(diff.xbr[i])) { + amt.spike <- amt.spike+diff.xbr[i-1] + counts.new[i-1] <- counts.new[i-1]+counts.new[i] + xbr.new[i] <- NA + counts.new[i] <- NA + flag.vec[i-1] <- T + } + else amt.spike<-diff.xbr[i-1] + } + flag.vec<-flag.vec[!is.na(xbr.new)] + flag.vec<-flag.vec[-length(flag.vec)] + counts<-counts.new[!is.na(counts.new)] + xbr<-xbr.new[!is.na(xbr.new)] + + } + else flag.vec<-flag.vec[-length(flag.vec)] + widths <- abs(diff(xbr)) + ## N.B. argument "widths" in barplot must be xbr + heights <- counts/widths + } + bin.size <- length(x)/nbins + cut.pt <- unique(c(min(x) - abs(min(x))/1000, + approx(seq(length(x)), x, (1:(nbins - 1)) * bin.size, rule = 2)$y, max(x))) + aa <- hist(x, breaks = cut.pt, plot = FALSE, probability = TRUE) + if(a == Inf) { + heights <- aa$counts + xbr <- aa$breaks + } + amt.height<-3 + q75<-quantile(heights,.75) + if (sum(flag.vec)!=0) { + amt<-max(heights[!flag.vec]) + ylim.height<-amt*amt.height + ind.h<-flag.vec&heights> ylim.height + flag.vec[heights=1) { + usr<-par('usr') + for ( i in seq(length(xbr)-1)) { + if (!flag.vec[i]) { + amt.txt<-0 + if (xbr[i]-xbr[1]1) + for(i in 1:(length(h$counts)-1)) + { + if(h$counts[i]!=0 || h$counts[i+1]!=0) + { + counts2[j]<-h$counts[i] + breaks2[j+1]<-h$breaks[i+1]; + j<-j+1 + } + } + counts2[j]<-h$counts[length(h$counts)] + breaks2[j+1]<-h$breaks[length(h$breaks)] + + h$counts<-counts2 + h$breaks<-breaks2 + + return (h) +} + +# Main +source("analysis/hist_script/Rdhist.R") + inputfile<-Sys.getenv("R_INPUT") outputfile<-Sys.getenv("R_OUTPUT") +type<-Sys.getenv("R_TYPE") + +if (!(type %in% c("mean","default","sturges","scott"))) + { + stop("Wrong histogram type") + } df<-read.table(inputfile,header=F) df<-df[,c(1,4)] @@ -21,17 +61,45 @@ attach(df) for(i in unique(NAME)) { vector1<-df[NAME==i,2] - h<-hist(vector1) - + + if (length(vector1)==1) + { + #If there is only one element + h$breaks<-c(vector1,vector1) + h$counts<-1 + } + else + { + if (type=="mean") + #Mean value only + h$breaks<-c(mean(vector1),mean(vector1)) + h$counts<-length(vector1) + else + if (type=="default") + #Standard HISTOGRAM: + h<-hist(vector1) + else + { + #Dhist: + h<-dhist(vector1,nbins=type, plot = FALSE, lab.spikes = FALSE, a=5*iqr(vector1), eps=0.15) + h$breaks<-h$xbr + h$count<-as.vector(h$counts) + h$counts<-h$count + h<-merge_empty_bins(h) + } + } + cat(i, file = outputfile, sep = "\t", append = TRUE) - cat(" ", file = outputfile, sep = "\t", append = TRUE) - cat(sprintf("%.8f", mean(vector1)), file =outputfile, sep = "\t ", append = TRUE) + cat("\t", file = outputfile, append = TRUE) + cat(sum(h$counts), file =outputfile, sep = "\t", append = TRUE) + cat("\t", file = outputfile, append = TRUE) + cat(sprintf("%.8f", mean(vector1)), file =outputfile, sep = "\t", append = TRUE) cat("\t", file = outputfile, append = TRUE) cat(length(h$breaks), file = outputfile, append = TRUE) cat("\t", file = outputfile, append = TRUE) - cat(sprintf("%.8f", h$breaks), file = outputfile, sep = " \t", append = TRUE) + cat(sprintf("%.8f", h$breaks), file = outputfile, sep = "\t", append = TRUE) cat("\t", file = outputfile, append = TRUE) h$density = h$counts/sum(h$counts) - cat(sprintf("%.14f", h$density), file = outputfile, sep = " \t", append = TRUE) + cat(sprintf("%.8f", h$density), file = outputfile, sep = "\t", append = TRUE) cat("\n", file = outputfile, append = TRUE) } diff --git a/contrib/benchmarking_code_block/Rplot_hist.R b/contrib/benchmarking_code_block/Rplot_hist.R new file mode 100644 index 0000000000..1f7487addf --- /dev/null +++ b/contrib/benchmarking_code_block/Rplot_hist.R @@ -0,0 +1,72 @@ +# R script showing .pdf file with plots of all injection histograms for a certain file + +# Can be called from the command line with: +# Rscript $this_script.R inputfile + +# Necessary libraries +library(plyr) +library(ggplot2) +library(data.table) +library(grid) + +# Functions for arranging multiple plots +vp.layout <- function(x, y) viewport(layout.pos.row=x, layout.pos.col=y) +arrange_ggplot2 <- function(list, nrow=NULL, ncol=NULL, as.table=FALSE) { +n <- length(list) +if(is.null(nrow) & is.null(ncol)) { nrow = floor(n/2) ; ncol = ceiling(n/nrow)} +if(is.null(nrow)) { nrow = ceiling(n/ncol)} +if(is.null(ncol)) { ncol = ceiling(n/nrow)} +## NOTE see n2mfrow in grDevices for possible alternative +grid.newpage() +pushViewport(viewport(layout=grid.layout(nrow,ncol) ) ) +ii.p <- 1 +for(ii.row in seq(1, nrow)){ +ii.table.row <- ii.row +if(as.table) {ii.table.row <- nrow - ii.table.row + 1} +for(ii.col in seq(1, ncol)){ +ii.table <- ii.p +if(ii.p > n) break +print(list[[ii.table]], vp=vp.layout(ii.table.row, ii.col)) +ii.p <- ii.p + 1 +} +} +} + +### Main + +# Reading command line argument with the input file path +args <- commandArgs(trailingOnly = TRUE) +fp <- file(args[1], open = "r") + +plots<-list() +i<-1 + +# Reading histograms one by one, line by line +while (length(oneLine <- readLines(fp, n = 1, warn = FALSE)) > 0) +{ myVector <- (strsplit(oneLine, "\t")) + + dfl <- ldply (myVector, data.frame) + + name<-as.character(dfl[1,]) + nbins<-as.numeric(as.character(dfl[4,])) + allbreaks<-as.numeric(as.character(dfl[5:(5+nbins-1),])) + + dh<-data.frame(Name=as.character(dfl[1,]), Total=as.numeric(as.character(dfl[2,])), Mean=as.numeric(as.character(dfl[3,])), Nbins=as.numeric(as.character(dfl[4,]))) + dh<-cbind(dh,Bstart=allbreaks[-length(allbreaks)]) + dh<-cbind(dh,Bend=allbreaks[-1]) + dh<-cbind(dh,Density=as.numeric(as.character(dfl[(5+nbins):(5+nbins*2-2),]))) + + # Plotting single histogram, if it only has one value then use geom_bar + if (nbins > 2) + plots[[i]]<-ggplot(data=data.frame(dh), aes(xmin=Bstart, xmax=Bend, ymin=0, ymax=Density)) + geom_rect(aes(fill=Density)) + theme_bw() + scale_x_continuous("Time [s]", allbreaks) + labs(title=name, y=element_text("Density %")) + else + plots[[i]]<-ggplot(data=data.frame(dh), aes(factor(Bstart))) + geom_bar(aes(fill=Density)) + theme_bw() + labs(title=name, y=element_text("Density %"), x=element_text("Time [s]")) + + i<-i+1 +} + +# Printing all plots together in a table +arrange_ggplot2(plots, as.table=TRUE) + +# End +write("Done producing a histogram plot. Open Rplots.pdf located in this folder to see the results", stdout()) diff --git a/contrib/benchmarking_code_block/inject.h b/contrib/benchmarking_code_block/inject.h index f4d9fa2b1f..57e2c8291b 100644 --- a/contrib/benchmarking_code_block/inject.h +++ b/contrib/benchmarking_code_block/inject.h @@ -24,13 +24,14 @@ #include "xbt/dict.h" #include "xbt/sysdep.h" -#define MAX_LINE_INJ 1000 +#define MAX_LINE_INJ 10000 /* * Histogram entry for each measured block * Each entry is guarded inside xbt dictionary which is read from the file */ typedef struct xbt_hist { int n; + int counts; double mean; double *breaks; double *percentage; @@ -68,7 +69,7 @@ static inline void xbt_inject_init(char *inputfile) printf("Error input file is empty!");//Skipping first row while (fgets(line, 200, fpInput) != NULL) { - key = strtok(line, " \t"); + key = strtok(line, "\t"); data = xbt_dict_get_or_null(mydict, key); if (data) @@ -77,15 +78,16 @@ static inline void xbt_inject_init(char *inputfile) data = (xbt_hist_t *) xbt_new(xbt_hist_t, 1); data->block_id = key; - data->mean = atof(strtok(NULL, " \t")); - data->n = atoi(strtok(NULL, " \t")); + data->counts = atoi(strtok(NULL, "\t")); + data->mean = atof(strtok(NULL, "\t")); + data->n = atoi(strtok(NULL, "\t")); data->breaks = (double*) malloc(sizeof(double) * data->n); data->percentage = (double*) malloc(sizeof(double) * (data->n - 1)); for (i = 0; i < data->n; i++) - data->breaks[i] = atof(strtok(NULL, " \t")); + data->breaks[i] = atof(strtok(NULL, "\t")); for (i = 0; i < (data->n - 1); i++) - data->percentage[i] = atof(strtok(NULL, " \t")); + data->percentage[i] = atof(strtok(NULL, "\t")); xbt_dict_set(mydict, key, data, NULL); } @@ -103,7 +105,10 @@ static inline void inject_init_starpu(char *inputfile, xbt_dict_t *dict, RngStre mydict = *dict; FILE* fpInput = fopen(inputfile, "r"); if (fpInput == NULL) + { printf("Error while opening the inputfile"); + return; + } fseek(fpInput, 0, 0); @@ -113,12 +118,15 @@ static inline void inject_init_starpu(char *inputfile, xbt_dict_t *dict, RngStre xbt_hist_t* data; if (fgets(line, MAX_LINE_INJ, fpInput) == NULL) + { printf("Error input file is empty!");//Skipping first row + return; + } while (fgets(line, MAX_LINE_INJ, fpInput) != NULL) { - key = strtok(line, " \t"); + key = strtok(line, "\t"); data = xbt_dict_get_or_null(mydict, key); if (data) @@ -126,16 +134,17 @@ static inline void inject_init_starpu(char *inputfile, xbt_dict_t *dict, RngStre data = (xbt_hist_t *) xbt_new(xbt_hist_t, 1); data->block_id = key; - data->mean = atof(strtok(NULL, " \t")); - data->n = atoi(strtok(NULL, " \t")); + data->counts = atoi(strtok(NULL, "\t")); + data->mean = atof(strtok(NULL, "\t")); + data->n = atoi(strtok(NULL, "\t")); data->breaks = (double*) malloc(sizeof(double) * data->n); data->percentage = (double*) malloc(sizeof(double) * (data->n - 1)); for (i = 0; i < data->n; i++) - data->breaks[i] = atof(strtok(NULL, " \t")); + data->breaks[i] = atof(strtok(NULL, "\t")); for (i = 0; i < (data->n - 1); i++) { - data->percentage[i] = atof(strtok(NULL, " \t")); + data->percentage[i] = atof(strtok(NULL, "\t")); } xbt_dict_set(mydict, key, data, NULL);