Scripts:mkhist.py
From UC_Chemistry
(Difference between revisions)
Current revision
This program is self-documenting, and basically a wrapper for pylab's hist() function. It pops-up a plot of the histogram which can then be saved to various image file formats.
[edit]
Source
<nowiki>
#!/usr/bin/python
import sys
from matplotlib.pylab import *
from math import sqrt, exp, pi
UsageInfo = "Usage: mkhist.py [options] <input data file> <number of bins>\n" \ "Options:\n" \
"\t-n [data column] Column of input file containing data points.\n"\
"\t-t [title] Title your graph.\n"\
"\t-x [xlabel] X-Axis label.\n"\
"\t-y [ylabel] Y-Axis label.\n"\
"\t-b [time] Discard data points with column 0 below this value.\n"\
"\t-e [time] Discard data points with column 0 above this value.\n"\
"\t-L [-inf inf) Wrap input data to specified interval.\n"\
"\t-scale [1.0] Scale input data.\n"\
"\t-shift [0.0] Shift input data.\n"\
"\t-osd [dev] Discard points > \"dev\" std. dev.s from mean.\n"\
"\t-omax [max] Discard points > \"max\".\n"\
"\t-omin [max] Discard points < \"min\".\n"\
"\t-orec Recalculate avg. and dev. after above discards.\n"\
"\t-out <filename> Output data after transformations.\n"\
"\t-norm [spec/off] Display normal distribution corresponding to data\n"\
"\t with graph specification \"spec\" -- default g--.\n"\ "\t-h,--help Print out this useful reference.\n\n"\
"Comments:\n" \
"\tData are read from an input file in matrix format. Column 0 is assumed\n"\
" to contain the consecutive time index for -b. Input data is read from\n"\
" the file and -b is applied. Next -scale and -shift are applied in that\n"\" order. Finally the average and deviation are calculated and outliers\n"\
" are discarded (if -osd, -omax, or -omin are selected).\n"
def main(argv):
xatitle = "X"
yatitle = "N"
col = 1
start = "first"
end = "last"
ptitle = "Histogram Plot"
outrecalc = 0
norm = "g--"
scale = 1.0
shift = 0.0
osd = "no"
omax = "no"
omin = "no"
wrap = "no"
outdat = -1
while(len(argv) > 3):
if(argv[1] == "-x"):
xatitle = argv[2]
argv = argv[2:]
elif(argv[1] == "-y"):
yatitle = argv[2]
argv = argv[2:]
elif(argv[1] == "-t"):
ptitle = argv[2]
argv = argv[2:]
elif(argv[1] == "-b"):
start = float(argv[2])
argv = argv[2:]
elif(argv[1] == "-e"):
end = float(argv[2])
argv = argv[2:]
elif(argv[1] == "-n"):
col = int(argv[2])
argv = argv[2:]
elif(argv[1] == "-out"):
if(outdat != -1):
print "Warning! out defined twice -- using last def."
outdat = argv[2]
argv = argv[2:]
elif(argv[1] == "-L"):
wrap = (float(argv[2]), float(argv[3]))
argv = argv[3:]
elif(argv[1] == "-scale"):
scale = float(argv[2])
argv = argv[2:]
elif(argv[1] == "-shift"):
shift = float(argv[2])
argv = argv[2:]
elif(argv[1] == "-osd"): # outlier definition by x devs
if(osd != "no"):
print "Warning! osd defined twice -- using last def."
osd = float(argv[2])
argv = argv[2:]
elif(argv[1] == "-omax"): # outlier definition by max
if(omax != "no"):
print "Warning! omax defined twice -- using last def."
omax = float(argv[2])
argv = argv[2:]
elif(argv[1] == "-omin"): # outlier definition by min
if(omin != "no"):
print "Warning! omin defined twice -- using last def."
omin = float(argv[2])
argv = argv[2:]
elif(argv[1] == "-norm"):
if(argv[2].lower() in ["off", "0", "no"]):
norm = "no"
else:
norm = argv[2]
argv = argv[2:]
# 0-parameter options (flags)
elif(argv[1] == "-orec"): # re-calculate avg and dev.
outrecalc = 1
argv = argv[1:]
elif(argv[1] in ["-h","-help","--help"]):
print UsageInfo
return 0
argv = argv[1:]
else:
break
if(len(argv) < 3):
print UsageInfo
return 1
#list = readdata(argv[1])[:,col]
D = readdata(argv[1])
#print D
if(start != "first" or end != "last"): # by assumption, 0 is the time
time = D[:,0]
if(start == "first"):
start = time[0]
if(end == "last"):
end = time[-1]
if end < start:
raise RuntimeError, "Error! start and end time are bogus!"
if(time[-1] < start or time[0] > end):
print "No valid time points in data, using all!"
i = 0
j = -1
else:
for i in range(len(time)): # search for start time
if(time[i] >= start):
break
for j in range(len(time)-1, i-1, -1):
if(time[j] <= end): # search backward for end time
break
j += 1
time = time[i:j]
else:
i = 0
j = -1
if(j != -1):
list = D[i:j,col]*scale+shift
else:
list = D[i:,col]*scale+shift
del D
if wrap != "no":
print "Wrapping data to range [ %f, %f )."%(wrap[0], wrap[1])
L = float(wrap[1]-wrap[0])
list -= L*floor((list-wrap[0])/L)
#print time
print "Using %d samples"%(len(list))
mu = avg(list)
sigma = dev(list, mu)
#list = mu + sigma*randn(10000) # ideal random normal data
if(osd != "no"):
list = strip_outliers(list, mu-osd*sigma, mu+osd*sigma)
elif(omin != "no"):
if(omax != "no"):
list = strip_outliers(list, omin, omax)
else:
omax = max(list)*1.000001
list = strip_outliers(list, omin, omax)
elif(omax != "no"):
omin = min(list)*0.99999
list = strip_outliers(list, omin, omax)
list = array(list)
if outrecalc:
print "Using %d samples for re-calculation."%(len(list))
mu = avg(list)
sigma = dev(list, mu)
n, bins, patches = hist(list, int(argv[2]), normed=0)
width = bins[1] - bins[0]
print "n = " + str(n)
print "start\t width"
print "%8f %8f"%(bins[0], width)
print "avg.\t var.\t sigma."
print "%8f %8f %8f"%(mu, sigma**2, sigma)
if(outdat != -1):
write_list(outdat, list)
if norm != "no": # add a 'best fit' line
x = arange(bins[0], bins[-1]+width, 1.0e-3*(bins[-1]-bins[0]))
y = normal(x, mu, sigma)*len(list)*width
l = plot(x, y, norm, linewidth=2)
xlabel(xatitle)
ylabel(yatitle)
title(ptitle)
#axis([-20.,-5.,0,350])
show()
def strip_outliers(list, minimum, maximum):
out = []
canh = 0
canl = 0
for x in list:
if(x <= maximum):
if(x >= minimum):
out.append(x)
else:
canl += 1
else:
canh += 1
print "Removed %d outliers >%e and %d <%e from list.\n"%(\
canh, maximum, canl, minimum)
return out
def readdata(name):
digits = "01234567890.-+"
file = open(name)
list = []
cols = -1
i = 0
for line in file.xreadlines():
j = 0
for tok in line.split():
if(tok[0] not in digits):
break
list.append(float(tok))
j += 1
if(j > 0):
if(cols == -1):
cols = j
elif(j != cols):
raise ValueError, "Error, line %d contains wrong " \
"number of columns"%(i+1)
i += 1
if(cols == -1):
raise ValueError, "No data!"
file.close()
return reshape(array(list), (i,cols))
def avg(list):
return sum(list)/len(list)
def dev(list, avg):
return sqrt( sum( (list-avg)**2 )/len(list) )
def normal(x, mu, sigma):
s = 2.0*(sigma**2)
norms = x.copy()
for i in range(len(norms)):
norms[i] = exp(-(norms[i]-mu)**2 / s)
return norms/(sigma*sqrt(2.0*pi))
def write_list(name, list):
out = open(name, 'w')
for x in list:
out.write("%12e\n"%(x))
out.close()
if __name__ == "__main__":
main(sys.argv)
</nowiki>
