Scripts:calc du.py
From UC_Chemistry
Notes:
- Make sure kT is correct for your system.
- Reading the rmin data multiplies by 10 to convert Gromacs nm to Angstrom units.
- h is in Angstroms and is arbitrary, but should be consistent with other plots.
- gnuplot line for output data:
u 1:3:($4/sqrt($2-1))
Output Datafile Columns:
- lambda (Angstroms)
- Effective number of samples (N)
- <DU|lambda> (input energy units -- CHECK WITH kT!)
- <variance DU | lambda> (input energy units ^2)
- simulation number (starting at zero) from which best bound was taken
- best upper / lower bound, depending on direction (input energy units)
- Standard deviation (estimation error) of best bound.
[edit]
Source
#!/usr/bin/python
import sys
from ucgrad import *
kT = Boltzmann*298.15/4.184
step = 0.01
def main(argv):
if len(argv) < 4:
print "Usage: %s <+/-> <M en> <rmin> <enlist> [-d diff. file (subtracted from last enlist)]"%(argv[0])
return 1
if argv[1] == '-': # upper bound
a = 1.0
arg = argmin
elif argv[1] == '+': # lower bound
a = -1.0
arg = argmax
else:
raise runtimeError, "No sign selected."
del argv[1]
NS = 0
m = []
rmin = []
du = []
mcol = -1
rcol = -1
ucol = -1
while len(argv) > 1:
if argv[1] == '-d':
if len(du) < 1:
raise runtimeError, "No complete input sets found!"
du[-1] -= read_matrix(argv[2])[:,ucol]
del argv[1:3]
continue
elif argv[1] == '-m':
mcol = int(argv[2])
del argv[1:3]
continue
elif argv[1] == '-r':
rcol = int(argv[2])
del argv[1:3]
continue
elif argv[1] == '-u':
ucol = int(argv[2])
del argv[1:3]
continue
# Append m, rmin (converted to Ang.) , u
m.append(read_matrix(argv[1])[:,mcol])
rmin.append(read_matrix(argv[2])[:,rcol]*10.0)
#rmin.append(read_matrix(argv[2])[:,rcol])
du.append(read_matrix(argv[3])[:,ucol])
NS += 1
del argv[1:4]
print >>sys.stderr, "%d input sets read."%NS
# Sort samples according to rmin.
wt = []
for i in range(NS):
lt = zip(rmin[i].copy(),m[i].copy(),du[i].copy()) # list of tuples
lt.sort()
for j,l in enumerate(lt): # re-order original data.
rmin[i][j] = l[0]
m[i][j] = l[1]
du[i][j] = l[2]
wt.append(exp(m[i]/kT))
#M = zeros((len(wt[-1]), 4))
#M[:,0] = rmin[i]
#M[:,1] = m[i]
#M[:,2] = du[i]
#M[:,3] = wt[i]
#write_matrix("dat.%d"%i, M)
# Create list of minimum observed rmin-s to act as an "on"
# switch and avoid bias during averaging.
lon = array([rmin[i][0] for i in range(NS)])
# Loop through lambda values and calculate stuff.
st = zeros(NS, int)
i = 0
start = floor(min(lon)/step)*step # Find a round starting point.
while 1:
i += 1
lm = start+i*step
on = lon <= lm
# Update start indices.
for j in range(NS):
while st[j] < len(rmin[j]) and rmin[j][st[j]] <= lm:
st[j] += 1
# Turn off those without further data.
if st[j] == len(rmin[j]):
on[j] = False
if not any(on): # Do any simulations merit inclusion?
if any(lon > lm):
print "Unable to use highlighted datasets: " \
+ str(lon > lm)
break
#print >>sys.stderr, "%4.2f"%lm
S,mu,v = calc_mv([du[j] for j in range(NS) if on[j]],\
[st[j] for j in range(NS) if on[j]],\
[wt[j] for j in range(NS) if on[j]])
if S <= 1.0: # Break in the data or none at all, either way
if any(lon > lm):
print "Unable to use highlighted datasets: " \
+ str(lon > lm)
break # we can't continue the calc.
line = "%4.2f %.19f %.19f %.19f "%(lm, S, mu, v)
nb, b, be = calc_best_bound(m,du,on,st,a,arg)
if nb < 0: # None of the sim.s has more than one sample.
continue
line += "%.19f %.19f %5d"%(b, be, nb)
print line
return 0
# The bound is of the form <U>+a*<VM>/kT for each sim.
# and we want the max/min of that from any sim.
def calc_best_bound(m,du,on,st,a,arg):
oni = on.copy()
b = zeros(len(m))
be = zeros(len(m))
nb = -1
for i in range(len(m)):
if oni[i]:
N,mu,v = calc_mv([du[i]],[st[i]])
if N <= 1.0:
oni[i] = False
continue
b[i] = mu
be[i] = v/(N-1.0)
N,mu,v = calc_mv([m[i]],[st[i]])
b[i] += a*v/kT
be[i] += (2.0*a*v/kT)**2/(N-1.0)
nb = i
be = sqrt(be) # change var. to stdev
# Find best in "on" list.
best = b[nb]+a*be[nb]
for i in range(len(m)):
if oni[i] and arg([b[i]+a*be[i], best]) == 0:
nb = i
best = b[i]+a*be[i]
return nb, b[nb], be[nb]
# Effective number of samples is the tricky one.
# We set the wt.s such that the max from each sim. is 1
# then sum all the wt.s to get an effective N.
def calc_mv(x, st, wt=None):
if wt == None: # Settle the issue of weights.
wt = [ones(len(xi)) for xi in x]
# Splice all data into a super-data element.
y = []
w = []
#i = 0
for xi, si, wi in zip(x,st,wt):
# i += 1
if si < len(xi):
# print "Dataset %d, %d samples, maxwt = %f, Neff = %f"%(i,len(wi[si:]), max(wi[si:]), sum(wi[si:])/max(wi[si:]))
y += xi[si:].tolist()
w += (wi[si:]/max(wi[si:])).tolist()
y = array(y)
w = array(w)
N = sum(w)
m = sum(y*w)/N
v = sum((y-m)*(y-m)*w)/N
return N, m, v
def cov(x,y):
return (sum(x*y) - sum(x)*sum(y)/len(x)) / len(x)
def pfee(x):
m, v, me, ve = stats(exp(x/kT))
return kT*log(m), kT*me/m
def nfee(x):
m, v, me, ve = stats(exp(-x/kT))
return -kT*log(m), kT*me/m
if __name__=="__main__":
main(sys.argv)
