Scripts:calc u.py

From UC_Chemistry

Jump to: navigation, search

Notes:

  • Make sure kT is correct for your system.
  • Reading the rmin data multiplies by 10 to convert Gromacs nm to Angstrom units.
  • h is in Angstroms is arbitrary, but should be consistent with other plots.
  • gnuplot line for output data:
u 1:3:($4/sqrt($2-1))

Source

#!/usr/bin/python

import sys
from ucgrad import *

#kT = Boltzmann*298.15
#step = 0.05
kT = Boltzmann*298.15/4.184
step = 0.01

def main(argv):
        if len(argv) < 3:
                print "Usage: %s <M en> <rmin> <u>"%(argv[0])
                return 1

        NS = 0
        m = []
        rmin = []
        u = []
        mcol = -1
        rcol = -1
        ucol = -1
        while len(argv) > 1:
                if argv[1] == '-d':
                        if len(u) < 1:
                          raise runtimeError, "No complete input sets found!"
                        u[-1] -= read_matrix(argv[2])[:,ucol]
                        del argv[1:3]
                        continue
                elif argv[1] == '-m':
                        mcol = int(argv[2])
                        del argv[1:3]
                        continue
                elif argv[1] == '-r':
                        rcol = int(argv[2])
                        del argv[1:3]
                        continue
                elif argv[1] == '-u':
                        ucol = int(argv[2])
                        del argv[1:3]
                        continue
                # Append m, rmin (converted to Ang.) , u
                m.append(read_matrix(argv[1])[:,mcol])
                #rmin.append(read_matrix(argv[2])[:,rcol])
                rmin.append(read_matrix(argv[2])[:,rcol]*10.0)
                u.append(read_matrix(argv[3])[:,ucol])
                assert len(m[-1]) == len(rmin[-1]) and len(m[-1]) == len(u[-1])
                NS += 1
                del argv[1:4]
        print >>sys.stderr, "%d input sets read."%NS

        # Sort samples according to rmin.
        wt = []
        for i in range(NS):
            lt = zip(rmin[i].copy(),m[i].copy(),u[i].copy()) # list of tuples
            lt.sort()
            for j,l in enumerate(lt): # re-order original data.
                rmin[i][j] = l[0]
                m[i][j] = l[1]
                u[i][j] = l[2]
            wt.append(exp(m[i]/kT))

        # Create list of minimum observed rmin-s to act as an "on"
        # switch and avoid bias during averaging.
        lon = array([rmin[i][0] for i in range(NS)])

        # Loop through lambda values and calculate stuff.
        st = zeros(NS, int)
        i = 0
        n = 0
        start = int(min(lon)/step) # Find a round starting point.
        while 1:
                i += 1
                lm = (start+i)*step
                on = lon <= lm
                # Update start indices.
                for j in range(NS):
                        while st[j] < len(rmin[j]) and rmin[j][st[j]] <= lm:
                                st[j] += 1
                        # Turn off those without further data.
                        if st[j] == len(rmin[j]):
                                on[j] = False
                if not any(on): # Do any simulations merit inclusion?
                        if any(lon > lm):
                                print "Unable to use highlighted datasets: " \
                                        + str(lon > lm)
                        break

                #print >>sys.stderr, "%4.2f"%lm
                S,mu,v = calc_mv([u[j] for j in range(NS) if on[j]],\
                                [st[j] for j in range(NS) if on[j]],\
                                [wt[j] for j in range(NS) if on[j]])
                if S <= 1.0: # Break in the data or none at all, either way
                        if n == 0: # (false start)
                                continue
                        if any(lon > lm):
                                print "Unable to use highlighted datasets: " \
                                        + str(lon > lm)
                        break # we can't continue the calc.

                line = "%4.2f %.19f %.19f %.19f "%(lm, S, mu, sqrt(v))
                n += 1
                print line

        return 0

# Effective number of samples is the tricky one.
# We set the wt.s such that the max from each sim. is 1
# then sum all the wt.s to get an effective N.
def calc_mv(x, st, wt=None):
        if wt == None: # Settle the issue of weights.
                wt = [ones(len(xi)) for xi in x]
        # Splice all data into a super-data element.
        y = []
        w = []
        #i = 0
        for xi, si, wi in zip(x,st,wt):
        #    i += 1
            if si < len(xi):
        #       print "Dataset %d, %d samples, maxwt = %f, Neff = %f"%(i,len(wi[si:]), max(wi[si:]), sum(wi[si:])/max(wi[si:]))
                y += xi[si:].tolist()
                w += (wi[si:]/max(wi[si:])).tolist()
        y = array(y)
        w = array(w)
        N = sum(w)
        m = sum(y*w)/N
        v = sum((y-m)*(y-m)*w)/N
        return N, m, v

def cov(x,y):
        return (sum(x*y) - sum(x)*sum(y)/len(x)) / len(x)

def pfee(x):
        m, v, me, ve = stats(exp(x/kT))
        return kT*log(m), kT*me/m

def nfee(x):
        m, v, me, ve = stats(exp(-x/kT))
        return -kT*log(m), kT*me/m

if __name__=="__main__":
        main(sys.argv)
Personal tools