Useful Python Classes
Upgraded Integration Script
# simple trapezium rule integration
# http://www.mathsrevision.net/alevel/pages.php?page=44
from scipy import integrate
import numpy as np
gaussian = lambda x: 1/np.sqrt(np.pi) * np.exp(-x**2)
x_n = 1.0
x_0 = 0.0
f_x = [gaussian(x) for x in np.arange(x_0, x_n, 0.0001)]
h = (x_n-x_0)/(len(f_x)-1.)
print 'h ['+str(h)+']'
print 'trapz ['+str(integrate.trapz(f_x , dx=h)) + ']'
print 'cumtrapz ['+str(integrate.cumtrapz(f_x , dx=h)[-1:]) + ']'
print 'simpson ['+str(integrate.simps(f_x , dx=h)) + ']'
print 'romberg ['+str(integrate.romberg(gaussian , x_0, x_n)) + ']'
Simple signed and unsigned Int conversion
Three simple javascript functions that can be used to parse strings contining suspected integers based text.
function testunsigned(str) {
return /^ *[0-9]+ *$/.test(str);
}
function testsigned(str) {
str = alltrim(str);
return /^[-+]?[0-9]+$/.test(str);
}
function testfloat(str) {
str = alltrim(str);
return /^[-+]?[0-9]+(\.[0-9]+)?$/.test(str);
}
Progress Indicator
The following code can be used to track progress on the command line.
import progress
p = progress.progress()
p.getStart('\n\tBuilding -')
for item in list:
... do something ...
p.moveOn() # move row twizzler
You will need to save this class in a file called progress on the Python path.
# Class to encapsulate a progress indicator
import sys, os
import unittest
class progressException(Exception): 'Error to raise for any recursive problem.'
class progress:
def __init__ (self):
self._twissler = ["|","/","-","\\","|"]
self._state = 0
def getStart(self):
sys.stdout.write('\t[ ') # include 2 spaces for the twissler
sys.stdout.flush()
def getStart(self, text):
sys.stdout.write('\t %s [ ' % (text)) # include 2 spaces for the twissler
sys.stdout.flush()
def moveOn(self):
try:
self._state = self._state + 1
if self._state >= 5:
self._state = 1
sys.stdout.write( chr(8)+chr(8)+self._twissler[self._state]+']' )
sys.stdout.flush()
except:
raise progressException('failed to progress')
class TestCase(unittest.TestCase):
def setUp(self):
pass
def tearDown(self):
pass
def testProgress(self):
p = progress()
p.getStart()
for a in range(0,10,1):
p.moveOn()
time.sleep(0.2)
if __name__ == '__main__':
widgetTestSuite = unittest.TestSuite()
widgetTestSuite.addTest(TestCase("testProgress"))
runner = unittest.TextTestRunner()
runner.run(widgetTestSuite)
Convolving Functions - Simple Hamming
Example below shows a simple routine to smooth out a non periodic function using a hamming function in scipy.
# simple script to calculate the fft for function
# Alexander Baker
import math, sys
# using standard python extension libs
# for maths and plotting
from pylab import *
from numpy import *
from numpy.fft import fft
from numpy.fft import fftfreq
from numpy.fft import fftshift
'''x = arange(0, 100, 0.01)
y = sin(x/3) + cos(x/10)
subplot(2,1,1)
ylabel('sin(x/3)+cos(x/10)')
grid(True)
plot(x,y)
subplot(2,1,2)
ylabel('fft(sin(x/3)+cos(x/10))')
grid(True)
plot(x[0:50],abs(fft(y)[0:50]))
show()'''
#savefig('fft.png')
x = r_[0:1:512j];
h01 = hamming(512)
theta = 2*pi*x;
F=fftfreq(1024)
y01 = 2*sin(10*theta) + 3*cos(20*theta) + sin(30*theta) + 2*cos(45*theta);
ys= y01*h01;
Ys= fft(ys, 1024);
Y01 = fft(y01, 1024)
Ysn2= fftshift(abs(Ys)/max(abs(Ys)))
Y01n2 = fftshift(abs(Y01)/max(abs(Y01)))
Ysn= (abs(Ys)/max(abs(Ys)))
Y01n = (abs(Y01)/max(abs(Y01)))
subplot(6,1,1);plot(theta,y01,linewidth=2);title('Original Function');
subplot(6,1,2);plot(F,Y01n,linewidth=2);title('FFT of Unmodified - unshifted');
subplot(6,1,3);plot(F,Y01n2,linewidth=2);title('FFT of Unmodified - shifted');
subplot(6,1,5);plot(F,Ysn, linewidth=2);title('FFT of Smoothed Signal - unshifted');
subplot(6,1,4);plot(F,Ysn2, linewidth=2);title('FFT of Smoothed Signal - shifted');
subplot(6,1,6);plot(x,h01,linewidth=2);title('Hamming - Convolution Function');
savefig('fft.png')
#show()
#http://ftp.osc.edu/supercomputing/training/python/python_pt2_0702.pdf
#http://www.alexfb.com/cgi-bin/twiki/view/PtPhysics/VortexLattice#Scalar_Diffraction_Theory
#http://www.lds-group.com/docs/site_documents/AN014%20Understanding%20FFT%20Windows.pdf
fft hamming non periodic function:
Gaussian Convolving Function
The example below shows am example of how to smooth a gaussian profile with noise imposed on the signal artificially.
#http://www.scipy.org/Cookbook/SignalSmooth
def gauss_kern(size, sizey=None):
""" Returns a normalized 2D gauss kernel array for convolutions """
size = int(size)
if not sizey:
sizey = size
else:
sizey = int(sizey)
#print size, sizey
x, y = mgrid[-size:size+1, -sizey:sizey+1]
g = exp(-(x**2/float(size)+y**2/float(sizey)))
return g / g.sum()
def blur_image(im, n, ny=None) :
""" blurs the image by convolving with a gaussian kernel of typical
size n. The optional keyword argument ny allows for a different
size in the y direction.
"""
g = gauss_kern(n, sizey=ny)
improc = signal.convolve(im,g, mode='valid')
return(improc)
from pylab import figure, show, clf, savefig, cm
from scipy import *
xmin, xmax, ymin, ymax = -70, 70, -70, 70
extent = xmin, xmax, ymin, ymax
X, Y = mgrid[-70:70, -70:70]
Z = cos((X**2+Y**2)/200.)+ random.normal(size=X.shape)
#Z = cos((X**2+Y**2)/200.)
fig1 = figure(1)
fig1.clf()
ax1a = fig1.add_subplot(131)
ax1a.imshow(abs(Z), cmap=cm.jet, alpha=.9, interpolation='bilinear', extent=extent)
ax1a.set_title(r'Noisey')
P = gauss_kern(3)
ax1b = fig1.add_subplot(132)
ax1b.imshow(abs(P), cmap=cm.jet, alpha=.9, interpolation='bilinear', extent=extent)
ax1b.set_title(r'Convolving Gaussian')
U = blur_image(Z, 3)
ax1c = fig1.add_subplot(133)
ax1c.imshow(abs(U), cmap=cm.jet, alpha=.9, interpolation='bilinear', extent=extent)
ax1c.set_title(r'Cleaned')
savefig('convolve-gaussian.png')
http://www.scipy.org/Cookbook/SignalSmooth
gaussian convolving:
Loading Data Files
from pylab import figure, show, datestr2num, load
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter, WeekdayLocator
dates, closes = load(
'data.csv', delimiter=',',
converters={0:datestr2num}, skiprows=1, usecols=(0,2),
unpack=True)
years = YearLocator() # every year
months = MonthLocator() # every month
weeks = WeekdayLocator() # every month
yearsFmt = DateFormatter('%M %Y')
monthsFmt = DateFormatter('%M')
daysFmt = DateFormatter('%D')
fig = figure()
ax = fig.add_subplot(111)
ax.plot_date(dates, closes)
# format the ticks
ax.xaxis.set_major_locator(weeks)
ax.xaxis.set_major_formatter(daysFmt)
#ax.xaxis.set_minor_locator(months)
#ax.xaxis.set_minor_formatter(monthsFmt)
ax.autoscale_view()
# format the coords message box
def price(x): return '$%1.2f'%x
ax.format_xdata = DateFormatter('%Y-%m-%d')
ax.format_ydata = price
show()
Data to Load
Date,Open,High,Low,Close,Volume,Adj. Close*
19-Sep-03,29.76,29.97,29.52,29.96,92433800,29.79
18-Sep-03,28.49,29.51,28.42,29.50,67268096,29.34
17-Sep-03,28.76,28.95,28.47,28.50,47221600,28.34
16-Sep-03,28.41,28.95,28.32,28.90,52060600,28.74
15-Sep-03,28.37,28.61,28.33,28.36,41432300,28.20
Date Plots
import matplotlib, datetime
matplotlib.use('Agg')
from matplotlib import rc
from matplotlib.figure import Figure
from matplotlib.dates import drange, DateFormatter, DayLocator, MonthLocator, YearLocator
from matplotlib.backends.backend_agg import FigureCanvasAgg
from numpy import rand
# Matplotlib likes things in inches instead of pixels, so
# a little math is necessary later
WIDTH = 1200
HEIGHT = 700
DPI = 72
now = datetime.date(2007, 9, 2)
difference10 = datetime.timedelta(weeks=520)
# Generate a range of dates, one day at a time
#dates = drange(datetime.date(2007, 7, 1),
# datetime.date(2007, 9, 2),
# datetime.timedelta(days=1))
dates = drange(now,
now+difference10,
datetime.timedelta(days=1))
fig = Figure()
#rc.ylim(ymax=3)
# Convert from pixels to inches
fig.set_figwidth(round(WIDTH/DPI))
fig.set_figheight(round(HEIGHT/DPI))
# The numbers here are margins from the edge of the graph
# You may need to adjust this depending on how you want your
# graph to look, and how large your labels are
ax = fig.add_axes([0.1, 0.1, 0.85, 0.8])
# Style b-o = solid blue line, round dots at each data point
# Instead of calling rand(), you'll probably want to pass in a list of
# your data
aa = rand(len(dates))
# simulate a spike
aa[135] = 3.1
aa[234] = 2.3
ax.plot_date(dates, aa, 'b-+')
ax.set_title('Total Changed Files')
ax.set_ylabel('Gigabytes')
ax.grid(True)
# X axis tick once each day
#ax.xaxis.set_major_locator(DayLocator())
#ax.xaxis.set_major_locator(MonthLocator())
#ax.xaxis.set_minor_locator(DayLocator())
ax.xaxis.set_major_locator(YearLocator())
# Use any strftime format strings here
#ax.xaxis.set_major_formatter(DateFormatter('%m/%d'))
#ax.xaxis.set_major_formatter(DateFormatter('%Y/%m'))
#ax.xaxis.set_minor_formatter(DateFormatter('%d'))
#ax.xaxis.set_major_formatter(DateFormatter('%Y'))
ax.xaxis.set_major_formatter(DateFormatter("%b '%y"))
#ax.yrange(10)
canvas = FigureCanvasAgg(fig)
canvas.print_figure('output.png', dpi=DPI)
- example date plot:
Regression Fit
from pylab import *
x = randn(10000) # some gaussian noise
subplot(211) # a subplot
hist(x, 100) # make a histogram
grid(True) # make an axes grid
ylabel('histogram')
# now do the regression...
x = arange(0.0, 2.0, 0.05)
y = 2+ 3*x + 0.2*randn(len(x)) # y is a linear function of x + nse
# the bestfit line from polyfit
m,b = polyfit(x,y,1) # a line is 1st order polynomial...
print m, b
# plot the data with blue circles and the best fit with a thick
# solid black line
subplot(212)
plot(x, y, 'bo', x, m*x+b, '-r', linewidth=2)
plot(x, 7*x+b+3, '-+', linewidth=2)
ylabel('regression')
grid(True)
# save the image to hardcopy
savefig('demo')
show()
Quick Numerical Integration
This is a simple script using the scipy intgeration library to quickly find the area under a set of data points (these should form a curve).
# simple trapezium rule integration
# http://www.mathsrevision.net/alevel/pages.php?page=44
from scipy import integrate
f_x = [1, 1.22, 1.41, 1.58, 1.73]
x_n = 1.0
x_0 = 0.0
h = (x_n-x_0)/(len(f_x)-1)
print 'h ['+str(h)+']'
print 'trapz ['+str(integrate.trapz(f_x, dx=h)) + ']'
print 'cumtrapz ['+str(integrate.cumtrapz(f_x, dx=h)) + ']'
print 'simpson ['+str(integrate.simps(f_x, dx=h)) + ']'
#print integrate.trapz([1,1.22,1.41,1.58,1.73], [0,0.25,0.5, 0.75, 1])
#print integrate.trapz([1,1.22,1.41,1.58,1.73], dx=0.25)
#print integrate.cumtrapz([1,1.22,1.41,1.58,1.73], dx=0.25)
#print integrate.simps([1,1.22,1.41,1.58,1.73], [0,0.25,0.5, 0.75, 1])
Iterator Based Query
This is an excellent query to call if you need to minimise the amount of memory that you are consuming when processing large datasets, this query will yield the rows back to the calling function in pages between.
def paged(connexion, sql, psize=128):
'''lookup the
scenario list configured for a processid
Parameters
----------
all parameters are keyword parameters
connexion : Object
object that represents the connection to the database, active
sql : string
sql to execute to pull data out of the database
psize : int
represents the page size that we want to flush each time to disk
Returns
-------
dictionary : dictonary
yields a
'''
recset = win32com.client.Dispatch(r'ADODB.Recordset')
recset.Open(sql, connexion, 0, 1)
try:
fields = [field.Name for field in recset.Fields]
ok = True
while ok:
# transpose: rows become columns and columns become rows
rows = zip(*recset.GetRows(psize))
if recset.EOF:
recset.Close()
recset = None
ok = False
for row in rows:
yield dict(zip(fields, row))
except:
if recset is not None:
recset.Close()
del recset
raise