# You can ignore this, it's just for aesthetic purposes matplotlib.rcParams['figure.figsize'] = (8,5) rcParams['savefig.dpi'] = 100 # These import commands set up the environment so we have access to numpy and pylab functions import numpy as np import pylab as pl # Data Fitting # First, we'll generate some fake data to use x = np.linspace(0,10,50) # 50 x points from 0 to 10 # Remember, you can look at the help for linspace too: # help(np.linspace) # y = m x + b y = 2.5 * x + 1.2 # let's plot that pl.clf() pl.plot(x,y) # looks like a simple line. But we want to see the individual data points pl.plot(x,y,marker='s') # We need to add noise first noise = pl.randn(y.size) # Like IDL, python has a 'randn' function that is centered at 0 with a standard deviation of 1. # IDL's 'randomu' is 'pl.rand' instead # What's y.size? print y.size print len(y) # We can add arrays in python just like in IDL noisy_flux = y + noise # We'll plot it too, but this time without any lines # between the points, and we'll use black dots # ('k' is a shortcut for 'black', '.' means 'point') pl.clf() # clear the figure pl.plot(x,noisy_flux,'k.') # We need labels, of course pl.xlabel("Time") pl.ylabel("Flux") # We'll use polyfit to find the values of the coefficients. The third # parameter is the "order" p = np.polyfit(x,noisy_flux,1) # help(polyfit) if you want to find out more # print our fit parameters. They are not exact because there's noise in the data! # note that this is an array! print p print type(p) # you can ask python to tell you what type a variable is # Great! We've got our fit. Let's overplot the data and the fit now pl.clf() # clear the figure pl.plot(x,noisy_flux,'k.') # repeated from above pl.plot(x,p[0]*x+p[1],'r-') # A red solid line pl.xlabel("Time") # labels again pl.ylabel("Flux") # Cool, but there's another (better) way to do this. We'll use the polyval # function instead of writing out the m x + b equation ourselves pl.clf() # clear the figure pl.plot(x,noisy_flux,'k.') # repeated from above pl.plot(x,np.polyval(p,x),'r-') # A red solid line pl.xlabel("Time") # labels again pl.ylabel("Flux") # help(polyval) if you want to find out more noisy_flux = y+noise*10 p = polyfit(x,noisy_flux,1) print p # plot it pl.clf() # clear the figure pl.plot(x,noisy_flux,'k.') # repeated from above pl.plot(x,np.polyval(p,x),'r-',label="Best fit") # A red solid line pl.plot(x,2.5*x+1.2,'b--',label="Input") # a blue dashed line showing the REAL line pl.legend(loc='best') # make a legend in the best location pl.xlabel("Time") # labels again pl.ylabel("Flux") pl.clf() # clear the figure pl.errorbar(x,noisy_flux,yerr=10,marker='.',color='k',linestyle='none') # errorbar requires some extras to look nice pl.plot(x,np.polyval(p,x),'r-',label="Best fit") # A red solid line pl.plot(x,2.5*x+1.2,'b--',label="Input") # a blue dashed line showing the REAL line pl.legend(loc='best') # make a legend in the best location pl.xlabel("Time") # labels again pl.ylabel("Flux") # this time we want our "independent variable" to be in radians x = np.linspace(0,2*np.pi,50) y = np.sin(x) pl.clf() pl.plot(x,y) # We'll make it noisy again noise = pl.randn(y.size) noisy_flux = y + noise pl.plot(x,noisy_flux,'k.') # no clear this time # curve_fit is the function we need for this, but it's in another package called scipy from scipy.optimize import curve_fit # we need to know what it does: help(curve_fit) def sinfunc(x,a,b): return a*np.sin(x-b) fitpars, covmat = curve_fit(sinfunc,x,noisy_flux) # The diagonals of the covariance matrix are variances # variance = standard deviation squared, so we'll take the square roots to get the standard devations! # You can get the diagonals of a 2D array easily: variances = covmat.diagonal() std_devs = np.sqrt(variances) print fitpars,std_devs # Let's plot our best fit, see how well we did # These two lines are equivalent: pl.plot(x, sinfunc(x, fitpars[0], fitpars[1]), 'r-') pl.plot(x, sinfunc(x, *fitpars), 'r-') t = np.linspace(0.1,10) a = 1.5 b = 2.5 z = a*t**b pl.clf() pl.plot(t,z) # Change the variables # np.log is the natural log y = np.log(z) x = np.log(t) pl.clf() pl.plot(x,y) pl.ylabel("log(z)") pl.xlabel("log(t)") noisy_z = z + pl.randn(z.size)*10 pl.clf() pl.plot(t,z) pl.plot(t,noisy_z,'k.') noisy_y = np.log(noisy_z) pl.clf() pl.plot(x,y) pl.plot(x,noisy_y,'k.') pl.ylabel("log(z)") pl.xlabel("log(t)") print noisy_y # try to polyfit a line pars = np.polyfit(x,noisy_y,1) print pars print 1 == 1 print np.nan == np.nan OK = noisy_y == noisy_y print OK print "There are %i OK values" % (OK.sum()) masked_noisy_y = noisy_y[OK] masked_x = x[OK] print "masked_noisy_y has length",len(masked_noisy_y) # now polyfit again pars = np.polyfit(masked_x,masked_noisy_y,1) print pars # cool, it worked. But the fit looks a little weird! fitted_y = polyval(pars,x) pl.plot(x, fitted_y, 'r--') # Convert bag to linear-space to see what it "really" looks like fitted_z = np.exp(fitted_y) pl.clf() pl.plot(t,z) pl.plot(t,noisy_z,'k.') pl.plot(t,fitted_z,'r--') pl.xlabel('t') pl.ylabel('z') def powerlaw(x,a,b): return a*(x**b) pars,covar = curve_fit(powerlaw,t,noisy_z) pl.clf() pl.plot(t,z) pl.plot(t,noisy_z,'k.') pl.plot(t,powerlaw(t,*pars),'r--') pl.xlabel('t') pl.ylabel('z') # sin(x) is already defined def sin2x(x): """ sin^2 of x """ return np.sin(x)**2 def sin3x(x): """ sin^3 of x """ return np.sin(x)**3 def sincos(x): """ sin(x)*cos(x) """ return np.sin(x)*np.cos(x) list_of_functions = [np.sin, sin2x, sin3x, sincos] # we want 0-2pi for these functions t = np.linspace(0,2*np.pi) # this is the cool part: we can make a variable function for fun in list_of_functions: # the functions know their own names (in a "secret hidden variable" called __name__) print "The maximum of ",fun.__name__," is ", fun(t).max() # OK, but we wanted the location of the maximum.... for fun in list_of_functions: print "The location of the maximum of ",fun.__name__," is ", fun(t).argmax() # well, that's not QUITE what we want, but it's close # We want to know the value of t, not the index! for fun in list_of_functions: print "The location of the maximum of ",fun.__name__," is ", t[fun(t).argmax()] # Finally, what if we want to store all that in an array? # Well, here's a cool trick: you can sort of invert the for loop # This is called a "list comprehension": maxlocs = [ t[fun(t).argmax()] for fun in list_of_functions ] print maxlocs # Confused? OK. Try this one: print range(6) print [ii**2 for ii in range(6)] def fail(x): """ an infinite recursion. Will fail """ return fail(x) # WARNING: this will definitely fail, but it may take a long time! # print fail(1) %install_ext https://raw.github.com/tkf/ipython-hierarchymagic/master/hierarchymagic.py %load_ext hierarchymagic %%dot --format svg -- digraph G { f5 [label="f(5)"]; Lf1 [label="f(1)"]; LLf0 [label="f(0)"]; Lf2 [label="f(2)"]; Lf3 [label="f(3)"]; RRf1 [label="f(1)"]; RRMf1 [label="f(1)"]; RRf0 [label="f(0)"]; LLf1 [label="f(1)"]; RMf0 [label="f(0)"]; RMf1 [label="f(1)"]; Rf2 [label="f(2)"]; RMf2 [label="f(2)"]; Rf3 [label="f(3)"]; Rf4 [label="f(4)"]; f5->Lf3; f5->Rf4; Lf3->Lf2; Lf3->Lf1; Rf4->Rf3; Rf4->Rf2; Rf2->RRf1; Rf2->RRf0; Lf2->LLf1; Lf2->LLf0; Rf3->RMf2; Rf3->RMf1; RMf2->RMf0; RMf2->RRMf1; } # EXERCISE: Write a recursive Fibonacci function! def fibonacci(n): return # put your code here! # EXERCISE: Write a recursive factorial function # Recall: factorial(x) = factorial(x-1) * x # factorial(1) = 1 def factorial(x): return # This is the test for the fibonacci code. # Run this after you've written your fibonnacci sequence code # it will fail if your code is wrong! for ii,ff in zip(range(9),[0,1,1,2,3,5,8,13,21]): assert fibonacci(ii) == ff print "Success!" # This is the test for the factorial code. # Run this after you've written your factorial code # it will fail if your code is wrong! for ii,ff in zip(range(1,9),[1,2,6,24,120,720,5040,40320]): assert factorial(ii) == ff print "Success!"