#!/usr/bin/env python # coding: utf-8 # # Manipulating data in Python # # In Python, you can manipulate your data in various ways. There are many, different functions that you can use in order to manipulate your data. # # At the end of this module, we will be generating the __Planck__ spectrum, which describes the intensity of a [blackbody](https://en.wikipedia.org/wiki/Black_body). # # Before we generate it, we need to know how to read a file, and plot its content # ## Reading in and manipulating data # # There are a lot of modules to read in data from a file: # # - Numpy # - `genfromtxt` # - `loadfromtxt` # - Pandas # - `read_csv` # - `read_table` # - ... # # We will use "`genfromtxt`" for this exercise # In[74]: ## First, you need to import your modules get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib import numpy as np import matplotlib.pyplot as plt # We can also define the path to the directory that contains all of our files # In[75]: data_path = '../data/' # If you have questions about the function, you can add a "?" at the end of your function. # In[76]: help(np.genfromtxt) # Now we can go ahead and read in the data, and save it to two arrays, i.e. __`x1`__ and __`y1`__: # In[77]: x1, y1 = np.genfromtxt('../data/dataset1.txt', unpack=True, dtype=np.float) # We now use __`unpack`__ to tell Python to throw out the two columns and we caught them with arrays __`x`__ and __`y`__, but we could have just captured whatever came out, then it just would be a merged array: # In[78]: data = np.genfromtxt('../data/dataset1.txt', dtype=np.float) # You can now examine the output from `genfromtxt`: # In[79]: print(data.shape) # In[80]: print(data[:,0]) # In[81]: x1 # You can check that the 1st column of __`data`__ is the same as __`x1`__ with `np.array_equal`: # In[82]: np.array_equal(x1, data[:,0]) # ### Reading in from remote data # # Another nice thing about `genfromtxt` is that it can read dta from a __URL__: # In[83]: ## Setting up path to remote file remote_file = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv' ## Extracting data from file, and saving it as to variable `A` A = np.genfromtxt(remote_file, unpack=True, delimiter=',') # Now `A` has the shape of 9 columns by 768 rows # In[84]: A.shape # As you can see, the shape of `A` is different than the one in the URL. # To fix it, you can `transpose` the array: # In[85]: A.T # In[86]: print(A.T.shape) # ## Fitting a straight line # # Now that we've read the data, we can use to __fit__ a straight line to it. # # The steps to follow are: # - Create a new function called `myline` # - Find the best-fit parameters for the data # - Plot the data against the fitted line # ### Define function `myline` # In[87]: def myline(x, m, b): """ Functional form of a straight line Parameters ----------- x : `float`, `int`, `np.ndarray`, list Variable that tells you how far along m : `float`, `int` Slope or gradient b : `float`, `int` Y-intercept Returns --------- y : `float`, `int`, `np.ndarray`, list Value for how far up on the y-axis """ y = (m * x) + b return y # ### Finding best-fit parameters # # We can now fit a line to the data, and find the parameters (`m`, and `b`) # that best describe our data: # In[88]: ## Import `curve_fit` function from scipy from scipy.optimize import curve_fit # In[89]: ## Calculating best-fit parameters bestfit, covar_matrix = curve_fit(myline, x1, y1, p0 = [1.0, 1.0]) print(bestfit) # In[90]: print("Best-fit parameters: m = {0} and b = {1}".format(*bestfit)) # In this example: # - `myline`: Is the function used to fit the data # - `x1`, `y1`: x- and y-values # - `p0`: Initial guesses for the two parameters. This variable is _optional_. # # You can read more about `curve_fit` by typing: # In[91]: help(curve_fit) # We can now overplot the best-fit line to the data: # In[92]: # Initializing figure (optional) fig = plt.figure(figsize=(10,10), facecolor='white') ax = fig.add_subplot(111, facecolor='white') # Plotting values plt.plot(x1, myline(x1, *bestfit), 'r--', linewidth=2, label='Best fit') plt.plot(x1, y1, 'bo', label='Data') # Setting up limits plt.xlim((-1, 21)) # Limit the x-range of our plot # Axis labels plt.xlabel('This is the x-label', fontsize=20) plt.ylabel('This is the y-label', fontsize=20) # Maybe add a title plt.title('You can also add a title with color', fontsize=20, color='blue') # And finally, a legend: plt.legend(loc='best') # The final script looks like this: # In[ ]: get_ipython().run_line_magic('load', '../scripts/fitting_line.py') # ## More complicated plots - Histogram # # Now let's plot a distribution of values # In[94]: ## Importing modules import numpy as np import scipy import matplotlib.pyplot as plt # Now we need to define the mean and standard deviation of a normal distribution, and create an array of __random__ values: # In[95]: # Mean and standard deviation mu, sigma = 100, 15 # Array of random values x = mu + (sigma * np.random.randn(10000)) # Printing out values of `x` print(x) # We can also define a function for the PDF of the distribution: # In[96]: # Function for the PDF of distribution def normpdf(x, mu, sigma): """ PDF of a distribution with a mean and standard deviation Parameters ----------- x : `np.ndarray`, list List/Array of values of the distribution mu : `float`, `int` Mean of the distribution sigma : `float`, `int` Standard deviation of the distribution Returns -------- y : `np.ndarray` or list List/array of the normalized PDF values """ u = (x-mu)/np.abs(sigma) y = (1/(np.sqrt(2*np.pi)*np.abs(sigma)))*np.exp(-u*u/2) return y # Now we construct a histogram with `plt.hist`: # In[97]: # Initialize figure fig = plt.figure(figsize=(8,8)) ax = fig.add_subplot(111, facecolor='white') # Creating histogram n, bins, patches = plt.hist(x, bins=50, density=True, histtype='stepfilled', facecolor='green', alpha=0.75, label='Normal distr.') # Normalized PDF y_pdf = normpdf(x, mu, sigma) plt.plot(x, y_pdf, 'ro', linestyle='', label='PDF') # Adding labels and title plt.title(r'Histogram of IQ: $\mu = %s, \sigma = %s$' %(mu, sigma), fontsize=20) # Setting up axis plt.axis([40, 160, 0, 0.03]) # Adding a grid plt.grid(True) # Adding legend plt.legend(loc='best', prop={'size': 15}) # The final script for this looks like: # In[ ]: get_ipython().run_line_magic('load', '../scripts/histogram_pdf.py') # ## Planck Spectrum # # The next step is to write a script that generates the Planck spectrum (wavelength and intensity at that wavelength for many wavelenghts) # # > Create an equation in Python syntax such that for temperature T=300 K, and wavelenth ($\lambda = 1cm$) it finds the intensity of a Planck Spectrum # # Planck Spectrum: # # $$ # I = \frac{2hc^2}{\lambda^5}\frac{1}{e^{hc/(\lambda\ k T)} - 1} # $$ # In[99]: # Write your answer here # ### Method 1 # # This method uses does the following: # # - Constructs a dictionary with known values for the constants, i.e. $h, c, k$ # - Creates a function to calculate the Planck spectrum, $I$ # - Plots the Planck spectrum at every wavelength for a given Temperature $T$ # In[100]: ## First import your modules import os import sys import numpy as np import matplotlib.pyplot as plt # #### Creating a dictionary with constants # In[101]: ## Dictionary of constants def const_dict_func(): """ Dictionary of constants Returns --------- const_dict : `dict` Dictionary of constants """ const_dict = {} const_dict['c'] = 3.0e8 # Speed of light const_dict['h'] = 6.62e-34 # Planck's constant const_dict['k'] = 1.38e-23 # Boltzmann's constant return const_dict # This will let you call the dictionary and get the _values_ for each of the constants. # # Keep in mind that the we had to know the __units__ beforehand. There are other ways to do this, but # we'll discuss them at the end. # #### Function for calculating the Power spectrum for a given wavelength $\lambda$ and at fixed Temperature $T$ # In[102]: def planck_spectrum(T, wavelen): """ Computes the Planck spectrum for a given Temperature and wavelength Parameters ----------- T : `float`, `int` Temperature used. In units of `Kelvin` wavelen : `float`, `int` Wavelengths to evaluate Returns ----------- I : `np.ndarray`, `float`, `int` Intensity of the Power spectrum at given temperature and wavelength """ ## Constants # Calling function `const_dict_func` and saving output as a dictionary const_dict = const_dict_func() # Saving values of constants as new variables c = const_dict['c'] h = const_dict['h'] k = const_dict['k'] ## Computing the Planck spectrum or 'radiance' # Radiance I = (2 * h * c ** 5) / (wavelen**5) I *= 1./(-1. + np.exp((h * c)/(wavelen * k * T))) return I # #### Function for _plotting_ the data # # This function will plot the spectrum for the Planck spectrum as function of wavelength $\alpha$ at fixed Temperature $T$ # In[145]: ## Plotting data def plot_planck(data, T): """ Plots the Planck spectrum Parameters ------------ data : `np.ndarray` Data containing wavelength and planck spectrum information. Shape is (N, 2) T : `float`, `int` Temperature, at which the Planck spectrum is analyzed. """ # Clearing previous figures plt.clf() plt.close() # Initializing figure fig = plt.figure(figsize=(8,8)) ax = fig.add_subplot(111, facecolor='white') # Plotting spectrum plt.plot(data[:, 0], data[:, 1], marker='o', color='b', linestyle='--', label='T={0} K'.format(T)) # # Axis labels ax.set_xlabel(r'$\lambda$ [m]', fontsize=20) ax.set_ylabel('Intensity', fontsize=20) # Legend ax.legend(loc='best', prop={'size':20}) # # Showing figure plt.show() # #### Writing the _main_ function: # In[146]: def main(): """ Main function """ # # Temperature as an input parameter T = 300 ## In units Kelvin # # Wavelenght wavelen_min = 1.e-7 # Minimum wavelength in meters waveln_max = 6.e-5 #Maximum wavelength in meters samples = 100 # Number of steps to output # ### List of wavelengths - Computing the wavelengths # Creating array of number of samples wavelen = np.arange(samples) # Populating the array of wavelengths wavelen = wavelen_min + (waveln_max - wavelen_min) * wavelen / float(samples) # # Computing the radiance radiance = planck_spectrum(T, wavelen) # # Stacking the data data = np.column_stack((wavelen, radiance)) # Sorting data from smallest to largest wavelength data_sort_idx = np.argsort(data[:,0]) data_sort = data[data_sort_idx] # ## Saving data to file # Definiing output path and checking if it exists output_path = '../datafiles' if not (os.path.exists(output_path)): os.makedirs(output_path) # Saving data to file np.savetxt('{0}/spectrum.dat'.format(output_path), data) # # Plotting data plot_planck(data, T) # In[147]: ## Calling main function main() # #### Script # # With all of this defined, you can put it in a single script. # # This code is found under `../scripts/planck_spectrum.py` # In[ ]: get_ipython().run_line_magic('load', '../scripts/planck_spectrum.py') # ### Method 2 - Plotting multiple Planck spectrums # # Now we can try to the Planck spectrum for multiple temperatures $T$. # # We will have to modify the some of the functions to include $T$ as part of an argument. # We'll modify both the `main` and `plot_planck` functions. # #### Modifying the _plotting_ function # # We'll modify this function to: # # - include $T$ as an input parameter, # - Loop over the different temperature values in `T_arr` # - Choose the color from a colormap # In[136]: ## Importing new modules from matplotlib.pyplot import cm ## Plotting data def plot_planck_2(data, T_arr): """ Plots the Planck spectrum Parameters ------------ data : `np.ndarray` Data containing wavelength and planck spectrum information. Shape is ((N, 2), M) T_arr : `np.ndarray` Array of the different temperatures. Shape, (M, ), i.e. it has `M` number of elements. """ # Defining the colormap color_arr=cm.rainbow(np.linspace(0, 1, len(T_arr))) # Clearing previous figures plt.clf() plt.close() # Initializing figure fig = plt.figure(figsize=(10,10)) ax = fig.add_subplot(111, facecolor='white') # Plotting spectrum for kk, T_kk in enumerate(T_arr): plt.plot(data[kk][:, 0], data[kk][:, 1], marker='o', color=color_arr[kk], linestyle='--', label='{0} K'.format(T_kk)) # # Axis labels ax.set_xlabel(r'$\lambda$ [m]', fontsize=20) ax.set_ylabel('Intensity', fontsize=20) # # Legend ax.legend(loc='best', prop={'size':14}) # # Showing figure plt.show() # #### Modifying the main function to include $T$ as an input parameter # In[137]: def main_2(): """ Main function """ # # Temperature as an input parameter T_arr = np.arange(300, 1000, 100) ## In units Kelvin # # Wavelenght wavelen_min = 1.e-7 # Minimum wavelength in meters waveln_max = 6.e-5 #Maximum wavelength in meters samples = 100 # Number of steps to output # ### List of wavelengths - Computing the wavelengths # Creating array of number of samples wavelen = np.arange(samples) # Populating the array of wavelengths wavelen = wavelen_min + (waveln_max - wavelen_min) * wavelen / float(samples) # ## Computing the radiance # Defining array for `data` data = [[] for x in range(len(T_arr))] for kk in range(len(T_arr)): radiance_kk = planck_spectrum(T_arr[kk], wavelen) # Stacking the data data_kk = np.column_stack((wavelen, radiance_kk)) # Sorting data from smallest to largest wavelength data_kk_sort_idx = np.argsort(data_kk[:,0]) data_kk_sort = data_kk[data_kk_sort_idx] # Saving to array data[kk] = data_kk_sort # # Plotting data plot_planck_2(data, T_arr) # In[138]: main_2() # Now you see how the Planck spectrum changes as a function of wavelength $\lambda$! #