# Loading modules
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
x = np.array([1,2,3,5,6,7,8,10],dtype=float)
x
array([ 1., 2., 3., 5., 6., 7., 8., 10.])
y = np.arange(10)
y
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
z = np.linspace(0,100,50)
z
array([ 0. , 2.04081633, 4.08163265, 6.12244898, 8.16326531, 10.20408163, 12.24489796, 14.28571429, 16.32653061, 18.36734694, 20.40816327, 22.44897959, 24.48979592, 26.53061224, 28.57142857, 30.6122449 , 32.65306122, 34.69387755, 36.73469388, 38.7755102 , 40.81632653, 42.85714286, 44.89795918, 46.93877551, 48.97959184, 51.02040816, 53.06122449, 55.10204082, 57.14285714, 59.18367347, 61.2244898 , 63.26530612, 65.30612245, 67.34693878, 69.3877551 , 71.42857143, 73.46938776, 75.51020408, 77.55102041, 79.59183673, 81.63265306, 83.67346939, 85.71428571, 87.75510204, 89.79591837, 91.83673469, 93.87755102, 95.91836735, 97.95918367, 100. ])
h = np.random.randn(100)
h
array([ 1.2125995 , 1.83486385, 0.63821859, -0.20766847, 0.16394987, 0.29579982, -1.16177048, -0.83906042, 1.05600292, -0.96406777, -1.83799388, -2.2114805 , -0.46763226, -0.28014607, -0.82671363, -0.29653933, -0.99461879, -1.05692908, -0.63431348, 0.58229083, -0.06923924, 2.56103329, 0.14558599, 0.06025412, 1.29401031, -0.9230366 , 1.01506698, 0.3772861 , -1.20513242, 1.49001138, -0.8995781 , 1.3497741 , 1.408165 , 1.23759972, 0.61076751, -0.4094068 , 0.67698797, -0.77231489, -1.56098342, -0.99862791, 1.21275181, 0.00340132, 0.77635256, 0.03975821, 1.06701466, 0.05538845, 1.00814729, -0.79890085, 0.01000337, 0.36924362, -0.79997093, -0.83036405, -0.741903 , 1.48206007, 0.3152332 , -0.17685307, -0.17511851, 0.74958486, -1.16929455, 1.03634486, 1.71691808, 0.84479828, -0.65825241, -1.49280007, -0.90322557, 0.33369591, -0.65891918, 1.81929896, 0.22827773, 2.24103316, -0.50474291, 0.67246374, 0.57899788, 0.14338281, -0.80948219, -0.09728413, 0.8249388 , 1.44920434, -1.61984601, -0.23985288, 0.12277596, -0.73190566, 0.63827243, -0.13229476, 0.3812185 , -1.58845796, -1.52718767, -0.10934944, -1.35275154, 0.79154503, 1.76049644, -0.26587198, -0.0231885 , -0.8432236 , 0.68359977, 2.46355285, 1.85368996, -1.49378643, -0.49819556, 0.0810053 ])
These are just a few of the different ways to create numpy arrays.
You can also use functions like np.max() and np.min() to get the maximum and minimum values, respectively.
print('Min X: {0:.3f} \t Max X: {1:.3f}'.format(np.min(x), np.max(x)) )
Min X: 1.000 Max X: 10.000
zz = x**2 + 3*x**3
zz
array([ 4., 28., 90., 400., 684., 1078., 1600., 3100.])
Find the indices of the elements in an array that meet some criteria.
In this example, we're finding all the elements that are within 100 and 500 in array ___"zz
"___.
zz_idx = np.where((zz>= 100)&(zz <= 500))[0]
print('zz_idx: {0}'.format(zz_idx))
zz[zz_idx]
zz_idx: [3]
array([400.])
There are a lot of things we can do to a numpy array.
h1 = np.random.randint(10, 50, 50)
h1
array([25, 26, 24, 15, 35, 24, 21, 47, 31, 22, 17, 35, 47, 30, 14, 29, 17, 17, 22, 27, 46, 26, 49, 40, 27, 30, 20, 26, 36, 38, 49, 15, 16, 29, 24, 21, 26, 47, 41, 14, 20, 37, 40, 18, 31, 29, 34, 31, 41, 42])
We can get the overall size and shape of the array.
We cane use the functions numpy.size
and numpy.shape
to get the total number of elements in an array and the shape of the array, respectively.
np.size(h1)
50
h1.shape
(50,)
A = np.array([[1,2,3,4,5],
[6,7,8,9,10],
[12,13,14,16,17],
[13,45,67,89,90] ])
A
array([[ 1, 2, 3, 4, 5], [ 6, 7, 8, 9, 10], [12, 13, 14, 16, 17], [13, 45, 67, 89, 90]])
np.shape(A)
(4, 5)
You can also transpose array A
.
A_t = np.transpose(A)
A_t
array([[ 1, 6, 12, 13], [ 2, 7, 13, 45], [ 3, 8, 14, 67], [ 4, 9, 16, 89], [ 5, 10, 17, 90]])
Why are Numpy
arrays better than lists:
Numpy
arrays are memory efficient.For large arrays it is inpractical to initialize the data manually, using normal Python lists. Instead, we can use many of the Numpy functions to generate arrays of different forms.
We use this one to create a sequence of ordered elements
np.arange(0,10,1)
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
np.arange(0,20,5)
array([ 0, 5, 10, 15])
np.arange(-40,21,10)
array([-40, -30, -20, -10, 0, 10, 20])
We use these functions to created ordered lists, separated by intervals in real- and log-space.
B = np.linspace(0,50)
B
array([ 0. , 1.02040816, 2.04081633, 3.06122449, 4.08163265, 5.10204082, 6.12244898, 7.14285714, 8.16326531, 9.18367347, 10.20408163, 11.2244898 , 12.24489796, 13.26530612, 14.28571429, 15.30612245, 16.32653061, 17.34693878, 18.36734694, 19.3877551 , 20.40816327, 21.42857143, 22.44897959, 23.46938776, 24.48979592, 25.51020408, 26.53061224, 27.55102041, 28.57142857, 29.59183673, 30.6122449 , 31.63265306, 32.65306122, 33.67346939, 34.69387755, 35.71428571, 36.73469388, 37.75510204, 38.7755102 , 39.79591837, 40.81632653, 41.83673469, 42.85714286, 43.87755102, 44.89795918, 45.91836735, 46.93877551, 47.95918367, 48.97959184, 50. ])
B = np.linspace(0,100, 20)
B
array([ 0. , 5.26315789, 10.52631579, 15.78947368, 21.05263158, 26.31578947, 31.57894737, 36.84210526, 42.10526316, 47.36842105, 52.63157895, 57.89473684, 63.15789474, 68.42105263, 73.68421053, 78.94736842, 84.21052632, 89.47368421, 94.73684211, 100. ])
Array of 25 elements from $10^{0}$ to $10^{3}$, with base of 10.
B = np.logspace(0,3,25)
B
array([ 1. , 1.33352143, 1.77827941, 2.37137371, 3.16227766, 4.21696503, 5.62341325, 7.49894209, 10. , 13.33521432, 17.7827941 , 23.71373706, 31.6227766 , 42.16965034, 56.23413252, 74.98942093, 100. , 133.35214322, 177.827941 , 237.13737057, 316.22776602, 421.69650343, 562.34132519, 749.89420933, 1000. ])
Creating an array of 11 elements from $e^{0}$ to $e^{10}$, with the base == numpy.e
B = np.logspace(0,10,11, base=np.e)
B
array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01, 5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03, 2.98095799e+03, 8.10308393e+03, 2.20264658e+04])
from numpy import random
# Uniform random numbers in [0,1]
random.rand(5,5)
array([[0.60426703, 0.09522098, 0.73603191, 0.41629722, 0.03830843], [0.16121388, 0.42582919, 0.97209131, 0.9586233 , 0.61308885], [0.63238237, 0.15224875, 0.0961162 , 0.91785892, 0.1275659 ], [0.9448008 , 0.01132601, 0.656943 , 0.03904403, 0.28083426], [0.80007388, 0.05710783, 0.23060248, 0.75706392, 0.14909109]])
# 20 Random integers from 10 to 30
random.randint(10,30,20)
array([24, 15, 18, 26, 19, 19, 17, 27, 22, 23, 17, 10, 28, 19, 22, 22, 18, 11, 13, 15])
zeros
and ones
.¶np.zeros(20)
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
You can use these to populate other arrays
nelem = 10
C = np.ones(10)
C
array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
for ii in range(C.size):
C[ii] = random.rand()
C
array([0.93317549, 0.14262875, 0.6902437 , 0.87186553, 0.53890869, 0.71245388, 0.15132051, 0.20685907, 0.39431495, 0.89131517])
You can also construct an array with another array as the diagonal
np.diag(random.randint(10,20,5))
array([[16, 0, 0, 0, 0], [ 0, 13, 0, 0, 0], [ 0, 0, 11, 0, 0], [ 0, 0, 0, 15, 0], [ 0, 0, 0, 0, 14]])
You can choose which values to select.
Normally, you select the rows
first, and then the cols
of a numpy.ndarray
.
M = random.rand(10,5)
M
array([[0.68677283, 0.38902045, 0.42871994, 0.81523068, 0.64483989], [0.51884508, 0.49264815, 0.99477923, 0.18455273, 0.51262224], [0.45850799, 0.66295697, 0.81406189, 0.66847916, 0.62882748], [0.652714 , 0.24415316, 0.47897214, 0.06621372, 0.27317223], [0.04435413, 0.49195189, 0.10773559, 0.85004019, 0.29236086], [0.96106373, 0.35939549, 0.75456427, 0.00273902, 0.14031911], [0.69224339, 0.46379945, 0.59234115, 0.19991018, 0.49572112], [0.30785617, 0.97237966, 0.47133784, 0.79830205, 0.54963373], [0.30952122, 0.50024043, 0.41479166, 0.51712861, 0.57509521], [0.99702942, 0.41362465, 0.44884597, 0.8832873 , 0.95066126]])
Selecting the 1st row
M[1,:]
array([0.51884508, 0.49264815, 0.99477923, 0.18455273, 0.51262224])
The 2nd column
M[:,1]
array([0.38902045, 0.49264815, 0.66295697, 0.24415316, 0.49195189, 0.35939549, 0.46379945, 0.97237966, 0.50024043, 0.41362465])
Select a range of columns and rows
M[1:3, 2:4]
array([[0.99477923, 0.18455273], [0.81406189, 0.66847916]])
You can easily use this to create a mask, for when you are cleaning your data.
A = random.rand(3,3)
np.fill_diagonal(A, np.nan)
A
array([[ nan, 0.22709903, 0.34331486], [0.91062782, nan, 0.89461017], [0.54718005, 0.09103976, nan]])
B = np.arange(0,9).reshape((3,3))
B
array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
Appying the mask from $A \to B$
A_mask = np.isfinite(A)
A_mask
array([[False, True, True], [ True, False, True], [ True, True, False]])
B[A_mask]
array([1, 2, 3, 5, 6, 7])
This is probably one of the best functions of Numpy
.
You can use this to bin you data, and calculate means, standard deviations, etc.
# Creating my bin edges
bins = np.arange(0,13)
bins
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
# Generating Data
data = 10*random.rand(100)
data
array([4.99232218, 5.53702794, 5.929478 , 0.83550921, 9.96708561, 2.96031408, 2.00377163, 0.31854306, 0.48134572, 8.66035854, 9.95827216, 6.93070449, 4.47507617, 1.07392493, 8.15819131, 5.44619864, 2.53174022, 0.16471857, 8.35843109, 3.12430454, 4.99680908, 2.18259352, 9.21925301, 1.61365526, 7.83888345, 7.83650219, 8.6382215 , 8.30377063, 3.94558256, 5.31388424, 7.40166355, 2.91812888, 8.57977213, 0.4082193 , 1.4190216 , 0.8316332 , 1.35202807, 8.28293158, 3.60098678, 6.28447875, 7.12344948, 1.13588079, 5.05358434, 0.10274816, 1.71728426, 3.30533502, 3.41941008, 9.99568425, 5.70585609, 3.95266365, 4.94809192, 9.25017982, 1.76354834, 0.16091627, 7.69585176, 5.37392259, 5.66522166, 6.62362653, 7.96742888, 2.46147816, 7.87554625, 9.54633578, 4.74731321, 5.30576571, 1.20723003, 0.03960206, 1.01701994, 1.35132563, 4.98330919, 3.84840303, 6.82422195, 0.81665793, 4.60547073, 5.82589065, 9.54330536, 6.28828892, 6.57514647, 2.24173202, 4.48154443, 9.28000871, 9.97469965, 5.7771928 , 5.75892113, 5.58329275, 7.99999175, 2.57523747, 4.55828035, 4.25558514, 6.56544173, 1.60343203, 2.30881588, 5.87116817, 7.95971268, 7.6376034 , 7.12862717, 5.01166971, 6.17339498, 6.55501683, 6.06717032, 3.18076008])
Now I want to bin my data and calculate the mean for each bin
# Defining statistical function to use
stat_func = np.nanmean
# Binning the data
data_bins = np.digitize(data, bins)
data_bins
array([ 5, 6, 6, 1, 10, 3, 3, 1, 1, 9, 10, 7, 5, 2, 9, 6, 3, 1, 9, 4, 5, 3, 10, 2, 8, 8, 9, 9, 4, 6, 8, 3, 9, 1, 2, 1, 2, 9, 4, 7, 8, 2, 6, 1, 2, 4, 4, 10, 6, 4, 5, 10, 2, 1, 8, 6, 6, 7, 8, 3, 8, 10, 5, 6, 2, 1, 2, 2, 5, 4, 7, 1, 5, 6, 10, 7, 7, 3, 5, 10, 10, 6, 6, 6, 8, 3, 5, 5, 7, 2, 3, 6, 8, 8, 8, 6, 7, 7, 7, 4])
Calculating the mean for each of the bins
failval = -10
bins_stat = np.array([stat_func(data[data_bins == ii]) \
if len(data[data_bins == ii]) > 0 \
else failval \
for ii in range(1,len(bins))])
bins_stat = np.asarray(bins_stat)
bins_stat
array([ 0.41598935, 1.38675917, 2.46486798, 3.54718072, 4.70438024, 5.54393829, 6.4887491 , 7.67866005, 8.42595382, 9.63720271, -10. , -10. ])
You can put all of this into a function that estimates errors and more...
import math
def myceil(x, base=10):
"""
Returns the upper-bound integer of 'x' in base 'base'.
Parameters
----------
x: float
number to be approximated to closest number to 'base'
base: float
base used to calculate the closest 'largest' number
Returns
-------
n_high: float
Closest float number to 'x', i.e. upper-bound float.
Example
-------
>>>> myceil(12,10)
20
>>>>
>>>> myceil(12.05, 0.1)
12.10000
"""
n_high = float(base*math.ceil(float(x)/base))
return n_high
def myfloor(x, base=10):
"""
Returns the lower-bound integer of 'x' in base 'base'
Parameters
----------
x: float
number to be approximated to closest number of 'base'
base: float
base used to calculate the closest 'smallest' number
Returns
-------
n_low: float
Closest float number to 'x', i.e. lower-bound float.
Example
-------
>>>> myfloor(12, 5)
>>>> 10
"""
n_low = float(base*math.floor(float(x)/base))
return n_low
def Bins_array_create(arr, base=10):
"""
Generates array between [arr.min(), arr.max()] in steps of `base`.
Parameters
----------
arr: array_like, Shape (N,...), One-dimensional
Array of numerical elements
base: float, optional (default=10)
Interval between bins
Returns
-------
bins_arr: array_like
Array of bin edges for given arr
"""
base = float(base)
arr = np.array(arr)
assert(arr.ndim==1)
arr_min = myfloor(arr.min(), base=base)
arr_max = myceil( arr.max(), base=base)
bins_arr = np.arange(arr_min, arr_max+0.5*base, base)
return bins_arr
def Mean_std_calc_one_array(x1, y1, arr_len=0, statfunc=np.nanmean,
failval=np.nan, error='std',
base=10.):
"""
Calculates statistics of two arrays, e.g. scatter,
error in `statfunc`, etc.
Parameters
----------
x1: array-like, shape (N,)
array of x-values
y1: array-like, shape (N,)
array of y-values
arr_len: int, optional (default = 0)
minimum number of elements in the bin
statfunc: numpy function, optional (default = numpy.nanmean)
statistical function used to evaluate the bins
failval: int or float, optional (default = numpy.nan)
Number to use to replace when the number of elements in the
bin is smaller than `arr_len`
error: string, optional (default = 'std')
type of error to evaluate
Options:
- 'std': Evaluates the standard deviation of the bin
- 'stat': Evaluates the error in the mean/median of each bin
- 'none': Does not calculate the error in `y1`
base: float
Value of bin width in units of that of `x1`
Returns
--------
x1_stat: array-like, shape (N,)
`stat_func` of each bin in `base` spacings for x1
y1_stat: array-like, shape (N,)
`stat_func` of each bin in `base` spacings for y1
"""
x1 = np.asarray(x1)
y1 = np.asarray(y1)
assert((x1.ndim==1) & (y1.ndim==1))
assert((x1.size >0) & (y1.size>0))
n_elem = len(x1)
## Computing Bins
x1_bins = Bins_array_create(x1, base=base)
x1_digit = np.digitize(x1, x1_bins)
## Computing Statistics in bins
x1_stat = np.array([statfunc(x1[x1_digit==ii])
if len(x1[x1_digit==ii])>arr_len
else failval
for ii in range(1,x1_bins.size)])
y1_stat = np.array([statfunc(y1[x1_digit==ii])
if len(y1[x1_digit==ii])>arr_len
else failval
for ii in range(1,x1_bins.size)])
## Computing error in the data
if error=='std':
stat_err = np.nanstd
y1_err = np.array([stat_err(y1[x1_digit==ii])
if len(y1[x1_digit==ii])>arr_len
else failval
for ii in range(1,x1_bins.size)])
if error!='none':
y1_err = np.array([stat_err(y1[x1_digit==ii])/np.sqrt(len(y1[x1_digit==ii]))
if len(y1[x1_digit==ii])>arr_len
else failval
for ii in range(1,x1_bins.size)])
if (stat_func==np.median) or (stat_func==np.nanmedian):
y1_err *= 1.253
else:
y1_err = np.zeros(y1.stat.size)
return x1_stat, y1_stat, y1_err
Example of using these function:
import numpy as np
# Defining arrays
x_arr = np.arange(100)
y_arr = 50*np.random.randn(x_arr.size)
# Computing mean and error in the mean for `x_arr` and `y_arr`
x_stat, y_stat, y_err = Mean_std_calc_one_array(x_arr, y_arr,
statfunc=np.nanmean,
failval=np.nan,
base=10)
x_stat2, y_stat2, y_err2 = Mean_std_calc_one_array(x_arr, y_arr,
statfunc=np.nanmedian,
failval=np.nan,
base=10)
plt.style.use('seaborn-notebook')
plt.clf()
plt.close()
fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(111,facecolor='white')
ax.plot(x_arr, y_arr, 'ro', label='Data')
ax.errorbar(x_stat, y_stat, yerr=y_err, color='blue', marker='o',
linestyle='--',label='Mean')
ax.errorbar(x_stat2, y_stat2, yerr=y_err2, color='green', marker='o',
linestyle='--',label='Median')
ax.set_xlabel('X axis', fontsize=20)
ax.set_ylabel('Y axis', fontsize=20)
ax.set_title('Data and the Binned Data', fontsize=24)
plt.legend(fontsize=20)
plt.show()
With this function, it is really easy to apply statistics on binned data, as well as to ___estimate errors___ on the data.
One can always modify the shape of a numpy.ndarray
, as well as append it to a pre-existing array.
A = np.array([[n+m*10 for n in range(5)] for m in range(5)])
A
array([[ 0, 1, 2, 3, 4], [10, 11, 12, 13, 14], [20, 21, 22, 23, 24], [30, 31, 32, 33, 34], [40, 41, 42, 43, 44]])
n, m = A.shape
B = A.reshape((1,n*m))
B
array([[ 0, 1, 2, 3, 4, 10, 11, 12, 13, 14, 20, 21, 22, 23, 24, 30, 31, 32, 33, 34, 40, 41, 42, 43, 44]])
A_f = A.flatten()
A_f
array([ 0, 1, 2, 3, 4, 10, 11, 12, 13, 14, 20, 21, 22, 23, 24, 30, 31, 32, 33, 34, 40, 41, 42, 43, 44])
C = random.rand(A.size)
C
array([0.22546599, 0.97540312, 0.13059234, 0.60889311, 0.30114622, 0.35999722, 0.87544608, 0.22143994, 0.02629711, 0.5020551 , 0.85802423, 0.20244432, 0.4825194 , 0.38529469, 0.79174434, 0.55720808, 0.18167695, 0.58105877, 0.41776584, 0.80677464, 0.2633001 , 0.79435708, 0.87975669, 0.94088208, 0.89088446])
C.shape
(25,)
# Stacking the two arrays
D = np.column_stack((A_f,C))
D
array([[0.00000000e+00, 2.25465989e-01], [1.00000000e+00, 9.75403121e-01], [2.00000000e+00, 1.30592337e-01], [3.00000000e+00, 6.08893110e-01], [4.00000000e+00, 3.01146219e-01], [1.00000000e+01, 3.59997225e-01], [1.10000000e+01, 8.75446079e-01], [1.20000000e+01, 2.21439941e-01], [1.30000000e+01, 2.62971117e-02], [1.40000000e+01, 5.02055104e-01], [2.00000000e+01, 8.58024231e-01], [2.10000000e+01, 2.02444322e-01], [2.20000000e+01, 4.82519404e-01], [2.30000000e+01, 3.85294690e-01], [2.40000000e+01, 7.91744345e-01], [3.00000000e+01, 5.57208079e-01], [3.10000000e+01, 1.81676951e-01], [3.20000000e+01, 5.81058773e-01], [3.30000000e+01, 4.17765837e-01], [3.40000000e+01, 8.06774642e-01], [4.00000000e+01, 2.63300096e-01], [4.10000000e+01, 7.94357085e-01], [4.20000000e+01, 8.79756686e-01], [4.30000000e+01, 9.40882076e-01], [4.40000000e+01, 8.90884459e-01]])
# Selecting from 3rd to 11th row
D[2:10]
array([[ 2. , 0.13059234], [ 3. , 0.60889311], [ 4. , 0.30114622], [10. , 0.35999722], [11. , 0.87544608], [12. , 0.22143994], [13. , 0.02629711], [14. , 0.5020551 ]])
You can also concadenate different arrays
a = np.array([[1, 2], [3, 4]])
b = np.array([[5,6]])
np.concatenate((a,b))
array([[1, 2], [3, 4], [5, 6]])
np.concatenate((a,b.T), axis=1)
array([[1, 2, 5], [3, 4, 6]])
Sometimes it is important to create new copies of arrays and other objects. For this reason, one uses numpy.copy to create new copies of arrays
A = np.array([[1, 2], [3, 4]])
A
array([[1, 2], [3, 4]])
# `B` is now referring to the same array data as `A`
B = A
If we make any changes to B
, A
will also be affected by this change.
B[0,0] = 10
B
array([[10, 2], [ 3, 4]])
A
array([[10, 2], [ 3, 4]])
To get a completely independent, new object, you would use:
B = np.copy(A)
# Modifying `B`
B[0,0] = -5
B
array([[-5, 2], [ 3, 4]])
A
array([[10, 2], [ 3, 4]])
The array A
was not affected by this changed. This is important when you're constantly re-defining new arrays
SciPy
provides a large number of higher-level scientif algorithms.
It includes:
import scipy as sc
You can use Scipy
to interpolate your data.
You would use the interp1d
function to interpolate your function.
from scipy.interpolate import interp1d
def f(x):
return np.sin(x)
n = np.arange(0, 10)
x = np.linspace(0, 9, 100)
y_meas = f(n) + 0.1 * np.random.randn(len(n)) # simulate measurement with noise
y_real = f(x)
linear_interpolation = interp1d(n, y_meas)
y_interp1 = linear_interpolation(x)
cubic_interpolation = interp1d(n, y_meas, kind='cubic')
y_interp2 = cubic_interpolation(x)
fig, ax = plt.subplots(figsize=(15,6))
ax.set_facecolor('white')
ax.plot(n, y_meas, 'bs', label='noisy data')
ax.plot(x, y_real, 'k', lw=2, label='true function')
ax.plot(x, y_interp1, 'r', label='linear interp')
ax.plot(x, y_interp2, 'g', label='cubic interp')
ax.legend(loc=3, prop={'size':20});
ax.tick_params(axis='both', which='major', labelsize=20)
ax.tick_params(axis='both', which='minor', labelsize=15)
You can also use SciPy
to calculate KD-Trees for a set of points
Lbox = 250.
Npts = 1000
# Creating cartesian coordinates
x = np.random.uniform(0, Lbox, Npts)
y = np.random.uniform(0, Lbox, Npts)
z = np.random.uniform(0, Lbox, Npts)
sample1 = np.vstack([x, y, z]).T
sample1
array([[ 4.91677843, 203.53330277, 100.85634333], [ 42.6545178 , 82.63566177, 228.99066071], [232.5314 , 180.79608431, 95.0066412 ], ..., [ 76.65226267, 207.95240739, 30.11950574], [224.58498597, 221.71205368, 242.40314917], [ 30.36547633, 148.94184928, 144.29214812]])
sample1.shape
(1000, 3)
Let's say we want to know how many points are within distances of 30 and 50 from other points. To know this, you construct a KD-Tree
from scipy.spatial import cKDTree
# Initializing KDTree
KD_obj = cKDTree(sample1)
N_neighbours = cKDTree.count_neighbors(KD_obj, KD_obj, 50) - \
cKDTree.count_neighbors(KD_obj, KD_obj, 30)
print("Number of Neighbours: {0}".format(N_neighbours))
Number of Neighbours: 20536
Let's say you want to get the distances to the Nth-nearest neighbor.
k_nearest = 4
dist_k, dist_k_idx = cKDTree.query(KD_obj, sample1, k_nearest)
dist_k
array([[ 0. , 14.1577149 , 20.28848924, 22.1680788 ], [ 0. , 12.88549073, 17.92327564, 18.42130936], [ 0. , 17.05682145, 18.37226707, 25.92539045], ..., [ 0. , 17.95350521, 20.41909125, 22.79189467], [ 0. , 18.57945774, 22.80480635, 29.61321934], [ 0. , 12.66943781, 21.47467022, 27.36415359]])
You can also get the indices
dist_k_idx
array([[ 0, 38, 553, 113], [ 1, 151, 855, 541], [ 2, 212, 37, 40], ..., [997, 317, 886, 688], [998, 357, 844, 163], [999, 691, 928, 497]])
The first columns corresponds to itself.
You can also find pairs that are separated by at most a distance r
pairs = KD_obj.query_ball_tree(KD_obj, 30)
pairs[0:10]
[[0, 38, 113, 306, 526, 553, 762, 856, 946], [1, 151, 541, 552, 565, 583, 855], [2, 37, 40, 212, 561, 567, 653, 742], [3, 5, 18, 102, 342, 480, 513, 570, 618], [4, 75, 109, 207, 311, 864], [3, 5, 18, 342, 480, 513, 570, 922], [6, 323, 543, 617, 638, 655, 675, 686, 842, 873, 905, 964], [7, 112, 229, 248, 440, 463, 506, 615, 865, 927], [8, 205, 275, 296, 420, 486, 589, 636, 737, 839], [9, 719, 949]]
So that's it for today's lesson