This notebook contains the solution to the Bayesian model of 47 Ursae Majoris.
Your task is to find the orbital parameters of the first planet in this system, using data from Table 1 of Fischer et al. (2002).
The following IPython magic command will create the data file 47UrsaeMajoris.txt
in the current directory:
%%file 47UrsaeMajoris.txt
date rv rv_err
6959.737 -60.48 14.00
7194.912 -53.60 7.49
7223.798 -38.36 6.14
7964.893 0.60 8.19
8017.730 -28.29 10.57
8374.771 -40.25 9.37
8647.897 42.37 11.41
8648.910 32.64 11.02
8670.878 55.45 11.45
8745.691 51.78 8.76
8992.061 4.49 11.21
9067.771 -14.63 7.00
9096.734 -26.06 6.79
9122.691 -47.38 7.91
9172.686 -38.22 10.55
9349.912 -52.21 9.52
9374.964 -48.69 8.67
9411.839 -36.01 12.81
9481.720 -52.46 13.40
9767.918 38.58 5.48
9768.908 36.68 5.02
9802.789 37.93 3.85
10058.079 15.82 3.45
10068.980 15.46 4.63
10072.012 21.20 4.09
10088.994 1.30 4.25
10089.947 6.12 3.70
10091.900 0.00 4.16
10120.918 4.07 4.16
10124.905 0.29 3.74
10125.823 -1.87 3.79
10127.898 -0.68 4.10
10144.877 -4.13 5.26
10150.797 -8.14 4.18
10172.829 -10.79 4.43
10173.762 -9.33 5.43
10181.742 -23.87 3.28
10187.740 -16.70 4.67
10199.730 -16.29 3.98
10203.733 -21.84 4.92
10214.731 -24.51 3.67
10422.018 -56.63 4.23
10438.001 -39.61 3.91
10442.027 -44.62 4.05
10502.853 -32.05 4.69
10504.859 -39.08 4.65
10536.845 -22.46 5.18
10537.842 -22.83 4.16
10563.673 -17.47 4.03
10579.697 -11.01 3.84
10610.719 -8.67 3.52
10793.957 37.00 3.78
10795.039 41.85 4.80
10978.684 36.42 5.01
11131.066 13.56 6.61
11175.027 -3.74 8.17
11242.842 -21.85 5.43
11303.712 -48.75 4.63
11508.070 -51.65 8.37
11536.064 -72.44 4.73
11540.999 -57.58 5.97
11607.916 -43.94 4.94
11626.771 -39.14 7.03
11627.754 -50.88 6.21
11628.727 -51.52 5.87
11629.832 -51.86 4.60
11700.693 -24.58 5.20
11861.049 14.64 5.33
11874.068 14.15 5.75
11881.045 18.02 4.15
11895.068 16.96 4.60
11906.014 11.73 4.07
11907.011 22.83 4.38
11909.042 23.42 3.78
11910.955 18.34 4.33
11914.067 15.45 5.37
11915.048 24.05 3.82
11916.033 23.16 3.67
11939.969 27.53 5.08
11946.960 21.44 4.18
11969.902 30.99 4.58
11971.894 38.36 5.01
11998.779 33.82 3.93
11999.820 27.52 3.98
12000.858 23.40 4.07
12028.740 37.08 4.95
12033.746 26.28 5.24
12040.759 31.12 3.54
12041.719 34.04 3.45
12042.695 31.38 3.98
12073.723 21.81 4.73
But first, we'll copy some relevant material from the first notebook.
# Render matplotlib figures inline in the notebook.
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn; seaborn.set() #nice plot formatting
from collections import namedtuple
# Orbital-model parameter vector used throughout:
#   T     - orbital period (days)
#   e     - eccentricity
#   K     - radial-velocity semi-amplitude
#   V     - systemic (center-of-mass) velocity
#   omega - argument of periastron (radians)
#   chi   - phase offset as a fraction of a period (0-1)
#   s     - extra "jitter" noise added in quadrature to the quoted errors
params = namedtuple('params', ['T', 'e', 'K', 'V', 'omega', 'chi', 's'])
from scipy import optimize
@np.vectorize
def compute_E(M, e):
    """Solve Kepler's equation E - e*sin(E) = M for the eccentric anomaly.

    Parameters
    ----------
    M : float
        Mean anomaly, assumed to lie in [0, 2*pi).
    e : float
        Orbital eccentricity (0 <= e < 1).

    Returns
    -------
    float (broadcast to an ndarray by ``np.vectorize``)
        Eccentric anomaly E, bracketed within [0, 2*pi].
    """
    def kepler_residual(E):
        # Zero exactly when E satisfies Kepler's equation for (M, e).
        return E - e * np.sin(E) - M

    # The residual is monotonic in E, so a bracketing root-finder is safe.
    return optimize.brentq(kepler_residual, 0, 2 * np.pi)
def radial_velocity(t, theta):
    """Model radial velocity at time(s) ``t`` for orbital parameters ``theta``.

    ``theta`` holds (T, e, K, V, omega, chi, ...) in the ``params`` field
    order; any trailing entries (e.g. the jitter ``s``) are ignored here.
    """
    T, e, K, V, omega, chi = theta[:6]
    # Mean anomaly, wrapped into [0, 2*pi)
    phase = (t / T + chi) % 1
    M = 2 * np.pi * phase
    # Eccentric anomaly from Kepler's equation
    E = compute_E(M, e)
    # True anomaly via the half-angle relation
    half_E = E / 2
    f = 2 * np.arctan2(np.sqrt(1 + e) * np.sin(half_E),
                       np.sqrt(1 - e) * np.cos(half_E))
    # Project the orbit onto the line of sight
    return V - K * (np.sin(f + omega) + e * np.sin(omega))
# Hard prior bounds (lo, hi) for every model parameter, in field order.
bounds = dict(T=(0.2, 2000),
              e=(0, 1),
              K=(0.01, 2000),
              V=(-2000, 2000),
              omega=(0, 2 * np.pi),
              chi=(0, 1),
              s=(0.001, 100))
theta_lim = params(**bounds)
# Split into aligned lower/upper arrays for fast vectorized range checks.
_limits = np.array(theta_lim)
theta_min, theta_max = _limits[:, 0], _limits[:, 1]
def log_prior(theta):
    """Log prior density of ``theta``, up to an additive constant.

    Zero probability outside the box [theta_min, theta_max]; within it,
    T, K, and s carry Jeffreys (1/x) priors and the rest are uniform.
    """
    out_of_bounds = np.any(theta < theta_min) or np.any(theta > theta_max)
    if out_of_bounds:
        return -np.inf  # log(0)
    # Jeffreys prior: p(x) ∝ 1/x  =>  log p = -log(x), applied to T, K, s
    jeffreys_idx = [0, 2, 6]
    return -np.log(theta[jeffreys_idx]).sum()
def log_likelihood(theta, t, rv, rv_err):
    """Gaussian log-likelihood of the data, up to an additive constant.

    The model variance per point is ``rv_err**2 + s**2`` — the quoted
    measurement error with the jitter term theta[6] added in quadrature.
    """
    variance = rv_err ** 2 + theta[6] ** 2
    residual = rv - radial_velocity(t, theta)
    return -0.5 * np.sum(np.log(variance) + residual ** 2 / variance)
def log_posterior(theta, t, rv, rv_err):
    """Unnormalized log posterior: log prior + log likelihood.

    Skips the (expensive) likelihood entirely when the prior is zero, so
    out-of-bounds parameter vectors never reach the orbit solver.
    """
    lp = log_prior(theta)
    if not np.isinf(lp):
        lp = lp + log_likelihood(theta, t, rv, rv_err)
    return lp
def make_starting_guess(t, rv, rv_err):
    """Heuristic initial parameter vector for the MCMC.

    The period comes from a Lomb-Scargle periodogram search restricted to
    the prior range on T; the remaining parameters are rough data-driven
    guesses.
    """
    ls = LombScargleFast()
    ls.optimizer.set(period_range=theta_lim.T,
                     quiet=True)
    ls.fit(t, rv, rv_err)
    half_spread = 0.5 * (np.max(rv) - np.min(rv))
    return params(T=ls.best_period,
                  e=0.1,               # mildly eccentric starting orbit
                  K=half_spread,       # semi-amplitude ~ half the RV spread
                  V=np.mean(rv),       # systemic velocity ~ mean RV
                  omega=np.pi,
                  chi=0.5,
                  s=rv_err.mean())     # jitter ~ typical quoted error
After running the above command, your data will be in a file called 47UrsaeMajoris.txt.
An easy way to load data in this form is the pandas package, which implements a DataFrame object (basically, a labeled data table). Reading the CSV file is a one-line operation:
# Load the whitespace-delimited table into a DataFrame.
# sep=r'\s+' replaces delim_whitespace=True, which is deprecated in
# pandas 2.2 and removed in pandas 3.0; behavior is identical.
import pandas as pd
data = pd.read_csv('47UrsaeMajoris.txt', sep=r'\s+')
# Unpack the three columns (date, rv, rv_err) as separate arrays.
t, rv, rv_err = data.values.T
# Start by Visualizing the Data
plt.errorbar(t, rv, rv_err, fmt='.');
# Compute the periodogram to look for significant periodicity
from gatspy.periodic import LombScargleFast
model = LombScargleFast()
model.fit(t, rv, rv_err)
periods, power = model.periodogram_auto()
# Power vs. trial period on a log axis; peaks mark candidate orbital periods.
plt.semilogx(periods, power)
plt.xlim(0, 10000);
# Use the periodogram peak (plus rough heuristics) as the MCMC starting point.
theta_guess = make_starting_guess(t, rv, rv_err)
# Bare expression: displays the starting guess as the notebook cell output.
theta_guess
import emcee
ndim = len(theta_guess) # number of parameters in the model
nwalkers = 50 # number of MCMC walkers
# start with a tight distribution of theta around the initial guess
# (multiplicative ~10% Gaussian perturbation of each parameter per walker)
rng = np.random.RandomState(42)
starting_guesses = theta_guess * (1 + 0.1 * rng.randn(nwalkers, ndim))
# create the sampler; fix the random state for replicability
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posterior, args=(t, rv, rv_err))
sampler.random_state = rng
# time and run the MCMC
%time pos, prob, state = sampler.run_mcmc(starting_guesses, 1000)
def plot_chains(sampler):
    """Trace plots: every walker's path for each of the 7 model parameters."""
    fig, axes = plt.subplots(7, figsize=(8, 10), sharex=True)
    for idx, (axis, field) in enumerate(zip(axes, params._fields)):
        # chain shape is (nwalkers, nsteps, ndim); transpose so that
        # step number runs along the x axis, one line per walker.
        axis.plot(sampler.chain[:, :, idx].T, '-k', alpha=0.2)
        axis.set_ylabel(field)
# Inspect the burn-in: walkers start tightly clustered and then spread out.
plot_chains(sampler)
# Discard the burn-in samples and continue from the current walker positions.
sampler.reset()
%time pos, prob, state = sampler.run_mcmc(pos, 1000)
# The post-burn-in chains should now look stationary.
plot_chains(sampler)
import corner
# Joint posterior of period T and eccentricity e (the first two parameters).
corner.corner(sampler.flatchain[:, :2],
              labels=params._fields[:2]);
# Overplot 200 posterior-sample model curves on the data.
t_fit = np.linspace(t.min(), t.max(), 1000)
rv_fit = [radial_velocity(t_fit, sampler.flatchain[i])
          for i in rng.choice(sampler.flatchain.shape[0], 200)]
plt.figure(figsize=(14, 6))
plt.errorbar(t, rv, rv_err, fmt='.k')
# Low alpha so the band of curves conveys the model uncertainty.
plt.plot(t_fit, np.transpose(rv_fit), '-k', alpha=0.01)
plt.xlabel('time (days)')
# The Fischer et al. (2002) velocities are in m/s (amplitudes ~50 m/s);
# the previous label said km/s, which was off by a factor of 1000.
plt.ylabel('radial velocity (m/s)');
# Posterior mean and standard deviation for each parameter across all
# walkers and steps (flatchain has shape (nwalkers*nsteps, ndim)).
mean = sampler.flatchain.mean(0)
std = sampler.flatchain.std(0)
print("Period = {0:.0f} +/- {1:.0f} days".format(mean[0], std[0]))
print("eccentricity = {0:.2f} +/- {1:.2f}".format(mean[1], std[1]))
In this case, a simple mean and standard deviation is probably not the best summary of the data, because the posterior has multiple modes. We could isolate the strongest mode and then get a better estimate:
# Keep only samples from the dominant mode (periods above ~1050 days).
chain = sampler.flatchain[sampler.flatchain[:, 0] > 1050]
mean = chain.mean(0)
std = chain.std(0)
print("Period = {0:.0f} +/- {1:.0f} days".format(mean[0], std[0]))
print("eccentricity = {0:.2f} +/- {1:.2f}".format(mean[1], std[1]))
# Corner plot restricted to the selected mode.
corner.corner(chain[:, :2],
              labels=params._fields[:2]);
This is a better representation of the parameter estimates for the most prominent peak.
If you finish early, try tackling this...
One reason we see multiple modes in the posterior is that there are actually multiple planets in the system! For example, the source of the above data is a paper which actually reports two detected planets.
Build a Bayesian model which models two planets simultaneously: can you find signals from both planets in the data?