#!/usr/bin/env python
# coding: utf-8

# # How does the SFF method work?

# Vanderburg and Johnson (2014) introduced a method for "Self Flat Fielding" by tracking how the
# lightcurve changes with motion of the spacecraft:
#
# [A Technique for Extracting Highly Precise Photometry for the Two-Wheeled Kepler Mission](http://adsabs.harvard.edu/abs/2014PASP..126..948V)
#
# In this notebook we replicate the K2SFF method following the same example source, #60021426, as
# that in the publication. We aim to demystify the technique, which is extremely popular within the
# K2 community. We have focused on reproducibility, so that we achieve the same result as the
# publication.
#
# The Vanderburg & Johnson 2014 paper uses data from the Kepler two-wheel "Concept Engineering
# Test", predating campaign 0, and sometimes called campaign *"eng"* or abbreviated CET. This
# vestigial "campaign" lacks some of the standardization of later K2 campaigns--- it was much
# shorter, only about 9 days long, it lacks some of the standard quality flags, targets have
# non-traditional EPIC IDs, and other quirks.

# In[1]:

# NOTE: notebook-only magic; `get_ipython()` is undefined when run as a plain script.
get_ipython().run_line_magic('matplotlib', 'inline')

import numpy as np
import matplotlib.pyplot as plt
from astropy.io import fits
import pandas as pd

# ## Retrieve the K2SFF data for ENG test source `60021426`

# First we will retrieve data and inspect the mask used in the paper.

# In[4]:

path = 'http://archive.stsci.edu/hlsps/k2sff/cet/060000000/21426/hlsp_k2sff_k2_lightcurve_060021426-cet_kepler_v1_llc.fits'
vdb_fits = fits.open(path)

# The `BESTAPER` keyword explains which aperture was chosen as the "best" by Vanderburg & Johnson
# 2014. The FITS header for that slice contains the metadata needed to reproduce the mask.

# In[5]:

keys = ['MASKTYPE', 'MASKINDE', 'NPIXSAP']
# A plain loop is the idiomatic way to run `print` for its side effect; the original built a
# throwaway list with a comprehension solely to trigger the prints.
for key in keys:
    print(key, ' : ', vdb_fits['BESTAPER'].header[key])

# We want the *exact same* mask as Vanderburg & Johnson 2014, but the publication version and MAST
# version differ!
# # Publication version:

# ![img](https://www.cfa.harvard.edu/~avanderb/k2/ep60021426image.png)

# MAST Version:

# ![MAST Version](http://archive.stsci.edu/hlsps/k2sff/cet/060000000/21426/hlsp_k2sff_k2_lightcurve_060021426-cet_kepler_v1_image.png)

# Aperture 7 should yield a bigger mask, more similar to what was used in the paper.

# In[6]:

# Slice out aperture index 7 from the PRF aperture table and coerce it to a boolean mask.
VDB_J_mask = vdb_fits['PRF_APER_TBL'].data[7, :, :] == True
VDB_J_mask.sum()

# Save the mask for easy use in our next notebook.

# In[7]:

np.save('VDB_J_2014_mask.npy', VDB_J_mask)

# ## Manually reproduce with the Vanderburg-provided diagnostic data

# Retrieve the Vanderburg-provided diagnostic data for the Kepler ENG testing.

# Uncomment the line below to retrieve the data programmatically, or manually get the linked file
# in a browser and save it to this directory.

# In[8]:

#! wget https://www.cfa.harvard.edu/~avanderb/k2/ep60021426alldiagnostics.csv

# In[9]:

df = pd.read_csv('ep60021426alldiagnostics.csv', index_col=False)
df.head()

# We can mean-subtract the provided $x-y$ centroids, assigning them column and row identifiers,
# then rotate the coordinates into their major and minor axes.

# In[10]:

raw_col = df[' X-centroid'].values
raw_row = df[' Y-centroid'].values
col = raw_col - raw_col.mean()
row = raw_row - raw_row.mean()

# In[11]:

def _get_eigen_vectors(centroid_col, centroid_row):
    '''Return (eigenvalues, eigenvectors) of the covariance of the centroid x, y positions.'''
    cov = np.cov(np.array([centroid_col, centroid_row]))
    vals, vecs = np.linalg.eigh(cov)
    return vals, vecs

# In[12]:

def _rotate(eig_vec, centroid_col, centroid_row):
    '''Project the centroids onto the eigenvector frame (their predominant linear axis).'''
    return eig_vec @ np.array([centroid_col, centroid_row])

# In[13]:

eig_val, eig_vec = _get_eigen_vectors(col, row)
v1, v2 = eig_vec

# The major axis is the latter.
# In[14]:

platescale = 4.0  # The Kepler plate scale; has units of arcseconds / pixel

# In[15]:

# Raw (mean-subtracted) centroid motion, in arcseconds, with the two principal axes overplotted.
x_arcsec = col * platescale
y_arcsec = row * platescale
plt.figure(figsize=(5, 6))
plt.plot(x_arcsec, y_arcsec, 'ko', ms=4)
plt.plot(x_arcsec, y_arcsec, 'ro', ms=1)
plt.xticks([-2, -1, 0, 1, 2])
plt.yticks([-2, -1, 0, 1, 2])
plt.xlabel('X position [arcseconds]')
plt.ylabel('Y position [arcseconds]')
plt.xlim(-2, 2)
plt.ylim(-2, 2)
plt.plot([0, v1[0]], [0, v1[1]], color='blue', lw=3)
plt.plot([0, v2[0]], [0, v2[1]], color='blue', lw=3);

# Following the form of **Figure 2** of Vanderburg & Johnson 2014.

# In[16]:

rot_colp, rot_rowp = _rotate(eig_vec, col, row)  # units in pixels

# You can rotate into the new reference frame.

# In[17]:

# Same centroids, now in the rotated (principal-axis) frame; unit vectors mark the new axes.
xp_arcsec = rot_rowp * platescale
yp_arcsec = rot_colp * platescale
plt.figure(figsize=(5, 6))
plt.plot(xp_arcsec, yp_arcsec, 'ko', ms=4)
plt.plot(xp_arcsec, yp_arcsec, 'ro', ms=1)
plt.xticks([-2, -1, 0, 1, 2])
plt.yticks([-2, -1, 0, 1, 2])
plt.xlabel("X' position [arcseconds]")
plt.ylabel("Y' position [arcseconds]")
plt.xlim(-2, 2)
plt.ylim(-2, 2)
plt.plot([0, 1], [0, 0], color='blue')
plt.plot([0, 0], [0, 1], color='blue');

# We need to calculate the arclength using:

# \begin{equation}s= \int_{x'_0}^{x'_1}\sqrt{1+\left( \frac{dy'_p}{dx'}\right)^2} dx'\end{equation}
#
# where $x^\prime_0$ is the transformed $x$ coordinate of the point with the smallest $x^\prime$
# position, and $y^\prime_p$ is the best--fit polynomial function.

# Fit a $5^{th}$ order polynomial to the rotated coordinates.
# In[18]: z = np.polyfit(rot_rowp, rot_colp, 5) p5 = np.poly1d(z) p5_deriv = p5.deriv() # In[19]: x0_prime = np.min(rot_rowp) xmax_prime = np.max(rot_rowp) x_dense = np.linspace(x0_prime, xmax_prime, 2000) # In[20]: plt.plot(rot_rowp, rot_colp, '.') plt.plot(x_dense, p5(x_dense)) plt.ylabel('Position along minor axis (pixels)') plt.xlabel('Position along major axis (pixels)') plt.title('Performance of polynomial regression') plt.ylim(-0.1, 0.1); # We see evidence for a [bias-variance tradeoff](https://en.wikipedia.org/wiki/Bias%E2%80%93variance_tradeoff), suggesting some modest opportunity for improvement. # In[21]: @np.vectorize def arclength(x): '''Input x1_prime, get out arclength''' gi = x_dense