Introduction

Date: Nov 25, 2019

Usual Filtering

df = df.query('calib_psfCandidate == 0.0')
df = df.query('deblend_nChild == 0.0')
df['ellip'] = np.hypot( df['ext_shapeHSM_HsmShapeRegauss_e1'] ,
                        df['ext_shapeHSM_HsmShapeRegauss_e2'] )
df = df.query('ellip < 2.0') # it was 1.5 before

#select only few columns after filtering:
cols_select = ['base_SdssCentroid_x', 'base_SdssCentroid_y',
                'base_SdssCentroid_xSigma','base_SdssCentroid_ySigma',
                'ext_shapeHSM_HsmShapeRegauss_e1','ext_shapeHSM_HsmShapeRegauss_e2',
                'base_SdssShape_flux']
df = df[cols_select]        

# drop all nans
df = df.dropna()

# additional columns
df['radius'] =  df.eval(""" ( (ext_shapeHSM_HsmSourceMoments_xx *  ext_shapeHSM_HsmSourceMoments_yy) \
                                          -  (ext_shapeHSM_HsmSourceMoments_xy**2 ) )**0.25 """)

Shape filtering
https://github.com/LSSTDESC/DC2-analysis/blob/master/tutorials/object_gcr_2_lensing_cuts.ipynb

df = df.query('ext_shapeHSM_HsmShapeRegauss_resolution >= 0.3')
df = df.query('ext_shapeHSM_HsmShapeRegauss_sigma <= 0.4')
df = df.query('ext_shapeHSM_HsmShapeRegauss_flag== 0.0')

Filter strongly lensed objects

  • Take the objects with centroids >154 pixels (remove strong lens objects).
    # exclude strong lens objects <=154 distance
    # The shape of lsst.fits file is 3998,3998 and center is 1699,1699.
    df['x_center'] = 1699
    df['y_center'] = 1699
    df['distance'] = ( (df['x[0]'] - df['x_center'])**2 + (df['x[1]'] - df['y_center'])**2 )**0.5
    df = df[df.distance > 154]
    

Imcat script

lc -b +all 
'x = %x[0][0] %x[1][0] + %x[2][0] + %x[3][0] + 4 / %x[0][1] %x[1][1] + %x[2][1] + %x[3][1] + 4 / 2 vector'
'gm = %g[0][0] %g[1][0] + 2 / %g[0][1] %g[1][1] + 2 / 2 vector' 
'gc = %g[2][0] %g[3][0] + 2 / %g[2][1] %g[3][1] + 2 / 2 vector'   
'gmd = %g[0][0] %g[1][0] - 2 / %g[0][1] %g[1][1] - 2 / 2 vector' 
'gcd = %g[2][0] %g[3][0] - 2 / %g[2][1] %g[3][1] - 2 / 2 vector' 
< ${catalogs}/merge.cat > ${final}/final_${i}.cat

Notes

final_text.txt is created by imcat program after merging four lsst files (m,m9,l,l9) after cleaning.

Imports

In [2]:
import json, os,sys
import numpy as np
import pandas as pd
import seaborn as sns
sns.set(color_codes=True)

pd.set_option('display.max_columns',200)

import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
In [3]:
# if not os.path.isdir('images'):
#     os.makedirs('images')
    
# if not os.path.isdir('fitsfiles'):
#     os.makedirs('fitsfiles')

Plot the gsq vs gmdsq

gsq = g00 g00 + g10 g10
gmdsq = gmd0**2 + gmd1**2
In [4]:
!head -2 ../data/final/final_text.txt
#       fN[0][0]       fN[1][0]       fN[2][0]       fN[3][0]       id[0][0]       id[1][0]       id[2][0]       id[3][0]           x[0]           x[1]     errx[0][0]     errx[0][1]     errx[1][0]     errx[1][1]     errx[2][0]     errx[2][1]     errx[3][0]     errx[3][1]        g[0][0]        g[0][1]        g[1][0]        g[1][1]        g[2][0]        g[2][1]        g[3][0]        g[3][1]    ellip[0][0]    ellip[1][0]    ellip[2][0]    ellip[3][0]     flux[0][0]     flux[1][0]     flux[2][0]     flux[3][0]   radius[0][0]   radius[1][0]   radius[2][0]   radius[3][0]          gm[0]          gm[1]          gc[0]          gc[1]         gmd[0]         gmd[1]         gcd[0]         gcd[1]
               0              0              0              0           5301           5314           5231           5117       88.17075      1847.1934         0.0196         0.0249         0.0227         0.0216           0.02         0.0256         0.0231          0.022        -0.4253         0.1855          0.273        -0.3021        -0.4257         0.1904         0.2778        -0.3155      0.4639939     0.40717737     0.46633963     0.42037256       79841.47      82737.354      80303.923      83923.908      5.1869534      5.2938582      5.2678266       5.390682       -0.07615        -0.0583       -0.07395       -0.06255       -0.34915         0.2438       -0.35175        0.25295
In [5]:
names = "fN[0][0]       fN[1][0]       fN[2][0]       fN[3][0]       id[0][0]       id[1][0]       id[2][0]       id[3][0]           x[0]           x[1]     errx[0][0]     errx[0][1]     errx[1][0]     errx[1][1]     errx[2][0]     errx[2][1]     errx[3][0]     errx[3][1]        g[0][0]        g[0][1]        g[1][0]        g[1][1]        g[2][0]        g[2][1]        g[3][0]        g[3][1]    ellip[0][0]    ellip[1][0]    ellip[2][0]    ellip[3][0]     flux[0][0]     flux[1][0]     flux[2][0]     flux[3][0]   radius[0][0]   radius[1][0]   radius[2][0]   radius[3][0]          gm[0]          gm[1]          gc[0]          gc[1]         gmd[0]         gmd[1]         gcd[0]         gcd[1]".split()
print(names)
['fN[0][0]', 'fN[1][0]', 'fN[2][0]', 'fN[3][0]', 'id[0][0]', 'id[1][0]', 'id[2][0]', 'id[3][0]', 'x[0]', 'x[1]', 'errx[0][0]', 'errx[0][1]', 'errx[1][0]', 'errx[1][1]', 'errx[2][0]', 'errx[2][1]', 'errx[3][0]', 'errx[3][1]', 'g[0][0]', 'g[0][1]', 'g[1][0]', 'g[1][1]', 'g[2][0]', 'g[2][1]', 'g[3][0]', 'g[3][1]', 'ellip[0][0]', 'ellip[1][0]', 'ellip[2][0]', 'ellip[3][0]', 'flux[0][0]', 'flux[1][0]', 'flux[2][0]', 'flux[3][0]', 'radius[0][0]', 'radius[1][0]', 'radius[2][0]', 'radius[3][0]', 'gm[0]', 'gm[1]', 'gc[0]', 'gc[1]', 'gmd[0]', 'gmd[1]', 'gcd[0]', 'gcd[1]']
In [6]:
names = ['fN[0][0]', 'fN[1][0]', 'fN[2][0]', 'fN[3][0]', 
         'id[0][0]', 'id[1][0]', 'id[2][0]', 'id[3][0]',
         'x[0]', 'x[1]', 'errx[0][0]', 'errx[0][1]', 'errx[1][0]',
         'errx[1][1]', 'errx[2][0]', 'errx[2][1]', 'errx[3][0]',
         'errx[3][1]', 'g[0][0]', 'g[0][1]', 'g[1][0]', 'g[1][1]',
         'g[2][0]', 'g[2][1]', 'g[3][0]', 'g[3][1]',
         'ellip[0][0]', 'ellip[1][0]', 'ellip[2][0]', 'ellip[3][0]',
         'flux[0][0]', 'flux[1][0]', 'flux[2][0]', 'flux[3][0]',
         'radius[0][0]', 'radius[1][0]', 'radius[2][0]', 'radius[3][0]',
         'gm[0]', 'gm[1]', 'gc[0]', 'gc[1]',
         'gmd[0]', 'gmd[1]', 'gcd[0]', 'gcd[1]']


file_path = f'../data/final/final_text.txt'


df = pd.read_csv(file_path,comment='#',engine='python',sep=r'\s\s+',
                 header=None,names=names)

print(df.shape)
df.head()
(57318, 46)
Out[6]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1]
0 0 0 0 0 5301 5314 5231 5117 88.17075 1847.19340 0.0196 0.0249 0.0227 0.0216 0.0200 0.0256 0.0231 0.0220 -0.4253 0.1855 0.2730 -0.3021 -0.4257 0.1904 0.2778 -0.3155 0.463994 0.407177 0.466340 0.420373 79841.4700 82737.3540 80303.9230 83923.9080 5.186953 5.293858 5.267827 5.390682 -0.07615 -0.05830 -0.07395 -0.06255 -0.34915 0.24380 -0.35175 0.25295
1 0 0 0 0 3941 3957 3897 3923 3214.45390 930.33603 0.0344 0.0212 0.0331 0.0232 0.0344 0.0212 0.0332 0.0232 0.9068 0.3231 0.7867 0.3391 0.9179 0.3265 0.7956 0.3416 0.962642 0.856671 0.974240 0.865835 33913.5470 34112.9040 33903.5430 34114.7980 4.676457 4.750963 4.675408 4.751770 0.84675 0.33110 0.85675 0.33405 0.06005 -0.00800 0.06115 -0.00755
2 0 0 0 0 1301 1310 1323 1312 2652.56650 1772.34480 0.2510 0.1715 0.1663 0.3002 0.2522 0.1715 0.1665 0.3017 0.9614 0.5881 -0.9979 -0.4635 1.0062 0.6076 -1.0206 -0.4729 1.127010 1.100289 1.175422 1.124837 3694.2411 3674.4453 3684.1640 3663.4596 4.161950 4.303319 4.159870 4.301257 -0.01825 0.06230 -0.00720 0.06735 0.97965 0.52580 1.01340 0.54025
3 0 0 0 0 3564 3564 3541 3538 2536.84490 712.48793 0.0071 0.0125 0.0071 0.0129 0.0074 0.0129 0.0074 0.0134 -1.0289 0.4499 -1.0196 0.3961 -0.9862 0.4332 -0.9816 0.3755 1.122963 1.093837 1.077150 1.050970 107866.5700 109330.9900 109214.9900 110405.2400 4.848973 4.967938 4.963241 5.096215 -1.02425 0.42300 -0.98390 0.40435 -0.00465 0.02690 -0.00230 0.02885
4 0 0 0 0 4634 4659 4569 4615 109.82575 1405.32120 0.3760 0.3919 0.2353 0.2783 0.3803 0.3949 0.2379 0.2831 0.2055 0.1655 -0.1817 -0.0868 0.2052 0.1698 -0.1773 -0.0987 0.263857 0.201368 0.266344 0.202921 3512.8911 3518.9333 3517.0462 3518.1529 4.338535 4.362407 4.374177 4.392268 0.01190 0.03935 0.01395 0.03555 0.19360 0.12615 0.19125 0.13425
In [7]:
# new columns
# df['gsq'] = df['g[0][0]'] **2 + df['g[1][0]']**2 # only for imcat 00 and 10
# df['gmdsq'] = df['gmd[0]'] **2 + df['gmd[1]']**2

df['gsq'] = df['g[0][0]'] **2 + df['g[0][1]']**2
df['gmdsq'] = df['gmd[0]'] **2 + df['gmd[1]']**2

df['gmsq'] = df['gm[0]']**2 + df['gm[1]']**2
df['gcsq'] = df['gc[0]']**2 + df['gc[1]']**2
In [8]:
df.head()
Out[8]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] gsq gmdsq gmsq gcsq
0 0 0 0 0 5301 5314 5231 5117 88.17075 1847.19340 0.0196 0.0249 0.0227 0.0216 0.0200 0.0256 0.0231 0.0220 -0.4253 0.1855 0.2730 -0.3021 -0.4257 0.1904 0.2778 -0.3155 0.463994 0.407177 0.466340 0.420373 79841.4700 82737.3540 80303.9230 83923.9080 5.186953 5.293858 5.267827 5.390682 -0.07615 -0.05830 -0.07395 -0.06255 -0.34915 0.24380 -0.35175 0.25295 0.215290 0.181344 0.009198 0.009381
1 0 0 0 0 3941 3957 3897 3923 3214.45390 930.33603 0.0344 0.0212 0.0331 0.0232 0.0344 0.0212 0.0332 0.0232 0.9068 0.3231 0.7867 0.3391 0.9179 0.3265 0.7956 0.3416 0.962642 0.856671 0.974240 0.865835 33913.5470 34112.9040 33903.5430 34114.7980 4.676457 4.750963 4.675408 4.751770 0.84675 0.33110 0.85675 0.33405 0.06005 -0.00800 0.06115 -0.00755 0.926680 0.003670 0.826613 0.845610
2 0 0 0 0 1301 1310 1323 1312 2652.56650 1772.34480 0.2510 0.1715 0.1663 0.3002 0.2522 0.1715 0.1665 0.3017 0.9614 0.5881 -0.9979 -0.4635 1.0062 0.6076 -1.0206 -0.4729 1.127010 1.100289 1.175422 1.124837 3694.2411 3674.4453 3684.1640 3663.4596 4.161950 4.303319 4.159870 4.301257 -0.01825 0.06230 -0.00720 0.06735 0.97965 0.52580 1.01340 0.54025 1.270152 1.236180 0.004214 0.004588
3 0 0 0 0 3564 3564 3541 3538 2536.84490 712.48793 0.0071 0.0125 0.0071 0.0129 0.0074 0.0129 0.0074 0.0134 -1.0289 0.4499 -1.0196 0.3961 -0.9862 0.4332 -0.9816 0.3755 1.122963 1.093837 1.077150 1.050970 107866.5700 109330.9900 109214.9900 110405.2400 4.848973 4.967938 4.963241 5.096215 -1.02425 0.42300 -0.98390 0.40435 -0.00465 0.02690 -0.00230 0.02885 1.261045 0.000745 1.228017 1.131558
4 0 0 0 0 4634 4659 4569 4615 109.82575 1405.32120 0.3760 0.3919 0.2353 0.2783 0.3803 0.3949 0.2379 0.2831 0.2055 0.1655 -0.1817 -0.0868 0.2052 0.1698 -0.1773 -0.0987 0.263857 0.201368 0.266344 0.202921 3512.8911 3518.9333 3517.0462 3518.1529 4.338535 4.362407 4.374177 4.392268 0.01190 0.03935 0.01395 0.03555 0.19360 0.12615 0.19125 0.13425 0.069621 0.053395 0.001690 0.001458
In [9]:
df.plot.scatter(x='gsq',y='gmdsq', figsize=(12,8))
Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x11d1d2240>

Matrix of Number density from two columns

In [10]:
def matrix_of_number_density_from_two_cols(df,xcol,ycol,N):
    """Create grid of number density of two columns
    
    - Find the absolute max from two columns.
    - Create N bins -absMax to +absMax.
    - Return a matrix of N*N shape.
    """
    from itertools import product
    
    # derived variables
    xlabel = xcol
    ylabel = ycol
    xlabel1 = xlabel + '_cat_labels'
    ylabel1 = ylabel + '_cat_labels'
    
    xlabel2 = xlabel + '_cat'
    ylabel2 = ylabel + '_cat'
    colname = 'cat_freq'
    
    # take only xlabel and ylabel columns
    dx = df[[xlabel, ylabel]].copy()
    
    # get absolute maximum from two columns
    max_abs_xcol_ycol = df[[xcol,ycol]].abs().max().max()
    
    # create 1d array with N+1 values to create N bins
    bins = np.linspace(0, max_abs_xcol_ycol,N+1)

    # create N bins
    dx[xlabel1] = pd.cut(dx[xlabel], bins, labels=np.arange(N))
    dx[ylabel1] = pd.cut(dx[ylabel], bins, labels=np.arange(N))

    # count number of points in each bin
    dx[colname] = dx.groupby([xlabel1,ylabel1])[xlabel1].transform('count')

    # drop duplicates
    dx1 = dx.drop_duplicates(subset=[xlabel1,ylabel1])[[xlabel1,ylabel1,colname]]

    # use permutation to get the grid of N * N
    perms = list(product(range(N), range(N)))
    x = [i[0] for i in perms]
    y = [i[1] for i in perms]
    dx2 = pd.DataFrame({xlabel1: x, ylabel1: y, colname:0})

    # update dx2 to merge frequency values
    dx2.update(dx2.drop(colname,1).merge(dx1,how='left'))
    dx2 = dx2.astype(int)

    # z values to plot heatmap
    z = dx2[colname].values.reshape(N,N)
    z = z.T

    return z

Transform and scale the data

In [11]:
def transform_scale(z,transform='linear',scale=None):
    """Transform and scale given 1d numpy array.
    
    transform: linear, log, sqrt, sinh, arcsinh
    scale    : minmax, zscale
    
    """
    #==================================
    # linear, log, sqrt, arcsinh, sinh 
    #
    # we need linear tranform option to compare.
    if transform == 'linear':
        z = z

    if transform == 'log':
        z = np.log1p(z)
        
    if transform == 'sqrt':
        z = np.sqrt(z)

    if transform == 'sinh':
        z = np.sinh(z)
        
    if transform == 'arcsinh':
        z = np.arcsinh(z)
    
    #===============================
    # scaling minmax and zscale
    if scale== 'minmax':
        z = z / (z.max()-z.min())
    if scale == 'zscale':
        z = (z-z.mean()) / z.std()
        
    return z

Contour plots

In [12]:
import plotly
import plotly.graph_objs as go
import plotly.figure_factory as ff
import plotly.tools as tls
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=False)