Date: Nov 19, 2019
Update Nov 19
Reference: https://github.com/LSSTDESC/DC2-analysis/blob/master/tutorials/object_gcr_2_lensing_cuts.ipynb
df = df.query('ext_shapeHSM_HsmShapeRegauss_resolution >= 0.3')
df = df.query('ext_shapeHSM_HsmShapeRegauss_sigma <= 0.4')
df = df.query('ext_shapeHSM_HsmShapeRegauss_flag== 0.0')
IMCAT Script to get final_text.txt from dmstack_txt
lc -b +all
'x = %x[0][0] %x[1][0] + %x[2][0] + %x[3][0] + 4 / %x[0][1] %x[1][1] + %x[2][1] + %x[3][1] + 4 / 2 vector'
'gm = %g[0][0] %g[1][0] + 2 / %g[0][1] %g[1][1] + 2 / 2 vector'
'gc = %g[2][0] %g[3][0] + 2 / %g[2][1] %g[3][1] + 2 / 2 vector'
'gmd = %g[0][0] %g[1][0] - 2 / %g[0][1] %g[1][1] - 2 / 2 vector'
'gcd = %g[2][0] %g[3][0] - 2 / %g[2][1] %g[3][1] - 2 / 2 vector'
< ${catalogs}/merge.cat > ${final}/final_${i}.cat
Usual Filtering
df = df.query('calib_psfCandidate == 0.0')
df = df.query('deblend_nChild == 0.0')
df['ellip'] = np.hypot( df['ext_shapeHSM_HsmShapeRegauss_e1'] ,
df['ext_shapeHSM_HsmShapeRegauss_e2'] )
df = df.query('ellip < 2.0')
Select only a few columns after filtering:
cols_select = ['base_SdssCentroid_x', 'base_SdssCentroid_y',
'base_SdssCentroid_xSigma','base_SdssCentroid_ySigma',
'ext_shapeHSM_HsmShapeRegauss_e1','ext_shapeHSM_HsmShapeRegauss_e2',
'base_SdssShape_flux']
df = df[cols_select]
# drop all nans
df = df.dropna()
Notes
import numpy as np
import pandas as pd
import seaborn as sns
sns.set(color_codes=True)
pd.set_option('display.max_columns',200)
import matplotlib.pyplot as plt
%matplotlib inline
import json
def show_method_attributes(obj, ncols=7,start=None, inside=None):
    """Tabulate the public attribute names of an object.

    Prints the type of ``obj`` and returns the names from ``dir(obj)`` that
    do not begin with an underscore, laid out in ``ncols`` columns.

    Parameters
    ----------
    obj : object
        Object whose attributes are listed.
    ncols : int
        Number of columns in the returned table.
    start : str, tuple or list, optional
        Keep only names that start with this prefix (or with any of the
        given prefixes).
    inside : str, tuple or list, optional
        Keep only names that contain this substring (or any of the given
        substrings).

    Returns
    -------
    pandas.DataFrame
        Attribute names split into ``ncols`` columns; unused cells are ''.

    Example
    -------
    show_method_attributes(list)
    """
    print(f'Object Type: {type(obj)}\n')

    # names of commonly imported modules that are filtered out of the listing
    excluded = 'os np pd sys time psycopg2'.split()
    lst = [elem for elem in dir(obj)
           if elem[0] != '_' and elem not in excluded]

    if isinstance(start, str):
        lst = [elem for elem in lst if elem.startswith(start)]
    elif isinstance(start, (tuple, list)):
        # str.startswith accepts a tuple of prefixes, so each name is kept
        # at most once even when it matches several prefixes
        prefixes = tuple(start)
        lst = [elem for elem in lst if elem.startswith(prefixes)]

    if isinstance(inside, str):
        lst = [elem for elem in lst if inside in elem]
    elif isinstance(inside, (tuple, list)):
        # any() keeps a name once even when several substrings occur in it
        lst = [elem for elem in lst
               if any(part in elem for part in inside)]

    return pd.DataFrame(np.array_split(lst, ncols)).T.fillna('')
# Read one dmstack source catalogue and cast every column to 32-bit floats.
df_csv = pd.read_csv('../data/dmstack_csv/src_lsst_mono_z1.5_000.csv')
df_csv = df_csv.astype(np.float32)
# Remove leading '#' and space characters from the column names.
df_csv = df_csv.rename(columns=lambda name: name.lstrip('# '))
print(df_csv.shape)
df_csv.head()
(7698, 167)
calib_detected | calib_psfCandidate | calib_psfUsed | calib_psfReserved | flags_negative | deblend_deblendedAsPsf | deblend_tooManyPeaks | deblend_parentTooBig | deblend_masked | deblend_skipped | deblend_rampedTemplate | deblend_patchedTemplate | deblend_hasStrayFlux | base_GaussianCentroid_flag | base_GaussianCentroid_flag_noPeak | base_GaussianCentroid_flag_resetToPeak | base_NaiveCentroid_flag | base_NaiveCentroid_flag_noCounts | base_NaiveCentroid_flag_edge | base_NaiveCentroid_flag_resetToPeak | base_SdssCentroid_flag | base_SdssCentroid_flag_edge | base_SdssCentroid_flag_noSecondDerivative | base_SdssCentroid_flag_almostNoSecondDerivative | base_SdssCentroid_flag_notAtMaximum | base_SdssCentroid_flag_resetToPeak | base_SdssShape_flag | base_SdssShape_flag_unweightedBad | base_SdssShape_flag_unweighted | base_SdssShape_flag_shift | base_SdssShape_flag_maxIter | base_SdssShape_flag_psf | ext_shapeHSM_HsmPsfMoments_flag | ext_shapeHSM_HsmPsfMoments_flag_no_pixels | ext_shapeHSM_HsmPsfMoments_flag_not_contained | ext_shapeHSM_HsmPsfMoments_flag_galsim | ext_shapeHSM_HsmShapeRegauss_flag | ext_shapeHSM_HsmShapeRegauss_flag_no_pixels | ext_shapeHSM_HsmShapeRegauss_flag_not_contained | ext_shapeHSM_HsmShapeRegauss_flag_parent_source | ext_shapeHSM_HsmShapeRegauss_flag_galsim | ext_shapeHSM_HsmSourceMoments_flag | ext_shapeHSM_HsmSourceMoments_flag_no_pixels | ext_shapeHSM_HsmSourceMoments_flag_not_contained | ext_shapeHSM_HsmSourceMoments_flag_galsim | base_CircularApertureFlux_3_0_flag | base_CircularApertureFlux_3_0_flag_apertureTruncated | base_CircularApertureFlux_3_0_flag_sincCoeffsTruncated | base_CircularApertureFlux_4_5_flag | base_CircularApertureFlux_4_5_flag_apertureTruncated | base_CircularApertureFlux_4_5_flag_sincCoeffsTruncated | base_CircularApertureFlux_6_0_flag | base_CircularApertureFlux_6_0_flag_apertureTruncated | base_CircularApertureFlux_6_0_flag_sincCoeffsTruncated | base_CircularApertureFlux_9_0_flag | 
base_CircularApertureFlux_9_0_flag_apertureTruncated | base_CircularApertureFlux_9_0_flag_sincCoeffsTruncated | base_CircularApertureFlux_12_0_flag | base_CircularApertureFlux_12_0_flag_apertureTruncated | base_CircularApertureFlux_17_0_flag | base_CircularApertureFlux_17_0_flag_apertureTruncated | base_CircularApertureFlux_25_0_flag | base_CircularApertureFlux_25_0_flag_apertureTruncated | base_CircularApertureFlux_35_0_flag | base_CircularApertureFlux_35_0_flag_apertureTruncated | base_CircularApertureFlux_50_0_flag | base_CircularApertureFlux_50_0_flag_apertureTruncated | base_CircularApertureFlux_70_0_flag | base_CircularApertureFlux_70_0_flag_apertureTruncated | base_GaussianFlux_flag | base_PixelFlags_flag | base_PixelFlags_flag_offimage | base_PixelFlags_flag_edge | base_PixelFlags_flag_interpolated | base_PixelFlags_flag_saturated | base_PixelFlags_flag_cr | base_PixelFlags_flag_bad | base_PixelFlags_flag_suspect | base_PixelFlags_flag_interpolatedCenter | base_PixelFlags_flag_saturatedCenter | base_PixelFlags_flag_crCenter | base_PixelFlags_flag_suspectCenter | base_PsfFlux_flag | base_PsfFlux_flag_noGoodPixels | base_PsfFlux_flag_edge | base_Variance_flag | base_Variance_flag_emptyFootprint | base_PsfFlux_flag_apCorr | base_GaussianFlux_flag_apCorr | base_ClassificationExtendedness_flag | id | coord_ra | coord_dec | parent | deblend_nChild | deblend_psfCenter_x | deblend_psfCenter_y | deblend_psfFlux | base_GaussianCentroid_x | base_GaussianCentroid_y | base_NaiveCentroid_x | base_NaiveCentroid_y | base_SdssCentroid_x | base_SdssCentroid_y | base_SdssCentroid_xSigma | base_SdssCentroid_ySigma | base_SdssShape_xx | base_SdssShape_yy | base_SdssShape_xy | base_SdssShape_xxSigma | base_SdssShape_yySigma | base_SdssShape_xySigma | base_SdssShape_x | base_SdssShape_y | base_SdssShape_flux | base_SdssShape_fluxSigma | base_SdssShape_psf_xx | base_SdssShape_psf_yy | base_SdssShape_psf_xy | base_SdssShape_flux_xx_Cov | base_SdssShape_flux_yy_Cov | 
base_SdssShape_flux_xy_Cov | ext_shapeHSM_HsmPsfMoments_x | ext_shapeHSM_HsmPsfMoments_y | ext_shapeHSM_HsmPsfMoments_xx | ext_shapeHSM_HsmPsfMoments_yy | ext_shapeHSM_HsmPsfMoments_xy | ext_shapeHSM_HsmShapeRegauss_e1 | ext_shapeHSM_HsmShapeRegauss_e2 | ext_shapeHSM_HsmShapeRegauss_sigma | ext_shapeHSM_HsmShapeRegauss_resolution | ext_shapeHSM_HsmSourceMoments_x | ext_shapeHSM_HsmSourceMoments_y | ext_shapeHSM_HsmSourceMoments_xx | ext_shapeHSM_HsmSourceMoments_yy | ext_shapeHSM_HsmSourceMoments_xy | base_CircularApertureFlux_3_0_flux | base_CircularApertureFlux_3_0_fluxSigma | base_CircularApertureFlux_4_5_flux | base_CircularApertureFlux_4_5_fluxSigma | base_CircularApertureFlux_6_0_flux | base_CircularApertureFlux_6_0_fluxSigma | base_CircularApertureFlux_9_0_flux | base_CircularApertureFlux_9_0_fluxSigma | base_CircularApertureFlux_12_0_flux | base_CircularApertureFlux_12_0_fluxSigma | base_CircularApertureFlux_17_0_flux | base_CircularApertureFlux_17_0_fluxSigma | base_CircularApertureFlux_25_0_flux | base_CircularApertureFlux_25_0_fluxSigma | base_CircularApertureFlux_35_0_flux | base_CircularApertureFlux_35_0_fluxSigma | base_CircularApertureFlux_50_0_flux | base_CircularApertureFlux_50_0_fluxSigma | base_CircularApertureFlux_70_0_flux | base_CircularApertureFlux_70_0_fluxSigma | base_GaussianFlux_flux | base_GaussianFlux_fluxSigma | base_PsfFlux_flux | base_PsfFlux_fluxSigma | base_Variance_value | base_PsfFlux_apCorr | base_PsfFlux_apCorrSigma | base_GaussianFlux_apCorr | base_GaussianFlux_apCorrSigma | base_ClassificationExtendedness_value | footprint | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0034 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | 44.000000 | 12.000000 | 44.092899 | 11.8224 | 44.0 | 12.0 | NaN | NaN | 8.096000 | 14.394400 | -0.697000 | NaN | NaN | NaN | 44.064499 | 11.579300 | NaN | NaN | 11.6343 | 11.9580 | 0.6932 | NaN | NaN | NaN | 0.0003 | 0.0006 | 11.6400 | 11.9639 | 0.6933 | NaN | NaN | NaN | NaN | 44.346600 | 4.7701 | 15.0647 | 1.3038 | 0.1181 | 337.884613 | 25.7848 | 957.775024 | 39.055599 | 2167.570557 | 52.364899 | 8611.485352 | 78.949501 | 20285.562500 | 105.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 570.410889 | 39.068600 | 4140.206543 | 63.567799 | NaN | 0.9653 | 0.0 | 1.0472 | 0.0 | NaN | 1.0 |
1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 2.0 | 0.0030 | 0.0 | 0.0 | 3.0 | NaN | NaN | NaN | 485.997192 | 20.360701 | 485.996704 | 20.0172 | 486.0 | 20.0 | NaN | NaN | 10.344900 | 12.952100 | 0.996300 | 0.1558 | 0.1237 | 0.1950 | 485.999390 | 20.181499 | 11307.490234 | 85.123497 | 11.7969 | 12.0755 | 0.6936 | -6.629200 | -0.638500 | -8.299900 | 0.0002 | 0.0005 | 11.8036 | 12.0818 | 0.6937 | NaN | NaN | NaN | NaN | 485.998810 | 20.3612 | 10.3610 | 12.8100 | 0.9905 | 3709.388672 | 25.7848 | 6565.007812 | 39.055599 | 8779.926758 | 52.365002 | 11045.166992 | 78.950401 | 11928.157227 | 105.0 | 12952.307617 | 150.083298 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 11807.673828 | 62.854698 | 12024.018555 | 63.929901 | NaN | 0.9658 | 0.0 | 1.0467 | 0.0 | NaN | 2.0 |
2 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 3.0 | 0.0027 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | 809.209717 | 14.600800 | 809.014099 | 14.9891 | 809.0 | 15.0 | NaN | NaN | 12.889400 | 10.482900 | 1.121700 | 1.0448 | 0.6693 | 0.8497 | 809.105591 | 14.801800 | 2103.723145 | 85.261398 | 11.9032 | 12.1357 | 0.6939 | -44.539799 | -3.875900 | -36.223999 | 0.0002 | 0.0004 | 11.9096 | 12.1416 | 0.6942 | NaN | NaN | NaN | NaN | 809.207703 | 14.6070 | 12.8311 | 10.3401 | 1.1472 | 692.577576 | 25.7848 | 1210.554565 | 39.055599 | 1631.007324 | 52.365002 | 2072.951416 | 78.950104 | 2152.659424 | 105.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2190.896484 | 62.788200 | 2238.241211 | 64.139603 | NaN | 0.9661 | 0.0 | 1.0464 | 0.0 | NaN | 3.0 |
3 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 4.0 | 0.0025 | 0.0 | 0.0 | 2.0 | NaN | NaN | NaN | 1044.863770 | 18.765499 | 1044.986572 | 18.0783 | 1045.0 | 18.0 | NaN | NaN | 85.502403 | 205.036896 | -64.301003 | 1.1072 | 1.3477 | 2.6550 | 1047.611328 | 11.325200 | 41651.667969 | 269.673309 | 11.9676 | 12.1663 | 0.6959 | -149.287003 | 112.269402 | -357.993896 | 0.0001 | 0.0004 | 11.9678 | 12.1737 | 0.6976 | NaN | NaN | NaN | NaN | 1048.829102 | 11.5367 | 74.0755 | 61.8624 | -24.6222 | 2527.981934 | 25.7848 | 4700.720215 | 39.055599 | 6747.881836 | 52.365002 | 10284.900391 | 78.950302 | 14352.309570 | 105.0 | 26736.316406 | 150.083298 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 28019.847656 | 147.246094 | 9611.236328 | 64.253403 | NaN | 0.9662 | 0.0 | 1.0462 | 0.0 | NaN | 4.0 |
4 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 5.0 | 0.0024 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | 1125.895630 | 15.341200 | 1125.993042 | 15.0464 | 1126.0 | 15.0 | NaN | NaN | 10.290900 | 11.557200 | 1.432400 | 0.6839 | 0.5169 | 0.7681 | 1125.951538 | 15.174400 | 2480.244385 | 82.418999 | 11.9818 | 12.1751 | 0.6980 | -28.184700 | -3.923000 | -31.653000 | 0.0001 | 0.0004 | 11.9889 | 12.1811 | 0.6982 | NaN | NaN | NaN | NaN | 1125.903442 | 15.3461 | 10.3452 | 11.4144 | 1.4841 | 868.762329 | 25.7848 | 1477.959351 | 39.055599 | 1957.600464 | 52.365002 | 2466.916260 | 78.950104 | 2577.637939 | 105.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2588.978027 | 60.834301 | 2734.262939 | 64.288696 | NaN | 0.9662 | 0.0 | 1.0461 | 0.0 | NaN | 5.0 |
# Map positional index -> column name for the first 90 columns of df_csv.
dict_flags = dict(enumerate(df_csv.columns[:90]))
import json
with open('dict_flags.json','w') as fo:
    json.dump(dict_flags, fo)
# Read the mapping back (JSON turns the integer keys into strings);
# the context manager closes the file handle after loading.
with open('dict_flags.json') as fi:
    dict_flags = json.load(fi)
# dict_flags
dict_flags['0']
'calib_detected'
%%writefile b01_remove_nans_dmstack.py
# Author : Bhishan Poudel
# Date : July 5, 2019
# Update : Nov 7, 2019
# Description:
#===============
# Remove nans from the dmstack output csv files and
# apply some filters to produce txt files.
#
# Inputs/Outputs:
#=================
# inputs : ../data/dmstack_csv/*.csv (100*4 csv files)
# outputs: dmstack_txt/*.txt (100 combined txt files with few columns)
#
# Filtering:
#============
# 1. column ==> deblend_nChild==0
# 2. flag ==> calib_psfCandidate==False **Read flag from json**
# 3. ellipticity ==> e = sqrt(e1^2 + e2^2) < 2.0
# 4. selection ==> choose only few columns
# 5. nans ==> remove nans from all selected columns
# 6. delimiter ==> change delimiter from space to tab for imcat
#
# Shape HSM Filtering:
#======================
# Nov 19, 2019
# Reference: https://github.com/LSSTDESC/DC2-analysis/blob/master/tutorials/object_gcr_2_lensing_cuts.ipynb
#
# 7. 'ext_shapeHSM_HsmShapeRegauss_resolution >= 0.3'
# 8. 'ext_shapeHSM_HsmShapeRegauss_sigma <= 0.4'
# 9. 'ext_shapeHSM_HsmShapeRegauss_flag == 0'
# Usage:
#=======
# py b01_remove_nans_dmstack.py
#
#
# Note:
# IMCAT reads the columns ext_shapeHSM_HsmShapeRegauss_e1 and
# ext_shapeHSM_HsmShapeRegauss_e2 together as a single vector g,
# so the original reduced shear is g/2.
#
import pandas as pd
import numpy as np
import os,sys
import glob
import json
import multiprocessing
from multiprocessing import Process
# constants
RANGE = 100
# global variables
# load the flag dictionary; the context manager closes the file afterwards
with open('dict_flags.json') as fi:
    dict_flags_all = json.load(fi)
# create output folder if it does not exist; exist_ok avoids a
# FileExistsError when several processes execute this line concurrently
os.makedirs('dmstack_txt', exist_ok=True)
def remove_nans(ifile,file_number):
    """Filter one dmstack source catalogue (csv) and write it as a txt file.

    The csv is read, sixteen columns are picked by position, quality cuts
    are applied, the derived columns ``ellip`` and ``radius`` are added,
    rows containing NaN are dropped and the remaining rows are written
    tab-separated with a commented header line.

    Parameters
    ----------
    ifile : str
        Path of the input csv, e.g.
        ``../data/dmstack_csv/src_lsst_mono_z1.5_000.csv``.
    file_number : int
        Value stored in the ``file_number`` column of every output row.

    Columns read (positional index : name)
    --------------------------------------
    Used only for filtering:
        1   : calib_psfCandidate                       (keep == 0)
        36  : ext_shapeHSM_HsmShapeRegauss_flag        (keep == 0)
        94  : deblend_nChild                           (keep == 0)
        129 : ext_shapeHSM_HsmShapeRegauss_sigma       (keep <= 0.4)
        130 : ext_shapeHSM_HsmShapeRegauss_resolution  (keep >= 0.3)
    The two HSM cuts were added on Nov 19, 2019 following
    https://github.com/LSSTDESC/DC2-analysis/blob/master/tutorials/object_gcr_2_lensing_cuts.ipynb

    Written to the output:
        90  : id
        102 : base_SdssCentroid_x
        103 : base_SdssCentroid_y
        104 : base_SdssCentroid_xSigma
        105 : base_SdssCentroid_ySigma
        114 : base_SdssShape_flux
        127 : ext_shapeHSM_HsmShapeRegauss_e1
        128 : ext_shapeHSM_HsmShapeRegauss_e2

    Used for the radius calculation:
        133 : ext_shapeHSM_HsmSourceMoments_xx
        134 : ext_shapeHSM_HsmSourceMoments_yy
        135 : ext_shapeHSM_HsmSourceMoments_xy

    Derived columns
    ---------------
    ellip  = sqrt(e1**2 + e2**2)        (rows with ellip >= 2.0 are dropped)
    radius = (xx*yy - xy**2)**(1/4)

    Output
    ------
    The text '../data/dmstack_csv' in ``ifile`` is replaced by 'dmstack_txt'
    and the extension becomes '.txt'.  Output columns, in order:

        file_number, id, x, y, xSigma, ySigma, e1, e2, ellip, flux, radius
    """
    df = pd.read_csv(ifile, sep=",", low_memory=False)
    df.columns = df.columns.str.lstrip('# ')

    # select only the few columns that are needed (by position)
    usecols = [1, 36, 94, 90, 102, 103, 104, 105,
               127, 128, 129, 130, 114, 133, 134, 135]
    df = df.iloc[:, usecols].copy()

    # make the selected columns numeric; unparsable entries become NaN and
    # are removed later by the cuts or by dropna.  Converting after the
    # selection means junk in an unused column cannot abort the run.
    for c in df.columns:
        df[c] = pd.to_numeric(df[c], errors='coerce')
    df = df.astype(float)

    cuts = [
        # not a star (psf) candidate
        'calib_psfCandidate == 0.0',
        # shape measurement flag must be 0
        'ext_shapeHSM_HsmShapeRegauss_flag == 0.0',
        # no child source after deblending
        'deblend_nChild == 0.0',
        # HSM shape cuts
        'ext_shapeHSM_HsmShapeRegauss_resolution >= 0.3',
        'ext_shapeHSM_HsmShapeRegauss_sigma <= 0.4',
    ]
    for cut in cuts:
        df = df.query(cut)

    # clean out unphysical results: sqrt(e1^2 + e2^2) < 2.0
    # (copy first so that the column assignment acts on an independent frame)
    df = df.copy()
    df['ellip'] = np.hypot(df['ext_shapeHSM_HsmShapeRegauss_e1'],
                           df['ext_shapeHSM_HsmShapeRegauss_e2'])
    df = df.query('ellip < 2.0').copy()

    # calculate radius of ellipse using HSM moments
    # radius**4 = xx*yy - xy**2
    xx = df['ext_shapeHSM_HsmSourceMoments_xx']
    yy = df['ext_shapeHSM_HsmSourceMoments_yy']
    xy = df['ext_shapeHSM_HsmSourceMoments_xy']
    df['radius'] = (xx * yy - xy**2)**0.25

    # add a new column with file_number
    df['file_number'] = file_number

    # take only required columns
    cols_select = ['file_number', 'id',
                   'base_SdssCentroid_x', 'base_SdssCentroid_y',
                   'base_SdssCentroid_xSigma', 'base_SdssCentroid_ySigma',
                   'ext_shapeHSM_HsmShapeRegauss_e1',
                   'ext_shapeHSM_HsmShapeRegauss_e2',
                   'ellip', 'base_SdssShape_flux', 'radius'
                   ]
    df = df[cols_select]

    # drop all nans
    df = df.dropna()

    # header line (np.savetxt writes it as a comment)
    prefix = ' '*2
    header_line = prefix.join(cols_select)

    # from: ../data/dmstack_csv/src_lsst_mono_z1.5_000.csv
    # to  : dmstack_txt/src_lsst_mono_z1.5_000.txt
    ofile = ifile.replace('../data/dmstack_csv', 'dmstack_txt')
    # swap only the final extension so the output can never be the input
    ofile = os.path.splitext(ofile)[0] + '.txt'

    # make sure the output folder exists when the function is used on its own
    out_dir = os.path.dirname(ofile)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    np.savetxt(ofile, df.values, header=header_line, delimiter='\t')
def _process_catalogs(stem):
    """Run remove_nans on ``<stem>_000.csv`` .. ``<stem>_<RANGE-1>.csv``.

    The loop index is both the three-digit suffix of the file name and the
    file_number passed to remove_nans, so the number never has to be parsed
    back out of the path.
    """
    for file_number in range(RANGE):
        ifile = '../data/dmstack_csv/{}_{:03d}.csv'.format(stem, file_number)
        remove_nans(ifile, file_number)


def func1():
    """Process the src_lsst_z1.5_* catalogues."""
    _process_catalogs('src_lsst_z1.5')


def func2():
    """Process the src_lsst90_z1.5_* catalogues."""
    _process_catalogs('src_lsst90_z1.5')


def func3():
    """Process the src_lsst_mono_z1.5_* catalogues."""
    _process_catalogs('src_lsst_mono_z1.5')


def func4():
    """Process the src_lsst_mono90_z1.5_* catalogues."""
    _process_catalogs('src_lsst_mono90_z1.5')
if __name__ == '__main__':
    # one worker process per catalogue family
    workers = [Process(target=job) for job in (func1, func2, func3, func4)]
    for worker in workers:
        worker.start()
    # wait until every worker has finished
    for worker in workers:
        worker.join()