How to use GCRCatalogs

by Yao-Yuan Mao (last update: 7/27/2018)

Links to GitHub code repos: GCRCatalogs and GCR

Note: You should be running this notebook at https://jupyter-dev.nersc.gov

In [1]:
## Note: if you cloned the gcr-catalogs repo and are running this notebook from its `examples` folder,
## you can point Python at your own copy of GCRCatalogs instead:
#import sys
#sys.path.insert(0, '/path/to/your/cloned/gcr-catalogs')

## Make sure the current user is a member of the `lsst` Unix group
import subprocess
user_groups = subprocess.check_output(['groups']).decode().split()
assert u'lsst' in user_groups, 'You need to be in the `lsst` group for this notebook to work'
In [2]:
import numpy as np

The basics:

  • get_available_catalogs() lists available catalogs; returns dict.
  • load_catalog() loads the catalog you want; returns an instance of GCR.BaseGenericCatalog
In [3]:
import GCRCatalogs

## report the versions of GCRCatalogs and of the GCR package it bundles access to
version_line = 'GCRCatalogs = {} | GCR = {}'.format(GCRCatalogs.__version__, GCRCatalogs.GCR.__version__)
print(version_line)
GCRCatalogs = 0.8.0 | GCR = 0.7.2
In [4]:
## list the available (default) catalogs in alphabetical order

default_catalog_names = sorted(GCRCatalogs.get_available_catalogs())
print('\n'.join(default_catalog_names))
buzzard
buzzard_high-res
buzzard_test
dc1
dc2_coadd_run1.1p
dc2_coadd_run1.1p_tract4850
focal_plane_0_test
focal_plane_16_test
hsc-pdr1-xmm
protoDC2
In [5]:
## list *all* available catalogs, older versions included, in alphabetical order

all_catalog_names = sorted(GCRCatalogs.get_available_catalogs(include_default_only=False))
print('\n'.join(all_catalog_names))
buzzard
buzzard_high-res
buzzard_high-res_v1.1
buzzard_test
buzzard_v1.6
buzzard_v1.6_1
buzzard_v1.6_2
buzzard_v1.6_21
buzzard_v1.6_3
buzzard_v1.6_5
buzzard_v1.6_test
cosmoDC2_v0.1
cosmoDC2_v0.1_test
dc1
dc2_coadd_run1.1p
dc2_coadd_run1.1p_tract4850
dc2_instance_example1
dc2_instance_example2
dc2_reference_run1.1
dc2_reference_run1.2
dc2_truth_run1.1
focal_plane_0_test
focal_plane_16_test
hsc-pdr1-xmm
proto-dc2_v2.0
proto-dc2_v2.0_redmapper
proto-dc2_v2.0_test
proto-dc2_v2.1
proto-dc2_v2.1.1
proto-dc2_v2.1.2
proto-dc2_v2.1.2_addon_knots
proto-dc2_v2.1.2_test
proto-dc2_v3.0
proto-dc2_v3.0_addon_knots
proto-dc2_v3.0_redmapper
proto-dc2_v3.0_test
proto-dc2_v4.3_redmapper
proto-dc2_v4.4
proto-dc2_v4.4_test
proto-dc2_v4.5
proto-dc2_v4.5_test
proto-dc2_v4.6.1
proto-dc2_v4.6.1_test
proto-dc2_v4.7_test
proto-dc2_v5.0
proto-dc2_v5.0_test
protoDC2
protoDC2_addon_tidal
protoDC2_test
um_v0.1
um_v0.1_shear_test
um_v0.1_test
In [6]:
## list *all* available catalogs whose names start with 'proto-dc2', in alphabetical order

every_catalog = GCRCatalogs.get_available_catalogs(include_default_only=False)
proto_dc2_names = [name for name in every_catalog if name.startswith('proto-dc2')]
print('\n'.join(sorted(proto_dc2_names)))
proto-dc2_v2.0
proto-dc2_v2.0_redmapper
proto-dc2_v2.0_test
proto-dc2_v2.1
proto-dc2_v2.1.1
proto-dc2_v2.1.2
proto-dc2_v2.1.2_addon_knots
proto-dc2_v2.1.2_test
proto-dc2_v3.0
proto-dc2_v3.0_addon_knots
proto-dc2_v3.0_redmapper
proto-dc2_v3.0_test
proto-dc2_v4.3_redmapper
proto-dc2_v4.4
proto-dc2_v4.4_test
proto-dc2_v4.5
proto-dc2_v4.5_test
proto-dc2_v4.6.1
proto-dc2_v4.6.1_test
proto-dc2_v4.7_test
proto-dc2_v5.0
proto-dc2_v5.0_test
In [7]:
## load the 'protoDC2' catalog; the resulting `gc` object is used by most cells below
## NOTE(review): the name `gc` is also the name of a standard-library module; it is kept because later cells refer to it

gc = GCRCatalogs.load_catalog('protoDC2_test') # use 'protoDC2_test' to skip md5 check (which takes a while)
#gc = GCRCatalogs.load_catalog('protoDC2')
/global/common/software/lsst/common/miniconda/current/lib/python3.6/site-packages/GCRCatalogs/alphaq.py:105: UserWarning: No md5 sum specified in the config file
  warnings.warn('No md5 sum specified in the config file')

GCR interface

See also the full GCR API Documentation.

quantities

  • get_quantities() loads the quantities you need; takes a list and returns dict.
  • has_quantity() and has_quantities() can check if the quantities you need exist; both return bool.
  • list_all_quantities() lists all available quantities; returns list.
In [8]:
gc.get_quantities(['mag_u_lsst', 'ra', 'dec'])
Out[8]:
{'dec': array([-1.78091323, -2.06204367, -1.48433971, ...,  2.43747878,
         2.49400425,  2.37360001], dtype=float32),
 'ra': array([-0.21326847, -1.29391468,  0.68152297, ..., -2.39075041,
        -2.46616554, -2.31633687], dtype=float32),
 'mag_u_lsst': array([ 21.2349205 ,  21.83907318,  21.02371979, ...,  26.76194382,
         33.03851318,  28.94227791], dtype=float32)}
In [9]:
gc.has_quantity('mag_u_lsst')
Out[9]:
True
In [10]:
gc.has_quantities(['mag_u_lsst', 'ra', 'dec'])
Out[10]:
True
In [11]:
## Only returns `True` if *all* quantities exist
gc.has_quantities(['mag_u_lsst', 'ra', 'dec', 'quantitiy_that_does_not_exist'])
Out[11]:
False
In [12]:
print(', '.join(sorted(gc.list_all_quantities())))
A_v, A_v_bulge, A_v_disk, Mag_true_Y_lsst_z0, Mag_true_Y_lsst_z0_no_host_extinction, Mag_true_g_lsst_z0, Mag_true_g_lsst_z0_no_host_extinction, Mag_true_g_sdss_z0, Mag_true_g_sdss_z0_no_host_extinction, Mag_true_i_lsst_z0, Mag_true_i_lsst_z0_no_host_extinction, Mag_true_i_sdss_z0, Mag_true_i_sdss_z0_no_host_extinction, Mag_true_r_lsst_z0, Mag_true_r_lsst_z0_no_host_extinction, Mag_true_r_sdss_z0, Mag_true_r_sdss_z0_no_host_extinction, Mag_true_u_lsst_z0, Mag_true_u_lsst_z0_no_host_extinction, Mag_true_u_sdss_z0, Mag_true_u_sdss_z0_no_host_extinction, Mag_true_y_lsst_z0, Mag_true_y_lsst_z0_no_host_extinction, Mag_true_z_lsst_z0, Mag_true_z_lsst_z0_no_host_extinction, Mag_true_z_sdss_z0, Mag_true_z_sdss_z0_no_host_extinction, R_v, R_v_bulge, R_v_disk, bulge_to_total_ratio_i, convergence, dec, dec_true, ellipticity_1_bulge_true, ellipticity_1_disk_true, ellipticity_1_true, ellipticity_2_bulge_true, ellipticity_2_disk_true, ellipticity_2_true, ellipticity_bulge_true, ellipticity_disk_true, ellipticity_true, galaxy_id, halo_id, halo_mass, is_central, mag_Y_lsst, mag_Y_lsst_no_host_extinction, mag_g, mag_g_lsst, mag_g_lsst_no_host_extinction, mag_g_sdss, mag_g_sdss_no_host_extinction, mag_i, mag_i_lsst, mag_i_lsst_no_host_extinction, mag_i_sdss, mag_i_sdss_no_host_extinction, mag_r, mag_r_lsst, mag_r_lsst_no_host_extinction, mag_r_sdss, mag_r_sdss_no_host_extinction, mag_true_Y_lsst, mag_true_Y_lsst_no_host_extinction, mag_true_g, mag_true_g_lsst, mag_true_g_lsst_no_host_extinction, mag_true_g_sdss, mag_true_g_sdss_no_host_extinction, mag_true_i, mag_true_i_lsst, mag_true_i_lsst_no_host_extinction, mag_true_i_sdss, mag_true_i_sdss_no_host_extinction, mag_true_r, mag_true_r_lsst, mag_true_r_lsst_no_host_extinction, mag_true_r_sdss, mag_true_r_sdss_no_host_extinction, mag_true_u, mag_true_u_lsst, mag_true_u_lsst_no_host_extinction, mag_true_u_sdss, mag_true_u_sdss_no_host_extinction, mag_true_y, mag_true_y_lsst, mag_true_y_lsst_no_host_extinction, mag_true_z, 
mag_true_z_lsst, mag_true_z_lsst_no_host_extinction, mag_true_z_sdss, mag_true_z_sdss_no_host_extinction, mag_u, mag_u_lsst, mag_u_lsst_no_host_extinction, mag_u_sdss, mag_u_sdss_no_host_extinction, mag_y, mag_y_lsst, mag_y_lsst_no_host_extinction, mag_z, mag_z_lsst, mag_z_lsst_no_host_extinction, mag_z_sdss, mag_z_sdss_no_host_extinction, magnification, position_angle_true, position_x, position_y, position_z, ra, ra_true, redshift, redshift_true, sed_1000_246, sed_1000_246_bulge, sed_1000_246_bulge_no_host_extinction, sed_1000_246_disk, sed_1000_246_disk_no_host_extinction, sed_1000_246_no_host_extinction, sed_11467_1710, sed_11467_1710_bulge, sed_11467_1710_bulge_no_host_extinction, sed_11467_1710_disk, sed_11467_1710_disk_no_host_extinction, sed_11467_1710_no_host_extinction, sed_1246_306, sed_1246_306_bulge, sed_1246_306_bulge_no_host_extinction, sed_1246_306_disk, sed_1246_306_disk_no_host_extinction, sed_1246_306_no_host_extinction, sed_13177_1966, sed_13177_1966_bulge, sed_13177_1966_bulge_no_host_extinction, sed_13177_1966_disk, sed_13177_1966_disk_no_host_extinction, sed_13177_1966_no_host_extinction, sed_15143_2259, sed_15143_2259_bulge, sed_15143_2259_bulge_no_host_extinction, sed_15143_2259_disk, sed_15143_2259_disk_no_host_extinction, sed_15143_2259_no_host_extinction, sed_1552_381, sed_1552_381_bulge, sed_1552_381_bulge_no_host_extinction, sed_1552_381_disk, sed_1552_381_disk_no_host_extinction, sed_1552_381_no_host_extinction, sed_17402_2596, sed_17402_2596_bulge, sed_17402_2596_bulge_no_host_extinction, sed_17402_2596_disk, sed_17402_2596_disk_no_host_extinction, sed_17402_2596_no_host_extinction, sed_1933_474, sed_1933_474_bulge, sed_1933_474_bulge_no_host_extinction, sed_1933_474_disk, sed_1933_474_disk_no_host_extinction, sed_1933_474_no_host_extinction, sed_2407_591, sed_2407_591_bulge, sed_2407_591_bulge_no_host_extinction, sed_2407_591_disk, sed_2407_591_disk_no_host_extinction, sed_2407_591_no_host_extinction, sed_2998_186, 
sed_2998_186_bulge, sed_2998_186_bulge_no_host_extinction, sed_2998_186_disk, sed_2998_186_disk_no_host_extinction, sed_2998_186_no_host_extinction, sed_3184_197, sed_3184_197_bulge, sed_3184_197_bulge_no_host_extinction, sed_3184_197_disk, sed_3184_197_disk_no_host_extinction, sed_3184_197_no_host_extinction, sed_3381_209, sed_3381_209_bulge, sed_3381_209_bulge_no_host_extinction, sed_3381_209_disk, sed_3381_209_disk_no_host_extinction, sed_3381_209_no_host_extinction, sed_3590_222, sed_3590_222_bulge, sed_3590_222_bulge_no_host_extinction, sed_3590_222_disk, sed_3590_222_disk_no_host_extinction, sed_3590_222_no_host_extinction, sed_3812_236, sed_3812_236_bulge, sed_3812_236_bulge_no_host_extinction, sed_3812_236_disk, sed_3812_236_disk_no_host_extinction, sed_3812_236_no_host_extinction, sed_4048_251, sed_4048_251_bulge, sed_4048_251_bulge_no_host_extinction, sed_4048_251_disk, sed_4048_251_disk_no_host_extinction, sed_4048_251_no_host_extinction, sed_4299_266, sed_4299_266_bulge, sed_4299_266_bulge_no_host_extinction, sed_4299_266_disk, sed_4299_266_disk_no_host_extinction, sed_4299_266_no_host_extinction, sed_4565_283, sed_4565_283_bulge, sed_4565_283_bulge_no_host_extinction, sed_4565_283_disk, sed_4565_283_disk_no_host_extinction, sed_4565_283_no_host_extinction, sed_4848_300, sed_4848_300_bulge, sed_4848_300_bulge_no_host_extinction, sed_4848_300_disk, sed_4848_300_disk_no_host_extinction, sed_4848_300_no_host_extinction, sed_5148_319, sed_5148_319_bulge, sed_5148_319_bulge_no_host_extinction, sed_5148_319_disk, sed_5148_319_disk_no_host_extinction, sed_5148_319_no_host_extinction, sed_5467_339, sed_5467_339_bulge, sed_5467_339_bulge_no_host_extinction, sed_5467_339_disk, sed_5467_339_disk_no_host_extinction, sed_5467_339_no_host_extinction, sed_5806_360, sed_5806_360_bulge, sed_5806_360_bulge_no_host_extinction, sed_5806_360_disk, sed_5806_360_disk_no_host_extinction, sed_5806_360_no_host_extinction, sed_6166_382, sed_6166_382_bulge, 
sed_6166_382_bulge_no_host_extinction, sed_6166_382_disk, sed_6166_382_disk_no_host_extinction, sed_6166_382_no_host_extinction, sed_6548_406, sed_6548_406_bulge, sed_6548_406_bulge_no_host_extinction, sed_6548_406_disk, sed_6548_406_disk_no_host_extinction, sed_6548_406_no_host_extinction, sed_6954_431, sed_6954_431_bulge, sed_6954_431_bulge_no_host_extinction, sed_6954_431_disk, sed_6954_431_disk_no_host_extinction, sed_6954_431_no_host_extinction, sed_7385_458, sed_7385_458_bulge, sed_7385_458_bulge_no_host_extinction, sed_7385_458_disk, sed_7385_458_disk_no_host_extinction, sed_7385_458_no_host_extinction, sed_7843_486, sed_7843_486_bulge, sed_7843_486_bulge_no_host_extinction, sed_7843_486_disk, sed_7843_486_disk_no_host_extinction, sed_7843_486_no_host_extinction, sed_8329_517, sed_8329_517_bulge, sed_8329_517_bulge_no_host_extinction, sed_8329_517_disk, sed_8329_517_disk_no_host_extinction, sed_8329_517_no_host_extinction, sed_8846_549, sed_8846_549_bulge, sed_8846_549_bulge_no_host_extinction, sed_8846_549_disk, sed_8846_549_disk_no_host_extinction, sed_8846_549_no_host_extinction, sed_9395_583, sed_9395_583_bulge, sed_9395_583_bulge_no_host_extinction, sed_9395_583_disk, sed_9395_583_disk_no_host_extinction, sed_9395_583_no_host_extinction, sed_9978_1489, sed_9978_1489_bulge, sed_9978_1489_bulge_no_host_extinction, sed_9978_1489_disk, sed_9978_1489_disk_no_host_extinction, sed_9978_1489_no_host_extinction, sersic_bulge, sersic_disk, shear_1, shear_2, shear_2_phosim, shear_2_treecorr, size_bulge_true, size_disk_true, size_minor_bulge_true, size_minor_disk_true, size_minor_true, size_true, stellar_mass, stellar_mass_bulge, stellar_mass_disk, velocity_x, velocity_y, velocity_z

accessing native quantities

Native quantities are quantities that have not yet been homogenized (to common labels/units). However, you can still access them as long as you know what you are doing.

In [13]:
## show the first 5 native quantities (alphabetical order)

native_names = sorted(gc.list_all_native_quantities())
print('\n'.join(native_names[:5]))
LSST_filters/diskLuminositiesStellar:LSST_g:observed
LSST_filters/diskLuminositiesStellar:LSST_g:observed:dustAtlas
LSST_filters/diskLuminositiesStellar:LSST_g:rest
LSST_filters/diskLuminositiesStellar:LSST_g:rest:dustAtlas
LSST_filters/diskLuminositiesStellar:LSST_i:observed
In [14]:
## list native and derived quantities together, and show the first 5 (alphabetical order)

combined_names = sorted(gc.list_all_quantities(include_native=True))
print('\n'.join(combined_names[:5]))
A_v
A_v_bulge
A_v_disk
LSST_filters/diskLuminositiesStellar:LSST_g:observed
LSST_filters/diskLuminositiesStellar:LSST_g:observed:dustAtlas
In [17]:
# select every quantity whose name matches a regular expression
import re

sed_pattern = re.compile(r'sed_\d+_\d+$')
sed_names = [name for name in gc.list_all_quantities() if sed_pattern.match(name)]
data = gc.get_quantities(sed_names)
list(data.keys())
Out[17]:
['sed_2998_186',
 'sed_13177_1966',
 'sed_15143_2259',
 'sed_4048_251',
 'sed_6548_406',
 'sed_6166_382',
 'sed_7385_458',
 'sed_4565_283',
 'sed_8846_549',
 'sed_9978_1489',
 'sed_5806_360',
 'sed_7843_486',
 'sed_6954_431',
 'sed_11467_1710',
 'sed_4848_300',
 'sed_1552_381',
 'sed_4299_266',
 'sed_1000_246',
 'sed_5148_319',
 'sed_3184_197',
 'sed_9395_583',
 'sed_3381_209',
 'sed_1246_306',
 'sed_2407_591',
 'sed_5467_339',
 'sed_3812_236',
 'sed_1933_474',
 'sed_17402_2596',
 'sed_8329_517',
 'sed_3590_222']
In [15]:
# native quantities are retrieved with `get_quantities`, just like any other quantity
native_names_wanted = [
    'LSST_filters/diskLuminositiesStellar:LSST_g:observed',
    'LSST_filters/diskLuminositiesStellar:LSST_g:observed:dustAtlas',
]
gc.get_quantities(native_names_wanted)
Out[15]:
{'LSST_filters/diskLuminositiesStellar:LSST_g:observed:dustAtlas': array([ 1035582.25      ,   580861.5       ,   980863.875     , ...,
         3248385.5       ,    38833.45703125,   442917.21875   ], dtype=float32),
 'LSST_filters/diskLuminositiesStellar:LSST_g:observed': array([  992498.5625   ,   590357.75     ,  1002641.3125   , ...,
         3645767.       ,    40224.2265625,   485800.09375  ], dtype=float32)}

You can also rename the native quantities by using add_quantity_modifier(). For example:

In [16]:
# register an alias for a native quantity, then request the data through the alias

alias = 'balmer_alpha_6563'
gc.add_quantity_modifier(alias, 'emissionLines/diskLineLuminosity:balmerAlpha6563:rest')
gc.get_quantities([alias])
Out[16]:
{'balmer_alpha_6563': array([  1.76363730e+04,   4.74945703e+03,   1.73469531e+05, ...,
          7.58312000e+07,   4.01797803e+03,   6.08336523e+03], dtype=float32)}

more info about the catalog

  • lightcone is a bool
  • cosmology is an instance of astropy.cosmology.FLRW
  • get_input_kwargs() returns a dict (when called with no argument) from the original yaml config file. If an argument is passed, it returns the corresponding value for key=argument
In [16]:
gc.lightcone
Out[16]:
True
In [17]:
gc.cosmology
Out[17]:
FlatLambdaCDM(H0=71 km / (Mpc s), Om0=0.265, Tcmb0=0 K, Neff=3.04, m_nu=None, Ob0=0.0448)
In [18]:
gc.version
Out[18]:
'5.0'
In [19]:
print(gc.get_catalog_info('description'))
ProtoDC2 is a down-scaled version of the catalog to be generated for LSST-DESC DC2.
For a description of the catalog and the methods, please see https://goo.gl/fXDQwP

In [20]:
print(gc.get_catalog_info())
{'subclass_name': 'alphaq.AlphaQGalaxyCatalog', 'filename': '/global/projecta/projectdirs/lsst/groups/CS/descqa/catalog/v5.0.all.hdf5', 'lightcone': True, 'version': '5.0', 'creators': ['Andrew Benson', 'Andrew Hearin', 'Katrin Heitmann', 'Joe Hollowed', 'Danila Korytov', 'Eve Kovacs', 'Patricia Larsen'], 'description': 'ProtoDC2 is a down-scaled version of the catalog to be generated for LSST-DESC DC2.\nFor a description of the catalog and the methods, please see https://goo.gl/fXDQwP\n'}
In [21]:
# print the info recorded for each of the first 20 quantities (alphabetical order)
first_twenty = sorted(gc.list_all_quantities())[:20]
for quantity_name in first_twenty:
    print(quantity_name, gc.get_quantity_info(quantity_name))
A_v None
A_v_bulge None
A_v_disk None
Mag_true_Y_lsst_z0 {'units': 'AB magnitude'}
Mag_true_Y_lsst_z0_no_host_extinction {'units': 'AB magnitude'}
Mag_true_g_lsst_z0 {'units': 'AB magnitude'}
Mag_true_g_lsst_z0_no_host_extinction {'units': 'AB magnitude'}
Mag_true_g_sdss_z0 {'units': 'AB magnitude'}
Mag_true_g_sdss_z0_no_host_extinction {'units': 'AB magnitude'}
Mag_true_i_lsst_z0 {'units': 'AB magnitude'}
Mag_true_i_lsst_z0_no_host_extinction {'units': 'AB magnitude'}
Mag_true_i_sdss_z0 {'units': 'AB magnitude'}
Mag_true_i_sdss_z0_no_host_extinction {'units': 'AB magnitude'}
Mag_true_r_lsst_z0 {'units': 'AB magnitude'}
Mag_true_r_lsst_z0_no_host_extinction {'units': 'AB magnitude'}
Mag_true_r_sdss_z0 {'units': 'AB magnitude'}
Mag_true_r_sdss_z0_no_host_extinction {'units': 'AB magnitude'}
Mag_true_u_lsst_z0 {'units': 'AB magnitude'}
Mag_true_u_lsst_z0_no_host_extinction {'units': 'AB magnitude'}
Mag_true_u_sdss_z0 {'units': 'AB magnitude'}
/global/common/software/lsst/common/miniconda/current/lib/python3.6/site-packages/GCRCatalogs/alphaq.py:366: UserWarning: This value is composed of a function on native quantities. So we have no idea what the units are
  warnings.warn('This value is composed of a function on native quantities. So we have no idea what the units are')

use filters

You can specify filters in get_quantities to select a subset of data. Note that filters always takes a list.

In [22]:
# `filters` is given as a list even when there is only one filter

mass_cut = 'stellar_mass > 1e10'
data = gc.get_quantities(['stellar_mass', 'ra', 'dec'], filters=[mass_cut])
print((data['stellar_mass'] > 1e10).all())
True
In [23]:
## More than one filter can be given at once.

data = gc.get_quantities(['stellar_mass'], filters=['ra < -2', 'dec > 1'])
print(len(data['stellar_mass']))

## cross-check: apply the same two cuts by hand to the unfiltered coordinates
data_check = gc.get_quantities(['ra', 'dec'])
selected = (data_check['ra'] < -2) & (data_check['dec'] > 1)
print(np.count_nonzero(selected))
570847
570847
In [24]:
# More complicated filters are written as a tuple of (callable, quantity1, quantity2, ...)

finite_mass = (np.isfinite, 'stellar_mass')
data = gc.get_quantities(['stellar_mass'], filters=[finite_mass])

native filters

Some catalogs (currently only buzzard and buzzard_high-res) support "native filters", which you can use to load only a subset of data more efficiently.

In [24]:
gc_buzzard = GCRCatalogs.load_catalog('buzzard')
print(gc_buzzard._native_filter_quantities)

# load one healpix pixel at a time and print the RA/Dec extent of each
for pixel in (1, 2):
    pixel_filter = 'healpix_pixel == {}'.format(pixel)
    data = gc_buzzard.get_quantities(['ra', 'dec'], native_filters=[pixel_filter])
    print(data['ra'].min(), data['ra'].max(), data['dec'].min(), data['dec'].max())
{'healpix_pixel'}
44.9959447517 56.2350032152 4.77918879056 14.4744615426
33.7591549807 44.9914241079 4.77569650762 14.4781225874

more tips on using the quantities

tip 1

get_quantities() returns a dictionary, which you can easily turn into an astropy.table.Table or pandas.DataFrame

In [26]:
data = gc.get_quantities(['mag_u_lsst', 'ra', 'dec'])
In [27]:
import pandas as pd
pd.DataFrame(data)
Out[27]:
dec mag_u_lsst ra
0 -1.780913 21.234921 -0.213268
1 -2.062044 21.839073 -1.293915
2 -1.484340 21.023720 0.681523
3 -0.923260 17.603844 -0.304187
4 -0.874825 19.314825 -0.261973
5 -0.924046 16.110458 -1.786836
6 -0.909357 18.208616 -1.753474
7 -1.523700 17.888836 -0.262022
8 -1.700855 18.595100 0.072298
9 -1.490176 22.120970 0.063725
10 -1.216591 21.383211 -0.708944
11 -0.014494 21.787636 -0.402417
12 0.625458 21.205883 -0.272478
13 0.305759 19.265335 -0.480082
14 -0.172271 20.583281 -0.580657
15 -0.158516 16.410503 -2.263122
16 -0.485935 22.250452 -0.292392
17 -0.453527 18.530380 -0.677478
18 -0.382364 20.347807 0.334156
19 -0.688242 20.049438 0.306114
20 0.466396 15.867884 1.843608
21 0.355994 20.886177 1.915584
22 0.193510 18.926830 2.335595
23 0.128056 20.565500 1.859136
24 0.770014 19.866911 2.293251
25 0.928932 19.648052 1.856291
26 1.197047 16.213640 1.365838
27 1.145283 23.506783 1.379037
28 1.186768 22.549137 1.419448
29 0.579447 19.437201 2.397512
... ... ... ...
18286676 2.344119 27.980766 -2.372994
18286677 2.424500 27.769838 -2.445686
18286678 2.328301 26.979740 -2.474418
18286679 2.362773 28.483915 -2.440804
18286680 2.466642 28.569492 -2.455950
18286681 2.349971 27.639442 -2.430140
18286682 2.386925 29.215738 -2.441397
18286683 2.405529 29.051693 -2.445164
18286684 2.416599 27.439442 -2.445778
18286685 2.339918 26.979265 -2.393658
18286686 2.387848 28.929337 -2.446784
18286687 2.477501 28.996275 -2.343966
18286688 2.491531 27.439096 -2.427254
18286689 2.313082 31.250610 -2.341767
18286690 2.474660 28.234516 -2.308086
18286691 2.339188 27.606724 -2.334271
18286692 2.423793 27.666565 -2.383434
18286693 2.301899 28.936501 -2.395124
18286694 2.349056 30.725111 -2.349983
18286695 2.468506 29.804134 -2.473691
18286696 2.468706 27.269358 -2.457757
18286697 2.427693 31.805853 -2.362838
18286698 2.420603 31.342039 -2.327386
18286699 2.443260 30.946449 -2.300876
18286700 2.442581 32.081322 -2.496407
18286701 2.312973 28.347523 -2.426726
18286702 2.447692 26.709282 -2.302419
18286703 2.437479 26.761944 -2.390750
18286704 2.494004 33.038513 -2.466166
18286705 2.373600 28.942278 -2.316337

18286706 rows × 3 columns

In [28]:
from astropy.table import Table
Table(data)
Out[28]:
<Table length=18286706>
mag_u_lsstdecra
float32float32float32
21.2349-1.78091-0.213268
21.8391-2.06204-1.29391
21.0237-1.484340.681523
17.6038-0.92326-0.304187
19.3148-0.874825-0.261973
16.1105-0.924046-1.78684
18.2086-0.909357-1.75347
17.8888-1.5237-0.262022
18.5951-1.700860.0722983
22.121-1.490180.063725
.........
27.26942.46871-2.45776
31.80592.42769-2.36284
31.3422.4206-2.32739
30.94642.44326-2.30088
32.08132.44258-2.49641
28.34752.31297-2.42673
26.70932.44769-2.30242
26.76192.43748-2.39075
33.03852.494-2.46617
28.94232.3736-2.31634

tip 2

Sometimes you can allow slightly different quantities (for example, LSST u band and SDSS u band) when comparing different catalogs. In this case, you can use first_available() to get the first quantity, among the ones you list, that is available in the given catalog.

In [29]:
gc.first_available('mag_u_des', 'mag_u_sdss', 'mag_u_lsst')
/global/common/software/lsst/common/miniconda/current/lib/python3.6/site-packages/GCR.py:216: UserWarning: mag_u_des not available; using mag_u_sdss instead
  warnings.warn('{} not available; using {} instead'.format(quantities[0], q))
Out[29]:
'mag_u_sdss'
In [22]:
# use first_available to pick the quantities you need, then rename them to your favorite names

name_templates = ('mag_{}', 'mag_{}_lsst', 'mag_{}_des', 'mag_{}_sdss')
mag_translate = {}
for band in 'gri':
    # candidate names for this band, in order of preference
    candidates = [template.format(band) for template in name_templates]
    mag_translate[gc.first_available(*candidates)] = band

data = gc.get_quantities(list(mag_translate))
print(list(data))

# replace each catalog quantity name with its short band label
data = {mag_translate.get(key, key): values for key, values in data.items()}
print(list(data))
['mag_i', 'mag_r', 'mag_g']
['i', 'r', 'g']

Use iterator

In [25]:
# load coadd catalog (for a single tract)
coadd_cat = GCRCatalogs.load_catalog('dc2_coadd_run1.1p_tract4850')
In [26]:
# When `return_iterator` is turned on, the method `get_quantities` will return an 
# iterator, and each element in the iterator will be the quantities we requested in 
# different chunks of the dataset. 

# For coadd catalogs, the different chunks happen to be different patches, 
# resulting in a different color for each patch in the scatter plot below.

# matplotlib is imported here because no earlier cell imports it; without this
# line the cell raises NameError on a fresh kernel (Restart & Run All).
import matplotlib.pyplot as plt

for coadd_data in coadd_cat.get_quantities(['ra', 'dec'], return_iterator=True):
    # one scatter call per chunk, so each chunk gets the next color in the cycle
    plt.scatter(coadd_data['ra'], coadd_data['dec'], s=1, rasterized=True);

plt.xlabel('RA');
plt.ylabel('Dec');

add derived quantities

You can add your own derived quantities that are based on available quantities. The call signature is:

cat.add_derived_quantity(derived_quantity, func, *quantities)
In [42]:
# remove any existing 'gr' definition first, so that this cell can be run more than once
if 'gr' in coadd_cat.list_all_quantities():
    coadd_cat.del_quantity_modifier('gr')
coadd_cat.add_derived_quantity('gr', np.subtract, 'mag_g', 'mag_r')

# keep only rows where both magnitudes are finite, then compare 'gr' with a manual g - r
finite_mags = [(np.isfinite, mag_name) for mag_name in ('mag_g', 'mag_r')]
data = coadd_cat.get_quantities(['mag_g', 'mag_r', 'gr'], filters=finite_mags)
expected_gr = data['mag_g'] - data['mag_r']
print((expected_gr == data['gr']).all())
True

Use GCRQuery

GCRQuery lets you define cuts and filters before loading a catalog!

GCRQuery objects can operate with themselves using any boolean operations (and, or, xor, not)

In [45]:
from GCR import GCRQuery

# Pick a small RA and Dec window for the example cut
ra_min, ra_max = 55.5, 56.0
dec_min, dec_max = -29.0, -28.5

# positional cut: half-open box [ra_min, ra_max) x [dec_min, dec_max)
coord_conditions = [
    'ra >= {}'.format(ra_min),
    'ra < {}'.format(ra_max),
    'dec >= {}'.format(dec_min),
    'dec < {}'.format(dec_max),
]
coord_cut = GCRQuery(*coord_conditions)

# magnitude cut: finite i-band magnitude brighter than 24.5
mag_filters = GCRQuery((np.isfinite, 'mag_i'), 'mag_i < 24.5')

# load the columns, then apply both queries combined with a boolean AND
data = coadd_cat.get_quantities(['ra', 'dec', 'mag_i'])
combined_query = coord_cut & mag_filters
data_subset = combined_query.filter(data)

Here's a full example

In [27]:
import numpy as np
from astropy.table import Table
import matplotlib.pyplot as plt
%matplotlib inline
In [28]:
# load each catalog once and keep the instances in a dict keyed by catalog name
catalogs = ('protoDC2_test', 'buzzard_test')
gc_all = {catalog_name: GCRCatalogs.load_catalog(catalog_name) for catalog_name in catalogs}
/global/common/software/lsst/common/miniconda/current/lib/python3.6/site-packages/GCRCatalogs/alphaq.py:105: UserWarning: No md5 sum specified in the config file
  warnings.warn('No md5 sum specified in the config file')
In [32]:
# Compare the redshift and g-r color distributions of every catalog in `gc_all`:
# left panel = redshift histogram, right panel = g-r histogram.
fig, ax = plt.subplots(ncols=2, figsize=(12,5), dpi=100)

for label, gc_this in gc_all.items():
    # pick whichever g and r magnitudes this particular catalog provides
    mag_g = gc_this.first_available('mag_g_lsst', 'mag_g_sdss', 'mag_g_des', 'mag_true_g_lsst', 'mag_true_g_sdss', 'mag_true_g_des')
    mag_r = gc_this.first_available('mag_r_lsst', 'mag_r_sdss', 'mag_r_des', 'mag_true_r_lsst', 'mag_true_r_sdss', 'mag_true_r_des')
    # The check must be made on the catalog being processed (`gc_this`), not on the
    # separate `gc` catalog loaded earlier in the notebook, so that 'gr' is only
    # added when this catalog does not have it yet (keeps the cell re-runnable).
    if 'gr' not in gc_this.list_all_quantities():
        gc_this.add_derived_quantity('gr', np.subtract, mag_g, mag_r)
    quantities_needed = ['gr', 'redshift']

    # 0.1 < z < 0.3, finite g and r magnitudes, and r-band magnitude below 22
    selection = ['redshift > 0.1', 'redshift < 0.3', (np.isfinite, mag_g), (np.isfinite, mag_r), mag_r + ' < 22']
    data = gc_this.get_quantities(quantities_needed, filters=selection)
    # `density=True` normalizes each histogram; it replaces the deprecated `normed=True`
    ax[0].hist(data['redshift'], np.linspace(0.1, 0.3, 21), density=True, alpha=0.6, label=label);
    ax[1].hist(data['gr'], np.linspace(-0.5, 2, 26), density=True, alpha=0.6);

ax[0].legend(frameon=False);
ax[0].set_xlabel('$z$');
ax[1].set_xlabel('$g-r$');
/global/common/software/lsst/common/miniconda/current/lib/python3.6/site-packages/GCR.py:216: UserWarning: mag_g_lsst not available; using mag_g_des instead
  warnings.warn('{} not available; using {} instead'.format(quantities[0], q))
/global/common/software/lsst/common/miniconda/current/lib/python3.6/site-packages/GCR.py:216: UserWarning: mag_r_lsst not available; using mag_r_des instead
  warnings.warn('{} not available; using {} instead'.format(quantities[0], q))
/global/common/software/lsst/common/miniconda/current/lib/python3.6/site-packages/GCR.py:364: RuntimeWarning: invalid value encountered in subtract
  return func(*new_args)
In [ ]: