by Yao-Yuan Mao (last update: 7/27/2018)
Links to GitHub code repos: GCRCatalogs and GCR
Note: You should be running this notebook at https://jupyter-dev.nersc.gov
## Note: if you clone the gcr-catalogs repo and are running this under the `examples` folder,
## you can also use your version of GCRCatalogs:
#import sys
#sys.path.insert(0, '/path/to/your/cloned/gcr-catalogs')
## The following lines check that you are in the `lsst` group; the assertion
## below fails with an explanatory message if you are not.
import subprocess
# Run the `groups` command, decode its output and split it on whitespace to get the group names.
assert u'lsst' in subprocess.check_output(['groups']).decode().split(), 'You need to be in the `lsst` group for this notebook to work'
import numpy as np
get_available_catalogs()
lists available catalogs; returns dict
.load_catalog()
loads the catalog you want; returns an instance of GCR.BaseGenericCatalog
import GCRCatalogs
## check version
print('GCRCatalogs =', GCRCatalogs.__version__, '|' ,'GCR =', GCRCatalogs.GCR.__version__)
## find available catalogs, sorted by their name
print('\n'.join(sorted(GCRCatalogs.get_available_catalogs())))
## find *all* available catalogs, including some older versions, sorted by their name
print('\n'.join(sorted(GCRCatalogs.get_available_catalogs(include_default_only=False))))
## find *all* available catalogs whose name starts with 'proto-dc2', sorted by their name
print('\n'.join(sorted(c for c in GCRCatalogs.get_available_catalogs(include_default_only=False) if c.startswith('proto-dc2'))))
## load 'protoDC2' catalog
gc = GCRCatalogs.load_catalog('protoDC2_test') # use 'protoDC2_test' to skip md5 check (which takes a while)
#gc = GCRCatalogs.load_catalog('protoDC2')
See also the full GCR API Documentation.
get_quantities()
loads the quantities you need; takes a list
and returns dict
.has_quantity()
and has_quantities()
can check if the quantities you need exist; both return bool
.list_all_quantities()
lists all available quantities; returns list
## load several quantities in one call (pass a list of quantity names)
gc.get_quantities(['mag_u_lsst', 'ra', 'dec'])
## check whether a single quantity exists
gc.has_quantity('mag_u_lsst')
## check whether several quantities exist
gc.has_quantities(['mag_u_lsst', 'ra', 'dec'])
## Only returns `True` if *all* quantities exist
gc.has_quantities(['mag_u_lsst', 'ra', 'dec', 'quantitiy_that_does_not_exist'])
## print every available quantity name, sorted, on one comma-separated line
print(', '.join(sorted(gc.list_all_quantities())))
Native quantities are quantities that have not yet been homogenized (to common labels/units). However, you can still access them as long as you know what you are doing.
## print out the first 5 native quantities
print('\n'.join(sorted(gc.list_all_native_quantities())[:5]))
## list both native or derived quantities, print the first 5 out
print('\n'.join(sorted(gc.list_all_quantities(include_native=True))[:5]))
# find all quantities that match a regular expression
# (here: names of the form `sed_<digits>_<digits>`; `re.match` anchors at the start, `$` at the end)
import re
data = gc.get_quantities([q for q in gc.list_all_quantities() if re.match(r'sed_\d+_\d+$', q)])
list(data.keys())
# to retrieve native quantities, you can just use `get_quantities` as usual
gc.get_quantities(['LSST_filters/diskLuminositiesStellar:LSST_g:observed',
                   'LSST_filters/diskLuminositiesStellar:LSST_g:observed:dustAtlas'])
You can also rename the native quantities by using add_quantity_modifier()
. For example:
# you can also create an alias for a quantity:
# register the short name 'balmer_alpha_6563' for the long native quantity name
gc.add_quantity_modifier('balmer_alpha_6563', 'emissionLines/diskLineLuminosity:balmerAlpha6563:rest')
# the alias can then be requested like any other quantity
gc.get_quantities(['balmer_alpha_6563'])
lightcone
is a bool
cosmology
is an instance of astropy.cosmology.FLRW
get_input_kwargs()
returns a dict
(when no argument is given) from the original yaml config file.
If an argument is passed, it returns the corresponding value for key=argument.
gc.lightcone
## catalog-level attributes
gc.cosmology
gc.version
## catalog info: a single entry ('description'), then everything
print(gc.get_catalog_info('description'))
print(gc.get_catalog_info())
## per-quantity info for the first 20 quantities (sorted by name)
for q in sorted(gc.list_all_quantities())[:20]:
    print(q, gc.get_quantity_info(q))
You can specify filters
in get_quantities
to select a subset of data.
Note that filters
always takes a list.
# note that we use a list even if there is only one filter
data = gc.get_quantities(['stellar_mass', 'ra', 'dec'], filters=['stellar_mass > 1e10'])
# sanity check: every returned value should satisfy the filter
print((data['stellar_mass'] > 1e10).all())
## You can use more than one filter.
data = gc.get_quantities(['stellar_mass'], filters=['ra < -2', 'dec > 1'])
print(len(data['stellar_mass']))
# cross-check: load the unfiltered columns and count the rows passing both cuts (combined with `&`)
data_check = gc.get_quantities(['ra', 'dec'])
print(np.count_nonzero((data_check['ra'] < -2) & (data_check['dec'] > 1)))
# For more complicated filters, specify them as tuple of (callable, quantity1, quantity2, ...)
data = gc.get_quantities(['stellar_mass'], filters=[(np.isfinite, 'stellar_mass')])
Some catalogs (currently only buzzard and buzzard_high-res) support "native filters", which you can use to load only a subset of data more efficiently.
gc_buzzard = GCRCatalogs.load_catalog('buzzard')
# NOTE(review): `_native_filter_quantities` has a leading underscore, so it is presumably
# a non-public attribute -- confirm whether a public accessor exists.
print(gc_buzzard._native_filter_quantities)
# load only the rows selected by the native filter, then print the RA/Dec extent of that subset
data = gc_buzzard.get_quantities(['ra', 'dec'], native_filters=['healpix_pixel == 1'])
print(data['ra'].min(), data['ra'].max(), data['dec'].min(), data['dec'].max())
# same again for a different healpix pixel, for comparison
data = gc_buzzard.get_quantities(['ra', 'dec'], native_filters=['healpix_pixel == 2'])
print(data['ra'].min(), data['ra'].max(), data['dec'].min(), data['dec'].max())
# the object returned by `get_quantities` can be passed directly to table constructors
data = gc.get_quantities(['mag_u_lsst', 'ra', 'dec'])
# as a pandas DataFrame
import pandas as pd
pd.DataFrame(data)
# as an astropy Table
from astropy.table import Table
Table(data)
Sometimes you can allow slightly different quantities (for example, LSST u band and SDSS u band) when comparing different catalogs. In this case, you can use first_available()
to get the first of the given quantities that is available in the catalog.
gc.first_available('mag_u_des', 'mag_u_sdss', 'mag_u_lsst')
# use first_available to get some quantities you need, and translate to your favorite name
# (for each band in g, r, i: map the first available magnitude column name -> the band letter)
mag_translate = {gc.first_available(*[name.format(band) for name in ('mag_{}', 'mag_{}_lsst', 'mag_{}_des', 'mag_{}_sdss')]): band for band in 'gri'}
data = gc.get_quantities(list(mag_translate))
# keys before renaming: the catalog's own column names
print(list(data))
# rename the keys via `mag_translate`; keys without an entry are kept unchanged
data = {mag_translate.get(k, k): v for k, v in data.items()}
print(list(data))
# `plt` is otherwise only imported further down in this file; import it here so
# that this cell also works when the notebook is run from top to bottom.
import matplotlib.pyplot as plt

# load coadd catalog (for a single tract)
coadd_cat = GCRCatalogs.load_catalog('dc2_coadd_run1.1p_tract4850')

# When `return_iterator` is turned on, the method `get_quantities` will return an
# iterator, and each element in the iterator will be the quantities we requested in
# different chunks of the dataset.
# For coadd catalogs, the different chunks happen to be different patches,
# resulting in a different color for each patch in the scatter plot below.
for coadd_data in coadd_cat.get_quantities(['ra', 'dec'], return_iterator=True):
    plt.scatter(coadd_data['ra'], coadd_data['dec'], s=1, rasterized=True)

# label the axes once, after all chunks have been drawn
plt.xlabel('RA')
plt.ylabel('Dec');  # trailing `;` suppresses the notebook's echo of the return value
You can add your own derived quantities based on the available quantities. The call signature is:
cat.add_derived_quantity(derived_quantity, func, *quantities)
# remove any existing 'gr' definition first, so this cell can be run again
if 'gr' in coadd_cat.list_all_quantities():
    coadd_cat.del_quantity_modifier('gr')

# define 'gr' as np.subtract('mag_g', 'mag_r'), i.e. mag_g - mag_r
coadd_cat.add_derived_quantity('gr', np.subtract, 'mag_g', 'mag_r')

# keep only rows with finite magnitudes, then check the derived column
# against the same subtraction done by hand
data = coadd_cat.get_quantities(['mag_g', 'mag_r', 'gr'], filters=[(np.isfinite, 'mag_g'), (np.isfinite, 'mag_r')])
print((data['mag_g'] - data['mag_r'] == data['gr']).all())
GCRQuery lets you define cuts and filters before loading a catalog!
GCRQuery objects can operate with themselves using any boolean operations (and, or, xor, not)
from GCR import GCRQuery
# Let's choose a small RA and Dec range to do the matching so that it won't take too long!
ra_min, ra_max = 55.5, 56.0
dec_min, dec_max = -29.0, -28.5
# positional cut: a half-open box in RA and Dec, written as string expressions
coord_cut = GCRQuery(
    'ra >= {}'.format(ra_min),
    'ra < {}'.format(ra_max),
    'dec >= {}'.format(dec_min),
    'dec < {}'.format(dec_max),
)
# magnitude cut: mixes a (callable, quantity) tuple with a string expression
mag_filters = GCRQuery(
    (np.isfinite, 'mag_i'),
    'mag_i < 24.5',
)
# load the columns first, then apply the combined (`&`) query to the loaded data
data = coadd_cat.get_quantities(['ra', 'dec', 'mag_i'])
data_subset = (coord_cut & mag_filters).filter(data)
import numpy as np
from astropy.table import Table
import matplotlib.pyplot as plt
%matplotlib inline
# Compare the redshift and g-r color distributions of two catalogs.
catalogs = ('protoDC2_test', 'buzzard_test')
gc_all = {name: GCRCatalogs.load_catalog(name) for name in catalogs}

fig, ax = plt.subplots(ncols=2, figsize=(12, 5), dpi=100)

for label, gc_this in gc_all.items():
    # pick whichever g- and r-band magnitude this particular catalog provides
    mag_g = gc_this.first_available('mag_g_lsst', 'mag_g_sdss', 'mag_g_des', 'mag_true_g_lsst', 'mag_true_g_sdss', 'mag_true_g_des')
    mag_r = gc_this.first_available('mag_r_lsst', 'mag_r_sdss', 'mag_r_des', 'mag_true_r_lsst', 'mag_true_r_sdss', 'mag_true_r_des')

    # The check must be made on the catalog being processed (`gc_this`), not on
    # the unrelated global `gc` loaded earlier in the notebook.
    if 'gr' not in gc_this.list_all_quantities():
        gc_this.add_derived_quantity('gr', np.subtract, mag_g, mag_r)

    quantities_needed = ['gr', 'redshift']
    data = gc_this.get_quantities(
        quantities_needed,
        filters=[
            'redshift > 0.1',
            'redshift < 0.3',
            (np.isfinite, mag_g),
            (np.isfinite, mag_r),
            mag_r + ' < 22',
        ],
    )

    # `density=True` replaces the `normed=True` keyword, which matplotlib removed.
    ax[0].hist(data['redshift'], np.linspace(0.1, 0.3, 21), density=True, alpha=0.6, label=label)
    ax[1].hist(data['gr'], np.linspace(-0.5, 2, 26), density=True, alpha=0.6)

# decorate the axes once, after every catalog has been drawn
ax[0].legend(frameon=False)
ax[0].set_xlabel('$z$')
ax[1].set_xlabel('$g-r$');  # trailing `;` suppresses the notebook's echo of the return value