#!/usr/bin/env python # coding: utf-8 # In[171]: import os, fnmatch, glob import numpy as np import pandas as pd from sklearn.cluster import MeanShift, estimate_bandwidth from astropy.io import fits from io import StringIO from astropy.coordinates import SkyCoord from astropy.coordinates import Angle import astropy.units as u # Set up matplotlib and use a nicer set of plot parameters get_ipython().run_line_magic('config', 'InlineBackend.rc = {}') import matplotlib matplotlib.rc_file("matplotlibrc") import matplotlib.pyplot as plt get_ipython().run_line_magic('matplotlib', 'inline') # ## We define the folder that contain the FITS images and Night log files # In[227]: survey_file = 'data/MS_VISTA_P98.csv' im_folder= '/mnt/data2/ESO/raw' im_pattern = '*.fits.fz' log_folder= '/mnt/data2/ESO/raw' log_pattern = '*.NL.txt' header_keys = ['DATE-OBS', 'RA', 'DEC', \ 'HIERARCH ESO OBS PROG ID', 'HIERARCH ESO OBS NAME', 'HIERARCH ESO DPR TYPE', \ 'HIERARCH ESO INS FILT1 NAME', 'HIERARCH ESO DET DIT', 'HIERARCH ESO DET NDIT', \ 'HIERARCH ESO TPL ID', 'HIERARCH ESO TPL NEXP', 'HIERARCH ESO TPL EXPNO', \ 'OFFST_ID','NOFFSETS', 'OFFSET_I', \ 'JITTR_ID','NJITTER', 'JITTER_I'] survey_info= pd.DataFrame([{'Tile':1, 'RA':'23:26:07.46', 'Dec':'-53:28:13.7'}, \ {'Tile':2, 'RA':'23:53:46.15', 'Dec':'-52:32:21.7'}, \ {'Tile':3, 'RA':'00:22:34.67', 'Dec':'-51:17:37.6'}, \ {'Tile':4, 'RA':'00:53:43.00', 'Dec':'-49:36:27.2'}, \ {'Tile':5, 'RA':'01:03:22.85', 'Dec':'-48:35:37.4'}, \ {'Tile':6, 'RA':'01:12:56.00', 'Dec':'-47:35:04.6'}], columns=['Tile', 'RA','Dec']) # In[204]: im_files = glob.glob( os.path.join(im_folder, im_pattern)) print("Number of images: ", len(im_files)) # In[205]: log_files = glob.glob( os.path.join(log_folder, log_pattern)) print("Number of log files: ", len(log_files)) # ## Let's process the FITS headers # In[206]: survey_list=[] for i, im_file in enumerate(im_files): if i % 100 == 0: print('Processing file ', i, ' of ', len(im_files)) hdulist = fits.open(im_file) im_header = hdulist[0].header result = {} for key in header_keys: if key in im_header: result[key] = im_header[key] else: result[key] = np.nan result['Filename'] = im_file survey_list.append(result) survey_df = pd.DataFrame(survey_list, columns=['Filename']+header_keys) survey_df['DATE-OBS']=pd.to_datetime(survey_df['DATE-OBS']) survey_df.rename(columns={'DEC':'Dec'}, inplace=True) survey_df.sort_values('DATE-OBS', inplace=True) # In[208]: survey_df.head() # In[209]: survey_df['HIERARCH ESO DPR TYPE'].unique() # In[210]: survey_science_df = survey_df[survey_df['HIERARCH ESO DPR TYPE']=='OBJECT'].copy() survey_science_df.head() # ## Let's process the night log files # In[211]: for i, log_file in enumerate(log_files): if i % 100 == 0: print('Processing log file ', i, ' of ', len(log_files)) log_data="" log_data_append=False log_data_template=False with open(log_file) as f: for line in f: if line.startswith("Name:"): log_name=line.split(":")[1].strip() if line.startswith("Grade:"): log_grade=line.split(":")[1].strip() if line.startswith("Template:"): log_data_template=True if log_data_template==True and log_data_append==False: if line.startswith("-----"): log_data_append=True elif log_data_template==True and log_data_append==True: if line.startswith("-----"): og_data_template=False log_data_append=False break line = line.rstrip("\t\n") log_data = log_data + line.replace(" ","_").replace("\t",",") + "\n" log_data = StringIO(log_data) log_df = pd.read_csv(log_data) log_df['OBS.NAME'] = log_name log_df['ESOGRADE'] = log_grade if i==0: survey_log_df=log_df else: survey_log_df=survey_log_df.append(log_df, ignore_index=True) # In[212]: survey_log_df.head() # In[213]: survey_log_df['ESOGRADE'].unique() # In[214]: survey_log_df.columns # In[215]: survey_log_df.groupby(['ESOGRADE']).agg('count') # ## Counting the total number of images per tile # Adding columng 'Archive_Filename' to survey_science_df # In[216]: survey_science_df.loc[:,'Archive_Filename']=survey_science_df['Filename'].apply(lambda x: os.path.basename(x).strip(".fits.fz")) # Merging survey_science_df and survey_log_df using column 'Archive_Filename'. The new dataframe is stored in survey_science_log # In[219]: survey_science_log = survey_science_df.merge(survey_log_df[['Archive_Filename','OBS.NAME','ESOGRADE']], how='inner', on='Archive_Filename') survey_science_log.head() # In[220]: survey_science_log.groupby(['ESOGRADE','OFFSET_I']).agg('count')['Filename'] # We keep only OBs with ESOGRADE equals A or B # In[228]: survey_science_log_good = survey_science_log[(survey_science_log['ESOGRADE']=='A') | (survey_science_log['ESOGRADE']=='B') ] # In[229]: survey_info # In[230]: def separation(x, tile_coo): c=SkyCoord(x['RA'], x['Dec'], unit=(u.deg, u.deg), frame='icrs') result=c.separation(tile_coo) return result.deg for i in range(len(survey_info)): print('Analysing the images of Tile ', survey_info.loc[i,'Tile']) #survey_science_log_good print('RA: ', survey_info.loc[i,'RA']) print('Dec: ', survey_info.loc[i,'Dec']) tile_coo=SkyCoord(survey_info.loc[i,'RA'], survey_info.loc[i,'Dec'], unit=(u.hour, u.deg), frame='icrs') separation_df=survey_science_log_good.apply(separation, tile_coo=tile_coo, axis=1) print('Total number of images: ', len(survey_science_log_good[separation_df < 0.5])) print() # ## Display FITS images # In[8]: im_data = hdulist[1].data im_data.shape # In[21]: ss_data = im_data - np.median(im_data) plt.imshow(ss_data, cmap='gray', vmin=-1e3, vmax=5e3) plt.colorbar() # In[ ]: