# Photo-z pipeline, part 1: load the matched DEEP2 catalogue, inspect and
# clean the photometry, then shuffle and define the train/validation cut.
# (Converted from an IPython notebook: '%pylab inline' replaced by explicit
# imports; Python 2 prints converted to Python 3.)
import numpy as np
import matplotlib.pyplot as plt
import os
import subprocess

# Matched catalogue: rows are features (5 mags, 5 mag errors, spec-z),
# columns are objects -- TODO confirm row layout against DEEP2_matched.dat.
matched_cat = np.loadtxt('DEEP2_matched.dat')
print(matched_cat.shape)

bands = ['u', 'i', 'z', 'r', 'g']

# --- Magnitude histograms, one panel per band -------------------------------
plt.figure(figsize=(11 * 2, 9 * 2))
for i in np.arange(5):
    sx = plt.subplot(5, 3, i + 1)
    plt.setp(sx.get_yticklabels(), visible=False)
    plt.hist(matched_cat[i, :], bins=40, range=(15, 40),
             label=bands[i], alpha=0.5)
    leg = plt.legend()
    leg.get_frame().set_alpha(0.4)
    sx.legend(loc=0, fontsize=22).draw_frame(False)
    plt.suptitle("MAG", fontsize=24, y=1.02)
    plt.autoscale(tight=True)
    # mag == 99 is this catalogue's sentinel for a non-detection
    print('undetected objects in band (mag == 99)', bands[i],
          np.sum(matched_cat[i, :] == 99))
plt.tight_layout()

# Replace the non-detection sentinel with a faint-but-finite magnitude
for i in np.arange(5):
    sel_99 = matched_cat[i, :] == 99
    matched_cat[i, sel_99] = 35.0

# --- Magnitude-error histograms (rows 5..9) ---------------------------------
for i in np.arange(5, 10):
    sx = plt.subplot(5, 3, i - 4)
    plt.setp(sx.get_yticklabels(), visible=False)
    plt.hist(matched_cat[i, :], bins=40, range=(0, 0.1),
             label=bands[i - 5], alpha=0.5)
    leg = plt.legend()
    leg.get_frame().set_alpha(0.4)
    plt.autoscale(tight=True)
    sx.legend(loc=0, fontsize=22).draw_frame(False)
    print('objects with mag_err > 1.0 =', np.sum(matched_cat[i, :] > 1.0),
          bands[i - 5], 'band', 'maximum error value ',
          np.max(matched_cat[i, :]))
plt.suptitle("MAG ERROR", fontsize=24, y=1.02)
plt.tight_layout()

# Clip extreme magnitude errors at 2.0
for i in np.arange(5, 10):
    sel_err = matched_cat[i, :] > 2.0
    matched_cat[i, sel_err] = 2.0

# --- Spectroscopic-redshift histogram (row 10) ------------------------------
plt.figure(figsize=(5, 4))
plt.hist(matched_cat[10, :], bins=40, range=(0, 2), alpha=0.5)
plt.suptitle("Z spec", fontsize=14, y=1.02)
plt.autoscale(tight=True)
plt.grid()
plt.tight_layout()

# Shuffle the objects (columns) before splitting into train/validation.
np.random.shuffle(matched_cat.T)
# use 30% as training set as an example; in the paper this is 70%.
cut = int(0.3 * len(matched_cat[0, :]))
# Photo-z pipeline, part 2: split the catalogue, write the SkyNet regression
# input files, and bin the spectroscopic redshifts into 40 class labels.
# (This 30% training split will train faster than the paper's 70%.)
train_cat = matched_cat[:, :cut]
valid_cat = matched_cat[:, cut:]
print('training catalogue shape ', train_cat.shape)
print('valdation catalogue shape', valid_cat.shape)


def _write_skynet_inputs(filename, cat, n_out, truth_of):
    """Write a SkyNet-format data file.

    Header: number of input features (rows of ``cat`` minus the truth row),
    then ``n_out`` (number of outputs/classes). Per object: comma-separated
    feature values on one line, the truth value on the next.
    ``truth_of(i)`` returns the truth written for object (column) ``i``.
    """
    n_feat = len(cat[:, 0]) - 1  # last row is spec-z, not a feature
    with open(filename, 'w') as out:
        out.write(str(n_feat))
        out.write('\n')
        out.write(str(n_out))
        out.write('\n')
        for i in range(len(cat.T)):
            out.write(','.join(str(cat[j, i]) for j in range(n_feat)))
            out.write('\n')
            out.write(str(truth_of(i)))
            out.write('\n')


# Regression truth is the spec-z itself (single output).
_write_skynet_inputs('sky_net_reg_train.txt', train_cat, 1,
                     lambda i: train_cat.T[i, -1])
# Inspect the first few lines (was: ! head -n 6 'sky_net_reg_train.txt')
subprocess.call("head -n 6 sky_net_reg_train.txt", shell=True)
_write_skynet_inputs('sky_net_reg_test.txt', valid_cat, 1,
                     lambda i: valid_cat.T[i, -1])

# --- Redshift binning for the classification network ------------------------
z_binned = np.linspace(0, 2.0, 41)  # 41 bin edges give you 40 bin centers
z_center = [(z_binned[i] + z_binned[i + 1]) / 2.0
            for i in range(len(z_binned) - 1)]  # bin centers
np.savetxt('z_center_' + str(len(z_binned) - 1) + '_bins_pdf.dat', z_center)
# BUG FIX: the edges file previously saved z_center; save the edges instead.
np.savetxt('z_edges_' + str(len(z_binned) - 1) + '_bins_pdf.dat', z_binned)

z_spec_train = train_cat.T[:, -1]
z_spec_valid = valid_cat.T[:, -1]

# Map each training spec-z onto its bin index (0..39); a z exactly at or
# below the first edge keeps the default label 0.
label_train = np.zeros_like(z_spec_train)
for k, z in enumerate(z_spec_train):
    for count_i in range(len(z_binned) - 1):
        if z > z_binned[count_i] and z <= z_binned[count_i + 1]:
            label_train[k] = count_i

label_valid = np.zeros_like(z_spec_valid)
# Photo-z pipeline, part 3: label the validation set and write the SkyNet
# classification input files (40 redshift classes).
# Map each validation spec-z onto its bin index (0..39).
for k, z in enumerate(z_spec_valid):
    for count_i in range(len(z_binned) - 1):
        if z > z_binned[count_i] and z <= z_binned[count_i + 1]:
            label_valid[k] = count_i

print(label_valid.shape)
print(label_train.shape)
print('valid labels = ', label_valid)
print('train labels= ', label_train)


def _write_classification_file(filename, cat, labels):
    """Write one SkyNet classification file.

    Header: number of input features, then the number of classes (40).
    Per object: comma-separated features on one line, the integer class
    label on the next.
    """
    n_feat = len(cat[:, 0]) - 1  # last row is spec-z, not a feature
    with open(filename, 'w') as out:
        out.write(str(n_feat))
        out.write('\n')
        out.write(str(40))
        out.write('\n')
        for i in range(len(cat.T)):
            out.write(','.join(str(cat[j, i]) for j in range(n_feat)))
            out.write('\n')
            out.write(str(int(labels.T[i])))
            out.write('\n')


_write_classification_file('sky_net_cla_train.txt', train_cat, label_train)
_write_classification_file('sky_net_cla_test.txt', valid_cat, label_valid)

# classification valid file (was: ! head sky_net_cla_test.txt)
subprocess.call("head sky_net_cla_test.txt", shell=True)
# regression valid file (shell command continues on the next block)
# Photo-z pipeline, part 4: write the SkyNet config files, load the
# (pre-made) predictions, form photo-z estimates, and compare the stacked
# classification PDFs against the true N(z) in photo-z bins.
# (was: ! head sky_net_reg_test.txt)
subprocess.call("head sky_net_reg_test.txt", shell=True)

# config file names #
con_reg_filename = 'photo_z_regression.inp'
con_cla_filename = 'photo_z_bins_40_classification.inp'


def _write_skynet_config(path, input_root, output_root, classification):
    """Write the top of a SkyNet .inp config file.

    ``input_root`` is the data-file root without the train.txt / test.txt
    suffix; three hidden layers of 20/40/40 nodes; ``classification`` is
    1 for a classification network, 0 for regression.
    """
    with open(path, 'w') as f:
        f.write('#input_root\n')
        f.write(input_root + '\n')
        f.write('#output_root\n')
        f.write(output_root + '\n')
        for n_hid in (20, 40, 40):  # nodes per hidden layer
            f.write('#nhid\n')
            f.write(str(n_hid) + '\n')
        f.write('#classification_network\n')
        f.write(str(classification) + '\n')


_write_skynet_config('temp_clas', 'sky_net_cla_', 'results_notebook_cla_', 1)
_write_skynet_config('temp_reg', 'sky_net_reg_', 'results_notebook_reg_', 0)

# cat each temp header with the shared 'bottom' options block, then clean up
subprocess.call('cat temp_clas bottom >' + con_cla_filename, shell=True)
subprocess.call('cat temp_reg bottom >' + con_reg_filename, shell=True)
os.remove('temp_clas')
os.remove('temp_reg')

# Uncomment to actually train the networks:
# subprocess.call('SkyNet ' + con_cla_filename, shell=True)
# subprocess.call('SkyNet ' + con_reg_filename, shell=True)

# --- Load pre-made SkyNet predictions ---------------------------------------
cla_res = np.loadtxt('pre_made_results_notebook_cla_test_pred.txt')
reg_res = np.loadtxt('pre_made_results_notebook_reg_test_pred.txt')

cla_phot = cla_res[:, -40:]  # classification NN outputs are the last 40 cols
spec_z = reg_res[:, -2]      # spec_z
bins_center = np.loadtxt('z_center_40_bins_pdf.dat')  # read in bin centers

## Alternate photo_z estimates ##
# estimate 1: peak of the per-object 'PDF'
photo_z_alt = np.array([bins_center[np.argmax(cla_phot[i, :])]
                        for i in range(cla_res.shape[0])])
# estimate 2: weighted mean of the PDF
photo_z_alt2 = np.array(np.inner(bins_center, cla_phot))

# --- Stacked PDF vs true N(z) per photo-z bin -------------------------------
k = 0
# consecutive pairs define the photo-z selection bins; the (2.0, 0.0) pair
# at m == 1 is deliberately skipped below
z_edges = [0.0, 2.0, 0.0, 0.2, 0.39, 0.58, 0.72, 0.86, 1.02, 1.30, 2.0]
fig1 = plt.figure(num=None, figsize=(15, 15 * 2), dpi=80,
                  facecolor='w', edgecolor='k')
for m in range(len(z_edges) - 1):
    if m != 1:  # this ignores the [2.0-0.0] bin
        k = k + 1
        sel_NN2 = (photo_z_alt > z_edges[m]) & (photo_z_alt < z_edges[m + 1])
        sel_NN = np.where(sel_NN2)
        count = np.sum(sel_NN2)
        print('z[', z_edges[m], z_edges[m + 1], ']',
              '# selected galaxies = ', count)
        print('==============================')
        if count > 0:
            # stack and normalize the selected per-object PDFs
            sel_PDF = cla_phot[sel_NN]
            stacked_PDF = np.sum(sel_PDF, axis=0)
            stacked_PDF = stacked_PDF / np.sum(stacked_PDF)
            # histogram of the true spec-z for the same selection, normalized
            sel_spec = spec_z[sel_NN]
            sel_spec_hist, bin_edges = np.histogram(
                sel_spec, bins=40, range=(0.0, 2.0), density=1)
            sel_spec_hist = sel_spec_hist / np.sum(sel_spec_hist)
            # cumulative distributions (np.cumsum replaces the O(n^2)
            # per-prefix np.sum list comprehensions)
            cum_NN = np.cumsum(stacked_PDF)
            cum_spec = np.cumsum(sel_spec_hist)
            diff = cum_NN - cum_spec

            ax1 = fig1.add_subplot(len(z_edges) - 1, 3, (k * 3) + 1)
            ax2 = fig1.add_subplot(len(z_edges) - 1, 3, (k * 3) + 2)
            ax3 = fig1.add_subplot(len(z_edges) - 1, 3, (k * 3) + 3)
            ax1.plot(bins_center, stacked_PDF, color='blue',
                     drawstyle='steps-mid', lw=2, ls='--', label='NN', alpha=0.9)
            ax1.plot(bins_center, sel_spec_hist, color='red',
                     drawstyle='steps-mid', lw=2, ls='-', label='Truth', alpha=0.4)
            ax2.plot(bins_center, cum_NN, color='blue',
                     drawstyle='steps-mid', lw=2, ls='--', label='NN', alpha=0.9)
            ax2.plot(bins_center, cum_spec, color='red',
                     drawstyle='steps-mid', lw=2, ls='-', label='Truth', alpha=0.4)
            ax3.plot(bins_center, diff, color='red',
                     drawstyle='steps-mid', lw=2, ls='-', label='Truth', alpha=0.9)
            ax3.set_ylim((-0.05, 0.05))
            ax1.legend(loc=0, fontsize=14).draw_frame(False)
            ax2.legend(loc=0, fontsize=14).draw_frame(False)
            # only the bottom row (k == 9) keeps its x tick labels and labels
            if k != 9:
                ax1.tick_params(axis='both', which='major', labelsize=12,
                                labelright=False, labelleft=False, labelbottom=False)
                ax2.tick_params(axis='both', which='major', labelsize=12,
                                labelright=False, labelleft=False, labelbottom=False)
                ax3.tick_params(axis='both', which='major', labelsize=12,
                                labelright=True, labelleft=False, labelbottom=False)
            if k == 9:
                ax1.tick_params(axis='both', which='major', labelsize=12,
                                labelright=False, labelleft=False, labelbottom=True)
                ax2.tick_params(axis='both', which='major', labelsize=12,
                                labelright=False, labelleft=False, labelbottom=True)
                ax3.tick_params(axis='both', which='major', labelsize=12,
                                labelright=True, labelleft=False, labelbottom=True)
                ax1.set_xlabel(r'z [redshift]', fontsize=15)
                ax2.set_xlabel(r'z [redshift]', fontsize=15)
                ax3.set_xlabel(r'z [redshift]', fontsize=15)
            # NOTE(review): original flattened source is ambiguous on whether
            # set_xticks was inside the k == 9 branch; applying it to every
            # panel is harmless since hidden rows have labelbottom=False.
            ax1.set_xticks([0.0, 0.5, 1.0, 1.5, 1.8])
            ax2.set_xticks([0.0, 0.5, 1.0, 1.5, 1.8])
            ax3.set_xticks([0.0, 0.5, 1.0, 1.5, 1.8])
fig1.subplots_adjust(hspace=0)
fig1.subplots_adjust(wspace=0)