# Photo-z pipeline, part 1: load the matched DEEP2 catalogue, inspect and
# clean the photometry, then shuffle and define the train/validation cut.
# (Converted from an IPython notebook: '%pylab inline' replaced by explicit
# imports; Python 2 prints converted to Python 3.)
import numpy as np
import matplotlib.pyplot as plt
import os
import subprocess

# Matched catalogue: rows are features (5 mags, 5 mag errors, spec-z),
# columns are objects -- TODO confirm row layout against DEEP2_matched.dat.
matched_cat = np.loadtxt('DEEP2_matched.dat')
print(matched_cat.shape)

bands = ['u', 'i', 'z', 'r', 'g']

# --- Magnitude histograms, one panel per band -------------------------------
plt.figure(figsize=(11 * 2, 9 * 2))
for i in np.arange(5):
    sx = plt.subplot(5, 3, i + 1)
    plt.setp(sx.get_yticklabels(), visible=False)
    plt.hist(matched_cat[i, :], bins=40, range=(15, 40),
             label=bands[i], alpha=0.5)
    leg = plt.legend()
    leg.get_frame().set_alpha(0.4)
    sx.legend(loc=0, fontsize=22).draw_frame(False)
    plt.suptitle("MAG", fontsize=24, y=1.02)
    plt.autoscale(tight=True)
    # mag == 99 is this catalogue's sentinel for a non-detection
    print('undetected objects in band (mag == 99)', bands[i],
          np.sum(matched_cat[i, :] == 99))
plt.tight_layout()

# Replace the non-detection sentinel with a faint-but-finite magnitude
for i in np.arange(5):
    sel_99 = matched_cat[i, :] == 99
    matched_cat[i, sel_99] = 35.0

# --- Magnitude-error histograms (rows 5..9) ---------------------------------
for i in np.arange(5, 10):
    sx = plt.subplot(5, 3, i - 4)
    plt.setp(sx.get_yticklabels(), visible=False)
    plt.hist(matched_cat[i, :], bins=40, range=(0, 0.1),
             label=bands[i - 5], alpha=0.5)
    leg = plt.legend()
    leg.get_frame().set_alpha(0.4)
    plt.autoscale(tight=True)
    sx.legend(loc=0, fontsize=22).draw_frame(False)
    print('objects with mag_err > 1.0 =', np.sum(matched_cat[i, :] > 1.0),
          bands[i - 5], 'band', 'maximum error value ',
          np.max(matched_cat[i, :]))
plt.suptitle("MAG ERROR", fontsize=24, y=1.02)
plt.tight_layout()

# Clip extreme magnitude errors at 2.0
for i in np.arange(5, 10):
    sel_err = matched_cat[i, :] > 2.0
    matched_cat[i, sel_err] = 2.0

# --- Spectroscopic-redshift histogram (row 10) ------------------------------
plt.figure(figsize=(5, 4))
plt.hist(matched_cat[10, :], bins=40, range=(0, 2), alpha=0.5)
plt.suptitle("Z spec", fontsize=14, y=1.02)
plt.autoscale(tight=True)
plt.grid()
plt.tight_layout()

# Shuffle the objects (columns) before splitting into train/validation.
np.random.shuffle(matched_cat.T)
# use 30% as training set as an example; in the paper this is 70%.
cut = int(0.3 * len(matched_cat[0, :]))
# Photo-z pipeline, part 2: split the catalogue, write the SkyNet regression
# input files, and bin the spectroscopic redshifts into 40 class labels.
# (This 30% training split will train faster than the paper's 70%.)
train_cat = matched_cat[:, :cut]
valid_cat = matched_cat[:, cut:]
print('training catalogue shape ', train_cat.shape)
print('valdation catalogue shape', valid_cat.shape)


def _write_skynet_inputs(filename, cat, n_out, truth_of):
    """Write a SkyNet-format data file.

    Header: number of input features (rows of ``cat`` minus the truth row),
    then ``n_out`` (number of outputs/classes). Per object: comma-separated
    feature values on one line, the truth value on the next.
    ``truth_of(i)`` returns the truth written for object (column) ``i``.
    """
    n_feat = len(cat[:, 0]) - 1  # last row is spec-z, not a feature
    with open(filename, 'w') as out:
        out.write(str(n_feat))
        out.write('\n')
        out.write(str(n_out))
        out.write('\n')
        for i in range(len(cat.T)):
            out.write(','.join(str(cat[j, i]) for j in range(n_feat)))
            out.write('\n')
            out.write(str(truth_of(i)))
            out.write('\n')


# Regression truth is the spec-z itself (single output).
_write_skynet_inputs('sky_net_reg_train.txt', train_cat, 1,
                     lambda i: train_cat.T[i, -1])
# Inspect the first few lines (was: ! head -n 6 'sky_net_reg_train.txt')
subprocess.call("head -n 6 sky_net_reg_train.txt", shell=True)
_write_skynet_inputs('sky_net_reg_test.txt', valid_cat, 1,
                     lambda i: valid_cat.T[i, -1])

# --- Redshift binning for the classification network ------------------------
z_binned = np.linspace(0, 2.0, 41)  # 41 bin edges give you 40 bin centers
z_center = [(z_binned[i] + z_binned[i + 1]) / 2.0
            for i in range(len(z_binned) - 1)]  # bin centers
np.savetxt('z_center_' + str(len(z_binned) - 1) + '_bins_pdf.dat', z_center)
# BUG FIX: the edges file previously saved z_center; save the edges instead.
np.savetxt('z_edges_' + str(len(z_binned) - 1) + '_bins_pdf.dat', z_binned)

z_spec_train = train_cat.T[:, -1]
z_spec_valid = valid_cat.T[:, -1]

# Map each training spec-z onto its bin index (0..39); a z exactly at or
# below the first edge keeps the default label 0.
label_train = np.zeros_like(z_spec_train)
for k, z in enumerate(z_spec_train):
    for count_i in range(len(z_binned) - 1):
        if z > z_binned[count_i] and z <= z_binned[count_i + 1]:
            label_train[k] = count_i

label_valid = np.zeros_like(z_spec_valid)
# Photo-z pipeline, part 3: label the validation set and write the SkyNet
# classification input files (40 redshift classes).
# Map each validation spec-z onto its bin index (0..39).
for k, z in enumerate(z_spec_valid):
    for count_i in range(len(z_binned) - 1):
        if z > z_binned[count_i] and z <= z_binned[count_i + 1]:
            label_valid[k] = count_i

print(label_valid.shape)
print(label_train.shape)
print('valid labels = ', label_valid)
print('train labels= ', label_train)


def _write_classification_file(filename, cat, labels):
    """Write one SkyNet classification file.

    Header: number of input features, then the number of classes (40).
    Per object: comma-separated features on one line, the integer class
    label on the next.
    """
    n_feat = len(cat[:, 0]) - 1  # last row is spec-z, not a feature
    with open(filename, 'w') as out:
        out.write(str(n_feat))
        out.write('\n')
        out.write(str(40))
        out.write('\n')
        for i in range(len(cat.T)):
            out.write(','.join(str(cat[j, i]) for j in range(n_feat)))
            out.write('\n')
            out.write(str(int(labels.T[i])))
            out.write('\n')


_write_classification_file('sky_net_cla_train.txt', train_cat, label_train)
_write_classification_file('sky_net_cla_test.txt', valid_cat, label_valid)

# classification valid file (was: ! head sky_net_cla_test.txt)
subprocess.call("head sky_net_cla_test.txt", shell=True)
# regression valid file (shell command continues on the next block)
# Photo-z pipeline, part 4: write the SkyNet config files, load the
# (pre-made) predictions, form photo-z estimates, and compare the stacked
# classification PDFs against the true N(z) in photo-z bins.
# (was: ! head sky_net_reg_test.txt)
subprocess.call("head sky_net_reg_test.txt", shell=True)

# config file names #
con_reg_filename = 'photo_z_regression.inp'
con_cla_filename = 'photo_z_bins_40_classification.inp'


def _write_skynet_config(path, input_root, output_root, classification):
    """Write the top of a SkyNet .inp config file.

    ``input_root`` is the data-file root without the train.txt / test.txt
    suffix; three hidden layers of 20/40/40 nodes; ``classification`` is
    1 for a classification network, 0 for regression.
    """
    with open(path, 'w') as f:
        f.write('#input_root\n')
        f.write(input_root + '\n')
        f.write('#output_root\n')
        f.write(output_root + '\n')
        for n_hid in (20, 40, 40):  # nodes per hidden layer
            f.write('#nhid\n')
            f.write(str(n_hid) + '\n')
        f.write('#classification_network\n')
        f.write(str(classification) + '\n')


_write_skynet_config('temp_clas', 'sky_net_cla_', 'results_notebook_cla_', 1)
_write_skynet_config('temp_reg', 'sky_net_reg_', 'results_notebook_reg_', 0)

# cat each temp header with the shared 'bottom' options block, then clean up
subprocess.call('cat temp_clas bottom >' + con_cla_filename, shell=True)
subprocess.call('cat temp_reg bottom >' + con_reg_filename, shell=True)
os.remove('temp_clas')
os.remove('temp_reg')

# Uncomment to actually train the networks:
# subprocess.call('SkyNet ' + con_cla_filename, shell=True)
# subprocess.call('SkyNet ' + con_reg_filename, shell=True)

# --- Load pre-made SkyNet predictions ---------------------------------------
cla_res = np.loadtxt('pre_made_results_notebook_cla_test_pred.txt')
reg_res = np.loadtxt('pre_made_results_notebook_reg_test_pred.txt')

cla_phot = cla_res[:, -40:]  # classification NN outputs are the last 40 cols
spec_z = reg_res[:, -2]      # spec_z
bins_center = np.loadtxt('z_center_40_bins_pdf.dat')  # read in bin centers

## Alternate photo_z estimates ##
# estimate 1: peak of the per-object 'PDF'
photo_z_alt = np.array([bins_center[np.argmax(cla_phot[i, :])]
                        for i in range(cla_res.shape[0])])
# estimate 2: weighted mean of the PDF
photo_z_alt2 = np.array(np.inner(bins_center, cla_phot))

# --- Stacked PDF vs true N(z) per photo-z bin -------------------------------
k = 0
# consecutive pairs define the photo-z selection bins; the (2.0, 0.0) pair
# at m == 1 is deliberately skipped below
z_edges = [0.0, 2.0, 0.0, 0.2, 0.39, 0.58, 0.72, 0.86, 1.02, 1.30, 2.0]
fig1 = plt.figure(num=None, figsize=(15, 15 * 2), dpi=80,
                  facecolor='w', edgecolor='k')
for m in range(len(z_edges) - 1):
    if m != 1:  # this ignores the [2.0-0.0] bin
        k = k + 1
        sel_NN2 = (photo_z_alt > z_edges[m]) & (photo_z_alt < z_edges[m + 1])
        sel_NN = np.where(sel_NN2)
        count = np.sum(sel_NN2)
        print('z[', z_edges[m], z_edges[m + 1], ']',
              '# selected galaxies = ', count)
        print('==============================')
        if count > 0:
            # stack and normalize the selected per-object PDFs
            sel_PDF = cla_phot[sel_NN]
            stacked_PDF = np.sum(sel_PDF, axis=0)
            stacked_PDF = stacked_PDF / np.sum(stacked_PDF)
            # histogram of the true spec-z for the same selection, normalized
            sel_spec = spec_z[sel_NN]
            sel_spec_hist, bin_edges = np.histogram(
                sel_spec, bins=40, range=(0.0, 2.0), density=1)
            sel_spec_hist = sel_spec_hist / np.sum(sel_spec_hist)
            # cumulative distributions (np.cumsum replaces the O(n^2)
            # per-prefix np.sum list comprehensions)
            cum_NN = np.cumsum(stacked_PDF)
            cum_spec = np.cumsum(sel_spec_hist)
            diff = cum_NN - cum_spec

            ax1 = fig1.add_subplot(len(z_edges) - 1, 3, (k * 3) + 1)
            ax2 = fig1.add_subplot(len(z_edges) - 1, 3, (k * 3) + 2)
            ax3 = fig1.add_subplot(len(z_edges) - 1, 3, (k * 3) + 3)
            ax1.plot(bins_center, stacked_PDF, color='blue',
                     drawstyle='steps-mid', lw=2, ls='--', label='NN', alpha=0.9)
            ax1.plot(bins_center, sel_spec_hist, color='red',
                     drawstyle='steps-mid', lw=2, ls='-', label='Truth', alpha=0.4)
            ax2.plot(bins_center, cum_NN, color='blue',
                     drawstyle='steps-mid', lw=2, ls='--', label='NN', alpha=0.9)
            ax2.plot(bins_center, cum_spec, color='red',
                     drawstyle='steps-mid', lw=2, ls='-', label='Truth', alpha=0.4)
            ax3.plot(bins_center, diff, color='red',
                     drawstyle='steps-mid', lw=2, ls='-', label='Truth', alpha=0.9)
            ax3.set_ylim((-0.05, 0.05))
            ax1.legend(loc=0, fontsize=14).draw_frame(False)
            ax2.legend(loc=0, fontsize=14).draw_frame(False)
            # only the bottom row (k == 9) keeps its x tick labels and labels
            if k != 9:
                ax1.tick_params(axis='both', which='major', labelsize=12,
                                labelright=False, labelleft=False, labelbottom=False)
                ax2.tick_params(axis='both', which='major', labelsize=12,
                                labelright=False, labelleft=False, labelbottom=False)
                ax3.tick_params(axis='both', which='major', labelsize=12,
                                labelright=True, labelleft=False, labelbottom=False)
            if k == 9:
                ax1.tick_params(axis='both', which='major', labelsize=12,
                                labelright=False, labelleft=False, labelbottom=True)
                ax2.tick_params(axis='both', which='major', labelsize=12,
                                labelright=False, labelleft=False, labelbottom=True)
                ax3.tick_params(axis='both', which='major', labelsize=12,
                                labelright=True, labelleft=False, labelbottom=True)
                ax1.set_xlabel(r'z [redshift]', fontsize=15)
                ax2.set_xlabel(r'z [redshift]', fontsize=15)
                ax3.set_xlabel(r'z [redshift]', fontsize=15)
            # NOTE(review): original flattened source is ambiguous on whether
            # set_xticks was inside the k == 9 branch; applying it to every
            # panel is harmless since hidden rows have labelbottom=False.
            ax1.set_xticks([0.0, 0.5, 1.0, 1.5, 1.8])
            ax2.set_xticks([0.0, 0.5, 1.0, 1.5, 1.8])
            ax3.set_xticks([0.0, 0.5, 1.0, 1.5, 1.8])
fig1.subplots_adjust(hspace=0)
fig1.subplots_adjust(wspace=0)