This work explores the space of DHV curves from the prostate library (currently 60 patients).
We proceed with Pricipal Componant Analysis (PCA) on the high dimensonal space of Volume-Dose data points (e.g. V95 for PTV) in the DVH library.
We restrict the study to the PTV, Rectum and Bladder curves only. This is done for conviniece and simplicity, and also to lower the dimensonality of the data.
Here we load in the data from Nan.
import autoPlanLib as myTools #this has some custom functions to clean up the code here
from iPyNoteDisplay import *
extended_styles()
#load patient DVH data (settings are in autoPlanLib.py)
patients = myTools.loadPatients()
PTV Rect Blad FH_Rt FH_Lt Body Done processing 60 patients.
myTools = reload(myTools) #when needed
#Some useful parameters
Rx_dose = 8100
num_pat = size(patients,0) #number of patients in library (60)
num_orgs = 3 #number of organs #len(patients[pat])
Here we can see the range of the DVH for each organ.
#plotting DVH of library
num_pat = 60
fig, ax = plt.subplots(1,3,figsize=(18,5))
for pat in range(num_pat):
for org in range(len(organ_fmt)):
data = patients[pat][org]
ax[org].plot(data.dose,multiply(data.volume,100.0))#,organ_fmt[o])
if pat == num_pat - 1:
ax[org].set_title("All " + data.name)
ax[org].set_xlabel('Dose (cGy)')
ax[org].set_ylabel('Volume (%)')
ax[org].set_ylim(0,100)
Here we have some freedom in how we re-sample the data for use in PCA. PCA requires a data matrix, each row being a observation (patient), and each column representing some measurement. The PCA library we use also assumes that the number of columns in the data matrix is less than or equal to the number or rows.
A few different sampling schemes were tested, but in the end they all gave very similar results.
Below is a scheme to weight the sampling grid quadratically towards lower volume (%) values (higher dose regions).
# say we want to weight samples by x^2
# integrate, then invert we get 3/x^3, then uniformly sample
n_pts = 20
grid_points = zeros((num_orgs,n_pts))
g = mgrid[0.0:1.0:(1./float(n_pts))] #a uniform grid with desired number of sampling points
sampling = map(lambda x: pow(x,1.0/3.0)*100, g) #map uniform grid through weighting function
sampling = abs(array(sampling)-98) #here the sampling points are effectively reversed and shifted
print "Volume sampling points (%):"
print sampling
#Vols for PTV, RECT and BLAD:
for i in range(3):
grid_points[i] = sampling / 100.0
Volume sampling points (%): [ 98. 61.15968501 51.58411166 44.86707154 39.51964524 35.00394751 31.05670499 27.52701268 24.31937003 21.36905676 18.6299474 16.06787294 13.65673347 11.37608947 9.20959983 7.14397036 5.16822333 3.27317628 1.45106154 0.30475725]
Below are 2 hand picked sampling scheme which treats the PTV and OARS differently
n_pts = 12 #points sampled from each organ
grid_points = zeros((num_orgs,n_pts))
#Vols for PTV:
grid_points[0] = array( (99,98,97,96,95,90,50, 5, 4, 3, 2, 1) ) / 100.0
#Vols for RECT:
grid_points[1] = array( (90,70,60,50,40,30,20,10, 9, 7, 5, 1) ) / 100.0
#Vols for BLAD:
grid_points[2] = array( (90,70,60,50,40,30,20,10, 9, 7, 5, 1) ) / 100.0
print "Volume sampling points (%):"
print 100*grid_points
Volume sampling points (%): [[ 99. 98. 97. 96. 95. 90. 50. 5. 4. 3. 2. 1.] [ 90. 70. 60. 50. 40. 30. 20. 10. 9. 7. 5. 1.] [ 90. 70. 60. 50. 40. 30. 20. 10. 9. 7. 5. 1.]]
#another hand picked sampling scheme with fiewer points (for simplicity)
n_pts = 6 #points sampled from each organ
grid_points = zeros((num_orgs,n_pts))
#Vols for PTV:
grid_points[0] = array( (99,95,90,70,50,2) ) / 100.0
#Vols for RECT:
grid_points[1] = array( (90,60,30,20,10,5) ) / 100.0
#Vols for BLAD:
grid_points[2] = array( (90,60,30,20,10,5) ) / 100.0
print "Volume sampling points (%):"
print 100*grid_points
Volume sampling points (%): [[ 99. 95. 90. 70. 50. 2.] [ 90. 60. 30. 20. 10. 5.] [ 90. 60. 30. 20. 10. 5.]]
This makes the final result (when we reconstuct DVH from the PCA vectors) more eye pleasing.
#a uniform sampling scheme with 20 sampling points per organ
n_pts = 20
low_vol = 1.
high_vol = 99.
a = frange(low_v, high_v, (high_v-low_v)/float(n_pts-1))
a = a[::-1] #reversed
grid_points = repeat([a/100.],3,axis=0)
#Plot the grid
grid_points100 = 100*grid_points #this gives %dose units
plot(grid_points100[0],grid_points100[0],'o')
title('Sampling points for Volume (%)')
show()
Below, we resample the patient DVH data using the above volume grid points. No interpolation is used, we simply find the dose value where the DVH volume first drops below the sought volume value.
gridDim = len(grid_points[0])
V_grid = zeros((num_pat,num_orgs*gridDim))
for pat in range(num_pat):
for org in range(num_orgs):
data = patients[pat][org]
for i in range(len(grid_points[org])):
V_grid[pat,org*gridDim+i]=myTools.DoseAtVol(data.dose,data.volume,grid_points[org][i])/Rx_dose
#this defines the indices for each organ in the rows of V_grid
r = (range(0,n_pts),range(n_pts,2*n_pts),range(2*n_pts,3*n_pts))
fig, axes = plt.subplots(1,3,figsize=(18,4))
for pat in range(num_pat):
axes[0].plot( V_grid[pat,r[0]], grid_points100[0],'-m', alpha=.3)
axes[1].plot( V_grid[pat,r[1]], grid_points100[1],'-r', alpha=.3)
axes[2].plot( V_grid[pat,r[2]], grid_points100[2],'-b', alpha=.3)
#axes[i,2].plot([],[],'--k',label='Neg(%i)'% m_idx)
#axes.plot(V_grid[p,:],'.',alpha = 0.5)
axes[0].set_title(patients[0][0].name)
map(lambda x: x.set_xlabel('Dose/RxDose'),axes)
axes[0].set_ylabel('Volume (%)')
axes[1].set_title(patients[0][1].name)
axes[2].set_title(patients[0][2].name)
show()
MatPlotLib has a built in PCA function. Below are the variances (extent of data along given axis) ploted for each of Principal Componant (PC) directions.
from matplotlib.mlab import PCA
V_pca = PCA(V_grid)
figure()
title('PCA Fractions')
plot(V_pca.fracs,'or')
ylabel("Variance")
xlabel("Principal Component")
show()
The plot below gives a hint as to how the data matrix is sorted. Each row represents a patient DVH and the first 1/3 of the columns sample the PTV, the middle 1/3 sample the Rectum, and the last 1/3 sample the Bladder.
The PCA modes represent (unit) directions in the high dimensional data space (after the mean for each column has been subtracted and the standard deviation is divided out).
n_comps = 6
p = V_pca
fig, axes = plt.subplots(figsize=(18,7))
for i in range(n_comps):
axes.plot(p.Wt[i,:],label='PC %i'%i)
axes.plot(zeros(num_orgs*n_pts),':k')
axes.set_xlabel('Volume (%) for PTV (Purple), Rectum (Red), Bladder (Blue)')
axes.set_ylabel('weights/correlations (arb)')
axes.set_title('First %i PCA modes' % n_comps)
axes.legend(loc=0)
#format Ticks
axes.set_xticks(range(3*n_pts))
#make integer axis of Vol axis
l = axes.set_xticklabels(array(concatenate((grid_points100[0],grid_points100[1],grid_points100[2])),int)) #output hidden
junk = map(lambda x: x.set_color('purple'), l[:n_pts] )
junk = map(lambda x: x.set_color('red'), l[n_pts:2*n_pts] )
junk = map(lambda x: x.set_color('blue'), l[2*n_pts:] )
Below we plot the PCA mode in the first column, the componant along the mode for each patient in the second column, and the 'extreemum' DVH (Three patients with the most negative, closest to zero, and most positive componants along the PCA mode)
n_comps = 6
p = V_pca
fig2, axes2 = plt.subplots(n_comps,3,figsize=(18,n_comps*6))
for i in range(n_comps):
axes2[i,0].plot( zeros((len(grid_points[0]),1)),grid_points100[0],':k')
axes2[i,0].plot(p.Wt[i,r[0]],grid_points100[0],'-m',label="PTV")
axes2[i,0].plot(p.Wt[i,r[1]],grid_points100[1],'-r',label="Rectum")
axes2[i,0].plot(p.Wt[i,r[2]],grid_points100[2],'-b',label="Bladder")
axes2[i,0].set_title('Principal Componant Vector # %i' % i)
axes2[i,0].set_ylabel('Volume (%)')
axes2[i,0].set_xlabel('Weight/Correlation (arb)')
axes2[i,0].legend(loc=0)
#plot PC dimension (componants for each patient)
axes2[i,1].plot( p.Y[:,i],p.Y[:,i],'sr',mew=0,ms=20,alpha = 0.15)
axes2[i,1].set_title('PC %i vs. PC %i (squares are patients)' % (i,i))
axes2[i,1].set_xlabel('Weight/Correlation (arb)')
#Find Extremum Patients for PC mode (Most Negative, Closest to Zero, and Most Positive)
neg_idx = find(min(p.Y[:,i]) == p.Y[:,i])
tempArray = map(lambda x: abs(x), p.Y[:,i]) #map absolute value
zero_idx = find(min(tempArray) == tempArray)
pos_idx = find(max(p.Y[:,i]) == p.Y[:,i])
c_fmt = ('m','r','b') #line colors for PTV, Rect, and Blad
axes2[i,2].plot([],[],'--k',label='Neg(%i)'% neg_idx)
axes2[i,2].plot([],[],':k',label='Zero(%i)' % zero_idx)
axes2[i,2].plot([],[],'-k',label='Pos(%i)' % pos_idx)
for org in range(3):
axes2[i,2].plot( patients[neg_idx][org].dose, 100.*array(patients[neg_idx][org].volume),'--'+c_fmt[org])
axes2[i,2].plot( patients[zero_idx][org].dose, 100.*array(patients[zero_idx][org].volume),':'+c_fmt[org])
axes2[i,2].plot( patients[pos_idx][org].dose, 100.*array(patients[pos_idx][org].volume),'-'+c_fmt[org])
axes2[i,2].set_xlim((0,9100))
axes2[i,2].set_ylim((0,110))
axes2[i,2].set_title('Extremum DVH along PC %i ' % i)
axes2[i,2].set_xlabel('Dose (cGy)')
axes2[i,2].set_ylabel('Volume (%)')
axes2[i,2].legend(loc='upper center',ncol=3,frameon=False)
Here we sort through and animate the DVH data along the first three pricipal directions in the data
from matplotlib import animation
from IPython.core.display import HTML
# animation function. This is called sequentially
def animate(i):
pid = pids_sorted[i]
ax.set_title('Pat. %i PC_%i = %s' % (pid,pc_vect,str(p.Y[pid,pc_vect])[:5]))
for org in range(3):
line[org][0].set_data(patients[pid][org].dose, 100.*array(patients[pid][org].volume))
#set up the figure, the axis, and the plot element we want to animate
fig, ax = plt.subplots(figsize=(5,4))
line = []
line.append(ax.plot([], [],'-m',label='PTV'))
line.append(ax.plot([], [],'-r',label='Rectum'))
line.append(ax.plot([], [],'-b',label='Bladder'))
ax.set_xlim((0.0,9000.0))
ax.set_ylim((0.0,101))
ax.set_xlabel('Dose (cGy)')
ax.set_ylabel('Volume (%)')
ax.legend(loc=3)
html_out = ''
for pc_vect in range(3):
#create a sorted vector containing patients for a given mode
pids_sorted = argsort(p.Y[:,pc_vect])
# call the animator. blit=True means only re-draw the parts that have changed.
anim = animation.FuncAnimation(fig, animate, frames=60, interval=5, blit=True)
html_out += myTools.anim_to_html(anim) #+ " <br />"
fig.clf() #this prevents the regular plot from showing
HTML(html_out)
Here I plot the DVH data in the PC space of the first 3 modes.
#3D scatter in PCV (0,1,3) space
from mpl_toolkits.mplot3d.axes3d import Axes3D
fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(1, 1, 1, projection='3d')
pY = V_pca.Y
p = ax.scatter(pY[:,0], pY[:,1], pY[:,2],s=60,c='r',marker='o',edgecolors='none',alpha = 0.5)
#maybe try animation later
p = V_pca
proj = p.project(V_grid[0,:])
temp = zeros(proj.shape)
errors = zeros(proj.shape)
n_pats = 60 #number of patients to consider
n_pc = 6 #number of pca modes to include
#placeholders to find max error patient
max_error = 0.0
max_error_row = []
max_error_pca = []
for pat in range(n_pats):
row = V_grid[pat,:]
proj = p.project(row)
temp = zeros(proj.shape)
for i in range(n_pc):
temp = temp + multiply(p.Wt[i,:],proj[i])
#compute error between pca reduced representation
temp_err = abs(temp*p.sigma + p.mu - row)
#compute max error and compare to global
if max(temp_err) > max_error:
#save patient index
max_error_idx = pat
max_error_row = row.copy()
max_error_pca = temp.copy()
errors = errors + temp_err
#plot PCA errors
fig, ax = plt.subplots(1,2,figsize=(16,7))
ax[0].plot(100*errors[r[0]]/float(n_pats),grid_points100[0],':om',label="PTV")
ax[0].plot(100*errors[r[1]]/float(n_pats),grid_points100[0],':or',label="Rectum")
ax[0].plot(100*errors[r[2]]/float(n_pats),grid_points100[0],':ob',label="Bladder")
ax[0].set_title('Average Error with first %i PCA modes' % n_pc)
ax[0].legend(loc='best')
ax[0].set_ylabel('Volume (%)')
ax[0].set_xlabel('Absolute Error (% of RxDose)')
#example Origional and Reduced
dvh_pca = max_error_pca*p.sigma + p.mu
ax[1].plot(Rx_dose * dvh_pca[r[0]],grid_points100[0],'-m',label="PTV")
ax[1].plot(Rx_dose * dvh_pca[r[1]],grid_points100[1],'-r',label="Rectum")
ax[1].plot(Rx_dose * dvh_pca[r[2]],grid_points100[2],'-b',label="Bladder")
#plot(max_error_pca*p.sigma + p.mu)
ax[1].plot(Rx_dose * max_error_row[r[0]],grid_points100[0],'--m')
ax[1].plot(Rx_dose * max_error_row[r[1]],grid_points100[1],'--r')
ax[1].plot(Rx_dose * max_error_row[r[2]],grid_points100[2],'--b')
ax[1].plot([],[],'--k',label='Orig.')
ax[1].plot([],[],'-k',label='Proj.')
ax[1].set_ylabel('Volume (%)')
ax[1].set_xlabel('Dose (cGy)')
ax[1].legend(loc=0,ncol=2)
ax[1].set_title('Worst Max Error (Patient %i)' % max_error_idx)
show()
This is done to understand what features of the DVH are represented by each Pricipal Componant. We start with the mean DVH of the whole data set (for purely illustrative purposes). Each frame represents the average DVH moved along the direction of the PC. This is done by adding some scalar multiplied by the PC vector to the mean DVH vector.
#good ranges for x^2 weighted sampling
pc_range = [] #an array for the range of vector motions
pc_range.append(frange(-5,9,.5)) #for PC_0
pc_range.append(frange(-5,16,.5)) #for PC_1
pc_range.append(frange(-6,6,.5)) #for PC_2
pc_range.append(frange(-4,6,.5)) #for PC_3
pc_range.append(frange(-2,6,.5)) #for PC_4
pc_range.append(frange(-3,6,.5)) #for PC_5
pc_range.append(frange(-6,4,.5)) #for PC_6
#good ranges for uniform samled data
pc_range = [] #an array for the range of vector motions
pc_range.append(frange(-9,6,.5)) #for PC_0
pc_range.append(frange(-8.5,6.5,.5)) #for PC_1
pc_range.append(frange(-3,6,.5)) #for PC_2
pc_range.append(frange(-4,3,.5)) #for PC_3
pc_range.append(frange(-3,2.5,.5)) #for PC_4
pc_range.append(frange(-3,3,.5)) #for PC_5
pc_range.append(frange(-4,4,.5)) #for PC_6
#this just gets the size of the vector right
row = V_grid[pat,:]
proj = p.project(row)
DVH_vect = zeros(proj.shape) #this is actually the "mean" DVH
#the code below will recover the PCA space reconstruction of DVH
#for i in range(n_pc):
# DVH_vect = DVH_vect + multiply(PC_0,proj[i])
#DVH_vect = DVH_vect*p.sigma + p.mu
# animation function. This is called sequentially
def animate(i):
temp_vec = Rx_dose * ( (DVH_vect + pc_range[pc_idx][i] * p.Wt[pc_idx,:])*p.sigma + p.mu )
ax.set_title('PC_%i * %.1f' % (pc_idx,pc_range[pc_idx][i]))
line1.set_data(temp_vec[r[0]], grid_points100[0])
line2.set_data(temp_vec[r[1]], grid_points100[1])
line3.set_data(temp_vec[r[2]], grid_points100[2])
pass
#set up the figure, the axis, and the plot element we want to animate
fig, ax = plt.subplots()#figsize=(10,8))
line1, = ax.plot([], [],'-m',label='PTV')
line2, = ax.plot([], [],'-r',label='Rectum')
line3, = ax.plot([], [],'-b',label='Bladder')
ax.set_xlim((0.0,Rx_dose*1.2))
ax.set_ylim((0.0,100))
ax.set_xlabel('Dose (cGy)')
ax.set_ylabel('Volume (%)')
ax.legend(loc=3)
html_out = ''
for pc_idx in range(len(pc_range)):
# call the animator. blit=True means only re-draw the parts that have changed.
anim = animation.FuncAnimation(fig, animate, frames=len(pc_range[pc_idx]), interval=5, blit=True)
html_out += myTools.anim_to_html(anim)# + " <br />"
fig.clf() #this prevents the regular plot from showing
#HTML(html_out)
HTML(html_out)
Maybe we can use Kevins model (his last paper) to predict the DVH of a new patient. Then we can use that predicted DVH to generate nearby "library" plans by sampling along the PC directions.