#!/usr/bin/env python
# coding: utf-8

# # PhenoCam - Extracting Image Statistics over a Region of Interest (ROI)
#
# Here's an iPython notebook to demonstrate the code used in getting stats
# from an image and ROI mask pair.  Let's start by reading in an image and
# mask.  The routine processing code is written in python but we've compared
# similar processing in R, and MATLAB with comparable results.

# In[1]:

import sys  # FIX: required by get_roi_stats()'s error path; was missing

from PIL import Image, ImageOps
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

get_ipython().run_line_magic('matplotlib', 'inline')

test_image = 'harvard_2008_08_24_120140.jpg'
test_mask = 'harvard_DB_0001_01.tif'

# read in mask image and convert to a boolean nparray.  True marks pixels
# that are *excluded* (numpy masked-array convention).
# FIX: np.bool8 was deprecated and removed in numpy 1.24; np.bool_ is the
# canonical boolean dtype.
mask_img = Image.open(test_mask)
roimask = np.asarray(mask_img, dtype=np.bool_)

# read in canopy image
img = Image.open(test_image)
img.load()

# create an image with alpha mask for display: invert the mask, rescale
# its values (value * 0.4 + 150) and install it as the alpha channel so
# the ROI stands out against a washed-out background
rev_mask_img = ImageOps.invert(mask_img)
alpha_data = rev_mask_img.getdata()
rev_mask_img.putdata(alpha_data, .4, 150)
imga = img.copy()
fig = plt.figure(figsize=[8, 8])
imga.putalpha(rev_mask_img)
plt.imshow(imga)

# The quantity we use for phenological studies is the "green chromatic
# coordinate" or "gcc" value.  This is defined as:
#
# $$ gcc = \frac{g_{mean}}{r_{mean} + g_{mean} + b_{mean}} $$
#
# To further characterize the distribution of color over the ROI we also
# calculate the percentiles of the distributions of the digital number (DN)
# values for each color plane.  The correlation coefficients between the
# DN's of the different color planes are also calculated.  Here is the
# function used in standard processing routines.

# In[2]:

def get_roi_stats(im, roimask):
    """Return a collection of stats for DN values for an image / mask pair.

    Parameters
    ----------
    im : PIL.Image
        RGB canopy image.
    roimask : ndarray of bool
        Same size as *im*; True marks pixels excluded from the ROI.

    Returns
    -------
    list or None
        ``[r_stats, g_stats, b_stats, RG_cor, GB_cor, BR_cor]`` where each
        ``*_stats`` is a dict with keys 'mean', 'stdev' and 'percentiles'.
        Returns None if the mask cannot be applied to the image.
    """
    percentile_points = (5., 10., 25., 50., 75., 90., 95.)

    # split into bands and convert to numpy arrays; int16 holds 8-bit DN
    # values exactly
    (im_r, im_g, im_b) = im.split()
    r_array = np.asarray(im_r, dtype=np.int16)
    g_array = np.asarray(im_g, dtype=np.int16)
    b_array = np.asarray(im_b, dtype=np.int16)

    # try applying mask to red image ... if mask and image don't have the
    # same size this will raise an exception.
    # FIX: the original used a bare ``except:`` (which also swallows
    # KeyboardInterrupt/SystemExit) and then hit a NameError because
    # ``sys`` was never imported; both are corrected here.
    try:
        r_ma = np.ma.array(r_array, mask=roimask)
    except Exception:
        sys.stderr.write("Error applying mask to image file.\n")
        return None

    # make masked arrays for G, B (same mask, so no need to re-check)
    g_ma = np.ma.array(g_array, mask=roimask)
    b_ma = np.ma.array(b_array, mask=roimask)

    def _plane_stats(ma):
        """Stats for one colour plane: unmasked values, mean, deviations
        from the mean, population std-dev (single-pass dot-product form,
        see the stackoverflow reference at the bottom of the notebook)
        and the standard percentiles."""
        vals = ma.compressed()
        mean = vals.mean()
        diff = np.float64(vals) - mean
        std = np.sqrt(np.dot(diff, diff) / vals.size)
        pcts = np.percentile(vals, percentile_points)
        return vals, mean, diff, std, pcts

    r_vals, r_mean, r_diff, r_std, r_pcts = _plane_stats(r_ma)
    g_vals, g_mean, g_diff, g_std, g_pcts = _plane_stats(g_ma)
    b_vals, b_mean, b_diff, b_std, b_pcts = _plane_stats(b_ma)

    # population covariance between plane pairs
    rg_cov = np.dot(r_diff, g_diff) / r_diff.size
    gb_cov = np.dot(g_diff, b_diff) / g_diff.size
    br_cov = np.dot(b_diff, r_diff) / b_diff.size

    # correlation coefficients (population cov / population std-devs,
    # internally consistent)
    RG_cor = rg_cov / (r_std * g_std)
    GB_cor = gb_cov / (g_std * b_std)
    BR_cor = br_cov / (b_std * r_std)

    # return list of values
    return [{'mean': r_mean, 'stdev': r_std, 'percentiles': r_pcts},
            {'mean': g_mean, 'stdev': g_std, 'percentiles': g_pcts},
            {'mean': b_mean, 'stdev': b_std, 'percentiles': b_pcts},
            RG_cor, GB_cor, BR_cor]

# In[3]:

# call stats function; the trailing bare expression displays the result in
# the notebook
stats = get_roi_stats(img, roimask)
stats

# To get a better idea of what these numbers mean let's repeat the above
# without using a function.  That way we can examine some of the
# intermediate steps.  In particular it would be nice to examine the
# distribution of DN values over the ROI for each of the color planes.
# In[4]:

# split the full image into bands
(im_r, im_g, im_b) = img.split()

# create numpy arrays with bands (float64 so the brightness sum can't
# wrap at 255)
r_array = np.asarray(im_r, dtype=np.float64)
g_array = np.asarray(im_g, dtype=np.float64)
b_array = np.asarray(im_b, dtype=np.float64)
brt_array = r_array + g_array + b_array

# apply mask to brightness image
brt_ma = np.ma.array(brt_array, mask=roimask)

# make masked arrays for R,G,B
g_ma = np.ma.array(g_array, mask=roimask)
r_ma = np.ma.array(r_array, mask=roimask)
b_ma = np.ma.array(b_array, mask=roimask)

# Basically, all I've done so far is split the image into R, G and B
# components and created a numpy "masked array".  For simple stats, like
# mean, used in the previous calculations we could just use masked array
# functions.  Here I'm going to grab the un-masked values and put them in
# 1-d arrays.  This also makes it easier to do the plotting.

# In[5]:

# first let's get percentiles for R, G and B
nvalues = g_ma.size
r_values = r_ma.compressed()
g_values = g_ma.compressed()
b_values = b_ma.compressed()

# FIX: the print statements in this cell used Python 2 syntax, which is a
# SyntaxError under Python 3 (the rest of this export targets Python 3 --
# see run_line_magic above); converted to the print() function.
print("Number of pixels in image: {0}".format(nvalues))
print("Number of masked pixels: {0}".format(len(g_values)))

# calculate mean, median and std-dev for R, G, B
# (np.std/np.var are population values, ddof=0)
r_mean = np.mean(r_values)
r_median = np.median(r_values)
r_var = np.var(r_values)
r_std = np.std(r_values)

g_mean = np.mean(g_values)
g_median = np.median(g_values)
g_var = np.var(g_values)
g_std = np.std(g_values)

b_mean = np.mean(b_values)
b_median = np.median(b_values)
b_var = np.var(b_values)
b_std = np.std(b_values)

# calculate percentiles for each array
r_percentiles = np.percentile(r_values, (5., 10., 25., 50., 75., 90., 95.))
g_percentiles = np.percentile(g_values, (5., 10., 25., 50., 75., 90., 95.))
b_percentiles = np.percentile(b_values, (5., 10., 25., 50., 75., 90., 95.))

# histogram the values for R, G, B; dashed line = mean, dotted lines =
# mean +/- one std-dev
fig = plt.figure(figsize=[20, 6])

r_95 = r_percentiles[-1]
r_low = r_mean - r_std
r_high = r_mean + r_std
plt.subplot(1, 3, 1)
plt.hist(r_values, 20, color='r')
plt.plot([r_mean, r_mean], [0, 60000], 'k--', lw=2)
plt.plot([r_low, r_low], [0, 60000], 'k:', lw=2)
plt.plot([r_high, r_high], [0, 60000], 'k:', lw=2)
plt.text(150, 50000, 'mean: {0:.2f}'.format(r_mean), size=14)
plt.text(150, 47000, 'std. dev.: {0:.2f}'.format(r_std), size=14)
plt.xlim([0, 255])

g_95 = g_percentiles[-1]
g_low = g_mean - g_std
g_high = g_mean + g_std
plt.subplot(1, 3, 2)
plt.hist(g_values, 20, color='g')
plt.plot([g_mean, g_mean], [0, 60000], 'k--', lw=2)
plt.plot([g_low, g_low], [0, 60000], 'k:', lw=2)
plt.plot([g_high, g_high], [0, 60000], 'k:', lw=2)
plt.text(150, 50000, 'mean: {0:.2f}'.format(g_mean), size=14)
plt.text(150, 47000, 'std. dev.: {0:.2f}'.format(g_std), size=14)
plt.xlim([0, 255])

b_95 = b_percentiles[-1]
b_low = b_mean - b_std
b_high = b_mean + b_std
plt.subplot(1, 3, 3)
plt.hist(b_values, 20, color='b')
plt.plot([b_mean, b_mean], [0, 70000], 'k--', lw=2)
plt.plot([b_low, b_low], [0, 70000], 'k:', lw=2)
plt.plot([b_high, b_high], [0, 70000], 'k:', lw=2)
plt.text(150, 59000, 'mean: {0:.2f}'.format(b_mean), size=14)
plt.text(150, 55000, 'std. dev.: {0:.2f}'.format(b_std), size=14)
plt.xlim([0, 255])

print(r_mean, r_std, r_median, r_var, r_percentiles)
print(g_mean, g_std, g_median, g_var, g_percentiles)
print(b_mean, b_std, b_median, b_var, b_percentiles)

# Visualizing the correlation coefficients is probably best done by showing
# a scatterplot of DN values for the pixels in the ROI.
# In[6]:

# calculate the covariance matrix between pairs (R,G), (R,B), (G,B)
# (np.cov defaults to the *sample* covariance, ddof=1)
rg_cov = np.cov(r_values, g_values)
rb_cov = np.cov(r_values, b_values)
gb_cov = np.cov(g_values, b_values)

fig = plt.figure(figsize=[16, 4])

plt.subplot(1, 3, 1)
plt.scatter(r_values, g_values, s=2, color='k', alpha=.2, edgecolor='none')
plt.xlim([0, 255])
plt.ylim([0, 255])
plt.xlabel('R')
plt.ylabel('G')

plt.subplot(1, 3, 2)
plt.scatter(r_values, b_values, s=2, color='k', alpha=.2, edgecolor='none')
plt.xlim([0, 255])
plt.ylim([0, 255])
plt.xlabel('R')
plt.ylabel('B')

plt.subplot(1, 3, 3)
plt.scatter(g_values, b_values, s=2, color='k', alpha=.2, edgecolor='none')
plt.xlim([0, 255])
plt.ylim([0, 255])
plt.xlabel('G')
plt.ylabel('B')

RG_cov = rg_cov[0, 1]
RB_cov = rb_cov[0, 1]
GB_cov = gb_cov[0, 1]

# FIX: the original divided the sample covariance (np.cov, ddof=1) by the
# population standard deviations computed earlier (np.std, ddof=0), which
# biases each correlation by a factor of n/(n-1).  np.corrcoef is
# internally consistent and yields the correct Pearson coefficients.
RG_cor = np.corrcoef(r_values, g_values)[0, 1]
RB_cor = np.corrcoef(r_values, b_values)[0, 1]
GB_cor = np.corrcoef(g_values, b_values)[0, 1]

# FIX: prints converted from Python 2 statement syntax to print().
print('rg_cov: {0} rb_cov: {1} gb_cov: {2}'.format(RG_cov, RB_cov, GB_cov))
print('rg_cor: {0} rb_cor: {1} gb_cor: {2}'.format(RG_cor, RB_cor, GB_cor))

# # Questions or Suggestions
#
# If you have questions about how the statistics are calculated please feel
# free to contact me (Tom Milliman) at thomas.milliman@unh.edu.  We would
# also welcome any suggestions on image processing or comments on our
# standard products.

# ### References
#
# Here's a [link](http://stackoverflow.com/questions/19391149/numpy-mean-and-variance-from-single-function)
# to a stackoverflow posting which discusses speeding up calculations of
# mean and variance.  I used this discussion to help speed up calculations.

# In[ ]: