# don't forget to open the ipython notebook AFTER you've already started the virtual environment import bq client = bq.Client.Get() import numpy import matplotlib import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.basemap import Basemap from scipy.io import netcdf import os import time !bq query 'SELECT COUNT(float_id) FROM argo.all_data_20140806;' q = 'SELECT COUNT(float_id) FROM argo.all_data_20140806;' total_measurements = client.ReadTableRows(client.Query(q)['configuration']['query']['destinationTable']) print total_measurements print int(total_measurements[0][0]) def Query(q): t0 = time.time() answer = client.ReadTableRows(client.Query(q)['configuration']['query']['destinationTable']) print 'Query time: ' + str(time.time() - t0) + ' seconds.' return answer q = ''' SELECT x, y, z FROM argo.all_data_20140806 LIMIT 10 ''' answer = Query(q) answer q = ''' SELECT bucket, COUNT(z) FROM ( SELECT z, CASE WHEN z >= 0 THEN 1 WHEN z >= -100 AND z < 0 THEN 2 WHEN z >= -200 AND z < -100 THEN 3 WHEN z >= -300 AND z < -200 THEN 4 WHEN z >= -400 AND z < -300 THEN 5 WHEN z >= -500 AND z < -400 THEN 6 WHEN z >= -600 AND z < -500 THEN 7 WHEN z >= -700 AND z < -600 THEN 8 WHEN z >= -800 AND z < -700 THEN 9 WHEN z >= -900 AND z < -800 THEN 10 WHEN z >= -1000 AND z < -900 THEN 11 WHEN z >= -1100 AND z < -1000 THEN 12 WHEN z >= -1200 AND z < -1100 THEN 13 WHEN z >= -1300 AND z < -1200 THEN 14 WHEN z >= -1400 AND z < -1300 THEN 15 WHEN z >= -1500 AND z < -1400 THEN 16 WHEN z >= -1600 AND z < -1500 THEN 17 WHEN z >= -1700 AND z < -1600 THEN 18 WHEN z >= -1800 AND z < -1700 THEN 19 WHEN z >= -1900 AND z < -1800 THEN 20 WHEN z >= -2000 AND z < -1900 THEN 21 ELSE 22 END as bucket FROM argo.all_data_20140806) GROUP BY bucket; ''' depth_slices = Query(q) depth_slices def PlotHistogram(data, B = 1, C = 0): buckets = [float(entry[B]) for entry in data] counts = [int(entry[C]) for entry in data] plt.bar(counts, buckets) plt.show() PlotHistogram(depth_slices) # Depth is derived from pressure, so we check that the corresponding pressure measurement is good data. q = ''' SELECT bucket, COUNT(z) FROM( SELECT z, INTEGER(z / 1000) as bucket FROM argo.all_data_20140806 WHERE z > -2500 and z < 0 and pres_a_qc = 1 ) GROUP BY bucket ''' depth_slices2 = Query(q) depth_slices2 PlotHistogram(depth_slices2) # More buckets! q = ''' SELECT bucket, COUNT(z) FROM ( SELECT z, INTEGER(z / 100) as bucket FROM argo.all_data_20140806 WHERE z > -2500 and z < 0 and pres_a_qc = 1 ) GROUP BY bucket ''' depth_slices3 = Query(q) PlotHistogram(depth_slices3) q = ''' SELECT bucket, COUNT(datetime) FROM ( SELECT datetime, INTEGER(julian_date / 365) as bucket FROM argo.all_data_20140806 WHERE julian_date > 15000 and julian_date < 35000 ) GROUP BY bucket ''' time_slices = Query(q) PlotHistogram(time_slices) # Overall temperatures q = ''' SELECT bucket, COUNT(temp_a) FROM ( SELECT temp_a, INTEGER(temp_a / 1) as bucket FROM argo.all_data_20140806 WHERE temp_a > -10 and temp_a < 60 and temp_a_qc = 1 and pres_a_qc = 1 ) GROUP BY bucket ''' temp_slices = Query(q) PlotHistogram(temp_slices) # Shallow temperatures q = ''' SELECT bucket, COUNT(temp_a) FROM ( SELECT temp_a, INTEGER(temp_a / 1) as bucket FROM argo.all_data_20140806 WHERE temp_a > -10 and temp_a < 60 and temp_a_qc = 1 and z < 0 and z > -20 and pres_a_qc = 1 ) GROUP BY bucket ''' temp_slices_shallow = Query(q) PlotHistogram(temp_slices_shallow) # Deep temperatures q = ''' SELECT bucket, COUNT(temp_a) FROM ( SELECT temp_a, INTEGER(temp_a) as bucket FROM argo.all_data_20140806 WHERE temp_a > -10 and temp_a < 60 and temp_a_qc = 1 and z < -1500 and z > -1600 and pres_a_qc = 1 ) GROUP BY bucket ''' temp_slices_deep = Query(q) PlotHistogram(temp_slices_deep) def PlotMap(data, map_rotation = 180, marker_size = 2, X = 0, Y = 1, VALUES = 2): lon = [float(row[X]) for row in data] lat = [float(row[Y]) for row in data] values = [float(row[VALUES]) for row in data] # if the range will be too small with ints, then make the range bigger if (max(values) - min(values)) < 100: values = [(value*100.) for value in values] values_int = [int(value) for value in values] m1 = Basemap(projection='hammer',lon_0=map_rotation) x, y = m1(lon,lat) m1.drawmapboundary(fill_color='#ffffff') m1.fillcontinents(color='#cc9966',lake_color='#ffffff') m1.scatter(x,y,marker_size,marker='o',c=values_int, cmap = cm.jet, lw=0) plt.show() q = ''' SELECT x, y, temp_a, rand() as random FROM argo.all_data_20140806 WHERE temp_a >= 0 and temp_a < 60 and temp_a_qc = 1 and z < -1500 and z > -1600 and pres_a_qc = 1 and x is not NULL and y is not NULL ORDER BY random LIMIT 100000 ''' deep_temps = Query(q) PlotMap(deep_temps, map_rotation = 70) q = ''' SELECT x, y, temp_a FROM argo.all_data_20140806 WHERE temp_a >= 4 and temp_a < 60 and temp_a_qc = 1 and z < -1500 and z > -1600 and pres_a_qc = 1 and x is not NULL and y is not NULL ''' high_deep_temps = Query(q) PlotMap(high_deep_temps, map_rotation = 70) q = ''' SELECT float_id, COUNT(*) FROM argo.all_data_20140806 WHERE temp_a_qc = 1 and temp_a IS NOT NULL and x IS NOT NULL and y IS NOT NULL GROUP BY float_id ''' valid_temps = Query(q) max_valid = 0 fid_with_max = None for entry in valid_temps: if entry[1] > max_valid: max_valid = int(entry[1]) fid_with_max = str(entry[0]) print max_valid print fid_with_max q = ''' SELECT bucket, COUNT(z) FROM ( SELECT *, INTEGER(z / 100) as bucket FROM argo.all_data_20140806 WHERE float_id = 5900432 and temp_a_qc = 1 and temp_a IS NOT NULL and x IS NOT NULL and y IS NOT NULL and pres_a_qc = 1 and z IS NOT NULL ) GROUP BY bucket ''' single_float_depth_slices = Query(q) PlotHistogram(single_float_depth_slices) q = ''' SELECT bucket, COUNT(temp_a) FROM ( SELECT *, INTEGER(temp_a) as bucket FROM argo.all_data_20140806 WHERE float_id = 5900432 and temp_a_qc = 1 and temp_a IS NOT NULL and x IS NOT NULL and y IS NOT NULL and pres_a_qc = 1 ) GROUP BY bucket ''' single_float_temp_slices = Query(q) PlotHistogram(single_float_temp_slices) q = ''' SELECT bucket, COUNT(temp_a) FROM ( SELECT *, INTEGER((temp_a * 2) / 1) as bucket FROM argo.all_data_20140806 WHERE float_id = 5900432 and temp_a_qc = 1 and temp_a IS NOT NULL and x IS NOT NULL and y IS NOT NULL and z < 0 and z > -20 and pres_a_qc = 1 ) GROUP BY bucket ''' single_float_shallow_temp_slices = Query(q) PlotHistogram(single_float_shallow_temp_slices) q = ''' SELECT bucket, COUNT(temp_a) FROM ( SELECT *, INTEGER((temp_a * 2) / 1) as bucket FROM argo.all_data_20140806 WHERE float_id = 5900432 and temp_a_qc = 1 and x IS NOT NULL and y IS NOT NULL and z < -600 and z > -2000 and pres_a_qc = 1 ) GROUP BY bucket ''' single_float_deep_temp_slices = Query(q) PlotHistogram(single_float_deep_temp_slices) q = ''' SELECT x, y, temp_a FROM ( SELECT * FROM argo.all_data_20140806 WHERE float_id = 5900432 and temp_a_qc = 1 and temp_a IS NOT NULL and x IS NOT NULL and y IS NOT NULL ) ''' single_float_temps = Query(q) PlotMap(single_float_temps, marker_size = 20) q = ''' SELECT QUARTER(datetime) as quar, COUNT(*) FROM argo.all_data_20140806 GROUP BY quar ''' measurements_by_quarter = Query(q) PlotHistogram(measurements_by_quarter) by_quarter = {'1': None, '2': None, '3': None, '4': None} for quarter in by_quarter: q = ''' SELECT x, y, temp_a, rand() as random FROM argo.all_data_20140806 WHERE temp_a_qc = 1 and temp_a IS NOT NULL and x IS NOT NULL and y IS NOT NULL and z < 0 and z > -20 and pres_a_qc = 1 and QUARTER(datetime) = ''' + quarter + ''' ORDER BY random LIMIT 10000 ''' by_quarter[quarter] = Query(q) for quarter in sorted(by_quarter.keys()): print quarter PlotMap(by_quarter[quarter], marker_size = 10) by_depth = {'/*01*/ z < -1750 z >= -2000': None, '/*02*/ z < -1500 z >= -1750': None, '/*03*/ z < -1250 and z >= -1500': None, '/*04*/ z < -1000 and z >= -1250': None, '/*05*/ z < -750 and z >= -1000': None, '/*06*/ z < -500 and z >= -750': None, '/*07*/ z < -250 and z >= -500': None, '/*08*/ z < -50 and z >= -250': None, '/*09*/ z < -50 and z >= 0': None} for depth in by_depth: q = ''' SELECT x, y, temp_a, rand() as random FROM argo.all_data_20140806 WHERE temp_a_qc = 1 and temp_a IS NOT NULL and x IS NOT NULL and y IS NOT NULL and pres_a_qc = 1 and ''' + depth + ''' ORDER BY random LIMIT 10000 ''' try: by_depth[depth] = Query(q) except: print depth + ' not enough points?' by_depth[depth] = [[0, 0, 0]] for depth in sorted(by_depth.keys()): if len(by_depth[depth]) > 1: print depth PlotMap(by_depth[depth], marker_size = 10, map_rotation = 70) q = ''' SELECT COUNT(*) FROM argo.all_data_20140806 WHERE pres_a_qc = 1 and (pres_a >= 99999) ''' print (Query(q)) q = ''' SELECT COUNT(*) FROM argo.all_data_20140806 WHERE temp_a_qc = 1 and (temp_a >= 99999) ''' print (Query(q)) q = ''' SELECT COUNT(*) FROM argo.all_data_20140806 WHERE psal_a_qc = 1 and (psal_a >= 99999) ''' print (Query(q)) q = ''' SELECT COUNT(*) FROM argo.all_data_20140806 WHERE doxy_a_qc = 1 and (doxy_a >= 99999) ''' print (Query(q))