#!/usr/bin/env python
# coding: utf-8

# # Getting Started with RasterFrames Notebook

# ## Setup Spark Environment

# In[1]:


import pyrasterframes
from pyrasterframes.utils import create_rf_spark_session
import pyrasterframes.rf_ipython  # enables nicer visualizations of pandas DF
from pyrasterframes.rasterfunctions import *
import pyspark.sql.functions as F


# In[2]:


# Create the Spark session through the pyrasterframes helper; `spark` is used below to read the raster.
spark = create_rf_spark_session()


# ### Get a PySpark DataFrame from [open data](https://docs.opendata.aws/modis-pds/readme.html)
# 
# Read a single "granule" or scene of MODIS surface reflectance data. 

# In[3]:


# URL of a single GeoTIFF in the public MODIS bucket (file name ends in `_B02`,
# presumably band 2 -- confirm against the dataset readme).
uri = (
    'https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059'
    '/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF'
)

# Load the raster at that URL into a Spark DataFrame.
df = spark.read.raster(uri)


# In[4]:


# Print the column names and types of the DataFrame that was just loaded.
df.printSchema()


# Do some work with the raster data; add 3 element-wise to the pixel/cell values and show some rows of the DataFrame.

# In[5]:


# Column expression that adds the literal 3 to `proj_raster`.
plus_three = rf_local_add(df.proj_raster, F.lit(3))

# Left as the cell's last expression so the notebook renders the result.
df.select(plus_three)


# The extent struct tells us what area, in the [CRS](https://spatialreference.org/ref/sr-org/6842/), the tile data covers. The granule is split into arbitrarily sized chunks, and each row is a different chunk. Let's see how many there are.
# 
# Side note: you can configure the default size of these chunks, which are called Tiles, by passing a tuple of the desired columns and rows, e.g. `raster(uri, tile_dimensions=(96, 96))`. The default is `(256, 256)`.

# In[6]:


# Number of rows in the DataFrame, i.e. how many chunks the granule was split into.
df.count()


# What area does the DataFrame cover?

# In[7]:


# Take the first CRS value found in the DataFrame and pull out its `crsProj4` field.
crs_column = rf_crs(df.proj_raster)
first_crs = F.first(crs_column).crsProj4.alias('crs')
crs = df.agg(first_crs).first()['crs']
print(crs)

# Turn each row's extent into a geometry and reproject it from the raster's
# own CRS to EPSG:4326, keeping the source path next to it.
native_footprint = st_geometry(rf_extent(df.proj_raster))
footprint = st_reproject(native_footprint, rf_mk_crs(crs), rf_mk_crs('EPSG:4326'))
coverage_area = df.select(df.proj_raster_path, footprint.alias('footprint'))
coverage_area


# So where in the world is that? We'll generate a little visualization with Leaflet in the notebook using Folium.

# In[8]:


import geopandas
import folium


# In[9]:


# Collect the reprojected footprints to the driver and wrap them in a
# GeoDataFrame whose geometry column is `footprint`.
# The CRS is passed as the authority string 'EPSG:4326' instead of the proj4
# init dict ({'init': ...}), which pyproj 2+ and GeoPandas deprecate with a
# FutureWarning.
footprints = coverage_area.select('footprint').toPandas()
gdf = geopandas.GeoDataFrame(footprints, geometry='footprint', crs='EPSG:4326')


# In[ ]:


# Wrap the footprints in a GeoJSON layer and add it to a map centred at (5, -65).
# The map is left as the cell's last expression so the notebook renders it.
footprint_layer = folium.GeoJson(gdf.__geo_interface__)
folium.Map(location=(5, -65), zoom_start=6).add_child(footprint_layer)


# Look at a sample of the data. You may find it useful to double-click the tile image column to see a larger or smaller rendering of the image.

# In[11]:


# Columns to inspect: the source path plus the extent, geometry and tile
# derived from `proj_raster`.
sample_columns = [
    df.proj_raster_path,
    rf_extent(df.proj_raster).alias('extent'),
    rf_geometry(df.proj_raster).alias('geo'),
    rf_tile(df.proj_raster).alias('tile'),
]

# Bring a five-row sample to the driver as a pandas DataFrame and display it.
pandas_df = df.select(*sample_columns).limit(5).toPandas()
pandas_df


# In[ ]: