#!/usr/bin/env python
# coding: utf-8

# This notebook illustrates two blogposts (in French) :
# - "Peut-on toujours atteindre une maternité en 30 minutes ?" https://www.smalsresearch.be/peut-on-toujours-atteindre-une-maternite-en-30-minutes/
# - [Blog post to be published in September 2021]

# Requires same packages as in "Spatial_Join" notebook, plus : 
#     
#     - conda install -c conda-forge dash
#     - conda install -c conda-forge -c plotly jupyter-dash
#     - conda install descartes
#     - conda install openpyxl
#     - conda install -c conda-forge flexpolyline
#     - conda install -c conda-forge python-kaleido
# 

# In[3]:


get_ipython().run_line_magic('matplotlib', 'inline')


# In[4]:


import urllib
import time
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Polygon
import contextily as ctx
import shapely
from shapely.geometry import shape
from pyproj import CRS

import os

import requests

import matplotlib.pyplot as plt

from tqdm.auto import tqdm, trange
import json
tqdm.pandas()


# In[5]:


import shapely
import plotly.express as px
import dash
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.graph_objects as go

import flexpolyline as fp


# In[6]:


from credentials import here_api_key, here_app_code, here_app_id


# In[7]:


crs =     'epsg:3857'
osm_crs=  'epsg:4326'


# In[8]:


try : os.mkdir("output")
except FileExistsError: pass


# In[9]:


from_files = True 
# If False, Here API is called for geocoding and isolines, and result is stored in a file
# If True, API is not called, data is retreived from the file and plots are produced


# In[10]:


add_options={}
option=None


# In[11]:


dataset=  "smur"
# dataset = "maternity"
# dataset = "vaccin"


# In[12]:


# Uncomment to two following lines to add traffic
# add_options = {"departureTime" : "2021-05-28T14:00:00+01:00"} # with heavy traffic
# option="traffic"
# # Could also be {"transportMode":"pedestrian"}


# # Functions
# 
# ## Geocoding

# In[13]:


def get_geocode(addr):
    """
    Geocode an address with Here API

    Parameters
    ----------
    addr: str
       address to geocode
    filename: str
       local file to save

    Returns
    -------

    (lat, lon): (float, float)
        geographical coordinates (in epsg:4326)
    """

    host_name = "geocoder.api.here.com"
    
    params = urllib.parse.urlencode({"app_id": here_app_id,
                                     "app_code": here_app_code,
                                    "searchtext":addr,
                                    })
    
    url = f"https://{host_name}/6.2/geocode.json?{params}"
    
#     print(url)
    with urllib.request.urlopen(url) as response:
        res = response.read()
        res = json.loads(res)
        r = res["Response"]["View"][0]["Result"][0]["Location"]["DisplayPosition"]
        return r["Latitude"], r["Longitude"]


# In[14]:


def geocode_points(starts):
    """
    Geocode all addresses in a geopandas GeoDataFrame. Add columns "lat", "lon" (in epsg:4326), 
    as well as "geometry" (in epsg:3857)

    Parameters
    ----------
    starts: geopandas.GeoDataFrame
       a dataframe which contains a column "address"

    Returns
    -------
        geopandas.GeoDataFrame, with 3 additionnal columns : "lat", "lon", and "geometry"
    """
    starts[["lat", "lon"]] = starts.address.progress_apply(get_geocode).apply(pd.Series)
    # Convert lat/long in xy points
    starts= starts.set_geometry(gpd.points_from_xy(x = starts["lon"], y=starts["lat"]))
    
    # Set CRS
    starts.crs = CRS(osm_crs)
    starts = starts.to_crs(CRS(crs))
    return starts


# ## Isolines

# In[15]:


def get_isoline(lat, lon, range_list, add_options={}):
    """
    Get isolines 'lat, lon' as origin points, for all delays given in 'range_list' in minutes.
    Call Here isoline API, with some default parameters that can be overwritten or enriched with 'add_options'

    If 'Too Many Requests' error is received from Here, wait for 15 seconds and retries
    
    Parameters
    ----------
    lat: float
        Latitude (in epsg:4326)
    lon: float
        Longitude (in epsg:4326)
    range_list: list
        List of range values (in minutes)
    add_options : dict
        Example:  {"departureTime" : "2021-05-28T14:00:00+01:00", "transportMode":"pedestrian"}
    
    Returns
    -------
        A dict, with keys from range_list, and data is a list of coordinates tuple. 
        Example : 
            {10.0: [(50.818634, 4.257374),
                    (50.818291, 4.258232),
                    (50.817604, 4.258919),
                    (...)],
             15.0: (50.821381, 4.266815),
                   (50.821037, 4.267845),
                   (50.820351, 4.268532),
                   (...)]}
    """

    
    host_name = "isoline.router.hereapi.com"
    
    
    params = {"apiKey": here_api_key,
                "transportMode":f"car", 
                "departureTime":"any", # No traffic dependent
                "origin" : f"{lat},{lon}",
                "range[values]" :",".join([str(rng*60) for rng in range_list]),
                "range[type]":"time"#  "distance"
                }
    for k in add_options:
        params[k] = add_options[k]
        
    params = urllib.parse.urlencode(params)
    
    url = f"https://{host_name}/v8/isolines?{params}"
    
#     print(url)
    try: 
        with urllib.request.urlopen(url) as response:
            res = response.read()
            res = json.loads(res)
#             print(res)
    #         return res
            for it in res["isolines"]:
                if len(it["polygons"]) != 1:
                    print("More than 1 polygon!!")

            return {it["range"]["value"]/60: fp.decode(it["polygons"][0]["outer"]) for it in res["isolines"]} #["isoline"][0]["component"][0]["shape"]
    except urllib.error.HTTPError as ex:
        if ex.getcode() == 429:  #Too Many Requests
            print("Too many requests, wait for 15 seconds...")
            time.sleep(15)
            return get_isoline(lat, lon, range_list)
        else:
            print(ex)
            print(type(ex))
            print(url)
            return None


# In[16]:


# get_isoline(50.83458, 4.33639, [10])


# In[17]:


def get_isolines(starts, range_list, add_options={}):
    """
    From the output of geocode_points(starts), call get_isoline for each record, with range_list and add_options as parameter.
    
    Note: does NOT support multi-component isolines (https://developer.here.com/documentation/isoline-routing-api/8.4.0/dev_guide/topics/multi-component-isoline.html)
    
    Parameters
    ----------
    start: geopandas.GeoDataFrame
        output of geocode_points(starts)
    range_list: list
        list of range values (in minutes)
    add_options : dict
        Example:  {"departureTime" : "2021-05-28T14:00:00+01:00", "transportMode":"pedestrian"}
    
    Returns
    -------
    geopandas.GeoDataFrame
        A geopandas.GeoDataFrame, with starts.shape[0] * len(range_list) record. For each starting point and each range, 
        "geometry" columns is a polygon with the corresponding isoline
        

    """

    isolines= starts.copy()
    isolines["isolines"] = isolines.progress_apply(lambda row: get_isoline(row["lat"], row["lon"], range_list, add_options), axis=1)
    
    
    for rng in range_list:
        isolines[rng] = isolines["isolines"].apply(lambda x: x[rng])
    
    isolines =  isolines.drop(["geometry", "isolines"], axis=1)
    cols= isolines.drop(range_list, axis=1).columns
    
    isolines = isolines.set_index(list(cols)).stack().rename("isoline").reset_index().rename(columns={"level_7": "range",
                                                                                                      "level_8": "range",
                                                                                                      "level_9": "range",
                                                                                                     })

    # Convert isolines to polygons
    isolines["geometry"] = isolines["isoline"].apply(lambda row:Polygon(map(lambda tpl: (tpl[1], tpl[0]), row)))
    isolines = isolines.set_geometry("geometry")

    isolines.crs = CRS(osm_crs)
    isolines = isolines.to_crs(CRS(crs))
    return isolines.drop("isoline", axis=1)


# In[ ]:


# In[18]:


def get_isolines_grouped(isolines, subsets):
    """
    From the output of get_isolines(starts), group all isolines for the same range, making a (multi)polygon from all the 
    (single starting points) polygons
    
    Parameters
    ----------
    isolines: geopandas.GeoDataFrame
        output of get_isolines
    subsets: list
        list of scenarii (corrsponding to boolean columns in isolines dataframe)
    
    Returns
    -------
    geopandas.GeoDataFrame
        A geopandas.GeoDataFrame, with len(subsets) * len(range_list) record. For each scenario point and each range, 
        "geometry" columns is a (mutli)polygon with the corresponding reachable region

    """

    
    isolines_grouped = []
    for subset in subsets:
        gr = isolines[isolines[subset]].groupby("range").apply(lambda bloc: bloc.unary_union).rename("geometry").reset_index()
        gr["subset"]=subset
        isolines_grouped.append(gr)
        
    isolines_grouped = gpd.GeoDataFrame(pd.concat(isolines_grouped))
    isolines_grouped.crs = CRS(crs)

    isolines_grouped["label"] = isolines_grouped.apply(lambda x: f"{x['subset']} ({x['range']})", axis=1)
    
    return isolines_grouped


# In[19]:


# get_isolines_grouped(isolines, subsets)


# ## Plot

# In[20]:


def add_basemap(ax, alpha=None):
    """
    Add a basemap on a plot. Tries first default (Stamen) basemap. If errors, tries OpenStreetMap.Mapnik
    
    Parameters
    ----------
    ax: matplotlib axes
    
    Returns
    -------
        None
    """

    try: 
        ctx.add_basemap(ax, alpha=alpha)
    except requests.HTTPError:
        print("Default basemap doesn't work...")
        ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik, alpha=alpha)


# In[96]:


def disjoin_labels(geo_df):
    """
    From the output of get_isolines_grouped, filtred on a unique range (we should then have len(subsets) records, 
    extract the overlapping regions.
    
    Example :
     geo_df= [["subs1", geom1],
              ["subs2", geom2]]
    if geom2 in full included in geom1, output will be: 
        [["subs1",          geom1-geom2],
         ["all scenarii",   geom2]]
         
    if there is an ovelapping region "common":
        [["subs1",        geom1-common],
         ["subs2",        geom2-common],
         ["all scenarii", common]]
    
    If more than 2 subsets, extracts also overlappings two by two
    Parameters
    ----------
    geo_df: geopandas.GeoDataFrame
    
    Returns
    -------
        None
    """

    ## Find global common zone (common to all zones)
    
    common_zone =  geo_df.iloc[[0]]
    
    for i in range(1, geo_df.shape[0]):
        common_zone = gpd.overlay(common_zone, 
                              geo_df.iloc[[i]], 
                              how="intersection", 
                              keep_geom_type=False)
    common_zone = common_zone.geometry
    
    if common_zone.shape[0]>0:
        common_zone_s = gpd.GeoSeries([common_zone.iloc[0] for it in geo_df.iterrows()])
        common_zone_s.crs= geo_df.crs


        geo_df = geo_df.reset_index(drop=True).copy()

        geo_df["common_zone"] = common_zone_s
        try: 
            geo_df["geometry"]= geo_df.geometry.difference(geo_df["common_zone"])#.plot()
        except shapely.errors.TopologicalError as e:
            print(e)
            print("Trying with buffer(0)...")
            geo_df["geometry"]= geo_df.geometry.buffer(0).difference(geo_df["common_zone"])
            
            #return geo_df#.drop("common_zone", axis=1)

        m_range =  geo_df.range.iloc[0]

        geo_df = pd.concat([geo_df, gpd.GeoDataFrame([{"subset": "all scenarii", 
                                "label" : f"all scenarii ({m_range})", 
                                "geometry":common_zone.iloc[0], 
                                "range": m_range,
                               "reach_ratio":0 # not correct!
                               }])]).drop("common_zone", axis=1).copy()
    ## Find one2one (remaining) overlaps (only if len(subsets)>2)
    
    geo_df = gpd.GeoDataFrame(geo_df)
    
    
    sh = geo_df.shape[0]
    for i in range(sh):
        for j in range(i+1, sh):
            
            common_zone = geo_df.iloc[i].geometry.intersection(geo_df.iloc[j].geometry)
            
            if geo_df.iloc[i].geometry.area > 0 and common_zone.area/geo_df.area.max() > 0.01:
                
                geo_df["geometry"].iloc[i] = geo_df.iloc[i].geometry.difference(common_zone)
                geo_df["geometry"].iloc[j] = geo_df.iloc[j].geometry.difference(common_zone)
                
                geo_df = pd.concat([geo_df, gpd.GeoDataFrame([{"subset": f"{geo_df.iloc[i].subset} + {geo_df.iloc[j].subset}", 
                                "label" : f"{geo_df.iloc[i].subset} + {geo_df.iloc[j].subset} ({m_range})", 
                                "geometry":common_zone, 
                                "range": m_range,
                               "reach_ratio":0 # not correct!
                               }])]).copy()
                #common_zone.plot()
            
    geo_df = geo_df[~geo_df.geometry.is_empty & (geo_df.area/geo_df.area.max() > 0.01)]
    return geo_df


# In[110]:


# from matplotlib import cm
from matplotlib.colors import ListedColormap
def plot_isolines(isolines_grouped, starts, m_range, boundary):

    cmap_st = ListedColormap( [f"C{i}" for i in range(starts.label.nunique())])

    ax=starts.sort_values("label").plot("label", cmap=cmap_st, figsize=(10, 10), legend=(m_range is None))
    boundaries.boundary.plot(ax=ax, color="black")

    if m_range is not None: 
        geo_df = isolines_grouped[(isolines_grouped.range == m_range)]

        geo_df = disjoin_labels(geo_df).sort_values("label")
        cmap =    ListedColormap( [f"C{i}" for i in range(geo_df.label.nunique())])

        ax=  geo_df.plot("label",  ax=ax, alpha=0.2, cmap=cmap)
    #     ax = geo_df.plot("label", ax=ax, facecolor="none", edgecolor="k", cmap=cmap, legend=True)
        ax = geo_df.plot("label", ax=ax, facecolor="none", cmap=cmap, legend=True)

#     boundaries.boundary.plot(ax=ax, color="C2", linewidth=5)
    
    minx, miny, maxx, maxy = boundaries.total_bounds
    width = maxx-minx
    height = maxy-miny
    ax.set_xlim(minx - width/10,  maxx + width/10)
    ax.set_ylim(miny - height/10, maxy + height/10)
    
    ax.set_title(f"Reached zone ({dataset}{', '+option if option else ''})" + ("" if m_range is None else f" - range: {m_range} minutes"))
    add_basemap(ax)#, alpha=0.8)
    
    return ax


# In[23]:


# ax= isolines[isolines.range == minute_range[5]].plot("name", alpha=0.8, figsize=(10, 10), legend=True)
# boundaries.boundary.plot(ax=ax, color="C2", linewidth=5)
# add_basemap(ax)

# ax = plot_isolines(isolines_grouped, starts, minute_range[10], boundaries)
# plt.axis("off")


# In[24]:


# ax = plot_isolines(isolines_grouped, starts, minute_range[1], boundaries)


# In[25]:


def plot_isolines_px(isolines_grouped, starts, m_range, boundary):
    geo_df = isolines_grouped[(isolines_grouped.range == m_range)]
    
    geo_df.crs = CRS(crs)
    geo_df = geo_df.to_crs(osm_crs)
    
    bnd = boundaries.copy()
    bnd.crs = CRS(crs)
    bnd = bnd.to_crs(osm_crs)

#     display(geo_df)
    geo_df = disjoin_labels(geo_df)
    
    #display(geo_df)
    fig = px.choropleth_mapbox(geo_df, 
                           geojson=geo_df.geometry, 
                           locations=geo_df.index, 
                           hover_name="label",
                           hover_data=["label"],
                           color=geo_df['label'],
                           labels={"label": "Scenario"},
                           center={"lat": bnd.bounds[["miny","maxy"]].mean(axis=1).iloc[0], 
                                   "lon": bnd.bounds[["minx","maxx"]].mean(axis=1).iloc[0]},
#                            mapbox_style="carto-positron",
                           mapbox_style="open-street-map",
                           zoom=zoom,
                           width=950, height=800,
                           opacity=0.5,
                           title= f"Reached zone ({dataset}{', '+option if option else ''}) - range: {m_range} minutes"
                           
                   )
    
    #fig.update_geos(fitbounds="locations")
    
    starts_df =  starts.to_crs(osm_crs)
    
    fig.add_trace(go.Scattermapbox(
                            lat=starts_df.lat,
                            lon=starts_df.lon,
                            text=starts_df.name, 
                            name="Starts")
                       )
    
    
    if isinstance(bnd.geometry.iloc[0], shapely.geometry.MultiPolygon):
        # Get the main polygon
        bnd_coords = pd.DataFrame(max(bnd.geometry.iloc[0], key=lambda a: a.area).exterior.coords)
#         print(bnd)
    else: 
        bnd_coords = pd.DataFrame(bnd.geometry.iloc[0].exterior.coords)
    #[(x, y) for (x, y) in bnd.geometry.iloc[0].exterior.coords])
    
    fig.add_trace(go.Scattermapbox(lat=bnd_coords[1],
                            lon=bnd_coords[0],
                            mode='lines',
                            name="Boundary"))
    fig.update_layout(legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="right",
        x=0.99
    ))

    return fig


# ## Other

# In[26]:


import os
def download_if_nexist(url, filename):
    """
    If the (local) file <filename> does not exists, download it from <url> 

    Parameters
    ----------
    url: str
       url to fetch
    filename: str
       local file to save

    Returns
    -------

    None
    """
    if not os.path.isfile(filename):
        with urllib.request.urlopen(url) as response:
            with open(filename, "wb") as f:
                f.write(response.read())


# # Prepare starting points data

# In[27]:


dataset_label=f"{dataset}{'_'+option if option else ''}"


# In[28]:


output_path=f"output/{dataset_label}"
try : os.mkdir(output_path)
except FileExistsError: pass


# In[29]:


boundaries_filename = "data/boundaries.geojson"
if not os.path.isfile(boundaries_filename):
    zipcode_boundaries_filename = "data/zipcode_boundaries_shapefile_3812.zip"
    # https://www.geo.be/catalog/details/9738c7c0-5255-11ea-8895-34e12d0f0423?l=fr
    download_if_nexist("https://bgu.bpost.be/assets/9738c7c0-5255-11ea-8895-34e12d0f0423_x-shapefile_3812.zip",
                      zipcode_boundaries_filename)
    zipcodes_boundaries = gpd.read_file(f"zip://{zipcode_boundaries_filename}/3812")
    
    bru = Polygon(zipcodes_boundaries[zipcodes_boundaries.nouveau_PO.between("1000", "1299")].unary_union.exterior)
    bel = Polygon(max(zipcodes_boundaries.unary_union, key=lambda a: a.area).exterior)

    all_boundaries= gpd.GeoDataFrame([{"name":"BRU", "geometry": bru},
                                      {"name":"BEL", "geometry": bel}
                                  ])
    all_boundaries = all_boundaries.set_geometry("geometry")
    all_boundaries.crs = zipcodes_boundaries.crs
    all_boundaries = all_boundaries.to_crs(CRS(crs))
    
    all_boundaries.to_file(boundaries_filename, driver="GeoJSON")
else:
    
    all_boundaries = gpd.read_file(boundaries_filename)


# In[ ]:


# In[30]:


all_boundaries.plot("name", alpha=0.5)


# ## Smur Bxl

# In[31]:


if dataset == "smur":
    starts = gpd.GeoDataFrame(columns = 
    ["name",             "address",                                      "wk_xl", "wk_dlt", "wk_stjean", "wk_none"], data=[
    ["Paul Brien",       "Av. Britsiers 5, 1030 Schaerbeek",             True,    True,    True,    True ],
    ["Saint-Pierre",     "Rue Haute 290, 1000 Bruxelles",                True,    True,    True,    True ],
    ["Sainte-Elisabeth", "Avenue De Fré 206, 1180 Uccle",                True,    True,    True,    True ],
    ["Saint-Luc",        "Avenue Hippocrate, 1200 Woluwe-Saint-Lambert", True,    True,    True,    True ],
    ["Érasme (ULB)",     "Rue Meylemeersch 56, 1070 Anderlecht",         True,    True,    True,    True ],
    ["HM",               "Rue Bruyn 1, 1120 Bruxelles",                  True,    True,    True,    True ],
    ["AZVUB",            "Avenue du Laerbeek 101, 1090 Jette",           True,    True,    True,    True ],
    ["Ixelles",          "Rue Jean Paquot 63, 1050 Ixelles",             True,    False,   False,   False],
    ["Delta",            "Boulevard du Triomphe 201, 1160 Auderghem",    False,   True,    False,   False],
    ["Saint-Jean",       "Rue du Marais 104, 1000 Bruxelles",            False,   False,   True,    False],

    ])


    subsets = ["wk_xl", "wk_dlt", "wk_stjean"] #, "wk_none"
    minute_range = list(range(16)) # list(range(21)) #[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 18]
    

# In[32]:


if dataset == "smur":
    boundaries = all_boundaries[all_boundaries.name == "BRU"]
    zoom = 10.5


# ## Maternities

# In[33]:


if dataset == "maternity":
    starts = pd.read_csv("data/maternities.csv", usecols=["name", "address"])
    starts = gpd.GeoDataFrame(starts)

    starts["current"] = True


# In[34]:


# https://kce.fgov.be/fr/une-r%C3%A9duction-du-nombre-de-maternit%C3%A9s-est-possible-dans-notre-pays
# From https://www.google.com/maps/d/u/0/viewer?mid=1BuSrXrmRLzqMNVNLR-hSq0PL53o9GaB9&ll=50.83934807412515%2C4.471731899999895&z=9
# https://plus.lesoir.be/273299/article/2020-01-16/voici-les-17-maternites-belges-qui-pourraient-fermer-leurs-portes-carte

# AZ Sint-Jan (Ostende)
# AZ Zeno (Knokke-Heist)
# AZ Delta (Turnhout)
# Sint-Andries (Tielt)
# St. Vincentius (Deinze)
# A.Z. (Audenarde)
# AZ Delta (Menin)
# AZ Glorieux (Renaix)
# Site St-Joseph, du CHR de Mons
# CH Haute Senne, site le Tilleriau (Soignies)
# CHU André Vésale - site de Lobbes (Montigny-le-Tilleul)
# C.H. Jolimont (Lobbes)
# CHR Sambre et Meuse (Auvelais)
# Centre Hospitalier Régional de Huy
# CHC - Clinique Sainte-Elisabeth (Verviers)
# St. Nikolaus Hospital (Eupen)
# ASZ (Grammont) - déjà fermé depuis l'été
if dataset == "maternity":
    should_go_names = ["ST. JAN BRUGGE", "Zeno", "Torhout", "Tielt", "Deinze", "Oudenaarde", "Menen", "Glorieux", 
                       "CHR Mons", "Tilleriau", "Vesale", "Lobbes", "Sambre", "Huy", "VERVIERS, CENTRE HOSPITALIER CHRETIEN", 
                       "Eupen"]

    starts["future"] = starts.name.str.upper().apply(lambda x: sum([sgn.lower() in x.lower() for sgn in should_go_names]) == 0)
    assert starts[~starts.future].shape[0] ==  len(should_go_names)

    subsets = ["current", "future"]
    minute_range = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45]


# In[35]:


if dataset == "maternity":
    boundaries = all_boundaries[all_boundaries.name == "BEL"]
    zoom = 7


# ## Covid Vaccin

# In[36]:


if dataset == "vaccin":
    starts = gpd.GeoDataFrame(columns = 
    ["name",        "address",                                            "current", "with_WB"], data=[
    ["Pachéco",     "Boulevard Pachéco, 42, 1000 Bruxelles",               True,    True],
    ["Heysel ",     "Avenue Impératrice Charlotte, 6, 1020 Laeken",        True,    True],
    ["WSP",         "Drève des Shetlands, 15, 1150 Woluwé-Saint-Pierre",   True,    True],
    ["Schaerbeek",  "Avenue du Suffrage Universel, 1030 Schaerbeek",       True,    True],
    ["Molenbeek",   "Chaussée de Gand, 696, 1080 Molenbeek",               True,    True],
    ["Forest",      "Avenue Jupiter, 201, 1190 Forest",                    True,    True],
    ["Anderlecht",  "Avenue Théo Verbeeck, 2, 1070 Anderlecht",            True,    True],
    ["WSL",         "Avenue des Vaillants, 2, 1200 Woluwé-Saint-Lambert",  True,    True],
    ["Uccle",       "Rue Égide Van Ophem, 110, 1180 Uccle",                True,    True],
    ["HM",          "Rue Bruyn, 200 à 1120 Neder-over-Heembeek",           True,    True],
    ["Watermael",   "Boulevard du Souverain 25, 1170 Watermael-Boitsfort", False,   True]
    ])
    subsets = ["current", "with_WB"]
    minute_range = list(range(21)) #[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]


# In[37]:


if dataset == "vaccin":
    boundaries = all_boundaries[all_boundaries.name == "BRU"]
    zoom = 10.5


# ## Add label

# In[38]:


starts["label"] = starts.apply(lambda row: ";".join(sb for sb in subsets if row[sb]), axis=1)
starts["label"] = starts["label"].str.replace(";".join(subsets), "all scenarii")


# In[39]:


starts


# In[40]:


starts.label.value_counts()


# ## Geocoding

# In[41]:


if not from_files:
    starts = geocode_points(starts)
    starts.to_pickle(f"data/starts_{dataset_label}.pkl.gz")
else: 
    starts = pd.read_pickle(f"data/starts_{dataset_label}.pkl.gz")
    
    # fixing a bug that makes geodataframe being loaded from pickle to be not plottable
    # after loading from pickle: starts.name.dtype is np.dtype("O") == False
    n = starts.name.str.replace("willnotappear", "").copy()
    starts = starts.drop("name", axis=1).assign(name=n)
    # after those lines : # starts.name.dtype is np.dtype("O") == True
    
starts


# In[ ]:


# In[42]:


fig = px.scatter_mapbox(starts,
                        lat="lat",
                        lon="lon",
                        hover_name="name",
                        color='label',
                        size_max=120,
                        zoom=10,
                        mapbox_style="open-street-map",
                        #opacity=0.2
                   )

bd = boundaries.to_crs(osm_crs)
fig_bd = px.choropleth_mapbox(bd, 
                           geojson=bd.geometry, 
                           locations=bd.index, 
                           #labels="boundary",
                           #hover_name="name",
                           #fill_color="none",
                           
                           #zoom=10,
                           opacity=0.2
                   )


fig.add_trace(fig_bd.data[0])


fig.show()


# In[43]:


ax = starts.plot("label", legend=True, figsize=(10,10), markersize=200)
boundaries.boundary.plot(ax=ax, color="C2", linewidth=5)
add_basemap(ax, alpha=0.5)
plt.axis("off")


# # Multirange process

# In[44]:


from_files=True


# In[45]:


if not from_files:
    isolines = get_isolines(starts, minute_range, add_options)
    isolines.to_pickle(f"data/isolines_{dataset_label}.pkl.gz")

else :
    isolines = pd.read_pickle(f"data/isolines_{dataset_label}.pkl.gz")
    # fixing a bug that makes geodataframe being loaded from pickle to be not plottable
    n = isolines.name.str.replace("willnotappear", "").copy()
    isolines = isolines.drop("name", axis=1).assign(name=n)
    
    
isolines


# In[46]:


isolines_grouped = get_isolines_grouped(isolines, subsets)
isolines_grouped


# ## Some specific plots

# In[47]:


ax= isolines[isolines.range == minute_range[5]].plot("name", alpha=0.8, figsize=(10, 10), legend=True)
boundaries.boundary.plot(ax=ax, color="C2", linewidth=5)
add_basemap(ax, alpha=0.5)
plt.axis("off")


# In[48]:


start_name= starts.name.iloc[-2]
sel = isolines[(isolines.name==start_name) & (isolines.range==minute_range[5])]
ax = sel.plot(alpha=0.2, figsize=(10, 10))
ax = sel.boundary.plot(ax=ax)
starts[(starts.name==start_name)].plot(ax=ax)
add_basemap(ax, alpha=0.5)#, source=ctx.providers.OpenStreetMap.Mapnik)
plt.axis("off")


# In[49]:


sel = isolines_grouped[(isolines_grouped.range == minute_range[5]) & 
                    (isolines_grouped.subset == subsets[1])]

ax= sel.plot("label", alpha=0.2, figsize=(10, 10), legend=False)
ax = sel.boundary.plot(ax=ax)
starts[starts[subsets[1]]].plot(ax=ax)
boundaries.boundary.plot(ax=ax, color="C1")
add_basemap(ax)


# In[50]:


# ax = plot_isolines(isolines_grouped, starts, minute_range[6], boundaries)


# In[ ]:


# # Static images

# In[51]:


plt.rcParams.update({'figure.max_open_warning': len(minute_range)})


# In[104]:


for r in tqdm([None] + minute_range):
    f = plot_isolines(isolines_grouped, starts, r, boundaries)
    plt.axis("off")
    
    r_lab = "none" if r is None else f"{r:02}"
    plt.savefig(f"{output_path}/reached_zone_{dataset_label}_{r_lab}.png")
    plt.close() # Remove this line to see all plots --> might be long! 


# In[65]:


output_path


# In[111]:


plot_isolines(isolines_grouped, starts, minute_range[11], boundaries)
plt.axis("off")


# In[101]:


plot_isolines(isolines_grouped, starts, None, boundaries)
plt.axis("off")


# In[51]:


# Make a video from PNG images - Assumes ffmpeg is installed on the machine
print(f"Making {output_path}/reached_zone_{dataset_label}.mp4")
os.system(f"rm {output_path}/reached_zone_{dataset_label}.mp4")
os.system(f"cat {output_path}/reached_zone*.png | ffmpeg -f image2pipe -framerate 2 -i - -c:v libx264 -vf format=yuv420p -r 25 {output_path}/reached_zone_{dataset_label}.mp4")


# # Dash dashboard

# In[52]:


# Build App
app = JupyterDash(__name__)
app.layout = html.Div([
    html.H1(f"Multipoint reachability ({dataset})"),
    dcc.Graph(id='graph'),
    html.Label([
        "Range",
        dcc.Slider(
            id='range-slider', 
            min=minute_range[0],
            max=minute_range[-1],
            step=None,
            marks={c: str(c) for c in minute_range},
            value=minute_range[-3]
            )
    ])
])


# In[53]:


# Define callback to update graph
@app.callback(
    Output('graph', 'figure'),
    [Input("range-slider", "value")]#, Input('traffic-radio', 'value')]
)
def update_figure(m_range):#, traffic):
    
    return plot_isolines_px(isolines_grouped, starts, m_range, boundaries)#, traffic) 


# In[ ]:


ip = 'http://172.27.0.64/'
print(f"Watch outside Jupyter: http://{ip}:8050/")
app.run_server(mode='inline', debug=True, host=ip)


# In[ ]:


plot_isolines_px(isolines_grouped, starts, minute_range[5], boundaries)


# ## Reach ratio

# In[63]:


isolines_grouped_bnd = gpd.overlay(isolines_grouped, boundaries[["geometry"]], how="intersection")#.plot()


# In[64]:


isolines_grouped_bnd["reach_ratio"] = isolines_grouped_bnd.area/boundaries.area.iloc[0]


# In[74]:


from matplotlib.colors import  ListedColormap

pv=isolines_grouped_bnd.pivot(index="range", values="reach_ratio", columns= "subset") #["label", "traffic"])#
fig=pv.plot(figsize=(10, 6), title=f"Reach ratio ({dataset}{', '+option if option else ''})", colormap= ListedColormap(["C1", "C2", "C3"]))
plt.savefig(f"{output_path}/reach_ratio_{dataset_label}.png")


# In[ ]:


# With plotly
fig = px.line(pv,  title=f"Reach ratio ({dataset}{', '+option if option else ''})", 
             color_discrete_sequence=px.colors.qualitative.D3)
fig.update_layout(
    yaxis_title='Reached zone ratio',
    xaxis_title='Range (minutes)',
    title=f"Reach ratio ({dataset}{', '+option if option else ''})",
    hovermode="x",
    yaxis_tickformat= ',.2%'
)
fig.update_layout(legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01
    ))

fig.write_html(f"{output_path}/reach_ratio_{dataset_label}.html",  include_plotlyjs="cdn")
fig


# # Population data

# ## Using statistical sectors

# In[75]:


# https://statbel.fgov.be/fr/open-data/secteurs-statistiques-2020
download_if_nexist("https://statbel.fgov.be/sites/default/files/files/opendata/Statistische%20sectoren/sh_statbel_statistical_sectors_31370_20200101.shp.zip",
                   "data/stat_sectors_2020.zip")
statistical_sectors = gpd.read_file("zip://data/stat_sectors_2020.zip/sh_statbel_statistical_sectors_20200101.shp")
statistical_sectors = statistical_sectors.to_crs(crs)


# In[76]:


#statistical_sectors.shape


# In[77]:


# https://statbel.fgov.be/fr/nouvelles/la-population-par-secteur-statistique
download_if_nexist("https://statbel.fgov.be/sites/default/files/files/opendata/bevolking/sectoren/OPENDATA_SECTOREN_2020.xlsx",
                   "data/OPENDATA_SECTOREN_2020.xlsx")
population_per_sector = pd.read_excel("data/OPENDATA_SECTOREN_2020.xlsx")


# In[78]:


pop_per_sector_geo = population_per_sector[["TX_DESCR_FR", "TX_DESCR_SECTOR_FR", "CD_SECTOR", "POPULATION"]]
pop_per_sector_geo = pop_per_sector_geo.merge(statistical_sectors[["CS01012020", "geometry"]].rename({"CS01012020":"CD_SECTOR"}, axis=1)).set_geometry("geometry")


# In[79]:


pop_per_sector_geo.plot("POPULATION")


# ## Using grid

# It might be interessting to consider grid data, grouping population per 1km square. 
# In this example, statistical sectors give a beter precision, so we do not use it.

# In[80]:


# # https://statbel.fgov.be/fr/open-data/population-selon-la-grille-km2-grid-2020
# download_if_nexist("https://statbel.fgov.be/sites/default/files/files/opendata/Pop_GRID/TF_POPULATION_GRID_3035_20200101_geojson.zip",
#                    "data/pop_grid.zip")
# pop_per_sector_geo =  gpd.read_file("zip://data/pop_grid.zip/TF_POPULATION_GRID_3035_20200101.geojson")
# pop_per_sector_geo = pop_per_sector_geo[["geometry", "ms_population_20200101", "grd_floaid"]]
# pop_per_sector_geo = pop_per_sector_geo.rename(columns={"ms_population_20200101": "POPULATION",
#                                                         "grd_floaid": "CD_SECTOR"})
# pop_per_sector_geo = pop_per_sector_geo.to_crs(crs)
# pop_per_sector_geo


# ## Compute bounds

# In[81]:


# Keep only sector within boundaries
pop_per_sector_geo = gpd.sjoin(pop_per_sector_geo, boundaries.buffer(100).reset_index().set_geometry(0), op="within")


# In[82]:


# pop_per_sector_geo.POPULATION.sort_values()


# In[83]:


# pop_per_sector_geo.area.sort_values().astype(int)#.hist()


# In[84]:


pop_per_sector_geo.plot("POPULATION")


# In[85]:


# isolines_grouped_bnd.assign(geometry=isolines_grouped_bnd.buffer(10))


# In[86]:


security_buffer = 10


# In[87]:


isolines_grouped_pop = isolines_grouped_bnd
for op in ['intersects', 'contains']:
    isolines_grouped_op = gpd.sjoin(
                                    #isolines_grouped_bnd[["subset", "range", "geometry"]], 
                                    isolines_grouped_bnd.assign(geometry=isolines_grouped_bnd.buffer(security_buffer))[["subset", "range", "geometry"]], 
                                    pop_per_sector_geo[["geometry", "POPULATION"]],
                                    op=op)
    isolines_grouped_op = isolines_grouped_op.groupby(["subset", "range"]).POPULATION.sum().rename("POP_"+op).reset_index()
    
    isolines_grouped_pop = isolines_grouped_pop.merge(isolines_grouped_op, how="left")
isolines_grouped_pop


# In[88]:


pv_pop_max=isolines_grouped_pop.pivot(index="range", values="POP_intersects", columns= "subset").fillna(0) #["label", "traffic"])#
pv_pop_min=isolines_grouped_pop.pivot(index="range", values="POP_contains", columns= "subset").fillna(0) #["label", "traffic"])#


ax = pv_pop_max.plot(figsize=(10, 6), title=f"Population reach ratio ({dataset}{', '+option if option else ''})")
pv_pop_min.fillna(0).plot(ax=ax, alpha=0.5, color=["C0", "C1", "C2", "C3"], legend=False)

for c in range(pv_pop_min.shape[1]):
    ax.fill_between(pv_pop_max.index, pv_pop_max.iloc[:,c], pv_pop_min.iloc[:,c], color=f"C{c}", alpha=0.1)
plt.savefig(f"{output_path}/pop_reach_ratio_{dataset_label}.png") #set_index("label", "range").reach_ratio.plot()


# In[89]:


isolines_grouped_pop


# ## Beter average

# In[90]:


nb_levels=5

geo_df_levels = None

def get_crossed_sectors(geo_df, shape):
    """
    Compute which polygons of geo_df are crossed by shape

    Parameters
    ----------
    geo_df: geopandas.GeoDataFrame
       a GeoDataFrame with a string column "CD_SECTOR" 
    shape: shapely.geometry.linestring.LineString
       a line corresponding typically to a geofg.loc[i].geometry.boundary
       
    Returns
    -------

    pd.Series of boolean, with geo_df.shape[0] rows
    """

    # Simple version. Might be very long, especially for "maternity" use case 
    # return pop_per_sector_geo.geometry.crosses(shape)

    # Hierarchical version. Should be much more efficient
    # First tries to see which high level boundaries such as provinces are crossed by "shape". 
    # Then, only within those "crossed" provinces, see which districts are crossed by "shape", ..., down to statistical sectors
    
    global geo_df_levels
    if geo_df_levels is None:
        geo_df_levels = multilevel_split(geo_df)
    
    ppsg = geo_df_levels[0]
    
    for i in tqdm(range(nb_levels), leave=False):
        crosses = ppsg.geometry.crosses(shape)

        ppsg = ppsg[crosses]
        if i < nb_levels-1:
            ppsg = gpd.GeoDataFrame(ppsg[[f"CD_{i}"]].merge(geo_df_levels[i+1]))

    return geo_df.CD_SECTOR.isin(ppsg[f"CD_{nb_levels-1}"])


# In[91]:


# Multilevels
# Group sectors in several levels of larger sections

def multilevel_split(geo_df):
    geo_df = geo_df.assign(CD_0 = geo_df.CD_SECTOR.str[0:2],
                           CD_1 = geo_df.CD_SECTOR.str[0:4],
                           CD_2 = geo_df.CD_SECTOR.str[0:6],
                           CD_3 = geo_df.CD_SECTOR.str[0:7],
                           CD_4 = geo_df.CD_SECTOR)

    geo_df_levels= {}
    for i in tqdm(range(nb_levels), leave=False, desc="Getting multilevel split..."):
        df = geo_df.dissolve(f"CD_{i}")[["geometry"]].reset_index()
        if i > 0:
            df = df.merge(geo_df[[f"CD_{i-1}", f"CD_{i}"]].drop_duplicates()) 
        geo_df_levels[i] = df
    return geo_df_levels


# In[94]:


if from_files:
    isolines_grouped_pop = pd.read_pickle(f"data/isolines_reach_pop_{dataset_label}.pkl.gz")
else: 
    
    
    for i in tqdm(isolines_grouped_pop.index):
        isoline_sel=isolines_grouped_pop.loc[i].geometry
    #     crosses = pop_per_sector_geo.simplify(1000).geometry.crosses(isoline_sel.simplify(1000).boundary)
        crosses = get_crossed_sectors(pop_per_sector_geo, isoline_sel.buffer(security_buffer).boundary)

        isolines_grouped_pop.loc[i, "POP_crossed"] = pop_per_sector_geo[crosses].POPULATION.sum()
        common_part = pop_per_sector_geo[crosses].intersection(isoline_sel)#.plot()
        covered_ratio = common_part.area / pop_per_sector_geo[crosses].area
        covered_pop = int((pop_per_sector_geo[crosses].POPULATION * covered_ratio).sum())

        isolines_grouped_pop.loc[i, "POP_mean"] = isolines_grouped_pop.POP_contains.fillna(0).loc[i] + covered_pop
    isolines_grouped_pop.to_pickle(f"data/isolines_reach_pop_{dataset_label}.pkl.gz")


# In[95]:


# Solid line: assumes population is uniformly spread accross all sectors
# Top fill: assumes we reach all the population of a sector as soon as the isolines cross the sector (best case assumption)
# Bottom fill: assumes we reach no one in a sector before the full sector is included in the isoline (worst case assumption)

pv_pop_max= isolines_grouped_pop.pivot(index="range", values="POP_intersects", columns= "subset").fillna(0) / pop_per_sector_geo.POPULATION.sum()
pv_pop_min= isolines_grouped_pop.pivot(index="range", values="POP_contains",   columns= "subset").fillna(0) / pop_per_sector_geo.POPULATION.sum()
pv_pop_mean=isolines_grouped_pop.pivot(index="range", values="POP_mean",       columns= "subset").fillna(0) / pop_per_sector_geo.POPULATION.sum()


# In[99]:


# With Matplotlib
ax = pv_pop_mean.plot(figsize=(10, 6), 
                      title=f"Population reach ratio ({dataset}{', '+option if option else ''})",
                     colormap= ListedColormap(["C1", "C2", "C3"])) #set_index("label", "range").reach_ratio.plot()

for c in range(pv_pop_mean.shape[1]):
     ax.fill_between(pv_pop_max.index, pv_pop_max.iloc[:,c], pv_pop_min.iloc[:,c], color=f"C{c+1}", alpha=0.1)
plt.savefig(f"{output_path}/pop_reach_mean_ratio_{dataset_label}.png") #set_index("label", "range").reach_ratio.plot()


# In[ ]:


# With Plotly express
fig = go.Figure()
for subs, color in zip(pv_pop_mean.columns[::-1], ["44, 160, 44", "255, 127, 14",  "31, 119, 180", "255,0,0"]):
    
    fig.add_trace(
        go.Scatter(
            name=f'{subs} (mean)',
            x=pv_pop_mean.index,
            y=pv_pop_mean[subs],
            mode='lines',
            line=dict(color=f'rgb({color})'),
        ))
    fig.add_trace(    go.Scatter(
            name=f'{subs} (UB)',
            x=pv_pop_max.index,
            y=pv_pop_max[subs],
            mode='lines',
            marker=dict(color=f'rgb({color})'),
            line=dict(width=0),
            showlegend=False
        ))#,
    fig.add_trace(    go.Scatter(
            name=f'{subs} (LB)',
            x=pv_pop_min.index,
            y=pv_pop_min[subs],
            marker=dict(color=f'rgb({color})'),
            line=dict(width=0),
            mode='lines',
            fillcolor=f'rgba({color}, 0.2)',
       
            fill='tonexty',
            showlegend=False
        ))
# ])
fig.update_layout(
    yaxis_title='Reached population (ratio)',
    xaxis_title='Range (minutes)',
    title=f"Population reach ratio ({dataset}{', '+option if option else ''})",
    hovermode="x",
    yaxis_tickformat= ',.2%',
)

fig.update_layout(legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01
    ))
fig.show()
fig.write_html(f"{output_path}/pop_reach_mean_ratio_{dataset_label}.html",include_plotlyjs="cdn" ) #set_index("label", "range").reach_ratio.plot()


# ## Plots for blog
# 

# In[ ]:


# Works for "Vaccin", no traffic

if dataset == "vaccin" and option is None:
    aud_wb = statistical_sectors[statistical_sectors.T_MUN_FR.isin(["Auderghem", "Watermael-Boitsfort"])].copy()
    il = isolines[(isolines.name == "Watermael") & (isolines.range==3)]


    aud_wb["color"] = "red"
    aud_wb.loc[aud_wb.crosses(il.boundary.iloc[0]), "color"] ="orange"
    aud_wb.loc[aud_wb.within(il.iloc[0].geometry), "color"] ="green"

    ax = aud_wb.plot(figsize=(10,10), edgecolor="black", color=aud_wb.color, alpha=0.2)


    il.boundary.plot(ax=ax, color="blue")
    plt.axis('off')
    plt.savefig(f"{output_path}/crosses.png")


# In[ ]:


def get_osm_polyg(name):
    url = f"http://nominatim.openstreetmap.org/search.php?q={name}&format=json&limit=1&polygon_geojson=1"
    with urllib.request.urlopen(url) as response:
        return shape(json.loads(response.read())[0]["geojson"])

foret = get_osm_polyg("Foret+de+Soignes")
bxl = get_osm_polyg("Bruxelles-Capitale")
inters = bxl.intersection(foret)
df =  gpd.GeoDataFrame([{"name":"Forêt de Soignes", "geometry": foret},
                        {"name":"Bruxelles", "geometry": bxl},
                        {"name":"Intersection", "geometry": inters}])
df = df.set_geometry("geometry")
df.crs = osm_crs 
df.plot("name", legend=True)
print(f"Proportion of 'Forêt de Soignes' in Brussels: {inters.area/bxl.area:.02%}")


# In[ ]: