#!/usr/bin/env python
# coding: utf-8

# In[57]:


import pandas as pd
import numpy as np


# In[58]:


# Two small frames keyed by shape name, used below to contrast merge and concat.
sides = pd.DataFrame(
    {
        "shape": ["triangle", "square", "rectangle", "pentagon", "hexagon"],
        "sides": [3, 4, 4, 5, 6],
    }
)
sides


# In[59]:


# Pentagon and hexagon are deliberately interchanged relative to `sides`, so
# the two frames disagree at positional index 3 and 4.
angles = pd.DataFrame(
    {
        "shape": ["triangle", "square", "rectangle", "hexagon", "pentagon"],
        "angles_in_degrees": [180, 360, 360, 720, 540],
    }
)
angles


# In[60]:


# Simple merge - pandas is left to decide what works best here. It identifies
# the shared "shape" column and matches the angles for pentagon and hexagon
# correctly, regardless of their row positions.
sides.merge(angles)


# In[61]:


# As present in your notebook:
#   merged = pd.merge(left = combined_service_cat, right = years,
#                     left_index = True, right_index = True)
#
# This gives a different result because the merge is done on both indexes.
# Rows are paired by position, and since the values at index 3 and 4 differ
# between the two dataframes, pentagon and hexagon end up mismatched.
sides.merge(angles, left_index=True, right_index=True)


# In[62]:


# Simple concat - the 'axis' parameter is not assigned, so the 2nd dataframe
# is stacked below the 1st dataframe.
#
# Please note: this is what the solution for this project actually wants!
# DETE below TAFE, or vice versa.
stacked_frames = [sides, angles]
pd.concat(stacked_frames)


# In[63]:


# As present in your notebook:
#   combined_service_cat = pd.concat([combined_service_cat, years], axis = 1)
#
# Here the 'axis' parameter is assigned, so the 2nd dataframe is placed next
# to the 1st one. However, the "shape" column name and values are now
# duplicated.
pd.concat([sides, angles], axis=1)


# In[64]:


# Sample "years" column mixing range strings, plain integers and a missing
# value. `np.nan` is the canonical spelling; the `np.NaN` alias was removed in
# NumPy 2.0 and raises AttributeError there.
dummy_years = pd.DataFrame({"years": ["1-2", 1, 5, 10, "11-12", np.nan]})
dummy_years


# In[65]:


# First_Year: one or two digits. Second_Year: optional, one or two digits not
# starting with 0, optionally preceded by a hyphen (e.g. "11-12").
pattern = r"(?P<First_Year>[0-9][0-9]?)-?(?P<Second_Year>[1-9][0-9]?)?"

# astype(str) was added to your code so that the integer entries can be
# processed by the .str accessor too; now it is possible to extract all years.
# The missing value contains no digits, so it yields no match row.
dummy_extract = dummy_years["years"].astype(str).str.extractall(pattern)
dummy_extract


# In[66]:


# extractall returns a (row, match) MultiIndex; move "match" out to a column
# so the frame is indexed by the original row labels again.
dummy_new = dummy_extract.reset_index("match")
dummy_new


# In[67]:


# Now suppose we only want the First_Year column, although in my project I
# calculated a new column as an arithmetic mean of the other two. That's why
# I extracted all years in my project.
dummy_new = dummy_new.drop(columns=["match", "Second_Year"])

dummy_new


# In[68]:


# Index-on-index merge is safe here because dummy_new kept the row labels of
# dummy_years. It is an inner join, so the row with the missing value (which
# produced no regex match) is not present in the result.
merge = pd.merge(left=dummy_years, right=dummy_new, left_index=True, right_index=True)

merge