In [247]:
# Record the author, the last-updated date and the versions of the key
# libraries so the environment behind the outputs below is documented.
%load_ext watermark
%watermark -a "Romell D.Z." -u -d -p numpy,pandas,matplotlib,seaborn,statsmodels
The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
Romell D.Z. 
last updated: 2018-11-25 

numpy 1.15.4
pandas 0.23.4
matplotlib 2.2.2
seaborn 0.9.0
statsmodels 0.10.0.dev0+3261eea
In [248]:
import warnings
warnings.simplefilter('ignore')

%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import numpy as np
import pandas as pd
import seaborn as sns
sns.set('notebook')
from __future__ import division
import statsmodels.api as sm
# Notebook-wide matplotlib defaults: a large figure with oversized title,
# axis-label and tick-label fonts.
plt.rcParams['figure.figsize'] = (18,10)
plt.rcParams['axes.titlesize'] = 40
plt.rcParams['axes.labelsize'] = 25
plt.rcParams['ytick.labelsize'] = 15
plt.rcParams['xtick.labelsize'] = 15
# Ask the inline backend to emit figures in the 'retina' format.
%config InlineBackend.figure_format = 'retina'
In [249]:
def clean_dataframe(df):
    """Fill the gaps of ``df`` in place.

    The cleaning happens in three steps:

    1. Columns that hold no data at all are dropped.
    2. For every remaining column that has at least one missing value, the
       last row is anchored: if (and only if) its cell is missing, the column
       median is written there so the interpolation has an end point.
    3. The columns with gaps are interpolated (pandas' default, linear).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean. It is modified in place.

    Returns
    -------
    None
        The result is delivered through the mutation of ``df``.
    """
    df.dropna(axis=1, how='all', inplace=True)
    columns_navalue = df.columns[df.isna().any()].tolist()
    if not columns_navalue:
        return

    medians = df[columns_navalue].median()
    # Purely positional access replaces the removed ``.ix`` indexer, whose
    # meaning of ``-1`` silently switched between "last row" and "label -1"
    # depending on the index type.
    col_positions = df.columns.get_indexer(columns_navalue)
    last_row = df.iloc[-1, col_positions]
    # Only the missing cells of the last row receive the median; valid
    # observations must not be overwritten.
    df.iloc[-1, col_positions] = last_row.fillna(medians).values
    df[columns_navalue] = df[columns_navalue].interpolate()
        
def plot_df(df,title ='Tourist'):
    """Draw a horizontal bar chart with the total of every row of ``df``.

    Each row of ``df`` becomes one bar (labelled 'Region') whose length is the
    sum of that row across all columns ('Amount'). The x axis is log-scaled
    and every bar is annotated with its total.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per category to plot; all columns are summed.
    title : str
        Title written on the axes.

    Returns
    -------
    None
        The chart is drawn on the current matplotlib axes.
    """
    totals = df.T.sum().to_frame().reset_index()
    # Name the columns explicitly instead of relying on the iteration order
    # of a dict, which is not guaranteed on older interpreters.
    totals.columns = ['Region', 'Amount']
    ax = sns.barplot(data=totals, x='Amount', y='Region')
    for i, v in enumerate(totals['Amount'].values):
        # NOTE(review): the value is divided by 1000 but suffixed with 'M';
        # confirm the unit of the source data before changing the label.
        ax.text(v + v * .1, i + .2, '%.3fM' % (v / 1000),
                color='black', fontweight='bold')
    # The ``title`` argument used to be accepted but ignored.
    ax.set_title(title)
    plt.xscale('log')
    xx, _labels = plt.xticks()
    ll = ['{:,}'.format(a) for a in xx]
    # The last tick is dropped, as in the original layout.
    plt.xticks(xx[:-1], ll[:-1])
        
def show_descriptive_data(name_file,sheet,scale='symlog',title ='Tourist'):
    """Load an Excel sheet, clean it and show the per-row totals as a bar chart.

    Parameters
    ----------
    name_file : str
        Path of the Excel workbook.
    sheet : str
        Name of the sheet to read.
    scale : str
        Matplotlib scale applied to the x axis after plotting.
    title : str
        Title of the figure.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # The first 3 rows are skipped and at most 25 rows are read; the first
    # column becomes the index.
    df = pd.read_excel(name_file,sheet_name=sheet,nrows=25,
                           skiprows=3,index_col=0,)
    # Keep only columns whose label is purely alphanumeric. ``str()`` makes
    # the check safe for non-string labels (e.g. integer headers), which
    # would otherwise raise AttributeError.
    keep = [str(label).isalnum() for label in df.columns]
    # Work on an explicit copy: ``clean_dataframe`` mutates its argument and
    # must not operate on a view of the frame returned by ``read_excel``.
    df = df.loc[:, keep].copy()
    clean_dataframe(df)
    plot_df(df,title)
    plt.xscale(scale)
    plt.title(title)
    plt.show()
In [250]:
def plot_df_byYear(df, exclude=('Lima',)):
    """Plot every row of ``df`` as a line over time on a log-scaled y axis.

    The frame is transposed so that its column labels become the x axis
    (parsed with ``pandas.to_datetime``) and every row becomes one line.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per series; column labels must be parseable as dates.
    exclude : iterable of str
        Row labels left out of the chart. Labels that are not present are
        ignored. Defaults to ``('Lima',)`` -- presumably because its values
        dwarf the other regions; confirm with the data owner.

    Returns
    -------
    None
        The chart is drawn on a newly created figure.
    """
    # Keyword form of ``drop``: the positional ``axis`` argument is no longer
    # accepted by recent pandas releases.
    by_date = df.T.drop(columns=list(exclude), errors='ignore')
    by_date.index = pd.to_datetime(by_date.index)
    _, ax = plt.subplots()
    by_date.plot(ax=ax)
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05),
          ncol=3, fancybox=True, shadow=True)
    ax.set_ylabel('Amount')
    ax.set_yscale('log')
        
def show_descriptive_data_byYear(name_file,sheet):
    """Load an Excel sheet, clean it and plot every row as a line over time.

    Parameters
    ----------
    name_file : str
        Path of the Excel workbook.
    sheet : str
        Name of the sheet to read.

    Returns
    -------
    None
        The chart is drawn by ``plot_df_byYear``.
    """
    # The first 3 rows are skipped and at most 25 rows are read; the first
    # column becomes the index.
    df = pd.read_excel(name_file,sheet_name=sheet,nrows=25,
                           skiprows=3,index_col=0,)
    # Keep only columns whose label is purely alphanumeric. ``str()`` makes
    # the check safe for non-string labels (e.g. integer headers), which
    # would otherwise raise AttributeError.
    keep = [str(label).isalnum() for label in df.columns]
    # Work on an explicit copy: ``clean_dataframe`` mutates its argument and
    # must not operate on a view of the frame returned by ``read_excel``.
    df = df.loc[:, keep].copy()
    clean_dataframe(df)
    plot_df_byYear(df)
In [251]:
# Number of lodging establishments by region (x axis on the default 'symlog' scale).
show_descriptive_data(
    name_file='rptaniomes_a.xls',
    sheet='rptaniomes',
    title='Número de establecimientos de hospedaje, según región',
)
In [252]:
# Same workbook, shown as one line per region over time.
show_descriptive_data_byYear(
    name_file='rptaniomes_a.xls',
    sheet='rptaniomes',
)
In [253]:
# Number of lodging establishments by region, this time on a plain 'log' x axis.
show_descriptive_data(
    name_file='rptaniomes_a.xls',
    sheet='rptaniomes',
    scale='log',
    title='Número de establecimientos de hospedaje, según región',
)
In [254]:
# Number of beds ("plazas-cama") in lodging establishments by region.
show_descriptive_data(
    name_file='rptaniomes_b.xls',
    sheet='rptaniomes',
    title='Número de plazas-Cama de los establecimientos de hospedaje, según región',
)