# LIBS utilizadas durante o projeto
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.express as px
import numpy as np
# Import de Libs para trabalhar com Datas
from dateutil.relativedelta import relativedelta
from dateutil import parser
from dateutil.parser import parse
from datetime import date, time, datetime, timedelta
# Import de Libs para correlação
import seaborn as sn
import scipy
import statsmodels
# get the response in the form of html
wikiurl="https://en.wikipedia.org/wiki/List_of_helicopter_prison_escapes"
table_class="wikitable sortable"
response=requests.get(wikiurl)
print(response.status_code)
200
# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(response.text, 'html.parser')
helicopter_escape=soup.find('table',{'class':"wikitable"})
# Retorna uma lista
dados=pd.read_html(str(helicopter_escape))
dados_df=pd.DataFrame(dados[0])
dados_df.head(5)
Date | Prison name | Country | Succeeded | Escapee(s) | Details | |
---|---|---|---|---|---|---|
0 | August 19, 1971 | Santa Martha Acatitla | Mexico | Yes | Joel David Kaplan Carlos Antonio Contreras Castro | Joel David Kaplan was a New York businessman w... |
1 | October 31, 1973 | Mountjoy Jail | Ireland | Yes | JB O'Hagan Seamus TwomeyKevin Mallon | On October 31, 1973 an IRA member hijacked a h... |
2 | May 24, 1978 | United States Penitentiary, Marion | United States | No | Garrett Brock TrapnellMartin Joseph McNallyJam... | 43-year-old Barbara Ann Oswald hijacked a Sain... |
3 | February 27, 1981 | Fleury-Mérogis, Essonne, Ile de France | France | Yes | Gérard DupréDaniel Beaumont | With the help of Serge Coutel, Gérard Dupré an... |
4 | May 7, 1981 | Orsainville Prison, Quebec City | Canada | No | Marina Paquet (hijacker)Giles Arseneault (pris... | Marina Paquet held a sawed off shotgun against... |
dados_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 48 entries, 0 to 47 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Date 48 non-null object 1 Prison name 48 non-null object 2 Country 48 non-null object 3 Succeeded 48 non-null object 4 Escapee(s) 48 non-null object 5 Details 48 non-null object dtypes: object(6) memory usage: 2.4+ KB
# Filtro de colunas
dados_df[['Date', 'Succeeded']].head(5)
Date | Succeeded | |
---|---|---|
0 | August 19, 1971 | Yes |
1 | October 31, 1973 | Yes |
2 | May 24, 1978 | No |
3 | February 27, 1981 | Yes |
4 | May 7, 1981 | No |
# Armazenado as colunas necessárias à resposta 1 em uma variável (select_1)
select_1 = dados_df[['Date', 'Succeeded']]
# Tipos de Dados
select_1.dtypes
Date object Succeeded object dtype: object
select_1.info(5)
<class 'pandas.core.frame.DataFrame'> RangeIndex: 48 entries, 0 to 47 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Date 48 non-null object 1 Succeeded 48 non-null object dtypes: object(2) memory usage: 896.0+ bytes
# Select da coluna Date
select_1['Date'].head(5)
0 August 19, 1971 1 October 31, 1973 2 May 24, 1978 3 February 27, 1981 4 May 7, 1981 Name: Date, dtype: object
#Transforma a coluna Date em um DataFrame
coluna_date_df = pd.DataFrame(select_1['Date'])
coluna_date_df.head(5)
Date | |
---|---|
0 | August 19, 1971 |
1 | October 31, 1973 |
2 | May 24, 1978 |
3 | February 27, 1981 |
4 | May 7, 1981 |
coluna_date_df.dtypes
Date object dtype: object
# Utilizado a função do pandas "apply", que aplica funções para todas as linhas ou colunas usando axis. Escolhida por
#substituir os dias faltantes pelo dia vigente.
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.apply.html
coluna_date_df['Date'].apply(parse).head(5)
0 1971-08-19 1 1973-10-31 2 1978-05-24 3 1981-02-27 4 1981-05-07 Name: Date, dtype: datetime64[ns]
# Atribuindo resultado da View a uma nova coluna no DataFrame
coluna_date_df['Data_tratada'] = coluna_date_df['Date'].apply(parse)
coluna_date_df.head(5)
Date | Data_tratada | |
---|---|---|
0 | August 19, 1971 | 1971-08-19 |
1 | October 31, 1973 | 1973-10-31 |
2 | May 24, 1978 | 1978-05-24 |
3 | February 27, 1981 | 1981-02-27 |
4 | May 7, 1981 | 1981-05-07 |
# Função dt.year usada para extrair apenas o ano da coluna "Data_tratada"
coluna_date_df['Ano'] = coluna_date_df['Data_tratada'].dt.year
coluna_date_df.head(5)
Date | Data_tratada | Ano | |
---|---|---|---|
0 | August 19, 1971 | 1971-08-19 | 1971 |
1 | October 31, 1973 | 1973-10-31 | 1973 |
2 | May 24, 1978 | 1978-05-24 | 1978 |
3 | February 27, 1981 | 1981-02-27 | 1981 |
4 | May 7, 1981 | 1981-05-07 | 1981 |
coluna_date_df.groupby('Ano').count().columns
Index(['Date', 'Data_tratada'], dtype='object')
# Resposta da primeira pergunta, os anos que mais ocorreram tentativas de fuga de helic foram "2009, 2001, 1996, 2007"
df_count = coluna_date_df.groupby('Ano').count().reset_index()[['Ano','Date']].sort_values('Date', ascending=False)
df_count.columns = ['Ano', 'Contagem']
df_count = df_count.reset_index(drop=True)
df_count
Ano | Contagem | |
---|---|---|
0 | 2009 | 3 |
1 | 2001 | 3 |
2 | 1986 | 3 |
3 | 2007 | 3 |
4 | 2002 | 2 |
5 | 1981 | 2 |
6 | 2000 | 2 |
7 | 1985 | 2 |
8 | 2005 | 2 |
9 | 2013 | 2 |
10 | 1989 | 2 |
11 | 1992 | 2 |
12 | 2006 | 1 |
13 | 1971 | 1 |
14 | 2010 | 1 |
15 | 2003 | 1 |
16 | 2014 | 1 |
17 | 2016 | 1 |
18 | 2018 | 1 |
19 | 2012 | 1 |
20 | 1999 | 1 |
21 | 1973 | 1 |
22 | 1997 | 1 |
23 | 1996 | 1 |
24 | 1993 | 1 |
25 | 1991 | 1 |
26 | 1990 | 1 |
27 | 1988 | 1 |
28 | 1987 | 1 |
29 | 1983 | 1 |
30 | 1978 | 1 |
31 | 2020 | 1 |
dados=pd.read_html(str(helicopter_escape))
# Resposta da segunda pergunta: Em quais países ocorreram mais tentativas de fuga mediante sequestro de helicópteros?
# Foi possível verificar que os países com mais tentativas de fuga de helicóptero foram a França(15 tentativas),
# Estados Unidos(8 tentativas) seguidos de Belgica, Canada e Grécia todos com 4 tentativas.
dados_2 = pd.DataFrame(dados[0])
dados_2 = dados_2[['Country', 'Succeeded']].groupby('Country').count()[['Succeeded']].sort_values('Succeeded', ascending=False).reset_index()
dados_2
Country | Succeeded | |
---|---|---|
0 | France | 15 |
1 | United States | 8 |
2 | Belgium | 4 |
3 | Canada | 4 |
4 | Greece | 4 |
5 | Australia | 2 |
6 | Brazil | 2 |
7 | United Kingdom | 2 |
8 | Chile | 1 |
9 | Ireland | 1 |
10 | Italy | 1 |
11 | Mexico | 1 |
12 | Netherlands | 1 |
13 | Puerto Rico | 1 |
14 | Russia | 1 |
df_count['Ano']=df_count['Ano'].astype(str)
fig = px.bar(df_count, x='Ano', y='Contagem', title='Tentativas de fuga por ano',
labels={"Ano":"Ano", "Contagem":"Tentativas de fuga", "Contagem":"Tentativas de fuga"})
fig.show()
fig = px.bar(dados_2, x='Country', y='Succeeded', title='Países com mais tentativas de fuga',
labels={"Country":"Países", "Succeeded":"Tentativas de fuga", "Country":"Países"})
fig.show()
dados=pd.read_html(str(helicopter_escape))
dados_c = pd.DataFrame(dados[0])
dados_c.head(5)
Date | Prison name | Country | Succeeded | Escapee(s) | Details | |
---|---|---|---|---|---|---|
0 | August 19, 1971 | Santa Martha Acatitla | Mexico | Yes | Joel David Kaplan Carlos Antonio Contreras Castro | Joel David Kaplan was a New York businessman w... |
1 | October 31, 1973 | Mountjoy Jail | Ireland | Yes | JB O'Hagan Seamus TwomeyKevin Mallon | On October 31, 1973 an IRA member hijacked a h... |
2 | May 24, 1978 | United States Penitentiary, Marion | United States | No | Garrett Brock TrapnellMartin Joseph McNallyJam... | 43-year-old Barbara Ann Oswald hijacked a Sain... |
3 | February 27, 1981 | Fleury-Mérogis, Essonne, Ile de France | France | Yes | Gérard DupréDaniel Beaumont | With the help of Serge Coutel, Gérard Dupré an... |
4 | May 7, 1981 | Orsainville Prison, Quebec City | Canada | No | Marina Paquet (hijacker)Giles Arseneault (pris... | Marina Paquet held a sawed off shotgun against... |
select = dados_c[['Country','Succeeded']]
fugas_sucesso = select[select['Succeeded'] == 'Yes'].groupby('Country').count().sort_values('Succeeded', ascending=False).reset_index()
fugas_falha = select[select['Succeeded'] == 'No'].groupby('Country').count().sort_values('Succeeded', ascending=False).reset_index()
## Separação da coluna "Succeeded" = total de tentativas em duas colunas "Sucesso" e "Falha"
fugas_merge = fugas_sucesso.merge(fugas_falha, left_on='Country', right_on='Country', how='outer')
fugas_merge.columns=['Country', 'Sucesso', 'Falha']
fugas_merge
Country | Sucesso | Falha | |
---|---|---|---|
0 | France | 11.0 | 4.0 |
1 | United States | 6.0 | 2.0 |
2 | Canada | 3.0 | 1.0 |
3 | Belgium | 2.0 | 2.0 |
4 | Brazil | 2.0 | NaN |
5 | Greece | 2.0 | 2.0 |
6 | Australia | 1.0 | 1.0 |
7 | Chile | 1.0 | NaN |
8 | Ireland | 1.0 | NaN |
9 | Italy | 1.0 | NaN |
10 | Mexico | 1.0 | NaN |
11 | Puerto Rico | 1.0 | NaN |
12 | Russia | 1.0 | NaN |
13 | United Kingdom | 1.0 | 1.0 |
14 | Netherlands | NaN | 1.0 |
fugas_merge['Sucesso'] = fugas_merge['Sucesso'].fillna(0)
fugas_merge
Country | Sucesso | Falha | |
---|---|---|---|
0 | France | 11.0 | 4.0 |
1 | United States | 6.0 | 2.0 |
2 | Canada | 3.0 | 1.0 |
3 | Belgium | 2.0 | 2.0 |
4 | Brazil | 2.0 | NaN |
5 | Greece | 2.0 | 2.0 |
6 | Australia | 1.0 | 1.0 |
7 | Chile | 1.0 | NaN |
8 | Ireland | 1.0 | NaN |
9 | Italy | 1.0 | NaN |
10 | Mexico | 1.0 | NaN |
11 | Puerto Rico | 1.0 | NaN |
12 | Russia | 1.0 | NaN |
13 | United Kingdom | 1.0 | 1.0 |
14 | Netherlands | 0.0 | 1.0 |
fugas_merge['Falha'] = fugas_merge['Falha'].fillna(0)
fugas_merge
Country | Sucesso | Falha | |
---|---|---|---|
0 | France | 11.0 | 4.0 |
1 | United States | 6.0 | 2.0 |
2 | Canada | 3.0 | 1.0 |
3 | Belgium | 2.0 | 2.0 |
4 | Brazil | 2.0 | 0.0 |
5 | Greece | 2.0 | 2.0 |
6 | Australia | 1.0 | 1.0 |
7 | Chile | 1.0 | 0.0 |
8 | Ireland | 1.0 | 0.0 |
9 | Italy | 1.0 | 0.0 |
10 | Mexico | 1.0 | 0.0 |
11 | Puerto Rico | 1.0 | 0.0 |
12 | Russia | 1.0 | 0.0 |
13 | United Kingdom | 1.0 | 1.0 |
14 | Netherlands | 0.0 | 1.0 |
fugas_merge['Total_tentativas'] = fugas_merge['Falha']+fugas_merge['Sucesso']
fugas_merge
Country | Sucesso | Falha | Total_tentativas | |
---|---|---|---|---|
0 | France | 11.0 | 4.0 | 15.0 |
1 | United States | 6.0 | 2.0 | 8.0 |
2 | Canada | 3.0 | 1.0 | 4.0 |
3 | Belgium | 2.0 | 2.0 | 4.0 |
4 | Brazil | 2.0 | 0.0 | 2.0 |
5 | Greece | 2.0 | 2.0 | 4.0 |
6 | Australia | 1.0 | 1.0 | 2.0 |
7 | Chile | 1.0 | 0.0 | 1.0 |
8 | Ireland | 1.0 | 0.0 | 1.0 |
9 | Italy | 1.0 | 0.0 | 1.0 |
10 | Mexico | 1.0 | 0.0 | 1.0 |
11 | Puerto Rico | 1.0 | 0.0 | 1.0 |
12 | Russia | 1.0 | 0.0 | 1.0 |
13 | United Kingdom | 1.0 | 1.0 | 2.0 |
14 | Netherlands | 0.0 | 1.0 | 1.0 |
fugas_merge['Chance_sucesso'] = (fugas_merge['Sucesso']/fugas_merge['Total_tentativas'])*100
fugas_merge
Country | Sucesso | Falha | Total_tentativas | Chance_sucesso | |
---|---|---|---|---|---|
0 | France | 11.0 | 4.0 | 15.0 | 73.333333 |
1 | United States | 6.0 | 2.0 | 8.0 | 75.000000 |
2 | Canada | 3.0 | 1.0 | 4.0 | 75.000000 |
3 | Belgium | 2.0 | 2.0 | 4.0 | 50.000000 |
4 | Brazil | 2.0 | 0.0 | 2.0 | 100.000000 |
5 | Greece | 2.0 | 2.0 | 4.0 | 50.000000 |
6 | Australia | 1.0 | 1.0 | 2.0 | 50.000000 |
7 | Chile | 1.0 | 0.0 | 1.0 | 100.000000 |
8 | Ireland | 1.0 | 0.0 | 1.0 | 100.000000 |
9 | Italy | 1.0 | 0.0 | 1.0 | 100.000000 |
10 | Mexico | 1.0 | 0.0 | 1.0 | 100.000000 |
11 | Puerto Rico | 1.0 | 0.0 | 1.0 | 100.000000 |
12 | Russia | 1.0 | 0.0 | 1.0 | 100.000000 |
13 | United Kingdom | 1.0 | 1.0 | 2.0 | 50.000000 |
14 | Netherlands | 0.0 | 1.0 | 1.0 | 0.000000 |
## Os países em que fugas prisionais de helicóptero tem mais chance de sucesso são: Brasil,
## Chile, Irlanda, Itália, México, Porto Rico e Rússia
fugas_merge['Chance_insucesso'] = (fugas_merge['Falha']/fugas_merge['Total_tentativas'])*100
fugas_merge = fugas_merge.sort_values(['Chance_sucesso', 'Sucesso'], ascending=False)
fugas_merge
Country | Sucesso | Falha | Total_tentativas | Chance_sucesso | Chance_insucesso | |
---|---|---|---|---|---|---|
4 | Brazil | 2.0 | 0.0 | 2.0 | 100.000000 | 0.000000 |
7 | Chile | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
8 | Ireland | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
9 | Italy | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
10 | Mexico | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
11 | Puerto Rico | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
12 | Russia | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
1 | United States | 6.0 | 2.0 | 8.0 | 75.000000 | 25.000000 |
2 | Canada | 3.0 | 1.0 | 4.0 | 75.000000 | 25.000000 |
0 | France | 11.0 | 4.0 | 15.0 | 73.333333 | 26.666667 |
3 | Belgium | 2.0 | 2.0 | 4.0 | 50.000000 | 50.000000 |
5 | Greece | 2.0 | 2.0 | 4.0 | 50.000000 | 50.000000 |
6 | Australia | 1.0 | 1.0 | 2.0 | 50.000000 | 50.000000 |
13 | United Kingdom | 1.0 | 1.0 | 2.0 | 50.000000 | 50.000000 |
14 | Netherlands | 0.0 | 1.0 | 1.0 | 0.000000 | 100.000000 |
fig = px.histogram(fugas_merge, x='Country', y='Chance_sucesso', title = 'Chance de sucesso',
labels={'Country':'Países', 'Chance_sucesso':'Chance de sucesso', 'Chance_sucesso':'Chance de sucesso'})
fig.show()
# Percentual de falha com for adotado no projeto
Negativo = []
for i in fugas_merge['Chance_insucesso']:
if i > 0:
Negativo.append(-i)
else:
Negativo.append(i)
Negativo
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -25.0, -25.0, -26.666666666666668, -50.0, -50.0, -50.0, -50.0, -100.0]
fugas_merge
Country | Sucesso | Falha | Total_tentativas | Chance_sucesso | Chance_insucesso | |
---|---|---|---|---|---|---|
4 | Brazil | 2.0 | 0.0 | 2.0 | 100.000000 | 0.000000 |
7 | Chile | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
8 | Ireland | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
9 | Italy | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
10 | Mexico | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
11 | Puerto Rico | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
12 | Russia | 1.0 | 0.0 | 1.0 | 100.000000 | 0.000000 |
1 | United States | 6.0 | 2.0 | 8.0 | 75.000000 | 25.000000 |
2 | Canada | 3.0 | 1.0 | 4.0 | 75.000000 | 25.000000 |
0 | France | 11.0 | 4.0 | 15.0 | 73.333333 | 26.666667 |
3 | Belgium | 2.0 | 2.0 | 4.0 | 50.000000 | 50.000000 |
5 | Greece | 2.0 | 2.0 | 4.0 | 50.000000 | 50.000000 |
6 | Australia | 1.0 | 1.0 | 2.0 | 50.000000 | 50.000000 |
13 | United Kingdom | 1.0 | 1.0 | 2.0 | 50.000000 | 50.000000 |
14 | Netherlands | 0.0 | 1.0 | 1.0 | 0.000000 | 100.000000 |
fugas_merge['Chance_insucesso']
4 0.000000 7 0.000000 8 0.000000 9 0.000000 10 0.000000 11 0.000000 12 0.000000 1 25.000000 2 25.000000 0 26.666667 3 50.000000 5 50.000000 6 50.000000 13 50.000000 14 100.000000 Name: Chance_insucesso, dtype: float64
import plotly.graph_objects as go
years = fugas_merge['Country']
fig = go.Figure(
layout=dict(title=dict(text=" Chances de sucesso/falha"))
)
fig.add_trace(go.Bar(x=years, y=fugas_merge['Chance_insucesso'],
base=Negativo,
marker_color='crimson',
name='Chance de Falha'))
fig.add_trace(go.Bar(x=years, y=fugas_merge['Chance_sucesso'],
base=0,
marker_color='lightslategrey',
name='Chance de Sucesso'
))
fig.update_traces(texttemplate='%{y:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()