# Notebook setup: imports, IPython configuration and plotting backends.
# NOTE: the `%...` lines are IPython magics, so this file only runs inside
# IPython/Jupyter, not as a plain Python script.
# Data science imports
import pandas as pd
import numpy as np
# Load the autoreload extension; mode 2 reloads all imported modules before
# each execution, so edits to source files are picked up without a restart.
%load_ext autoreload
%autoreload 2
import sys
# Add the directory two levels up to the module search path
# (presumably so project-local modules can be imported -- confirm).
sys.path.append('../..')
# Options for pandas: show at most 20 columns when displaying a DataFrame
pd.options.display.max_columns = 20
# Display the value of every expression statement in a cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
# NOTE(review): `plotly.plotly` was moved to the separate `chart_studio`
# package in plotly 4, where this import fails. `py` is not used in the
# visible part of the file -- confirm before changing.
import plotly.plotly as py
import plotly.graph_objs as go
import cufflinks
# Configure cufflinks for offline plotting.
cufflinks.go_offline()
# Load the blue jay measurements; the first CSV column becomes the row index.
df = pd.read_csv(
    'data/blue_jays.csv',
    index_col=0,
)
# Preview the last five rows.
df.tail(n=5)
| | BirdID | KnownSex | BillDepth | BillWidth | BillLength | Head | Mass | Skull | Sex |
---|---|---|---|---|---|---|---|---|---|
119 | 962-62176 | M | 8.70 | 9.12 | 24.62 | 56.61 | 77.00 | 31.99 | 1 |
120 | 962-62181 | M | 7.96 | 9.80 | 25.07 | 55.68 | 68.00 | 30.61 | 1 |
121 | 962-62184 | F | 7.90 | 9.30 | 23.60 | 53.90 | 63.90 | 30.30 | 0 |
122 | 962-62185 | F | 7.63 | 8.56 | 24.29 | 54.19 | 70.45 | 29.90 | 0 |
123 | 962-62200 | F | 7.90 | 8.00 | 23.00 | 52.70 | 66.00 | 29.70 | 0 |
from plotly.offline import iplot
import plotly.figure_factory as ff
# Scatterplot matrix of Head, Mass and Skull, with points grouped by KnownSex.
figure = ff.create_scatterplotmatrix(
    df.loc[:, ['KnownSex', 'Head', 'Mass', 'Skull']],
    index='KnownSex',
    height=800,
    width=800,
)
iplot(figure)
# Pairwise correlations (pandas default: Pearson) between the measurements.
# Restrict to numeric/boolean columns explicitly: the frame also holds string
# columns (BirdID, KnownSex), and pandas >= 2.0 no longer drops those silently
# in DataFrame.corr() -- it raises ValueError instead. select_dtypes works on
# every pandas version, unlike the numeric_only= keyword (added in 1.5).
corrs = df.select_dtypes(include=['number', 'bool']).corr()
# Annotated heatmap of the correlation matrix; values are rounded to two
# decimals so the per-cell labels stay readable.
figure = ff.create_annotated_heatmap(
    z=corrs.round(2).values,
    x=list(corrs.columns),
    y=list(corrs.index),
    showscale=True,
)
iplot(figure)
# Same scatterplot matrix as before, but with histograms on the diagonal.
figure = ff.create_scatterplotmatrix(
    df.loc[:, ['KnownSex', 'Head', 'Mass', 'Skull']],
    diag='histogram',
    index='KnownSex',
    height=800,
    width=800,
)
iplot(figure)
# Violin plot of Mass, one violin per KnownSex group.
iplot(
    ff.create_violin(
        data=df,
        data_header='Mass',
        group_header='KnownSex',
    )
)
# Dendrogram built from the Mass and Skull columns, one leaf per row.
figure = ff.create_dendrogram(
    df.loc[:, ['Mass', 'Skull']],
)
iplot(figure)