Description¶

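This tutorial shows how to use my `bhishan` module, which provides a plotly extension for pandas. The examples below call its helpers through the `bp` accessor on DataFrames (for example, `df.bp.describe()`).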

Imports¶

In [1]:

import numpy as np
import pandas as pd
import seaborn as sns
import bhishan
from bhishan import bp
import matplotlib.pyplot as plt

%load_ext autoreload
%load_ext watermark

%autoreload 2
%watermark -a "Bhishan Poudel" -d -v -m
%watermark -iv

Bhishan Poudel 2020-09-28 

CPython 3.7.7
IPython 7.18.1

compiler   : Clang 4.0.1 (tags/RELEASE_401/final)
system     : Darwin
release    : 19.6.0
machine    : x86_64
processor  : i386
CPU cores  : 4
interpreter: 64bit
pandas   1.1.0
bhishan  0.3.1
autopep8 1.5.2
seaborn  0.11.0
json     2.0.9
numpy    1.18.4

Using plotly api in module bhishan¶

In [2]:

# Optional: uncomment the next line to list the example dataset names that
# can be passed to sns.load_dataset().
# print(sns.get_dataset_names())

In [3]:

# Load seaborn's example Titanic dataset; every later `df.bp.*` cell uses this frame.
# Per the seaborn docs, load_dataset fetches from its online data repository
# (with local caching), so the first run needs network access.
df = sns.load_dataset('titanic')
# Preview the first five rows.
df.head()

Out[3]:

	survived	pclass	sex	age	sibsp	fare	embarked	class	who	adult_male	deck	embark_town	alive	alone
0	0	3	male	22.0	1	7.2500	S	Third	man	True	NaN	Southampton	no	False
1	1	1	female	38.0	1	71.2833	C	First	woman	False	C	Cherbourg	yes	False
2	1	3	female	26.0	0	7.9250	S	Third	woman	False	NaN	Southampton	yes	True
3	1	1	female	35.0	1	53.1000	S	First	woman	False	C	Southampton	yes	False
4	0	3	male	35.0	0	8.0500	S	Third	man	True	NaN	Southampton	no	True

In [4]:

# Frequency table for `embarked`: count, percent, and cumulative count/percent
# per value. In the output below the percentages are relative to the 889
# non-missing values (644 + 168 + 77), not to all 891 rows.
# NOTE(review): style=True presumably returns the formatted (styled) table
# with percent strings — confirm against bhishan's freq().
df.bp.freq(['embarked'],style=True)

Out[4]:

	embarked	Count	Percent	Cumulative Count	Cumulative Percent
0	S	644	72.44%	644	72.44%
1	C	168	18.90%	812	91.34%
2	Q	77	8.66%	889	100.00%

In [5]:

# Per-column summary: dtype, row count, non-null count, unique/missing/zero
# counts with percentages, plus mean/std/min/max/quartiles where shown
# (numeric columns only in the output below).
# Rows in the output appear sorted by Missing, then Zeros, both descending.
df.bp.describe()

Out[5]:

	Feature	Type	N	Count	Unique	Missing	MissingPct	Zeros	ZerosPct	mean	std	min	max	25%	50%	75%
11	deck	category	891	203	7	688	77.22	0	0.00
3	age	float64	891	714	88	177	19.87	0	0.00	29.70	14.53	0.42	80.00	20.12	28.00	38.00
7	embarked	object	891	889	3	2	0.22	0	0.00
12	embark_town	object	891	889	3	2	0.22	0	0.00
5	parch	int64	891	891	7	0	0.00	678	76.09	0.38	0.81	0.00	6.00	0.00	0.00	0.00
4	sibsp	int64	891	891	7	0	0.00	608	68.24	0.52	1.10	0.00	8.00	0.00	0.00	1.00
0	survived	int64	891	891	2	0	0.00	549	61.62	0.38	0.49	0.00	1.00	0.00	0.00	1.00
10	adult_male	bool	891	891	2	0	0.00	354	39.73
14	alone	bool	891	891	2	0	0.00	354	39.73
6	fare	float64	891	891	248	0	0.00	15	1.68	32.20	49.69	0.00	512.33	7.91	14.45	31.00
1	pclass	int64	891	891	3	0	0.00	0	0.00	2.31	0.84	1.00	3.00	2.00	3.00	3.00
2	sex	object	891	891	2	0	0.00	0	0.00
8	class	category	891	891	3	0	0.00	0	0.00
9	who	object	891	891	3	0	0.00	0	0.00
13	alive	object	891	891	2	0	0.00	0	0.00

In [6]:

# Check the Titanic frame for duplicated columns; the empty list below
# means none were reported.
df.bp.get_duplicate_columns()

Out[6]:

[]

In [7]:

# Inspect the dtype of every column.
# NOTE(review): Out[5] (df.bp.describe()) listed `deck` and `class` as
# category, but the output below shows them as object. One of the
# intervening df.bp.* calls appears to have converted them in place —
# confirm in bhishan; if unintended, call the accessor on df.copy().
df.dtypes

Out[7]:

survived         int64
pclass           int64
sex             object
age            float64
sibsp            int64
parch            int64
fare           float64
embarked        object
class           object
who             object
adult_male        bool
deck            object
embark_town     object
alive           object
alone             bool
dtype: object

In [8]:

# Toy frame for the duplicate-column check: `a_dup` is built directly from `a`,
# so the two columns hold identical values.
df1 = (
    pd.DataFrame({'a': range(3), 'b': range(1, 4)})
    .assign(a_dup=lambda frame: frame['a'])
)
df1

Out[8]:

	a	b	a_dup
0	0	1	0
1	1	2	1
2	2	3	2

In [9]:

# `a_dup` holds the same values as `a`: the call prints the matching pair
# ("a == a_dup") and returns the name of the redundant column (here the
# later one, `a_dup`).
df1.bp.get_duplicate_columns()

a == a_dup

Out[9]:

['a_dup']

In [10]:

# Missing-value report: prints the high-missing threshold (80% in the output
# below) and which columns fall above/below it, then returns one summary row
# per column that has missing values. `deck` (about 77% missing) stays in the
# "low" group because it is under the threshold.
df.bp.missing()

Missing values high threshold = 80%

Number of missing values features: 4
cols_missing_high = []
cols_missing_low = ['deck', 'age', 'embarked', 'embark_town']

Out[10]:

	Feature	Type	Count	Missing	Unique	MissingPct	count	mean	std	min	25%	50%	75%	max
11	deck	object	891	688	7	77.216611
3	age	float64	891	177	88	19.865320	714.000000	29.699118	14.526497	0.420000	20.125000	28.000000	38.000000	80.000000
7	embarked	object	891	2	3	0.224467
12	embark_town	object	891	2	3	0.224467

In [ ]:

Table of Contents

Description¶

Imports¶

Using plotly api in module bhishan¶