This is an example tutorial showing how to use my module `bhishan` for the plotly extension for pandas.
import numpy as np
import pandas as pd
import seaborn as sns
import bhishan
from bhishan import bp
import matplotlib.pyplot as plt
# Load the IPython extensions used by this notebook.
%load_ext autoreload
%load_ext watermark
# Mode 2: reload imported modules before each cell runs, so edits to the
# module under development are picked up without restarting the kernel.
%autoreload 2
# Record the author, date, Python/IPython versions and machine details.
%watermark -a "Bhishan Poudel" -d -v -m
# Record the versions of the packages imported above.
%watermark -iv
Bhishan Poudel 2020-09-28 CPython 3.7.7 IPython 7.18.1 compiler : Clang 4.0.1 (tags/RELEASE_401/final) system : Darwin release : 19.6.0 machine : x86_64 processor : i386 CPU cores : 4 interpreter: 64bit pandas 1.1.0 bhishan 0.3.1 autopep8 1.5.2 seaborn 0.11.0 json 2.0.9 numpy 1.18.4
# To see which example datasets seaborn offers, run:
#     print(sns.get_dataset_names())
# Load the titanic example dataset and preview its first rows.
df = sns.load_dataset('titanic')
df.head()
survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S | Third | man | True | NaN | Southampton | no | False |
1 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C | First | woman | False | C | Cherbourg | yes | False |
2 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S | Third | woman | False | NaN | Southampton | yes | True |
3 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S | First | woman | False | C | Southampton | yes | False |
4 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S | Third | man | True | NaN | Southampton | no | True |
# Frequency table for 'embarked': count and percent per value, together
# with their cumulative totals.
df.bp.freq(['embarked'], style=True)
embarked | Count | Percent | Cumulative Count | Cumulative Percent | |
---|---|---|---|---|---|
0 | S | 644 | 72.44% | 644 | 72.44% |
1 | C | 168 | 18.90% | 812 | 91.34% |
2 | Q | 77 | 8.66% | 889 | 100.00% |
# Per-column summary: dtype, row/non-null/unique counts, missing and zero
# counts with percentages, plus descriptive statistics for numeric columns.
df.bp.describe()
Feature | Type | N | Count | Unique | Missing | MissingPct | Zeros | ZerosPct | mean | std | min | max | 25% | 50% | 75% | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
11 | deck | category | 891 | 203 | 7 | 688 | 77.22 | 0 | 0.00 | |||||||
3 | age | float64 | 891 | 714 | 88 | 177 | 19.87 | 0 | 0.00 | 29.70 | 14.53 | 0.42 | 80.00 | 20.12 | 28.00 | 38.00 |
7 | embarked | object | 891 | 889 | 3 | 2 | 0.22 | 0 | 0.00 | |||||||
12 | embark_town | object | 891 | 889 | 3 | 2 | 0.22 | 0 | 0.00 | |||||||
5 | parch | int64 | 891 | 891 | 7 | 0 | 0.00 | 678 | 76.09 | 0.38 | 0.81 | 0.00 | 6.00 | 0.00 | 0.00 | 0.00 |
4 | sibsp | int64 | 891 | 891 | 7 | 0 | 0.00 | 608 | 68.24 | 0.52 | 1.10 | 0.00 | 8.00 | 0.00 | 0.00 | 1.00 |
0 | survived | int64 | 891 | 891 | 2 | 0 | 0.00 | 549 | 61.62 | 0.38 | 0.49 | 0.00 | 1.00 | 0.00 | 0.00 | 1.00 |
10 | adult_male | bool | 891 | 891 | 2 | 0 | 0.00 | 354 | 39.73 | |||||||
14 | alone | bool | 891 | 891 | 2 | 0 | 0.00 | 354 | 39.73 | |||||||
6 | fare | float64 | 891 | 891 | 248 | 0 | 0.00 | 15 | 1.68 | 32.20 | 49.69 | 0.00 | 512.33 | 7.91 | 14.45 | 31.00 |
1 | pclass | int64 | 891 | 891 | 3 | 0 | 0.00 | 0 | 0.00 | 2.31 | 0.84 | 1.00 | 3.00 | 2.00 | 3.00 | 3.00 |
2 | sex | object | 891 | 891 | 2 | 0 | 0.00 | 0 | 0.00 | |||||||
8 | class | category | 891 | 891 | 3 | 0 | 0.00 | 0 | 0.00 | |||||||
9 | who | object | 891 | 891 | 3 | 0 | 0.00 | 0 | 0.00 | |||||||
13 | alive | object | 891 | 891 | 2 | 0 | 0.00 | 0 | 0.00 |
# List the columns that duplicate another column; the titanic frame has
# none, so the result is an empty list.
df.bp.get_duplicate_columns()
[]
# Show the dtype of every column.
df.dtypes
survived int64 pclass int64 sex object age float64 sibsp int64 parch int64 fare float64 embarked object class object who object adult_male bool deck object embark_town object alive object alone bool dtype: object
# Small demo frame: column 'a_dup' repeats the values of column 'a',
# while 'b' holds different values.
df1 = pd.DataFrame(
    {
        'a': range(3),
        'b': range(1, 4),
        'a_dup': range(3),
    }
)
df1
a | b | a_dup | |
---|---|---|---|
0 | 0 | 1 | 0 |
1 | 1 | 2 | 1 |
2 | 2 | 3 | 2 |
# 'a_dup' holds the same values as 'a', so it is reported as a duplicate.
df1.bp.get_duplicate_columns()
a == a_dup
['a_dup']
# Report the columns that contain missing values, grouped into high/low
# lists by a missing-percentage threshold, with per-column statistics.
df.bp.missing()
Missing values high threshold = 80% Number of missing values features: 4 cols_missing_high = [] cols_missing_low = ['deck', 'age', 'embarked', 'embark_town']
Feature | Type | Count | Missing | Zeros | Unique | MissingPct | ZerosPct | count | mean | std | min | 25% | 50% | 75% | max | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
11 | deck | object | 891 | 688 | 0 | 7 | 77.216611 | 0.000000 | ||||||||
3 | age | float64 | 891 | 177 | 0 | 88 | 19.865320 | 0.000000 | 714.000000 | 29.699118 | 14.526497 | 0.420000 | 20.125000 | 28.000000 | 38.000000 | 80.000000 |
7 | embarked | object | 891 | 2 | 0 | 3 | 0.224467 | 0.000000 | ||||||||
12 | embark_town | object | 891 | 2 | 0 | 3 | 0.224467 | 0.000000 |