from __future__ import print_function
import sys,tempfile, urllib, os
import pandas as pd
import numpy as np
# "import urllib" alone does not load the "request" submodule; import it
# explicitly rather than relying on some other package having loaded it.
import urllib.request

# Local directory and file path for the downloaded Telco churn dataset.
BASE_DIR = '/tmp'
OUTPUT_FILE = os.path.join(BASE_DIR, 'churn_data.csv')

# Fetch the CSV and store it at OUTPUT_FILE. urlretrieve returns a
# (local_filename, headers) tuple, which is what churn_data holds.
churn_data = urllib.request.urlretrieve(
    'https://raw.githubusercontent.com/srivatsan88/YouTubeLI/master/dataset/WA_Fn-UseC_-Telco-Customer-Churn.csv',
    OUTPUT_FILE,
)
customerID - Customer ID
gender - Whether the customer is male or female
SeniorCitizen - Whether the customer is a senior citizen or not (1, 0)
Partner - Whether the customer has a partner or not (Yes, No)
Dependents - Whether the customer has dependents or not (Yes, No)
tenure - Number of months the customer has stayed with the company
PhoneService - Whether the customer has a phone service or not (Yes, No)
MultipleLines - Whether the customer has multiple lines or not (Yes, No, No phone service)
InternetService - Customer’s internet service provider (DSL, Fiber optic, No)
OnlineSecurity - Whether the customer has online security or not (Yes, No, No internet service)
OnlineBackup - Whether the customer has online backup or not (Yes, No, No internet service)
DeviceProtection - Whether the customer has device protection or not (Yes, No, No internet service)
TechSupport - Whether the customer has tech support or not (Yes, No, No internet service)
StreamingTV - Whether the customer has streaming TV or not (Yes, No, No internet service)
StreamingMovies - Whether the customer has streaming movies or not (Yes, No, No internet service)
Contract - The contract term of the customer (Month-to-month, One year, Two year)
PaperlessBilling - Whether the customer has paperless billing or not (Yes, No)
PaymentMethod - The customer’s payment method (Electronic check, Mailed check, Bank transfer (automatic), Credit card (automatic))
MonthlyCharges - The amount charged to the customer monthly
TotalCharges - The total amount charged to the customer
Churn - Whether the customer churned or not (Yes or No)
# Load the downloaded CSV; churn_master_df is kept as the untouched original.
churn_master_df = pd.read_csv(OUTPUT_FILE)
# Work on an independent copy: plain assignment would only create a second
# name for the same DataFrame, so in-place edits to churn_df would also
# change churn_master_df.
churn_df = churn_master_df.copy()
# Preview the first rows of the working frame.
churn_df.head()
| | customerID | gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 7590-VHVEG | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
1 | 5575-GNVDE | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No |
2 | 3668-QPYBK | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
3 | 7795-CFOCW | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
4 | 9237-HQITU | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
# Treat empty / whitespace-only cells as missing, then count the missing
# entries in each column.
blanks_as_nan = churn_df.replace(to_replace=r'^\s*$', value=np.nan, regex=True)
blanks_as_nan.isna().sum()
customerID 0 gender 0 SeniorCitizen 0 Partner 0 Dependents 0 tenure 0 PhoneService 0 MultipleLines 0 InternetService 0 OnlineSecurity 0 OnlineBackup 0 DeviceProtection 0 TechSupport 0 StreamingTV 0 StreamingMovies 0 Contract 0 PaperlessBilling 0 PaymentMethod 0 MonthlyCharges 0 TotalCharges 11 Churn 0 dtype: int64
# Replace empty / whitespace-only cells with NaN in the working frame.
churn_df = churn_df.replace(to_replace=r'^\s*$', value=np.nan, regex=True)

# Print a quick overview: dimensions, column names, which columns contain
# missing values, and the number of distinct values per column.
row_count, column_count = churn_df.shape
feature_names = churn_df.columns.tolist()
has_missing = churn_df.isnull().any()
distinct_counts = churn_df.nunique()

print("Rows : ", row_count)
print("Columns : ", column_count)
print("\nFeatures : \n", feature_names)
print("\nMissing values : ", has_missing)
print("\nUnique values : \n", distinct_counts)
Rows : 7043 Columns : 21 Features : ['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn'] Missing values : customerID False gender False SeniorCitizen False Partner False Dependents False tenure False PhoneService False MultipleLines False InternetService False OnlineSecurity False OnlineBackup False DeviceProtection False TechSupport False StreamingTV False StreamingMovies False Contract False PaperlessBilling False PaymentMethod False MonthlyCharges False TotalCharges True Churn False dtype: bool Unique values : customerID 7043 gender 2 SeniorCitizen 2 Partner 2 Dependents 2 tenure 73 PhoneService 2 MultipleLines 3 InternetService 3 OnlineSecurity 3 OnlineBackup 3 DeviceProtection 3 TechSupport 3 StreamingTV 3 StreamingMovies 3 Contract 3 PaperlessBilling 2 PaymentMethod 4 MonthlyCharges 1585 TotalCharges 6530 Churn 2 dtype: int64
# IPython shell escape: installs the autoviz package with pip. The leading
# "!" is notebook syntax and is not valid in a plain Python script.
!pip install autoviz
Requirement already satisfied: autoviz in /usr/local/lib/python3.6/dist-packages (0.0.68) Requirement already satisfied: ipython in /usr/local/lib/python3.6/dist-packages (from autoviz) (5.5.0) Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from autoviz) (0.25.3) Requirement already satisfied: jupyter in /usr/local/lib/python3.6/dist-packages (from autoviz) (1.0.0) Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (from autoviz) (3.1.3) Requirement already satisfied: seaborn in /usr/local/lib/python3.6/dist-packages (from autoviz) (0.10.0) Requirement already satisfied: scikit-learn in /usr/local/lib/python3.6/dist-packages (from autoviz) (0.22.1) Requirement already satisfied: pickleshare in /usr/local/lib/python3.6/dist-packages (from ipython->autoviz) (0.7.5) Requirement already satisfied: simplegeneric>0.8 in /usr/local/lib/python3.6/dist-packages (from ipython->autoviz) (0.8.1) Requirement already satisfied: prompt-toolkit<2.0.0,>=1.0.4 in /usr/local/lib/python3.6/dist-packages (from ipython->autoviz) (1.0.18) Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.6/dist-packages (from ipython->autoviz) (4.3.3) Requirement already satisfied: decorator in /usr/local/lib/python3.6/dist-packages (from ipython->autoviz) (4.4.1) Requirement already satisfied: pygments in /usr/local/lib/python3.6/dist-packages (from ipython->autoviz) (2.1.3) Requirement already satisfied: pexpect; sys_platform != "win32" in /usr/local/lib/python3.6/dist-packages (from ipython->autoviz) (4.8.0) Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.6/dist-packages (from ipython->autoviz) (45.2.0) Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->autoviz) (2018.9) Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from pandas->autoviz) (1.17.5) Requirement already satisfied: python-dateutil>=2.6.1 
in /usr/local/lib/python3.6/dist-packages (from pandas->autoviz) (2.6.1) Requirement already satisfied: notebook in /usr/local/lib/python3.6/dist-packages (from jupyter->autoviz) (5.2.2) Requirement already satisfied: ipykernel in /usr/local/lib/python3.6/dist-packages (from jupyter->autoviz) (4.6.1) Requirement already satisfied: nbconvert in /usr/local/lib/python3.6/dist-packages (from jupyter->autoviz) (5.6.1) Requirement already satisfied: qtconsole in /usr/local/lib/python3.6/dist-packages (from jupyter->autoviz) (4.7.1) Requirement already satisfied: ipywidgets in /usr/local/lib/python3.6/dist-packages (from jupyter->autoviz) (7.5.1) Requirement already satisfied: jupyter-console in /usr/local/lib/python3.6/dist-packages (from jupyter->autoviz) (5.2.0) Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->autoviz) (2.4.6) Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib->autoviz) (0.10.0) Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->autoviz) (1.1.0) Requirement already satisfied: scipy>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from seaborn->autoviz) (1.4.1) Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn->autoviz) (0.14.1) Requirement already satisfied: wcwidth in /usr/local/lib/python3.6/dist-packages (from prompt-toolkit<2.0.0,>=1.0.4->ipython->autoviz) (0.1.8) Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.6/dist-packages (from prompt-toolkit<2.0.0,>=1.0.4->ipython->autoviz) (1.12.0) Requirement already satisfied: ipython-genutils in /usr/local/lib/python3.6/dist-packages (from traitlets>=4.2->ipython->autoviz) (0.2.0) Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.6/dist-packages (from pexpect; sys_platform != "win32"->ipython->autoviz) (0.6.0) 
Requirement already satisfied: jupyter-core in /usr/local/lib/python3.6/dist-packages (from notebook->jupyter->autoviz) (4.6.2) Requirement already satisfied: tornado>=4 in /usr/local/lib/python3.6/dist-packages (from notebook->jupyter->autoviz) (4.5.3) Requirement already satisfied: jinja2 in /usr/local/lib/python3.6/dist-packages (from notebook->jupyter->autoviz) (2.11.1) Requirement already satisfied: nbformat in /usr/local/lib/python3.6/dist-packages (from notebook->jupyter->autoviz) (5.0.4) Requirement already satisfied: jupyter-client in /usr/local/lib/python3.6/dist-packages (from notebook->jupyter->autoviz) (5.3.4) Requirement already satisfied: terminado>=0.3.3; sys_platform != "win32" in /usr/local/lib/python3.6/dist-packages (from notebook->jupyter->autoviz) (0.8.3) Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.6/dist-packages (from nbconvert->jupyter->autoviz) (0.3) Requirement already satisfied: testpath in /usr/local/lib/python3.6/dist-packages (from nbconvert->jupyter->autoviz) (0.4.4) Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.6/dist-packages (from nbconvert->jupyter->autoviz) (0.8.4) Requirement already satisfied: bleach in /usr/local/lib/python3.6/dist-packages (from nbconvert->jupyter->autoviz) (3.1.0) Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.6/dist-packages (from nbconvert->jupyter->autoviz) (1.4.2) Requirement already satisfied: defusedxml in /usr/local/lib/python3.6/dist-packages (from nbconvert->jupyter->autoviz) (0.6.0) Requirement already satisfied: qtpy in /usr/local/lib/python3.6/dist-packages (from qtconsole->jupyter->autoviz) (1.9.0) Requirement already satisfied: widgetsnbextension~=3.5.0 in /usr/local/lib/python3.6/dist-packages (from ipywidgets->jupyter->autoviz) (3.5.1) Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from jinja2->notebook->jupyter->autoviz) (1.1.1) Requirement already 
satisfied: jsonschema!=2.5.0,>=2.4 in /usr/local/lib/python3.6/dist-packages (from nbformat->notebook->jupyter->autoviz) (2.6.0) Requirement already satisfied: pyzmq>=13 in /usr/local/lib/python3.6/dist-packages (from jupyter-client->notebook->jupyter->autoviz) (17.0.0) Requirement already satisfied: webencodings in /usr/local/lib/python3.6/dist-packages (from bleach->nbconvert->jupyter->autoviz) (0.5.1)
# Import the AutoViz entry-point class (third-party "autoviz" package).
from autoviz.AutoViz_Class import AutoViz_Class
# Create the AutoViz instance; AV.AutoViz(...) is called on it later in the file.
AV = AutoViz_Class()
Imported AutoViz_Class version: 0.0.68. Call using: from autoviz.AutoViz_Class import AutoViz_Class AV = AutoViz_Class() AutoViz(filename, sep=',', depVar='', dfte=None, header=0, verbose=0, lowess=False,chart_format='svg',max_rows_analyzed=150000,max_cols_analyzed=30) To remove previous versions, perform 'pip uninstall autoviz'
# Run AutoViz over the downloaded CSV with no target column (depVar="") and
# verbose output enabled.
# header=0: the column names are on the first line of the CSV, which matches
# AutoViz's documented default and the later call that sets depVar="Churn".
# The previous header=1 pointed at the first data row instead.
# NOTE(review): header is presumably forwarded to the CSV reader - confirm
# against the installed autoviz version.
dft = AV.AutoViz(
    "/tmp/churn_data.csv",
    sep=",",
    depVar="",
    dfte=None,
    header=0,
    verbose=1,
    lowess=False,
    chart_format='svg',
    max_rows_analyzed=7500,
    max_cols_analyzed=50,
)
Shape of your Data Set: (7043, 21) Classifying variables in data set... Number of Numeric Columns = 1 Number of Integer-Categorical Columns = 1 Number of String-Categorical Columns = 10 Number of Factor-Categorical Columns = 0 Number of String-Boolean Columns = 6 Number of Numeric-Boolean Columns = 1 Number of Discrete String Columns = 1 Number of NLP String Columns = 0 Number of Date Time Columns = 0 Number of ID Columns = 1 Number of Columns to Delete = 0 21 Predictors classified... This does not include the Target column(s) 2 variables removed since they were ID or low-information variables List of variables removed: ['customerID', 'TotalCharges'] Data Set Shape: 7043 rows, 19 cols Data Set columns info: * gender: 0 nulls, 2 unique vals, most common: {'Male': 3555, 'Female': 3488} * SeniorCitizen: 0 nulls, 2 unique vals, most common: {0: 5901, 1: 1142} * Partner: 0 nulls, 2 unique vals, most common: {'No': 3641, 'Yes': 3402} * Dependents: 0 nulls, 2 unique vals, most common: {'No': 4933, 'Yes': 2110} * tenure: 0 nulls, 73 unique vals, most common: {1: 613, 72: 362} * PhoneService: 0 nulls, 2 unique vals, most common: {'Yes': 6361, 'No': 682} * MultipleLines: 0 nulls, 3 unique vals, most common: {'No': 3390, 'Yes': 2971} * InternetService: 0 nulls, 3 unique vals, most common: {'Fiber optic': 3096, 'DSL': 2421} * OnlineSecurity: 0 nulls, 3 unique vals, most common: {'No': 3498, 'Yes': 2019} * OnlineBackup: 0 nulls, 3 unique vals, most common: {'No': 3088, 'Yes': 2429} * DeviceProtection: 0 nulls, 3 unique vals, most common: {'No': 3095, 'Yes': 2422} * TechSupport: 0 nulls, 3 unique vals, most common: {'No': 3473, 'Yes': 2044} * StreamingTV: 0 nulls, 3 unique vals, most common: {'No': 2810, 'Yes': 2707} * StreamingMovies: 0 nulls, 3 unique vals, most common: {'No': 2785, 'Yes': 2732} * Contract: 0 nulls, 3 unique vals, most common: {'Month-to-month': 3875, 'Two year': 1695} * PaperlessBilling: 0 nulls, 2 unique vals, most common: {'Yes': 4171, 'No': 2872} * 
PaymentMethod: 0 nulls, 4 unique vals, most common: {'Electronic check': 2365, 'Mailed check': 1612} * MonthlyCharges: 0 nulls, 1585 unique vals, most common: {20.05: 61, 19.85: 45} * Churn: 0 nulls, 2 unique vals, most common: {'No': 5174, 'Yes': 1869} ------ Columns to delete: ' []' Boolean variables %s (" ['gender', 'Partner', 'Dependents', 'PhoneService', 'PaperlessBilling', " "'Churn', 'SeniorCitizen']") Categorical variables %s (" ['MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', " "'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', " "'Contract', 'PaymentMethod', 'tenure', 'gender', 'Partner', 'Dependents', " "'PhoneService', 'PaperlessBilling', 'Churn', 'SeniorCitizen']") Continuous variables %s " ['MonthlyCharges']" Discrete string variables %s " ['TotalCharges']" Date and time variables %s ' []' ID variables %s " ['customerID']" Target variable %s ' ' Nothing to add Plot not being added
Nothing to add Plot not being added Number of Categorical and Continuous Vars exceeds limit, hence no Bar Plots Time to run AutoViz (in seconds) = 4.111 ###################### VISUALIZATION Completed ########################
# Run AutoViz again on the same CSV, this time passing "Churn" as depVar
# (the target column) and with verbose output switched off.
autoviz_options = dict(
    sep=",",
    depVar="Churn",
    dfte=None,
    header=0,
    verbose=0,
    lowess=False,
    chart_format='svg',
    max_rows_analyzed=7500,
    max_cols_analyzed=50,
)
dft = AV.AutoViz("/tmp/churn_data.csv", **autoviz_options)
Shape of your Data Set: (7043, 21) Classifying variables in data set... 20 Predictors classified... This does not include the Target column(s) 2 variables removed since they were ID or low-information variables Nothing to add Plot not being added Nothing to add Plot not being added Number of Cat and Continuous Vars exceeds 50, hence no Pivot or Bar Charts Time to run AutoViz (in seconds) = 1.884