#!/usr/bin/env python
# coding: utf-8

# #### An example of handling missing values in data. We will use a modified version of the video store data (__[Video_Store_3.csv](http://facweb.cs.depaul.edu/mobasher/classes/CSC478/data/Video_Store_3.csv)__), which contains some missing values.

# In[1]:


import numpy as np
import pandas as pd


# In[2]:


# Read the video store data from its URL. The first CSV column becomes the
# row index, and "?" entries are parsed as missing values (NaN).
vstable = pd.read_csv(
    "http://facweb.cs.depaul.edu/mobasher/classes/csc478/data/Video_Store_3.csv",
    na_values=["?"],
    index_col=0,
)

# (rows, columns) of the loaded table.
vstable.shape


# In[3]:


# First ten rows of the table.
vstable.head(n=10)


# In[4]:


# Column dtypes and non-null counts.
vstable.info()


# In[5]:


# Summary statistics for every column, not only the numeric ones.
vstable.describe(include="all")


# In[6]:


# Boolean mask of missing cells, limited to the first ten rows.
vstable.isna()[0:10]


# In[7]:


# Every row that has at least one missing value.
vstable.loc[vstable.isna().any(axis="columns")]


# In[8]:


# Rows where Gender is missing.
vstable.loc[vstable["Gender"].isna()]


# In[9]:


# Rows where Age is missing.
vstable.loc[vstable["Age"].isna()]


# In[10]:


# Impute missing Age values with the column mean (mean() skips NaN by default).
age_mean = vstable["Age"].mean()
# Assign the filled column back rather than calling fillna(inplace=True) on
# vstable.Age: that chained in-place call works on an intermediate Series,
# raises a chained-assignment warning in pandas 2.x, and leaves vstable
# unchanged once Copy-on-Write is active.
vstable["Age"] = vstable["Age"].fillna(age_mean)


# In[11]:


# Show the first ten rows after imputing Age.
vstable.head(10)


# In[12]:


# drop() returns a new frame by default, so this only previews the table
# without the rows whose Gender is missing.
vstable.drop(index=vstable.loc[vstable["Gender"].isna()].index).head(10)


# In[13]:


# vstable itself is not modified by the call above.
vstable.head(n=10)


# #### To permanently remove the rows with NaN Gender values, pass "inplace=True" to the "drop" method.

# In[14]:


# Look up the index labels of the rows with a missing Gender and remove those
# rows from vstable itself.
vstable.drop(
    index=vstable.loc[vstable["Gender"].isna()].index,
    inplace=True,
)
vstable.head(n=10)


# #### It is also possible to use the "dropna" function to drop all rows that have one or more NaN values.

# In[15]:


# Load a fresh copy of the data into a second frame, again using the first
# CSV column as the index and parsing "?" as NaN.
vstable2 = pd.read_csv(
    "http://facweb.cs.depaul.edu/mobasher/classes/csc478/data/Video_Store_3.csv",
    na_values=["?"],
    index_col=0,
)


# In[16]:


# First ten rows of the freshly loaded copy.
vstable2.head(n=10)


# In[17]:


# Remove, in place, every row that contains at least one NaN, then report the
# resulting (rows, columns).
vstable2.dropna(axis="index", how="any", inplace=True)
vstable2.shape


# In[18]:


# First ten rows that remain after dropping incomplete rows.
vstable2.head(n=10)


# In[ ]: