import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Load the data set (file name suggests white-wine quality data); the CSV
# uses ';' as its field separator rather than a comma.
df = pd.read_csv('winequality-white.csv', sep=';')

# Quick structural overview. These are bare expressions: in a notebook each
# one is only rendered when it is the last statement of its cell.
df.head()            # first rows of the frame
df.shape             # (row count, column count)
df.columns.values    # column labels as an array
df.info()            # prints per-column dtype and non-null count
df.describe()        # summary statistics

# The 'quality' column: which distinct scores occur, and how often each does.
df['quality'].unique()
df['quality'].value_counts()
# Missing-value map: plots the boolean df.isnull() mask, so any missing cell
# shows up in a contrasting colour. Row labels and the colour bar are hidden.
sns.heatmap(df.isnull(), cmap='viridis', cbar=False, yticklabels=False)

# Pairwise correlations between the columns; computed once and reused for
# both the overview heatmap and the quality-focused selection below.
corr_matrix = df.corr()
plt.figure(figsize=(6, 4))
sns.heatmap(corr_matrix, annot=False, cmap='Blues')

# Quality correlation matrix
k = 12  # number of variables for heatmap
# Keep the k columns whose correlation with 'quality' is largest, ordered
# from strongest downwards, then show their mutual correlations annotated
# with the numeric values.
cols = corr_matrix.nlargest(k, 'quality')['quality'].index
cm = df[cols].corr()
plt.figure(figsize=(10, 6))
sns.heatmap(cm, cmap='viridis', annot=True)
# Per-column univariate plots: one figure of box plots, then one figure of
# distribution plots, each laid out on a grid with one subplot per column
# of df.
l = df.columns.values
number_of_columns = 12

# Number of *additional* grid rows needed beyond the first one.
# The original expression `len(l)-1/number_of_columns` parsed as
# `len(l) - (1/number_of_columns)`, producing a float close to len(l); that
# gave plt.subplot a non-integer row count and made the figure far too tall.
# max(..., 0) keeps the value non-negative when df has no columns.
number_of_rows = max(len(l) - 1, 0) // number_of_columns

# Actual number of rows in the subplot grid (always >= 1). The figure height
# is derived from this rather than from number_of_rows, which is 0 whenever
# every plot fits in a single row and would yield a zero-height figure.
grid_rows = number_of_rows + 1

# Set the style once, before any axes exist, so every subplot picks it up
# (previously it was re-applied on every iteration, after the first axes
# had already been created).
sns.set_style('whitegrid')

# Figure 1: a vertical box plot for each column.
plt.figure(figsize=(number_of_columns, 5 * grid_rows))
for position, column in enumerate(l, start=1):
    plt.subplot(grid_rows, number_of_columns, position)
    # Pass the data as `y=` explicitly: a positional Series is interpreted
    # differently across seaborn versions, whereas y= is always vertical.
    sns.boxplot(y=df[column], color='green', orient='v')
# Applied once after all axes exist; the final layout is what matters.
plt.tight_layout()

# Figure 2: histogram with a KDE overlay for each column.
plt.figure(figsize=(2 * number_of_columns, 5 * grid_rows))
for position, column in enumerate(l, start=1):
    plt.subplot(grid_rows, number_of_columns, position)
    # NOTE(review): sns.distplot is deprecated in recent seaborn releases;
    # consider sns.histplot(..., kde=True) once the minimum seaborn version
    # for this notebook is confirmed.
    sns.distplot(df[column], kde=True)