# install
# pip install seaborn
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline
# Activate seaborn's default theme with the 'darkgrid' axes style in one call.
sns.set(style='darkgrid')
# 15 random integers drawn from the closed range [10, 100].
# np.random.random_integers is deprecated; randint excludes its upper bound,
# so 101 is passed to keep 100 as a possible value.
X = np.random.randint(10, 101, 15)
# Draw the values in X as a line; with a single argument the x-axis is the
# array index.
plt.plot(X)
[<matplotlib.lines.Line2D at 0xb4267b8>]
**Getting Data and Preprocessing**
# Column names to assign on load; the read below passes them as the header.
names = [
    'mpg',
    'cylinders',
    'displacement',
    'horsepower',
    'weight',
    'acceleration',
    'model_year',
    'origin',
    'car_name',
]

# Read the whitespace-delimited file and assign the header.
# '?' is treated as a missing-value marker and parsed directly to NaN, which
# replaces the deprecated DataFrame.applymap pass over every cell.
df = pd.read_csv(
    "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data",
    sep=r'\s+',  # raw string: '\s' is an invalid escape in a plain literal
    names=names,
    na_values='?',
)

# Maker is the first whitespace-separated word of the car name.
df['maker'] = df['car_name'].str.split().str[0]

# Replace the numeric origin codes with region labels.
df['origin'] = df['origin'].map({1: 'America', 2: 'Europe', 3: 'Asia'})

# Discard every row that has at least one missing value.
df = df.dropna()

# Explicit guarantee that horsepower is float, independent of how it was parsed.
df['horsepower'] = df['horsepower'].astype(float)
df.head()
| | mpg | cylinders | displacement | horsepower | weight | acceleration | model_year | origin | car_name | maker |
---|---|---|---|---|---|---|---|---|---|---|
0 | 18.0 | 8 | 307.0 | 130.0 | 3504.0 | 12.0 | 70 | America | chevrolet chevelle malibu | chevrolet |
1 | 15.0 | 8 | 350.0 | 165.0 | 3693.0 | 11.5 | 70 | America | buick skylark 320 | buick |
2 | 18.0 | 8 | 318.0 | 150.0 | 3436.0 | 11.0 | 70 | America | plymouth satellite | plymouth |
3 | 16.0 | 8 | 304.0 | 150.0 | 3433.0 | 12.0 | 70 | America | amc rebel sst | amc |
4 | 17.0 | 8 | 302.0 | 140.0 | 3449.0 | 10.5 | 70 | America | ford torino | ford |
# Point plot of mpg per model year.
# factorplot was renamed to catplot in seaborn 0.9 and later removed;
# kind="point" keeps factorplot's default plot type (catplot defaults to "strip").
sns.catplot(data=df, x="model_year", y="mpg", kind="point")
<seaborn.axisgrid.FacetGrid at 0xaa593c8>
# Point plot of mpg per model year, one facet column per origin.
# factorplot was renamed to catplot in seaborn 0.9 and later removed;
# kind="point" keeps factorplot's default plot type (catplot defaults to "strip").
sns.catplot(data=df, x="model_year", y="mpg", col="origin", kind="point")
<seaborn.axisgrid.FacetGrid at 0xb6fda20>