Basic Plots¶

In [2]:

# Sample the sine function at 50 evenly spaced points on [0, 10].
x = np.linspace(0, 10, num=50)
sines = np.sin(x)

# With no format string, plot() joins the samples with a line.
plt.plot(x, sines)
plt.show()

In [3]:

# The "o" format string draws the samples as circle markers.
plt.plot(x, sines, "o")
plt.show()
# See the plt.plot docstring (help(plt.plot)) for the color / marker abbreviations.

In [4]:

# Rapid multiplot: several (x, y, fmt) triples passed to a single plot() call.
# Each curve is drawn twice, once as a line ("-") and once as markers ("o"),
# in blue ("b") for the sine and red ("r") for the cosine.
cosines = np.cos(x)
plt.plot(
    x, sines, "-b",
    x, sines, "ob",
    x, cosines, "-r",
    x, cosines, "or",
)
plt.title("Sin and Cosine plot")
plt.xlabel("This is X")
plt.ylabel("This is Y")
plt.show()

In [5]:

# Step by step: one plot() call per curve, styled through explicit keyword
# arguments; the `label` values are what plt.legend() displays.
plt.plot(x, sines, color='blue', linestyle='--', linewidth=2, label='sinus')
plt.plot(x, cosines, color='red', linestyle='-', linewidth=2, label='cosinus')

plt.legend()
plt.show()

Scatter (2D) plots¶

In [6]:

# Load the salary table, preferring the local copy of the dataset and falling
# back to the public GitHub copy when the local file is missing.
#
# `url` is defined before the try block so it always exists, and only
# FileNotFoundError triggers the fallback: any other failure (e.g. a parse
# error) is surfaced instead of being silently swallowed.
url = 'https://raw.githubusercontent.com/neurospin/pystatsml/master/datasets/salary_table.csv'

try:
    salary = pd.read_csv("../datasets/salary_table.csv")
except FileNotFoundError:
    salary = pd.read_csv(url)

# `df` is a second name for the same DataFrame object (not a copy).
df = salary

Simple scatter with colors¶

In [7]:

# One matplotlib color per education level; both names refer to the same dict.
colors_edu = {"Bachelor": "r", 'Master': 'g', 'Ph.D': 'blue'}
colors = colors_edu

# Color every point by looking up the education level of its row.
plt.scatter(
    df['experience'],
    df['salary'],
    c=df['education'].apply(lambda level: colors[level]),
    s=100,
)
plt.show()

Scatter plot with colors and symbols¶

In [8]:

# list(salary.groupby(['education', 'management']))

In [9]:

plt.figure(figsize=(6, 5))

# Marker shape encodes the management flag, color encodes the education level.
symbols_manag = {"Y": "*", "N": "."}
colors_edu = {'Bachelor': 'r', 'Master': 'g', 'Ph.D': 'b'}

## One scatter() call per (education, management) group, so that each group
## gets its own marker, color and legend entry.
for (edu, manager), d in salary.groupby(['education', 'management']):
    plt.scatter(
        d['experience'],
        d['salary'],
        s=150,
        color=colors_edu[edu],
        marker=symbols_manag[manager],
        label="/".join((manager, edu)),
    )


## Axis labels and legend
plt.xlabel('Experience')
plt.ylabel('Salary')
plt.legend(loc='lower right')  # same placement as location code 4
plt.show()

Seaborn¶

Boxplot¶

Box plots are non-parametric: they display variation in samples of a statistical population without making any assumptions about the underlying statistical distribution.

In [10]:

# Salary by education level; `hue` nests a second grouping variable
# (management) inside each education category.
sns.boxplot(data=salary, x='education', y='salary', hue='management')
plt.show()

# Background on the `hue` parameter:
# https://datascience.stackexchange.com/questions/46117/meaning-of-hue-in-seaborn-barplot

In [11]:

# Boxes first, then the individual observations drawn on top of them.
sns.boxplot(data=salary, x="management", y="salary", hue="education")
# dodge=True separates the strips by hue level so they line up with the
# boxes; jitter=True spreads the points along the categorical axis.
sns.stripplot(
    data=salary,
    x="management",
    y="salary",
    hue="education",
    jitter=True,
    dodge=True,
    linewidth=1,
)
plt.show()

Density Plots¶

In [12]:

# list(salary.groupby('education'))

# One subplot row per education level, all sharing the salary axis.
f, axes = plt.subplots(3, 1, figsize=(9, 9), sharex=True)

# Group on the column name itself rather than a one-element list: with a list
# key, pandas >= 2.0 yields 1-tuples such as ('Bachelor',), which would then be
# rendered verbatim as the panel title.
# zip() pairs each group with its own axes, replacing the manual counter.
# NOTE(review): assumes exactly three education levels (one per subplot row);
# extra groups would be silently skipped by zip().
for panel, (edu, d) in zip(axes, salary.groupby('education')):
    # NOTE(review): sns.distplot is deprecated in seaborn >= 0.11 (histplot /
    # kdeplot are the documented replacements); kept here for compatibility
    # with the seaborn version the rest of the notebook targets - confirm.
    sns.distplot(d.salary[d.management == 'Y'], color='b', bins=10, label="Manager", ax=panel)
    sns.distplot(d.salary[d.management == "N"], color='r', bins=10, label="Employee", ax=panel)
    panel.set_title(edu)
    panel.set_ylabel("Density")

plt.tight_layout(pad=2.0)
# Draw the legend on the bottom panel, which is the current axes that the
# original plt.legend() call targeted.
axes[-1].legend()
plt.show()

Violin plot (distribution)¶

A violin plot combines a boxplot with a kernel density estimate of the distribution.

In [13]:

# Violin plot of the whole salary column (no grouping).
ax = sns.violinplot(data=salary, x="salary")
# Reference: https://www.geeksforgeeks.org/violin-plot-for-data-analysis

In [14]:

# Same plot with an explicit kernel bandwidth scale factor (bw).
ax = sns.violinplot(data=salary, x="salary", bw=0.15)

In [15]:

# One violin per management flag, nested by education level via `hue`.
ax = sns.violinplot(data=salary, x="management", y="salary", hue="education")

In [16]:

# tips dataset

In [17]:

# Load seaborn's "tips" example dataset and preview its first five rows.
tips = sns.load_dataset("tips")
tips.head(n=5)

Out[17]:

	total_bill	tip	sex	smoker	day	time	size
0	16.99	1.01	Female	No	Sun	Dinner	2
1	10.34	1.66	Male	No	Sun	Dinner	3
2	21.01	3.50	Male	No	Sun	Dinner	3
3	23.68	3.31	Male	No	Sun	Dinner	2
4	24.59	3.61	Female	No	Sun	Dinner	4

In [18]:

# Violin plot of a single column passed directly as a Series.
ax = sns.violinplot(x=tips.total_bill, bw=0.2)

In [19]:

# One violin of total_bill per day.
ax = sns.violinplot(data=tips, x="day", y="total_bill", palette="muted")

In [20]:

# One violin per day, colored by time (lunch vs dinner); split=True draws the
# two hue levels as the two halves of each violin.
ax = sns.violinplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="time",
    split=True,
    palette="muted",
)

Pairwise scatter plots¶

In [21]:

# Grid of pairwise plots over the salary table, colored by management flag.
g = sns.PairGrid(data=salary, hue='management')
# Histograms on the diagonal, scatter plots everywhere else.
g.map_diag(plt.hist)
g.map_offdiag(plt.scatter)
ax = g.add_legend()

Table of Contents