import pandas as pd
import numpy as np
import plotly.plotly as py
from plotly.offline import init_notebook_mode, iplot
# Initialise Plotly's offline notebook mode before any iplot() call below.
init_notebook_mode(connected=True)
import plotly.graph_objs as go
# Load the student exam dataset from the working directory and preview it.
CSV_PATH = 'StudentsPerformance.csv'
df = pd.read_csv(CSV_PATH)
df.head(n=5)
gender | race/ethnicity | parental level of education | lunch | test preparation course | math score | reading score | writing score | |
---|---|---|---|---|---|---|---|---|
0 | female | group B | bachelor's degree | standard | none | 72 | 72 | 74 |
1 | female | group C | some college | standard | completed | 69 | 90 | 88 |
2 | female | group B | master's degree | standard | none | 90 | 95 | 93 |
3 | male | group A | associate's degree | free/reduced | none | 47 | 57 | 44 |
4 | male | group C | some college | standard | none | 76 | 78 | 75 |
# Count missing values per column (isna is the name isnull aliases).
df.isna().sum()
gender 0 race/ethnicity 0 parental level of education 0 lunch 0 test preparation course 0 math score 0 reading score 0 writing score 0 dtype: int64
# Print each column's dtype and non-null count, plus the frame's memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1000 entries, 0 to 999 Data columns (total 8 columns): gender 1000 non-null object race/ethnicity 1000 non-null object parental level of education 1000 non-null object lunch 1000 non-null object test preparation course 1000 non-null object math score 1000 non-null int64 reading score 1000 non-null int64 writing score 1000 non-null int64 dtypes: int64(3), object(5) memory usage: 62.6+ KB
# How many students fall into each test-preparation category?
prep_course = df['test preparation course']
prep_course.value_counts()
none 642 completed 358 Name: test preparation course, dtype: int64
# Split the students by test-preparation status. df_none and df_completed
# are module-level names that the reading and writing comparisons also read.
skipped_course = df['test preparation course'] == 'none'
took_course = df['test preparation course'] == 'completed'
df_none = df[skipped_course]
df_completed = df[took_course]
math_none = df_none['math score']
math_completed = df_completed['math score']

# One box per group: (scores, legend name, marker colour).
box_specs = [
    (math_none, 'Not Completed the Course', 'rgba(254,24,0,0.8)'),
    (math_completed, 'Completed the Course', 'rgba(5,140,20,0.8)'),
]
data = [
    go.Box(y=scores, name=label, marker=dict(color=rgba))
    for scores, label, rgba in box_specs
]
layout = go.Layout(title='Math Score Comparison')
fig = go.Figure(data=data, layout=layout)
iplot(fig)
# Reading scores for students without / with the test-preparation course.
reading_none = df_none['reading score']
reading_completed = df_completed['reading score']

# One box per group: (scores, legend name, marker colour).
box_specs = [
    (reading_none, 'Not Completed the Course', 'rgba(254,24,0,0.8)'),
    (reading_completed, 'Completed the Course', 'rgba(5,140,20,0.8)'),
]
data = [
    go.Box(y=scores, name=label, marker=dict(color=rgba))
    for scores, label, rgba in box_specs
]
layout = go.Layout(title='Reading Score Comparison')
fig = go.Figure(data=data, layout=layout)
iplot(fig)
# Writing scores for students without / with the test-preparation course.
writing_none = df_none['writing score']
writing_completed = df_completed['writing score']

# One box per group: (scores, legend name, marker colour).
box_specs = [
    (writing_none, 'Not Completed the Course', 'rgba(254,24,0,0.8)'),
    (writing_completed, 'Completed the Course', 'rgba(5,140,20,0.8)'),
]
data = [
    go.Box(y=scores, name=label, marker=dict(color=rgba))
    for scores, label, rgba in box_specs
]
layout = go.Layout(title='Writing Score Comparison')
fig = go.Figure(data=data, layout=layout)
iplot(fig)
# Preview the first five rows again as a column reference.
df.head(n=5)
gender | race/ethnicity | parental level of education | lunch | test preparation course | math score | reading score | writing score | |
---|---|---|---|---|---|---|---|---|
0 | female | group B | bachelor's degree | standard | none | 72 | 72 | 74 |
1 | female | group C | some college | standard | completed | 69 | 90 | 88 |
2 | female | group B | master's degree | standard | none | 90 | 95 | 93 |
3 | male | group A | associate's degree | free/reduced | none | 47 | 57 | 44 |
4 | male | group C | some college | standard | none | 76 | 78 | 75 |
# Number of students in each race/ethnicity group.
ethnicity = df['race/ethnicity']
ethnicity.value_counts()
group C 319 group D 262 group B 190 group E 140 group A 89 Name: race/ethnicity, dtype: int64
# Mean of each exam score per race/ethnicity group.
# numeric_only=True restricts the aggregation to the numeric score columns;
# without it, pandas >= 2.0 raises TypeError when mean() reaches the string
# columns (gender, lunch, ...).
race = df.groupby('race/ethnicity')
racedf = race.mean(numeric_only=True)

# Grouped bars: one bar per subject for every race/ethnicity group.
trace1 = go.Bar(
    x=racedf.index,
    y=racedf['math score'],
    name='Math',
)
trace2 = go.Bar(
    x=racedf.index,
    y=racedf['reading score'],
    name='Reading',
)
trace3 = go.Bar(
    x=racedf.index,
    y=racedf['writing score'],
    name='Writing',
)
data = [trace1, trace2, trace3]
layout = go.Layout(title='Average Score for different Race/Ethnicity')
fig = go.Figure(data=data, layout=layout)
iplot(fig)
# Preview the first five rows again as a column reference.
df.head(n=5)
gender | race/ethnicity | parental level of education | lunch | test preparation course | math score | reading score | writing score | |
---|---|---|---|---|---|---|---|---|
0 | female | group B | bachelor's degree | standard | none | 72 | 72 | 74 |
1 | female | group C | some college | standard | completed | 69 | 90 | 88 |
2 | female | group B | master's degree | standard | none | 90 | 95 | 93 |
3 | male | group A | associate's degree | free/reduced | none | 47 | 57 | 44 |
4 | male | group C | some college | standard | none | 76 | 78 | 75 |
# Scatter plot matrix of the exam score columns.
import plotly.figure_factory as ff

# Columns 5 onward are the math / reading / writing scores. Take an explicit
# copy so that adding the helper column below does not assign into a slice
# of df (which triggers pandas' SettingWithCopyWarning).
df_marks = df.iloc[:, 5:].copy()

# 1-based row number; passed as `index` to the scatter matrix together with
# the sequential 'Portland' colormap.
df_marks['index'] = np.arange(1, len(df_marks) + 1)

fig = ff.create_scatterplotmatrix(
    df_marks,
    diag='box',
    index='index',
    colormap='Portland',
    colormap_type='seq',
    height=700,
    width=700,
)
iplot(fig)
# Preview the first five rows again as a column reference.
df.head(n=5)
gender | race/ethnicity | parental level of education | lunch | test preparation course | math score | reading score | writing score | |
---|---|---|---|---|---|---|---|---|
0 | female | group B | bachelor's degree | standard | none | 72 | 72 | 74 |
1 | female | group C | some college | standard | completed | 69 | 90 | 88 |
2 | female | group B | master's degree | standard | none | 90 | 95 | 93 |
3 | male | group A | associate's degree | free/reduced | none | 47 | 57 | 44 |
4 | male | group C | some college | standard | none | 76 | 78 | 75 |
# Number of students in every (gender, lunch) combination.
gender_lunch = df.groupby(['gender', 'lunch'])
gender_lunch_count = gender_lunch.size()

# Grouping by two columns yields an index of (gender, lunch) tuples. Join each
# tuple into one string so every slice gets a plain text label such as
# 'female standard' instead of a raw tuple.
pie_labels = [' '.join(key) for key in gender_lunch_count.index]

trace = go.Pie(values=gender_lunch_count.values, labels=pie_labels)
data = [trace]
iplot(data)
# Preview the first five rows again as a column reference.
df.head(n=5)
gender | race/ethnicity | parental level of education | lunch | test preparation course | math score | reading score | writing score | |
---|---|---|---|---|---|---|---|---|
0 | female | group B | bachelor's degree | standard | none | 72 | 72 | 74 |
1 | female | group C | some college | standard | completed | 69 | 90 | 88 |
2 | female | group B | master's degree | standard | none | 90 | 95 | 93 |
3 | male | group A | associate's degree | free/reduced | none | 47 | 57 | 44 |
4 | male | group C | some college | standard | none | 76 | 78 | 75 |
# Mean exam scores for every (gender, lunch) combination.
# numeric_only=True restricts the aggregation to the numeric score columns;
# without it, pandas >= 2.0 raises TypeError on the remaining string columns.
gender_lunch_mean = gender_lunch.mean(numeric_only=True)
# Show the first index entry: each entry is a (gender, lunch) tuple.
gender_lunch_mean.index[0]
('female', 'free/reduced')
# Flatten each (gender, lunch) index tuple into one space-separated label.
new_data = [' '.join(pair) for pair in gender_lunch_mean.index]
new_data
['female free/reduced', 'female standard', 'male free/reduced', 'male standard']
# Grouped bars of the mean score per subject for each gender/lunch category.
# new_data holds the x-axis labels, one per row of gender_lunch_mean.
trace1 = go.Bar(
    x=new_data,
    y=gender_lunch_mean['math score'],
    name='Math',
)
trace2 = go.Bar(
    x=new_data,
    y=gender_lunch_mean['reading score'],
    name='Reading',
)
trace3 = go.Bar(
    x=new_data,
    y=gender_lunch_mean['writing score'],
    name='Writing',
)
data = [trace1, trace2, trace3]
# Title spelling corrected ("Catagories" -> "Categories").
layout = go.Layout(title='Average Score for Gender/Lunch Categories')
fig = go.Figure(data=data, layout=layout)
iplot(fig)
# Preview the first five rows again as a column reference.
df.head(n=5)
gender | race/ethnicity | parental level of education | lunch | test preparation course | math score | reading score | writing score | |
---|---|---|---|---|---|---|---|---|
0 | female | group B | bachelor's degree | standard | none | 72 | 72 | 74 |
1 | female | group C | some college | standard | completed | 69 | 90 | 88 |
2 | female | group B | master's degree | standard | none | 90 | 95 | 93 |
3 | male | group A | associate's degree | free/reduced | none | 47 | 57 | 44 |
4 | male | group C | some college | standard | none | 76 | 78 | 75 |
# Number of students per parental education level.
parent_education = df['parental level of education']
parent_education.value_counts()
some college 226 associate's degree 222 high school 196 some high school 179 bachelor's degree 118 master's degree 59 Name: parental level of education, dtype: int64
# Heatmaps of the mean score for each (parental education, test preparation)
# pair, one figure per subject.
#
# The 1000 student rows fall into only 6 x 2 distinct (x, y) cells, so the raw
# per-student columns do not describe one value per cell. Aggregate first so
# that every cell shows the group's average score.
heatmap_specs = [
    ('math score', 'Math Score'),
    ('reading score', 'Reading Score'),
    ('writing score', 'Writing Score'),
]
for score_column, title in heatmap_specs:
    # Rows: test preparation status; columns: parental education level.
    mean_grid = df.pivot_table(
        index='test preparation course',
        columns='parental level of education',
        values=score_column,
        aggfunc='mean',
    )
    trace = go.Heatmap(
        x=list(mean_grid.columns),
        y=list(mean_grid.index),
        z=mean_grid.values.tolist(),
    )
    fig = go.Figure(data=[trace], layout=go.Layout(title=title))
    iplot(fig)
# Preview the first five rows again as a column reference.
df.head(n=5)
gender | race/ethnicity | parental level of education | lunch | test preparation course | math score | reading score | writing score | |
---|---|---|---|---|---|---|---|---|
0 | female | group B | bachelor's degree | standard | none | 72 | 72 | 74 |
1 | female | group C | some college | standard | completed | 69 | 90 | 88 |
2 | female | group B | master's degree | standard | none | 90 | 95 | 93 |
3 | male | group A | associate's degree | free/reduced | none | 47 | 57 | 44 |
4 | male | group C | some college | standard | none | 76 | 78 | 75 |
# Violin plot of math scores per race/ethnicity group. The two lunch types
# are drawn on opposite sides ('negative' / 'positive') in overlay mode.
score_column = 'math score'
lunch_styles = [
    # (lunch value, legend/scale group, legend name, side, line colour)
    ('standard', 'Yes', 'Standard Lunch', 'negative', 'blue'),
    ('free/reduced', 'No', 'Free/Reduced Lunch', 'positive', 'green'),
]

violins = []
for lunch, group, label, side, colour in lunch_styles:
    rows = df['lunch'] == lunch
    violins.append(dict(
        type='violin',
        x=df['race/ethnicity'][rows],
        y=df[score_column][rows],
        legendgroup=group,
        scalegroup=group,
        name=label,
        side=side,
        box=dict(visible=True),
        meanline=dict(visible=True),
        line=dict(color=colour),
    ))

fig = dict(
    data=violins,
    layout=dict(
        title="Violin Plot For Math Scores",
        yaxis=dict(zeroline=False),
        violingap=0,
        violinmode="overlay",
    ),
)
iplot(fig)
# Violin plot of reading scores per race/ethnicity group. The two lunch types
# are drawn on opposite sides ('negative' / 'positive') in overlay mode.
score_column = 'reading score'
lunch_styles = [
    # (lunch value, legend/scale group, legend name, side, line colour)
    ('standard', 'Yes', 'Standard Lunch', 'negative', 'blue'),
    ('free/reduced', 'No', 'Free/Reduced Lunch', 'positive', 'green'),
]

violins = []
for lunch, group, label, side, colour in lunch_styles:
    rows = df['lunch'] == lunch
    violins.append(dict(
        type='violin',
        x=df['race/ethnicity'][rows],
        y=df[score_column][rows],
        legendgroup=group,
        scalegroup=group,
        name=label,
        side=side,
        box=dict(visible=True),
        meanline=dict(visible=True),
        line=dict(color=colour),
    ))

fig = dict(
    data=violins,
    layout=dict(
        title="Violin Plot For Reading Scores",
        yaxis=dict(zeroline=False),
        violingap=0,
        violinmode="overlay",
    ),
)
iplot(fig)
# Violin plot of writing scores per race/ethnicity group. The two lunch types
# are drawn on opposite sides ('negative' / 'positive') in overlay mode.
score_column = 'writing score'
lunch_styles = [
    # (lunch value, legend/scale group, legend name, side, line colour)
    ('standard', 'Yes', 'Standard Lunch', 'negative', 'blue'),
    ('free/reduced', 'No', 'Free/Reduced Lunch', 'positive', 'green'),
]

violins = []
for lunch, group, label, side, colour in lunch_styles:
    rows = df['lunch'] == lunch
    violins.append(dict(
        type='violin',
        x=df['race/ethnicity'][rows],
        y=df[score_column][rows],
        legendgroup=group,
        scalegroup=group,
        name=label,
        side=side,
        box=dict(visible=True),
        meanline=dict(visible=True),
        line=dict(color=colour),
    ))

fig = dict(
    data=violins,
    layout=dict(
        title="Violin Plot For Writing Scores",
        yaxis=dict(zeroline=False),
        violingap=0,
        violinmode="overlay",
    ),
)
iplot(fig)