import plotly.graph_objs as go
import plotly.plotly as py
# from plotly import tools
from plotly.offline import init_notebook_mode
import plotly.offline as offline
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import rgb2hex
init_notebook_mode(connected=False)
# tools.set_credentials_file(username='your_username', api_key='your_apikey')

# Read the raw survey export and lower-case every column label so that the
# lookups further down can use lower-case question text.
raw_csv_path = 'pythondevsurvey2017_raw_data.csv'
survey_df = pd.read_csv(raw_csv_path)
survey_df.columns = list(map(str.lower, survey_df.columns))
survey_df.head()
is python the main language you use for your current projects? | none:what other language(s) do you use? | java:what other language(s) do you use? | javascript:what other language(s) do you use? | c/c++:what other language(s) do you use? | php:what other language(s) do you use? | c#:what other language(s) do you use? | ruby:what other language(s) do you use? | bash / shell:what other language(s) do you use? | objective-c:what other language(s) do you use? | ... | technical support:which of the following best describes your job role(s)? | data analyst:which of the following best describes your job role(s)? | business analyst:which of the following best describes your job role(s)? | team lead:which of the following best describes your job role(s)? | product manager:which of the following best describes your job role(s)? | cio / ceo / cto:which of the following best describes your job role(s)? | systems analyst:which of the following best describes your job role(s)? | other - write in::which of the following best describes your job role(s)? | could you tell us your age range? | what country do you live in? | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Yes | NaN | NaN | JavaScript | NaN | PHP | NaN | NaN | Bash / Shell | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 60 or older | Italy |
1 | Yes | NaN | NaN | JavaScript | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | Team lead | NaN | NaN | NaN | NaN | 40-49 | United Kingdom |
2 | Yes | NaN | NaN | JavaScript | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 40-49 | France |
3 | No, I don’t use Python for my current projects | NaN | NaN | NaN | NaN | NaN | C# | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 17 or younger | Spain |
4 | Yes | NaN | Java | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 18-20 | Israel |
5 rows × 162 columns
def find_cols(df, kws):
    """Return the column labels of ``df`` that contain every keyword in ``kws``.

    Args:
        df: Object exposing a ``columns`` iterable of string labels.
        kws: Iterable of substrings that must all occur in a label. A single
            string is treated as one keyword rather than being iterated
            character by character.

    Returns:
        A list of the matching column labels, in their original order.
    """
    # Iterating a bare string yields its characters, which would match any
    # column that merely contains the same letters; wrap it instead.
    if isinstance(kws, str):
        kws = [kws]
    return [col for col in df.columns if all(kw in col for kw in kws)]
# `mode` may be any combination of 'lines', 'markers' and 'text'.
# Colors may be given in rgb or hexadecimal form.
# For large amounts of data, go.Scattergl can be used for drawing.
xs = np.arange(100)
ys = np.random.rand(100)
trace1 = go.Scatter(x=xs, y=ys, mode='lines+markers', marker=dict(color='red'))
data = [trace1]
layout = dict(title='散点+折线')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# Tally the answers to the "most used Python version" question.
version_question = 'which version of python do you use the most?'
python_version = survey_df[version_question]
counts = python_version.value_counts()
counts
Python 3 6046 Python 2 2066 Name: which version of python do you use the most?, dtype: int64
# Pie chart of the answer counts held in `counts`.
labels, values = counts.index, counts.values
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))
trace = go.Pie(
    labels=labels,
    values=values,
    marker=dict(colors=colors),
    rotation=0,
    hoverinfo='label+value',
)
data = [trace]
layout = dict(title='Python 2 VS Python 3')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# Cross-tabulate the Python version against the data-analysis and the
# machine-learning answers, then place both tables side by side.
version_col = 'which version of python do you use the most?'
da_col = 'data analysis:\xa0what do you use python for?'
ml_col = 'machine learning:\xa0what do you use python for?'
python_da_ml = survey_df[[ml_col, da_col, version_col]]
python_da, python_ml = (
    pd.crosstab(python_da_ml[version_col], python_da_ml[use_col], normalize=True)
    for use_col in (da_col, ml_col)
)
da_ml = pd.concat([python_da, python_ml], axis=1)
da_ml
Data analysis | Machine learning | |
---|---|---|
which version of python do you use the most? | ||
Python 2 | 0.233177 | 0.193548 |
Python 3 | 0.766823 | 0.806452 |
# One bar trace per Python version, read from the matching row of `da_ml`.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))
python2, python3 = (
    go.Bar(x=da_ml.columns, y=da_ml.loc[version], name=version, marker=dict(color=shade))
    for version, shade in zip(('Python 2', 'Python 3'), colors)
)
data = [python2, python3]
fig = go.Figure(data=data)
offline.iplot(fig, show_link=False)
# Select the columns belonging to the frameworks question.  The question text
# is wrapped in a list so it is matched as one keyword: a bare string would be
# iterated character by character and would also match unrelated columns that
# merely contain the same letters.
framework_question = 'what framework(s) do you use in addition to python?'
cols = find_cols(survey_df, [framework_question])
# NOTE(review): the first match is skipped -- presumably the "none" answer
# column; confirm against the CSV header.
frameworks = survey_df[cols[1:]]
# Non-null answers per framework, most used first.
count_df = frameworks.count().sort_values(ascending=False)
# Keep only the answer part that precedes the ':' in each column label.
count_df.index = [item.split(':')[0] for item in count_df.index]
count_df.head()
django 3363 numpy / pandas / matplotlib / scipy and similar 3163 requests 2769 flask 2607 cloud platforms (google app engine, aws, rackspace, heroku and similar) 1960 dtype: int64
# Vertical bar chart of `count_df`, one rainbow color per bar.
colors = list(map(rgb2hex, sns.color_palette('rainbow', len(count_df))))
trace = go.Bar(x=count_df.index, y=count_df.values, marker=dict(color=colors))
data = [trace]
layout = dict(title='Framework Usage')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# When the Y-axis tick labels are too long, the `automargin` property of
# `yaxis` in the layout can be set to True.
# A custom margin can be specified as well.
reversed_labels = count_df.index[::-1]
reversed_counts = count_df.values[::-1]
trace = go.Bar(
    y=reversed_labels,
    x=reversed_counts,
    marker=dict(color=colors[::-1]),
    orientation='h',
)
data = [trace]
layout = go.Layout(
    title='Framework Usage',
    margin=dict(r=10),
    height=1000,
    yaxis=dict(automargin=True),
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
def _first_crosstab_column(col):
    """Cross-tabulate ``python_version`` against ``col``; return the first column."""
    return pd.crosstab(index=python_version, columns=col).iloc[:, 0]


# Per-framework counts by Python version, scaled so each column sums to 1.
frameworks_pyver = frameworks.apply(_first_crosstab_column)
frameworks_pyver = frameworks_pyver.div(frameworks_pyver.sum(axis=0), axis='columns')
frameworks_pyver.columns = [label.split(':')[0] for label in frameworks.columns]
frameworks_pyver
django | flask | tornado | bottle | web2py | numpy / pandas / matplotlib / scipy and similar | keras / theano / tensorflow / scikit-learn and similar | pillow | pyqt / pygtk / wxpython | tkinter | ... | requests | asyncio | kivy | six | aiohttp | other - write in | cloud platforms (google app engine, aws, rackspace, heroku and similar) | jupyter notebook | komodo editor | komodo ide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
which version of python do you use the most? | |||||||||||||||||||||
Python 2 | 0.250074 | 0.260069 | 0.282353 | 0.294326 | 0.292169 | 0.229845 | 0.194118 | 0.264916 | 0.264836 | 0.186567 | ... | 0.275551 | 0.125165 | 0.179949 | 0.378594 | 0.100228 | 0.343606 | 0.281122 | 0.198851 | 0.262195 | 0.318919 |
Python 3 | 0.749926 | 0.739931 | 0.717647 | 0.705674 | 0.707831 | 0.770155 | 0.805882 | 0.735084 | 0.735164 | 0.813433 | ... | 0.724449 | 0.874835 | 0.820051 | 0.621406 | 0.899772 | 0.656394 | 0.718878 | 0.801149 | 0.737805 | 0.681081 |
2 rows × 24 columns
# Grouped vertical bars: one trace per row of `frameworks_pyver`.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))
py2, py3 = (
    go.Bar(
        x=frameworks_pyver.columns,
        y=frameworks_pyver.loc[version],
        marker=dict(color=shade),
        name=version,
    )
    for version, shade in zip(('Python 2', 'Python 3'), colors)
)
data = [py2, py3]
layout = go.Layout(title='Python 2 and Python 3 Usage among Frameworks')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# Same data as horizontal bars; the taller figure and automargin leave room
# for the framework labels on the Y axis.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))
py2, py3 = (
    go.Bar(
        y=frameworks_pyver.columns,
        x=frameworks_pyver.loc[version],
        marker=dict(color=shade),
        orientation='h',
        name=version,
    )
    for version, shade in zip(('Python 2', 'Python 3'), colors)
)
data = [py2, py3]
layout = go.Layout(
    title='Python 2 and Python 3 Usage among Frameworks',
    height=1000,
    yaxis=dict(automargin=True),
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
import plotly.figure_factory as ff
# Distribution plot with one group per row of `frameworks_pyver`.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))
version_labels = ['Python 2', 'Python 3']
fig = ff.create_distplot(
    hist_data=[frameworks_pyver.loc[label] for label in version_labels],
    group_labels=version_labels,
    bin_size=0.05,
    colors=colors,
)
offline.iplot(fig, show_link=False)
# Marker-only scatter: frameworks on X, per-version share on Y.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))
py2, py3 = (
    go.Scatter(
        x=frameworks_pyver.columns,
        y=frameworks_pyver.loc[version],
        mode='markers',
        marker=dict(color=shade),
        name=version,
    )
    for version, shade in zip(('Python 2', 'Python 3'), colors)
)
data = [py2, py3]
layout = go.Layout(title='Python 2 and Python 3 Usage among Frameworks')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# Horizontal marker scatter; labels and values are both reversed before
# being handed to the traces.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))
framework_labels = frameworks_pyver.columns[::-1]
py2, py3 = (
    go.Scatter(
        y=framework_labels,
        x=frameworks_pyver.loc[version][::-1],
        mode='markers',
        marker=dict(color=shade),
        orientation='h',
        name=version,
    )
    for version, shade in zip(('Python 2', 'Python 3'), colors)
)
data = [py2, py3]
layout = go.Layout(
    title='Python 2 and Python 3 Usage among Frameworks',
    margin=dict(r=10),
    height=1000,
    yaxis=dict(automargin=True),
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
frameworks_pyver
django | flask | tornado | bottle | web2py | numpy / pandas / matplotlib / scipy and similar | keras / theano / tensorflow / scikit-learn and similar | pillow | pyqt / pygtk / wxpython | tkinter | ... | requests | asyncio | kivy | six | aiohttp | other - write in | cloud platforms (google app engine, aws, rackspace, heroku and similar) | jupyter notebook | komodo editor | komodo ide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
which version of python do you use the most? | |||||||||||||||||||||
Python 2 | 0.250074 | 0.260069 | 0.282353 | 0.294326 | 0.292169 | 0.229845 | 0.194118 | 0.264916 | 0.264836 | 0.186567 | ... | 0.275551 | 0.125165 | 0.179949 | 0.378594 | 0.100228 | 0.343606 | 0.281122 | 0.198851 | 0.262195 | 0.318919 |
Python 3 | 0.749926 | 0.739931 | 0.717647 | 0.705674 | 0.707831 | 0.770155 | 0.805882 | 0.735084 | 0.735164 | 0.813433 | ... | 0.724449 | 0.874835 | 0.820051 | 0.621406 | 0.899772 | 0.656394 | 0.718878 | 0.801149 | 0.737805 | 0.681081 |
2 rows × 24 columns
# Reorder the framework columns by their 'Python 3' row, largest value first.
sorted_frameworks_pyver = frameworks_pyver.sort_values(
    by='Python 3',
    axis=1,
    ascending=False,
)
sorted_frameworks_pyver
aiohttp | asyncio | pygame | kivy | tkinter | keras / theano / tensorflow / scikit-learn and similar | jupyter notebook | numpy / pandas / matplotlib / scipy and similar | django | flask | ... | cloud platforms (google app engine, aws, rackspace, heroku and similar) | tornado | pyramid | web2py | bottle | cherrypy | komodo ide | other - write in | six | twisted | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
which version of python do you use the most? | |||||||||||||||||||||
Python 2 | 0.100228 | 0.125165 | 0.164349 | 0.179949 | 0.186567 | 0.194118 | 0.198851 | 0.229845 | 0.250074 | 0.260069 | ... | 0.281122 | 0.282353 | 0.291367 | 0.292169 | 0.294326 | 0.316964 | 0.318919 | 0.343606 | 0.378594 | 0.39934 |
Python 3 | 0.899772 | 0.874835 | 0.835651 | 0.820051 | 0.813433 | 0.805882 | 0.801149 | 0.770155 | 0.749926 | 0.739931 | ... | 0.718878 | 0.717647 | 0.708633 | 0.707831 | 0.705674 | 0.683036 | 0.681081 | 0.656394 | 0.621406 | 0.60066 |
2 rows × 24 columns
# Horizontal marker scatter of the sorted table; labels and values are both
# reversed before being handed to the traces.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))
sorted_labels = sorted_frameworks_pyver.columns[::-1]
py2, py3 = (
    go.Scatter(
        y=sorted_labels,
        x=sorted_frameworks_pyver.loc[version][::-1],
        mode='markers',
        marker=dict(color=shade),
        orientation='h',
        name=version,
    )
    for version, shade in zip(('Python 2', 'Python 3'), colors)
)
data = [py2, py3]
layout = go.Layout(
    title='Python 2 and Python 3 Usage among Frameworks',
    margin=dict(r=10),
    height=1000,
    yaxis=dict(automargin=True),
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
cols = find_cols(survey_df, ['use', 'python', 'most'])
# The column is addressed by its full label here rather than through `cols`.
uses = survey_df['what do you use python for the most?']


def _first_crosstab_column_by_use(col):
    """Cross-tabulate ``uses`` against ``col``; return the first column."""
    return pd.crosstab(index=uses, columns=col).iloc[:, 0]


frameworks_uses = frameworks.apply(_first_crosstab_column_by_use)
frameworks_uses.columns = [label.split(':')[0] for label in frameworks_uses.columns]
frameworks_uses.head()
django | flask | tornado | bottle | web2py | numpy / pandas / matplotlib / scipy and similar | keras / theano / tensorflow / scikit-learn and similar | pillow | pyqt / pygtk / wxpython | tkinter | ... | requests | asyncio | kivy | six | aiohttp | other - write in | cloud platforms (google app engine, aws, rackspace, heroku and similar) | jupyter notebook | komodo editor | komodo ide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Computer graphics | 20 | 15 | 7 | 3 | 3 | 47 | 11.0 | 23 | 34 | 16 | ... | 16 | 3.0 | 9 | 3.0 | 1 | 8 | 16 | 10 | 5 | 3 |
Data analysis | 424 | 395 | 73 | 38 | 49 | 926 | 397.0 | 159 | 200 | 154 | ... | 376 | 85.0 | 44 | 94.0 | 46 | 81 | 279 | 594 | 27 | 25 |
Desktop development | 151 | 114 | 17 | 13 | 21 | 156 | 28.0 | 79 | 193 | 139 | ... | 119 | 20.0 | 51 | 26.0 | 8 | 41 | 61 | 71 | 13 | 15 |
DevOps / System administration / Writing automation scripts | 271 | 289 | 41 | 33 | 28 | 230 | 58.0 | 83 | 106 | 80 | ... | 343 | 97.0 | 20 | 64.0 | 53 | 68 | 227 | 113 | 24 | 23 |
Educational purposes | 160 | 91 | 13 | 16 | 21 | 186 | 53.0 | 55 | 68 | 115 | ... | 68 | 17.0 | 35 | 6.0 | 9 | 22 | 80 | 96 | 16 | 20 |
5 rows × 24 columns
# Keep only the 'Data analysis' and 'Machine learning' rows.
da_ml_frameworks_uses = frameworks_uses.loc[['Data analysis', 'Machine learning']]
# Valid values for `fill`: ['none', 'tozeroy', 'tozerox', 'tonexty', 'tonextx', 'toself', 'tonext']
colors = [rgb2hex(i) for i in sns.color_palette('rainbow')[:2]]
# Each trace is named after the row it plots so the legend identifies the
# use case (not a Python version).
da = go.Scatter(x=da_ml_frameworks_uses.columns, y=da_ml_frameworks_uses.loc['Data analysis'], fill='tozeroy', marker={'color': colors[0]}, name='Data analysis')
ml = go.Scatter(x=da_ml_frameworks_uses.columns, y=da_ml_frameworks_uses.loc['Machine learning'], fill='tozeroy', marker={'color': colors[1]}, name='Machine learning')
data = [da, ml]
layout = go.Layout(title='Frameworks Usage among Data Analysis and Machine Learning Developers')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# Order the framework columns by their 'Data analysis' count, smallest first.
sorted_da_ml_frameworks_uses = da_ml_frameworks_uses.sort_values(by='Data analysis', axis=1, ascending=True)
colors = [rgb2hex(i) for i in sns.color_palette('rainbow')[:2]]
# Each trace is named after the row it plots so the legend identifies the
# use case (not a Python version).
da = go.Scatter(x=sorted_da_ml_frameworks_uses.columns, y=sorted_da_ml_frameworks_uses.loc['Data analysis'], fill='tozeroy', marker={'color': colors[0]}, name='Data analysis')
ml = go.Scatter(x=sorted_da_ml_frameworks_uses.columns, y=sorted_da_ml_frameworks_uses.loc['Machine learning'], fill='tozeroy', marker={'color': colors[1]}, name='Machine learning')
data = [da, ml]
layout = go.Layout(title='Frameworks Usage among Data Analysis and Machine Learning Developers')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# Horizontal grouped bars, one trace per row of the sorted table.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))
da, ml = (
    go.Bar(
        y=sorted_da_ml_frameworks_uses.columns,
        x=sorted_da_ml_frameworks_uses.loc[use_case],
        marker=dict(color=shade),
        orientation='h',
        name=use_case,
    )
    for use_case, shade in zip(('Data analysis', 'Machine learning'), colors)
)
data = [da, ml]
layout = go.Layout(
    title='Frameworks Usage among Data Analysis and Machine Learning Developers',
    height=1000,
    yaxis=dict(automargin=True),
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# Python version counts per team size, restricted to the listed size buckets.
cols = find_cols(survey_df, ['how', 'many', 'people', 'project'])
team_scale = survey_df[cols[0]]
team_pyver = pd.crosstab(team_scale, python_version)
team_size_order = [
    '2-7 people',
    '8-12 people',
    '13-20 people',
    '21-40 people',
    'More than 40 people',
]
team_pyver = team_pyver.reindex(team_size_order)
# Turn counts into per-row shares, then order rows by the 'Python 3' share
# (this ordering replaces the bucket order set by the reindex above).
row_totals = team_pyver.sum(axis=1)
team_pyver_sorted = team_pyver.div(row_totals, axis=0).sort_values(by='Python 3', ascending=False)
team_pyver_sorted
which version of python do you use the most? | Python 2 | Python 3 |
---|---|---|
how many people are in your project team? | ||
2-7 people | 0.331180 | 0.668820 |
8-12 people | 0.358824 | 0.641176 |
21-40 people | 0.373134 | 0.626866 |
13-20 people | 0.382353 | 0.617647 |
More than 40 people | 0.411765 | 0.588235 |
# Line + marker chart of the 'Python 3' share per team size; `colors` is the
# palette left over from an earlier cell.
trace = go.Scatter(
    x=team_pyver_sorted.index,
    y=team_pyver_sorted['Python 3'],
    marker=dict(color=colors[0]),
    mode='lines+markers',
    line=dict(width=2),
)
data = [trace]
layout = go.Layout(title='Team scale VS Use ratio of Python 3')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# Python version shares per age range (each row sums to 1).
cols = find_cols(survey_df, ['age', 'range'])
age = survey_df[cols[0]]
age_pyver = pd.crosstab(index=age, columns=python_version)
age_row_totals = age_pyver.sum(axis=1)
age_pyver = age_pyver.div(age_row_totals, axis=0)
age_pyver
which version of python do you use the most? | Python 2 | Python 3 |
---|---|---|
could you tell us your age range? | ||
17 or younger | 0.148036 | 0.851964 |
18-20 | 0.160830 | 0.839170 |
21-29 | 0.259882 | 0.740118 |
30-39 | 0.287273 | 0.712727 |
40-49 | 0.289535 | 0.710465 |
50-59 | 0.347079 | 0.652921 |
60 or older | 0.225225 | 0.774775 |
# Line + marker chart of the 'Python 3' share per age range; `colors` is the
# palette left over from an earlier cell.
trace = go.Scatter(
    x=age_pyver.index,
    y=age_pyver['Python 3'],
    marker=dict(color=colors[0]),
    mode='lines+markers',
    line=dict(width=2),
    name='Python 3',
)
data = [trace]
layout = go.Layout(title="The developers' age VS The use ratio of Python 3")
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# Age-range counts indexed by (country, Python version).
country_age = pd.crosstab(
    [
        survey_df['what country do you live in?'],
        survey_df['which version of python do you use the most?'],
    ],
    survey_df['could you tell us your age range?'],
)
# Collapse the Python-version level by summing per country.  groupby(level=0)
# replaces DataFrame.sum(level=0), which newer pandas releases no longer accept.
country_age_total = country_age.groupby(level=0).sum()
country_age_total.head()
could you tell us your age range? | 17 or younger | 18-20 | 21-29 | 30-39 | 40-49 | 50-59 | 60 or older |
---|---|---|---|---|---|---|---|
what country do you live in? | |||||||
Afghanistan | 3 | 1 | 1 | 1 | 0 | 0 | 0 |
Albania | 0 | 4 | 8 | 4 | 1 | 1 | 0 |
Algeria | 1 | 2 | 15 | 4 | 1 | 0 | 0 |
Andorra | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
Antigua and Barbuda | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
# Rank countries by the '60 or older' column and plot the first ten rows,
# taking the bar heights from the last column of the table.
sorted_country_age_total = country_age_total.sort_values(by='60 or older', ascending=False)
colors = list(map(rgb2hex, sns.color_palette('rainbow', 10)))
top10_labels = sorted_country_age_total.index[:10]
top10_values = sorted_country_age_total.iloc[:10, -1]
trace = go.Bar(x=top10_labels, y=top10_values, marker=dict(color=colors))
data = [trace]
layout = dict(title='Top 10 countries of # of the developers whose age are 60+')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# Age-range shares (each row sums to 1) for the three selected countries.
selected_countries = ['United States', 'India', 'China']
three_countries = country_age_total.loc[selected_countries]
country_totals = three_countries.sum(axis=1)
three_countries = three_countries.div(country_totals, axis=0)
three_countries
could you tell us your age range? | 17 or younger | 18-20 | 21-29 | 30-39 | 40-49 | 50-59 | 60 or older |
---|---|---|---|---|---|---|---|
what country do you live in? | |||||||
United States | 0.083621 | 0.070491 | 0.293711 | 0.287491 | 0.156876 | 0.069109 | 0.038701 |
India | 0.043810 | 0.222857 | 0.557143 | 0.137143 | 0.030476 | 0.006667 | 0.001905 |
China | 0.029260 | 0.077453 | 0.645439 | 0.213425 | 0.034423 | 0.000000 | 0.000000 |
# One bar trace per age-range column, colored by its position in the palette.
colors = list(map(rgb2hex, sns.color_palette('rainbow', 7)))
data = []
for position, age_group in enumerate(three_countries.columns):
    data.append(
        go.Bar(
            x=three_countries.index,
            y=three_countries[age_group],
            marker=dict(color=colors[position]),
            name=age_group,
        )
    )
layout = go.Layout(title="Age distribution of the developers who're from USA, India and China")
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# Country answers and the number of respondents per country, largest first.
cols = find_cols(survey_df, ['country', 'live'])
country_column = cols[0]
countries = survey_df[country_column]
count_countries = countries.value_counts(ascending=False)
countries.head()
0 Italy 1 United Kingdom 2 France 3 Spain 4 Israel Name: what country do you live in?, dtype: object
# Equivalent to sns.countplot.
colors = list(map(rgb2hex, sns.color_palette('rainbow', 10)))
trace = go.Bar(
    x=count_countries.index[:10],
    y=count_countries[:10],
    marker=dict(color=colors),
)
data = [trace]
layout = go.Layout(title='Top 10 countries of # of the developers')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# Python version shares (each row sums to 1) for the ten most frequent countries.
countries_pyver = pd.crosstab(index=countries, columns=python_version)
top10_index = countries.value_counts().index[:10]
top10_countries = countries_pyver.loc[top10_index]
top10_totals = top10_countries.sum(axis=1)
top10_countries = top10_countries.div(top10_totals, axis=0)
top10_countries.head()
which version of python do you use the most? | Python 2 | Python 3 |
---|---|---|
United States | 0.272979 | 0.727021 |
India | 0.303810 | 0.696190 |
China | 0.254733 | 0.745267 |
United Kingdom | 0.220126 | 0.779874 |
Germany | 0.219638 | 0.780362 |
# Grouped bars: one trace per Python version column of `top10_countries`.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))
py2, py3 = (
    go.Bar(
        x=top10_countries.index,
        y=top10_countries[version],
        marker=dict(color=shade),
        name=version,
    )
    for version, shade in zip(('Python 2', 'Python 3'), colors)
)
data = [py2, py3]
layout = go.Layout(title='Python 2 and Python 3 Usage among Different Countries')
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# Per-country share of each Python version (each row sums to 1).
countries_pyver_ratio = countries_pyver.div(countries_pyver.sum(axis=1), axis=0)
# Equivalent to sns.distplot.
# Note: hist_data, group_labels and colors must all be lists, with one element
# per dataset.
colors = [rgb2hex(i) for i in sns.color_palette('rainbow')[:2]]
fig = ff.create_distplot(hist_data=[countries_pyver_ratio['Python 3']], group_labels=['Python 3'], bin_size=0.05, colors=[colors[0]])
fig['layout'].update(title='Use ratio of Python 3 in the world')
# Plot the distplot figure itself; rebuilding `fig` from the `data`/`layout`
# left over from the previous chart would discard it.
offline.iplot(fig, show_link=False)
# Equivalent to sns.stripplot.
python3_ratio = countries_pyver_ratio['Python 3']
trace = go.Scatter(
    x=python3_ratio,
    y=['Python 3'] * len(countries_pyver_ratio),
    mode='markers',
    marker=dict(color=colors[0]),
)
data = [trace]
fig = go.Figure(data=data)
offline.iplot(fig, show_link=False)
# Violin plot of the per-country Python 3 share, described as a plain dict
# figure with an inner box and a mean line.
fig = {
    "data": [{
        "type": 'violin',
        "y": countries_pyver_ratio['Python 3'],
        "box": {
            "visible": True
        },
        "line": {
            "color": 'black'
        },
        "meanline": {
            "visible": True
        },
        "fillcolor": colors[0],
        "opacity": 0.6,
        # Label the single violin after the series it shows.
        "x0": 'Python 3'
    }],
    "layout": {
        "title": "Use ratio of Python 3 in the world",
        "yaxis": {
            "zeroline": False,
        }
    }
}
offline.iplot(fig, show_link=False)
# The same violin built with go.Violin, with the values placed on the X axis.
colors = list(map(rgb2hex, sns.color_palette('rainbow')[:2]))
trace = go.Violin(
    x=countries_pyver_ratio['Python 3'],
    meanline=dict(visible=True),
    box=dict(visible=True),
    fillcolor=colors[0],
    opacity=0.6,
    line=dict(color='black'),
)
layout = go.Layout(
    title="Use ratio of Python 3 in the world",
    xaxis=dict(zeroline=False),
)
data = [trace]
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# Projection types:
# ['equirectangular', 'mercator', 'orthographic', 'natural earth',
#  'kavrayskiy7', 'miller', 'robinson', 'eckert4', 'azimuthal equal area',
#  'azimuthal equidistant', 'conic equal area', 'conic conformal',
#  'conic equidistant', 'gnomonic', 'stereographic', 'mollweide', 'hammer',
#  'transverse mercator', 'albers usa', 'winkel tripel', 'aitoff',
#  'sinusoidal']
# World map colored by each country's Python 3 share, expressed in percent.
data = [{
    'type': 'choropleth',
    'locations': countries_pyver_ratio.index,
    'locationmode': 'country names',
    'z': countries_pyver_ratio['Python 3'] * 100,
    'text': countries_pyver_ratio.index,
    'colorscale': 'Bluered',
    'autocolorscale': False,
    'reversescale': True,
    'marker': {
        'line': {
            'color': 'rgb(180,180,180)',
            'width': 0.5,
        },
    },
    'colorbar': {
        # autotick = False,
        'ticksuffix': '%',
        'title': 'Percent',
    },
}]
layout = {
    'title': 'Python 3 in the world',
    'geo': {
        'showframe': False,
        'showcoastlines': False,
        'projection': {
            'type': 'equirectangular',
        },
    },
}
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# Columns of the editor/IDE question, relabelled with the text before the
# first ':' of each column label.
cols = find_cols(survey_df, ['what', 'editor(s)/ide(s)'])
editors = survey_df[cols]
editors.columns = [label.partition(':')[0] for label in editors.columns]
editors.head()
pycharm professional edition | pycharm community edition | sublime text | vim | atom | vs code | eclipse + pydev | aptana | jupyter notebook | intellij idea | ... | netbeans | spyder | rodeo | gedit | ninja-ide | komodo editor | komodo ide | wing ide | textmate | other - write in | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | NaN | NaN | NaN | Vim | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Other - Write In: |
1 | NaN | NaN | NaN | NaN | Atom | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Wing IDE | NaN | NaN |
2 | PyCharm Professional Edition | NaN | Sublime Text | Vim | Atom | NaN | Eclipse + Pydev | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | Komodo IDE | NaN | NaN | NaN |
3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | PyCharm Professional Edition | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 24 columns
# Non-null answers per editor column, largest count first.
editor_counts = editors.count()
count_editors = editor_counts.sort_values(ascending=False)
count_editors.head()
pycharm community edition 3061 sublime text 2762 vim 2468 atom 2070 pycharm professional edition 2069 dtype: int64
# Horizontal bar chart of `count_editors`; labels, values and colors are all
# reversed before being handed to the trace.
colors = list(map(rgb2hex, sns.color_palette('rainbow', len(count_editors))))
trace = go.Bar(
    y=count_editors.index[::-1],
    x=count_editors.values[::-1],
    marker=dict(color=colors[::-1]),
    orientation='h',
)
data = [trace]
layout = go.Layout(
    title="What editor(s)/IDE(s) have you considered for use in your Python development?",
    margin=dict(r=10),
    height=1000,
    yaxis=dict(automargin=True),
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# Vertical bar chart of `count_editors`, one rainbow color per bar.
colors = list(map(rgb2hex, sns.color_palette('rainbow', len(count_editors))))
trace = go.Bar(
    x=count_editors.index,
    y=count_editors.values,
    marker=dict(color=colors),
    orientation='v',
)
data = [trace]
layout = go.Layout(
    title="What editor(s)/IDE(s) have you considered for use in your Python development?",
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# Answers to the "main editor" question and how often each one occurs,
# most frequent first.
col = find_cols(survey_df, ['what', 'main', 'editor'])
main_editor_column = col[0]
main_editor = survey_df[main_editor_column]
count_main_editor = main_editor.value_counts(ascending=False)
count_main_editor.head()
PyCharm Professional Edition 1339 PyCharm Community Edition 1240 Sublime Text 844 Vim 775 IDLE 708 Name: what is the main editor you use for your current python development?, dtype: int64
# Horizontal bar chart of `count_main_editor`; labels, values and colors are
# all reversed before being handed to the trace.
colors = list(map(rgb2hex, sns.color_palette('rainbow', len(count_main_editor))))
trace = go.Bar(
    y=count_main_editor.index[::-1],
    x=count_main_editor.values[::-1],
    marker=dict(color=colors[::-1]),
    orientation='h',
)
data = [trace]
layout = go.Layout(
    title="What is the main editor you use for your current python development?",
    margin=dict(r=10),
    height=1000,
    yaxis=dict(automargin=True),
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)
# Vertical bar chart of `count_main_editor`.  The palette is sized from the
# series that is actually plotted so every bar gets its own color.
colors = [rgb2hex(i) for i in sns.color_palette('rainbow', len(count_main_editor))]
trace = go.Bar(x=count_main_editor.index, y=count_main_editor.values, marker={'color': colors}, orientation='v')
data = [trace]
layout = go.Layout(
    title="What is the main editor you use for your current python development?",
)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)