Python in 2013, according to Stack Overflow

In [74]:
# Smoke test: confirm the kernel executes code and that plotting works.
# print() with a single argument behaves the same on Python 2 and Python 3.
print('hello world')
# NOTE(review): `plot` and `np` are not imported anywhere in this notebook;
# presumably they are injected by a pylab-style session -- confirm.
plot(np.random.randn(1000).cumsum())
hello world
Out[74]:
[<matplotlib.lines.Line2D at 0x115ba7050>]
In [2]:
cd stackexchange/
/Users/wesm/Dropbox/talks/20130614pyconsg/stackexchange
In [75]:
import pandas as pd
# CSV exports to load; the file names suggest one export per date range.
files = [
    'Python200901-07.csv',
    'Python200907-201007.csv',
    'Python201007-201107.csv',
    'Python201107-12.csv',
    'Python201201-07.csv',
    'Python201207-12.csv',
    'Python201301.csv',
]

# Read each export into its own frame, then stack them into a single table
# with a fresh 0..n-1 index.
tables = [pd.read_csv(path) for path in files]
posts = pd.concat(tables, ignore_index=True)
In [78]:
# Show the first post. `posts` was built with ignore_index=True, so position 0
# and label 0 are the same row; .iloc makes the positional intent explicit and
# avoids the deprecated, ambiguous .ix indexer.
posts.iloc[0]
Out[78]:
AnswerCount                                                   7
CommentCount                                                  1
CreationDate                                2009-01-01 00:55:16
Id                                                       404346
OwnerDisplayName                                   marc lincoln
OwnerUserId                                               47204
ParentId                                                    NaN
PostTypeId                                                    1
Score                                                         2
Tags                                             <python><math>
Title               Python program to calculate harmonic series
Name: 0, dtype: object
In [79]:
# Raw tag strings: each post's tags are packed together as '<tag1><tag2>...'.
posts.Tags
Out[79]:
0                                <python><math>
1          <python><binding><scope><identifier>
2          <python><windows><subprocess><popen>
3          <python><com><outlook><outlook-2007>
4           <python><windows><winapi><controls>
5                <python><opengl><3d><wxpython>
6     <python><xml><google-app-engine><parsing>
7                          <python><networking>
8                           <python><urlencode>
9            <python><django><django-templates>
10                       <python><syntax-rules>
11                  <python><google-app-engine>
12              <python><open-source><projects>
13                   <python><global-variables>
14                             <python><ctypes>
...
191976     <python><delete><pyside><qtreewidget><top-level>
191977        <python><response><serial><ussd><non-english>
191978                                 <python><bugs><args>
191979     <python><user-interface><coding-style><wxpython>
191980                                    <python><sockets>
191981         <python><django><setuptools><django-testing>
191982                         <python><mysql><application>
191983    <c++><python><c><shared-libraries><python-c-ex...
191984                                  <python><soap><zsi>
191985                      <python><r><rpy2><bioconductor>
191986                                  <python><greenlets>
191987                         <python><pygame><python-3.3>
191988                                    <python><twisted>
191989    <python><multidimensional-array><numpy><indexing>
191990                           <python><function><primes>
Name: Tags, Length: 191991, dtype: object
In [80]:
import re

# Every tag is wrapped in angle brackets (e.g. '<python><math>'); capture the
# text between each '<' and the next '>'.
regex = re.compile('<([^>]*)>')

# Build one (Id, subtag) row per tag attached to each post.
# The loop variable is `post_id` rather than `id` so the builtin id() is not
# shadowed in the notebook namespace after this cell runs.
ids = []
tags = []
for post_id, val in zip(posts.Id, posts.Tags):
    for tag in regex.findall(val):
        ids.append(post_id)
        tags.append(tag)

# Pin the column order explicitly so the table reads Id, subtag regardless of
# how the pandas version in use orders dict keys.
tag_table = pd.DataFrame({'subtag': tags, 'Id': ids}, columns=['Id', 'subtag'])
In [81]:
# Preview the long-format table: one row per (post Id, tag) pair.
tag_table.head()
Out[81]:
Id subtag
0 404346 python
1 404346 math
2 404534 python
3 404534 binding
4 404534 scope
In [82]:
# Attach the post columns to every (Id, subtag) row. 'Id' is the only column
# the two tables share, so naming it explicitly keeps the same result while
# protecting the join from silently changing if a shared column is added later.
merged = pd.merge(tag_table, posts, on='Id', how='inner')
In [84]:
# Show the first row of the merged table. .iloc selects by position, which is
# what is wanted here, and avoids the deprecated .ix indexer.
merged.iloc[0]
Out[84]:
Id                                                       404346
subtag                                                   python
AnswerCount                                                   7
CommentCount                                                  1
CreationDate                                2009-01-01 00:55:16
OwnerDisplayName                                   marc lincoln
OwnerUserId                                               47204
ParentId                                                    NaN
PostTypeId                                                    1
Score                                                         2
Tags                                             <python><math>
Title               Python program to calculate harmonic series
Name: 0, dtype: object
In [88]:
# Number of rows (posts) per tag, largest first; keep the 500 most frequent.
# NOTE(review): Series.order is the legacy pandas spelling of a value sort --
# confirm the pinned pandas version before upgrading.
tag_counts = merged.groupby('subtag').size()
top = tag_counts.order(ascending=False)[:500]
In [89]:
# Parse the CreationDate strings into timestamps so the column can later be
# used as a time index for resampling.
merged['CreationDate'] = pd.to_datetime(merged['CreationDate'])
In [132]:
# Restrict the merged table to rows whose tag is one of the `top` tags, then
# bucket those rows by tag.
is_top_tag = merged.subtag.isin(top.index)
filtered = merged[is_top_tag]
grouped = filtered.groupby('subtag')


def agg_monthly(group):
    """Count one tag's Score entries per 'M' resampling bin.

    `group` is the slice of `filtered` belonging to a single subtag. Its rows
    are re-indexed by CreationDate so they can be resampled over time.
    """
    scores_by_date = group.set_index('CreationDate').Score
    return scores_by_date.resample('M', how='count')


# Apply the per-tag aggregation, then pivot the tag level into columns:
# one row per time bin, one column per tag.
per_tag_counts = grouped.apply(agg_monthly)
results = per_tag_counts.unstack('subtag')
In [133]:
# Keep only the rows up to the 2013-05-31 label.
results = results[:'2013-05-31']
In [138]:
# List the top tags whose name contains 'meta'.
[x for x in top.index if 'meta' in x]
Out[138]:
['metaclass', 'metaprogramming']
In [139]:
# Divide every tag's count by the 'python' count in the same row (time bin),
# so each column becomes that tag's count relative to 'python'.
normed = results.div(results['python'], axis=0)
# Plot the relative frequency of the 'metaprogramming' tag over time.
normed['metaprogramming'].plot()
Out[139]:
<matplotlib.axes.AxesSubplot at 0x1111d1050>
In [97]:
# Plot the raw (un-normalized) count of 'python'-tagged rows per time bin.
results['python'].plot()
Out[97]:
<matplotlib.axes.AxesSubplot at 0x128a40350>
In [98]:
# Express each tag's count relative to the 'python' count in the same row.
normed = results.div(results['python'], axis=0)
# Restrict the normalized table to rows up to the 2013-05-31 label.
to_analyze = normed[:'2013-05-31']
# Plot the relative frequency of the 'pandas' tag over time.
to_analyze['pandas'].plot()
Out[98]:
<matplotlib.axes.AxesSubplot at 0x127ec7e90>
In [99]:
# Relative frequency (vs. 'python') of the 'django' tag over time.
to_analyze['django'].plot()
Out[99]:
<matplotlib.axes.AxesSubplot at 0x10965f7d0>
In [100]:
# Relative frequency (vs. 'python') of the 'flask' tag over time.
to_analyze['flask'].plot()
Out[100]:
<matplotlib.axes.AxesSubplot at 0x109a9b890>
In [101]:
# Relative frequency (vs. 'python') of the 'google-app-engine' tag over time.
to_analyze['google-app-engine'].plot()
Out[101]:
<matplotlib.axes.AxesSubplot at 0x111811950>
In [102]:
# Relative frequency (vs. 'python') of the 'python-3.x' tag over time.
to_analyze['python-3.x'].plot()
Out[102]:
<matplotlib.axes.AxesSubplot at 0x111680f10>
In [103]:
# Relative frequency (vs. 'python') of the 'matplotlib' tag over time.
to_analyze['matplotlib'].plot()
Out[103]:
<matplotlib.axes.AxesSubplot at 0x1118496d0>
In [104]:
# Relative frequency (vs. 'python') of the 'regex' tag over time.
to_analyze['regex'].plot()
Out[104]:
<matplotlib.axes.AxesSubplot at 0x1132bba50>
In [66]:
# Display the 500 most frequent tags with their row counts, largest first.
top
Out[66]:
subtag
python               191991
django                20986
google-app-engine      6147
list                   5577
numpy                  5397
python-2.7             4953
regex                  4598
python-3.x             4303
string                 3600
matplotlib             3336
dictionary             3203
windows                2594
tkinter                2547
linux                  2483
mysql                  2433
...
variable-assignment    133
sublimetext2           133
httplib                133
django-orm             133
tweepy                 132
tags                   132
glade                  132
merge                  131
dbus                   131
blobstore              131
website                130
mvc                    130
autocomplete           130
find                   129
decimal                129
Length: 500, dtype: int64
In [105]:
# Relative frequency (vs. 'python') of the 'ironpython' tag over time.
to_analyze['ironpython'].plot()
Out[105]:
<matplotlib.axes.AxesSubplot at 0x11402e2d0>
In [106]:
# Relative frequency (vs. 'python') of the 'twisted' tag over time.
to_analyze['twisted'].plot()
Out[106]:
<matplotlib.axes.AxesSubplot at 0x114062390>
In [107]:
# Relative frequency (vs. 'python') of the 'tornado' tag over time.
to_analyze['tornado'].plot()
Out[107]:
<matplotlib.axes.AxesSubplot at 0x1164aab50>
In [109]:
# Rebuild the per-tag groups so this cell does not depend on an earlier cell
# having been run in the same order.
filtered = merged[merged.subtag.isin(top.index)]
grouped = filtered.groupby('subtag')


def agg_annual(group):
    """Count one tag's Score entries per year ('A' = annual frequency).

    `group` is the slice of `filtered` belonging to a single subtag. Its rows
    are re-indexed by CreationDate so they can be resampled over time.

    Named `agg_annual` because this aggregation is yearly; reusing the name
    `agg_monthly` here would silently shadow the monthly helper.
    """
    return group.set_index('CreationDate').Score.resample('A', how='count')


# NOTE: from here on `results` holds the ANNUAL table (one row per year, one
# column per tag), replacing the monthly table built earlier.
results = grouped.apply(agg_annual).unstack('subtag')
In [112]:
# Bar chart of the yearly count of 'django'-tagged rows.
# (A stray bare `n` line was removed: it is not defined anywhere in the
# notebook and would raise NameError on a fresh kernel.)
results['django'].plot(kind='bar')
Out[112]:
<matplotlib.axes.AxesSubplot at 0x116474310>
In [114]:
# Express each tag's yearly count relative to the 'python' count for the same
# year.
normed = results.div(results['python'], axis=0)

# Bar chart of django's yearly count relative to 'python'.
normed['django'].plot(kind='bar')
Out[114]:
<matplotlib.axes.AxesSubplot at 0x1167e3190>
In [115]:
# Fractional change of django's relative frequency from one row (year) to the
# next; the first row has no predecessor and is NaN.
normed['django'].pct_change()
Out[115]:
CreationDate
2009-12-31           NaN
2010-12-31     -0.014833
2011-12-31     -0.052872
2012-12-31     -0.116431
2013-12-31     -0.149268
Name: django, dtype: float64
In [118]:
# Year-over-year change for every tag, taken from the last row of the table.
# .iloc[-1] selects that row by position explicitly instead of relying on the
# deprecated .ix indexer's integer fallback on a datetime index.
whats_happening2013 = normed.pct_change().iloc[-1]
In [130]:
# List the tags in the latest-change Series whose name contains 'sublime'.
[x for x in whats_happening2013.index if 'sublime' in x]
Out[130]:
['sublimetext2']
In [131]:
# Latest year-over-year change for the 'sublimetext2' tag.
whats_happening2013['sublimetext2']
Out[131]:
0.36393813179275791
In [120]:
# Sort the latest changes once (ascending) and take both ends of the ranking,
# instead of sorting the same Series twice.
# NOTE(review): tags whose change is NaN end up at the tail of the sort (see
# 'pylab' in the uptrends output), so they land in `uptrends`; consider
# dropping NaN values before ranking.
ranked = whats_happening2013.order()
downtrends = ranked[:50]   # 50 largest relative declines
uptrends = ranked[-50:]    # 50 entries at the top of the ranking
In [122]:
# Reverse the ascending slice so the largest increases are listed first.
uptrends[::-1]
Out[122]:
subtag
pylab                         NaN
python-3.3               7.594584
user-interface           4.107720
raspberry-pi             3.542113
openerp                  2.626263
enthought                2.061674
sympy                    1.734087
python-2.7               1.197395
pandas                   1.025044
scikit-learn             0.913183
python-import            0.782495
xlwt                     0.738728
format                   0.717129
flask-sqlalchemy         0.663131
nested                   0.564855
matrix                   0.547597
parameters               0.543436
python-multithreading    0.542176
node.js                  0.535484
cx-freeze                0.534617
web-crawler              0.531504
histogram                0.518511
count                    0.497674
return                   0.488314
website                  0.485113
3d                       0.465417
xlrd                     0.461540
mapreduce                0.457409
django-south             0.452425
graph                    0.450032
python-3.x               0.433470
merge                    0.424710
tkinter                  0.412605
loops                    0.412116
xml-parsing              0.380465
ipython                  0.379742
python-requests          0.377138
serial-port              0.374284
sublimetext2             0.363938
printing                 0.357086
decimal                  0.349404
pyside                   0.348702
pyinstaller              0.346390
pyserial                 0.343298
networkx                 0.333529
compare                  0.322945
optimization             0.316720
flask                    0.303641
split                    0.297746
input                    0.292707
Name: 2013-12-31 00:00:00, dtype: float64
In [123]:
# Display the 50 tags with the largest relative declines, steepest first.
downtrends
Out[123]:
subtag
plone                    -0.712494
osx-lion                 -0.679115
interpreter              -0.647215
minidom                  -0.624765
osx-snow-leopard         -0.603116
xmpp                     -0.597193
mod-python               -0.588417
pylons                   -0.572587
qt4                      -0.567036
pygtk                    -0.563162
winapi                   -0.562059
.net                     -0.559018
permissions              -0.555490
irc                      -0.554376
wx                       -0.551000
objective-c              -0.546419
boost-python             -0.516837
reportlab                -0.507864
gui                      -0.494472
metaclass                -0.478071
gmail                    -0.461853
metaprogramming          -0.437748
cookies                  -0.436203
model                    -0.435835
programming-languages    -0.433023
deployment               -0.424806
timezone                 -0.422715
blobstore                -0.415119
introspection            -0.404675
emacs                    -0.398895
design                   -0.394230
filesystems              -0.381131
functional-programming   -0.378079
decorator                -0.374820
ruby-on-rails            -0.372370
packaging                -0.364986
forms                    -0.364986
com                      -0.358231
mako                     -0.358231
dll                      -0.356948
webserver                -0.356405
django-admin             -0.352027
jquery                   -0.346309
caching                  -0.344279
plugins                  -0.340563
pyparsing                -0.338527
zip                      -0.338527
authentication           -0.337576
boolean                  -0.332775
web.py                   -0.332775
Name: 2013-12-31 00:00:00, dtype: float64