# Exploratory session: how often each tag co-occurs with Python-tagged posts
# over time, relative to the 'python' tag itself.
#
# Bare expressions such as `posts.Tags` are interactive inspection steps: they
# display a value in IPython/Jupyter and have no effect when run as a script.

import os
import re

import numpy as np
import pandas as pd

print('hello world')

# Plotting smoke test (random walk).  Routed through pandas plotting, like the
# rest of this session, so it does not depend on the %pylab namespace.
pd.Series(np.random.randn(1000).cumsum()).plot()

# Plain-Python equivalent of the IPython `cd stackexchange/` magic.
os.chdir('stackexchange/')

files = ['Python200901-07.csv',
         'Python200907-201007.csv',
         'Python201007-201107.csv',
         'Python201107-12.csv',
         'Python201201-07.csv',
         'Python201207-12.csv',
         'Python201301.csv']

tables = [pd.read_csv(path) for path in files]
posts = pd.concat(tables, ignore_index=True)

posts.iloc[0]
posts.Tags

# Each Tags value is scanned for '<...>' groups; one (Id, subtag) row is
# emitted per group found.
regex = re.compile(r'<([^>]*)>')

ids = []
tags = []
for post_id, val in zip(posts.Id, posts.Tags):
    for tag in regex.findall(val):
        ids.append(post_id)
        tags.append(tag)

tag_table = pd.DataFrame({'subtag': tags, 'Id': ids})
tag_table.head()

# Joins on the columns the two frames share.
# NOTE(review): presumably that is only 'Id' -- confirm against the CSV schema.
merged = pd.merge(tag_table, posts)
merged.iloc[0]

# The 500 most frequent tags.
top = merged.groupby('subtag').size().sort_values(ascending=False).iloc[:500]

merged['CreationDate'] = pd.to_datetime(merged.CreationDate)

filtered = merged[merged.subtag.isin(top.index)]
grouped = filtered.groupby('subtag')


def agg_monthly(group):
    """Return the count of non-null Score values per month-end bin.

    `group` is a DataFrame with 'CreationDate' (datetime) and 'Score' columns.
    """
    # An offset object is used instead of an alias string because the
    # month-end alias spelling differs between pandas releases.
    scores = group.set_index('CreationDate').Score
    return scores.resample(pd.offsets.MonthEnd()).count()


# Rows: month ends; columns: one per subtag.
results = grouped.apply(agg_monthly).unstack('subtag')
results = results.loc[:'2013-05-31']

[x for x in top.index if 'meta' in x]

# Each tag's monthly count as a fraction of the 'python' tag's count.
normed = results.div(results['python'], axis=0)
normed['metaprogramming'].plot()
results['python'].plot()

to_analyze = normed.loc[:'2013-05-31']
to_analyze['pandas'].plot()
to_analyze['django'].plot()
to_analyze['flask'].plot()
to_analyze['google-app-engine'].plot()
to_analyze['python-3.x'].plot()
to_analyze['matplotlib'].plot()
to_analyze['regex'].plot()
top
to_analyze['ironpython'].plot()
to_analyze['twisted'].plot()
to_analyze['tornado'].plot()

# Annual pass: same selection as above, regrouped.
filtered = merged[merged.subtag.isin(top.index)]
grouped = filtered.groupby('subtag')


def agg_monthly(group):
    """Return the count of non-null Score values per year-end bin.

    NOTE: despite the name, this redefinition aggregates annually; the name is
    kept so existing references to `agg_monthly` keep working.
    """
    scores = group.set_index('CreationDate').Score
    return scores.resample(pd.offsets.YearEnd()).count()


# Rows: year ends; columns: one per subtag.
results = grouped.apply(agg_monthly).unstack('subtag')
results['django'].plot(kind='bar')

normed = results.div(results['python'], axis=0)
normed['django'].plot(kind='bar')
# Period-over-period relative change of each tag's share (`normed` holds the
# per-period tag counts divided by the 'python' count).
# NOTE(review): pct_change's default treatment of missing values differs
# across pandas versions -- confirm the result for tags with gaps.
normed['django'].pct_change()

# Last row = change into the most recent period (presumably 2013, per the name).
whats_happening2013 = normed.pct_change().iloc[-1]

[x for x in whats_happening2013.index if 'sublime' in x]
whats_happening2013['sublimetext2']

# Sort once, ascending (missing values are placed last), then take both ends.
ranked = whats_happening2013.sort_values()
downtrends = ranked.iloc[:50]
uptrends = ranked.iloc[-50:]

uptrends.iloc[::-1]
downtrends