import numpy as np
import pandas as pd
from pandas import DataFrame
from pandas import Series

# Set pandas to show all columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# `import matplotlib.pyplot as plt` binds only `plt`; the top-level package
# must be imported by name before its version can be printed.
import matplotlib
import matplotlib.pyplot as plt
print(matplotlib.__version__)

# Necessary for the xticks option, etc. (plotting helpers used as bare names).
from pylab import *

# Equivalent of the notebook magic `%matplotlib inline`, written as plain
# Python so the file still parses outside IPython.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    # Not running under IPython/Jupyter: there is no inline backend to enable.
    pass

import seaborn as sns
print(sns.__version__)

# NOTE: unpickling can execute arbitrary code; only load a trusted file here.
df = pd.read_pickle('CSR tweets - 2013 by 41 accounts.pkl')
print(len(df))

# Bare expressions like the ones below only render output when they are the
# last statement of a notebook cell; in a plain script they are no-ops.
df.head(2)
df.columns
len(df.columns)

# Distinct account names, then the number of distinct accounts and companies.
df['from_user_screen_name'].unique()
len(df['from_user_screen_name'].unique())
len(df['Company'].unique())


def f(x):
    """Summarise one account's rows as a single Series.

    Args:
        x: The sub-DataFrame handed over by ``groupby(...).apply``; it must
            contain the ``content``, ``Company`` and
            ``from_user_description`` columns.

    Returns:
        A Series with three entries: ``Number_of_tweets`` (count of non-null
        ``content`` values), ``Company`` and ``Description`` (the minimum
        value of the respective column within the group).
    """
    # min() is used to pick a single value per group; presumably Company and
    # description are constant within an account -- TODO confirm.
    return Series({
        'Number_of_tweets': x['content'].count(),
        'Company': x['Company'].min(),
        'Description': x['from_user_description'].min(),
    })


# One summary row per Twitter account.
account_count = df.groupby('from_user_screen_name').apply(f)
print(len(account_count))
account_count

# Create a Series holding the number of account rows per company.
counts = account_count.groupby('Company').size()
# Convert the Series to a DataFrame.
df2 = pd.DataFrame(counts, columns=['size'])
# Keep only companies with more than one row in the account-level DataFrame.
df2 = df2[df2['size'] > 1]
# Show the DataFrame.
df2

account_count.to_csv('Number of Tweets per Account.csv')

# The notebook ran the IPython help lookup `DataFrame.sort?` at this point.
# It is interactive-only syntax, and DataFrame.sort has since been replaced
# by DataFrame.sort_values in pandas, so it is kept here as a note only.
# Rank accounts from most to fewest tweets before plotting.
# DataFrame.sort() no longer exists in pandas; sort_values() replaces it.
account_count = account_count.sort_values(['Number_of_tweets'], ascending=False)
account_count

plt.rcParams['figure.figsize'] = (20, 10)

# Series.plot() draws onto the current axes when a figure is already open.
# Outside a notebook nothing closes figures between plots, so each chart
# below starts its own figure to avoid stacking bars from earlier charts.

# Quick-look charts: vertical, then horizontal.
plt.figure()
account_count['Number_of_tweets'].plot(kind='bar')

plt.figure()
account_count['Number_of_tweets'].plot(kind='barh')

# Labelled vertical bar chart, saved to disk.
plt.figure()
account_plot = account_count['Number_of_tweets'].plot(kind='bar', alpha=0.75)
# Rotate and right-align the account names so they stay readable.
plt.xticks(fontsize=10, rotation=60, ha='right')
account_plot.set_xlabel('Twitter Account Name', weight='bold', labelpad=30)
# Add padding to the right of the y-axis label.
account_plot.set_ylabel('# Tweets', weight='bold', labelpad=25)
plt.savefig('account counts - bar graph.png', bbox_inches='tight', dpi=300,
            format='png')

# Labelled horizontal bar chart, saved to disk.
plt.figure()
account_plot = account_count['Number_of_tweets'].plot(kind='barh', alpha=0.75)
# With horizontal bars the x ticks are the tweet counts; no rotation is set.
plt.xticks(fontsize=12)
account_plot.set_ylabel('Twitter Account Name', weight='bold', labelpad=20)
# Add padding between the x-axis label and the axis.
account_plot.set_xlabel('# Tweets', weight='bold', labelpad=25)
plt.savefig('account counts - horizontal bar graph.png', bbox_inches='tight',
            dpi=300, format='png')