#!/usr/bin/env python # coding: utf-8 # ## Baltimore City Salary Analysis # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') import pandas as pd import requests import StringIO as StringIO import numpy as np # In[2]: url = "https://data.baltimorecity.gov/api/views/2j28-xzd7/rows.csv?accessType=DOWNLOAD" r = requests.get(url) data = StringIO.StringIO(r.content) # read the urllib3 docs and decide if the warning matters to you. # In[3]: dataframe = pd.read_csv(data,header=0) # We read the CSV into a dataframe. The header is called out as row0. data is of type FILE. # In[4]: dataframe['AnnualSalary'] = dataframe['AnnualSalary'].str.lstrip('$') dataframe['AnnualSalary'] = dataframe['AnnualSalary'].astype(float) # In[5]: grouped = dataframe.groupby(['JobTitle'])['AnnualSalary'] aggregated = grouped.agg([np.sum, np.mean, np.std, np.size, np.min, np.max]) # sort the data pd.set_option('display.max_rows', 10000000) output = aggregated.sort(['amax'],ascending=0) output.head(15) # Now lets take a look at the data when grouped by JobTitle. # In[6]: output = aggregated.sort(['sum','size'],ascending=[0,1]) output.head(15) # now lets graph it # In[7]: aggregated = grouped.agg([np.sum]) output = aggregated.sort(['sum'],ascending=0) output = output.head(15) output.rename(columns={'sum': 'Salary'}, inplace=True) # In[8]: from matplotlib.ticker import FormatStrFormatter myplot = output.plot(kind='bar',title='Baltimore Total Annual Salary by Job Title - 2014') myplot.set_ylabel('$') myplot.yaxis.set_major_formatter(FormatStrFormatter('%d')) # In[ ]: