#!/usr/bin/env python
# coding: utf-8

# ## Baltimore City Salary Analysis

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')
import pandas as pd
import requests
import StringIO as StringIO
import numpy as np


# In[2]:


url = "https://data.baltimorecity.gov/api/views/2j28-xzd7/rows.csv?accessType=DOWNLOAD"
r = requests.get(url)
data = StringIO.StringIO(r.content)


# read the urllib3 docs and decide if the warning matters to you.

# In[3]:


dataframe = pd.read_csv(data,header=0)


# We read the CSV into a dataframe. The header is called out as row0. data is of type FILE.

# In[4]:


dataframe['AnnualSalary'] = dataframe['AnnualSalary'].str.lstrip('$')
dataframe['AnnualSalary'] = dataframe['AnnualSalary'].astype(float)


# In[5]:


grouped = dataframe.groupby(['JobTitle'])['AnnualSalary']
aggregated = grouped.agg([np.sum, np.mean, np.std, np.size, np.min, np.max])

# sort the data
pd.set_option('display.max_rows', 10000000)
output = aggregated.sort(['amax'],ascending=0)
output.head(15)


# Now lets take a look at the data when grouped by JobTitle.

# In[6]:


output = aggregated.sort(['sum','size'],ascending=[0,1])
output.head(15)


# now lets graph it

# In[7]:


aggregated = grouped.agg([np.sum])
output = aggregated.sort(['sum'],ascending=0)
output = output.head(15)
output.rename(columns={'sum': 'Salary'}, inplace=True)


# In[8]:


from matplotlib.ticker import FormatStrFormatter

myplot = output.plot(kind='bar',title='Baltimore Total Annual Salary by Job Title - 2014')
myplot.set_ylabel('$')
myplot.yaxis.set_major_formatter(FormatStrFormatter('%d'))


# In[ ]: