In [1]:

# import modules
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [34]:

url = 'http://www.paywizard.org/main/salary/minimum-wage'

# Download the page. The timeout keeps the cell from hanging indefinitely on
# an unresponsive server, and raise_for_status() stops here on a 4xx/5xx reply
# instead of letting an error page be parsed as if it were the wage table.
r = requests.get(url, timeout=30)
r.raise_for_status()

# Parse the downloaded HTML into a Beautiful Soup tree using the lxml parser
soup = BeautifulSoup(r.text, 'lxml')

In [35]:

# Create three lists to store the scraped data in
state_name = []
hourly_min_wage = []
notes = []

# Collect every element matched by class_='grid listing table' and keep the
# second one. find_all is the same method the row loop below uses; findAll is
# only its legacy alias.
wage_tables = soup.find_all(class_='grid listing table')
if len(wage_tables) < 2:
    # Same exception type as the bare [1] lookup would raise, but with a
    # message that points at the likely cause (page layout changed).
    raise IndexError(
        "expected at least 2 elements with class 'grid listing table', "
        "found %d" % len(wage_tables)
    )
table = wage_tables[1]

In [36]:

# Start from empty lists every time this cell runs. Appending to lists that
# were created in an earlier cell would add a second copy of every row if this
# cell were executed again without re-running that earlier cell.
state_name = []
hourly_min_wage = []
notes = []

# Walk every <tr> in the table except the first one (presumably the header
# row -- confirm against the page).
# NOTE(review): the rendered output shows rows 33 and 34 holding New York
# sub-category descriptions in the 'state' column; they are kept as scraped.
for row in table.find_all('tr')[1:]:
    # All <td> cells of this row
    col = row.find_all('td')

    # A row with fewer than three cells cannot fill all three columns;
    # skip it rather than fail with IndexError on col[2].
    if len(col) < 3:
        continue

    # 1st <td>: state name
    state_name.append(col[0].text.strip())

    # 2nd <td>: hourly minimum wage, kept as the scraped string
    hourly_min_wage.append(col[1].text.strip())

    # 3rd <td>: notes
    notes.append(col[2].text.strip())


# Map each output column name to its list of values
columns = {'state': state_name, 'hrly_min_wage': hourly_min_wage, 'notes': notes}

# Create a dataframe from the columns mapping
df = pd.DataFrame(columns)

In [37]:

# Show only the state and hourly minimum wage columns
df.loc[:, ['state', 'hrly_min_wage']]

Out[37]:

	state	hrly_min_wage
0	ALABAMA	NONE
1	ALASKA	$9.75
2	ARIZONA	$8.05
3	ARKANSAS	$8.00
4	CALIFORNIA	$10.00
5	COLORADO	$8.31
6	CONNECTICUT	$9.60
7	DELAWARE	$8.25
8	DISTRICT OF COLUMBIA	$11.50
9	FLORIDA	$8.05
10	GEORGIA	$5.15
11	HAWAII	$8.50
12	IDAHO	$7.25
13	ILLINOIS	$8.25
14	INDIANA	$7.25
15	IOWA	$7.25
16	KANSAS	$7.25
17	KENTUCKY	$7.25
18	LOUISIANA	NONE
19	MAINE	$7.50
20	MARYLAND	$8.75
21	MASSACHUSETTS	$10.00
22	MICHIGAN	$8.50
23	MINNESOTA	$9.50/7.75
24	MISSISSIPPI	NONE
25	MISSOURI	$7.65
26	MONTANA	$8.05
27	NEBRASKA	$9.00
28	NEVADA	$8.25/7.25
29	NEW HAMPSHIRE	$7.25
30	NEW JERSEY	$8.38
31	NEW MEXICO	$7.50
32	NEW YORK	$9.00
33	Building service janitors in residential build...	$6.00
34	All employees except janitors in residential b...	$9.00
35	N. CAROLINA	$7.25
36	N. DAKOTA	$7.25
37	OHIO	$8.10
38	OKLAHOMA	$7.25
39	OREGON	$9.50/9.75
40	PENNSYLVANIA	$7.25
41	RHODE ISLAND	$9.60
42	S. CAROLINA	NONE
43	S. DAKOTA	$8.55
44	TENNESSEE	NONE
45	TEXAS	$7.25
46	UTAH	$7.25
47	VERMONT	$9.60
48	VIRGINIA	$7.25
49	WASHINGTON	$9.47
50	WEST VIRGINIA	$8.75
51	WISCONSIN	$7.25
52	WYOMING	$5.15

In [39]:

# Drop the notes column. Rebinding df (instead of inplace=True) together with
# errors='ignore' makes this cell safe to run more than once: a second run
# finds no 'notes' column and simply leaves the frame unchanged instead of
# raising KeyError.
df = df.drop(columns=['notes'], errors='ignore')

In [42]:

# Save the dataframe as a CSV file, leaving out the index column
df.to_csv(path_or_buf='min_wage_per_state.csv', index=False)

In [ ]: