In [1]:
# import modules
import requests
from bs4 import BeautifulSoup
import pandas as pd
In [34]:
# Page that lists the minimum wage for each state
url = 'http://www.paywizard.org/main/salary/minimum-wage'

# Scrape the HTML at the url; the timeout keeps the cell from hanging
# forever if the server never answers
r = requests.get(url, timeout=30)

# Fail loudly on an HTTP error (4xx/5xx) instead of parsing an error page
r.raise_for_status()

# Turn the HTML into a Beautiful Soup object
soup = BeautifulSoup(r.text, 'lxml')
In [35]:
# Create three variables to store the scraped data in
state_name = []
hourly_min_wage = []
notes = []

# Collect every element whose class attribute is "grid listing table"
listing_tables = soup.find_all(class_='grid listing table')

# The wage data is taken from the second match; give a readable error if the
# page no longer contains it
if len(listing_tables) < 2:
    raise IndexError(
        'expected at least 2 elements with class "grid listing table", '
        'found %d' % len(listing_tables)
    )
table = listing_tables[1]
In [36]:
# Parse the table rows into three lists and build the dataframe from them.
#
# The lists are rebuilt from scratch in this cell (not appended to), so
# re-running the cell does not duplicate rows.
# NOTE(review): the captured output of this notebook shows two sub-category
# rows after NEW YORK that are not states; filter them out downstream if only
# states are wanted.
parsed_rows = []

# Find all the <tr> tag pairs, skip the first one, then for each:
for row in table.find_all('tr')[1:]:
    # Collect all the <td> tag pairs in this <tr> tag pair
    col = row.find_all('td')

    # Skip rows that do not have the three expected cells instead of
    # failing with an IndexError
    if len(col) < 3:
        continue

    # Keep the stripped text of the 1st (state), 2nd (hourly minimum wage)
    # and 3rd (notes) <td> tag pairs
    parsed_rows.append([cell.text.strip() for cell in col[:3]])

state_name = [cells[0] for cells in parsed_rows]
hourly_min_wage = [cells[1] for cells in parsed_rows]
notes = [cells[2] for cells in parsed_rows]

# Create a variable of the value of the columns
columns = {'state': state_name, 'hrly_min_wage': hourly_min_wage, 'notes': notes}

# Create a dataframe from the columns variable
df = pd.DataFrame(columns)
In [37]:
# Show only the state and hourly minimum wage columns
df.loc[:, ['state', 'hrly_min_wage']]
Out[37]:
state hrly_min_wage
0 ALABAMA NONE
1 ALASKA $9.75
2 ARIZONA $8.05
3 ARKANSAS $8.00
4 CALIFORNIA $10.00
5 COLORADO $8.31
6 CONNECTICUT $9.60
7 DELAWARE $8.25
8 DISTRICT OF COLUMBIA $11.50
9 FLORIDA $8.05
10 GEORGIA $5.15
11 HAWAII $8.50
12 IDAHO $7.25
13 ILLINOIS $8.25
14 INDIANA $7.25
15 IOWA $7.25
16 KANSAS $7.25
17 KENTUCKY $7.25
18 LOUISIANA NONE
19 MAINE $7.50
20 MARYLAND $8.75
21 MASSACHUSETTS $10.00
22 MICHIGAN $8.50
23 MINNESOTA $9.50/7.75
24 MISSISSIPPI NONE
25 MISSOURI $7.65
26 MONTANA $8.05
27 NEBRASKA $9.00
28 NEVADA $8.25/7.25
29 NEW HAMPSHIRE $7.25
30 NEW JERSEY $8.38
31 NEW MEXICO $7.50
32 NEW YORK $9.00
33 Building service janitors in residential build... $6.00
34 All employees except janitors in residential b... $9.00
35 N. CAROLINA $7.25
36 N. DAKOTA $7.25
37 OHIO $8.10
38 OKLAHOMA $7.25
39 OREGON $9.50/9.75
40 PENNSYLVANIA $7.25
41 RHODE ISLAND $9.60
42 S. CAROLINA NONE
43 S. DAKOTA $8.55
44 TENNESSEE NONE
45 TEXAS $7.25
46 UTAH $7.25
47 VERMONT $9.60
48 VIRGINIA $7.25
49 WASHINGTON $9.47
50 WEST VIRGINIA $8.75
51 WISCONSIN $7.25
52 WYOMING $5.15
In [39]:
# Drop the notes column. Reassigning (instead of inplace=True) together with
# errors='ignore' keeps this cell safe to run more than once.
df = df.drop('notes', axis=1, errors='ignore')
In [42]:
# Save the dataframe as a CSV file, leaving out the index column
output_csv = 'min_wage_per_state.csv'
df.to_csv(output_csv, index=False)
In [ ]: