# import modules
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Page that lists the minimum wage per U.S. state.
url = 'http://www.paywizard.org/main/salary/minimum-wage'

# Download the page. The timeout keeps the script from hanging forever on an
# unresponsive server, and raise_for_status() surfaces HTTP errors here
# instead of letting an error page fail later during parsing.
r = requests.get(url, timeout=30)
r.raise_for_status()

# Parse the HTML into a Beautiful Soup object.
soup = BeautifulSoup(r.text, 'lxml')

# Three lists that collect the scraped values, one entry per table row.
state_name = []
hourly_min_wage = []
notes = []

# Use the second element matched by class_='grid listing table'.
table = soup.find_all(class_='grid listing table')[1]

# Walk every <tr> except the first one (presumably the header row).
for row in table.find_all('tr')[1:]:
    # All <td> cells of the current row.
    col = row.find_all('td')

    # Rows with fewer than three cells cannot be mapped onto the three
    # columns; skip them instead of raising IndexError.
    if len(col) < 3:
        continue

    # Stripped text of the 1st, 2nd and 3rd cell.
    column_1, column_2, column_3 = (cell.text.strip() for cell in col[:3])

    state_name.append(column_1)        # state (or sub-category) name
    hourly_min_wage.append(column_2)   # hourly minimum wage, kept as text
    notes.append(column_3)             # free-text notes

# Map the output column names to the collected lists.
columns = {
    'state': state_name,
    'hrly_min_wage': hourly_min_wage,
    'notes': notes,
}

# Build the DataFrame from the collected columns.
df = pd.DataFrame(columns)

# Notebook display expression: shows only the state and wage columns.
# It has no effect when the file is run as a plain script.
df[['state', 'hrly_min_wage']]
| | state | hrly_min_wage |
---|---|---|
0 | ALABAMA | NONE |
1 | ALASKA | $9.75 |
2 | ARIZONA | $8.05 |
3 | ARKANSAS | $8.00 |
4 | CALIFORNIA | $10.00 |
5 | COLORADO | $8.31 |
6 | CONNECTICUT | $9.60 |
7 | DELAWARE | $8.25 |
8 | DISTRICT OF COLUMBIA | $11.50 |
9 | FLORIDA | $8.05 |
10 | GEORGIA | $5.15 |
11 | HAWAII | $8.50 |
12 | IDAHO | $7.25 |
13 | ILLINOIS | $8.25 |
14 | INDIANA | $7.25 |
15 | IOWA | $7.25 |
16 | KANSAS | $7.25 |
17 | KENTUCKY | $7.25 |
18 | LOUISIANA | NONE |
19 | MAINE | $7.50 |
20 | MARYLAND | $8.75 |
21 | MASSACHUSETTS | $10.00 |
22 | MICHIGAN | $8.50 |
23 | MINNESOTA | $9.50/7.75 |
24 | MISSISSIPPI | NONE |
25 | MISSOURI | $7.65 |
26 | MONTANA | $8.05 |
27 | NEBRASKA | $9.00 |
28 | NEVADA | $8.25/7.25 |
29 | NEW HAMPSHIRE | $7.25 |
30 | NEW JERSEY | $8.38 |
31 | NEW MEXICO | $7.50 |
32 | NEW YORK | $9.00 |
33 | Building service janitors in residential build... | $6.00 |
34 | All employees except janitors in residential b... | $9.00 |
35 | N. CAROLINA | $7.25 |
36 | N. DAKOTA | $7.25 |
37 | OHIO | $8.10 |
38 | OKLAHOMA | $7.25 |
39 | OREGON | $9.50/9.75 |
40 | PENNSYLVANIA | $7.25 |
41 | RHODE ISLAND | $9.60 |
42 | S. CAROLINA | NONE |
43 | S. DAKOTA | $8.55 |
44 | TENNESSEE | NONE |
45 | TEXAS | $7.25 |
46 | UTAH | $7.25 |
47 | VERMONT | $9.60 |
48 | VIRGINIA | $7.25 |
49 | WASHINGTON | $9.47 |
50 | WEST VIRGINIA | $8.75 |
51 | WISCONSIN | $7.25 |
52 | WYOMING | $5.15 |
# Remove the free-text notes column from df in place; only the state and
# wage columns are exported.
df.drop(columns=['notes'], inplace=True)

# Save the remaining columns as CSV, leaving out the DataFrame index.
output_path = 'min_wage_per_state.csv'
df.to_csv(output_path, index=False)