#imports
import requests
import string
from bs4 import BeautifulSoup
import re
import csv

# Fetch the index page that lists the libraries. A timeout stops the script
# from hanging forever on an unresponsive server.
page = requests.get('http://www.accesstoresearch.org.uk/libraries', timeout=30)

soup = BeautifulSoup(page.text)
librarylist = soup.find('div', class_='col-lft')
# find() returns None when the container div is absent; treat that as
# "no libraries" instead of crashing on None.find_all.
libraries = librarylist.find_all('ul') if librarylist is not None else []

# Debug output: number of <ul> entries found and the first one.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(len(libraries))
if libraries:
    print(libraries[0])

# Build one [name, url] record per <ul>, taken from its first link.
libs = []
for library in libraries:
    # Look the anchor up once; skip entries with no link or no href rather
    # than failing on them.
    link = library.find('a')
    if link is None or not link.get('href'):
        continue
    name = link.text
    url = link['href']
    libs.append([name, url])


# Debug output: the first record collected.
if libs:
    print(libs[0])

# Compiled UK postcode pattern, used with .search() to pull a postcode out of
# a larger piece of text.  The expression was taken from
# http://www.regxlib.com/REDetails.aspx?regexp_id=260
# It is written as adjacent string pieces purely for readability; they
# concatenate to a single pattern:
#   outward code, 1-2 spaces, inward code -- or the literal "GIR 0AA".
ukpc = re.compile(
    r'('
    r'[A-PR-UWYZ0-9][A-HK-Y0-9][AEHMNPRTVXY0-9]?[ABEHMNPRVWXY0-9]?'
    r' {1,2}[0-9][ABD-HJLN-UW-Z]{2}'
    r'|GIR 0AA'
    r')'
)

def process(raw_postcode, page):
    """
    Given a postcode from the 'doesn't work' list, process page correctly.

    Each known raw postcode identifies one library site; for that site the
    postcode is read from a specific element of the page instead of from
    the first match anywhere in the page text.

    raw_postcode -- the postcode the page-wide regex search produced.
    page         -- the HTML text of the library's page.

    Returns the postcode found inside the site-specific element.  If
    raw_postcode is not one of the known cases, the expected element is
    not on the page, or the element contains no postcode, raw_postcode is
    returned unchanged so the caller keeps the value it already had.
    """
    soup = BeautifulSoup(page)

    # Locate the element that holds the real postcode for this site.
    if raw_postcode == 'OX1 1ND':
        elem = soup.find(['span', 'div'], class_='postal-code')
    elif raw_postcode == 'ME14 1LQ':
        elem = soup.find('span', id='ctl00__mainContent_uxPostcodeLabel')
    elif raw_postcode == 'HX1 1UJ':
        # The second 'contactitem' list is the one wanted; guard against
        # pages that have fewer than two.
        items = soup.find_all('ul', class_='contactitem')
        elem = items[1] if len(items) > 1 else None
    else:
        # Not a postcode this function knows how to correct.
        return raw_postcode

    if elem is None:
        return raw_postcode

    match = ukpc.search(elem.get_text())
    if match is None:
        return raw_postcode
    return match.group(0)

# First-match postcodes that are on the "doesn't work" list: when the
# page-wide search yields one of these, process() is asked to extract the
# postcode from a site-specific element instead.
tocorrect = ['OX1 1ND', 'ME14 1LQ', 'HX1 1UJ']
for library in libs:
    # library is [name, url]; every url is a separate request, so a single
    # slow or unreachable server must not hang or abort the whole run.
    try:
        page = requests.get(library[1], timeout=30)
    except requests.RequestException as err:
        # Leave this record without a postcode and carry on with the rest.
        print('Skipping %s: %s' % (library[1], err))
        continue
    m = ukpc.search(page.text)
    if m:
        postcode = m.group(0)
        if postcode in tocorrect:
            postcode = process(postcode, page.text)
        # The postcode becomes the third field of the record; records with
        # no match keep just [name, url].
        library.append(postcode)


# Write every collected record to a single CSV file: a header row followed
# by one row per entry in libs.
filename = 'libraries.csv'
rows = [['Library', 'url', 'postcode']]
rows.extend(libs)
with open(filename, 'w') as f:
    writer = csv.writer(f)
    writer.writerows(rows)

# Also write the records in batches, one CSV file per batch.
# Each entry is (file name label, slice start, slice stop).  Slice stops are
# exclusive, so every batch has to start exactly where the previous one
# stopped -- starting at 101 / 201 would silently leave out libs[100] and
# libs[200].  The labels keep the established file names (libraries0.csv,
# libraries101.csv, libraries201.csv).  The last stop is None so the final
# batch takes every remaining record instead of cutting off at a fixed row.
for segment in [(0, 0, 100), (101, 100, 200), (201, 200, None)]:
    label, start, stop = segment
    filename = 'libraries%s.csv' % str(label)
    with open(filename, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(['Library', 'url', 'postcode'])
        writer.writerows(libs[start:stop])