import requests  # pip install requests
from BeautifulSoup import BeautifulSoup  # pip install BeautifulSoup

# XXX: Any URL containing a geo microformat...
URL = 'http://en.wikipedia.org/wiki/Franklin,_Tennessee'

# In the case of extracting content from Wikipedia, be sure to
# review its "Bot Policy," which is defined at
# http://meta.wikimedia.org/wiki/Bot_policy#Unacceptable_usage

req = requests.get(URL, headers={'User-Agent': "Mining the Social Web"})
soup = BeautifulSoup(req.text)

geoTag = soup.find(True, 'geo')

# A geo value may appear either as nested latitude/longitude elements or as
# a single semicolon-delimited string. (A self-contained sample of both
# variants appears at the end of this section.)
if geoTag and len(geoTag) > 1:
    lat = geoTag.find(True, 'latitude').string
    lon = geoTag.find(True, 'longitude').string
    print 'Location is at', lat, lon
elif geoTag and len(geoTag) == 1:
    (lat, lon) = geoTag.string.split(';')
    (lat, lon) = (lat.strip(), lon.strip())
    print 'Location is at', lat, lon
else:
    print 'No location found'

from IPython.display import IFrame
from IPython.core.display import display

# Google Maps URL template for an iframe. Note that format() must be applied
# to the whole template, not just the second string literal, or the first
# pair of placeholders is never filled in.
google_maps_url = ("http://maps.google.com/maps?q={0}+{1}&"
                   "ie=UTF8&t=h&z=14&{0},{1}&output=embed").format(lat, lon)

display(IFrame(google_maps_url, '425px', '350px'))

import requests
import json
import BeautifulSoup  # pip install BeautifulSoup

# Pass in a URL containing hRecipe...
URL = 'http://britishfood.about.com/od/recipeindex/r/applepie.htm'

# Parse out some of the pertinent information for a recipe.
# See http://microformats.org/wiki/hrecipe.
def parse_hrecipe(url):
    req = requests.get(url)
    soup = BeautifulSoup.BeautifulSoup(req.text)

    hrecipe = soup.find(True, 'hrecipe')

    if hrecipe and len(hrecipe) > 1:
        fn = hrecipe.find(True, 'fn').string
        author = hrecipe.find(True, 'author').find(text=True)
        ingredients = [i.string
                       for i in hrecipe.findAll(True, 'ingredient')
                       if i.string is not None]

        instructions = []
        for i in hrecipe.find(True, 'instructions'):
            if type(i) == BeautifulSoup.Tag:
                s = ''.join(i.findAll(text=True)).strip()
            elif type(i) == BeautifulSoup.NavigableString:
                s = i.string.strip()
            else:
                continue
            if s != '':
                instructions += [s]

        return {
            'name': fn,
            'author': author,
            'ingredients': ingredients,
            'instructions': instructions,
        }
    else:
        return {}

# (A made-up hRecipe fragment suitable for offline experimentation appears
# at the end of this section.)
recipe = parse_hrecipe(URL)
print json.dumps(recipe, indent=4)

import requests
import json
from BeautifulSoup import BeautifulSoup

# Pass in a URL that contains hReview-aggregate info...
URL = 'http://britishfood.about.com/od/recipeindex/r/applepie.htm'

def parse_hreview_aggregate(url, item_type):
    req = requests.get(url)
    soup = BeautifulSoup(req.text)

    # Find the hRecipe or whatever other kind of parent item encapsulates
    # the hReview (a required field).
    item_element = soup.find(True, item_type)
    item = item_element.find(True, 'item').find(True, 'fn').text

    # And now parse out the hReview itself
    hreview = soup.find(True, 'hreview-aggregate')

    # Required field: the rating, expressed via the value-title pattern.
    # (A sketch of this markup pattern appears at the end of this section.)
    rating = hreview.find(True, 'rating').find(True, 'value-title')['title']

    # Optional fields
    try:
        count = hreview.find(True, 'count').text
    except AttributeError:  # optional
        count = None
    try:
        votes = hreview.find(True, 'votes').text
    except AttributeError:  # optional
        votes = None
    try:
        summary = hreview.find(True, 'summary').text
    except AttributeError:  # optional
        summary = None

    return {
        'item': item,
        'rating': rating,
        'count': count,
        'votes': votes,
        'summary': summary,
    }

# Find hReview-aggregate information for an hRecipe
reviews = parse_hreview_aggregate(URL, 'hrecipe')
print json.dumps(reviews, indent=4)

%%bash
FuXi --rules=resources/ch08-semanticweb/chuck-norris.n3 --ruleFacts --naive

%%bash
FuXi --help
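# A self-contained sketch (sample markup is made up, not from a live page)
# that exercises both geo variants the branching logic above distinguishes:
# a parent element with nested "latitude"/"longitude" children, and a single
# semicolon-delimited text value.
from BeautifulSoup import BeautifulSoup  # pip install BeautifulSoup

SAMPLES = [
    # Variant 1: nested child elements, so len(geoTag) > 1
    ('<span class="geo"><span class="latitude">36.16</span>'
     '<span class="longitude">-86.78</span></span>'),
    # Variant 2: one semicolon-delimited text node, so len(geoTag) == 1
    '<span class="geo">36.16; -86.78</span>',
]

for html in SAMPLES:
    geoTag = BeautifulSoup(html).find(True, 'geo')
    if geoTag and len(geoTag) > 1:
        lat = geoTag.find(True, 'latitude').string
        lon = geoTag.find(True, 'longitude').string
    elif geoTag and len(geoTag) == 1:
        (lat, lon) = [s.strip() for s in geoTag.string.split(';')]
    print 'Location is at', lat, lon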
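# A hand-rolled hRecipe fragment (entirely made up, and far simpler than a
# real page) for experimenting offline with the same BeautifulSoup lookups
# that parse_hrecipe uses; handy since the about.com URL above may no
# longer resolve.
from BeautifulSoup import BeautifulSoup

sample_hrecipe = '''
<div class="hrecipe">
  <h1 class="fn">Apple Pie</h1>
  <span class="author">Jane Doe</span>
  <ul>
    <li class="ingredient">Apples</li>
    <li class="ingredient">Shortcrust pastry</li>
  </ul>
  <ol class="instructions">
    <li>Fill the pastry case with sliced apples.</li>
    <li>Bake until golden.</li>
  </ol>
</div>
'''

hrecipe = BeautifulSoup(sample_hrecipe).find(True, 'hrecipe')
print hrecipe.find(True, 'fn').string                          # Apple Pie
print [i.string for i in hrecipe.findAll(True, 'ingredient')]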
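# The rating extraction in parse_hreview_aggregate relies on the
# microformats value-title pattern: the machine-readable value lives in the
# title attribute of a nested element with class "value-title". A minimal
# sketch against made-up markup:
from BeautifulSoup import BeautifulSoup

sample_hreview = '''
<div class="hreview-aggregate">
  <span class="rating">
    <span class="value-title" title="4.5"></span>four and a half stars
  </span>
  <span class="count">200</span>
</div>
'''

hreview = BeautifulSoup(sample_hreview).find(True, 'hreview-aggregate')
print hreview.find(True, 'rating').find(True, 'value-title')['title']  # 4.5
print hreview.find(True, 'count').string                               # 200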
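%%bash
# A made-up rules-plus-facts file (not the chuck-norris.n3 that ships with
# the book's resources) illustrating the kind of N3 input the FuXi command
# above consumes: one fact plus one rule, from which forward-chaining
# inference can derive a new fact.
cat << 'EOF' > /tmp/example-rules.n3
@prefix : <http://example.org/#> .
:Socrates a :Man .
{ ?x a :Man } => { ?x a :Mortal } .
EOF
FuXi --rules=/tmp/example-rules.n3 --ruleFacts --naive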