import requests  # pip install requests
from BeautifulSoup import BeautifulSoup  # pip install BeautifulSoup

# XXX: Any URL containing a geo microformat...
URL = 'http://en.wikipedia.org/wiki/Franklin,_Tennessee'

# In the case of extracting content from Wikipedia, be sure to
# review its "Bot Policy," which is defined at
# http://meta.wikimedia.org/wiki/Bot_policy#Unacceptable_usage

req = requests.get(URL, headers={'User-Agent': "Mining the Social Web"})
soup = BeautifulSoup(req.text)

geoTag = soup.find(True, 'geo')

# A geo value may appear either as nested latitude/longitude elements or as
# a single semicolon-delimited string. (A self-contained sample of both
# variants appears at the end of this section.)
if geoTag and len(geoTag) > 1:
    lat = geoTag.find(True, 'latitude').string
    lon = geoTag.find(True, 'longitude').string
    print 'Location is at', lat, lon
elif geoTag and len(geoTag) == 1:
    (lat, lon) = geoTag.string.split(';')
    (lat, lon) = (lat.strip(), lon.strip())
    print 'Location is at', lat, lon
else:
    print 'No location found'

from IPython.display import IFrame
from IPython.core.display import display

# Google Maps URL template for an iframe. Note that format() must be applied
# to the whole template, not just the second string literal, or the first
# pair of placeholders is never filled in.
google_maps_url = ("http://maps.google.com/maps?q={0}+{1}&"
                   "ie=UTF8&t=h&z=14&{0},{1}&output=embed").format(lat, lon)

display(IFrame(google_maps_url, '425px', '350px'))

import requests
import json
import BeautifulSoup  # pip install BeautifulSoup

# Pass in a URL containing hRecipe...
URL = 'http://britishfood.about.com/od/recipeindex/r/applepie.htm'

# Parse out some of the pertinent information for a recipe.
# See http://microformats.org/wiki/hrecipe.
def parse_hrecipe(url):
    req = requests.get(url)
    soup = BeautifulSoup.BeautifulSoup(req.text)

    hrecipe = soup.find(True, 'hrecipe')

    if hrecipe and len(hrecipe) > 1:
        fn = hrecipe.find(True, 'fn').string
        author = hrecipe.find(True, 'author').find(text=True)
        ingredients = [i.string
                       for i in hrecipe.findAll(True, 'ingredient')
                       if i.string is not None]

        instructions = []
        for i in hrecipe.find(True, 'instructions'):
            if type(i) == BeautifulSoup.Tag:
                s = ''.join(i.findAll(text=True)).strip()
            elif type(i) == BeautifulSoup.NavigableString:
                s = i.string.strip()
            else:
                continue
            if s != '':
                instructions += [s]

        return {
            'name': fn,
            'author': author,
            'ingredients': ingredients,
            'instructions': instructions,
        }
    else:
        return {}

# (A made-up hRecipe fragment suitable for offline experimentation appears
# at the end of this section.)
recipe = parse_hrecipe(URL)
print json.dumps(recipe, indent=4)

import requests
import json
from BeautifulSoup import BeautifulSoup

# Pass in a URL that contains hReview-aggregate info...
URL = 'http://britishfood.about.com/od/recipeindex/r/applepie.htm'

def parse_hreview_aggregate(url, item_type):
    req = requests.get(url)
    soup = BeautifulSoup(req.text)

    # Find the hRecipe or whatever other kind of parent item encapsulates
    # the hReview (a required field).
    item_element = soup.find(True, item_type)
    item = item_element.find(True, 'item').find(True, 'fn').text

    # And now parse out the hReview itself
    hreview = soup.find(True, 'hreview-aggregate')

    # Required field: the rating, expressed via the value-title pattern.
    # (A sketch of this markup pattern appears at the end of this section.)
    rating = hreview.find(True, 'rating').find(True, 'value-title')['title']

    # Optional fields
    try:
        count = hreview.find(True, 'count').text
    except AttributeError:  # optional
        count = None
    try:
        votes = hreview.find(True, 'votes').text
    except AttributeError:  # optional
        votes = None
    try:
        summary = hreview.find(True, 'summary').text
    except AttributeError:  # optional
        summary = None

    return {
        'item': item,
        'rating': rating,
        'count': count,
        'votes': votes,
        'summary': summary,
    }

# Find hReview-aggregate information for an hRecipe
reviews = parse_hreview_aggregate(URL, 'hrecipe')
print json.dumps(reviews, indent=4)

%%bash
FuXi --rules=resources/ch08-semanticweb/chuck-norris.n3 --ruleFacts --naive

%%bash
FuXi --help
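# A self-contained sketch (sample markup is made up, not from a live page)
# that exercises both geo variants the branching logic above distinguishes:
# a parent element with nested "latitude"/"longitude" children, and a single
# semicolon-delimited text value.
from BeautifulSoup import BeautifulSoup  # pip install BeautifulSoup

SAMPLES = [
    # Variant 1: nested child elements, so len(geoTag) > 1
    ('<span class="geo"><span class="latitude">36.16</span>'
     '<span class="longitude">-86.78</span></span>'),
    # Variant 2: one semicolon-delimited text node, so len(geoTag) == 1
    '<span class="geo">36.16; -86.78</span>',
]

for html in SAMPLES:
    geoTag = BeautifulSoup(html).find(True, 'geo')
    if geoTag and len(geoTag) > 1:
        lat = geoTag.find(True, 'latitude').string
        lon = geoTag.find(True, 'longitude').string
    elif geoTag and len(geoTag) == 1:
        (lat, lon) = [s.strip() for s in geoTag.string.split(';')]
    print 'Location is at', lat, lon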
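# A hand-rolled hRecipe fragment (entirely made up, and far simpler than a
# real page) for experimenting offline with the same BeautifulSoup lookups
# that parse_hrecipe uses; handy since the about.com URL above may no
# longer resolve.
from BeautifulSoup import BeautifulSoup

sample_hrecipe = '''
<div class="hrecipe">
  <h1 class="fn">Apple Pie</h1>
  <span class="author">Jane Doe</span>
  <ul>
    <li class="ingredient">Apples</li>
    <li class="ingredient">Shortcrust pastry</li>
  </ul>
  <ol class="instructions">
    <li>Fill the pastry case with sliced apples.</li>
    <li>Bake until golden.</li>
  </ol>
</div>
'''

hrecipe = BeautifulSoup(sample_hrecipe).find(True, 'hrecipe')
print hrecipe.find(True, 'fn').string                          # Apple Pie
print [i.string for i in hrecipe.findAll(True, 'ingredient')]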
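# The rating extraction in parse_hreview_aggregate relies on the
# microformats value-title pattern: the machine-readable value lives in the
# title attribute of a nested element with class "value-title". A minimal
# sketch against made-up markup:
from BeautifulSoup import BeautifulSoup

sample_hreview = '''
<div class="hreview-aggregate">
  <span class="rating">
    <span class="value-title" title="4.5"></span>four and a half stars
  </span>
  <span class="count">200</span>
</div>
'''

hreview = BeautifulSoup(sample_hreview).find(True, 'hreview-aggregate')
print hreview.find(True, 'rating').find(True, 'value-title')['title']  # 4.5
print hreview.find(True, 'count').string                               # 200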
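%%bash
# A made-up rules-plus-facts file (not the chuck-norris.n3 that ships with
# the book's resources) illustrating the kind of N3 input the FuXi command
# above consumes: one fact plus one rule, from which forward-chaining
# inference can derive a new fact.
cat << 'EOF' > /tmp/example-rules.n3
@prefix : <http://example.org/#> .
:Socrates a :Man .
{ ?x a :Man } => { ?x a :Mortal } .
EOF
FuXi --rules=/tmp/example-rules.n3 --ruleFacts --naive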