# Walkthrough of the `pattern` library, part 1: web services (pattern.web)
# and English inflection helpers (pattern.en).
#
# The bare expressions below are kept from the interactive session this
# script was exported from: they only display a value in a REPL/notebook and
# have no visible effect when the file is run as a plain script.

import os

# API credentials are read from the environment; a missing variable raises
# KeyError right here, before any service object is built.
creds = {
    'twitter': (
        os.environ['TW_API_KEY'],
        os.environ['TW_API_SEC'],
        (os.environ['TW_ACC_KEY'], os.environ['TW_ACC_SEC']),
    ),
    'facebook': os.environ['FB_API_KEY'],
    'google': os.environ['GG_API_KEY'],
}

import pattern
from pattern.web import Twitter, Facebook, DuckDuckGo, Google, Bing, Wikipedia, Wikia, Newsfeed
from pattern.web import SEARCH, NEWS, IMAGE, SPARQL, COMMENTS, LIKES

tw = Twitter(license=creds['twitter'])
fb = Facebook(license=creds['facebook'])
ggl = Google(license=creds['google'])

# None uses credentials shared across ALL pattern users
ddg = DuckDuckGo(license=None)
bing = Bing(license=None)
wp = Wikipedia(license=None)
wa = Wikia(license=None)
nf = Newsfeed(license=None)  # RSS / atom

# --- Twitter -----------------------------------------------------------------
tw.trends()

twitter_result = tw.search('ananas', count=100)
twitter_result[0]

# Tally the language attribute of the returned tweets.
from collections import Counter
languages = [tweet.language for tweet in twitter_result]
Counter(languages)

# Same hashtag, searched around two different cities.
from pattern.web.locale import geocode
in_result = tw.search('#EngvInd', count=50, geo=geocode('New Delhi')[:2])
en_result = tw.search('#EngvInd', count=50, geo=geocode('London')[:2])

# Mean of the first sentiment() component per city. `sum` replaces the bare
# `reduce(lambda ...)` call: `reduce` is not a builtin on Python 3, and the
# rest of the file already averages with sum()/len().
from pattern.en import sentiment
sum(sentiment(t.text)[0] for t in in_result) / len(in_result)
sum(sentiment(t.text)[0] for t in en_result) / len(en_result)

# --- Facebook ----------------------------------------------------------------
fb_result = fb.search('dragon age inquisition', type=SEARCH, count=100)
fb_result[0]

# Show the likers and the comments of the first post that has both, then stop.
# NOTE(review): the source was collapsed onto one line, so the nesting of
# `break` is reconstructed; it is placed inside the `if` -- confirm.
for post in fb_result:
    if post.likes > 0 and post.comments > 0:
        print('\n\n'.join(
            [l.author[1] for l in fb.search(post.id, type=LIKES)]
        ))
        print('-' * 10)
        print('\n\n'.join(
            [c.text for c in fb.search(post.id, type=COMMENTS)]
        ))
        break

# --- Google ------------------------------------------------------------------
google_result = ggl.search('dragon age inquisition', count=10)
google_result[0]

# Language identification for the first ten Facebook posts; the first element
# of the first identification result is fed to translate() as the input
# language.
lang_id = [ggl.identify(res.text) for res in fb_result[:10]]
lang_id
ggl.translate(fb_result[0].text, input=lang_id[0][0], output='de')

# --- URL / e-mail extraction -------------------------------------------------
from pattern.web import find_urls, find_email
# The same URL appears twice on purpose, to exercise unique=True.
s = (
    ' Find out more at the PUGS website (http://pugs.org.sg), '
    'or email us at idontknow@whatouremail.is! \n'
    'This is a decoy URL http://pugs.org.sg. '
)
(find_urls(s, unique=True),
 find_email(s, unique=True))

# --- Ranking terms by association with a context word --------------------------
from pattern.web import sort, GOOGLE
terms = [
    'french',
    'german',
    'japanese',
    'chinese',
    'persian',
    'hun',
    'american',
    'russian',
    'swede',
    'polish',
    'singaporean',
    'politician',
]
sort_result = sort(terms=terms, context='dangerous', prefix=True,
                   service=GOOGLE, license=creds['google'])
for weight, term in sort_result:
    # One pre-formatted string: prints e.g. "12.34% term" identically under
    # the Python 2 print statement and the Python 3 print function.
    print('%.2f%% %s' % (weight * 100, term))

# --- Articles ----------------------------------------------------------------
from pattern.en import article, referenced
(article('harbour'),
 referenced('umbrella'))

# --- Pluralisation / singularisation -----------------------------------------
from pattern.en import (pluralize as pluralise,
                        singularize as singularise)
pluralise('octopus')
pluralise('octopus', classical=False)
(pluralise('I'),
 pluralise('my'),
 pluralise('her'))
singularise('bacteria')
pluralise('virus')
(singularise('viruses'),
 singularise('virii'),
 singularise('virus'))
singularise('viri')

# --- Comparatives / superlatives ---------------------------------------------
from pattern.en import comparative, superlative
'python is %s than ruby!' % comparative('good')
'iPython is %s python shell' % (
    referenced(superlative('ideal'), article=pattern.en.DEFINITE))

# --- Verb conjugation ----------------------------------------------------------
from pattern.en import conjugate, lemma, lexeme, tenses
lemma('are')
(tenses('be'),
 tenses('were'))
lexeme('be')
(conjugate('nibble', '1sgp'),
 conjugate('nibble', '3sg'))
# Same verb and tense, with and without parse=True.
(conjugate('google', tense=pattern.en.PARTICIPLE, parse=False),
 conjugate('google', tense=pattern.en.PARTICIPLE, parse=True))

# --- Numbers and quantities ----------------------------------------------------
from pattern.en import number
number('five thousand six hundred and eighty nine')
from pattern.en import numerals
numerals('42.128', round=2)

animals = ['goose', 'goose', 'duck', 'chicken', 'chicken', 'chicken']
orangebirds = {'carrot': 100, 'parrot': 5, 'orange': 20}
from pattern.en import quantify
(quantify(animals),
 quantify(orangebirds))

# --- Spelling suggestions ------------------------------------------------------
from pattern.en import suggest
suggest('psuh')
suggest('carot')

# --- n-grams -------------------------------------------------------------------
from pattern.en import ngrams
ngrams('This is a sentence', n=2)
ngrams('This is a sentence', n=3)

# Sample sentence consumed by the tagging/parsing steps further down the file.
short_s = 'When I saw the prices for some of those apartments, I was startled.'
# Walkthrough of the `pattern` library, part 2: tagging and parsing, tweet
# sentiment, WordNet and pattern.search.
#
# Relies on `tw`, `geocode` and `short_s` assigned earlier in this file. Bare
# expressions are kept from the interactive session this script was exported
# from; they only display a value in a REPL/notebook.

long_s = (
    'Personally, I think the only unassailable definition is the one often '
    'attributed to the great editor John W Campbell: '
    '"Science fiction is what I say it is."'
)


def _mean(values):
    """Return the arithmetic mean of *values*, or 0.0 when it is empty."""
    # A live, geo-restricted search may return nothing; avoid dividing by zero.
    return sum(values) / len(values) if values else 0.0


# --- Part-of-speech tagging ------------------------------------------------------
from pattern.en import tag
for word, pos in tag(short_s):
    # Keep every tag that begins with 'NN'.
    if pos[:2] == 'NN':
        print(word)

# --- Parsing ---------------------------------------------------------------------
# Every parse()/parsetree() call below switches on the same set of annotations.
parse_options = dict(tokenize=True, tags=True, chunks=True,
                     relations=True, lemmata=True)

from pattern.en import parse
parsed = parse(short_s, **parse_options)
parsed
from pattern.en import pprint
pprint(parsed)

from pattern.en import parsetree
parsetree(short_s, **parse_options)
tr = parsetree(long_s, **parse_options)
print(type(tr))
[type(item) for item in tr]
type(tr[0][0])
for sentence in tr:
    for chunk in sentence.chunks:
        print(chunk.type, [(w.string, w.type) for w in chunk.words])

# --- Tweet sentiment, two cities -------------------------------------------------
from pattern.en import sentiment
sg_result = tw.search('causeway', count=50, geo=geocode('Singapore')[:2])
my_result = tw.search('causeway', count=50, geo=geocode('Kuala Lumpur')[:2])
# First sentiment() component of every tweet, sorted ascending for plotting.
sg_sentiment = sorted(sentiment(tweet.text)[0] for tweet in sg_result)
my_sentiment = sorted(sentiment(tweet.text)[0] for tweet in my_result)
sg_avg = _mean(sg_sentiment)
my_avg = _mean(my_sentiment)
sg_avg, my_avg

# NOTE(review): linspace/plot/grid/legend are not imported anywhere in the
# visible source -- presumably they come from a pylab-style interactive
# namespace (e.g. IPython's %pylab); confirm before running as a script.
x = linspace(0, 50)
plot(sg_sentiment, 'r-', label='sg')
# Flat reference line at the mean, sized from `x` rather than a second
# hard-coded 50 so the x and y sequences cannot drift apart.
plot(x, [sg_avg] * len(x), 'r--', label='sg_avg')
plot(my_sentiment, 'g-', label='my')
plot(x, [my_avg] * len(x), 'g--', label='my_avg')
grid(b=True, which='both')
legend(loc='best')

# --- WordNet ---------------------------------------------------------------------
from pattern.en import wordnet
birds = wordnet.synsets('bird')
birds
bird = birds[0]
'Definition', bird.gloss
'- Synonyms', bird.synonyms
'^ Hypernyms', bird.hypernyms()
'v Hyponyms', bird.hyponyms()
'^ Holonyms', bird.holonyms()
'v Meronyms', bird.meronyms()
wordnet.synsets('owl')[0].holonyms(), wordnet.synsets('amoeba')[0].holonyms()

kitty = wordnet.synsets('kitten')[0]
pup = wordnet.synsets('puppy')[0]
wordnet.ancestor(kitty, pup)

human = wordnet.synsets('human')[0]
cuy = wordnet.synsets('guinea pig')[0]
(wordnet.similarity(human, cuy),
 wordnet.similarity(human, kitty))

# --- pattern.search --------------------------------------------------------------
s = 'the fluffy brown bunnies hopped across the wet grass with much gusto.'
from pattern.search import search
search('NP', parsetree(s))

# Register a few words under the 'animal' type so that the ANIMAL constraint
# can be used in the searches below.
from pattern.search import taxonomy
for animal in ('bunny', 'dog', 'cat', 'banana'):
    taxonomy.append(animal, type='animal')
search('ANIMAL', parsetree(s, lemmata=True))

from pattern.search import Pattern
pat = Pattern.fromstring('{JJ} {ANIMAL} {VP}')
match = pat.match(parsetree(s, lemmata=True))
# Print groups 0 through 3 of the match (the pattern declares three {} groups).
for i in range(4):
    print(match.group(i))