import twitter

def oauth_login():
    # XXX: Go to http://twitter.com/apps/new to create an app and get values
    # for these credentials, which you'll need to provide in place of the
    # empty string values that are defined as placeholders.
    # See https://dev.twitter.com/docs/auth/oauth for more information
    # on Twitter's OAuth implementation.

    CONSUMER_KEY = ''
    CONSUMER_SECRET = ''
    OAUTH_TOKEN = ''
    OAUTH_TOKEN_SECRET = ''

    auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                               CONSUMER_KEY, CONSUMER_SECRET)

    twitter_api = twitter.Twitter(auth=auth)
    return twitter_api

# Sample usage
twitter_api = oauth_login()

# Nothing to see by displaying twitter_api except that it's now a
# defined variable
print twitter_api


import json
import pymongo # pip install pymongo

def insert_into_mongo(data, mongo_db, mongo_db_coll, **mongo_conn_kw):

    # Connects to the MongoDB server running on
    # localhost:27017 by default
    client = pymongo.MongoClient(**mongo_conn_kw)

    # Get a reference to a particular database
    db = client[mongo_db]

    # Reference a particular collection in the database
    coll = db[mongo_db_coll]

    # Perform a bulk insert and return the IDs
    return coll.insert(data)

# If a document with the same _id already exists, insert_into_mongo raises an
# error. save_to_mongo will create a new document if the _id does not exist,
# or replace the old document with the new one if it does.
def save_to_mongo(data, mongo_db, mongo_db_coll, **mongo_conn_kw):

    # Connects to the MongoDB server running on
    # localhost:27017 by default
    client = pymongo.MongoClient(**mongo_conn_kw)

    # Get a reference to a particular database
    db = client[mongo_db]

    # Reference a particular collection in the database
    coll = db[mongo_db_coll]

    return coll.save(data)

def load_from_mongo(mongo_db, mongo_db_coll, return_cursor=False,
                    criteria=None, projection=None, **mongo_conn_kw):

    # Optionally, use criteria and projection to limit the data that is
    # returned, as documented in
    # http://docs.mongodb.org/manual/reference/method/db.collection.find/

    # Consider leveraging MongoDB's aggregation framework for more
    # sophisticated queries.

    client = pymongo.MongoClient(**mongo_conn_kw)
    db = client[mongo_db]
    coll = db[mongo_db_coll]

    if criteria is None:
        criteria = {}

    if projection is None:
        cursor = coll.find(criteria)
    else:
        cursor = coll.find(criteria, projection)

    # Returning a cursor is recommended for large amounts of data
    if return_cursor:
        return cursor
    else:
        return [item for item in cursor]

# Some MongoDB utility functions, useful how-tos, etc.

def mongo_dbs(**mongo_conn_kw):
    mc = pymongo.MongoClient(**mongo_conn_kw)
    print mc.database_names()

#mongo_dbs()

def getCollections_in_mongo(mongo_db, **mongo_conn_kw):
    client = pymongo.MongoClient(**mongo_conn_kw)
    db = client[mongo_db]
    return db.collection_names()

# Sample usage
#getCollections_in_mongo('twitter')[:10]

## Drop a database
#from pymongo import Connection
#c = Connection()
#c.drop_database('twitter')
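# The save_to_mongo/insert_into_mongo distinction matters for the crawler
# below, which may revisit a user: save_to_mongo upserts on _id, whereas
# insert_into_mongo raises a DuplicateKeyError on a repeated _id. A minimal
# sketch (not from the original; assumes a local MongoDB server and a
# throwaway 'scratch' database):

#insert_into_mongo([{'_id': 1, 'screen_name': 'example'}], 'scratch', 'demo')
#insert_into_mongo([{'_id': 1, 'screen_name': 'example'}], 'scratch', 'demo') # DuplicateKeyError
#save_to_mongo({'_id': 1, 'screen_name': 'example2'}, 'scratch', 'demo') # replaces the old doc
#print load_from_mongo('scratch', 'demo', criteria={'_id': 1})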
import sys
import time
from urllib2 import URLError
from httplib import BadStatusLine
import json
import twitter

def make_twitter_request(twitter_api_func, max_errors=10, *args, **kw):

    # A nested helper function that handles common HTTPErrors. Returns an
    # updated value for wait_period if the problem is a 500-level error.
    # Blocks until the rate limit is reset if it's a rate-limiting issue
    # (429 error). Returns None for 401 and 404 errors, which require
    # special handling by the caller.
    def handle_twitter_http_error(e, wait_period=2, sleep_when_rate_limited=True):

        if wait_period > 3600: # Seconds
            print >> sys.stderr, 'Too many retries. Quitting.'
            raise e

        # See https://dev.twitter.com/docs/error-codes-responses for common codes

        if e.e.code == 401:
            print >> sys.stderr, 'Encountered 401 Error (Not Authorized)'
            return None
        elif e.e.code == 404:
            print >> sys.stderr, 'Encountered 404 Error (Not Found)'
            return None
        elif e.e.code == 429:
            print >> sys.stderr, 'Encountered 429 Error (Rate Limit Exceeded)'
            if sleep_when_rate_limited:
                print >> sys.stderr, "Retrying in 15 minutes...ZzZ..."
                sys.stderr.flush()
                time.sleep(60*15 + 5)
                print >> sys.stderr, '...ZzZ...Awake now and trying again.'
                return 2
            else:
                raise e # Caller must handle the rate limiting issue
        elif e.e.code in (500, 502, 503, 504):
            print >> sys.stderr, 'Encountered %i Error. Retrying in %i seconds' % \
                (e.e.code, wait_period)
            time.sleep(wait_period)
            wait_period *= 1.5
            return wait_period
        else:
            raise e

    # End of nested helper function

    wait_period = 2
    error_count = 0

    while True:
        try:
            return twitter_api_func(*args, **kw)
        except twitter.api.TwitterHTTPError, e:
            error_count = 0
            wait_period = handle_twitter_http_error(e, wait_period)
            if wait_period is None:
                return
        except URLError, e:
            error_count += 1
            time.sleep(wait_period)
            wait_period *= 1.5
            print >> sys.stderr, "URLError encountered. Continuing."
            if error_count > max_errors:
                print >> sys.stderr, "Too many consecutive errors...bailing out."
                raise
        except BadStatusLine, e:
            error_count += 1
            time.sleep(wait_period)
            wait_period *= 1.5
            print >> sys.stderr, "BadStatusLine encountered. Continuing."
            if error_count > max_errors:
                print >> sys.stderr, "Too many consecutive errors...bailing out."
                raise

# Sample usage

#twitter_api = oauth_login()

# See https://dev.twitter.com/docs/api/1.1/get/users/lookup for
# twitter_api.users.lookup

#response = make_twitter_request(twitter_api.users.lookup,
#                                screen_name="SocialWebMining")
#print json.dumps(response, indent=1)
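# Because make_twitter_request returns None for 401/404 errors rather than
# raising, callers should check for None before using the response. A hedged
# sketch, not from the original; the screen name is just a placeholder:

#response = make_twitter_request(twitter_api.users.show,
#                                screen_name="someprotectedordeletedaccount")
#if response is None:
#    print >> sys.stderr, 'User is protected or does not exist; skipping.'
#else:
#    print json.dumps(response, indent=1)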
def get_user_profile(twitter_api, screen_names=None, user_ids=None):

    # Must have either screen_names or user_ids (logical xor)
    assert (screen_names != None) != (user_ids != None), \
        "Must have screen_names or user_ids, but not both"

    items_to_info = {}

    items = screen_names or user_ids

    print >> sys.stderr, 'Grabbing {0} user data records, up to 100 at a time...'.format(len(items))

    while len(items) > 0:

        # Process 100 items at a time per the API specifications for
        # /users/lookup. See
        # https://dev.twitter.com/docs/api/1.1/get/users/lookup for details.
        items_str = ','.join([str(item) for item in items[:100]])
        items = items[100:]

        if screen_names:
            response = make_twitter_request(twitter_api.users.lookup,
                                            screen_name=items_str)
        else: # user_ids
            response = make_twitter_request(twitter_api.users.lookup,
                                            user_id=items_str)

        for user_info in response:
            if screen_names:
                items_to_info[user_info['screen_name']] = user_info
            else: # user_ids
                items_to_info[user_info['id']] = user_info

    return items_to_info

# Sample usage

#twitter_api = oauth_login()
#print get_user_profile(twitter_api, screen_names=["SocialWebMining", "ptwobrussell"])
#print get_user_profile(twitter_api, user_ids=[132373965])


def get_list_members(twitter_api, owner_screen_name=None, slug=None):

    assert (owner_screen_name != None) and (slug != None), \
        "Must have both an owner_screen_name and a slug (the list name)"

    print >> sys.stderr, 'Grabbing members of list {0}/{1}'.format(owner_screen_name, slug)

    items_to_info = {}

    response = make_twitter_request(twitter_api.lists.members,
                                    owner_screen_name=owner_screen_name,
                                    slug=slug)

    for user_info in response['users']:
        items_to_info[user_info['screen_name']] = user_info

    return items_to_info

# Sample usage

#twitter_api = oauth_login()
#print get_list_members(twitter_api, "sidepodcast", "f1-drivers")


from functools import partial
from sys import maxint

def get_friends_followers_ids(twitter_api, screen_name=None, user_id=None,
                              friends_limit=maxint, followers_limit=maxint):

    # Must have either screen_name or user_id (logical xor)
    assert (screen_name != None) != (user_id != None), \
        "Must have screen_name or user_id, but not both"

    # See https://dev.twitter.com/docs/api/1.1/get/friends/ids and
    # https://dev.twitter.com/docs/api/1.1/get/followers/ids for details
    # on API parameters

    get_friends_ids = partial(make_twitter_request, twitter_api.friends.ids,
                              count=5000)
    get_followers_ids = partial(make_twitter_request, twitter_api.followers.ids,
                                count=5000)

    friends_ids, followers_ids = [], []

    for twitter_api_func, limit, ids, label in [
            [get_friends_ids, friends_limit, friends_ids, "friends"],
            [get_followers_ids, followers_limit, followers_ids, "followers"]
        ]:

        if limit == 0:
            continue

        cursor = -1
        while cursor != 0:

            # Use make_twitter_request via the partially bound callable...
            if screen_name:
                response = twitter_api_func(screen_name=screen_name, cursor=cursor)
            else: # user_id
                response = twitter_api_func(user_id=user_id, cursor=cursor)

            if response is not None:
                ids += response['ids']
                cursor = response['next_cursor']

            print >> sys.stderr, 'Fetched {0} total {1} ids for {2}'.format(
                len(ids), label, (user_id or screen_name))

            # XXX: You may want to store data during each iteration to provide
            # an additional layer of protection from exceptional circumstances

            if len(ids) >= limit or response is None:
                break

    # Do something useful with the IDs, like store them to disk...
    return friends_ids[:friends_limit], followers_ids[:followers_limit]

# Sample usage

#twitter_api = oauth_login()
#friends_ids, followers_ids = get_friends_followers_ids(twitter_api,
#                                                       screen_name="SocialWebMining",
#                                                       friends_limit=10,
#                                                       followers_limit=10)
#print friends_ids
#print followers_ids
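# These helpers compose naturally. A hedged sketch (not in the original) that
# uses get_friends_followers_ids and get_user_profile together to list a
# user's reciprocal ("mutual") connections; the screen name is a placeholder:

#friends_ids, followers_ids = get_friends_followers_ids(twitter_api,
#                                                       screen_name="SocialWebMining")
#mutual_ids = list(set(friends_ids) & set(followers_ids))[:100]
#profiles = get_user_profile(twitter_api, user_ids=mutual_ids)
#for _id in profiles:
#    print profiles[_id]['screen_name'], profiles[_id]['followers_count']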
import random

# Rather than crawl all followers, crawl a sample...
def crawl_followers_sample(twitter_api, screen_name, limit=5000, depth=2, sample=50):

    print >> sys.stderr, 'Crawling depth {0} with sample size {1}'.format(depth, sample)

    # Resolve the ID for screen_name and start working with IDs for consistency
    # in storage

    ## THIS SECTION CAN BE REPLACED BY ESTABLISH USER?
    ## THOUGH IT WOULD NEED TO PASS ON APPROPRIATE VALS TO next_queue
    seed_id_ = twitter_api.users.show(screen_name=screen_name)

    # tmp holds the seed's friend ids; next_queue holds the follower ids
    tmp, next_queue = get_friends_followers_ids(twitter_api,
                                                user_id=seed_id_['id_str'],
                                                friends_limit=limit,
                                                followers_limit=limit)

    # Store a seed_id => follower_ids mapping in MongoDB.
    # Use the Twitter user id as the mongo document _id (native indexing;
    # prevents multiple records for one individual)
    save_to_mongo({'_id': seed_id_['id'], 'screen_name': screen_name,
                   'id_str': seed_id_['id_str'],
                   'follower_ids': [_id for _id in next_queue]},
                  'twitter', 'followers')
    save_to_mongo({'_id': seed_id_['id'], 'screen_name': screen_name,
                   'id_str': seed_id_['id_str'],
                   'friend_ids': [_id for _id in tmp]},
                  'twitter', 'friends')

    udata = get_user_profile(twitter_api, user_ids=[seed_id_['id_str']])
    for u in udata:
        save_to_mongo({'_id': udata[u]['id'],
                       'screen_name': udata[u]['screen_name'],
                       'id_str': udata[u]['id_str'],
                       'name': udata[u]['name'],
                       'description': udata[u]['description'],
                       'location': udata[u]['location'],
                       'followers_count': udata[u]['followers_count'],
                       'friends_count': udata[u]['friends_count'],
                       'created_at': udata[u]['created_at']},
                      'twitter', 'userdata')

    # We're going to try to minimise the number of calls we make to the
    # Twitter API.
    # HEURISTIC: if we already have follower data for a user, don't get
    # friend/follower data again
    sspool = set()
    mgd = load_from_mongo('twitter', 'userdata', projection={'_id': 1})
    namesdone = set([i['_id'] for i in mgd])

    d = 1
    while d < depth:
        d += 1
        (queue, next_queue) = (next_queue, [])

        # TH: only interested in grabbing data we haven't grabbed before
        diff = set(queue) - set([i['_id'] for i in
                                 load_from_mongo('twitter', 'followers',
                                                 projection={'_id': 1})])

        # TH: propagate the sampling measure
        queue = random.sample(list(diff), sample) if len(diff) > sample else list(diff)

        for fid in queue:

            friend_ids, follower_ids = get_friends_followers_ids(twitter_api,
                                                                 user_id=fid,
                                                                 friends_limit=limit,
                                                                 followers_limit=limit)

            # Get some user info while we're here...
            sspoolt = set(follower_ids).union(set(friend_ids)) - namesdone
            sspoolt = sspoolt.union(sspool) if len(sspoolt) < 100 else sspoolt
            ssize = 99 if len(sspoolt) > 99 else len(sspoolt)
            uids = [fid] + random.sample(list(sspoolt), ssize)
            namesdone = namesdone.union(set(uids))
            sspool = sspoolt.union(sspool) - namesdone

            udata = get_user_profile(twitter_api, user_ids=uids)
            for u in udata:
                save_to_mongo({'_id': udata[u]['id'],
                               'screen_name': udata[u]['screen_name'],
                               'id_str': udata[u]['id_str'],
                               'name': udata[u]['name'],
                               'description': udata[u]['description'],
                               'location': udata[u]['location'],
                               'followers_count': udata[u]['followers_count'],
                               'friends_count': udata[u]['friends_count'],
                               'created_at': udata[u]['created_at']},
                              'twitter', 'userdata')

            tmp = load_from_mongo('twitter', 'userdata', criteria={'_id': fid},
                                  projection={'screen_name': 1, '_id': 1})
            s_name = tmp[0]['screen_name']

            # Store a fid => follower_ids mapping in MongoDB
            save_to_mongo({'_id': fid, 'id_str': str(fid), 'screen_name': s_name,
                           'follower_ids': [_id for _id in follower_ids]},
                          'twitter', 'followers')
            save_to_mongo({'_id': fid, 'id_str': str(fid), 'screen_name': s_name,
                           'friend_ids': [_id for _id in friend_ids]},
                          'twitter', 'friends')

            next_queue += follower_ids

# Sample usage

#twitter_api = oauth_login()
#screen_name = "bbcinternetblog"
#crawl_followers_sample(twitter_api, screen_name, depth=2, limit=5000, sample=10)
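# After a crawl, everything needed downstream lives in the 'followers',
# 'friends' and 'userdata' collections, so progress can be inspected without
# touching the Twitter API. A minimal sketch (assumes a crawl has been run):

#for doc in load_from_mongo('twitter', 'followers',
#                           projection={'screen_name': 1, 'follower_ids': 1}):
#    print doc['screen_name'], len(doc.get('follower_ids', []))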
import networkx as nx

def get_common_friends_of_followers_grapher(twitter_api, screen_name, foid_list,
                                            toget, minsupport=5):

    # We're going to use networkx to construct the graph
    DG = nx.DiGraph()

    print >> sys.stderr, 'Getting friends of followers of {0}'.format(screen_name)

    # The toget folk should already have friends/followers in the db
    for fo in toget:
        tmp = load_from_mongo('twitter', 'friends', criteria={'_id': fo},
                              projection={'screen_name': 1, 'friend_ids': 1,
                                          '_id': 1})
        members2 = tmp[0]['friend_ids']
        if len(members2) > 0:
            for foid in foid_list:
                DG.add_edge(fo, foid)
            fedges = [(fo, u) for u in members2]
            DG.add_edges_from(fedges)

    print >> sys.stderr, 'Filtering network...'

    # Now we can filter the network, keeping only nodes with degree >= minsupport
    filterNodes = []
    for n in DG:
        if DG.degree(n) >= minsupport:
            filterNodes.append(n)
    H = DG.subgraph(set(filterNodes))

    # Label the filtered graph, fetching additional labels if we need them
    mgd = load_from_mongo('twitter', 'userdata', projection={'_id': 1})
    got = [i['_id'] for i in mgd]
    tofetch = [_id for _id in H.nodes() if _id not in got]

    for n in set(H.nodes()).intersection(got):
        mgd = load_from_mongo('twitter', 'userdata', criteria={'_id': n},
                              projection={'screen_name': 1, 'id_str': 1, '_id': 1})
        H.node[n]['label'] = mgd[0]['screen_name']

    udata = get_user_profile(twitter_api, user_ids=tofetch)
    for u in udata:
        save_to_mongo({'_id': udata[u]['id'],
                       'screen_name': udata[u]['screen_name'],
                       'id_str': udata[u]['id_str'],
                       'name': udata[u]['name'],
                       'description': udata[u]['description'],
                       'location': udata[u]['location'],
                       'followers_count': udata[u]['followers_count'],
                       'friends_count': udata[u]['friends_count'],
                       'created_at': udata[u]['created_at']},
                      'twitter', 'userdata')
        H.node[udata[u]['id']]['label'] = udata[u]['screen_name']

    print >> sys.stderr, 'Writing network to {0}_{1}.gexf'.format(screen_name,
                                                                  minsupport)

    # Write the resulting network to a GEXF file
    nx.write_gexf(H, '{0}_{1}.gexf'.format(screen_name, minsupport))

    #print tofetch
    print >> sys.stderr, 'Done...'
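# The GEXF file can be opened directly in Gephi, or read back with networkx
# for a quick sanity check. A hedged sketch (the filename assumes the
# screen_name/minsupport values used elsewhere in this file, and the
# dict-style degree API of networkx 1.x):

#G = nx.read_gexf('schoolofdata_5.gexf')
#print len(G), 'nodes,', G.number_of_edges(), 'edges'
#for n, d in sorted(G.degree().items(), key=lambda x: x[1], reverse=True)[:10]:
#    print G.node[n].get('label', n), d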
def get_common_friends_of_followers(twitter_api, screen_name, minsupport=5):

    print >> sys.stderr, 'Getting followers of {0}'.format(screen_name)

    ff = load_from_mongo('twitter', 'followers',
                         criteria={'screen_name': screen_name},
                         projection={'screen_name': 1, 'follower_ids': 1,
                                     '_id': 1})

    # Get the follower ids of the target individual
    members = ff[0]['follower_ids']

    # For now, find which followers we have friend data for and use that
    tmp = load_from_mongo('twitter', 'friends', projection={'_id': 1})
    fr = [i['_id'] for i in tmp]
    toget = [i for i in members if i in fr]

    # What we really need to do is:
    ## - set a sample size of followers
    ## - get the set of ids we have friend data for and see if the size of its
    ##   intersection with the user's followers is greater than the sample size
    ## - if it is, we can get the sample out of the database; if it isn't, we
    ##   need to crawl some more

    get_common_friends_of_followers_grapher(twitter_api, screen_name,
                                            [ff[0]['_id']], toget,
                                            minsupport=minsupport)

# Sample usage

#screen_name = "schoolofdata"
#get_common_friends_of_followers(twitter_api, screen_name)


def quickExpt(screen_name, sample=119, minsupport=5):
    save_to_mongo({'_id': screen_name, 'screen_name': screen_name},
                  'twitter', 'quickexpt_source')
    crawl_followers_sample(twitter_api, screen_name, depth=2, limit=5000,
                           sample=sample)
    get_common_friends_of_followers(twitter_api, screen_name,
                                    minsupport=minsupport)

## THIS IS WHERE YOU NEED TO ADD THE USERNAME OF THE ACCOUNT YOU WANT TO GRAB
## THE DATA FOR
twitter_username = 'schoolofdata'

quickExpt(twitter_username, sample=119, minsupport=5)
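# Because the crawl persists everything to MongoDB, the network can be
# regenerated at different minsupport thresholds without recrawling. A hedged
# sketch, not part of the original workflow:

#for ms in (3, 5, 10):
#    get_common_friends_of_followers(twitter_api, twitter_username, minsupport=ms)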