In [1]:
from rpyc_docker import Browser,WebDriver
import os.path
In [3]:
"""
basepage.py
used for the examples, other pages should inherit off this page
"""

class BasePage(object):
    """Base page object; other pages should inherit from this class.

    Wraps a browser object that exposes a ``driver`` attribute (used here
    through ``get``, ``execute_script``, ``get_screenshot_as_png`` and
    ``find_element_by_css_selector``) and a ``js_ex`` callable.
    """

    # Default address used by goto() when no explicit URL is passed.
    # None here; subclasses or instances are expected to set it.
    url = None

    # JavaScript source defining window.dict_to_array(dict), which returns
    # [[key, value], ...]. It is only stored here; nothing in this class
    # injects it into the page.
    js_dict_to_array = """
    window.dict_to_array = function(dict) {
        var result = [];
        for(var k in dict) {
            result.push([k,dict[k]]);
        }
        return result;}
    """

    def __init__(self,browser):
        """Keep references to ``browser``, its ``driver`` and its ``js_ex``."""
        self.browser = browser
        self.driver = browser.driver
        #make nice shortcuts to browser
        self.js_ex = self.browser.js_ex

    def find_elements_with_text(self,tagName,rePattern):
        """Return the ``tagName`` elements whose textContent matches ``rePattern``.

        The pattern is compiled in the page with JavaScript's ``RegExp``, so it
        must use JavaScript regular-expression syntax.
        """
        return self.driver.execute_script("""
        return (function(tag,pattern) {
        var patt = RegExp(pattern);
        var elms = Array.prototype.slice.call(document.getElementsByTagName(tag));
        return elms.filter(function(elm) {
        return patt.test(elm.textContent);
        })
        })(arguments[0],arguments[1]);
        """,tagName,rePattern)

    def scroll_top(self):
        """Scroll the window to position (0, 0) and return True."""
        self.driver.execute_script("window.scrollTo(0,0);")
        return True

    def scroll_bottom(self):
        """Scroll the window down to ``document.body.scrollHeight`` and return True."""
        self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        return True

    def goto(self,url = None):
        """Load ``url`` in the driver, falling back to ``self.url``.

        Args:
            url: Address to load. Any falsy value (None, "") selects ``self.url``.

        Raises:
            ValueError: if neither ``url`` nor ``self.url`` holds an address.
        """
        target = url or self.url
        if not target:
            # Fail early with a clear message instead of handing None to the driver.
            raise ValueError(
                "No URL given and %s.url is not set" % self.__class__.__name__)
        self.driver.get(target)

    def ipython_screenshot(self):
        """Return the driver's PNG screenshot wrapped in an ``IPython.display.Image``."""
        from IPython.display import Image
        img = self.driver.get_screenshot_as_png()
        return Image(data = img)

    def find_css_input(self,css,value):
        """Clear the element located by the CSS selector ``css`` and type ``value`` into it."""
        elm = self.driver.find_element_by_css_selector(css)
        elm.clear()
        elm.send_keys(value)

    def find_css_click(self,css):
        """Click the element located by the CSS selector ``css``.

        Returns:
            True when the click went through, False when the driver raised
            ``ElementNotVisibleException``. Any other exception propagates.
        """
        # Imported locally: the module-level ``SelEx`` alias is only created in a
        # later notebook cell, so relying on it makes the except clause raise
        # NameError whenever this class is used before that cell has run.
        import selenium.common.exceptions as SelEx
        try :
            elm = self.driver.find_element_by_css_selector(css)
            elm.click()
        except SelEx.ElementNotVisibleException:
            return False
        return True
In [44]:
import urlparse
from bs4 import BeautifulSoup
import selenium.common.exceptions as SelEx

class SearchPage(BasePage):
    """Page object for a Kickstarter discover/search results page.

    Results are fetched by issuing an XMLHttpRequest from inside the loaded
    page; the parsed JSON is parked on ``window._jsonResult`` and read back
    with ``get_ajax_result``.
    """

    #woe_id is the location identifier in this case 23424977 for USA
    next_page_url_template = 'https://www.kickstarter.com/discover/categories/12?page=%d&sort=popularity&term=card+games&woe_id=23424977'

    def __init__(self,browser,searchUrl):
        """Wrap ``browser`` and use ``searchUrl`` as this page's default URL."""
        BasePage.__init__(self,browser)
        self.url = searchUrl

    def goto(self,url = None):
        """Load ``url``, or the search URL given to the constructor when omitted."""
        BasePage.goto(self,url)

    def do_ajax_results_request(self,url):
        """Start an asynchronous GET for ``url`` from inside the current page.

        ``window._jsonResult`` is reset to null first and is set to the parsed
        JSON body once the request completes with HTTP status 200; on any other
        outcome it stays null. The call returns as soon as the request has been
        sent, not when the response arrives.

        Returns:
            Whatever ``driver.execute_script`` returns for the script's
            ``return true`` (expected to be True).
        """
        js = """
        var url = arguments[0];
        window._jsonResult = null;
        var tokenMeta = document.querySelector('meta[name = "csrf-token"]');

        var xmlhttp = new XMLHttpRequest();

        xmlhttp.onreadystatechange = function() {
            if (xmlhttp.readyState == 4 && xmlhttp.status == 200) {
                window._jsonResult = JSON.parse(xmlhttp.responseText);
            }
        };

        xmlhttp.open("GET", url, true);
        // Only send the CSRF header when the page actually exposes a token,
        // instead of failing on a null element.
        if (tokenMeta) {
            xmlhttp.setRequestHeader("X-CSRF-Token", tokenMeta.getAttribute("content"));
        }
        xmlhttp.setRequestHeader("X-Requested-With","XMLHttpRequest");
        xmlhttp.setRequestHeader("Accept","application/json, text/javascript, */*; q=0.01");
        xmlhttp.send();

        return true;
        """
        return self.driver.execute_script(js,url)

    def get_ajax_result(self,timeout = 10,poll_interval = 0.25):
        """Return the JSON stored by the last ``do_ajax_results_request``.

        The request is asynchronous, so the value is polled until it is
        available instead of being read exactly once.

        Args:
            timeout: Maximum number of seconds to wait. ``0`` reads the value
                a single time without waiting.
            poll_interval: Seconds to sleep between two reads.

        Returns:
            The value of ``window._jsonResult`` as handed back by ``js_ex``
            (presumably a dict converted from the JSON object -- confirm against
            the browser wrapper), or None if nothing arrived within ``timeout``
            (for instance because the request failed).
        """
        import time
        deadline = time.time() + timeout
        while True:
            #the result when passed back to python will be converted to a python dict automatically
            result = self.js_ex("return window._jsonResult")
            if result is not None or time.time() >= deadline:
                return result
            time.sleep(poll_interval)

    def do_next_request(self,pageNum):
        """Request results page number ``pageNum`` (an integer) and return True.

        Only starts the request; fetch the data with ``get_ajax_result``.
        """
        self.do_ajax_results_request(self.next_page_url_template % pageNum)
        return True
In [48]:
# Create the rpyc_docker Browser wrapper and start a visible Firefox driver.
browser = Browser()
browser.setup(visible = True, driver = "firefox")
INFO:rpyc_docker:def driver_firefox(self):
Out[48]:
True
In [49]:
# Kickstarter advanced-search URL: term "card games", category 12, woe_id 23424977, sorted by popularity.
searchUrl = "https://www.kickstarter.com/discover/advanced?term=card+games&category_id=12&woe_id=23424977&sort=popularity"
In [50]:
searchPage = SearchPage(browser,searchUrl)
In [51]:
searchPage.goto()
In [52]:
searchPage.do_next_request(1)
Out[52]:
True
In [53]:
# Read back the JSON stored by the AJAX callback and list its top-level keys.
# NOTE: get_ajax_result() returns None when no response has arrived, in which
# case .keys() fails.
projectResults = searchPage.get_ajax_result()
projectResults.keys()
Out[53]:
[u'total_hits', u'seed', u'colloquial_title', u'projects', u'see_more']
In [54]:
projectResults['projects'][0]["name"]
Out[54]:
u'Pillars of Eternity: Lords of the Eastern Reach Card Game'
In [55]:
projectResults['projects'][0]["blurb"]
Out[55]:
u'Build cities, raise armies, defeat your enemies in this one to four player card game based in the world of Pillars of Eternity.'
In [56]:
projectResults['projects'][0]["backers_count"]
Out[56]:
2480
In [57]:
projectResults['projects'][0]["pledged"]
Out[57]:
157880.5
In [58]:
browser.teardown()
Out[58]:
True

Running headless in a docker container

In [60]:
# Client for the Docker daemon, reached through the Unix socket given in base_url.
from docker import Client
docker = Client(base_url='unix://var/run/docker.sock')
In [59]:
from rpyc_docker.rpyc_browser_worker import BrowserRpycWorker
In [61]:
# NOTE(review): "/home/john/Development" is a machine-specific absolute path;
# presumably the host directory made available inside the container -- adjust
# it for your machine and confirm against BrowserRpycWorker.
worker = BrowserRpycWorker(docker,mount = "/home/john/Development")
INFO:worker 1:RpycWorker __init__
In [62]:
worker.create_container()

worker.conn is an rpyc connection instance to the interpreter running inside the docker container.

In [63]:
# Open the rpyc connection, then put the rpyc_docker sources first on the
# sys.path of the interpreter at the other end of that connection.
worker.connect_rpyc()
worker.conn.modules.sys.path.insert(0,"/Development/python/rpyc_docker")
Out[63]:
True
In [65]:
worker.setup_browser(driver = "firefox")
Out[65]:
True

worker.browser is an rpyc instance of browser running inside the docker container

In [68]:
searchPage = SearchPage(worker.browser,searchUrl)
In [69]:
searchPage.goto()
In [70]:
searchPage.do_next_request(1)
Out[70]:
True
In [71]:
projectResults = searchPage.get_ajax_result()
projectResults.keys()
Out[71]:
[u'total_hits', u'seed', u'colloquial_title', u'projects', u'see_more']
In [72]:
projectResults['projects'][0]["name"]
Out[72]:
u'Pillars of Eternity: Lords of the Eastern Reach Card Game'
In [73]:
projectResults['projects'][0]["blurb"]
Out[73]:
u'Build cities, raise armies, defeat your enemies in this one to four player card game based in the world of Pillars of Eternity.'

Don't forget to tear down the docker container once you are done with it. Several docker containers can be run side by side to create a grid of headless browsers.

In [74]:
worker.teardown()