In [1]:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
In [2]:
# Start a Firefox session through Selenium; the cells below reuse this driver.
driver = webdriver.Firefox()
In [3]:
import time

class DuckDuckGoResults(object):
    """Drive a DuckDuckGo search in a Selenium browser and scrape the results.

    Typical use: ``search()`` a term, ``load_all_results()`` to trigger the
    page's scroll-based loading, then read the results with either
    ``get_results_python()`` or ``get_results_javascript()``.
    """

    # CSS selector matching one result container on the results page.
    # The JavaScript in get_results_javascript embeds the same selector.
    RESULT_SELECTOR = "#links>div.results_links_deep"

    def __init__(self, driver):
        """Store the WebDriver instance that every method operates on."""
        self.driver = driver
        # Start with an empty list so get_results_python() is safe to call
        # before load_all_results() has run.
        self.resultElms = []

    def search(self, searchTerm):
        """Open the DuckDuckGo home page and submit ``searchTerm``."""
        self.driver.get("https://duckduckgo.com/")
        # Use the driver owned by this instance, not a module-level global.
        inputSearchElm = self.driver.find_element_by_css_selector(
            '#search_form_input_homepage')
        # The trailing newline is sent as a key press to submit the form.
        inputSearchElm.send_keys("%s\n" % searchTerm)

    def scroll_botton(self):
        """Scroll the window to the bottom of the document; return True.

        The method name (sic) is kept as-is because it is part of the
        class's public interface.
        """
        self.driver.execute_script(
            "window.scrollTo(0,document.body.scrollHeight);")
        return True

    def _find_result_elms(self):
        """Return the result container elements currently in the page."""
        return self.driver.find_elements_by_css_selector(self.RESULT_SELECTOR)

    def load_all_results(self, pause=4, max_scrolls=None):
        """Scroll down repeatedly until the number of results stops growing.

        The elements found are stored in ``self.resultElms``.

        pause       -- seconds to wait after each scroll before re-counting
                       the results (default 4).
        max_scrolls -- optional upper bound on the number of scrolls; None
                       (the default) keeps scrolling until a scroll yields
                       no additional results.
        """
        self.resultElms = self._find_result_elms()
        scrolls = 0
        while max_scrolls is None or scrolls < max_scrolls:
            self.scroll_botton()
            time.sleep(pause)
            scrolls += 1
            previousCount = len(self.resultElms)
            self.resultElms = self._find_result_elms()
            if len(self.resultElms) == previousCount:
                # A scroll that adds nothing means everything is loaded.
                break

    def parse_resultElm(self, resultElm):
        """Convert one result element into a dict.

        Returns a dict with the keys "title", "href" and "snippet", or None
        (after printing the element's text) when the element lacks the
        expected link or snippet child.
        """
        try:
            resultAElm = resultElm.find_element_by_css_selector("a.result__a")
            result = {
                "title": resultAElm.text,
                "href": resultAElm.get_attribute("href"),
                "snippet": resultElm.find_element_by_css_selector(
                    "div.result__snippet").text,
            }
            return result
        except NoSuchElementException:
            # %-formatting prints the same text on Python 2 and Python 3.
            print("exception %s" % resultElm.text)
            return None

    def get_results_python(self):
        """Parse every element in ``self.resultElms`` through WebDriver calls.

        Returns a list with one entry per element; an entry is None when
        that element could not be parsed (see parse_resultElm).
        """
        # A list (not a lazy map object) so callers can use len() and
        # indexing on Python 3 as well as Python 2.
        return [self.parse_resultElm(resultElm)
                for resultElm in self.resultElms]

    def get_results_javascript(self):
        """Extract all results with one JavaScript call inside the browser.

        Returns a list of dicts with the keys "title", "href" and "snippet".
        """
        # Each result is sent back as a list of [key, value] pairs and
        # turned into a dict on the Python side.
        jsFunction = """
        var resultElms = Array.prototype.slice.call(document.querySelectorAll("#links>div.results_links_deep"))
        return resultElms.map(function(resultElm) {
            var result = [];
            var resultAElm = resultElm.querySelector("a.result__a");
            result.push(["title",resultAElm.textContent]);
            result.push(["href",resultAElm.getAttribute("href")]);
            result.push(["snippet",resultElm.querySelector("div.result__snippet").textContent]);
            return result;
            });
        """
        results = self.driver.execute_script(jsFunction)
        return [dict(pairs) for pairs in results]
In [4]:
# Wrap the Firefox driver in the DuckDuckGoResults helper class.
page = DuckDuckGoResults(driver)
In [5]:
# Open the DuckDuckGo home page and submit the query "python".
page.search("python")
In [6]:
# Keep scrolling to the bottom until the result count stops growing.
page.load_all_results()
In [7]:
# Extract the results with a single JavaScript call executed in the browser.
jsResults = page.get_results_javascript()
In [8]:
# Extract the results element by element through WebDriver lookups.
pyResults = page.get_results_python()
In [9]:
# Compare how many results each extraction path produced.
len(pyResults) == len(jsResults)
Out[9]:
True