from rpyc_docker import Browser,WebDriver
import os.path
"""
basepage.py
Used for the examples; other pages should inherit from this page.
"""
class BasePage(object):
    """Base page object that wraps a browser and its WebDriver.

    Used for the examples; other page classes should inherit from it and
    provide ``url`` (as a class attribute or per instance).
    """

    # Address loaded by goto() when no explicit URL is passed.
    url = None

    # JavaScript source that defines window.dict_to_array, a helper turning an
    # object into an array of [key, value] pairs. Nothing in this class
    # executes it; it is only stored here.
    js_dict_to_array = """
window.dict_to_array = function(dict) {
var result = [];
for(var k in dict) {
result.push([k,dict[k]]);
}
return result;}
"""

    def __init__(self, browser):
        """Keep references to *browser*, its ``driver`` and its ``js_ex``.

        :param browser: object exposing ``driver`` and ``js_ex`` attributes.
        """
        self.browser = browser
        self.driver = browser.driver
        # Shortcut so pages can call self.js_ex(...) directly.
        self.js_ex = self.browser.js_ex

    def find_elements_with_text(self, tagName, rePattern):
        """Return the elements of tag *tagName* whose text matches *rePattern*.

        The filtering runs in the page: *rePattern* is compiled with the
        JavaScript ``RegExp`` constructor and tested against each element's
        ``textContent``.

        :param tagName: tag name handed to ``document.getElementsByTagName``.
        :param rePattern: JavaScript regular-expression source string.
        :return: whatever ``driver.execute_script`` returns for the filtered
            array.
        """
        return self.driver.execute_script("""
return (function(tag,pattern) {
var patt = RegExp(pattern);
var elms = Array.prototype.slice.call(document.getElementsByTagName(tag));
return elms.filter(function(elm) {
return patt.test(elm.textContent);
})
})(arguments[0],arguments[1]);
""", tagName, rePattern)

    def scroll_top(self):
        """Scroll the window to the top of the page and return True."""
        self.driver.execute_script("window.scrollTo(0,0);")
        return True

    def scroll_bottom(self):
        """Scroll the window to the bottom of the document body and return True."""
        self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        return True

    def goto(self, url=None):
        """Load *url*, or ``self.url`` when *url* is empty or omitted.

        :param url: address to load; falls back to ``self.url``.
        :raises ValueError: if neither *url* nor ``self.url`` is set, instead
            of handing an empty address to the driver.
        """
        target = url if url else self.url
        if not target:
            raise ValueError(
                "no url given and %s.url is not set" % type(self).__name__
            )
        self.driver.get(target)

    def ipython_screenshot(self):
        """Return a PNG screenshot of the page as an ``IPython.display.Image``."""
        # Imported here so IPython is only needed when this method is called.
        from IPython.display import Image
        png_bytes = self.driver.get_screenshot_as_png()
        return Image(data=png_bytes)

    def find_css_input(self, css, value):
        """Clear the element matching *css* and type *value* into it.

        :param css: CSS selector of the input element.
        :param value: keys to send to the element.
        """
        elm = self.driver.find_element_by_css_selector(css)
        elm.clear()
        elm.send_keys(value)

    def find_css_click(self, css):
        """Click the element matching *css*.

        :param css: CSS selector of the element to click.
        :return: True after the click, False when the driver reports the
            element as not visible. Other exceptions propagate.
        """
        try:
            elm = self.driver.find_element_by_css_selector(css)
            elm.click()
            return True
        # SelEx is bound by a module-level import further down this file; the
        # name is only looked up when an exception reaches this clause.
        except SelEx.ElementNotVisibleException:
            return False
import urlparse
from bs4 import BeautifulSoup
import selenium.common.exceptions as SelEx
class SearchPage(BasePage):
    """Page object for a search page whose result pages are fetched via XHR.

    The request is issued from inside the loaded page; its parsed JSON
    response is parked in ``window._jsonResult`` and read back with
    ``get_ajax_result``.
    """

    # Template for the paged results request; ``%d`` receives the page number.
    # woe_id is the location identifier, in this case 23424977 for USA.
    next_page_url_template = 'https://www.kickstarter.com/discover/categories/12?page=%d&sort=popularity&term=card+games&woe_id=23424977'

    def __init__(self, browser, searchUrl):
        """Store *searchUrl* as the page's default URL.

        :param browser: passed through to ``BasePage.__init__``.
        :param searchUrl: address loaded by ``goto()``.
        """
        BasePage.__init__(self, browser)
        self.url = searchUrl

    def goto(self, url=None):
        """Load *url*, or the search URL when *url* is omitted.

        Accepts the same optional argument as ``BasePage.goto`` so a
        SearchPage can be used wherever a BasePage is expected.
        """
        BasePage.goto(self, url)

    def do_ajax_results_request(self, url):
        """Start an asynchronous GET for *url* from inside the current page.

        ``window._jsonResult`` is reset to null first and is filled with the
        parsed JSON body once the request completes with status 200. The
        request is not awaited here.

        :param url: address to request.
        :return: the value of ``driver.execute_script`` for the script, which
            itself returns true after sending the request.
        """
        # The X-CSRF-Token header is only set when the page has a csrf-token
        # meta tag; calling getAttribute on a missing tag would throw and
        # abort the whole script.
        js = """
var url = arguments[0];
window._jsonResult = null;
var tokenElm = document.querySelector('meta[name = "csrf-token"]');
var xmlhttp = new XMLHttpRequest();
xmlhttp.onreadystatechange = function() {
if (xmlhttp.readyState == 4 && xmlhttp.status == 200) {
window._jsonResult = jsonResult = JSON.parse(xmlhttp.responseText);
}}
xmlhttp.open("GET", url, true);
if (tokenElm) {
xmlhttp.setRequestHeader("X-CSRF-Token",tokenElm.getAttribute("content"));
}
xmlhttp.setRequestHeader("X-Requested-With","XMLHttpRequest")
xmlhttp.setRequestHeader("Accept","application/json, text/javascript, */*; q=0.01")
xmlhttp.send();
return true;;
"""
        return self.driver.execute_script(js, url)

    def get_ajax_result(self, timeout=0, poll_interval=0.1):
        """Return the value of ``window._jsonResult``.

        With the default ``timeout=0`` the value is read once and returned
        as-is, so it may still be empty if the request has not finished.
        With a positive *timeout* the value is re-read every *poll_interval*
        seconds until it is not None or *timeout* seconds have passed.

        :param timeout: maximum number of seconds to wait for a result.
        :param poll_interval: seconds to sleep between reads.
        :return: the result as returned by ``js_ex``; per the original note it
            is converted to a Python dict automatically. Assumes ``js_ex``
            maps JavaScript null to None -- TODO confirm.
        """
        import time

        deadline = time.time() + timeout
        while True:
            result = self.js_ex("return window._jsonResult")
            if result is not None or time.time() >= deadline:
                return result
            time.sleep(poll_interval)

    def do_next_request(self, pageNum):
        """Request results page *pageNum* and return True.

        :param pageNum: integer substituted into ``next_page_url_template``.
        """
        self.do_ajax_results_request(self.next_page_url_template % pageNum)
        return True
# Example 1: run the search against a locally started, visible Firefox.
# Recorded notebook output is kept below as "Logged:" / "Out:" comments.
browser = Browser()
browser.setup(visible = True, driver = "firefox")
# Logged: INFO:rpyc_docker:def driver_firefox(self):
# Out: True
searchUrl = "https://www.kickstarter.com/discover/advanced?term=card+games&category_id=12&woe_id=23424977&sort=popularity"
try:
    searchPage = SearchPage(browser,searchUrl)
    searchPage.goto()
    searchPage.do_next_request(1)
    # Out: True
    projectResults = searchPage.get_ajax_result()
    projectResults.keys()
    # Out: [u'total_hits', u'seed', u'colloquial_title', u'projects', u'see_more']
    projectResults['projects'][0]["name"]
    # Out: u'Pillars of Eternity: Lords of the Eastern Reach Card Game'
    projectResults['projects'][0]["blurb"]
    # Out: u'Build cities, raise armies, defeat your enemies in this one to four player card game based in the world of Pillars of Eternity.'
    projectResults['projects'][0]["backers_count"]
    # Out: 2480
    projectResults['projects'][0]["pledged"]
    # Out: 157880.5
finally:
    # Always release the browser, even if one of the steps above fails.
    browser.teardown()
    # Out: True
# Example 2: run the same search in a headless browser inside a docker container.
# Recorded notebook output is kept below as "Logged:" / "Out:" comments.
from docker import Client
from rpyc_docker.rpyc_browser_worker import BrowserRpycWorker

docker = Client(base_url='unix://var/run/docker.sock')
worker = BrowserRpycWorker(docker,mount = "/home/john/Development")
# Logged: INFO:worker 1:RpycWorker __init__
worker.create_container()
try:
    # worker.conn is an rpyc connection instance inside the docker container.
    worker.connect_rpyc()
    worker.conn.modules.sys.path.insert(0,"/Development/python/rpyc_docker")
    # Out: True
    worker.setup_browser(driver = "firefox")
    # Out: True
    # worker.browser is an rpyc instance of the browser running inside the
    # docker container.
    searchPage = SearchPage(worker.browser,searchUrl)
    searchPage.goto()
    searchPage.do_next_request(1)
    # Out: True
    projectResults = searchPage.get_ajax_result()
    projectResults.keys()
    # Out: [u'total_hits', u'seed', u'colloquial_title', u'projects', u'see_more']
    projectResults['projects'][0]["name"]
    # Out: u'Pillars of Eternity: Lords of the Eastern Reach Card Game'
    projectResults['projects'][0]["blurb"]
    # Out: u'Build cities, raise armies, defeat your enemies in this one to four player card game based in the world of Pillars of Eternity.'
finally:
    # Don't forget to tear down the docker container once it is done.
    # Multiple docker containers can be run to create a grid of headless
    # browsers.
    worker.teardown()