"""Fetch an IMDb title search and print details for each listed movie.

The search asks for feature films from 1950-2012 sorted by number of votes
(descending).  The same result page is parsed twice, once with
``pattern.web`` and once with ``BeautifulSoup``, and for every movie cell the
title, genres, runtime and rating are printed.
"""
from __future__ import print_function

import requests
from pattern import web
from BeautifulSoup import BeautifulSoup

# Seconds to wait for the server before giving up; without a timeout
# requests would block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10


def _pattern_content(element, selector):
    """Return the content of the first ``selector`` match, or None if absent."""
    matches = element.by_tag(selector)
    return matches[0].content if matches else None


def _soup_content(tag, name, css_class=None):
    """Return the first child of the first matching tag, or None if absent.

    ``css_class`` is passed as the second positional argument of ``find``
    (as the original calls did); it is omitted entirely when None.
    """
    if css_class is None:
        found = tag.find(name)
    else:
        found = tag.find(name, css_class)
    if found is None or not found.contents:
        return None
    return found.contents[0]


# First request: the query string is written out by hand.
url = 'http://www.imdb.com/search/title?sort=num_votes,desc&start=1&title_type=feature&year=1950,2012'
r = requests.get(url, timeout=REQUEST_TIMEOUT)
print(r.url)

# Second request: same query, but requests assembles it from ``params``.
url = 'http://www.imdb.com/search/title'
params = dict(sort='num_votes,desc', start=1, title_type='feature', year='1950,2012')
r = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
print(r.url)  # notice it constructs the full url for you

# selection in pattern follows the rules of CSS
dom = web.Element(r.text)
for movie in dom.by_tag('td.title'):
    title = _pattern_content(movie, 'a')
    # A listing may lack a genre span; report an empty list instead of
    # failing on the [0] index.
    genre_spans = movie.by_tag('span.genre')
    genres = [g.content for g in genre_spans[0].by_tag('a')] if genre_spans else []
    runtime = _pattern_content(movie, 'span.runtime')
    rating = _pattern_content(movie, 'span.value')
    print(title, genres, runtime, rating)

# Same extraction again, this time with BeautifulSoup.
bs = BeautifulSoup(r.text)
for movie in bs.findAll('td', 'title'):
    title = _soup_content(movie, 'a')
    genre_span = movie.find('span', 'genre')
    # Skip links without any child node so contents[0] cannot fail.
    genres = ([g.contents[0] for g in genre_span.findAll('a') if g.contents]
              if genre_span is not None else [])
    runtime = _soup_content(movie, 'span', 'runtime')
    rating = _soup_content(movie, 'span', 'value')
    print(title, genres, runtime, rating)