!scrapy startproject tutorial
!ls -R tutorial
scrapy.cfg tutorial tutorial/tutorial: __init__.py items.py pipelines.py settings.py spiders tutorial/tutorial/spiders: __init__.py
%%writefile tutorial/tutorial/items.py
from scrapy.item import Item, Field
class DmozItem(Item):
    """Container for one directory entry scraped by the dmoz spider.

    Each attribute is declared as a Scrapy ``Field``; the spider fills
    them in by key (``item['title']``, ``item['link']``, ``item['desc']``).
    """

    # Text of the entry's link.
    title = Field()
    # Target (href) of the entry's link.
    link = Field()
    # Free text that accompanies the link.
    desc = Field()
Overwriting tutorial/tutorial/items.py
%%writefile tutorial/tutorial/spiders/dmoz_spider.py
from scrapy.spider import BaseSpider
class DmozSpider(BaseSpider):
    """Spider that saves the raw body of each start page to a local file."""

    # Identifier used on the command line: ``scrapy crawl dmoz``.
    name = "dmoz"
    allowed_domains = ["dmoz.org"]
    start_urls = [
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/"
    ]

    def parse(self, response):
        """Write ``response.body`` to a file named after the page.

        The file name is the second-to-last ``/``-separated piece of
        ``response.url``; the start URLs end with a slash, so this yields
        ``Books`` and ``Resources``. The path is relative, so the file is
        created in the current working directory.
        """
        filename = response.url.split("/")[-2]
        # Use a context manager so the handle is flushed and closed even if
        # the write fails, instead of relying on garbage collection.
        with open(filename, 'wb') as page_file:
            page_file.write(response.body)
Writing tutorial/tutorial/spiders/dmoz_spider.py
! cd tutorial/; scrapy crawl dmoz
2013-11-18 22:15:39-0500 [scrapy] INFO: Scrapy 0.20.0 started (bot: tutorial) 2013-11-18 22:15:39-0500 [scrapy] DEBUG: Optional features available: ssl, http11 2013-11-18 22:15:39-0500 [scrapy] DEBUG: Overridden settings: {'NEWSPIDER_MODULE': 'tutorial.spiders', 'SPIDER_MODULES': ['tutorial.spiders'], 'BOT_NAME': 'tutorial'} 2013-11-18 22:15:39-0500 [scrapy] DEBUG: Enabled extensions: LogStats, TelnetConsole, CloseSpider, WebService, CoreStats, SpiderState 2013-11-18 22:15:40-0500 [scrapy] DEBUG: Enabled downloader middlewares: HttpAuthMiddleware, DownloadTimeoutMiddleware, UserAgentMiddleware, RetryMiddleware, DefaultHeadersMiddleware, MetaRefreshMiddleware, HttpCompressionMiddleware, RedirectMiddleware, CookiesMiddleware, ChunkedTransferMiddleware, DownloaderStats 2013-11-18 22:15:40-0500 [scrapy] DEBUG: Enabled spider middlewares: HttpErrorMiddleware, OffsiteMiddleware, RefererMiddleware, UrlLengthMiddleware, DepthMiddleware 2013-11-18 22:15:40-0500 [scrapy] DEBUG: Enabled item pipelines: 2013-11-18 22:15:40-0500 [dmoz] INFO: Spider opened 2013-11-18 22:15:40-0500 [dmoz] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min) 2013-11-18 22:15:40-0500 [scrapy] DEBUG: Telnet console listening on 0.0.0.0:6023 2013-11-18 22:15:40-0500 [scrapy] DEBUG: Web service listening on 0.0.0.0:6080 2013-11-18 22:15:40-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> (referer: None) 2013-11-18 22:15:40-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> (referer: None) 2013-11-18 22:15:40-0500 [dmoz] INFO: Closing spider (finished) 2013-11-18 22:15:40-0500 [dmoz] INFO: Dumping Scrapy stats: {'downloader/request_bytes': 530, 'downloader/request_count': 2, 'downloader/request_method_count/GET': 2, 'downloader/response_bytes': 14892, 'downloader/response_count': 2, 'downloader/response_status_count/200': 2, 'finish_reason': 'finished', 'finish_time': 
datetime.datetime(2013, 11, 19, 3, 15, 40, 260043), 'log_count/DEBUG': 8, 'log_count/INFO': 3, 'response_received_count': 2, 'scheduler/dequeued': 2, 'scheduler/dequeued/memory': 2, 'scheduler/enqueued': 2, 'scheduler/enqueued/memory': 2, 'start_time': datetime.datetime(2013, 11, 19, 3, 15, 40, 39752)} 2013-11-18 22:15:40-0500 [dmoz] INFO: Spider closed (finished)
%%writefile tutorial/tutorial/spiders/dmoz_spider.py
from scrapy.spider import BaseSpider
from scrapy.selector import Selector
class DmozSpider(BaseSpider):
    """Spider that prints the link text, link target and text of each list entry."""

    # Identifier used on the command line: ``scrapy crawl dmoz``.
    name = "dmoz"
    allowed_domains = ["dmoz.org"]
    start_urls = [
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/"
    ]

    def parse(self, response):
        """Print the extracted data for every ``<li>`` directly under a ``<ul>``.

        For each matching element three lists of strings are extracted:
        the text of its child ``<a>`` elements, their ``href`` attributes,
        and the element's own direct text nodes. They are printed on one
        line, separated by single spaces.
        """
        sel = Selector(response)
        for site in sel.xpath('//ul/li'):
            title = site.xpath('a/text()').extract()
            link = site.xpath('a/@href').extract()
            desc = site.xpath('text()').extract()
            # One pre-formatted argument in parentheses: produces the same
            # text as the Python 2 statement ``print title, link, desc``
            # while also being valid Python 3 syntax.
            print("%s %s %s" % (title, link, desc))
Overwriting tutorial/tutorial/spiders/dmoz_spider.py
! cd tutorial/; scrapy crawl dmoz
2013-11-18 22:15:45-0500 [scrapy] INFO: Scrapy 0.20.0 started (bot: tutorial) 2013-11-18 22:15:45-0500 [scrapy] DEBUG: Optional features available: ssl, http11 2013-11-18 22:15:45-0500 [scrapy] DEBUG: Overridden settings: {'NEWSPIDER_MODULE': 'tutorial.spiders', 'SPIDER_MODULES': ['tutorial.spiders'], 'BOT_NAME': 'tutorial'} 2013-11-18 22:15:45-0500 [scrapy] DEBUG: Enabled extensions: LogStats, TelnetConsole, CloseSpider, WebService, CoreStats, SpiderState 2013-11-18 22:15:45-0500 [scrapy] DEBUG: Enabled downloader middlewares: HttpAuthMiddleware, DownloadTimeoutMiddleware, UserAgentMiddleware, RetryMiddleware, DefaultHeadersMiddleware, MetaRefreshMiddleware, HttpCompressionMiddleware, RedirectMiddleware, CookiesMiddleware, ChunkedTransferMiddleware, DownloaderStats 2013-11-18 22:15:45-0500 [scrapy] DEBUG: Enabled spider middlewares: HttpErrorMiddleware, OffsiteMiddleware, RefererMiddleware, UrlLengthMiddleware, DepthMiddleware 2013-11-18 22:15:45-0500 [scrapy] DEBUG: Enabled item pipelines: 2013-11-18 22:15:45-0500 [dmoz] INFO: Spider opened 2013-11-18 22:15:45-0500 [dmoz] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min) 2013-11-18 22:15:45-0500 [scrapy] DEBUG: Telnet console listening on 0.0.0.0:6023 2013-11-18 22:15:45-0500 [scrapy] DEBUG: Web service listening on 0.0.0.0:6080 2013-11-18 22:15:45-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> (referer: None) [u'Top'] [u'/'] [u'\r\n\r\n '] [u'Computers'] [u'/Computers/'] [] [u'Programming'] [u'/Computers/Programming/'] [] [u'Languages'] [u'/Computers/Programming/Languages/'] [] [u'Python'] [u'/Computers/Programming/Languages/Python/'] [] [] [] [u'\r\n ', u'\xa0', u'\r\n '] [u'Computers: Programming: Resources'] [u'/Computers/Programming/Resources/'] [u'\r\n ', u' \r\n ', u'\r\n '] [u"eff-bot's Daily Python URL"] [u'http://www.pythonware.com/daily/'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - Contains links to assorted resources 
from the Python universe, compiled by PythonWare.\r\n \r\n '] [u'Free Python and Zope Hosting Directory'] [u'http://www.oinko.net/freepython/'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - A directory of free Python and Zope hosting providers, with reviews and ratings.\r\n \r\n '] [u"O'Reilly Python Center"] [u'http://oreilly.com/python/'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - Features Python books, resources, news and articles.\r\n \r\n '] [u"Python Developer's Guide"] [u'http://www.python.org/dev/'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - Resources for reporting bugs, accessing the Python source tree with CVS and taking part in the development of Python.\r\n \r\n '] [u'Social Bug'] [u'http://win32com.goermezer.de/'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - Scripts, examples and news about Python programming for the Windows platform.\r\n \r\n '] 2013-11-18 22:15:45-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> (referer: None) [u'Top'] [u'/'] [u'\r\n\r\n '] [u'Computers'] [u'/Computers/'] [] [u'Programming'] [u'/Computers/Programming/'] [] [u'Languages'] [u'/Computers/Programming/Languages/'] [] [u'Python'] [u'/Computers/Programming/Languages/Python/'] [] [] [] [u'\r\n ', u'\xa0', u'\r\n '] [u'Computers: Programming: Languages: Python: Resources'] [u'/Computers/Programming/Languages/Python/Resources/'] [u'\r\n ', u' \r\n ', u'\r\n '] [u'Computers: Programming: Languages: Ruby: Books'] [u'/Computers/Programming/Languages/Ruby/Books/'] [u'\r\n ', u' \r\n ', u'\r\n '] [u'German'] [u'/World/Deutsch/Computer/Programmieren/Sprachen/Python/B%C3%BCcher/'] [u'\r\n \t', u'\r\n ', u'\r\n\t\t\t\t\t'] [u'Russian'] [u'/World/Russian/%D0%9A%D0%BE%D0%BC%D0%BF%D1%8C%D1%8E%D1%82%D0%B5%D1%80%D1%8B/%D0%9F%D1%80%D0%BE%D0%B3%D1%80%D0%B0%D0%BC%D0%BC%D0%B8%D1%80%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5/%D0%AF%D0%B7%D1%8B%D0%BA%D0%B8/Python/%D0%9A%D0%BD%D0%B8%D0%B3%D0%B8/'] [u'\r\n \t', u'\r\n ', u'\r\n\t\t\t\t\t'] [u'Core Python 
Programming'] [u'http://www.pearsonhighered.com/educator/academic/product/0,,0130260363,00%2Ben-USS_01DBC.html'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - By Wesley J. Chun; Prentice Hall PTR, 2001, ISBN 0130260363. For experienced developers to improve extant skills; professional level examples. Starts by introducing syntax, objects, error handling, functions, classes, built-ins. [Prentice Hall]\r\n \r\n '] [u'Data Structures and Algorithms with Object-Oriented Design Patterns in Python'] [u'http://www.brpreiss.com/books/opus7/html/book.html'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - The primary goal of this book is to promote object-oriented design using Python and to illustrate the use of the emerging object-oriented design patterns.\r\nA secondary goal of the book is to present mathematical tools just in time. Analysis techniques and proofs are presented as needed and in the proper context.\r\n \r\n '] [u'Dive Into Python 3'] [u'http://www.diveintopython.net/'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - By Mark Pilgrim, Guide to Python 3 and its differences from Python 2. Each chapter starts with a real code sample and explains it fully. Has a comprehensive appendix of all the syntactic and semantic changes in Python 3\r\n\r\n\r\n \r\n '] [u'Foundations of Python Network Programming'] [u'http://rhodesmill.org/brandon/2011/foundations-of-python-network-programming/'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - This book covers a wide range of topics. From raw TCP and UDP to encryption with TSL, and then to HTTP, SMTP, POP, IMAP, and ssh. 
It gives you a good understanding of each field and how to do everything on the network with Python.\r\n \r\n '] [u'Free Python books'] [u'http://www.techbooksforfree.com/perlpython.shtml'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - Free Python books and tutorials.\r\n \r\n '] [u'FreeTechBooks: Python Scripting Language'] [u'http://www.freetechbooks.com/python-f6.html'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - Annotated list of free online books on Python scripting language. Topics range from beginner to advanced.\r\n \r\n '] [u'How to Think Like a Computer Scientist: Learning with Python'] [u'http://greenteapress.com/thinkpython/'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - By Allen B. Downey, Jeffrey Elkner, Chris Meyers; Green Tea Press, 2002, ISBN 0971677506. Teaches general principles of programming, via Python as subject language. Thorough, in-depth approach to many basic and intermediate programming topics. Full text online and downloads: HTML, PDF, PS, LaTeX. [Free, Green Tea Press]\r\n \r\n '] [u'An Introduction to Python'] [u'http://www.network-theory.co.uk/python/intro/'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - By Guido van Rossum, Fred L. Drake, Jr.; Network Theory Ltd., 2003, ISBN 0954161769. Printed edition of official tutorial, for v2.x, from Python.org. [Network Theory, online]\r\n \r\n '] [u'Learn to Program Using Python'] [u'http://www.freenetpages.co.uk/hp/alan.gauld/'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - Book by Alan Gauld with full text online. Introduction for those learning programming basics: terminology, concepts, methods to write code. Assumes no prior knowledge but basic computer skills.\r\n \r\n '] [u'Making Use of Python'] [u'http://www.wiley.com/WileyCDA/WileyTitle/productCd-0471219754.html'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - By Rashi Gupta; John Wiley and Sons, 2002, ISBN 0471219754. 
Covers language basics, use for CGI scripting, GUI development, network programming; shows why it is one of more sophisticated of popular scripting languages. [Wiley]\r\n \r\n '] [u'Practical Python'] [u'http://hetland.org/writing/practical-python/'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - By Magnus Lie Hetland; Apress LP, 2002, ISBN 1590590066. Readable guide to ideas most vital to new users, from basics common to high level languages, to more specific aspects, to a series of 10 ever more complex programs. [Apress]\r\n \r\n '] [u'Pro Python System Administration'] [u'http://www.sysadminpy.com/'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - By Rytis Sileika, ISBN13: 978-1-4302-2605-5, Uses real-world system administration examples like manage devices with SNMP and SOAP, build a distributed monitoring system, manage web applications and parse complex log files, monitor and manage MySQL databases.\r\n\r\n \r\n '] [u'Programming in Python 3 (Second Edition)'] [u'http://www.qtrac.eu/py3book.html'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - A Complete Introduction to the Python 3.\r\n \r\n '] [u'Python 2.1 Bible'] [u'http://www.wiley.com/WileyCDA/WileyTitle/productCd-0764548077.html'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - By Dave Brueck, Stephen Tanner; John Wiley and Sons, 2001, ISBN 0764548077. Full coverage, clear explanations, hands-on examples, full language reference; shows step by step how to use components, assemble them, form full-featured programs. 
[John Wiley and Sons]\r\n \r\n '] [u'Python 3 Object Oriented Programming'] [u'https://www.packtpub.com/python-3-object-oriented-programming/book'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - A step-by-step tutorial for OOP in Python 3, including discussion and examples of abstraction, encapsulation, information hiding, and raise, handle, define, and manipulate exceptions.\r\n \r\n '] [u'Python Language Reference Manual'] [u'http://www.network-theory.co.uk/python/language/'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - By Guido van Rossum, Fred L. Drake, Jr.; Network Theory Ltd., 2003, ISBN 0954161785. Printed edition of official language reference, for v2.x, from Python.org, describes syntax, built-in datatypes. [Network Theory, online]\r\n \r\n '] [u'Python Programming Patterns'] [u'http://www.pearsonhighered.com/educator/academic/product/0,,0130409561,00%2Ben-USS_01DBC.html'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - By Thomas W. Christopher; Prentice Hall PTR, 2002, ISBN 0130409561. Shows how to write large programs, introduces powerful design patterns that deliver high levels of robustness, scalability, reuse.\r\n \r\n '] [u'Python Programming with the Java Class Libraries: A Tutorial for Building Web and Enterprise Applications with Jython'] [u'http://www.informit.com/store/product.aspx?isbn=0201616165&redir=1'] [u'\r\n\t\t\t\r\n ', u" \r\n\t\t\t\r\n - By Richard Hightower; Addison-Wesley, 2002, 0201616165. Begins with Python basics, many exercises, interactive sessions. Shows programming novices concepts and practical methods. Shows programming experts Python's abilities and ways to interface with Java APIs. [publisher website]\r\n \r\n "] [u'Python: Visual QuickStart Guide'] [u'http://www.pearsonhighered.com/educator/academic/product/0,,0201748843,00%2Ben-USS_01DBC.html'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - By Chris Fehily; Peachpit Press, 2002, ISBN 0201748843. 
Task-based, step-by-step visual reference guide, many screen shots, for courses in digital graphics; Web design, scripting, development; multimedia, page layout, office tools, operating systems. [Prentice Hall]\r\n \r\n '] [u'Sams Teach Yourself Python in 24 Hours'] [u'http://www.informit.com/store/product.aspx?isbn=0672317354'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - By Ivan Van Laningham; Sams Publishing, 2000, ISBN 0672317354. Split into 24 hands-on, 1 hour lessons; steps needed to learn topic: syntax, language features, OO design and programming, GUIs (Tkinter), system administration, CGI. [Sams Publishing]\r\n \r\n '] [u'Text Processing in Python'] [u'http://gnosis.cx/TPiP/'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - By David Mertz; Addison Wesley. Book in progress, full text, ASCII format. Asks for feedback. [author website, Gnosis Software, Inc.]\r\n \r\n '] [u'XML Processing with Python'] [u'http://www.informit.com/store/product.aspx?isbn=0130211192'] [u'\r\n\t\t\t\r\n ', u' \r\n\t\t\t\r\n - By Sean McGrath; Prentice Hall PTR, 2000, ISBN 0130211192, has CD-ROM. Methods to build XML applications fast, Python tutorial, DOM and SAX, new Pyxie open source XML processing library. [Prentice Hall PTR]\r\n \r\n '] 2013-11-18 22:15:45-0500 [dmoz] INFO: Closing spider (finished) 2013-11-18 22:15:45-0500 [dmoz] INFO: Dumping Scrapy stats: {'downloader/request_bytes': 530, 'downloader/request_count': 2, 'downloader/request_method_count/GET': 2, 'downloader/response_bytes': 14892, 'downloader/response_count': 2, 'downloader/response_status_count/200': 2, 'finish_reason': 'finished', 'finish_time': datetime.datetime(2013, 11, 19, 3, 15, 45, 348844), 'log_count/DEBUG': 8, 'log_count/INFO': 3, 'response_received_count': 2, 'scheduler/dequeued': 2, 'scheduler/dequeued/memory': 2, 'scheduler/enqueued': 2, 'scheduler/enqueued/memory': 2, 'start_time': datetime.datetime(2013, 11, 19, 3, 15, 45, 143009)} 2013-11-18 22:15:45-0500 [dmoz] INFO: Spider closed (finished)
%%writefile tutorial/tutorial/spiders/dmoz_spider.py
from scrapy.spider import BaseSpider
from scrapy.selector import Selector
from tutorial.items import DmozItem
class DmozSpider(BaseSpider):
    """Spider that turns each list entry of the start pages into a ``DmozItem``."""

    # Identifier used on the command line: ``scrapy crawl dmoz``.
    name = "dmoz"
    allowed_domains = ["dmoz.org"]
    start_urls = [
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/"
    ]

    def parse(self, response):
        """Return a list with one ``DmozItem`` per ``<li>`` directly under a ``<ul>``.

        ``title`` holds the text of the entry's child ``<a>`` elements,
        ``link`` their ``href`` attributes, and ``desc`` the entry's own
        direct text nodes with surrounding whitespace stripped.
        """
        entries = Selector(response).xpath('//ul/li')
        return [
            DmozItem(
                title=entry.xpath('a/text()').extract(),
                link=entry.xpath('a/@href').extract(),
                # Whitespace-only text nodes are kept, as empty strings.
                desc=[text.strip() for text in entry.xpath('text()').extract()]
            )
            for entry in entries
        ]
Overwriting tutorial/tutorial/spiders/dmoz_spider.py
! cd tutorial/; scrapy crawl dmoz
2013-11-18 22:15:57-0500 [scrapy] INFO: Scrapy 0.20.0 started (bot: tutorial) 2013-11-18 22:15:57-0500 [scrapy] DEBUG: Optional features available: ssl, http11 2013-11-18 22:15:57-0500 [scrapy] DEBUG: Overridden settings: {'NEWSPIDER_MODULE': 'tutorial.spiders', 'SPIDER_MODULES': ['tutorial.spiders'], 'BOT_NAME': 'tutorial'} 2013-11-18 22:15:57-0500 [scrapy] DEBUG: Enabled extensions: LogStats, TelnetConsole, CloseSpider, WebService, CoreStats, SpiderState 2013-11-18 22:15:57-0500 [scrapy] DEBUG: Enabled downloader middlewares: HttpAuthMiddleware, DownloadTimeoutMiddleware, UserAgentMiddleware, RetryMiddleware, DefaultHeadersMiddleware, MetaRefreshMiddleware, HttpCompressionMiddleware, RedirectMiddleware, CookiesMiddleware, ChunkedTransferMiddleware, DownloaderStats 2013-11-18 22:15:57-0500 [scrapy] DEBUG: Enabled spider middlewares: HttpErrorMiddleware, OffsiteMiddleware, RefererMiddleware, UrlLengthMiddleware, DepthMiddleware 2013-11-18 22:15:57-0500 [scrapy] DEBUG: Enabled item pipelines: 2013-11-18 22:15:57-0500 [dmoz] INFO: Spider opened 2013-11-18 22:15:57-0500 [dmoz] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min) 2013-11-18 22:15:57-0500 [scrapy] DEBUG: Telnet console listening on 0.0.0.0:6023 2013-11-18 22:15:57-0500 [scrapy] DEBUG: Web service listening on 0.0.0.0:6080 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> (referer: None) 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [u''], 'link': [u'/'], 'title': [u'Top']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [], 'link': [u'/Computers/'], 'title': [u'Computers']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [], 'link': 
[u'/Computers/Programming/'], 'title': [u'Programming']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [], 'link': [u'/Computers/Programming/Languages/'], 'title': [u'Languages']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [], 'link': [u'/Computers/Programming/Languages/Python/'], 'title': [u'Python']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [u'', u'', u''], 'link': [], 'title': []} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [u'', u'', u''], 'link': [u'/Computers/Programming/Resources/'], 'title': [u'Computers: Programming: Resources']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [u'', u'- Contains links to assorted resources from the Python universe, compiled by PythonWare.'], 'link': [u'http://www.pythonware.com/daily/'], 'title': [u"eff-bot's Daily Python URL"]} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [u'', u'- A directory of free Python and Zope hosting providers, with reviews and ratings.'], 'link': [u'http://www.oinko.net/freepython/'], 'title': [u'Free Python and Zope Hosting Directory']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [u'', u'- Features Python books, resources, news and articles.'], 'link': [u'http://oreilly.com/python/'], 'title': [u"O'Reilly Python Center"]} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': 
[u'', u'- Resources for reporting bugs, accessing the Python source tree with CVS and taking part in the development of Python.'], 'link': [u'http://www.python.org/dev/'], 'title': [u"Python Developer's Guide"]} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [u'', u'- Scripts, examples and news about Python programming for the Windows platform.'], 'link': [u'http://win32com.goermezer.de/'], 'title': [u'Social Bug']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> (referer: None) 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u''], 'link': [u'/'], 'title': [u'Top']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [], 'link': [u'/Computers/'], 'title': [u'Computers']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [], 'link': [u'/Computers/Programming/'], 'title': [u'Programming']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [], 'link': [u'/Computers/Programming/Languages/'], 'title': [u'Languages']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [], 'link': [u'/Computers/Programming/Languages/Python/'], 'title': [u'Python']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'', u''], 'link': [], 'title': []} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'', u''], 'link': 
[u'/Computers/Programming/Languages/Python/Resources/'], 'title': [u'Computers: Programming: Languages: Python: Resources']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'', u''], 'link': [u'/Computers/Programming/Languages/Ruby/Books/'], 'title': [u'Computers: Programming: Languages: Ruby: Books']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'', u''], 'link': [u'/World/Deutsch/Computer/Programmieren/Sprachen/Python/B%C3%BCcher/'], 'title': [u'German']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'', u''], 'link': [u'/World/Russian/%D0%9A%D0%BE%D0%BC%D0%BF%D1%8C%D1%8E%D1%82%D0%B5%D1%80%D1%8B/%D0%9F%D1%80%D0%BE%D0%B3%D1%80%D0%B0%D0%BC%D0%BC%D0%B8%D1%80%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5/%D0%AF%D0%B7%D1%8B%D0%BA%D0%B8/Python/%D0%9A%D0%BD%D0%B8%D0%B3%D0%B8/'], 'title': [u'Russian']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Wesley J. Chun; Prentice Hall PTR, 2001, ISBN 0130260363. For experienced developers to improve extant skills; professional level examples. Starts by introducing syntax, objects, error handling, functions, classes, built-ins. [Prentice Hall]'], 'link': [u'http://www.pearsonhighered.com/educator/academic/product/0,,0130260363,00%2Ben-USS_01DBC.html'], 'title': [u'Core Python Programming']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- The primary goal of this book is to promote object-oriented design using Python and to illustrate the use of the emerging object-oriented design patterns.\r\nA secondary goal of the book is to present mathematical tools just in time. 
Analysis techniques and proofs are presented as needed and in the proper context.'], 'link': [u'http://www.brpreiss.com/books/opus7/html/book.html'], 'title': [u'Data Structures and Algorithms with Object-Oriented Design Patterns in Python']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Mark Pilgrim, Guide to Python 3 and its differences from Python 2. Each chapter starts with a real code sample and explains it fully. Has a comprehensive appendix of all the syntactic and semantic changes in Python 3'], 'link': [u'http://www.diveintopython.net/'], 'title': [u'Dive Into Python 3']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- This book covers a wide range of topics. From raw TCP and UDP to encryption with TSL, and then to HTTP, SMTP, POP, IMAP, and ssh. It gives you a good understanding of each field and how to do everything on the network with Python.'], 'link': [u'http://rhodesmill.org/brandon/2011/foundations-of-python-network-programming/'], 'title': [u'Foundations of Python Network Programming']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- Free Python books and tutorials.'], 'link': [u'http://www.techbooksforfree.com/perlpython.shtml'], 'title': [u'Free Python books']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- Annotated list of free online books on Python scripting language. 
Topics range from beginner to advanced.'], 'link': [u'http://www.freetechbooks.com/python-f6.html'], 'title': [u'FreeTechBooks: Python Scripting Language']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Allen B. Downey, Jeffrey Elkner, Chris Meyers; Green Tea Press, 2002, ISBN 0971677506. Teaches general principles of programming, via Python as subject language. Thorough, in-depth approach to many basic and intermediate programming topics. Full text online and downloads: HTML, PDF, PS, LaTeX. [Free, Green Tea Press]'], 'link': [u'http://greenteapress.com/thinkpython/'], 'title': [u'How to Think Like a Computer Scientist: Learning with Python']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Guido van Rossum, Fred L. Drake, Jr.; Network Theory Ltd., 2003, ISBN 0954161769. Printed edition of official tutorial, for v2.x, from Python.org. [Network Theory, online]'], 'link': [u'http://www.network-theory.co.uk/python/intro/'], 'title': [u'An Introduction to Python']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- Book by Alan Gauld with full text online. Introduction for those learning programming basics: terminology, concepts, methods to write code. Assumes no prior knowledge but basic computer skills.'], 'link': [u'http://www.freenetpages.co.uk/hp/alan.gauld/'], 'title': [u'Learn to Program Using Python']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Rashi Gupta; John Wiley and Sons, 2002, ISBN 0471219754. Covers language basics, use for CGI scripting, GUI development, network programming; shows why it is one of more sophisticated of popular scripting languages. 
[Wiley]'], 'link': [u'http://www.wiley.com/WileyCDA/WileyTitle/productCd-0471219754.html'], 'title': [u'Making Use of Python']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Magnus Lie Hetland; Apress LP, 2002, ISBN 1590590066. Readable guide to ideas most vital to new users, from basics common to high level languages, to more specific aspects, to a series of 10 ever more complex programs. [Apress]'], 'link': [u'http://hetland.org/writing/practical-python/'], 'title': [u'Practical Python']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Rytis Sileika, ISBN13: 978-1-4302-2605-5, Uses real-world system administration examples like manage devices with SNMP and SOAP, build a distributed monitoring system, manage web applications and parse complex log files, monitor and manage MySQL databases.'], 'link': [u'http://www.sysadminpy.com/'], 'title': [u'Pro Python System Administration']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- A Complete Introduction to the Python 3.'], 'link': [u'http://www.qtrac.eu/py3book.html'], 'title': [u'Programming in Python 3 (Second Edition)']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Dave Brueck, Stephen Tanner; John Wiley and Sons, 2001, ISBN 0764548077. Full coverage, clear explanations, hands-on examples, full language reference; shows step by step how to use components, assemble them, form full-featured programs. 
[John Wiley and Sons]'], 'link': [u'http://www.wiley.com/WileyCDA/WileyTitle/productCd-0764548077.html'], 'title': [u'Python 2.1 Bible']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- A step-by-step tutorial for OOP in Python 3, including discussion and examples of abstraction, encapsulation, information hiding, and raise, handle, define, and manipulate exceptions.'], 'link': [u'https://www.packtpub.com/python-3-object-oriented-programming/book'], 'title': [u'Python 3 Object Oriented Programming']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Guido van Rossum, Fred L. Drake, Jr.; Network Theory Ltd., 2003, ISBN 0954161785. Printed edition of official language reference, for v2.x, from Python.org, describes syntax, built-in datatypes. [Network Theory, online]'], 'link': [u'http://www.network-theory.co.uk/python/language/'], 'title': [u'Python Language Reference Manual']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Thomas W. Christopher; Prentice Hall PTR, 2002, ISBN 0130409561. Shows how to write large programs, introduces powerful design patterns that deliver high levels of robustness, scalability, reuse.'], 'link': [u'http://www.pearsonhighered.com/educator/academic/product/0,,0130409561,00%2Ben-USS_01DBC.html'], 'title': [u'Python Programming Patterns']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u"- By Richard Hightower; Addison-Wesley, 2002, 0201616165. Begins with Python basics, many exercises, interactive sessions. Shows programming novices concepts and practical methods. Shows programming experts Python's abilities and ways to interface with Java APIs. 
[publisher website]"], 'link': [u'http://www.informit.com/store/product.aspx?isbn=0201616165&redir=1'], 'title': [u'Python Programming with the Java Class Libraries: A Tutorial for Building Web and Enterprise Applications with Jython']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Chris Fehily; Peachpit Press, 2002, ISBN 0201748843. Task-based, step-by-step visual reference guide, many screen shots, for courses in digital graphics; Web design, scripting, development; multimedia, page layout, office tools, operating systems. [Prentice Hall]'], 'link': [u'http://www.pearsonhighered.com/educator/academic/product/0,,0201748843,00%2Ben-USS_01DBC.html'], 'title': [u'Python: Visual QuickStart Guide']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Ivan Van Laningham; Sams Publishing, 2000, ISBN 0672317354. Split into 24 hands-on, 1 hour lessons; steps needed to learn topic: syntax, language features, OO design and programming, GUIs (Tkinter), system administration, CGI. [Sams Publishing]'], 'link': [u'http://www.informit.com/store/product.aspx?isbn=0672317354'], 'title': [u'Sams Teach Yourself Python in 24 Hours']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By David Mertz; Addison Wesley. Book in progress, full text, ASCII format. Asks for feedback. [author website, Gnosis Software, Inc.]'], 'link': [u'http://gnosis.cx/TPiP/'], 'title': [u'Text Processing in Python']} 2013-11-18 22:15:57-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Sean McGrath; Prentice Hall PTR, 2000, ISBN 0130211192, has CD-ROM. 
Methods to build XML applications fast, Python tutorial, DOM and SAX, new Pyxie open source XML processing library. [Prentice Hall PTR]'], 'link': [u'http://www.informit.com/store/product.aspx?isbn=0130211192'], 'title': [u'XML Processing with Python']} 2013-11-18 22:15:57-0500 [dmoz] INFO: Closing spider (finished) 2013-11-18 22:15:57-0500 [dmoz] INFO: Dumping Scrapy stats: {'downloader/request_bytes': 530, 'downloader/request_count': 2, 'downloader/request_method_count/GET': 2, 'downloader/response_bytes': 14892, 'downloader/response_count': 2, 'downloader/response_status_count/200': 2, 'finish_reason': 'finished', 'finish_time': datetime.datetime(2013, 11, 19, 3, 15, 57, 747642), 'item_scraped_count': 44, 'log_count/DEBUG': 52, 'log_count/INFO': 3, 'response_received_count': 2, 'scheduler/dequeued': 2, 'scheduler/dequeued/memory': 2, 'scheduler/enqueued': 2, 'scheduler/enqueued/memory': 2, 'start_time': datetime.datetime(2013, 11, 19, 3, 15, 57, 559107)} 2013-11-18 22:15:57-0500 [dmoz] INFO: Spider closed (finished)
!cd tutorial/; scrapy crawl dmoz -o items.json -t json
2013-11-18 22:15:59-0500 [scrapy] INFO: Scrapy 0.20.0 started (bot: tutorial) 2013-11-18 22:15:59-0500 [scrapy] DEBUG: Optional features available: ssl, http11 2013-11-18 22:15:59-0500 [scrapy] DEBUG: Overridden settings: {'NEWSPIDER_MODULE': 'tutorial.spiders', 'FEED_FORMAT': 'json', 'SPIDER_MODULES': ['tutorial.spiders'], 'FEED_URI': 'items.json', 'BOT_NAME': 'tutorial'} 2013-11-18 22:15:59-0500 [scrapy] DEBUG: Enabled extensions: FeedExporter, LogStats, TelnetConsole, CloseSpider, WebService, CoreStats, SpiderState 2013-11-18 22:15:59-0500 [scrapy] DEBUG: Enabled downloader middlewares: HttpAuthMiddleware, DownloadTimeoutMiddleware, UserAgentMiddleware, RetryMiddleware, DefaultHeadersMiddleware, MetaRefreshMiddleware, HttpCompressionMiddleware, RedirectMiddleware, CookiesMiddleware, ChunkedTransferMiddleware, DownloaderStats 2013-11-18 22:15:59-0500 [scrapy] DEBUG: Enabled spider middlewares: HttpErrorMiddleware, OffsiteMiddleware, RefererMiddleware, UrlLengthMiddleware, DepthMiddleware 2013-11-18 22:15:59-0500 [scrapy] DEBUG: Enabled item pipelines: 2013-11-18 22:15:59-0500 [dmoz] INFO: Spider opened 2013-11-18 22:15:59-0500 [dmoz] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min) 2013-11-18 22:15:59-0500 [scrapy] DEBUG: Telnet console listening on 0.0.0.0:6023 2013-11-18 22:15:59-0500 [scrapy] DEBUG: Web service listening on 0.0.0.0:6080 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> (referer: None) 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u''], 'link': [u'/'], 'title': [u'Top']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [], 'link': [u'/Computers/'], 'title': [u'Computers']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 
http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [], 'link': [u'/Computers/Programming/'], 'title': [u'Programming']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [], 'link': [u'/Computers/Programming/Languages/'], 'title': [u'Languages']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [], 'link': [u'/Computers/Programming/Languages/Python/'], 'title': [u'Python']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'', u''], 'link': [], 'title': []} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'', u''], 'link': [u'/Computers/Programming/Languages/Python/Resources/'], 'title': [u'Computers: Programming: Languages: Python: Resources']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'', u''], 'link': [u'/Computers/Programming/Languages/Ruby/Books/'], 'title': [u'Computers: Programming: Languages: Ruby: Books']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'', u''], 'link': [u'/World/Deutsch/Computer/Programmieren/Sprachen/Python/B%C3%BCcher/'], 'title': [u'German']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'', u''], 'link': [u'/World/Russian/%D0%9A%D0%BE%D0%BC%D0%BF%D1%8C%D1%8E%D1%82%D0%B5%D1%80%D1%8B/%D0%9F%D1%80%D0%BE%D0%B3%D1%80%D0%B0%D0%BC%D0%BC%D0%B8%D1%80%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5/%D0%AF%D0%B7%D1%8B%D0%BA%D0%B8/Python/%D0%9A%D0%BD%D0%B8%D0%B3%D0%B8/'], 'title': [u'Russian']} 2013-11-18 
22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Wesley J. Chun; Prentice Hall PTR, 2001, ISBN 0130260363. For experienced developers to improve extant skills; professional level examples. Starts by introducing syntax, objects, error handling, functions, classes, built-ins. [Prentice Hall]'], 'link': [u'http://www.pearsonhighered.com/educator/academic/product/0,,0130260363,00%2Ben-USS_01DBC.html'], 'title': [u'Core Python Programming']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- The primary goal of this book is to promote object-oriented design using Python and to illustrate the use of the emerging object-oriented design patterns.\r\nA secondary goal of the book is to present mathematical tools just in time. Analysis techniques and proofs are presented as needed and in the proper context.'], 'link': [u'http://www.brpreiss.com/books/opus7/html/book.html'], 'title': [u'Data Structures and Algorithms with Object-Oriented Design Patterns in Python']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Mark Pilgrim, Guide to Python 3 and its differences from Python 2. Each chapter starts with a real code sample and explains it fully. Has a comprehensive appendix of all the syntactic and semantic changes in Python 3'], 'link': [u'http://www.diveintopython.net/'], 'title': [u'Dive Into Python 3']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- This book covers a wide range of topics. From raw TCP and UDP to encryption with TSL, and then to HTTP, SMTP, POP, IMAP, and ssh. 
It gives you a good understanding of each field and how to do everything on the network with Python.'], 'link': [u'http://rhodesmill.org/brandon/2011/foundations-of-python-network-programming/'], 'title': [u'Foundations of Python Network Programming']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- Free Python books and tutorials.'], 'link': [u'http://www.techbooksforfree.com/perlpython.shtml'], 'title': [u'Free Python books']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- Annotated list of free online books on Python scripting language. Topics range from beginner to advanced.'], 'link': [u'http://www.freetechbooks.com/python-f6.html'], 'title': [u'FreeTechBooks: Python Scripting Language']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Allen B. Downey, Jeffrey Elkner, Chris Meyers; Green Tea Press, 2002, ISBN 0971677506. Teaches general principles of programming, via Python as subject language. Thorough, in-depth approach to many basic and intermediate programming topics. Full text online and downloads: HTML, PDF, PS, LaTeX. [Free, Green Tea Press]'], 'link': [u'http://greenteapress.com/thinkpython/'], 'title': [u'How to Think Like a Computer Scientist: Learning with Python']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Guido van Rossum, Fred L. Drake, Jr.; Network Theory Ltd., 2003, ISBN 0954161769. Printed edition of official tutorial, for v2.x, from Python.org. 
[Network Theory, online]'], 'link': [u'http://www.network-theory.co.uk/python/intro/'], 'title': [u'An Introduction to Python']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- Book by Alan Gauld with full text online. Introduction for those learning programming basics: terminology, concepts, methods to write code. Assumes no prior knowledge but basic computer skills.'], 'link': [u'http://www.freenetpages.co.uk/hp/alan.gauld/'], 'title': [u'Learn to Program Using Python']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Rashi Gupta; John Wiley and Sons, 2002, ISBN 0471219754. Covers language basics, use for CGI scripting, GUI development, network programming; shows why it is one of more sophisticated of popular scripting languages. [Wiley]'], 'link': [u'http://www.wiley.com/WileyCDA/WileyTitle/productCd-0471219754.html'], 'title': [u'Making Use of Python']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Magnus Lie Hetland; Apress LP, 2002, ISBN 1590590066. Readable guide to ideas most vital to new users, from basics common to high level languages, to more specific aspects, to a series of 10 ever more complex programs. 
[Apress]'], 'link': [u'http://hetland.org/writing/practical-python/'], 'title': [u'Practical Python']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Rytis Sileika, ISBN13: 978-1-4302-2605-5, Uses real-world system administration examples like manage devices with SNMP and SOAP, build a distributed monitoring system, manage web applications and parse complex log files, monitor and manage MySQL databases.'], 'link': [u'http://www.sysadminpy.com/'], 'title': [u'Pro Python System Administration']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- A Complete Introduction to the Python 3.'], 'link': [u'http://www.qtrac.eu/py3book.html'], 'title': [u'Programming in Python 3 (Second Edition)']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Dave Brueck, Stephen Tanner; John Wiley and Sons, 2001, ISBN 0764548077. Full coverage, clear explanations, hands-on examples, full language reference; shows step by step how to use components, assemble them, form full-featured programs. 
[John Wiley and Sons]'], 'link': [u'http://www.wiley.com/WileyCDA/WileyTitle/productCd-0764548077.html'], 'title': [u'Python 2.1 Bible']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- A step-by-step tutorial for OOP in Python 3, including discussion and examples of abstraction, encapsulation, information hiding, and raise, handle, define, and manipulate exceptions.'], 'link': [u'https://www.packtpub.com/python-3-object-oriented-programming/book'], 'title': [u'Python 3 Object Oriented Programming']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Guido van Rossum, Fred L. Drake, Jr.; Network Theory Ltd., 2003, ISBN 0954161785. Printed edition of official language reference, for v2.x, from Python.org, describes syntax, built-in datatypes. [Network Theory, online]'], 'link': [u'http://www.network-theory.co.uk/python/language/'], 'title': [u'Python Language Reference Manual']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Thomas W. Christopher; Prentice Hall PTR, 2002, ISBN 0130409561. Shows how to write large programs, introduces powerful design patterns that deliver high levels of robustness, scalability, reuse.'], 'link': [u'http://www.pearsonhighered.com/educator/academic/product/0,,0130409561,00%2Ben-USS_01DBC.html'], 'title': [u'Python Programming Patterns']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u"- By Richard Hightower; Addison-Wesley, 2002, 0201616165. Begins with Python basics, many exercises, interactive sessions. Shows programming novices concepts and practical methods. Shows programming experts Python's abilities and ways to interface with Java APIs. 
[publisher website]"], 'link': [u'http://www.informit.com/store/product.aspx?isbn=0201616165&redir=1'], 'title': [u'Python Programming with the Java Class Libraries: A Tutorial for Building Web and Enterprise Applications with Jython']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Chris Fehily; Peachpit Press, 2002, ISBN 0201748843. Task-based, step-by-step visual reference guide, many screen shots, for courses in digital graphics; Web design, scripting, development; multimedia, page layout, office tools, operating systems. [Prentice Hall]'], 'link': [u'http://www.pearsonhighered.com/educator/academic/product/0,,0201748843,00%2Ben-USS_01DBC.html'], 'title': [u'Python: Visual QuickStart Guide']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Ivan Van Laningham; Sams Publishing, 2000, ISBN 0672317354. Split into 24 hands-on, 1 hour lessons; steps needed to learn topic: syntax, language features, OO design and programming, GUIs (Tkinter), system administration, CGI. [Sams Publishing]'], 'link': [u'http://www.informit.com/store/product.aspx?isbn=0672317354'], 'title': [u'Sams Teach Yourself Python in 24 Hours']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By David Mertz; Addison Wesley. Book in progress, full text, ASCII format. Asks for feedback. [author website, Gnosis Software, Inc.]'], 'link': [u'http://gnosis.cx/TPiP/'], 'title': [u'Text Processing in Python']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> {'desc': [u'', u'- By Sean McGrath; Prentice Hall PTR, 2000, ISBN 0130211192, has CD-ROM. 
Methods to build XML applications fast, Python tutorial, DOM and SAX, new Pyxie open source XML processing library. [Prentice Hall PTR]'], 'link': [u'http://www.informit.com/store/product.aspx?isbn=0130211192'], 'title': [u'XML Processing with Python']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> (referer: None) 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [u''], 'link': [u'/'], 'title': [u'Top']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [], 'link': [u'/Computers/'], 'title': [u'Computers']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [], 'link': [u'/Computers/Programming/'], 'title': [u'Programming']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [], 'link': [u'/Computers/Programming/Languages/'], 'title': [u'Languages']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [], 'link': [u'/Computers/Programming/Languages/Python/'], 'title': [u'Python']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [u'', u'', u''], 'link': [], 'title': []} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [u'', u'', u''], 'link': [u'/Computers/Programming/Resources/'], 'title': [u'Computers: Programming: Resources']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [u'', u'- Contains links 
to assorted resources from the Python universe, compiled by PythonWare.'], 'link': [u'http://www.pythonware.com/daily/'], 'title': [u"eff-bot's Daily Python URL"]} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [u'', u'- A directory of free Python and Zope hosting providers, with reviews and ratings.'], 'link': [u'http://www.oinko.net/freepython/'], 'title': [u'Free Python and Zope Hosting Directory']} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [u'', u'- Features Python books, resources, news and articles.'], 'link': [u'http://oreilly.com/python/'], 'title': [u"O'Reilly Python Center"]} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [u'', u'- Resources for reporting bugs, accessing the Python source tree with CVS and taking part in the development of Python.'], 'link': [u'http://www.python.org/dev/'], 'title': [u"Python Developer's Guide"]} 2013-11-18 22:16:00-0500 [dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> {'desc': [u'', u'- Scripts, examples and news about Python programming for the Windows platform.'], 'link': [u'http://win32com.goermezer.de/'], 'title': [u'Social Bug']} 2013-11-18 22:16:00-0500 [dmoz] INFO: Closing spider (finished) 2013-11-18 22:16:00-0500 [dmoz] INFO: Stored json feed (44 items) in: items.json 2013-11-18 22:16:00-0500 [dmoz] INFO: Dumping Scrapy stats: {'downloader/request_bytes': 530, 'downloader/request_count': 2, 'downloader/request_method_count/GET': 2, 'downloader/response_bytes': 14892, 'downloader/response_count': 2, 'downloader/response_status_count/200': 2, 'finish_reason': 'finished', 'finish_time': datetime.datetime(2013, 11, 19, 3, 16, 0, 92126), 'item_scraped_count': 44, 'log_count/DEBUG': 52, 
'log_count/INFO': 4, 'response_received_count': 2, 'scheduler/dequeued': 2, 'scheduler/dequeued/memory': 2, 'scheduler/enqueued': 2, 'scheduler/enqueued/memory': 2, 'start_time': datetime.datetime(2013, 11, 19, 3, 15, 59, 911770)} 2013-11-18 22:16:00-0500 [dmoz] INFO: Spider closed (finished)
! head tutorial/items.json
[{"desc": [""], "link": ["/"], "title": ["Top"]}, {"desc": [], "link": ["/Computers/"], "title": ["Computers"]}, {"desc": [], "link": ["/Computers/Programming/"], "title": ["Programming"]}, {"desc": [], "link": ["/Computers/Programming/Languages/"], "title": ["Languages"]}, {"desc": [], "link": ["/Computers/Programming/Languages/Python/"], "title": ["Python"]}, {"desc": ["", "", ""], "link": [], "title": []}, {"desc": ["", "", ""], "link": ["/Computers/Programming/Languages/Python/Resources/"], "title": ["Computers: Programming: Languages: Python: Resources"]}, {"desc": ["", "", ""], "link": ["/Computers/Programming/Languages/Ruby/Books/"], "title": ["Computers: Programming: Languages: Ruby: Books"]}, {"desc": ["", "", ""], "link": ["/World/Deutsch/Computer/Programmieren/Sprachen/Python/B%C3%BCcher/"], "title": ["German"]}, {"desc": ["", "", ""], "link": ["/World/Russian/%D0%9A%D0%BE%D0%BC%D0%BF%D1%8C%D1%8E%D1%82%D0%B5%D1%80%D1%8B/%D0%9F%D1%80%D0%BE%D0%B3%D1%80%D0%B0%D0%BC%D0%BC%D0%B8%D1%80%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5/%D0%AF%D0%B7%D1%8B%D0%BA%D0%B8/Python/%D0%9A%D0%BD%D0%B8%D0%B3%D0%B8/"], "title": ["Russian"]},
%%writefile tutorial/tutorial/spiders/dmoz_spider.py
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.http import Request
from scrapy.selector import Selector
from tutorial.items import DmozItem
from scrapy.conf import settings
# Module-level side effect: runs when this spider module is imported and
# overrides the project's DOWNLOAD_DELAY setting with the value 1, to throttle
# requests to dmoz.org.
# NOTE(review): Scrapy documents this setting in seconds -- confirm against the
# Scrapy version in use.
settings.overrides['DOWNLOAD_DELAY'] = 1
class DmozSpider(CrawlSpider):
    """Crawl dmoz.org from the Programming/Languages page, following Python links.

    Link following is driven entirely by ``rules``; none of the rules
    registers a callback, so no items are produced by this spider as written.
    """

    name = "dmoz"
    allowed_domains = ["dmoz.org"]
    start_urls = ['http://www.dmoz.org/Computers/Programming/Languages']

    # Patterns are raw strings so the backslashes reach the regex engine
    # unchanged; the pattern text itself is identical to the original.
    rules = (
        # NOTE(review): `\?*` can match the empty string, so this deny pattern
        # appears to reject every link and the rule follows nothing. `\?`
        # (skip URLs with a query string) was probably intended -- confirm
        # before changing, since that would widen the crawl to all of dmoz.org.
        Rule(SgmlLinkExtractor(deny=(r'\?*', )), follow=True),
        # Follow every link whose URL is under the Python category.
        Rule(
            SgmlLinkExtractor(
                allow=(r'www\.dmoz\.org\/Computers\/Programming\/Languages\/Python', ),
                unique=True,
            ),
            follow=True,
        ),
    )

    def parse_python_page(self, response):
        """Yield a ``Request`` for the first link of every ``<ul>/<li>`` entry.

        NOTE(review): no ``Rule`` above names this method as its callback, so
        it does not appear to be invoked during a crawl -- confirm whether
        ``callback='parse_python_page'`` was intended on the Python rule.

        :param response: the downloaded page to scan for list-item links.
        :returns: generator of ``Request`` objects; site-relative hrefs
            (starting with ``/``) are prefixed with ``http://www.dmoz.org``.
        """
        sel = Selector(response)
        sites = sel.xpath('//ul/li')
        # Parenthesised form prints the same text on Python 2 and 3.
        print(response.url)
        for site in sites:
            try:
                url = site.xpath('a/@href').extract()[0]
            except IndexError:
                # This <li> has no direct <a href> child: nothing to follow.
                continue
            if url.startswith('/'):
                url = 'http://www.dmoz.org' + url
            try:
                request = Request(url=url)
            except ValueError:
                # Request refused the URL (e.g. it has no scheme); skip it to
                # keep the original best-effort behaviour.
                continue
            # The yield sits outside any try/except so that GeneratorExit
            # (raised here when the consumer closes the generator) is never
            # swallowed, which the original bare `except:` did.
            yield request
Overwriting tutorial/tutorial/spiders/dmoz_spider.py
! cd tutorial/; scrapy crawl dmoz
2013-11-18 22:17:12-0500 [scrapy] INFO: Scrapy 0.20.0 started (bot: tutorial) 2013-11-18 22:17:12-0500 [scrapy] DEBUG: Optional features available: ssl, http11 2013-11-18 22:17:12-0500 [scrapy] DEBUG: Overridden settings: {'NEWSPIDER_MODULE': 'tutorial.spiders', 'SPIDER_MODULES': ['tutorial.spiders'], 'DOWNLOAD_DELAY': 1, 'BOT_NAME': 'tutorial'} 2013-11-18 22:17:12-0500 [scrapy] DEBUG: Enabled extensions: LogStats, TelnetConsole, CloseSpider, WebService, CoreStats, SpiderState 2013-11-18 22:17:12-0500 [scrapy] DEBUG: Enabled downloader middlewares: HttpAuthMiddleware, DownloadTimeoutMiddleware, UserAgentMiddleware, RetryMiddleware, DefaultHeadersMiddleware, MetaRefreshMiddleware, HttpCompressionMiddleware, RedirectMiddleware, CookiesMiddleware, ChunkedTransferMiddleware, DownloaderStats 2013-11-18 22:17:12-0500 [scrapy] DEBUG: Enabled spider middlewares: HttpErrorMiddleware, OffsiteMiddleware, RefererMiddleware, UrlLengthMiddleware, DepthMiddleware 2013-11-18 22:17:12-0500 [scrapy] DEBUG: Enabled item pipelines: 2013-11-18 22:17:12-0500 [dmoz] INFO: Spider opened 2013-11-18 22:17:12-0500 [dmoz] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min) 2013-11-18 22:17:12-0500 [scrapy] DEBUG: Telnet console listening on 0.0.0.0:6023 2013-11-18 22:17:12-0500 [scrapy] DEBUG: Web service listening on 0.0.0.0:6080 2013-11-18 22:17:13-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages> (referer: None) 2013-11-18 22:17:14-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/> (referer: http://www.dmoz.org/Computers/Programming/Languages) 2013-11-18 22:17:15-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Development_Tools/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/) 2013-11-18 22:17:15-0500 [dmoz] DEBUG: Filtered duplicate request: <GET http://www.dmoz.org/Computers/Programming/Languages/Python/> - 
no more duplicates will be shown (see DUPEFILTER_CLASS) 2013-11-18 22:17:17-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Web/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/) 2013-11-18 22:17:17-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/User_Groups/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/) 2013-11-18 22:17:19-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/) 2013-11-18 22:17:20-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Personal_Pages/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/) 2013-11-18 22:17:21-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Mailing_Lists/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/) 2013-11-18 22:17:23-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/FAQs,_Help,_and_Tutorials/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/) 2013-11-18 22:17:24-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Conferences/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/) 2013-11-18 22:17:25-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Commercial_Services/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/) 2013-11-18 22:17:26-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/) 2013-11-18 22:17:28-0500 [dmoz] DEBUG: Crawled (200) <GET 
http://www.dmoz.org/Computers/Programming/Languages/Python/Articles_and_Reviews/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/) 2013-11-18 22:17:29-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/) 2013-11-18 22:17:30-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Implementations/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/) 2013-11-18 22:17:31-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Development_Tools/Foreign_Language_Interfaces/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Development_Tools/) 2013-11-18 22:17:32-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Development_Tools/Documentation_Tools/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Development_Tools/) 2013-11-18 22:17:34-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Development_Tools/Deployment/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Development_Tools/) 2013-11-18 22:17:35-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Development_Tools/Integrated_Development_Environments/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Development_Tools/) 2013-11-18 22:17:36-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Development_Tools/Performance_and_Testing/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Development_Tools/) 2013-11-18 22:17:36-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Web/Web_Frameworks/> (referer: 
http://www.dmoz.org/Computers/Programming/Languages/Python/Web/) 2013-11-18 22:17:38-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Web/Templating_Libraries/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Web/) 2013-11-18 22:17:39-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Web/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Web/) 2013-11-18 22:17:40-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Development_Tools/GUI_Builders/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Development_Tools/) 2013-11-18 22:17:41-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Terminal_IO/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/) 2013-11-18 22:17:43-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Platform_Specific/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/) 2013-11-18 22:17:44-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Email/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/) 2013-11-18 22:17:46-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Data_Formats/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/) 2013-11-18 22:17:47-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/XML/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/) 2013-11-18 22:17:48-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Text_Processing/> (referer: 
http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/) 2013-11-18 22:17:49-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Scientific/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/) 2013-11-18 22:17:51-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Network/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/) 2013-11-18 22:17:52-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Math_and_Calculations/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/) 2013-11-18 22:17:53-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/GUI/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/) 2013-11-18 22:17:55-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Graphics/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/) 2013-11-18 22:17:56-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Distributed_Computing/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/) 2013-11-18 22:17:57-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Databases_and_Persistence/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/) 2013-11-18 22:17:58-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Web/Web_Frameworks/Django/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Web/Web_Frameworks/) 2013-11-18 22:17:59-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Cryptography/> (referer: 
http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/) 2013-11-18 22:18:00-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Directories/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/) 2013-11-18 22:18:01-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Platform_Specific/Windows/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Platform_Specific/) 2013-11-18 22:18:02-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Platform_Specific/Linux/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Platform_Specific/) 2013-11-18 22:18:04-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/XML/Parsers/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/XML/) 2013-11-18 22:18:04-0500 [dmoz] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Databases_and_Persistence/Database_API/> (referer: http://www.dmoz.org/Computers/Programming/Languages/Python/Modules/Databases_and_Persistence/) 2013-11-18 22:18:05-0500 [dmoz] INFO: Closing spider (finished) 2013-11-18 22:18:05-0500 [dmoz] INFO: Dumping Scrapy stats: {'downloader/request_bytes': 17891, 'downloader/request_count': 44, 'downloader/request_method_count/GET': 44, 'downloader/response_bytes': 273313, 'downloader/response_count': 44, 'downloader/response_status_count/200': 44, 'finish_reason': 'finished', 'finish_time': datetime.datetime(2013, 11, 19, 3, 18, 5, 1707), 'log_count/DEBUG': 51, 'log_count/INFO': 3, 'request_depth_max': 5, 'response_received_count': 44, 'scheduler/dequeued': 44, 'scheduler/dequeued/memory': 44, 'scheduler/enqueued': 44, 'scheduler/enqueued/memory': 44, 'start_time': datetime.datetime(2013, 11, 19, 3, 17, 12, 
987421)} 2013-11-18 22:18:05-0500 [dmoz] INFO: Spider closed (finished)