# One record per image payload of interest. "hash" is the value substituted
# into the Solr `hash:"sha1:..."` query further down; "names" lists file names
# associated with that payload and is later joined into a column label.
# NOTE(review): "total" looks like an overall hit count -- confirm its source.
clears = [
    {"hash": "7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN", "names": ["visit.gif"], "total": 406},
    {"hash": "D2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW", "names": ["c.gif"], "total": 814},
    {"hash": "FWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK", "names": ["n.gif", "spaceball.gif"], "total": 180895},
    {"hash": "GF2JNIEW23EGJBVHDVCSDGKLZULRU25T", "names": ["clear.gif"], "total": 16949},
    {"hash": "GKHEOJZBVEZULAA62VJTEQHKYLI7QSMM", "names": ["dot_clear.gif", "pixel.gif"], "total": 203802},
    # NOTE(review): "cleardot.gif" appears twice here -- confirm whether a
    # different second name was intended.
    {"hash": "K3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F", "names": ["cleardot.gif", "cleardot.gif"], "total": 2365898},
    {"hash": "TUA4YXOI4BBMBVFNNT5YWOWDR2CKL347", "names": ["blank.gif"], "total": 29252},
]

# Defined alongside the list but deliberately not appended to `clears` here.
extra = {"hash": "EXMAIMZW5N4Z4UVRUDQV75VZLYE4ETRV", "names": ["ANJcron.php.gif"], "total": 196746}

# Quick sanity check: show the sixth record.
print(clears[5])
def json_name(k, y):
    """Return the relative path of the cached JSON file for one hash and year.

    k -- the hash string identifying the image payload.
    y -- the crawl year.

    Both values are interpolated with ``%s``, so any object with a sensible
    ``str()`` form is accepted.
    """
    return "json/%s-for-%s.json" % (k, y)
{'hash': 'K3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F', 'total': 2365898, 'names': ['cleardot.gif', 'cleardot.gif']}
Then we download the history data. This step is usually disabled, because it takes a long time and requires direct access to the Solr server. The files have already been downloaded and are available locally.
(Note: the download code appears to have been written for Python 2.)
import json, sys, codecs, hashlib
import urllib, datetime, re
from pprint import pprint
#
#urlo = urllib.FancyURLopener({"http":"http://explorer.bl.uk:3127"})
#
# `urllib.URLopener` is the Python 2 spelling. Python 3's top-level `urllib`
# package has no such attribute, so fall back to `urllib.request.urlretrieve`
# there instead of failing before the loop even starts.
try:
    urlo = urllib.URLopener()
    retrieve = urlo.retrieve
except AttributeError:
    from urllib.request import urlretrieve as retrieve

# Solr query template. The two `%s` slots take the hash and the crawl year;
# every `%%` is a literal percent sign belonging to the URL escapes.
q = "http://192.168.1.181:8983/solr/jisc5/select?q=hash%%3A%%22sha1%%3A%s%%22&fq=crawl_years%%3A%s&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc"

# One request per (hash, year) pair.
for c in clears:
    k = c['hash']
    for y in range(1996, 2011):
        yq = q % (k, y)
        print("GET %s %s - %s" % (k, y, yq))
        # Currently disabled as the data has been downloaded already:
        #retrieve(yq, json_name(k, y))
print("DONE")
GET 7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN 1996 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3A7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN%22&fq=crawl_years%3A1996&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET 7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN 1997 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3A7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN%22&fq=crawl_years%3A1997&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET 7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN 1998 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3A7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN%22&fq=crawl_years%3A1998&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET 7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN 1999 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3A7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN%22&fq=crawl_years%3A1999&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET 7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN 2000 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3A7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN%22&fq=crawl_years%3A2000&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET 7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN 2001 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3A7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN%22&fq=crawl_years%3A2001&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET 7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN 2002 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3A7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN%22&fq=crawl_years%3A2002&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET 7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN 2003 - 
http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3A7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN%22&fq=crawl_years%3A2003&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET 7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN 2004 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3A7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN%22&fq=crawl_years%3A2004&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET 7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN 2005 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3A7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN%22&fq=crawl_years%3A2005&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET 7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN 2006 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3A7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN%22&fq=crawl_years%3A2006&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET 7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN 2007 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3A7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN%22&fq=crawl_years%3A2007&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET 7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN 2008 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3A7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN%22&fq=crawl_years%3A2008&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET 7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN 2009 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3A7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN%22&fq=crawl_years%3A2009&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET 7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN 2010 - 
http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3A7KUBIUXQYGNTAS4J6AEG7BNCSQNFPQZN%22&fq=crawl_years%3A2010&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET D2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW 1996 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AD2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW%22&fq=crawl_years%3A1996&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET D2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW 1997 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AD2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW%22&fq=crawl_years%3A1997&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET D2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW 1998 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AD2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW%22&fq=crawl_years%3A1998&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET D2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW 1999 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AD2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW%22&fq=crawl_years%3A1999&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET D2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW 2000 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AD2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW%22&fq=crawl_years%3A2000&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET D2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW 2001 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AD2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW%22&fq=crawl_years%3A2001&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET D2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW 2002 - 
http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AD2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW%22&fq=crawl_years%3A2002&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET D2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW 2003 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AD2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW%22&fq=crawl_years%3A2003&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET D2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW 2004 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AD2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW%22&fq=crawl_years%3A2004&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET D2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW 2005 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AD2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW%22&fq=crawl_years%3A2005&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET D2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW 2006 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AD2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW%22&fq=crawl_years%3A2006&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET D2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW 2007 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AD2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW%22&fq=crawl_years%3A2007&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET D2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW 2008 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AD2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW%22&fq=crawl_years%3A2008&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET D2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW 2009 - 
http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AD2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW%22&fq=crawl_years%3A2009&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET D2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW 2010 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AD2VQOLYTNWE6VT3MMPGIQANFPXGIS4SW%22&fq=crawl_years%3A2010&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET FWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK 1996 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AFWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK%22&fq=crawl_years%3A1996&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET FWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK 1997 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AFWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK%22&fq=crawl_years%3A1997&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET FWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK 1998 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AFWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK%22&fq=crawl_years%3A1998&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET FWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK 1999 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AFWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK%22&fq=crawl_years%3A1999&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET FWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK 2000 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AFWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK%22&fq=crawl_years%3A2000&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET FWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK 2001 - 
http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AFWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK%22&fq=crawl_years%3A2001&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET FWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK 2002 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AFWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK%22&fq=crawl_years%3A2002&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET FWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK 2003 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AFWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK%22&fq=crawl_years%3A2003&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET FWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK 2004 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AFWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK%22&fq=crawl_years%3A2004&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET FWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK 2005 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AFWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK%22&fq=crawl_years%3A2005&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET FWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK 2006 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AFWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK%22&fq=crawl_years%3A2006&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET FWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK 2007 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AFWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK%22&fq=crawl_years%3A2007&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET FWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK 2008 - 
http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AFWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK%22&fq=crawl_years%3A2008&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET FWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK 2009 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AFWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK%22&fq=crawl_years%3A2009&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET FWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK 2010 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AFWXKVC27DHYLYIE5S5WAFPLKZNI3ACYK%22&fq=crawl_years%3A2010&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GF2JNIEW23EGJBVHDVCSDGKLZULRU25T 1996 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGF2JNIEW23EGJBVHDVCSDGKLZULRU25T%22&fq=crawl_years%3A1996&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GF2JNIEW23EGJBVHDVCSDGKLZULRU25T 1997 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGF2JNIEW23EGJBVHDVCSDGKLZULRU25T%22&fq=crawl_years%3A1997&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GF2JNIEW23EGJBVHDVCSDGKLZULRU25T 1998 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGF2JNIEW23EGJBVHDVCSDGKLZULRU25T%22&fq=crawl_years%3A1998&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GF2JNIEW23EGJBVHDVCSDGKLZULRU25T 1999 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGF2JNIEW23EGJBVHDVCSDGKLZULRU25T%22&fq=crawl_years%3A1999&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GF2JNIEW23EGJBVHDVCSDGKLZULRU25T 2000 - 
http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGF2JNIEW23EGJBVHDVCSDGKLZULRU25T%22&fq=crawl_years%3A2000&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GF2JNIEW23EGJBVHDVCSDGKLZULRU25T 2001 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGF2JNIEW23EGJBVHDVCSDGKLZULRU25T%22&fq=crawl_years%3A2001&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GF2JNIEW23EGJBVHDVCSDGKLZULRU25T 2002 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGF2JNIEW23EGJBVHDVCSDGKLZULRU25T%22&fq=crawl_years%3A2002&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GF2JNIEW23EGJBVHDVCSDGKLZULRU25T 2003 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGF2JNIEW23EGJBVHDVCSDGKLZULRU25T%22&fq=crawl_years%3A2003&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GF2JNIEW23EGJBVHDVCSDGKLZULRU25T 2004 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGF2JNIEW23EGJBVHDVCSDGKLZULRU25T%22&fq=crawl_years%3A2004&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GF2JNIEW23EGJBVHDVCSDGKLZULRU25T 2005 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGF2JNIEW23EGJBVHDVCSDGKLZULRU25T%22&fq=crawl_years%3A2005&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GF2JNIEW23EGJBVHDVCSDGKLZULRU25T 2006 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGF2JNIEW23EGJBVHDVCSDGKLZULRU25T%22&fq=crawl_years%3A2006&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GF2JNIEW23EGJBVHDVCSDGKLZULRU25T 2007 - 
http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGF2JNIEW23EGJBVHDVCSDGKLZULRU25T%22&fq=crawl_years%3A2007&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GF2JNIEW23EGJBVHDVCSDGKLZULRU25T 2008 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGF2JNIEW23EGJBVHDVCSDGKLZULRU25T%22&fq=crawl_years%3A2008&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GF2JNIEW23EGJBVHDVCSDGKLZULRU25T 2009 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGF2JNIEW23EGJBVHDVCSDGKLZULRU25T%22&fq=crawl_years%3A2009&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GF2JNIEW23EGJBVHDVCSDGKLZULRU25T 2010 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGF2JNIEW23EGJBVHDVCSDGKLZULRU25T%22&fq=crawl_years%3A2010&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GKHEOJZBVEZULAA62VJTEQHKYLI7QSMM 1996 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGKHEOJZBVEZULAA62VJTEQHKYLI7QSMM%22&fq=crawl_years%3A1996&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GKHEOJZBVEZULAA62VJTEQHKYLI7QSMM 1997 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGKHEOJZBVEZULAA62VJTEQHKYLI7QSMM%22&fq=crawl_years%3A1997&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GKHEOJZBVEZULAA62VJTEQHKYLI7QSMM 1998 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGKHEOJZBVEZULAA62VJTEQHKYLI7QSMM%22&fq=crawl_years%3A1998&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GKHEOJZBVEZULAA62VJTEQHKYLI7QSMM 1999 - 
http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGKHEOJZBVEZULAA62VJTEQHKYLI7QSMM%22&fq=crawl_years%3A1999&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GKHEOJZBVEZULAA62VJTEQHKYLI7QSMM 2000 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGKHEOJZBVEZULAA62VJTEQHKYLI7QSMM%22&fq=crawl_years%3A2000&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GKHEOJZBVEZULAA62VJTEQHKYLI7QSMM 2001 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGKHEOJZBVEZULAA62VJTEQHKYLI7QSMM%22&fq=crawl_years%3A2001&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GKHEOJZBVEZULAA62VJTEQHKYLI7QSMM 2002 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGKHEOJZBVEZULAA62VJTEQHKYLI7QSMM%22&fq=crawl_years%3A2002&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GKHEOJZBVEZULAA62VJTEQHKYLI7QSMM 2003 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGKHEOJZBVEZULAA62VJTEQHKYLI7QSMM%22&fq=crawl_years%3A2003&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GKHEOJZBVEZULAA62VJTEQHKYLI7QSMM 2004 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGKHEOJZBVEZULAA62VJTEQHKYLI7QSMM%22&fq=crawl_years%3A2004&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GKHEOJZBVEZULAA62VJTEQHKYLI7QSMM 2005 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGKHEOJZBVEZULAA62VJTEQHKYLI7QSMM%22&fq=crawl_years%3A2005&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GKHEOJZBVEZULAA62VJTEQHKYLI7QSMM 2006 - 
http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGKHEOJZBVEZULAA62VJTEQHKYLI7QSMM%22&fq=crawl_years%3A2006&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GKHEOJZBVEZULAA62VJTEQHKYLI7QSMM 2007 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGKHEOJZBVEZULAA62VJTEQHKYLI7QSMM%22&fq=crawl_years%3A2007&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GKHEOJZBVEZULAA62VJTEQHKYLI7QSMM 2008 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGKHEOJZBVEZULAA62VJTEQHKYLI7QSMM%22&fq=crawl_years%3A2008&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GKHEOJZBVEZULAA62VJTEQHKYLI7QSMM 2009 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGKHEOJZBVEZULAA62VJTEQHKYLI7QSMM%22&fq=crawl_years%3A2009&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET GKHEOJZBVEZULAA62VJTEQHKYLI7QSMM 2010 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AGKHEOJZBVEZULAA62VJTEQHKYLI7QSMM%22&fq=crawl_years%3A2010&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET K3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F 1996 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AK3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F%22&fq=crawl_years%3A1996&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET K3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F 1997 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AK3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F%22&fq=crawl_years%3A1997&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET K3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F 1998 - 
http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AK3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F%22&fq=crawl_years%3A1998&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET K3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F 1999 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AK3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F%22&fq=crawl_years%3A1999&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET K3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F 2000 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AK3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F%22&fq=crawl_years%3A2000&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET K3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F 2001 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AK3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F%22&fq=crawl_years%3A2001&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET K3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F 2002 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AK3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F%22&fq=crawl_years%3A2002&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET K3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F 2003 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AK3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F%22&fq=crawl_years%3A2003&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET K3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F 2004 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AK3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F%22&fq=crawl_years%3A2004&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET K3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F 2005 - 
http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AK3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F%22&fq=crawl_years%3A2005&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET K3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F 2006 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AK3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F%22&fq=crawl_years%3A2006&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET K3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F 2007 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AK3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F%22&fq=crawl_years%3A2007&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET K3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F 2008 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AK3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F%22&fq=crawl_years%3A2008&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET K3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F 2009 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AK3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F%22&fq=crawl_years%3A2009&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET K3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F 2010 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3AK3KF7CQX6UDYUIFPTFRMTEWKIZ4EKB3F%22&fq=crawl_years%3A2010&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET TUA4YXOI4BBMBVFNNT5YWOWDR2CKL347 1996 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3ATUA4YXOI4BBMBVFNNT5YWOWDR2CKL347%22&fq=crawl_years%3A1996&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET TUA4YXOI4BBMBVFNNT5YWOWDR2CKL347 1997 - 
http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3ATUA4YXOI4BBMBVFNNT5YWOWDR2CKL347%22&fq=crawl_years%3A1997&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET TUA4YXOI4BBMBVFNNT5YWOWDR2CKL347 1998 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3ATUA4YXOI4BBMBVFNNT5YWOWDR2CKL347%22&fq=crawl_years%3A1998&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET TUA4YXOI4BBMBVFNNT5YWOWDR2CKL347 1999 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3ATUA4YXOI4BBMBVFNNT5YWOWDR2CKL347%22&fq=crawl_years%3A1999&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET TUA4YXOI4BBMBVFNNT5YWOWDR2CKL347 2000 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3ATUA4YXOI4BBMBVFNNT5YWOWDR2CKL347%22&fq=crawl_years%3A2000&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET TUA4YXOI4BBMBVFNNT5YWOWDR2CKL347 2001 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3ATUA4YXOI4BBMBVFNNT5YWOWDR2CKL347%22&fq=crawl_years%3A2001&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET TUA4YXOI4BBMBVFNNT5YWOWDR2CKL347 2002 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3ATUA4YXOI4BBMBVFNNT5YWOWDR2CKL347%22&fq=crawl_years%3A2002&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET TUA4YXOI4BBMBVFNNT5YWOWDR2CKL347 2003 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3ATUA4YXOI4BBMBVFNNT5YWOWDR2CKL347%22&fq=crawl_years%3A2003&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET TUA4YXOI4BBMBVFNNT5YWOWDR2CKL347 2004 - 
http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3ATUA4YXOI4BBMBVFNNT5YWOWDR2CKL347%22&fq=crawl_years%3A2004&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET TUA4YXOI4BBMBVFNNT5YWOWDR2CKL347 2005 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3ATUA4YXOI4BBMBVFNNT5YWOWDR2CKL347%22&fq=crawl_years%3A2005&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET TUA4YXOI4BBMBVFNNT5YWOWDR2CKL347 2006 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3ATUA4YXOI4BBMBVFNNT5YWOWDR2CKL347%22&fq=crawl_years%3A2006&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET TUA4YXOI4BBMBVFNNT5YWOWDR2CKL347 2007 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3ATUA4YXOI4BBMBVFNNT5YWOWDR2CKL347%22&fq=crawl_years%3A2007&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET TUA4YXOI4BBMBVFNNT5YWOWDR2CKL347 2008 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3ATUA4YXOI4BBMBVFNNT5YWOWDR2CKL347%22&fq=crawl_years%3A2008&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET TUA4YXOI4BBMBVFNNT5YWOWDR2CKL347 2009 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3ATUA4YXOI4BBMBVFNNT5YWOWDR2CKL347%22&fq=crawl_years%3A2009&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc GET TUA4YXOI4BBMBVFNNT5YWOWDR2CKL347 2010 - http://192.168.1.181:8983/solr/jisc5/select?q=hash%3A%22sha1%3ATUA4YXOI4BBMBVFNNT5YWOWDR2CKL347%22&fq=crawl_years%3A2010&rows=100&wt=json&indent=true&facet=true&facet.field=domain&facet.mincount=1&sort=crawl_date+asc DONE
Next, we parse the JSON output and reassemble the data into a more useful form.
import json
import numpy as np
import pandas as pd
from pprint import pprint
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
#%pylab inline
# Loop over the items and years, collecting the number of hits that each
# cached Solr response reports for that hash in that year.
td = {}
for c in clears:
    k = c['hash']
    n = c['names']
    values = []
    years = []
    for y in range(1996, 2011):
        fn = json_name(k, y)
        #print("PARSE %s %s - %s" % (k, y, fn))
        # Close the file as soon as it has been parsed.
        with open(fn) as data_file:
            data = json.load(data_file)
        years.append(y)
        values.append(data['response']['numFound'])
        #pprint(data['facet_counts']['facet_fields']['domain'][0:10])
    # One column per record, labelled with its comma-joined file names and
    # indexed by year. Records sharing the same label would overwrite each
    # other in this dict.
    td[", ".join(n)] = pd.Series(values, index=years)

# Assemble the per-record series into a single year-by-record table.
df = pd.DataFrame(td)
print(df)
blank.gif c.gif clear.gif cleardot.gif, cleardot.gif \ 1996 2 0 0 0 1997 17 7 29 15 1998 14 9 425 88 1999 0 0 0 0 2000 0 0 0 0 2001 304 181 2411 17289 2002 230 157 2127 33741 2003 241 180 4071 17791 2004 219 139 4382 261769 2005 830 140 2158 104289 2006 10638 223 4233 95535 2007 10489 133 2735 89000 2008 321 8 119 1062943 2009 399 8 79 252990 2010 7347 12 191 473803 dot_clear.gif, pixel.gif n.gif, spaceball.gif visit.gif 1996 3 46 0 1997 75 242 0 1998 103 160 0 1999 0 0 0 2000 0 0 0 2001 2749 3456 5 2002 3300 5494 47 2003 5133 15912 53 2004 188048 27820 49 2005 4010 17779 27 2006 4723 58471 114 2007 3393 68682 75 2008 110 1458 9 2009 170 2765 77 2010 531 11996 20
# Import the ticker submodule explicitly instead of relying on another import
# (such as matplotlib.pyplot) having loaded it as a side effect.
import matplotlib.ticker

# Update the matplotlib configuration parameters:
matplotlib.rcParams.update({
    'font.size': 16,
    'font.family': 'STIXGeneral',
    'mathtext.fontset': 'stix',
    'axes.titlesize': 'medium',
})

# Plot one bar chart per column of `df`, each with its own X axis:
axs = df.plot(kind='bar', subplots=True, figsize=(16, 20), legend=False, sharex=False)

# No border on the legend, please:
#leg = plt.legend(loc="best")
#leg.get_frame().set_linewidth(0.0)

# Use a logarithmic Y-axis (doesn't look great):
#ax.set_yscale("log")

# Dark Magic to get commas to show up in the integers on the Y axis:
for ax in axs:
    ax.get_yaxis().set_major_formatter(
        matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
# Import the ticker submodule explicitly instead of relying on another import
# (such as matplotlib.pyplot) having loaded it as a side effect.
import matplotlib.ticker


def _comma_label(x, pos):
    """Format a Y tick as a comma-separated integer (plain decimal below 1)."""
    # A log axis can place ticks below 1; int() would label all of them "0".
    if x >= 1:
        return format(int(x), ',')
    return format(x, 'g')


# Same per-column bar charts as before, this time on a logarithmic Y axis:
axs = df.plot(kind='bar', subplots=True, figsize=(16, 20), legend=False, sharex=False)

# No border on the legend, please:
#leg = plt.legend(loc="best")
#leg.get_frame().set_linewidth(0.0)

for ax in axs:
    # Switch the scale first: set_yscale() installs the scale's own default
    # tick formatter, so a formatter set before this call would be discarded.
    ax.set_yscale("log")
    # Dark Magic to get commas to show up in the integers on the Y axis:
    ax.get_yaxis().set_major_formatter(
        matplotlib.ticker.FuncFormatter(_comma_label))