# For parsing the csv
import csv
import urllib2
import StringIO
import math
# For actual computations
import random as rand
import pandas as pd
In 2010, Google released a page listing the top 1,000 websites on the internet: https://web.archive.org/web/20130102235318/http://www.google.com/adplanner/static/top1000
The page has since been taken down, but mirrored copies of the data are still available.
Let's take a look at it!
# Location of a mirrored copy of the list (file name: googletop1000april2010.csv).
websites_url = 'https://raw.githubusercontent.com/ledeprogram/courses/master/platforms/anonymization/googletop1000april2010.csv'

# Fetch the CSV over HTTP and parse it into a DataFrame.  The response
# returned by urllib2.urlopen() is not a context manager, so it is closed
# explicitly; the try/finally releases the connection even if parsing fails.
websites_response = urllib2.urlopen(websites_url)
try:
    websites = pd.read_csv(websites_response)
finally:
    websites_response.close()

# Bare expression: presumably left as the last line of a notebook cell so the
# DataFrame is rendered as the cell's output.
websites