# Walk-through of loading, inspecting and merging asteroid tables with pandas.
#
# NOTE: this script targets Python 2 and an old pandas release: it relies on
# urlparse, httplib, DataFrame.ix and pandas.stats.api.  The print calls use
# the single-argument parenthesised form, which emits the same text there.

# The imports below are deliberately interleaved with print calls instead of
# being grouped at the top of the file: each print reports how many names are
# in the module namespace, showing how much the star-imports add to it.
print("%s %d" % ("Before Pandas", len(dir())))
import pandas
from pandas import *
print("%s %d" % ("After Pandas", len(dir())))
from numpy import *
print("%s %d" % ("After NumPy", len(dir())))

import urlparse
import httplib
import pandas

csv_data = "http://priede.bf.lu.lv/ftp/grozs/Datorlietas/Geog5028/TIS_PRG/HomePlanet/ASTEROID.CSV"
fwf_data = "http://ssd.jpl.nasa.gov/dat/ELEMENTS.NUMBR"
tax_data = "http://sbn.psi.edu/ferret/reformatTable.action?productId=TAXONOMY10_TAB&dataSetId=EAR-A-5-DDR-TAXONOMY-V6.0"


def save_file(location):
    """Download ``location`` over HTTP and store the body under ``/tmp``.

    The local file is named after the last segment of the URL path.  The
    HTTP status code is not checked, so an error page is saved like any
    other response body.

    Args:
        location: Absolute ``http://`` URL to fetch.

    Returns:
        Path of the file that was written.
    """
    url = urlparse.urlparse(location)

    # Request the path *and* the query string; sending only url.path would
    # silently drop the parameters of URLs such as tax_data.
    target = url.path or '/'
    if url.query:
        target += '?' + url.query

    connection = httplib.HTTPConnection(url.netloc)
    try:
        connection.connect()
        connection.request('GET', target)
        data = connection.getresponse().read()
    finally:
        # Release the socket even when the request or the read fails.
        connection.close()

    filename = '/tmp/' + url.path.split('/')[-1]
    print(filename)
    # 'wb' creates/truncates the file and writes the payload untouched; the
    # previous 'rw+' is not a valid mode string.
    with open(filename, 'wb') as tmp:
        tmp.write(data)
    return filename


# Read NASA asteroid data.
# To download a fresh copy instead of using the local one:
#     filename = save_file(fwf_data)
filename = "data/ELEMENTS.NUMBR.txt"

# Derive the fixed column widths from the second line of the file
# (presumably a ruler of dashes under the column titles -- confirm against
# the data file).  Only the first two lines are read, not the whole file.
with open(filename, 'r') as f:
    f.readline()
    ruler = f.readline().rstrip('\r\n')
# +1 accounts for the single space that separates adjacent columns.
widths = [len(segment) + 1 for segment in ruler.split(' ')]
asteroids_nasa = pandas.read_fwf(filename, widths=widths, skiprows=[1])

# The taxonomy table is read straight from its URL with hard-coded widths.
widths = [11, 18, 11, 13, 13, 14, 14, 14, 14, 13,
          13, 12, 12, 10, 10, 15, 15, 16, 15, 22]
asteroids_taxonomy = pandas.read_fwf(tax_data, widths=widths, skiprows=[1])

# Show the row labelled 0 of each table.
print(asteroids_nasa.xs(0))
print('\n')
print(asteroids_taxonomy.xs(0))

# The bare expressions below compute a value and discard it; they only
# display something when run in an interactive session.
asteroids_nasa.ix[:, 0:2].head(10)
asteroids_nasa.describe()
asteroids_nasa.ix[:, 3].describe()
asteroids_nasa.ix[:, 'a'].describe()
asteroids_nasa.ix[:, 3:7].corr()

# Without inplace=True the result of rename() is not stored, so
# asteroids_nasa keeps its original column names here ...
asteroids_nasa.rename(columns={'Name': 'AST_NAME',
                               'a': 'Semimajor Axis',
                               'e': 'Eccentricity'})
asteroids_nasa
# ... whereas inplace=True renames the columns of asteroids_nasa itself.
asteroids_nasa.rename(columns={'Name': 'AST_NAME',
                               'a': 'Semimajor Axis',
                               'e': 'Eccentricity'},
                      inplace=True)
asteroids_nasa
asteroids_taxonomy

# Join the two tables on the shared asteroid-name column.
merged = asteroids_nasa.merge(asteroids_taxonomy, on='AST_NAME')
print(merged)

# Drop two columns from the merged frame (pop removes them in place).
merged.pop('Ref\n')
merged.pop('COMMENT \n')
merged.columns

# Move DEMEO_REF_CODE to position 11: remove it, then re-insert it there.
ref_codes = merged.pop('DEMEO_REF_CODE')
merged
merged.insert(11, 'DEMEO_REF_CODE', ref_codes)
merged

# Ordinary least squares of the 'Semimajor Axis' column on the 'i' column.
from pandas.stats.api import ols
model = ols(y=asteroids_nasa.xs('Semimajor Axis', axis=1),
            x=asteroids_nasa.xs('i', axis=1))
print(model)

import matplotlib.pyplot as plt