import os
os.getcwd()
os.chdir('..')
os.getcwd()
os.chdir('./week2/')
os.getcwd()

import pandas as pd

fileUrl = 'https://data.baltimorecity.gov/api/views/dz54-2aru/rows.csv?accessType=DOWNLOAD'
# read_csv can download the file directly from the URL
# this is equivalent to combining R's download.file() with read.table() or read.csv()
cameraData = pd.read_csv(fileUrl)

# save the data locally
cameraData.to_csv('../data/cameras.csv', index=False)

# for simplicity I'll use IPython tricks to list the folder contents
!ls ../data

# get the current date and time
# this is equivalent to R's date() command
# note that the IPython ! prefix runs the command in the system shell
dateDownloaded = !date
print '\nDate downloaded: ' + str(dateDownloaded)

cameraData.head()

import urllib2

# download the file as camera.xls and save it in the ../data folder
fileUrl = 'https://data.baltimorecity.gov/api/views/dz54-2aru/rows.xls?accessType=DOWNLOAD'
f = urllib2.urlopen(fileUrl)
data = f.read()
with open('../data/camera.xls', 'wb') as w:
    w.write(data)

# load the Excel file as a pandas DataFrame
cameraData = pd.ExcelFile('../data/camera.xls')
cameraData = cameraData.parse('Baltimore Fixed Speed Cameras', index_col=None, na_values=['NA'])
cameraData.head()

import json

# first we get the JSON file from the website
fileUrl = 'https://data.baltimorecity.gov/api/views/dz54-2aru/rows.json?accessType=DOWNLOAD'
req = urllib2.Request(fileUrl)
opener = urllib2.build_opener()
f = opener.open(req)
# then we read it into a data structure
jsonCamera = json.loads(f.read())
# the JSON is loaded as a dictionary
print jsonCamera['meta']['view']['id']
print jsonCamera['meta']['view']['name']
print jsonCamera['meta']['view']['attribution']

# first read the CSV file
cameraData = pd.read_csv('../data/cameras.csv')
# take a subset of the columns (iloc replaces the deprecated ix for positional indexing)
tmpData = cameraData.iloc[:, 2:]
# then save it to a different CSV file
# this is equivalent to R's write.table() command
tmpData.to_csv('../data/camerasModified.csv', sep=',', index=False)
cameraData2 = pd.read_csv('../data/camerasModified.csv')
cameraData2.head()

# build a list of file names programmatically, like R's paste0()
print ['../data/file' + str(i) + '.csv' for i in range(1, 6)]

from lxml.html import parse

url = 'http://scholar.google.com/citations?user=HI-I6C0AAAAJ&hl=en'
# this is equivalent to combining R's open/read/close connection calls with htmlTreeParse()
html3 = parse(url).getroot()
# get the title text using an XPath expression
# this is equivalent to R's xpathSApply() command
title = html3.xpath('//title')
print [x.text_content() for x in title]
# get the text of the col-citedby cells using an XPath expression
citedby = html3.xpath("//td[@id='col-citedby']")
print [x.text_content() for x in citedby]
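
# a possible follow-up to the scraping example above: convert the scraped
# citation counts to integers for further analysis; this is only a sketch and
# assumes each col-citedby cell holds a plain number, which may not hold for
# every profile page
counts = [int(x.text_content()) for x in citedby if x.text_content().strip().isdigit()]
print counts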
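
# going back to the Excel example: pandas can also do the ExcelFile/parse pair
# in a single step with read_excel(); a minimal sketch, assuming
# '../data/camera.xls' was saved as shown earlier (the sheet name is passed
# positionally so it works across pandas versions)
cameraData = pd.read_excel('../data/camera.xls', 'Baltimore Fixed Speed Cameras', na_values=['NA'])
cameraData.head()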
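
# a small extension of the JSON example: the Socrata export appears to keep
# the row values under jsonCamera['data'] and the column metadata under
# jsonCamera['meta']['view']['columns'] -- an assumption about this particular
# API, not something guaranteed by the json module -- so the records can be
# loaded straight into a DataFrame
colNames = [c['name'] for c in jsonCamera['meta']['view']['columns']]
jsonFrame = pd.DataFrame(jsonCamera['data'], columns=colNames)
jsonFrame.head()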
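
# building on the file-name list above: a hypothetical sketch that reads a set
# of similarly named CSV files and stacks them into one DataFrame with
# pd.concat(); it assumes files ../data/file1.csv through ../data/file5.csv
# actually exist and share the same columns
fileNames = ['../data/file' + str(i) + '.csv' for i in range(1, 6)]
allData = pd.concat([pd.read_csv(f) for f in fileNames], ignore_index=True)
allData.head()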