import pandas as pd
import numpy as np
import os
# NOTE: '__file__' is a quoted string literal here, not the module attribute,
# so realpath() resolves it against the current working directory. cur_dir
# is therefore the directory the script is launched from, and the data/
# folder is looked up relative to that.
cur_dir = os.path.dirname(os.path.realpath('__file__'))

# One raw CSV per year, 1995 through 2013 inclusive.
years = list(range(1995, 2014))

# Read every yearly file first and concatenate once at the end; calling
# pd.concat inside the loop re-copies all previously loaded rows each time.
yearly_frames = []
for year in years:
    data = pd.read_csv(
        cur_dir + '/data/cross_raw_data/bc_' + str(year) + '.csv',
        header=1,         # column names are on the second line of each file
        skipfooter=8,     # the last 8 lines of each file are not data rows
        engine='python',  # skipfooter is not supported by the C parser
    )
    yearly_frames.append(data)
df = pd.concat(yearly_frames)

# Drop the last column by position. `.ix` was removed in pandas 1.0; `.iloc`
# is the positional equivalent of the original `df.ix[:, 0:-1]`.
# NOTE(review): the original indentation was lost, so it is unclear whether
# this ran once after the loop or once per year - confirm against the data.
df = df.iloc[:, 0:-1]
def _split_port_name(port_name):
    """Split a 'Port Name' value on ': ' into a ``(city, state)`` pair.

    The text before the first ': ' is returned as the state and the text
    immediately after it as the city. When the value is not a string
    (e.g. a missing cell read as NaN) or contains no ': ' separator,
    both elements are NaN.
    """
    if not isinstance(port_name, str):
        return np.nan, np.nan
    parts = port_name.split(': ')
    if len(parts) < 2:
        return np.nan, np.nan
    return parts[1], parts[0]


# Split every port name once, then fan the pairs out into the two lists
# that back the new columns.
port_parts = [_split_port_name(port_name) for port_name in df['Port Name']]
city = [city_part for city_part, _ in port_parts]
state = [state_part for _, state_part in port_parts]

# Create the City and State columns from the split results.
df['City'] = city
df['State'] = state
# Renumber the rows: reset_index() moves the old index into an 'index'
# column, which is then discarded.
df = df.reset_index().drop('index', axis=1)

# Strip leading/trailing whitespace from every column label.
df = df.rename(columns={label: label.strip() for label in df.columns})

# Write the combined table without the row index.
output_path = cur_dir + '/data/cross_raw_data/bc_' + 'full_crossing_data.csv'
df.to_csv(output_path, index=False)