import pandas as pd
%load_ext rmagic
Define the paths for the project folders, then create each folder from its corresponding path.
* Including the following directories:
./data
./script
./data/raw # to store raw data
./data/cleaned # to store the cleaned data
./data/simulated # to store simulated data
./visualizations # to store plots
%%R
# Define the project's folder layout and create any folder that is missing.
# All paths are relative to the notebook's working directory.
script_dir = './script'
data_dir = './data'
raw_dir = file.path(data_dir, 'raw')                # raw downloads
cleaned_dir = file.path(data_dir, 'cleaned')        # cleaned data
simulated_dir = file.path(data_dir, 'simulated')    # simulated data
visualizations_dir = './visualizations'             # plots

# dir.create() warns and returns FALSE when the target already exists, so only
# create folders that are missing; this keeps the cell quiet when re-run while
# still surfacing genuine failures. recursive = TRUE lets a sub-folder be
# created even if its parent is absent.
invisible(lapply(
    c(script_dir, data_dir, raw_dir, cleaned_dir, simulated_dir, visualizations_dir),
    function(path) {
        if (!file.exists(path)) dir.create(path, recursive = TRUE)
    }
))
!ls
print "Data Folder: "
!ls data/
Readme(How to import image).txt data_gathering.ipynb data data_project_report.ipynb data_analysis.ipynb script data_cleaning.ipynb visualizations Data Folder: cleaned raw simulated
%%R
# Remote locations of the raw CSV files, one variable per data set.
# Overall unemployment rate and immigration share:
unemployment_url <- 'https://dl.dropboxusercontent.com/u/40198639/stats_data/un_rate.csv'
immigration_url <- 'https://dl.dropboxusercontent.com/u/40198639/stats_data/immg.csv'
# Unemployment split by group:
blackunemployment_url <- 'https://dl.dropboxusercontent.com/u/40198639/stats_data/black.csv'
whiteunemployment_url <- 'https://dl.dropboxusercontent.com/u/40198639/stats_data/white.csv'
# Education-level data sets:
highedu_url <- 'https://dl.dropboxusercontent.com/u/40198639/stats_data/high_edu.csv'
lowedu_url <- 'https://dl.dropboxusercontent.com/u/40198639/stats_data/low_edu.csv'
%%R
# Timestamp of this run: the pieces of date() split on single spaces and
# re-joined with underscores.
date <- paste(unlist(strsplit(date(), split = ' ')), collapse = '_')

# Local destination of each raw CSV file (all under ./data/raw).
immigration_file <- './data/raw/immg.csv'
unemployment_file <- './data/raw/un_rate.csv'
blackunemployment_file <- './data/raw/black.csv'
whiteunemployment_file <- './data/raw/white.csv'
highedu_file <- './data/raw/high_edu.csv'
lowedu_file <- './data/raw/low_edu.csv'
Download the raw CSV data from each link into the corresponding file path defined above.
Then listing out all the files under the folders called "raw".
%%R
# Fetch every raw data set with curl, pairing each source URL with its
# destination path; downloads run one after another in the listed order.
invisible(Map(
    function(src, dest) download.file(src, dest, method = "curl"),
    c(immigration_url, blackunemployment_url, whiteunemployment_url,
      highedu_url, lowedu_url, unemployment_url),
    c(immigration_file, blackunemployment_file, whiteunemployment_file,
      highedu_file, lowedu_file, unemployment_file)
))

# Show what is now present in the raw-data folder.
print(list.files(raw_dir))
[1] "black.csv" "high_edu.csv" "immg.csv" "low_edu.csv" "mexican.csv" [6] "un_rate.csv" "white.csv"
import pandas as pd
# Load each raw CSV into its own DataFrame; the first row of every file is
# used as the column header.
immg, black, white, high_edu, low_edu, un_rate = (
    pd.read_csv(csv_path, header=0)
    for csv_path in (
        "./data/raw/immg.csv",
        "./data/raw/black.csv",
        "./data/raw/white.csv",
        "./data/raw/high_edu.csv",
        "./data/raw/low_edu.csv",
        "./data/raw/un_rate.csv",
    )
)
Display the sample of immg.csv file
# Preview the first rows of the table loaded from immg.csv.
immg.head()
State | 2000 | 2010 | |
---|---|---|---|
0 | Alabama | 2.0 | 3.5 |
1 | Alaska | 5.9 | 6.9 |
2 | Arizona | 12.8 | 13.4 |
3 | Arkansas | 2.8 | 4.5 |
4 | Califonia | 26.2 | 27.2 |
# Preview the first rows of the table loaded from un_rate.csv.
un_rate.head()
State | 2000 | 2010 | |
---|---|---|---|
0 | Alabama | 1.1 | 11.8 |
1 | Alaska | 0.1 | 9.4 |
2 | Arizona | 5.4 | 12.0 |
3 | Arkansas | 6.2 | 9.9 |
4 | Califonia | 6.6 | 13.4 |
Display the sample of black.csv file
# Preview the first rows of the table loaded from black.csv.
black.head()
State | 2000 | 2010 | |
---|---|---|---|
0 | Alabama | 12.1 | 19.2 |
1 | Alaska | 9.4 | 12.7 |
2 | Arizona | 9.3 | 17.5 |
3 | Arkansas | 12.8 | 16.6 |
4 | Califonia | 12.1 | 19.2 |
Display the sample of white.csv file
# Preview the first rows of the table loaded from white.csv.
white.head()
StateState | 2000 | 2010 | |
---|---|---|---|
0 | Alabama | 4.4 | 9.2 |
1 | Alaska | 6.7 | 7.0 |
2 | Arizona | 4.4 | 10.7 |
3 | Arkansas | 5.1 | 8.5 |
4 | Califonia | 5.5 | 12.4 |
Display the sample of high_edu.csv file
# Preview the first rows of the table loaded from high_edu.csv.
high_edu.head()
State | 2000 | 2010 | |
---|---|---|---|
0 | Alabama | 1.8 | 4.5 |
1 | Alaska | 2.2 | 3.1 |
2 | Arizona | 2.3 | 4.7 |
3 | Arkansas | 1.7 | 3.0 |
4 | Califonia | 2.5 | 6.8 |
Display the sample of low_edu.csv file
# Preview the first rows of the table loaded from low_edu.csv.
low_edu.head()
State | 2000 | 2010 | |
---|---|---|---|
0 | Alabama | 8.5 | 17.2 |
1 | Alaska | 13.6 | 16.8 |
2 | Arizona | 8.0 | 18.0 |
3 | Arkansas | 7.9 | 13.9 |
4 | Califonia | 10.8 | 20.5 |
List the team members contributing to this notebook, along with their responsibilities: