Help out my noob friend mike
Configure notebook style (see NBCONFIG.ipynb), add imports and paths. The %run magic used below requires IPython 2.0 or higher.
%run NBCONFIG.ipynb
Populating the interactive namespace from numpy and matplotlib ['dti', 'd', 'h', 'm', 'us', 's', 'ms', 'intvl', 'ns']
FILEXLS = '/home/glue/Desktop/formic.xls'
FILECSV = '/home/glue/Desktop/formic.csv'
from pandas import read_excel
df = read_excel(FILEXLS)
df
NIN | DMDDATE | DMDQTY | QTR | |
---|---|---|---|---|
0 | 11111 | 20090101 | 2 | 1 |
1 | 11111 | 20090501 | 4 | 1 |
2 | 11111 | 20090701 | 6 | 3 |
3 | 22222 | 20090101 | 8 | 1 |
4 | 22222 | 20120601 | 10 | 3 |
5 | 22222 | 20130101 | 12 | 7 |
6 | 22222 | 20130301 | 14 | 7 |
7 | 22222 | 20131001 | 53 | 9 |
8 | 22222 | 20131201 | 6 | 12 |
9 | 33333 | 20091201 | 23 | 1 |
10 | 33333 | 20131201 | 3 | 5 |
df111 = df [df['NIN'] == 11111]
df111
NIN | DMDDATE | DMDQTY | QTR | |
---|---|---|---|---|
0 | 11111 | 20090101 | 2 | 1 |
1 | 11111 | 20090501 | 4 | 1 |
2 | 11111 | 20090701 | 6 | 3 |
Look at the unique values for the QTR column
len( df111['QTR'].unique() )
2
Import a random int generator to show off how to apply a function elementwise, essentially a mapping for a column in your dataset
from random import randint as rint
def fakemap(value):
return rint(1,10)
dates = df['DMDDATE']
QTR_FOO = dates.apply(fakemap)
Create a newcolumn, append it to the orignal
df['QTR_FOO'] = QTR_FOO
df
NIN | DMDDATE | DMDQTY | QTR | QTR_FOO | |
---|---|---|---|---|---|
0 | 11111 | 20090101 | 2 | 1 | 1 |
1 | 11111 | 20090501 | 4 | 1 | 5 |
2 | 11111 | 20090701 | 6 | 3 | 4 |
3 | 22222 | 20090101 | 8 | 1 | 2 |
4 | 22222 | 20120601 | 10 | 3 | 1 |
5 | 22222 | 20130101 | 12 | 7 | 4 |
6 | 22222 | 20130301 | 14 | 7 | 2 |
7 | 22222 | 20131001 | 53 | 9 | 3 |
8 | 22222 | 20131201 | 6 | 12 | 3 |
9 | 33333 | 20091201 | 23 | 1 | 1 |
10 | 33333 | 20131201 | 3 | 5 | 6 |
f = open(FILECSV, 'r')
lines = f.readlines()
DATADIC = {}
for (idx, line) in enumerate(lines):
sline = line.strip().split(',')
print idx, sline
0 ['"NIN"', '"DMDDATE"', '"DMDQTY"', '"QTR"'] 1 ['11111', '20090101', '2', '1'] 2 ['11111', '20090501', '4', '1'] 3 ['11111', '20090701', '6', '3'] 4 ['22222', '20090101', '8', '1'] 5 ['22222', '20120601', '10', '3'] 6 ['22222', '20130101', '12', '7'] 7 ['22222', '20130301', '14', '7'] 8 ['22222', '20131001', '53', '9'] 9 ['22222', '20131201', '6', '12'] 10 ['33333', '20091201', '23', '1'] 11 ['33333', '20131201', '3', '5']
UNIQUE_DFS = []
for nin in df['NIN'].unique():
UNIQUE_DFS.append(df [df['NIN'] == nin])
# for data in UNIQUE_DFS:
# dates = data['DMDDATE'].apply(fakemap) #Apply function to date column
# df['QTR_FOO'] = QTR_FOO #Write a new column
# UNIQUE_DFS.append(df)
for df in UNIQUE_DFS:
unq = df['QTR_FOO'].unique()
print unq, len(unq)
[1 5 4] 3 [2 1 4 3] 4 [1 6] 2
[len(df['QTR_FOO'].unique()) for df in UNIQUE_DFS]
[3, 4, 2]