import pandas as pd
import datetime as dt
# example dataset
data="""cruiseid year station month day date lat lon depth_w taxon count
AA8704 1987 1 04 13 13-APR-87 35.85 -75.48 18 Centropages_typicus 75343
AA8704 1987 1 04 13 13-APR-87 35.85 -75.48 18 Gastropoda 0
AA8704 1987 1 04 13 13-APR-87 35.85 -75.48 18 Calanus_finmarchicus 2340
AA8704 1987 1 04 13 13-APR-87 35.85 -75.48 18 Pseudocalanus_spp. 53816
AA8704 1987 1 05 13 13-MAY-87 35.85 -75.48 18 Penilia_avirostris 0
AA8704 1987 1 05 13 13-MAY-87 35.85 -75.48 18 Temora_longicornis 86574
AA8704 1987 1 05 13 13-APY-87 35.85 -75.48 18 Centropages_hamatus 88446
AA8704 1987 1 05 13 13-MAY-87 35.85 -75.48 18 Echinodermata 0
AA8704 1987 1 06 13 13-JUN-87 35.85 -75.48 18 Appendicularia 0
AA8704 1987 1 06 13 13-JUN-87 35.85 -75.48 18 Paracalanus_parvus 0
AA8704 1987 1 07 13 13-JUL-87 35.85 -75.48 18 Gastropoda 0
AA8704 1987 1 07 13 13-JUL-87 35.85 -75.48 18 Acartia_spp. 5616
AA8704 1987 1 07 13 13-JUL-87 35.85 -75.48 18 Metridia_lucens 468
AA8704 1987 1 08 13 13-AUG-87 35.85 -75.48 18 Evadne_spp. 0
AA8704 1987 1 08 13 13-AUG-87 35.85 -75.48 18 Salpa 0
AA8704 1987 1 08 13 13-AUG-87 35.85 -75.48 18 Oithona_spp. 468
"""
datafile = open('data.txt','w')
datafile.write(data)
datafile.close()
parse = lambda x: dt.datetime.strptime(x, '%d-%m-%Y')
df = pd.read_csv('data.txt',index_col=0, header=False, parse_dates={"Datetime" : [1,3,4]}, skipinitialspace=True, sep=' ', skiprows=0)
# query the df where taxon is equal to 'Calanus_finmarchicus' or 'Gastropoda'
df[(df.taxon == 'Calanus_finmarchicus') | (df.taxon == 'Gastropoda')]
cruiseid | station | date | lat | lon | depth_w | taxon | count | Unnamed: 11 | |
---|---|---|---|---|---|---|---|---|---|
Datetime | |||||||||
1987-04-13 | AA8704 | 1 | 13-APR-87 | 35.85 | -75.48 | 18 | Gastropoda | 0 | NaN |
1987-04-13 | AA8704 | 1 | 13-APR-87 | 35.85 | -75.48 | 18 | Calanus_finmarchicus | 2340 | NaN |
1987-07-13 | AA8704 | 1 | 13-JUL-87 | 35.85 | -75.48 | 18 | Gastropoda | 0 | NaN |
# how to query the dataframe :
# WHERE dataframe.taxon LIKE 'Calanus_finmarchicus' OR dataframe.taxone LIKE 'Gastropoda' AND dataframe.month LIKE 4
# ?
# in munmpy i can do something like that :
#
import numpy as np
data = np.genfromtxt('data.txt', dtype=[('cruiseid','S6'), ('year','i4'), ('station','i4'), ('month','i4'), ('day','i4'), ('date','S9'), ('lat','f8'), ('lon','f8'), ('depth_w','i8'), ('taxon','S60'), ('count','i8')], skip_header=1)
selection = [np.where((data['taxon']=='Calanus_finmarchicus') | (data['taxon']=='Gastropoda') & ((data['month']==4) | (data['month']==3)))[0]]
data[selection]
array([ ('AA8704', 1987, 1, 4, 13, '13-APR-87', 35.85, -75.48, 18, 'Gastropoda', 0), ('AA8704', 1987, 1, 4, 13, '13-APR-87', 35.85, -75.48, 18, 'Calanus_finmarchicus', 2340)], dtype=[('cruiseid', '|S6'), ('year', '<i4'), ('station', '<i4'), ('month', '<i4'), ('day', '<i4'), ('date', '|S9'), ('lat', '<f8'), ('lon', '<f8'), ('depth_w', '<i8'), ('taxon', '|S60'), ('count', '<i8')])
# how can i do the same in pandas ?
dataset = data[selection]
desired = pd.DataFrame(dataset)
dataset
array([ ('AA8704', 1987, 1, 4, 13, '13-APR-87', 35.85, -75.48, 18, 'Gastropoda', 0), ('AA8704', 1987, 1, 4, 13, '13-APR-87', 35.85, -75.48, 18, 'Calanus_finmarchicus', 2340)], dtype=[('cruiseid', '|S6'), ('year', '<i4'), ('station', '<i4'), ('month', '<i4'), ('day', '<i4'), ('date', '|S9'), ('lat', '<f8'), ('lon', '<f8'), ('depth_w', '<i8'), ('taxon', '|S60'), ('count', '<i8')])
desired # but with index the Datetime
cruiseid | year | station | month | day | date | lat | lon | depth_w | taxon | count | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | AA8704 | 1987 | 1 | 4 | 13 | 13-APR-87 | 35.85 | -75.48 | 18 | Gastropoda | 0 |
1 | AA8704 | 1987 | 1 | 4 | 13 | 13-APR-87 | 35.85 | -75.48 | 18 | Calanus_finmarchicus | 2340 |