%%file birds.txt robin,kentucky,may 23 robin,west virginia,may 24 seagull,maine,may 24 chickadee,kentucky,june 1 seagull,maine,june 1 # Let's start by reading in each line of the file, one by one. fp = file('birds.txt', 'rb') for line in fp: print line import csv fp = file('birds.txt', 'rb') reader = csv.reader(fp) for bird, state, day in reader: print bird, '/', state, '/', day import csv fp = file('birds.txt', 'rb') reader = csv.reader(fp) n = 0 for bird, state, day in reader: if bird == 'chickadee': n += 1 print 'we saw', n, 'chickadees' # define an empty list birdlist = [] fp = file('birds.txt', 'rb') reader = csv.reader(fp) n = 0 for bird, state, day in reader: birdlist.append(bird) print 'we saw', birdlist.count('chickadee'), 'chickadees' print 'we saw', birdlist.count('robin'), 'robins' # to do this, define functions def read_birdlist(filename): birdlist = [] fp = file('birds.txt', 'rb') reader = csv.reader(fp) n = 0 for bird, state, day in reader: birdlist.append(bird) return birdlist def print_birdcount(birdlist, bird): print 'we saw', birdlist.count(bird), 'of bird type', bird birdlist = read_birdlist('birds.txt') print_birdcount(birdlist, 'robin') print_birdcount(birdlist, 'chickadee') def read_birdlist2(filename): birdlist = [] fp = file(filename, 'rb') reader = csv.reader(fp) n = 0 for bird, state, day in reader: birdlist.append((bird, state, day)) return birdlist birdlist2 = read_birdlist2('birds.txt') print birdlist2 for bird, state, day in birdlist2: print bird, state, day def print_birdcount2(birdlist2, bird_to_count): n = 0 for bird, state, day in birdlist2: if bird == bird_to_count: n += 1 print 'we saw', n, 'of bird type', bird_to_count print_birdcount2(birdlist2, 'robin') def get_entries_by_state(birdlist2, query_state): newlist = [] for bird, state, day in birdlist2: if state == query_state: newlist.append((bird, state, day)) return newlist get_entries_by_state(birdlist2, 'kentucky') kentucky_birds = get_entries_by_state(birdlist2, 'kentucky') print_birdcount2(kentucky_birds, 'chickadee') bigbirdlist = read_birdlist2('long-birds.csv') print len(bigbirdlist) print bigbirdlist[:10] birdlist2 = read_birdlist2('birds.txt') for bird, state, day in birdlist2: print_birdcount2(birdlist2, bird) bird_types = set() for bird, state, day in birdlist2: bird_types.add(bird) print bird_types # now we can just look at the unique set of birds for bird_type in bird_types: print_birdcount2(birdlist2, bird_type) d = {} d['robin'] = 1 d['seagull'] = 2 print d['robin'] def make_birddict(birdlist2): # first, get the unique set of birds bird_types = set() for bird, state, day in birdlist2: bird_types.add(bird) # then, copy the code from print_birdcount2: d = {} for bird_type in bird_types: n = 0 for bird, state, day in birdlist2: if bird == bird_type: n += 1 d[bird_type] = n return d # tada! print make_birddict(birdlist2) def make_birddict2(birdlist2): # first, get the unique set of birds bird_types = set() for bird, state, day in birdlist2: bird_types.add(bird) # then, set the count for each bird to 0 d = {} for bird in bird_types: d[bird] = 0 # now, go through all of the entries in birdlist2 and, for each one, increment the count in the dictionary for bird, state, day in birdlist2: d[bird] = d[bird] + 1 return d make_birddict2(birdlist2) from datetime import datetime # strptime will convert strings into dates given the appropriate formatting string (see docs above) print datetime.strptime('may 21', '%B %d') # we should fix that year... print datetime.strptime('may 21' + ' 2013', '%B %d %Y') # ok -- and now we need to convert back to a straight up number. # If all the dates are 2013, we can just ask for day of year... date = datetime.strptime('may 21' + ' 2013', '%B %d %Y') day_of_year = date.strftime('%j') print day_of_year # hmm, does that actually work!? for bird, state, day in birdlist2: date = datetime.strptime(day + ' 2013', '%B %d %Y') day_of_year = date.strftime('%j') print day, date, day_of_year birdcount_by_day = {} for bird, state, day in birdlist2: birdcount_by_day[day] = 0 for bird, state, day in birdlist2: birdcount_by_day[day] = birdcount_by_day[day] + 1 print birdcount_by_day plotme_x = [] plotme_y = [] for day in birdcount_by_day: # note, iterating over dictionaries gives you keys date = datetime.strptime(day + ' 2013', '%B %d %Y') day_of_year = date.strftime('%j') # trick: we need to convert day_of_year into an integer day_of_year = int(day_of_year) # retrieve birdcount count = birdcount_by_day[day] # now add day_of_year and birdcount plotme_x.append(day_of_year) plotme_y.append(count) print plotme_x print plotme_y plot(plotme_x, plotme_y, 'ro') plot(plotme_x, plotme_y, 'ro') axis(ymin=0, xmin=140, xmax=160) # if we plot with lines, we see that because the points aren't sorted, we don't get what we expect. plot(plotme_x, plotme_y, 'r-') axis(ymin=0, xmin=140, xmax=160) # how do we fix this? one way is to zip the two lists together, sort them, and pull them back out... plotme_all = zip(plotme_x, plotme_y) plotme_all.sort() plotme_x = [] plotme_y = [] for (x, y) in plotme_all: plotme_x.append(x) plotme_y.append(y) print plotme_x # tada... plot(plotme_x, plotme_y, 'r-') axis(ymin=0, xmin=140, xmax=160) bigbirdcount_by_day = {} for bird, state, day in bigbirdlist: bigbirdcount_by_day[day] = 0 for bird, state, day in bigbirdlist: bigbirdcount_by_day[day] = bigbirdcount_by_day[day] + 1 plotme = [] for day in bigbirdcount_by_day: # note, iterating over dictionaries gives you keys date = datetime.strptime(day + ' 2013', '%B %d %Y') day_of_year = date.strftime('%j') # trick: we need to convert day_of_year into an integer day_of_year = int(day_of_year) # retrieve birdcount count = bigbirdcount_by_day[day] # now add day_of_year and birdcount plotme.append((day_of_year, count)) plotme.sort() print plotme[:5] plotme_x = [] plotme_y = [] for (x, y) in plotme: plotme_x.append(x) plotme_y.append(y) plot(plotme_x, plotme_y, 'r-') birdlist = read_birdlist2('long-birds.csv') bird_dict = make_birddict2(birdlist) labels=[] counts=[] nbirds = len(bird_dict) for bird,count in bird_dict.iteritems(): labels.append(bird) counts.append(count) fig = plt.figure() ax = fig.add_subplot(1,1,1) xlocations = numpy.arange(nbirds) print xlocations width = 0.6 bars = ax.bar(xlocations, counts, width=width) xtickNames = ax.set_xticklabels(labels) ax.set_xticks(xlocations-0.1) plt.setp(xtickNames, rotation=45) plt.show() ax.set_title("Bird counts") # start by picking out the birds we want to save: birdlist2 = read_birdlist2('birds.txt') print birdlist2 kentucky_birds = [] for bird, state, date in birdlist2: if state == 'kentucky': kentucky_birds.append((bird, date)) print kentucky_birds import csv # now, open a file for writing, and use 'csv.writer' to write rows to it. # just for grins, use tab for a delimiter, instead of a comma. fp = file('kentucky-birds.tsv', 'wb') w = csv.writer(fp, delimiter='\t') # <- use tab instead of the default, which is a comma for bird, date in kentucky_birds: row = (bird, date) w.writerow(row) fp.close() # check it out -- did it work? print open('kentucky-birds.tsv').read() %%file bird-weights.txt robin,5 seagull,12 chickadee,4 # first, load in the bird weights fp = file('bird-weights.txt', 'rb') r = csv.reader(fp) weight_dict = {} for bird, weight in r: weight_dict[bird] = weight # now, go through all of the birdlist2 entries birdlist2 = read_birdlist2('birds.txt') new_birdlist = [] for bird, state, day in birdlist2: weight = weight_dict[bird] new_birdlist.append((bird,state,day,weight)) print new_birdlist # what if one of the birds doesn't have a weight in the file, though? del weight_dict['robin'] new_birdlist = [] for bird, state, day in birdlist2: weight = weight_dict[bird] new_birdlist.append((bird,state,day,weight)) print new_birdlist # change the code to be aware of potentially missing data: new_birdlist = [] for bird, state, day in birdlist2: weight = weight_dict.get(bird, -1) # use '-1' or something patently absurd if no weight in table new_birdlist.append((bird,state,day,weight)) print new_birdlist # and now you can output to a file, or whatever.