"""Inspect number-prefixed dish names from a reshaped fixture file and compare
them with the records served by api.menus.nypl.org.

Meant to be run inside IPython/Jupyter: it calls ``get_ipython()`` and relies
on bare expressions (``im``, ``embed1``, ...) being echoed as cell output.
Requires the ``MENUS_API_KEY`` environment variable.
"""
import datetime
import json
import os
import random
import re
import time
from io import BytesIO

import pandas as pd
import requests
from IPython.core import display
from IPython.display import HTML
from PIL import Image

SOURCE_FILE = '/Users/libraries/Code/menus-site/data/dishes/fixtures_by_alpha/_nonascii-fixture_data-reshaped_with_items.json'

# Dish name examined in detail further down.
DISH_KEY = '00 1 crabflakes salad'

# Local path the downloaded menu image slice is written to and re-opened from.
IMG_PATH = '/tmp/menu_data_downloads/menu_slice.jpg'

# The fixture holds non-ASCII text; decode it as UTF-8 explicitly rather than
# depending on the platform's default encoding.
with open(SOURCE_FILE, 'r', encoding='utf-8') as infile:
    OUTLIER_DATA = json.load(infile)

len(OUTLIER_DATA.keys())


def starts_with_number(obj):
    """Return True when the string *obj* begins with an ASCII digit (0-9)."""
    return re.match(r'[0-9]', obj) is not None


NUMBER_OUTLIER_DATA = {k: v for k, v in OUTLIER_DATA.items() if starts_with_number(k)}


def find_nonnull_appearances(key):
    """Return ``(key, entries)`` for *key* in ``NUMBER_OUTLIER_DATA``.

    ``entries`` keeps only the items stored under *key* whose
    ``'times_appeared'`` value is not 0.
    """
    resultlist = NUMBER_OUTLIER_DATA[key]
    filtered_resultlist = [n for n in resultlist if n['times_appeared'] != 0]
    return (key, filtered_resultlist)


# Keys that have at least one entry with a non-zero 'times_appeared'.
APPEARS_1_OR_MORE = {
    k: v for k, v in NUMBER_OUTLIER_DATA.items()
    if find_nonnull_appearances(k)[1]
}

len(APPEARS_1_OR_MORE.keys())

DOUBLE_0_START = {k: v for k, v in APPEARS_1_OR_MORE.items() if k.startswith('00')}

len(DOUBLE_0_START.keys())

# random.sample() needs a real sequence (dict views raise TypeError on
# Python >= 3.11) and raises ValueError if asked for more items than exist,
# so materialise the items and cap the sample size.
_candidates = list(DOUBLE_0_START.items())
TEST_CASES = dict(random.sample(_candidates, min(20, len(_candidates))))

# The sample is random, so make sure the dish inspected below is always
# available; TEST_CASES may therefore hold one entry more than the sample.
TEST_CASES.setdefault(DISH_KEY, DOUBLE_0_START[DISH_KEY])

print(TEST_CASES.keys())

TEST_CASES[DISH_KEY]

IMG_URL = 'http://j2k.repo.nypl.org/adore-djatoka/resolver?url_ver=Z39.88-2004&rft_id=urn:uuid:bcb737e9-be35-80f1-e040-e00a180630eb&svc_id=info:lanl-repo/svc/getRegion&svc_val_fmt=info:ofi/fmt:kev:mtx:jpeg2000&svc.format=image/jpeg&svc.rotate=0&svc.region=2306,1222,438,2085&svc.scale=950,200'

img_req = requests.get(IMG_URL, stream=True)
# Fail loudly on an HTTP error instead of silently skipping the download and
# then opening a stale or missing file.
img_req.raise_for_status()
with open(IMG_PATH, 'wb') as savefile:
    # iter_content() defaults to 1-byte chunks; use a sensible buffer size.
    for chunk in img_req.iter_content(chunk_size=8192):
        savefile.write(chunk)

im = Image.open(IMG_PATH)


# copied from http://nbviewer.ipython.org/gist/deeplook/5162445
def display_pil_image(im):
    """Displayhook function for PIL Images, rendered as PNG."""
    b = BytesIO()
    im.save(b, format='png')
    data = b.getvalue()

    ip_img = display.Image(data=data, format='png', embed=True)
    return ip_img._repr_png_()


# register display func with PNG formatter:
png_formatter = get_ipython().display_formatter.formatters['image/png']
dpi = png_formatter.for_type(Image.Image, display_pil_image)

im

payload = {"token": os.environ['MENUS_API_KEY']}

dish_count = requests.get('http://api.menus.nypl.org/dishes/', params=payload)
stats = json.loads(dish_count.content.decode())['stats']
print(stats)

DISH = TEST_CASES[DISH_KEY][0]
print(DISH)


def _fetch_dish_snapshot(dish_uri, params):
    """Fetch a dish record and its linked menus from api.menus.nypl.org.

    *dish_uri* is a site URI such as ``http://menus.nypl.org/dishes/289940``;
    everything after the host is appended to the API base URL. Prints the API
    URI, the current time and the HTTP status code of the dish request.

    Returns ``(api_uri, response, dish_json, menus_json)``.
    """
    # 'http://host/dishes/123'.split('/', 3)[-1] -> 'dishes/123'
    api_uri = 'http://api.menus.nypl.org/{0}'.format(dish_uri.split('/', 3)[-1])
    req = requests.get(api_uri, params=params)
    print(api_uri)
    print(datetime.datetime.now())
    print(req.status_code)
    resp = json.loads(req.content.decode())
    # Brief pause between the two requests (presumably to be gentle on the
    # API -- the original gives no reason).
    time.sleep(0.5)
    # And we'll grab the linked menu while we're at it
    menu_resp = json.loads(
        requests.get(api_uri + '/menus', params=params).content.decode()
    )
    return api_uri, req, resp, menu_resp


api_uri, req_t0, resp_t0, menu_resp_t0 = _fetch_dish_snapshot(DISH['dish_uri'], payload)
print(json.dumps(resp_t0, indent=2))

# Same dish fetched a second time.
api_uri, req_t1, resp_t1, menu_resp_t1 = _fetch_dish_snapshot(DISH['dish_uri'], payload)
print(json.dumps(resp_t1, indent=2))

HTML('')

NEW_URI = 'http://menus.nypl.org/dishes/289940'

api_uri, req_t2, resp_t2, menu_resp_t2 = _fetch_dish_snapshot(NEW_URI, payload)
print(json.dumps(resp_t2, indent=2))

dish_count_t2 = requests.get('http://api.menus.nypl.org/dishes/', params=payload)
stats_t2 = json.loads(dish_count_t2.content.decode())['stats']
# Print the freshly fetched stats (the original re-printed the earlier `stats`).
print(stats_t2)

# Equivalent of the notebook magic `!ls /tmp/menu_data_downloads/2014_10_01/`.
get_ipython().system('ls /tmp/menu_data_downloads/2014_10_01/')

# DataFrame.from_csv was removed in pandas 1.0; read_csv is its replacement.
OCT_1_DATA_DF = pd.read_csv('/tmp/menu_data_downloads/2014_10_01/Dish.csv', index_col='id')

OCT_1_DATA_DF[OCT_1_DATA_DF.index == 361169]

OCT_1_DATA_DF[OCT_1_DATA_DF.index == 289940]

# NOTE: this rebinds ``Image`` from PIL's module to IPython's display class,
# so it must stay below every use of PIL's ``Image`` above.
from IPython.display import Image

embed1 = Image('menu_data_updates1.png')
embed1

embed2 = Image('menu_data_updates2.png')
embed2