import urllib
# Download the MenuPages listing page for Morningside Heights (all cuisines).
# The response handle is closed explicitly so the underlying connection is
# released even if read() raises, instead of relying on garbage collection.
response = urllib.urlopen("http://www.menupages.com/restaurants/all-areas/morningside-heights/all-cuisines/")
try:
    html_str = response.read()
finally:
    response.close()
from bs4 import BeautifulSoup
import re
# Parse the downloaded page. The parser is named explicitly so the resulting
# tree does not depend on which optional parsers happen to be installed.
document = BeautifulSoup(html_str, "html.parser")

# The restaurants are read from the rows of the "search-results" table.
table_tag = document.find("table", attrs={"class": "search-results"})
if table_tag is None:
    # Fail with a clear message rather than an AttributeError on None below.
    raise ValueError('no <table class="search-results"> found in the page')

# Matches a run of digits/underscores; compiled once, outside the loop.
rating_pattern = re.compile(r'[0-9_]+')

restaurant_list = []
for tr_tag in table_tag.find_all('tr'):
    name_address_td_tag = tr_tag.find('td', attrs={'class': 'name-address'})
    # Rows without a td of class "name-address" are not restaurant rows
    # (presumably header/spacer rows), so skip them.
    if name_address_td_tag is None:
        continue
    name_a_tag = name_address_td_tag.find('a')

    # The rating is not available as text; it is recovered from the file name
    # of the rating image. The first run of digits/underscores in the image
    # URL is taken, underscores are turned into decimal points, and the
    # result is converted to a float.
    rating_td_tag = tr_tag.find('td', attrs={'class': 'rating'})
    rating_image_tag = rating_td_tag.find('img')
    rating_image_url = rating_image_tag['src']
    rating_val = rating_pattern.findall(rating_image_url)[0]
    rating_val = float(rating_val.replace('_', '.'))

    price_td_tag = tr_tag.find('td', attrs={'class': 'price'})
    review_td_tag = tr_tag.find('td', attrs={'class': 'reviews'})

    restaurant = {
        # .contents lists everything inside a tag, strings and nested tags
        # alike. The name is the second child of the <a> tag; .string cannot
        # be used here because the <a> tag has more than one child.
        'name': name_a_tag.contents[1],
        'rating': rating_val,
        # The price level is the length of the price cell's text
        # (presumably one symbol per level -- confirm against the page).
        'price': len(price_td_tag.string),
        # Kept as the raw cell text (a string), not converted to an int.
        'reviews': review_td_tag.string,
    }
    restaurant_list.append(restaurant)

# Bare expression: displays the collected records when run interactively.
restaurant_list
[{'name': u'Ajanta', 'price': 2, 'rating': 3.0, 'reviews': u'43'}, {'name': u'Amigos', 'price': 3, 'rating': 3.0, 'reviews': u'5'}, {'name': u"Amir's Grill", 'price': 2, 'rating': 3.0, 'reviews': u'38'}, {'name': u'Amsterdam Restaurant', 'price': 4, 'rating': 3.5, 'reviews': u'31'}, {'name': u'Artopolis Espresso Cafe', 'price': 2, 'rating': 4.5, 'reviews': u'4'}, {'name': u'Bettolona', 'price': 3, 'rating': 4.0, 'reviews': u'19'}, {'name': u'Bier International', 'price': 2, 'rating': 4.0, 'reviews': u'5'}, {'name': u"Brad's", 'price': 1, 'rating': 4.0, 'reviews': u'2'}, {'name': u'Cafe Amrita', 'price': 2, 'rating': 3.5, 'reviews': u'34'}, {'name': u'Cafe Nana', 'price': 1, 'rating': 4.5, 'reviews': u'8'}, {'name': u"Camille's", 'price': 2, 'rating': 4.0, 'reviews': u'18'}, {'name': u'Chapati House', 'price': 2, 'rating': 3.0, 'reviews': u'1'}, {'name': u"Che' Bella Pizza", 'price': 2, 'rating': 4.0, 'reviews': u'11'}, {'name': u'China Place', 'price': 2, 'rating': 3.5, 'reviews': u'7'}, {'name': u'Chipotle', 'price': 2, 'rating': 4.0, 'reviews': u'2'}, {'name': u'Chokolat Patisserie', 'price': 1, 'rating': 0.0, 'reviews': u'0'}, {'name': u'Chokolat Patisserie', 'price': 1, 'rating': 0.0, 'reviews': u'0'}, {'name': u'Columbia Cottage (CLOSED)', 'price': 2, 'rating': 3.0, 'reviews': u'37'}, {'name': u'Community Food & Juice', 'price': 4, 'rating': 3.5, 'reviews': u'52'}, {'name': u'Deluxe', 'price': 2, 'rating': 3.0, 'reviews': u'70'}, {'name': u'Dig Inn Seasonal Market', 'price': 2, 'rating': 0.0, 'reviews': u'0'}, {'name': u'Dinosaur Bar-B-Que', 'price': 3, 'rating': 4.0, 'reviews': u'116'}, {'name': u'Domain NYC', 'price': 5, 'rating': 0.0, 'reviews': u'0'}, {'name': u'El Paso Truck', 'price': 1, 'rating': 0.0, 'reviews': u'0'}, {'name': u'El Porton', 'price': 3, 'rating': 4.0, 'reviews': u'10'}, {'name': u'Falafel on Broadway', 'price': 2, 'rating': 5.0, 'reviews': u'1'}, {'name': u'Famous Famiglia', 'price': 2, 'rating': 4.0, 'reviews': u'18'}, {'name': u'Five 
Guys', 'price': 1, 'rating': 0.0, 'reviews': u'0'}, {'name': u'Flat Top', 'price': 3, 'rating': 0.0, 'reviews': u'0'}, {'name': u'Haagen-Dazs', 'price': 1, 'rating': 0.0, 'reviews': u'0'}, {'name': u"Haakon's Hall", 'price': 3, 'rating': 4.0, 'reviews': u'21'}, {'name': u'Hamilton Deli', 'price': 1, 'rating': 3.5, 'reviews': u'18'}, {'name': u'Havana Central at The West End (CLOSED)', 'price': 3, 'rating': 3.0, 'reviews': u'48'}, {'name': u'Henan Cart', 'price': 1, 'rating': 0.0, 'reviews': u'0'}, {'name': u'Insomnia Cookies', 'price': 1, 'rating': 3.0, 'reviews': u'1'}, {'name': u'Joe the Art of Coffee', 'price': 1, 'rating': 4.5, 'reviews': u'1'}, {'name': u"Joe's G-H Deli", 'price': 1, 'rating': 0.0, 'reviews': u'0'}, {'name': u'Kissaten Jin', 'price': 2, 'rating': 0.0, 'reviews': u'0'}, {'name': u'Kitchenette', 'price': 3, 'rating': 3.0, 'reviews': u'46'}, {'name': u'Koronet Pizza', 'price': 1, 'rating': 4.0, 'reviews': u'5'}, {'name': u'Kuro Kuma', 'price': 1, 'rating': 4.5, 'reviews': u'2'}, {'name': u'Le Monde', 'price': 4, 'rating': 3.0, 'reviews': u'39'}, {'name': u'Levain Bakery', 'price': 1, 'rating': 4.5, 'reviews': u'2'}, {'name': u'M2M - Morning To Midnight', 'price': 1, 'rating': 2.5, 'reviews': u'8'}, {'name': u"Mama's Fried Chicken and Pizza", 'price': 1, 'rating': 0.0, 'reviews': u'0'}, {'name': u'Maoz Vegetarian', 'price': 2, 'rating': 4.5, 'reviews': u'5'}, {'name': u'Massawa', 'price': 3, 'rating': 3.5, 'reviews': u'33'}, {'name': u'Max Caffe', 'price': 2, 'rating': 3.5, 'reviews': u'11'}, {'name': u'Max Soha', 'price': 3, 'rating': 3.5, 'reviews': u'17'}, {'name': u"Mel's Burger Bar", 'price': 3, 'rating': 3.0, 'reviews': u'16'}, {'name': u"Melba's", 'price': 3, 'rating': 3.5, 'reviews': u'35'}, {'name': u'Milano Market', 'price': 2, 'rating': 4.0, 'reviews': u'23'}, {'name': u'New Aroma', 'price': 2, 'rating': 3.5, 'reviews': u'3'}, {'name': u'Nikko', 'price': 2, 'rating': 4.0, 'reviews': u'14'}, {'name': u'Nussbaum & Wu', 'price': 1, 
'rating': 3.0, 'reviews': u'36'}, {'name': u"Ollie's", 'price': 2, 'rating': 2.5, 'reviews': u'97'}, {'name': u'Orange Peel (CLOSED)', 'price': 2, 'rating': 4.0, 'reviews': u'2'}, {'name': u"Oren's", 'price': 1, 'rating': 0.0, 'reviews': u'0'}, {'name': u'Panino Sportivo Roma', 'price': 2, 'rating': 3.5, 'reviews': u'19'}, {'name': u'Peking Garden', 'price': 1, 'rating': 3.5, 'reviews': u'14'}, {'name': u'Pinkberry', 'price': 1, 'rating': 4.0, 'reviews': u'3'}, {'name': u'Pisticci', 'price': 3, 'rating': 4.0, 'reviews': u'34'}, {'name': u'Pita Grill', 'price': 3, 'rating': 5.0, 'reviews': u'1'}, {'name': u'Serengeti Teas & Spices', 'price': 1, 'rating': 0.0, 'reviews': u'0'}, {'name': u'Sezz Medi (CLOSED)', 'price': 3, 'rating': 3.5, 'reviews': u'34'}, {'name': u'Silvana', 'price': 2, 'rating': 5.0, 'reviews': u'1'}, {'name': u'Strokos Pizza', 'price': 2, 'rating': 3.0, 'reviews': u'16'}, {'name': u'Subsconscious', 'price': 2, 'rating': 3.0, 'reviews': u'38'}, {'name': u'Sushi Sushi', 'price': 2, 'rating': 4.5, 'reviews': u'3'}, {'name': u'Symposium Greek Restaurant', 'price': 3, 'rating': 3.5, 'reviews': u'19'}, {'name': u'The Heights Bar & Grill', 'price': 3, 'rating': 4.0, 'reviews': u'26'}, {'name': u'The Mill', 'price': 2, 'rating': 3.5, 'reviews': u'52'}, {'name': u'Toast', 'price': 2, 'rating': 4.0, 'reviews': u'26'}, {'name': u'Toast Chicken', 'price': 2, 'rating': 3.0, 'reviews': u'4'}, {'name': u"Tom's Delicious Pizza", 'price': 2, 'rating': 3.5, 'reviews': u'12'}, {'name': u"Tom's Restaurant", 'price': 2, 'rating': 3.0, 'reviews': u'44'}, {'name': u'Uncle Luoyang', 'price': 1, 'rating': 0.0, 'reviews': u'0'}, {'name': u'V & T Pizza', 'price': 3, 'rating': 3.5, 'reviews': u'68'}, {'name': u'Vareli', 'price': 5, 'rating': 3.5, 'reviews': u'16'}, {'name': u'Vegenation', 'price': 1, 'rating': 0.0, 'reviews': u'0'}, {'name': u'Vinateria', 'price': 3, 'rating': 3.5, 'reviews': u'1'}, {'name': u'Vine', 'price': 2, 'rating': 3.0, 'reviews': u'26'}, {'name': 
u'West Place', 'price': 2, 'rating': 3.5, 'reviews': u'13'}]
import pandas as pd
# Load the scraped records into a DataFrame, one row per restaurant dict.
df = pd.DataFrame(restaurant_list)
# Select restaurants in the lowest price tier (price == 1) whose rating is
# at least 4.0; the bare expression displays the result when run interactively.
is_cheapest = df['price'] == 1
is_well_rated = df['rating'] >= 4.0
df[is_cheapest & is_well_rated]
| | name | price | rating | reviews |
---|---|---|---|---|
7 | Brad's | 1 | 4.0 | 2 |
9 | Cafe Nana | 1 | 4.5 | 8 |
35 | Joe the Art of Coffee | 1 | 4.5 | 1 |
39 | Koronet Pizza | 1 | 4.0 | 5 |
40 | Kuro Kuma | 1 | 4.5 | 2 |
42 | Levain Bakery | 1 | 4.5 | 2 |
60 | Pinkberry | 1 | 4.0 | 3 |
7 rows × 4 columns