import urllib html_str = urllib.urlopen("http://static.decontextualize.com/widgets.html").read() print len(html_str) from bs4 import BeautifulSoup # your code here document = BeautifulSoup(html_str) len(document.find_all('h3')) widget_names = [] widget_names = [tag.string for tag in document.find_all('td', attrs={"class": "widgetname"})] widget_names widgets = [] html_str = urllib.urlopen("http://static.decontextualize.com/widgets.html").read() # your code here document = BeautifulSoup(html_str) tr_tags = document.find_all("tr", attrs={'class': 'widgetinfo'}) for tr_tag in tr_tags: widget_dict = {} for class_ in ("partno", "widgetname", "price", "quantity"): tag = tr_tag.find("td", attrs={'class': class_}) widget_dict[class_] = tag.string widgets.append(widget_dict) # end your code widgets widgets = [] html_str = urllib.urlopen("http://static.decontextualize.com/widgets.html").read() # your code here document = BeautifulSoup(html_str) tr_tags = document.find_all("tr", attrs={'class': 'widgetinfo'}) for tr_tag in tr_tags: widget_dict = {} for class_ops in (("partno", str), ("widgetname", str), ("price", lambda x: float(x[1:])), ("quantity", int)): tag = tr_tag.find("td", attrs={'class': class_ops[0]}) widget_dict[class_ops[0]] = class_ops[1](tag.string) widgets.append(widget_dict) # end your code widgets import pandas as pd widgets_df = pd.DataFrame(widgets) widgets_df widgets_df['quantity'].sum() widgets_df[widgets_df['price'] > widgets_df['price'].mean()] example_html = """

Camembert

A soft cheese made in the Camembert region of France.

Cheddar

A yellow cheese made in the Cheddar region of... France, probably, idk whatevs.

""" example_doc = BeautifulSoup(example_html) cheese_dict = {} for h2_tag in example_doc.find_all('h2'): cheese_name = h2_tag.string cheese_desc_tag = h2_tag.find_next_sibling('p') cheese_dict[cheese_name] = cheese_desc_tag.string cheese_dict hallowed_h3_tag = document.find_all('h3')[2] table_tag = hallowed_h3_tag.find_next_sibling('table') len(table_tag.find_all('tr')) category_counts = {} # your code here for h3_tag in document.find_all('h3'): table_tag = h3_tag.find_next_sibling('table') category_counts[h3_tag.string] = len(table_tag.find_all('tr')) category_counts