#!/usr/bin/env python
# coding: utf-8

# In[4]:


# pip install beautifulsoup4.
from bs4 import BeautifulSoup


# In[5]:


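# urllib.request ships with the Python standard library, so no pip install is
# needed (urllib3 is a separate third-party package and is not used here).
# This module is used to download data from a URL.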
import urllib.request  


# # Request Data from URL

# In[6]:


# Download the raw response body of the Yelp search-results page.
# The `with` block closes the HTTP response (and its underlying socket) as
# soon as the body has been read, instead of leaving it open.
with urllib.request.urlopen('http://www.yelp.ca/search?find_loc=Calgary,+AB&cflt=homeservices') as response:
    r = response.read()


# In[28]:


# Parse the downloaded markup into a Beautiful Soup tree.
# The "lxml" feature selects the lxml-based parser, so the lxml package
# must be installed alongside beautifulsoup4.
soup = BeautifulSoup(markup=r, features="lxml")
type(soup)


# In[52]:


# Render the parsed document back to indented HTML text once and reuse it.
# prettify() already returns a str here, so no extra str() conversion is needed.
# (Note: in R, str() shows an object's structure; in Python, str() converts a
# value to text, much like as.character() or paste() would in R.)
a = soup.prettify()

# Number of characters in the downloaded (prettified) page.
len(a)


# In[57]:


# Find the position of a particular attribute we are interested in.
# The triple-quoted literal lets the search text contain double quotes without
# escaping them. str.find returns -1 when the text is not present.
a.find('''class="snippet"''')


# In[58]:


# Look at a window of the page text.
# NOTE(review): these offsets are hard-coded and are not derived from the find()
# result above; presumably they were picked by hand for one download of the
# page -- confirm they still point at the region of interest.
a[352000:358000]


# In[21]:


# Find the list of phone numbers. We match on both the HTML tag (<span>) and
# its class ("biz-phone") using find_all, which returns every matching tag.
letters = soup.find_all("span", class_="biz-phone")
# Show up to the first 100 matches. Python sequences are 0-indexed, so the
# slice starts at the beginning; a slice starting at 1 would skip the first match.
letters[:100]


# In[22]:


# Look at the feedback given by users: every <p> tag whose class is "snippet".
letters2 = soup.find_all("p", class_="snippet")
# Show up to the first 100 matches. Python sequences are 0-indexed, so the
# slice starts at the beginning; a slice starting at 1 would skip the first match.
letters2[:100]


# In[23]:


# Inspect what kind of object find_all returned.
type(letters2)


# In[24]:


# Render the whole result list as text once, then reuse it below.
snippets_markup = str(letters2)
# Preview characters 1 through 999 of that text (index 0 is left out).
snippets_markup[1:1000]


# In[25]:


# Count occurrences of the substring "service" in the rendered text.
# str.count is case-sensitive and counts non-overlapping matches.
snippets_markup.count("service")