import requests
from BeautifulSoup import BeautifulSoup
import HTMLParser


# Download the blog's index page.  The timeout keeps the script from hanging
# forever on a stalled connection, and raise_for_status() turns an HTTP error
# (404, 500, ...) into an immediate, descriptive exception instead of letting
# an error page be parsed as if it were the blog.
res = requests.get("http://bryannotes.blogspot.tw/", timeout=30)
res.raise_for_status()

# Parse the page from its UTF-8 encoded bytes.
soup = BeautifulSoup(res.text.encode("utf-8"))

# Collect every <h3> whose class attribute is exactly
# "post-title entry-title"; the <a href=...> tags inside them are the links
# that the rest of the script saves and downloads.
bid_table = soup.findAll('h3', {'class': 'post-title entry-title'})

# Debug aid: show the links inside the second headline.  Guarded so that a
# page with fewer than two headlines does not abort the script with an
# IndexError.
if len(bid_table) > 1:
    print(bid_table[1].findAll('a', {'href': True}))

# Save every link found inside the collected headlines to blog_links.txt,
# one URL per line, echoing each one to stdout.
#
# Each href is written directly instead of slicing the repr of a list
# (str([...])[3:-2]), which only produced a usable URL when a headline held
# exactly one ASCII link: several links were glued together with quote
# characters, and a headline without links produced an empty line.
# The `with` block closes the file even if a write fails.
with open("blog_links.txt", 'w') as bid_file:
    for link in bid_table:
        for tag in link.findAll('a', {'href': True}):
            href = tag['href']
            # Encode explicitly so a non-ASCII URL cannot trigger an implicit
            # ASCII encode error when written to the byte-oriented file.
            bid_file.write(href.encode('utf-8') + "\n")
            print(href)
    

# Download every post listed in blog_links.txt and build `blog`, a dict that
# maps the unescaped post title to the unescaped post body text.
h = HTMLParser.HTMLParser()  # used only for unescape() of HTML entities
blog = {}

# `with` guarantees the links file is closed (the handle used to be leaked).
with open('blog_links.txt', 'r') as bid_list:
    for line in bid_list:
        pagelink = line.strip()
        if not pagelink:
            # A blank line would make requests.get("") raise; skip it.
            continue

        # The timeout prevents one stalled download from hanging the run.
        request_get = requests.get(pagelink, timeout=30)
        soup_post = BeautifulSoup(request_get.text.encode("utf-8"))

        body_tag = soup_post.find("div", {'class': 'post-body entry-content'})
        title_tag = soup_post.find("h3", {'class': 'post-title entry-title'})
        if body_tag is None or title_tag is None:
            # find() returns None when the element is absent (error page,
            # different layout).  Report and move on rather than losing all
            # posts collected so far to an AttributeError.
            print("skipping %s: post body or title not found" % pagelink)
            continue

        body = h.unescape(body_tag.text)
        title = h.unescape(title_tag.text)
        # Posts sharing a title overwrite each other; the last one wins.
        blog[title] = body
    

for key in blog:
    print key,
    print len(blog[key])

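# coding=UTF-8  -- NOTE: inert at this position; PEP 263 only honors an encoding declaration on line 1 or 2 of the file.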
# Dump the collected posts to C:\blog_text.txt as UTF-8, one record per post
# in the form "<title>,<body>\n".
# NOTE: titles and bodies are written unescaped, so a comma or newline inside
# either one is indistinguishable from the separators.
# The `with` block closes the file even if encoding or writing fails part way
# through.
with open("C:\\blog_text.txt", "w") as f:
    for key in blog:
        f.write(key.encode('utf-8') + ",")
        f.write(blog[key].encode('utf-8') + "\n")