import urllib
import urllib2
from cookielib import CookieJar
# Endpoints of the CC98 forum used by this script.
login_url = 'http://www.cc98.org/login.asp'
reply_url = 'http://www.cc98.org/SaveReAnnounce.asp'

# The account password is read from a local file named "password" instead of
# being written in the script; surrounding whitespace is stripped.
with open('password') as f:
    password = f.read().strip()

# An opener with a cookie jar attached: cookies received in one response are
# stored and sent again on later requests made through the same opener.
cookie_handler = urllib2.HTTPCookieProcessor(CookieJar())
opener = urllib2.build_opener(cookie_handler)

# Log in by sending the form-encoded credentials. The response body is
# discarded; only the cookies kept by the jar are needed afterwards.
login_form = urllib.urlencode(dict(
    username='answeror',
    password=password,
    action='chk'
))
opener.open(login_url, login_form)
# Post a reply through the opener that holds the login cookies.
# The form values below are hard-coded for one specific thread.
reply_form = urllib.urlencode(dict(
    method='fastreply',
    BoardID='509',
    RootID='4222721',
    followup='702696700',
    UserName='answeror',
    passwd='60f6b1a2e519ece3',
    star='1',
    signflag='yes',
    Expression='face7.gif',
    Content='自动回帖测试'
))
reply_request = urllib2.Request(
    reply_url,
    data=reply_form,
    headers={'Referer': 'http://www.cc98.org'}
)
data = opener.open(reply_request).read()
# Dump the raw response page so the result can be inspected by eye.
print(data)
<html> <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> <link rel="alternate" type="application/rss+xml" title="察看新帖" href="boardrss.asp" /> <link rel="alternate" type="application/rss+xml" title="热门话题" href="rss.asp" /> <title>回复帖子成功 » CC98论坛</title> <link rel="shortcut icon" href="favicon.ico"> <link rel="icon" href="favicon.ico"> <link rel="stylesheet" href="inc/style.css" type="text/css"> <script type="text/javascript"> var currentUserID = 401578; var currentBoardID = 509; var bannerPath = 'banner/'; var bannerInfo = {"t":[{"p":"5172104394.gif","i":"651"},{"p":"5172113093.gif","i":"652"},{"p":"5172115259.gif","i":"653"}],"b":[{"p":"10172365833.jpg","i":"632"},{"p":"32010553856.gif","i":"646"},{"p":"32023391310.jpg","i":"648"},{"p":"32023405085.gif","i":"649"}]}; </script> <script src="js/mootools-core-1.4.5-full-compat.js"></script> <script type="text/javascript" src="js/common.js"></script> <script type="text/javascript" src="js/ccdialog.js"></script> <script type="text/javascript" src="js/boardquery.js"></script> </head> <body topmargin=0 leftmargin=0> <!-- 菜单 div --> <div id="menuDiv" style='Z-INDEX: 2; display: none; WIDTH: 1px; POSITION: absolute; HEIGHT: 1px; BACKGROUND-COLOR: #9cc5f8'></div> <!-- 头部table --> <table cellspacing="0" cellpadding="0" align="center" style="border: 1px #6595D6 solid; border-top-width: 0px; border-right-width: 1px; border-bottom-width: 0px; border-left-width: 1px; width: 97%;"> <tr> <td width="100%"> <!-- 真正的开始表格 外面那个是用来限制的 --> <table width="100%" align="center" border="0" cellspacing="0" cellpadding="0"> <!-- ==============第一行横杠====================== --> <tr> <td class="TopDarkNav" height="9"></td> </tr> <!-- ==============logo和帮助====================== --> <tr> <td height="70" class="TopLighNav2"> <table border="0" width="100%" align="center"> <tr> <!-- 小logo --> <td align="left" width="25%"> <a href="http://www.cc98.org"> <img border="0" src="images/logo/logo.gif"></a> </td> <!-- 大logo --> <td 
align="center" width="65%"> <div class="advImg" id="advImg"><a id="advLink" href="#" target="_blank"></a></div> <script language="javascript"> showadvs(1); </script> </td> <!-- 帮助 --> <td align="right" style="line-height: 15pt" width="10%"> <a href="#"><span style="CURSOR: hand" onclick="window.external.AddFavorite('http://www.cc98.org', 'www.cc98.org')">加入收藏</span></a><br> <a href="#"><span style="cursor: pointer" onclick="this.style.behavior='url(#default#homepage)';this.setHomePage('http://www.cc98.org');">设为首页</span></a><br> <a href="#"><span style="cursor: pointer" class="boardQueryLink" id="boardQueryLink">搜索版面</span></a><br> <!--<a href="boardhelp.asp">论坛帮助</a>--> </td> </tr> </table> </td> </tr> <!-- ==============第二行横杠====================== --> <tr> <td class="TopLighNav" height="9"></td> </tr> <!-- ============== 菜单 ====================== --> <tr> <td class="TopLighNav1" height="22" valign="top"> <div style="position: relative; margin-top: 2px"> <div style="position: absolute; width: 960px; top: 0px;"> <b>answeror</b> <a href="#" onmouseout='DelayHideMenu();' onmouseover='ShowMenu(this,logon,100);'>账号选项</a> <img src="pic/navspacer.gif" align="absmiddle"> <a href="usermanager.asp" onmouseout='DelayHideMenu();' onmouseover='ShowMenu(this,manage2,100);'>控制面板</a> <!--<img src=pic/navspacer.gif align=absmiddle> <a href="guidboard.asp" onmouseout='DelayHideMenu()' onMouseOver='ShowMenu(this,hotboard,100)'>热门版面</a>--> <img src="pic/navspacer.gif" align="absmiddle"> <a href="query.asp?boardid=509" onmouseout='DelayHideMenu();' onmouseover='ShowMenu(this,Search,100);'>论坛搜索</a> <img src="pic/navspacer.gif" align="absmiddle"> <!--<a href="#" onmouseout='DelayHideMenu()' onMouseOver='ShowMenu(this,stylelist,100)'>自选风格</a> <img src=pic/navspacer.gif align=absmiddle>--> <a href="boardstat.asp?boardid=509" onmouseout='DelayHideMenu();' onmouseover='ShowMenu(this,boardstat,100);'>论坛状态</a> <!-- <img src=pic/navspacer.gif align=absmiddle> <a href="show.asp?boardid=509" 
onmouseout='DelayHideMenu()' onMouseOver='ShowMenu(this,downlist,100)'>论坛展区</a> --> <!--<img src=pic/navspacer.gif align=absmiddle> <a href="http://10.71.45.100/newblog" onmouseout='DelayHideMenu()' onMouseOver='ShowMenu(this,blog,100)' target="_blank">98blog</a> <img src=pic/navspacer.gif align=absmiddle> <a href="music.asp" onmouseout='DelayHideMenu()' onMouseOver='ShowMenu(this,music,100)'>点歌</a>--> <img src="pic/navspacer.gif" align="absmiddle"> <a onmouseout='DelayHideMenu();this.style.cursor="auto";' onmouseover='ShowMenu(this,addon,100);this.style.cursor="pointer";' target="_blank">论坛附件</a> <img src="pic/navspacer.gif" align="absmiddle"> <a href="boardhelp.asp" onmouseout='DelayHideMenu();' onmouseover='ShowMenu(this,help,100);'>论坛帮助</a> <img src="pic/navspacer.gif" align="absmiddle"> <a href="javascript:;" onclick="changeSimple(1)">Simple版</a> <!-- <img src="pic/navspacer.gif" align="absmiddle"> <a href="http://bbs.cc98.org/" title="Alpha 就是比 Beta 更烂的...">2.0 Alpha</a> --> </div> <div id="multiLogin"></div> </div> </td> </tr> </table> <!-- 最外层 --> </td> </tr> </table> <table cellspacing="1" cellpadding="3" align="center" border="0" width="97%"> <tr> <td width="65%"></td> <td width="35%" align="right"> <img src="pic/msg_no_new_bar.gif"> <a href="usersms.asp?action=inbox">我的收件箱</a> (<span style="color: gray;">0 新</span>) </td> </tr> </table> <table cellspacing="1" cellpadding="3" align="center" class="tableBorder2"> <tr> <td height="25" valign="middle"> <img src="pic/Forum_nav.gif" align="absmiddle"> <a href="index.asp">www.cc98.org</a> → <a href=list.asp?boardid=35>院系交流</a> → <a href=list.asp?boardid=368>计算机科学与技术学院</a> → <a href=list.asp?boardid=509&page=1>数字媒体技术</a> → 回复帖子成功 <a name="top"></a> </td> </tr> </table> <br> <meta http-equiv=refresh content="3;URL=dispbbs.asp?boardid=509&id=4222721&star=1#bottom"> <table cellpadding="3" cellspacing="1" align="center" class="tableborder1"> <tr align="center"> <th width="100%">状态:回复帖子成功 </th> </tr> <tr> <td 
width="100%" class="tablebody1">本页面将在3秒后自动返回您所发表的帖子,<b>您可以选择以下操作:</b><br /> <ul> <li><a href="index.asp">返回首页</a></li> <li><a href="list.asp?boardid=509">数字媒体技术</a></li> <li><a href="dispbbs.asp?boardid=509&id=4222721&star=1#bottom">您所发表的帖子</a></li> </ul> </td> </tr> </table> <div id="copyright">Copyright © 2003-2013 CC98 Network Association. Email: contact@cc98.org</div> </body> </html> <!--end:132.813毫秒-->
上面发送给服务器的数据中, 除了Content, 其它都是通过阅读页面的HTML源码, 手工找出来的.
HTTP头中的Referer字段表示当前请求是从哪个页面发起的, 一般用来防盗链. 这里CC98可能是用它来防止其它网站用JS向CC98发帖(即跨站请求伪造, CSRF).
下面我们根据帖子链接自动提取出要发给服务器的信息:
def passwd(url):
    """Return the value of the ``passwd`` form field on the page at *url*.

    The page is fetched through the module-level ``opener`` and parsed with
    BeautifulSoup's lxml parser; the ``value`` attribute of the first
    ``<input>`` whose ``name`` is ``passwd`` is returned.

    Raises:
        TypeError: if the page contains no such input (``find`` returns
            ``None``, which cannot be indexed).
        KeyError: if the input has no ``value`` attribute.
    """
    # Imported locally because this function is defined and called before
    # the file's top-level ``import bs4``.
    import bs4

    html = opener.open(url).read()
    soup = bs4.BeautifulSoup(html, 'lxml')
    return soup.find('input', attrs={'name': 'passwd'})['value']
# Demo: show the passwd token found on this thread page.
thread_url = 'http://www.cc98.org/dispbbs.asp?boardID=509&ID=4222721&page=1'
print(passwd(thread_url))
60f6b1a2e519ece3
更多信息:
import re
def info(url):
    """Collect the fast-reply form fields for the thread page at *url*.

    The page is fetched through the module-level ``opener`` and parsed with
    BeautifulSoup's lxml parser. The returned dict holds:

    * ``RootID``, ``followup``, ``UserName``, ``passwd``, ``star`` and
      ``signflag``: the ``value`` attribute of the first element carrying
      that ``name``;
    * ``method`` and ``Expression``: the fixed values ``'fastreply'`` and
      ``'face7.gif'``;
    * ``BoardID``: the digits after ``BoardID=`` in the ``action`` attribute
      of the element named ``frmAnnounce``.

    ``Content`` is not included; the caller adds it.

    Raises:
        TypeError: if one of the named elements is missing from the page
            (``find`` returns ``None``).
        KeyError: if a found element lacks the attribute being read.
        AttributeError: if the form action contains no ``BoardID=<digits>``.
    """
    # Imported locally because this function is defined and called before
    # the file's top-level ``import bs4``.
    import bs4

    html = opener.open(url).read()
    soup = bs4.BeautifulSoup(html, 'lxml')

    keys = ('RootID', 'followup', 'UserName', 'passwd', 'star', 'signflag')
    d = {key: soup.find(attrs={'name': key})['value'] for key in keys}
    d['method'] = 'fastreply'
    d['Expression'] = 'face7.gif'

    # The board id is not a form field; it only appears in the form's action.
    action = soup.find(attrs={'name': 'frmAnnounce'})['action']
    d['BoardID'] = re.search(r'BoardID=(\d+)', action).group(1)
    return d
# Demo: show the form fields extracted from this thread page.
thread_url = 'http://www.cc98.org/dispbbs.asp?boardID=509&ID=4222721&page=1'
print(info(thread_url))
{'followup': '702696700', 'RootID': '4222721', 'star': '1', 'passwd': '60f6b1a2e519ece3', 'BoardID': '509', 'UserName': 'answeror', 'signflag': 'yes', 'Expression': 'face7.gif', 'method': 'fastreply'}
上面的'lxml'参数是让BeautifulSoup使用lxml解析器, 该解析器具有更好的容错能力. 因为CC98的HTML并不标准, 不能用普通解析器直接解析. 详见BeautifulSoup文档中关于解析器的说明.
提示: 更多用法参见BeautifulSoup文档.
把上面的代码整合起来, 并且判断发帖是否成功:
def reply(url, content):
    """Post *content* as a fast reply to the thread page at *url*.

    The form fields come from ``info(url)`` with ``Content`` added, and are
    sent to ``reply_url`` through the module-level ``opener`` with a
    ``Referer`` header pointing at the forum.

    Args:
        url: address of the thread page to reply to.
        content: reply text, as a byte string or a unicode string.

    Returns:
        True if the response body contains the success marker tested on the
        last line, False otherwise.
    """
    d = info(url)
    d['Content'] = content

    # urllib.urlencode() calls str() on every value, which raises
    # UnicodeEncodeError for unicode text containing non-ASCII characters
    # (the parsed form values are unicode). Encode such values first.
    # UTF-8 is the charset the forum declares for its pages; it is assumed
    # to be what the form handler expects as well.
    pairs = [
        (key, value.encode('utf-8') if isinstance(value, unicode) else value)
        for key, value in d.items()
    ]

    request = urllib2.Request(
        reply_url,
        data=urllib.urlencode(pairs),
        headers={'Referer': 'http://www.cc98.org'}
    )
    data = opener.open(request).read()
    # The marker is a plain literal, so a substring test is sufficient.
    return '成功' in data
# Demo: post a test reply and print whether the forum reported success.
thread_url = 'http://www.cc98.org/dispbbs.asp?boardID=509&ID=4222721&page=1'
print(reply(thread_url, '自动回帖测试'))
True
用一个类包装起来, 方便以后使用:
import re
import bs4
import urllib
import urllib2
from cookielib import CookieJar
# Address the login form is submitted to.
login_url = 'http://www.cc98.org/login.asp'
# Address the reply form is submitted to.
reply_url = 'http://www.cc98.org/SaveReAnnounce.asp'
class Replyer(object):
    """A CC98 session for one account that can post fast replies.

    Each instance builds its own cookie-aware opener and submits the login
    form once, on construction.
    """

    def __init__(self, username, password):
        """Create the opener and submit the login form.

        Args:
            username: forum account name.
            password: password of that account.

        The login response is discarded, so a failed login is not detected
        here.
        """
        cookie_handler = urllib2.HTTPCookieProcessor(CookieJar())
        self.opener = urllib2.build_opener(cookie_handler)
        self.opener.open(login_url, self._urlencode(dict(
            username=username,
            password=password,
            action='chk'
        )))

    @staticmethod
    def _urlencode(fields):
        """Form-encode the dict *fields*, accepting unicode values.

        urllib.urlencode() calls str() on every value, which raises
        UnicodeEncodeError for unicode text containing non-ASCII characters.
        Unicode values are therefore encoded as UTF-8 first; byte strings
        pass through unchanged. UTF-8 is the charset the forum declares for
        its pages and is assumed to be what its form handlers expect.
        """
        pairs = [
            (key, value.encode('utf-8') if isinstance(value, unicode) else value)
            for key, value in fields.items()
        ]
        return urllib.urlencode(pairs)

    def info(self, url):
        """Collect the fast-reply form fields for the thread page at *url*.

        The page is fetched through this instance's opener and parsed with
        BeautifulSoup's lxml parser. ``RootID``, ``followup``, ``UserName``,
        ``passwd``, ``star`` and ``signflag`` are read from the ``value``
        attribute of the first element carrying that ``name``; ``method``
        and ``Expression`` are fixed; ``BoardID`` is taken from the
        ``action`` attribute of the element named ``frmAnnounce``.

        Raises:
            TypeError: if one of the named elements is missing from the page
                (``find`` returns ``None``).
            KeyError: if a found element lacks the attribute being read.
            AttributeError: if the action contains no ``BoardID=<digits>``.
        """
        html = self.opener.open(url).read()
        soup = bs4.BeautifulSoup(html, 'lxml')

        keys = ('RootID', 'followup', 'UserName', 'passwd', 'star', 'signflag')
        d = {key: soup.find(attrs={'name': key})['value'] for key in keys}
        d['method'] = 'fastreply'
        d['Expression'] = 'face7.gif'

        # The board id is not a form field; it only appears in the action.
        action = soup.find(attrs={'name': 'frmAnnounce'})['action']
        d['BoardID'] = re.search(r'BoardID=(\d+)', action).group(1)
        return d

    def reply(self, url, content):
        """Post *content* as a fast reply to the thread page at *url*.

        Args:
            url: address of the thread page to reply to.
            content: reply text, as a byte string or a unicode string.

        Returns:
            True if the response body contains the success marker tested on
            the last line, False otherwise.
        """
        d = self.info(url)
        d['Content'] = content
        request = urllib2.Request(
            reply_url,
            data=self._urlencode(d),
            headers={'Referer': 'http://www.cc98.org'}
        )
        data = self.opener.open(request).read()
        # The marker is a plain literal, so a substring test is sufficient.
        return '成功' in data
# Example usage: read the password from the local "password" file, log in,
# post a test reply and print the boolean returned by reply().
with open('password') as f:
    password = f.read().strip()

thread_url = 'http://www.cc98.org/dispbbs.asp?boardID=509&ID=4222721&page=1'
r = Replyer('answeror', password)
print(r.reply(thread_url, '自动回帖测试'))
True