Using a Python Bot to Automatically Submit HDU OJ Problems and Verify the Results
My head has been buzzing with random ideas all day and I just have to build them. There's clearly a pile of unfinished things ahead of this one, so once it's done I have to finish those earlier ideas before starting anything new, or I'll chop my hands off. Posting this as proof. ::>_<:: ::>_<:: ::>_<::
This problem-grinding bot is the last thing I'm building in this phase. Once it's done, I'll finish the things I planned but never completed. Otherwise: hands, chopped!
To build a problem-grinding bot, the bot obviously has to find code to submit on its own. There's no bot smart enough to actually solve the problems yet, so it has to find solutions on the web. As before, Python does the finding.
My first idea: Zhang Haobin (张浩斌) runs a WeChat official account, hustoj, that serves solutions for the major OJs, and I planned to write a C# program that simulates clicks in the WeChat desktop client, messaging the account to fetch code. I assumed he had a solution database covering every major OJ, but it turned out he doesn't, so I decided to scrape the code myself.
After some searching I found a site that is perfect for scraping: it already collects the solutions that well-known competitive programmers post on their blogs, so all I have to do is pull them into my own database. (They go into a database because the database acts as the relay between the crawler and the submission bot, which is also practice for the vjudge I plan to write later.)
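For reference, the relay table is roughly this shape. This is a sketch reconstructed from the columns the scripts below read and write, not the exact DDL I ran, and the column types are guesses:

# Rough shape of the relay table (only the column names come from the scripts
# below; the types and this CREATE statement are illustrative)
import MySQLdb

conn = MySQLdb.connect(host='yourdatabaseserver', user='', passwd='', db='', port=3306)
cur = conn.cursor()
cur.execute('''
    CREATE TABLE IF NOT EXISTS `oj` (
        `oj_name`    VARCHAR(32),   -- which judge the solution targets, e.g. 'hdu'
        `problem_id` INT,           -- problem number on that judge
        `source`     MEDIUMTEXT,    -- scraped solution, or the string 'none'
        `if_submit`  TINYINT,       -- 0 = not yet submitted, 1 = submitted
        `success`    TINYINT,       -- 0 = not accepted, 1 = AC, 3 = compile error (added later)
        `time`       DATETIME       -- when the row was inserted
    )
''')
conn.commit()
cur.close()
conn.close()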
It came together quickly, thanks in part to a teacher at school who lent me a Windows server, so I can run the code there without worrying. (My own Aliyun Linux server keeps dying mid-run; I'm not sure whether the fault is my code or my configuration, but the same code runs fine on Windows.)
After a little over half an hour it had already crawled up to ID 1700. Many problems have no solution posted. If I watch a movie until two or three in the morning, the crawl should be done.
The crawler code:
import datetime
import urllib2
from urllib2 import Request, URLError
from bs4 import BeautifulSoup
import MySQLdb

# Solution pages live at accepted.com.cn/hdoj<problem id>
url = 'http://accepted.com.cn/hdoj'
opener = urllib2.build_opener()

for i in range(1224, 5567):
    print 'ID', i, ':'
    request = Request(url + str(i))
    try:
        response = opener.open(request)
    except URLError, e:
        if hasattr(e, 'code'):
            print 'Error code: ', e.code
            source = 'none'
        elif hasattr(e, 'reason'):
            print 'We failed to reach a server.'
            print 'Reason: ', e.reason
            source = 'none'
    else:
        # The solution source sits in the page's crayon-plain-wrap block
        soup = BeautifulSoup(response, 'html.parser')
        a_target = soup.find_all(class_='crayon-plain-wrap')
        if a_target != []:
            source = a_target[0].text.encode("utf-8")
            print 'get code success'
        else:
            source = 'none'
            print 'no code'
    # Store the result; fill in your own MySQL connection details (credentials redacted)
    time = datetime.datetime.now()
    conn = MySQLdb.connect(host='yourdatabaseserver', user='', passwd='', db='', port=3306)
    cur = conn.cursor()
    cur.execute('set names utf8')
    sql = ('insert into `oj` (`oj_name`, `problem_id`, `source`, `if_submit`, `success`, `time`) '
           'values (%s,%s,%s,%s,%s,%s)', ('hdu', int(i), source, '0', '0', time))
    cur.execute(*sql)
    conn.commit()
    cur.close()
    conn.close()
A screenshot of it hard at work:
I couldn't find a movie worth watching, so I reworked the script to run several crawl tasks at once; in under half an hour it had grabbed everything that could be grabbed.
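"Several tasks" just means splitting the ID range across a few worker processes. A minimal sketch of that idea (crawl_range is a stand-in for the crawl loop above, and the exact ranges are illustrative, not what I actually ran):

from multiprocessing import Process

def crawl_range(start, end):
    # stand-in for the crawl loop above, restricted to IDs in [start, end)
    pass

if __name__ == '__main__':
    ranges = [(1000, 2200), (2200, 3400), (3400, 4600), (4600, 5567)]
    workers = [Process(target=crawl_range, args=r) for r in ranges]
    for p in workers:
        p.start()
    for p in workers:
        p.join()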
A little while later the submission script was written too:
import time
import urllib
import urllib2
from urllib2 import URLError
import MySQLdb

# HDU login and submission endpoints
login_url = 'http://acm.hdu.edu.cn/userloginex.php?action=login&cid=0&notice=0'
que_url = 'http://acm.hdu.edu.cn/submit.php?action=submit'
headers = {
    'Host': 'acm.hdu.edu.cn',
    'Origin': 'http://acm.hdu.edu.cn',
    'Referer': 'http://acm.hdu.edu.cn/userloginex.php?action=login&cid=0&notice=0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en,zh;q=0.8,zh-CN;q=0.6',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'Content-Type': 'application/x-www-form-urlencoded',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36',
}

# Keep the session cookie so the login carries over to the submission request
cookies = urllib2.HTTPCookieProcessor()
opener = urllib2.build_opener(cookies)

account = {
    'username': 'vividtest',
    'userpass': ''
}
data = urllib.urlencode(account)

for i in range(1000, 5566):
    # Look up the scraped solution for this problem (connection details redacted)
    conn = MySQLdb.connect(host='', user='', passwd='', db='', port=3306)
    cur = conn.cursor()
    cur.execute('set names utf8')
    sql = "SELECT `source` FROM `oj` WHERE `problem_id`='" + str(i) + "'"
    cur.execute(sql)
    ans_code = cur.fetchone()[0]
    conn.close()
    if 'none' == ans_code:
        continue
    else:
        # Log in, then post the solution to the submit form
        request = urllib2.Request(login_url, data, headers)
        try:
            response = opener.open(request)
        except URLError, e:
            if hasattr(e, 'code'):
                print 'Error code: ', e.code
            elif hasattr(e, 'reason'):
                print 'Failed to reach a server.'
                print 'Reason: ', e.reason
        else:
            #print response.read()
            values = {
                'language': 0,              # language id on HDU's submit form
                'usercode': ans_code,
                'problemid': i
            }
            submit_data = urllib.urlencode(values)   # kept separate so the login payload in `data` stays intact
            request = urllib2.Request(que_url, submit_data, headers)
            response = opener.open(request)
            print response.read()
            time.sleep(10)
To avoid causing trouble for the HDU OJ admins, the script sleeps ten seconds between submissions. That's just for testing; once the real grind starts it will sleep a random 1-10 minutes between submissions.
It's now ready to go conquer HDU OJ.
BUT, THINK HIGHER:
I might as well write the judge-status fetching too; with that done, everything can be folded into my own OJ to make a vjudge.
Done. Here's the code:
import random
import time
import urllib
import urllib2
from urllib2 import URLError
from bs4 import BeautifulSoup
import MySQLdb

def getStatus(status):
    # Map HDU's verdict text to the numeric code stored in the database
    if status == 'Accepted':
        return 1
    elif (status == 'Queuing') or (status == 'Compiling') or (status == 'Running'):
        return 2    # still judging, keep polling
    else:
        return 0

login_url = 'http://acm.hdu.edu.cn/userloginex.php?action=login&cid=0&notice=0'
submit_url = 'http://acm.hdu.edu.cn/submit.php?action=submit'
status_url = 'http://acm.hdu.edu.cn/status.php?user=vividtest&pid='
headers = {
    'Host': 'acm.hdu.edu.cn',
    'Origin': 'http://acm.hdu.edu.cn',
    'Referer': 'http://acm.hdu.edu.cn/userloginex.php?action=login&cid=0&notice=0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en,zh;q=0.8,zh-CN;q=0.6',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'Content-Type': 'application/x-www-form-urlencoded',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36',
}
cookies = urllib2.HTTPCookieProcessor()
opener = urllib2.build_opener(cookies)
account = {
    'username': 'vividtest',
    'userpass': ''
}
data = urllib.urlencode(account)

for i in range(1000, 5567):
    print 'ID', i
    conn = MySQLdb.connect(host='', user='', passwd='', db='', port=3306)
    cur = conn.cursor()
    cur.execute('set names utf8')
    sql = "SELECT `source` FROM `oj` WHERE `problem_id`='" + str(i) + "'"
    cur.execute(sql)
    ans_code = cur.fetchone()[0]
    cur.close()
    conn.close()
    if 'none' == ans_code:
        print 'none code'
        continue
    else:
        request = urllib2.Request(login_url, data, headers)
        try:
            response = opener.open(request)
        except URLError, e:
            if hasattr(e, 'code'):
                print 'Error code: ', e.code
            elif hasattr(e, 'reason'):
                print 'We failed to reach a server.'
                print 'Reason: ', e.reason
        else:
            #print response.read()
            values = {
                'language': 0,
                'usercode': ans_code,
                'problemid': i
            }
            submit_data = urllib.urlencode(values)   # kept separate so the login payload in `data` stays intact
            request = urllib2.Request(submit_url, submit_data, headers)
            try:
                response = opener.open(request)
            except URLError, e:
                print 'submit failed'
            else:
                print 'submit success'
                time.sleep(1)
                # Poll my own status page until the verdict is no longer pending
                request = urllib2.Request(status_url + str(i), None, headers)
                try:
                    response = opener.open(request)
                except URLError, e:
                    print 'get status failed'
                else:
                    status_code = 2
                    while 2 == status_code:
                        try:
                            response = opener.open(request)
                        except URLError, e:
                            print 'get status failed once, I will try again'
                            continue
                        else:
                            soup = BeautifulSoup(response, 'html.parser')
                            table = soup.find_all('tr')
                            status = table[7].font.text   # newest run sits in the 8th <tr> of the status table
                            print status
                            status_code = getStatus(status)
                            time.sleep(1)
                    # Record the verdict
                    conn = MySQLdb.connect(host='', user='', passwd='', db='', port=3306)
                    cur = conn.cursor()
                    cur.execute('set names utf8')
                    sql = ("UPDATE oj SET `if_submit`=1,`success`=%s WHERE `problem_id`=%s", (status_code, i))
                    cur.execute(*sql)
                    conn.commit()
                    cur.close()
                    conn.close()
                    print 'judge complete, status: ' + status
                    random_time = random.randint(60, 600)
                    print "I'm sleeping, I will be back in", random_time, "s"
                    time.sleep(random_time)
A screenshot of it running on the server:
Another revision: added exception handling for when the status can't be read, and a separate code for recording compilation errors.
import random
import time
import urllib
import urllib2
from urllib2 import URLError
from bs4 import BeautifulSoup
import MySQLdb

def getStatus(status):
    # Map HDU's verdict text to the numeric code stored in the database
    if status == 'Accepted':
        return 1
    elif (status == 'Queuing') or (status == 'Compiling') or (status == 'Running'):
        return 2    # still judging, keep polling
    elif status == 'Compilation Error':
        return 3    # recorded separately so CE submissions can be found later
    else:
        return 0

login_url = 'http://acm.hdu.edu.cn/userloginex.php?action=login&cid=0&notice=0'
submit_url = 'http://acm.hdu.edu.cn/submit.php?action=submit'
status_url = 'http://acm.hdu.edu.cn/status.php?user=vividtest&pid='
headers = {
    'Host': 'acm.hdu.edu.cn',
    'Origin': 'http://acm.hdu.edu.cn',
    'Referer': 'http://acm.hdu.edu.cn/userloginex.php?action=login&cid=0&notice=0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en,zh;q=0.8,zh-CN;q=0.6',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'Content-Type': 'application/x-www-form-urlencoded',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36',
}
cookies = urllib2.HTTPCookieProcessor()
opener = urllib2.build_opener(cookies)
account = {
    'username': 'vividtest',
    'userpass': ''
}
data = urllib.urlencode(account)

for i in range(1053, 5567):
    run_error = 0
    print 'ID', i
    conn = MySQLdb.connect(host='', user='', passwd='', db='', port=3306)
    cur = conn.cursor()
    cur.execute('set names utf8')
    sql = "SELECT `source` FROM `oj` WHERE `problem_id`='" + str(i) + "'"
    cur.execute(sql)
    ans_code = cur.fetchone()[0]
    cur.close()
    conn.close()
    if 'none' == ans_code:
        print 'none code'
        continue
    else:
        request = urllib2.Request(login_url, data, headers)
        try:
            response = opener.open(request)
        except URLError, e:
            if hasattr(e, 'code'):
                print 'Error code: ', e.code
            elif hasattr(e, 'reason'):
                print 'We failed to reach a server.'
                print 'Reason: ', e.reason
        else:
            #print response.read()
            values = {
                'language': 0,
                'usercode': ans_code,
                'problemid': i
            }
            submit_data = urllib.urlencode(values)   # kept separate so the login payload in `data` stays intact
            request = urllib2.Request(submit_url, submit_data, headers)
            try:
                response = opener.open(request)
            except URLError, e:
                print 'submit failed'
            else:
                print 'submit success'
                time.sleep(1)
                # Poll my own status page until the verdict is no longer pending
                request = urllib2.Request(status_url + str(i), None, headers)
                try:
                    response = opener.open(request)
                except URLError, e:
                    print 'get status failed'
                else:
                    status_code = 2
                    while 2 == status_code:
                        try:
                            response = opener.open(request)
                        except URLError, e:
                            print 'get status failed once, I will try again'
                            continue
                        else:
                            soup = BeautifulSoup(response, 'html.parser')
                            table = soup.find_all('tr')
                            try:
                                status = table[7].font.text
                            except:
                                # The status row was not where I expected; give up on this problem
                                print 'Error I can not handle'
                                run_error = 1
                            else:
                                print status
                                status_code = getStatus(status)
                                time.sleep(1)
                            if run_error:
                                break
                    if run_error:
                        continue    # skip the database update for this problem
                    conn = MySQLdb.connect(host='', user='', passwd='', db='', port=3306)
                    cur = conn.cursor()
                    cur.execute('set names utf8')
                    sql = ("UPDATE oj SET `if_submit`=1,`success`=%s WHERE `problem_id`=%s", (status_code, i))
                    cur.execute(*sql)
                    conn.commit()
                    cur.close()
                    conn.close()
                    print 'judge complete, status: ' + status
                    random_time = random.randint(10, 60)
                    print "I'm sleeping, I will be back in", random_time, "s"
                    time.sleep(random_time)
One more change: a SQL query now picks the code that hasn't been submitted yet and submits it automatically, instead of me changing problem IDs by hand. This can already run as the judging/submission service behind a vjudge. When Tang (汤大神) has a moment I'll ask him how to run a Python script as a daemon (a rough sketch of one approach follows the script below). The code:
import random
import time
import urllib
import urllib2
from urllib2 import URLError
from bs4 import BeautifulSoup
import MySQLdb

def getStatus(status):
    # Map HDU's verdict text to the numeric code stored in the database
    if status == 'Accepted':
        return 1
    elif (status == 'Queuing') or (status == 'Compiling') or (status == 'Running'):
        return 2    # still judging, keep polling
    elif status == 'Compilation Error':
        return 3
    else:
        return 0

login_url = 'http://acm.hdu.edu.cn/userloginex.php?action=login&cid=0&notice=0'
submit_url = 'http://acm.hdu.edu.cn/submit.php?action=submit'
status_url = 'http://acm.hdu.edu.cn/status.php?user=vividtest&pid='
headers = {
    'Host': 'acm.hdu.edu.cn',
    'Origin': 'http://acm.hdu.edu.cn',
    'Referer': 'http://acm.hdu.edu.cn/userloginex.php?action=login&cid=0&notice=0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en,zh;q=0.8,zh-CN;q=0.6',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'Content-Type': 'application/x-www-form-urlencoded',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36',
}
cookies = urllib2.HTTPCookieProcessor()
opener = urllib2.build_opener(cookies)
account = {
    'username': 'vividtest',
    'userpass': ''
}
data = urllib.urlencode(account)

# Standard headers prepended to every submission so that snippets missing
# their #includes still compile
head_file = '#include <iostream>\n#include <cstring>\n#include <cstdio>\n#include <cstdlib>\n#include <cmath>\n#include <string>\n#include <vector>\n#include <list>\n#include <map>\n#include <queue>\n#include <stack>\n#include <bitset>\n#include <algorithm>\n#include <numeric>\n#include <functional>\nusing namespace std;\n'

while True:
    run_error = 0
    conn = MySQLdb.connect(host='', user='', passwd='', db='', port=3306)
    cur = conn.cursor()
    cur.execute('set names utf8')
    # (disabled) earlier experiment: only re-check entries marked Compilation Error
    '''
    sql = "SELECT `success` FROM `oj` WHERE `problem_id`='" + str(i) + "'"
    cur.execute(sql)
    code = cur.fetchone()[0]
    if code != 3:
        cur.close()
        conn.close()
        continue
    '''
    # Pick the oldest solution that has not been submitted yet
    sql = "SELECT `source`,`problem_id` FROM `oj` WHERE `if_submit`=0 AND `source` != 'none' ORDER BY `time` ASC LIMIT 0, 1"
    cur.execute(sql)
    result = cur.fetchall()
    cur.close()
    conn.close()
    if not result:
        # nothing left to submit; check again in a minute
        time.sleep(60)
        continue
    ans_code = result[0][0]
    i = result[0][1]
    print 'ID', i
    if 'none' == ans_code:
        print 'none code'
        continue
    else:
        request = urllib2.Request(login_url, data, headers)
        try:
            response = opener.open(request)
        except URLError, e:
            if hasattr(e, 'code'):
                print 'Error code: ', e.code
            elif hasattr(e, 'reason'):
                print 'We failed to reach a server.'
                print 'Reason: ', e.reason
        else:
            values = {
                'language': 0,
                'usercode': head_file + ans_code,
                'problemid': i
            }
            submit_data = urllib.urlencode(values)   # kept separate so the login payload in `data` stays intact
            request = urllib2.Request(submit_url, submit_data, headers)
            try:
                response = opener.open(request)
            except URLError, e:
                print 'submit failed'
            else:
                print 'submit success'
                time.sleep(1)
                # Poll my own status page until the verdict is no longer pending
                request = urllib2.Request(status_url + str(i), None, headers)
                try:
                    response = opener.open(request)
                except URLError, e:
                    print 'get status failed'
                else:
                    status_code = 2
                    while 2 == status_code:
                        try:
                            response = opener.open(request)
                        except URLError, e:
                            print 'get status failed once, I will try again'
                            time.sleep(2)
                            continue
                        else:
                            soup = BeautifulSoup(response, 'html.parser')
                            table = soup.find_all('tr')
                            try:
                                status = table[7].font.text
                            except:
                                # The status row was not where I expected; give up on this one
                                print 'Error I can not handle'
                                run_error = 1
                            else:
                                print status
                                status_code = getStatus(status)
                                time.sleep(1)
                            if run_error:
                                break
                    if run_error:
                        continue    # skip the database update for this problem
                    conn = MySQLdb.connect(host='', user='', passwd='', db='', port=3306)
                    cur = conn.cursor()
                    cur.execute('set names utf8')
                    sql = ("UPDATE oj SET `if_submit`=1,`success`=%s WHERE `problem_id`=%s", (status_code, i))
                    cur.execute(*sql)
                    conn.commit()
                    cur.close()
                    conn.close()
                    print 'judge complete, status: ' + status
                    random_time = random.randint(10, 60)
                    print "I'm sleeping, I will be back in", random_time, "s"
                    time.sleep(random_time)
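On the daemon question above: until I get a proper answer, here is a minimal sketch of the classic double-fork approach on Linux. It is untested here, and run_forever is a hypothetical stand-in for the while-True loop in the script above:

import os
import sys

def daemonize():
    # First fork: detach from the launching shell
    if os.fork() > 0:
        sys.exit(0)
    os.setsid()              # new session, no controlling terminal
    # Second fork: make sure the process cannot re-acquire a terminal
    if os.fork() > 0:
        sys.exit(0)
    os.chdir('/')
    os.umask(0)

def run_forever():
    pass  # the while-True submit/poll loop from the script above goes here

if __name__ == '__main__':
    daemonize()
    run_forever()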
And a picture of HDU OJ, conquered: