博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
python答题辅助
阅读量:6892 次
发布时间:2019-06-27

本文共 6534 字,大约阅读时间需要 21 分钟。

最近直播答题app很热门,由于之前看过跳一跳的python脚本(非常棒),于是也想写一个答题的脚本。

https://github.com/huanmsf/cai

 

思路:

1、截图

2、文字识别,提取问题和选项(分割后识别准确性会提高)

3、爬取网页数据,根据规则匹配选项

4、根据选项自动点击屏幕该位置(应该循环点击,防止刚好切换到西瓜妹)

5、重复前面步骤

存在的问题:

1、答题时间有限,如果爬取的链接多了,还没解析完时间就到了。爬取的少就缺少分析数据,结果不靠谱。

2、问题和选项需要提取关键字匹配

3、可能要试试其他搜索引擎(百度垃圾信息严重影响正确率)

 目录:

├── baidu.py
├── cai.png
├── main.py
├── need
│   └── chi_sim.traineddata
├── README
└── screenshot.py

 main.py:

"""Live-quiz helper: screenshot the phone, OCR the question and options,
score the options against web search results, then tap the best one."""
import os
import time
import urllib.request

try:
    # Legacy standalone PIL layout; ImageDraw must be imported in BOTH
    # branches, otherwise draw() fails when this branch succeeds.
    import Image
    import ImageDraw
except ImportError:
    from PIL import Image, ImageDraw
import pytesseract

import baidu
from screenshot import pull_screenshot

# (Distance from the top of the screen to the question) / screen height.
# Varies with resolution (default tuned for 1920*1080).
top_off_c = 0.15
# Height of the question area, in pixels.
que_h = 300
# Height of one answer row, in pixels.
ans_h = 170
# Left/right margin, in pixels.
l_r_off = 40
# Characters removed from the OCR'd question.
que_filter = ['.', ' ']
# Characters removed from the OCR'd answers.
ans_filter = ["《", "》", ' ']
# Questions that have already been answered.
que_list = []
# Option tap coordinates (placeholders; the real values depend on the
# screenshot size and are computed inside main()).
point_A = (0, 0, 0, 0)
point_B = (0, 0, 0, 0)
point_C = (0, 0, 0, 0)

# Stand-in for an option whose OCR result is blank. An empty string is a
# substring of every text, so it would otherwise match every page.
_EMPTY_ANSWER = "&*&"


def _apply_filters(text, filters):
    """Strip *text* and delete each filter string from it, one filter at a time."""
    for unwanted in filters:
        text = text.strip().replace(unwanted, "")
    return text


def _option_box(w, h, i):
    """Return the crop box (left, upper, right, lower) of option row *i* (0-based)."""
    first_row_top = h * top_off_c + que_h
    return (l_r_off, first_row_top + ans_h * i,
            w * 0.7, first_row_top + ans_h * (i + 1))


def _option_point(w, h, i):
    """Return the (x1, y1, x2, y2) swipe used to tap option row *i* (0-based)."""
    center_y = h * top_off_c + que_h + ans_h / 2 * (2 * i + 1)
    return (w / 3 - 20, center_y - 20, w / 3, center_y)


def draw():
    """Debug aid: draw the question/answer region borders on cai.png and show it."""
    img = Image.open('cai.png')
    w, h = img.size
    canvas = ImageDraw.Draw(img)
    top = h * top_off_c
    # Top and bottom border of the question area.
    canvas.line((l_r_off, top, w - l_r_off, top), fill="red")
    canvas.line((l_r_off, top + que_h, w - l_r_off, top + que_h), fill="red")
    # Bottom border of each of the three answer rows.
    for row in range(1, 4):
        y = top + que_h + ans_h * row
        canvas.line((l_r_off, y, w * 0.7, y), fill="red")
    img.show()


def click(point):
    """Tap the screen through adb by issuing a very short swipe.

    point: (x1, y1, x2, y2) in screen pixels; fractional values are truncated.
    """
    x1, y1, x2, y2 = (int(v) for v in point[:4])
    cmd = 'adb shell input swipe {x1} {y1} {x2} {y2} {duration}'.format(
        x1=x1, y1=y1, x2=x2, y2=y2, duration=1)
    os.system(cmd)


def main():
    """Loop forever: screenshot, OCR, search, and tap the highest-scoring option."""
    while True:
        print(">>>>>>")
        pull_screenshot()
        with Image.open('cai.png') as shot:
            img = shot.convert('L')
        w, h = img.size

        top = h * top_off_c
        img_q = img.crop((l_r_off, top, w - l_r_off, top + que_h))
        # Copy the chi_sim traineddata file under need/ to
        # /usr/share/tesseract-ocr/4.00/
        question = _apply_filters(
            pytesseract.image_to_string(img_q, lang='chi_sim'), que_filter)
        print(question)

        # Nothing recognised, or this question was already answered: skip the
        # three answer OCR passes and the web search entirely.
        if not question or question in que_list:
            continue

        ans = []
        for i in range(3):
            text = _apply_filters(
                pytesseract.image_to_string(img.crop(_option_box(w, h, i)),
                                            lang='chi_sim'),
                ans_filter)
            ans.append(text if text.strip() else _EMPTY_ANSWER)
        print(ans)

        index = baidu.search(question, ans)
        # Pick option 1, 2 or 3; anything other than 0/1 falls back to the third.
        click(_option_point(w, h, index if index in (0, 1) else 2))
        print("index" + str(index))
        que_list.append(question)


if __name__ == '__main__':
    main()

 baidu.py:

# -*- coding:utf-8 -*-
"""Score quiz options against Baidu search results.

Scoring rule: the first time an option appears in a result page it earns
+10; every further occurrence in that same page earns +3.
"""
import http.client
import re
import time
import urllib
import urllib.parse
import urllib.request

import lxml.etree as etree

_FIRST_HIT_SCORE = 10
_REPEAT_HIT_SCORE = 3


def _parse_html(page):
    """Parse *page* into an lxml tree; return None when it cannot be parsed."""
    try:
        return etree.HTML(page)
    except (etree.LxmlError, ValueError):
        return None


def _score_page(texts, ans, cont):
    """Add the scores earned by each option in one page's text nodes to *cont*."""
    seen_in_page = set()
    for con in texts:
        if not con.strip():
            continue
        for a in ans:
            if a not in con:
                continue
            if a in seen_in_page:
                cont[a] = cont.get(a, 0) + _REPEAT_HIT_SCORE
            else:
                cont[a] = cont.get(a, 0) + _FIRST_HIT_SCORE
                seen_in_page.add(a)
        print(con)


def search(question, ans):
    """Return the index in *ans* of the option that scores highest for *question*.

    The question is searched on Baidu and the first five result links are
    fetched and scored. Returns 0 when the search page cannot be fetched or
    parsed, or when no option appears in any result page.
    """
    cont = {}
    q_url = "http://www.baidu.com/s?word=" + urllib.parse.quote(question)
    selector = _parse_html(getdata(q_url))
    if selector is None:
        return 0
    url_list = selector.xpath('//h3[@class]/a[@data-click]/@href')[0:5]
    for url_item in url_list:
        if not url_item.startswith('http'):
            continue
        print(url_item)
        selector = _parse_html(getdata(url_item))
        if selector is None:
            # One unreadable page must not throw away the scores already
            # collected from the other pages.
            continue
        content_list = selector.xpath('//div/text()|//span/text()|//p/text()')
        _score_page(content_list, ans, cont)
    print(cont)
    if not cont:
        return 0
    # On a tie the option that was scored first wins.
    best = max(cont, key=cont.get)
    return ans.index(best)


def getdata(url, timeout=5):
    """Fetch *url* and return its body decoded as UTF-8 (undecodable bytes dropped).

    Returns " " when the request cannot be opened and "" when reading the
    body fails. *timeout* (seconds) bounds the request so one slow site
    cannot use up the whole answering window.
    """
    req = urllib.request.Request(url)
    try:
        response = urllib.request.urlopen(req, timeout=timeout)
    except (OSError, ValueError, http.client.HTTPException):
        return " "
    with response:
        try:
            return response.read().decode("utf-8", 'ignore')
        except (OSError, http.client.HTTPException):
            return ""

 

 screenshot.py:

# -*- coding: utf-8 -*-
"""Phone screenshot helper (based on the WeChat "Jump" bot source code)."""
import subprocess
import os
import sys
from PIL import Image

# How the screenshot is obtained:
#   3 - read `adb shell screencap -p` from stdout, bytes used as-is
#   2 - same, but replace b'\r\n' with b'\n'
#   1 - same, but replace b'\r\r\n' with b'\n'
#   0 - save the screenshot on the device, then `adb pull` it
SCREENSHOT_WAY = 3


def pull_screenshot():
    """Capture the phone screen through adb and save it as cai.png in the cwd."""
    if 1 <= SCREENSHOT_WAY <= 3:
        # run() waits for adb to exit, so no child process is left unreaped.
        completed = subprocess.run(
            ['adb', 'shell', 'screencap', '-p'],
            stdout=subprocess.PIPE)
        binary_screenshot = completed.stdout
        if SCREENSHOT_WAY == 2:
            binary_screenshot = binary_screenshot.replace(b'\r\n', b'\n')
        elif SCREENSHOT_WAY == 1:
            binary_screenshot = binary_screenshot.replace(b'\r\r\n', b'\n')
        with open('cai.png', 'wb') as f:
            f.write(binary_screenshot)
    elif SCREENSHOT_WAY == 0:
        os.system('adb shell screencap -p /sdcard/cai.png')
        os.system('adb pull /sdcard/cai.png .')

 

 

文字识别:

sudo pip3 install pytesseract
sudo apt-get install tesseract-ocr

 

初级版本效果:

 

 题外话:

最近在浏览FB站看到

文中提到可以提前10秒得到题目(不知是否属实),由于访问权限不能看,如有知道怎么搞的请留言交流下,谢谢

转载于:https://www.cnblogs.com/lanqie/p/8290590.html

你可能感兴趣的文章
Java(Java SE7) 体系结构图
查看>>
安装多个版本的unity
查看>>
Unity 脚本的未来发展
查看>>
给vs2010安装上cocos2d-x的模版
查看>>
ffmpeg-20160908[09,10,13,15,19,21,22,24]-bin.7z
查看>>
【java】java处理随机浮点数(小数点后两位)用RMB的大写数值规则输出
查看>>
jquery键盘事件
查看>>
MFC学习之EDIT控件初始化
查看>>
luogu P1972 [SDOI2009]HH的项链 树状数组
查看>>
关于UWP数据绑定的一个坑 x:bind修改为binding
查看>>
线程的状态
查看>>
IDEA小插件之快速修改Maven多模块的工程版本
查看>>
Programming C#.Classes and Objects.成员方法
查看>>
《大道至简》读后感03
查看>>
分布式系统---幂等性设计
查看>>
把“数字的字符串”转换成“整数”时遇到的小麻烦
查看>>
消耗系统内存
查看>>
mysql-connector-c++ 编译安装
查看>>
A - Space Elevator(动态规划专项)
查看>>
mysql使用存储过程和event定期删除
查看>>