您现在的位置是: 网站首页> 学习笔记> 爬虫 爬虫
腾讯滑块
2021-04-11 [滑块验证码] 7895人已围观
import requests
import cv2
from selenium import webdriver
import time
import numpy as np
import pyautogui
import random
from lxml.html import etree
from selenium.common.exceptions import NoSuchWindowException, WebDriverException
class CaptchaOne(object):
    """Solve the slider captcha on glidedsky's ``crawler-captcha-1`` level.

    A Firefox instance is driven through Selenium, the slider is dragged with
    real mouse events via ``pyautogui``, and the integers found on each
    unlocked page are accumulated in ``self.values``.
    """

    # Screen position (pixels) where the drag starts; presumably the slider
    # thumb on the author's maximised Firefox window -- TODO confirm per setup.
    SLIDER_X = 730
    SLIDER_Y = 655
    # Text that is present in the captcha frame while the puzzle is unsolved.
    CAPTCHA_PROMPT = '拖动下方滑块完成拼图'
    # XPath of the text nodes holding the numbers on a result page.
    VALUE_XPATH = '//div[@class="card-body"]//div[@class="col-md-1"]/text()'
    # Seconds to wait for any plain HTTP request made through ``requests``.
    REQUEST_TIMEOUT = 10
    # Pool of user-agent strings; one is picked at random per page load.
    USER_AGENTS = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
        "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    )

    def __init__(self):
        """Start Firefox, open the target site once and install the cookies."""
        # Every integer scraped so far, across all pages.
        self.values = []
        self.headers = {
        }
        # Login cookies as a list of single-entry ``{name: value}`` dicts.
        self.cookie = [
        ]
        self.drive = webdriver.Firefox()
        self.drive.maximize_window()
        self.drive.set_page_load_timeout(12)
        # Load the site once first; otherwise add_cookie raises an
        # invalid-domain error.
        self.drive.get('http://www.glidedsky.com/level/web/crawler-captcha-1?page=1')
        for c in self.cookie:
            name, value = next(iter(c.items()))
            self.drive.add_cookie({'name': name, 'value': value})

    def get_tracks(self, distance):
        """Build a movement track: uniform acceleration, then deceleration.

        Uses the constant-acceleration formulas ``v = v0 + a*t`` and
        ``s = v0*t + a*t**2 / 2`` with a time step of 0.5. The motion
        accelerates (a = 5) until 3/5 of ``distance`` is covered and then
        decelerates (a = -6).

        The steps are rounded individually and the last step may overshoot,
        so ``sum(tracks)`` is close to, but not always equal to, ``distance``.
        ``get_page`` does not call this method; it drags with pyautogui.

        :param distance: total distance to cover
        :return: list of rounded per-step displacements; empty when
            ``distance`` is not positive
        """
        velocity = 0
        tick = 0.5
        tracks = []
        covered = 0
        # Past this point the motion starts to decelerate.
        mid = distance * 3 / 5
        while covered < distance:
            # A smaller acceleration yields more, finer-grained steps.
            accel = 5 if covered < mid else -6
            step = velocity * tick + 0.5 * accel * (tick ** 2)
            covered += step
            tracks.append(round(step))
            velocity = velocity + accel * tick
        return tracks

    def get_page(self, url, max_retries=None):
        """Load ``url``, solve its captcha and collect the page's numbers.

        Failed attempts are retried in a loop (not by recursion, so long
        failure streaks cannot exhaust the call stack).

        :param url: page to scrape
        :param max_retries: maximum number of retries after the first attempt;
            ``None`` (the default) retries until an attempt completes
        :return: ``self.values``, the running list of all collected integers
        :raises Exception: errors raised while preparing the browser
            (loading ``about:config`` or ``set_useragent``) are not caught
        """
        retries = 0
        while not self._try_page(url):
            if max_retries is not None and retries >= max_retries:
                break
            retries += 1
            print(f'开始重新获取{url}...')
        return self.values

    def _try_page(self, url):
        """Run one scrape attempt; return False when it should be retried."""
        # The browser preferences are changed from the about:config page
        # before every load (new user agent and proxy per attempt).
        self.drive.get("about:config")
        self.set_useragent()
        try:
            self.drive.get(url)
            self.drive.implicitly_wait(10)
            # Switch into the frame that hosts the captcha.
            self.drive.switch_to.frame('tcaptcha_iframe')
            # ``find_element('xpath', ...)`` is used instead of
            # ``find_element_by_xpath``, which newer Selenium releases dropped.
            bg_url = self.drive.find_element('xpath', '//*[@id="cdn1"]').get_attribute('src')
            sl_url = self.drive.find_element('xpath', '//*[@id="cdn2"]').get_attribute('src')
            distance = self.get_distance(bg_url, sl_url)
            if distance['val'] <= 0:
                print(f'获取{url}失败, 原因:', distance['msg'])
                return False
            self._drag_slider(distance['val'])
            # Give the captcha time to validate and the page time to reload.
            time.sleep(5)
            try:
                if self.CAPTCHA_PROMPT in self.drive.page_source:
                    print(f'获取{url}失败, 原因:滑动验证失败!')
                    time.sleep(2)
                    return False
            except (NoSuchWindowException, WebDriverException):
                # Reading page_source failed inside the captcha frame; the
                # original code treats this as "captcha gone" and parses the
                # parent document.
                return self._collect_values(url)
            except Exception:
                print(f'获取{url}失败, 原因:滑动验证失败!')
                return False
            # NOTE(review): reaching this point means the prompt text was
            # absent and no exception was raised; as in the original, nothing
            # is parsed for this page in that case -- confirm this is intended.
            return True
        except Exception as e:
            print('Self Error: ', e)
            return False

    def _drag_slider(self, distance):
        """Press at the slider position and drag ``distance`` pixels right."""
        x = self.SLIDER_X
        y = self.SLIDER_Y
        # NOTE(review): the press point is jittered but the release point is
        # measured from the un-jittered SLIDER_X, as in the original code.
        pyautogui.moveTo(x=x + random.randint(-20, 20), y=y + random.randint(-10, 10),
                         duration=random.randint(25, 35) / 100)
        pyautogui.mouseDown()
        try:
            # Two intermediate waypoints with vertical wobble, then the
            # final position, each reached in 0.25-0.35 seconds.
            y += random.randint(2, 5)
            waypoint = x + int(distance * random.randint(12, 18) / 20)
            pyautogui.moveTo(waypoint, y, duration=random.randint(25, 35) / 100)
            y += random.randint(-9, 0)
            waypoint = waypoint + int(distance * random.randint(15, 25) / 20)
            pyautogui.moveTo(waypoint, y, duration=random.randint(25, 35) / 100)
            y += random.randint(0, 8)
            pyautogui.moveTo(x + distance, y, duration=random.randint(25, 35) / 100)
            time.sleep(random.randint(40, 75) / 100)
        finally:
            # Always release the button, even if a move raised.
            pyautogui.mouseUp()

    def _collect_values(self, url):
        """Parse the numbers from the parent document; False if none found."""
        self.drive.switch_to.parent_frame()
        res = etree.HTML(self.drive.page_source)
        vals = [int(item.strip()) for item in res.xpath(self.VALUE_XPATH)]
        if not vals:
            print(f'获取{url}失败, 原因:未获取到页面内容!')
            return False
        self.values.extend(vals)
        print(vals)
        return True

    def quit(self):
        """Close the browser and end the WebDriver session."""
        self.drive.quit()

    def get_img_from_net(self, bg_url, sl_url):
        """Download both captcha images.

        :param bg_url: URL of the background image containing the gap
        :param sl_url: URL of the small slider image
        :return: ``(bg_bytes, sl_bytes)``, or ``(False, False)`` when either
            response status is not 200
        """
        contents = []
        for img_url in (bg_url, sl_url):
            res = requests.get(img_url, timeout=self.REQUEST_TIMEOUT)
            if res.status_code != 200:
                return False, False
            contents.append(res.content)
        return contents[0], contents[1]

    def get_distance(self, bg_url, sl_url):
        """Compute how far the slider must be dragged to reach the gap.

        :param bg_url: URL of the background image containing the gap
        :param sl_url: URL of the small slider image
        :return: dict with ``val`` (the offset; 0 when the images could not
            be downloaded or decoded) and ``msg`` (text the caller prints
            when ``val`` is not positive)
        """
        bg_raw, sl_raw = self.get_img_from_net(bg_url, sl_url)
        # Either image missing means failure (the original condition
        # ``not bg_img and sl_img`` let a double failure slip through).
        if not bg_raw or not sl_raw:
            return {'val': 0, 'msg': '获取验证码图片失败!'}
        # Reuse the bytes already downloaded instead of fetching again.
        bg_img = self._decode_half_size(bg_raw)
        sl_img = self._decode_half_size(sl_raw)
        if bg_img is None or sl_img is None:
            return {'val': 0, 'msg': '获取验证码图片失败!'}
        scores = cv2.matchTemplate(bg_img, sl_img, cv2.TM_CCOEFF_NORMED)
        # NOTE(review): the x coordinate of the *minimum* score location is
        # used, exactly as in the original -- confirm that is intended.
        _, _, min_loc, _ = cv2.minMaxLoc(scores)
        # Per the original author's note: +12 accounts for the transparent
        # margin on the slider's left side, -40 for the slider's initial
        # distance from the background's left edge.
        return {'val': min_loc[0] + 12 - 40, 'msg': '未获取到图片'}

    @staticmethod
    def _decode_half_size(raw):
        """Decode image bytes to grayscale at half size; None if undecodable."""
        img = cv2.imdecode(np.frombuffer(raw, np.uint8), cv2.IMREAD_GRAYSCALE)
        if img is None:
            return None
        # ndarray shape is (rows, cols); cv2.resize takes (width, height).
        height, width = img.shape[:2]
        return cv2.resize(img, (int(width * 0.5), int(height * 0.5)))

    def set_useragent(self):
        """Set a random user agent and a fresh HTTP/SSL proxy in Firefox.

        The proxy address is fetched as ``host:port`` from a proxy-pool
        service and written to Firefox preferences through script.
        Presumably this needs the privileged ``about:config`` page that
        ``get_page`` loads beforehand -- TODO confirm.
        """
        js_code = '''
            var customUserAgent = "''' + random.choice(self.USER_AGENTS) + '''";
            //修改后的userAgent
            Object.defineProperty(navigator, 'userAgent', {
                value: customUserAgent,
                writable: false
            });
            console.log(navigator.userAgent);
        '''
        self.drive.execute_script(js_code)
        ip = requests.get('http://nets.tpddns.cn:5010/get/',
                          timeout=self.REQUEST_TIMEOUT).json()['proxy']
        parts = ip.split(':')
        host, port = parts[0], parts[1]
        js_base = '''var pf = Components.classes["@mozilla.org/preferences-service;1"].getService(Components.interfaces.nsIPrefBranch);
            pf.setIntPref("network.proxy.type", 1);
            pf.setCharPref("network.proxy.http", "{host}");
            pf.setIntPref("network.proxy.http_port", {port});
            pf.setCharPref("network.proxy.ssl", "{host}");
            pf.setIntPref("network.proxy.ssl_port", {port});'''
        self.drive.execute_script(js_base.format(host=host, port=port))
if __name__ == '__main__':
    # Walk pages 1..1000 of the captcha level, printing the running sum of
    # all collected numbers after each page.
    base_url = 'http://www.glidedsky.com/level/web/crawler-captcha-1?page={}'
    co = CaptchaOne()
    try:
        for page in range(1, 1001):
            print('page: ', page)
            co.get_page(base_url.format(page))
            print('sum: ', sum(co.values))
            # Pause between pages.
            time.sleep(5)
    finally:
        # Close the browser even if a page raised, so no Firefox process
        # is left behind.
        co.quit()
上一篇:git
下一篇:base64字体存为本地文件
相关文章
文章评论
#2023-06-06 21:57 @ Mattie:
Party Snapѕ Photo Booth OᏟ | Photo Booth Rental Orange County 12911 Dungan Ln, Garden Grove, CA 92840 led гental
#2023-06-07 06:53 @ Windy:
Группа объявлений Ульяновск в телеграм. Размещение частных объявлений бесплатно! Коммерческие и рекламные объявления, согласно правил группы. Подпишись, чтобы не потерять! Объявления Ульяновска
#2023-06-07 11:39 @ Johnson:
Группа объявлений Нижнего Тагила в телеграм. Размещение частных объявлений бесплатно! Коммерческие и рекламные объявления, согласно правил группы. Подпишись, чтобы не потерять... Объявления Нижний Тагил
#2023-06-10 15:20 @ Kraig:
примерно на кривляться оформление сильно необходимых документов и доставим авто из ОАЭ под источник Самые Лучшие Микрозаймы Невзирая сверху то, что кредит показывается быстрым а также эффективным средством резолюции финансовых заморочек, жуть шиздец находят решение сверху этот шаг через сложности процедуры евонный оформления. [URL=https://credit-mikrozaim.com]Микрозайм Или Микрозаем[/URL] https://credit-mikrozaim.com/
#2023-06-12 01:53 @ Brigette:
Pɑrty Snaps Photo Booth OC | Phһoto Booth Rental Orange Ϲounty 12911 Dungan Ln, Garden Grove, CA 92840 VOGUE photo boօtһ rentɑl Laɡuna Niguel
#2023-06-12 16:49 @ Helen:
Party Ꮪnaps Photfo Boothh OC | Photo Booth Rental Orаnge County 12911 Dungan Ꮮn, Garden Ꮐroѵe, CA 92840 photo booth rental baby shower
#2023-06-14 09:21 @ toursex:
<a href=https://viagr.cfd>over the counter viagra substitute</a> Recombination efficiency
#2023-06-29 04:07 @ Playelo:
The shaking is usually fast, about 4 to 12 movements per second <a href=https://sildenafi.cfd>how often should you take viagra</a> Doctors and patients are always looking to identify ways to tell if cancer treatments are working
#2024-03-25 22:32 @ Doug:
Sabung Ayam Online
#2024-03-31 01:53 @ Jayme:
Situs Sabung Ayam Digmaan
#2024-06-02 08:41 @ nivaawarp:
In this model the most significant NF ОєB subunit appears to be NF ОєB1, as mice lacking NF ОєB2 and c Rel demonstrated few differences in response compared to WT mice <a href=https://cialis.lat/discover-the-best-prices-for-cialis>brand name cialis online</a>
#2024-06-06 12:21 @ nivaawarp:
<a href=https://cialis.lat/discover-the-best-prices-for-cialis>real cialis no generic</a> Clomid stimulates the production of GnRH GnRH, testosterone, and also prevents the risk of developing gynecomastia male breast growth due to excessive fluid retention
#2024-10-07 14:42 @ nivaawarp:
<a href=https://enhanceyourlife.mom/>where to buy priligy in usa</a> They do care and can only do so much
#2024-10-13 06:04 @ nivaawarp:
Caplan L, Pittman CB, Zeringue AL, Scherrer JF, Wehmeier KR, Cunningham FE, Eisen SA, McDonald JR 2010 An observational study of musculoskeletal pain among patients receiving bisphosphonate therapy <a href=https://enhanceyourlife.mom/>priligy price</a> Curiously, though, the AFL CIO and the American Conservative Union Гў two groups which rarely agree on anything Гў also opposed the bill, supporting instead some kind of market mechanism to determine royalty rates
添加评论
点击排行
本栏推荐
标签云
热评文章
- django使用qq邮箱发送邮件
- mysql8设置数据库远程连接
- pip修改下载源为国内源
- win10看不到win7共享的文件夹的解决方法
- SQLyog连接 Mysql 8.0.11 报error no.1251- Client does not support authentic...
- 使用Oracle Net Manager配置Oracle数据库远程访问
- 将anaconda的下载源切换为国内的源
- Python+selenium+firefox设置代理IP
- selenium+firefox+js实现动态设置firefox浏览器代理IP
- scrapy文件下载(高新技术企业认定网)
- Python调用JS代码
- Chrome浏览器的overrides的使用
站点信息
- 建站时间:2021-01-01
- 网站程序:Django 3.1.2
- 文章统计:53篇
- 文章评论:35条
- 统计数据:
#2023-06-05 05:21 @ Bridget:
Pагty Snaps Photo Booth OC | Photo Booth Rental Orange County 12911 Dungan Ln, Gɑrden Grove, CA 92840 best photo booth rentɑls near me