您现在的位置是: 网站首页> 学习笔记> 爬虫 爬虫

腾讯滑块

2021-04-11 [滑块验证码] 7895人已围观

import requests
import cv2
from selenium import webdriver
import time
import numpy as np
import pyautogui
import random
from lxml.html import etree
from selenium.common.exceptions import NoSuchWindowException, WebDriverException


class CaptchaOne(object):
    '''
    Crawler for the glidedsky "crawler-captcha-1" level.

    Drives a Firefox window through Selenium, locates the gap of the slider
    captcha with OpenCV template matching, drags the slider with the real
    mouse via pyautogui and collects the numbers shown on each unlocked page
    in ``self.values``.
    '''

    # Screen coordinates (pixels) where the drag starts.  They are tied to the
    # author's screen/window layout and must be adjusted for other setups.
    SLIDER_START_X = 730
    SLIDER_START_Y = 655

    def __init__(self):
        # Numbers scraped from every page fetched so far.
        self.values = []
        self.headers = {
        }
        # Session cookies, one single-entry ``{name: value}`` dict per cookie.
        self.cookie = [

        ]
        self.drive = webdriver.Firefox()
        self.drive.maximize_window()
        self.drive.set_page_load_timeout(12)
        # Load the site once first; otherwise add_cookie complains that the
        # cookie domain is invalid.
        self.drive.get('http://www.glidedsky.com/level/web/crawler-captcha-1?page=1')
        for c in self.cookie:
            self.drive.add_cookie({
                'name': list(c.keys())[0],
                'value': list(c.values())[0],
            })

    def get_tracks(self, distance):
        '''
        Build a movement track that accelerates first and then decelerates.

        Uniformly accelerated motion:
            v = v0 + a*t
            s = v0*t + 1/2*a*t^2

        Not called by get_page; it is kept for drag implementations that move
        the slider step by step.

        :param distance: total distance to move
        :return: list of rounded per-step displacements, one per 0.5 time
                 unit.  Because every step is rounded and the last step may
                 overshoot, the sum is only approximately ``distance``.
        '''
        # Current velocity.
        v = 0
        # Duration of one step.
        t = 0.5
        tracks = []
        # Distance covered so far (unrounded).
        current = 0
        # Start decelerating once 3/5 of the distance is covered.
        mid = distance * 3 / 5
        while current < distance:
            # A smaller acceleration yields smaller steps, i.e. a finer track.
            a = 5 if current < mid else -6
            v0 = v
            # Displacement during this step.
            s = v0 * t + 1 / 2 * a * (t ** 2)
            current += s
            tracks.append(round(s))
            v = v0 + a * t
        return tracks

    def get_page(self, url):
        '''
        Fetch ``url``, solve its slider captcha and collect the page numbers.

        The attempt is repeated until it succeeds.  Retries are done in a loop
        rather than by recursion, so a long failure streak cannot exhaust the
        recursion limit and the method returns ``self.values`` on every path.

        Errors raised while switching proxy/user agent are not caught here and
        propagate to the caller.

        :param url: page to fetch
        :return: ``self.values`` (all numbers collected so far)
        '''
        while True:
            # Proxy and user agent are rotated before every attempt.
            self.drive.get("about:config")
            self.set_useragent()
            try:
                if self._attempt_page(url):
                    return self.values
            except Exception as e:
                print('Self Error: ', e)

    def _attempt_page(self, url):
        '''
        Run one load/solve/scrape attempt.

        :param url: page to fetch
        :return: True when the attempt is finished, False when it must be
                 retried
        '''
        self.drive.get(url)
        self.drive.implicitly_wait(10)
        # Switch into the frame that hosts the captcha.
        self.drive.switch_to.frame('tcaptcha_iframe')

        # Background image (with the gap) and slider piece image.
        # find_element('xpath', ...) is used because the find_element_by_*
        # helpers no longer exist in Selenium 4.3+.
        bg_url = self.drive.find_element('xpath', '//*[@id="cdn1"]').get_attribute('src')
        sl_url = self.drive.find_element('xpath', '//*[@id="cdn2"]').get_attribute('src')

        distance = self.get_distance(bg_url, sl_url)
        if distance['val'] <= 0:
            print(f'获取{url}失败, 原因:', distance['msg'])
            print(f'开始重新获取{url}...')
            return False

        self._drag_slider(distance['val'])
        time.sleep(5)
        try:
            if '拖动下方滑块完成拼图' in self.drive.page_source:
                # The captcha prompt is still displayed: the slide was rejected.
                print(f'获取{url}失败, 原因:滑动验证失败!')
                print(f'开始重新获取{url}...')
                time.sleep(2)
                return False
        except WebDriverException:
            # Reading page_source failed (NoSuchWindowException is a subclass
            # of WebDriverException).  The original code treats this as "the
            # captcha frame is gone" and scrapes the parent document.
            return self._collect_values(url)
        except Exception:
            print(f'获取{url}失败, 原因:滑动验证失败!')
            print(f'开始重新获取{url}...')
            return False
        # NOTE(review): the frame is still readable but no longer shows the
        # prompt; as in the original, nothing is scraped for this page.
        return True

    def _drag_slider(self, offset):
        '''
        Drag the slider ``offset`` pixels to the right with the real mouse.

        The pointer goes through two randomised waypoints before settling on
        the target so the movement is not a single straight, constant-speed
        line.

        :param offset: horizontal distance in pixels from the drag start
        '''
        x = self.SLIDER_START_X
        y = self.SLIDER_START_Y
        x0 = x
        pyautogui.moveTo(x=x + random.randint(-20, 20), y=y + random.randint(-10, 10),
                         duration=random.randint(25, 35) / 100)
        pyautogui.mouseDown()
        y += random.randint(2, 5)
        x0 = x0 + int(offset * random.randint(12, 18) / 20)
        pyautogui.moveTo(x0, y, duration=random.randint(25, 35) / 100)
        y += random.randint(-9, 0)
        # This waypoint lies beyond the target; the last move comes back to it.
        x0 = x0 + int(offset * random.randint(15, 25) / 20)
        pyautogui.moveTo(x0, y, duration=random.randint(25, 35) / 100)
        y += random.randint(0, 8)
        pyautogui.moveTo(x + offset, y, duration=random.randint(25, 35) / 100)
        time.sleep(random.randint(40, 75) / 100)
        pyautogui.mouseUp()

    def _collect_values(self, url):
        '''
        Scrape the numbers from the parent document into ``self.values``.

        :param url: page being fetched (used in log messages only)
        :return: True if at least one number was found, False otherwise
        '''
        self.drive.switch_to.parent_frame()
        res = etree.HTML(self.drive.page_source)
        vals = [
            int(item.strip())
            for item in res.xpath('//div[@class="card-body"]//div[@class="col-md-1"]/text()')
        ]
        if vals:
            self.values.extend(vals)
            print(vals)
            return True
        print(f'获取{url}失败, 原因:未获取到页面内容!')
        print(f'开始重新获取{url}...')
        return False

    def quit(self):
        '''Close the browser and end the WebDriver session.'''
        self.drive.quit()

    def get_img_from_net(self, bg_url, sl_url):
        '''
        Download both captcha images.

        :param bg_url: url of the background image containing the gap
        :param sl_url: url of the slider piece image
        :return: ``(bg_bytes, sl_bytes)``, or ``(False, False)`` if either
                 response status is not 200
        '''
        # A timeout keeps a stalled connection from blocking the crawler forever.
        bg_res = requests.get(bg_url, timeout=10)
        if bg_res.status_code != 200:
            return False, False
        sl_res = requests.get(sl_url, timeout=10)
        if sl_res.status_code != 200:
            return False, False
        return bg_res.content, sl_res.content

    def get_distance(self, bg_url, sl_url):
        '''
        Compute how far the slider has to be dragged to reach the gap.

        :param bg_url: url of the background image containing the gap
        :param sl_url: url of the slider piece image
        :return: dict ``{'val': offset, 'msg': text}``; ``val`` is 0 when the
                 images could not be downloaded or decoded, otherwise the
                 computed offset (which may itself be <= 0)
        '''
        bg_bytes, sl_bytes = self.get_img_from_net(bg_url, sl_url)
        # Either image missing means failure.  The previous check
        # ``not bg_img and sl_img`` never fired for ``(False, False)``.
        if not bg_bytes or not sl_bytes:
            return {'val': 0, 'msg': '获取验证码图片失败!'}

        # Decode the already downloaded bytes; the slider image used to be
        # requested a second time here.
        bg_img = cv2.imdecode(np.frombuffer(bg_bytes, np.uint8), cv2.IMREAD_GRAYSCALE)
        sl_img = cv2.imdecode(np.frombuffer(sl_bytes, np.uint8), cv2.IMREAD_GRAYSCALE)
        if bg_img is None or sl_img is None:
            return {'val': 0, 'msg': '获取验证码图片失败!'}

        # Scale both images to half size.  ndarray.shape is (rows, cols) while
        # cv2.resize expects (width, height).
        bg_rows, bg_cols = bg_img.shape[:2]
        bg_img = cv2.resize(bg_img, (int(bg_cols * 0.5), int(bg_rows * 0.5)))
        sl_rows, sl_cols = sl_img.shape[:2]
        sl_img = cv2.resize(sl_img, (int(sl_cols * 0.5), int(sl_rows * 0.5)))

        # 2-D array of match scores, one per candidate position.
        scores = cv2.matchTemplate(bg_img, sl_img, cv2.TM_CCOEFF_NORMED)
        # NOTE(review): this takes the x of the *minimum* location, although
        # TM_CCOEFF_NORMED scores the best match highest.  Kept unchanged
        # because the tuned offsets below depend on it; confirm it is intended.
        _, _, min_loc, _ = cv2.minMaxLoc(scores)
        # +12: transparent margin on the left of the slider piece (the original
        # comment says 13); -40: gap between the slider's left edge and the
        # left edge of the background image.
        offset = min_loc[0] + 12 - 40
        return {'val': offset, 'msg': '未获取到图片'}

    def set_useragent(self):
        '''
        Pick a random user agent and a fresh HTTP/HTTPS proxy for the browser.

        get_page calls this right after loading ``about:config``.
        NOTE(review): the proxy script uses ``Components.classes``, which
        presumably needs that privileged page and a Firefox version that
        still exposes it; confirm.

        :raises: whatever ``requests`` or Selenium raise when the proxy service
                 or the script execution fails
        '''
        ua_list = [
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
            "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
            "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
            "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
            "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
            "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
            "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
            "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
            "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
            "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
            "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24"
        ]

        # Override navigator.userAgent in the currently loaded document.
        js_code = '''
            var customUserAgent = "'''+random.choice(ua_list)+'''";

            //修改后的userAgent            
            Object.defineProperty(navigator, 'userAgent', {
              value: customUserAgent,
              writable: false
            });
            console.log(navigator.userAgent);
        '''
        self.drive.execute_script(js_code)

        # The proxy pool answers with JSON whose 'proxy' field is "host:port".
        ip = requests.get('http://nets.tpddns.cn:5010/get/', timeout=10).json()['proxy']
        proxy_parts = ip.split(':')
        host, port = proxy_parts[0], proxy_parts[1]
        js_base='''var pf = Components.classes["@mozilla.org/preferences-service;1"].getService(Components.interfaces.nsIPrefBranch);

            pf.setIntPref("network.proxy.type", 1);

            pf.setCharPref("network.proxy.http", "{0}");

            pf.setIntPref("network.proxy.http_port", {1});

            pf.setCharPref("network.proxy.ssl", "{2}");

            pf.setIntPref("network.proxy.ssl_port", {3});'''
        self.drive.execute_script(js_base.format(host, port, host, port))

if __name__ == '__main__':
    # Crawl pages 1..1000 of the level and print the running sum of all
    # numbers collected so far after each page.
    base_url = 'http://www.glidedsky.com/level/web/crawler-captcha-1?page={}'
    co = CaptchaOne()
    try:
        for page in range(1, 1001):
            print('page: ', page)
            co.get_page(base_url.format(page))
            print('sum: ', sum(co.values))
        time.sleep(5)
    finally:
        # Close the browser even if a page raised, so no Firefox process leaks.
        co.quit()

上一篇:git

下一篇:base64字体存为本地文件

相关文章

文章评论

#2023-06-05 05:21 @ Bridget:

Party Snaps Photo Booth OC | Photo Booth Rental Orange County 12911 Dungan Ln, Garden Grove, CA 92840 best photo booth rentals near me

#2023-06-06 21:57 @ Mattie:

Party Snaps Photo Booth OC | Photo Booth Rental Orange County 12911 Dungan Ln, Garden Grove, CA 92840 led rental

#2023-06-07 06:53 @ Windy:

Группа объявлений Ульяновск в телеграм. Размещение частных объявлений бесплатно! Коммерческие и рекламные объявления, согласно правил группы. Подпишись, чтобы не потерять! Объявления Ульяновска

#2023-06-07 11:39 @ Johnson:

Группа объявлений Нижнего Тагила в телеграм. Размещение частных объявлений бесплатно! Коммерческие и рекламные объявления, согласно правил группы. Подпишись, чтобы не потерять... Объявления Нижний Тагил

#2023-06-10 15:20 @ Kraig:

примерно на кривляться оформление сильно необходимых документов и доставим авто из ОАЭ под источник Самые Лучшие Микрозаймы Невзирая сверху то, что кредит показывается быстрым а также эффективным средством резолюции финансовых заморочек, жуть шиздец находят решение сверху этот шаг через сложности процедуры евонный оформления. [URL=https://credit-mikrozaim.com]Микрозайм Или Микрозаем[/URL] https://credit-mikrozaim.com/

#2023-06-12 01:53 @ Brigette:

Party Snaps Photo Booth OC | Photo Booth Rental Orange County 12911 Dungan Ln, Garden Grove, CA 92840 VOGUE photo booth rental Laguna Niguel

#2023-06-12 16:49 @ Helen:

Party Snaps Photo Booth OC | Photo Booth Rental Orange County 12911 Dungan Ln, Garden Grove, CA 92840 photo booth rental baby shower

#2023-06-14 09:21 @ toursex:

<a href=https://viagr.cfd>over the counter viagra substitute</a> Recombination efficiency

#2023-06-29 04:07 @ Playelo:

The shaking is usually fast, about 4 to 12 movements per second <a href=https://sildenafi.cfd>how often should you take viagra</a> Doctors and patients are always looking to identify ways to tell if cancer treatments are working

#2024-03-25 22:32 @ Doug:

Sabung Ayam Online

#2024-03-31 01:53 @ Jayme:

Situs Sabung Ayam Digmaan

#2024-06-02 08:41 @ nivaawarp:

In this model the most significant NF κB subunit appears to be NF κB1, as mice lacking NF κB2 and c Rel demonstrated few differences in response compared to WT mice <a href=https://cialis.lat/discover-the-best-prices-for-cialis>brand name cialis online</a>

#2024-06-06 12:21 @ nivaawarp:

<a href=https://cialis.lat/discover-the-best-prices-for-cialis>real cialis no generic</a> Clomid stimulates the production of GnRH GnRH, testosterone, and also prevents the risk of developing gynecomastia male breast growth due to excessive fluid retention

#2024-10-07 14:42 @ nivaawarp:

<a href=https://enhanceyourlife.mom/>where to buy priligy in usa</a> They do care and can only do so much

#2024-10-13 06:04 @ nivaawarp:

Caplan L, Pittman CB, Zeringue AL, Scherrer JF, Wehmeier KR, Cunningham FE, Eisen SA, McDonald JR 2010 An observational study of musculoskeletal pain among patients receiving bisphosphonate therapy <a href=https://enhanceyourlife.mom/>priligy price</a> Curiously, though, the AFL CIO and the American Conservative Union — two groups which rarely agree on anything — also opposed the bill, supporting instead some kind of market mechanism to determine royalty rates

添加评论





本栏推荐

站点信息

  • 建站时间:2021-01-01
  • 网站程序:Django 3.1.2
  • 文章统计:53篇
  • 文章评论:35条
  • 统计数据