您当前的位置:首页 > 计算机 > 编程开发 > Python

爬虫--破解验证码的几种方式

时间:08-16来源:作者:点击数:
城东书院 www.cdsy.xyz

1. 使用 Selenium 打开页面,由人工手动输入验证码

2. 使用打码平台(推荐),例如超级鹰:http://www.chaojiying.com/price.html

3. 使用机器学习模型自动识别验证码

先到第三方打码平台(超级鹰)注册账号,然后下载其提供的 Python 接口压缩包,其中的客户端代码如下:

#!/usr/bin/env python
# coding:utf-8

import requests
from hashlib import md5

class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-solving service.

    Every request carries the account name, the MD5 hex digest of the
    password and the software ID, merged with call-specific parameters.
    """

    def __init__(self, username, password, soft_id):
        """Store the credentials and prepare the shared request defaults.

        username: account name, sent as the ``user`` form field.
        password: plain-text password (text string); only its MD5 hex
            digest is kept and sent, as the ``pass2`` form field.
        soft_id: software ID, sent as the ``softid`` form field.
        """
        self.username = username
        # md5() needs bytes, so encode the text password first.
        password = password.encode('utf8')
        self.password = md5(password).hexdigest()
        self.soft_id = soft_id
        # Form fields included in every request.
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        # HTTP headers sent with every request.
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload a captcha image and return the decoded JSON response.

        im: image bytes
        codetype: captcha type, see http://www.chaojiying.com/price.html
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        # The image is uploaded as a multipart file field named "userfile".
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post('http://upload.chaojiying.net/Upload/Processing.php', data=params, files=files, headers=self.headers)
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly solved captcha and return the decoded JSON response.

        im_id: image ID of the captcha being reported
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post('http://upload.chaojiying.net/Upload/ReportError.php', data=params, headers=self.headers)
        return r.json()


if __name__ == '__main__':
    # Replace 96001 with a software ID generated under User Center >> Software ID.
    chaojiying = Chaojiying_Client('超级鹰用户名', '超级鹰用户名的密码', '96001')
    # Replace a.jpg with the local image path (on Windows the path may need //).
    # The with-block closes the file as soon as the bytes are read.
    with open('a.jpg', 'rb') as image_file:
        im = image_file.read()
    # 1902 is the captcha type (official site >> price list).
    # print is called as a function so the script also runs on Python 3.
    print(chaojiying.PostPic(im, 1902))

在爬虫中使用超级鹰客户端(以人人网登录为例):

from chaojiying_Python.chaojiying import Chaojiying_Client
import random
import requests

# Browser-like User-Agent sent with every request made by this script.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/85.0.4183.83 Safari/537.36'
    ),
}


# 验证码url
# http://icode.renren.com/getcode.do?t=web_login&rnd=0.33034738219753246
def get_code():
    """Download a login captcha image and return the text Chaojiying reads from it.

    The image is fetched through the module-level ``session`` with the
    module-level ``headers``, saved to ``code.jpg`` in the working directory,
    and submitted to Chaojiying as captcha type 1902.

    Returns the ``pic_str`` field of the Chaojiying JSON response.
    """
    # A random "rnd" query value is appended to the captcha URL on each call.
    url = 'http://icode.renren.com/getcode.do?t=web_login&rnd=' + str(random.random())
    response = session.get(url, headers=headers)
    image_bytes = response.content

    # Keep a local copy of the captcha image.
    with open('code.jpg', 'wb') as fp:
        fp.write(image_bytes)

    # Solve the captcha with Chaojiying. The downloaded bytes are passed
    # directly instead of re-opening code.jpg, which previously left an
    # unclosed file handle behind.
    # NOTE(review): the account credentials are hard-coded here; consider
    # loading them from configuration instead.
    chaojiying = Chaojiying_Client('niejeff', 'abcdef123456', '898304')
    code = chaojiying.PostPic(image_bytes, 1902)['pic_str']
    print(code)
    return code


# 登录
def login(code):
    """Post the login form with the solved captcha and print the response body.

    code: captcha text, sent as the ``icode`` form field.

    Uses the module-level ``session`` and ``headers``.
    """
    print(code)

    # Form fields, in the order they are submitted.
    form = dict(
        email="18566218480",
        icode=code,
        origURL="http://www.renren.com/home",
        domain="renren.com",
        key_id='1',
        captcha_type="web_login",
        password="7ed07397ab7e42f74bad38c9834a208089cfb3d17fd5a356a0fa02f6a760fbdd",
        rkey="ef5b7e827f32a9a466aa5259f890f4a6",
        f="",
    )

    # Send the login request and show the decoded response body.
    reply = session.post(
        'http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2020732118628',
        headers=headers,
        data=form,
    )
    body = reply.content.decode()
    print(body)


# 登录后
def get_profile():
    """Fetch the profile page through the module-level ``session`` and print its text."""
    profile_page = session.get(
        'http://www.renren.com/548819077/profile',
        headers=headers,
    )
    print(profile_page.text)


if __name__ == '__main__':

    # A single Session stores the cookies so every request below belongs to
    # the same session; the helper functions read it as the module-level
    # name ``session``. The with-block closes the session when the run
    # finishes, which the original never did.
    with requests.Session() as session:

        # Fetch and solve the captcha first.
        code = get_code()

        # Log in with the solved captcha.
        login(code)

        # Once logged in, fetch the profile page.
        get_profile()
城东书院 www.cdsy.xyz
方便获取更多学习、工作、生活信息请关注本站微信公众号城东书院 微信服务号城东书院 微信订阅号
推荐内容
相关内容
栏目更新
栏目热门
本栏推荐