顾名思义,网络爬虫就是指按照一定的规则,自动地抓取网络信息的程序或者脚本。在这里,并没有用到数据分析过滤等操作,所以只是普通的通用爬虫。
打开浏览器访问今天的倒霉蛋https://bilibili.iiilab.com,看到页面是用来获取B站视频地址的,页面非常整洁,一个输入框一个按钮,小孩都会操作。我一开始以为这应该就是简单的爬虫了。
按下F12或者Ctrl+Alt+I打开浏览器开发者工具。在输入框中输入示例B站网页https://www.bilibili.com/video/BV1Xt41157R4/?spm_id_from=autoNext,点击解析视频,页面不刷新说明是通过XHR改变页面内容的,观察右边打开的请求头(如图)
,发现方框的元素可能是必须要的,现在应该就有几个疑问了:
打开Fiddler(我这里用的是Fiddler Everywhere,试用一个月)再次点击解析视频请求一次,得到这些网页访问数据
一眼就瞅到了刚才看的/bilibili,点开看到是所想要的接口没错:
右击网址进入参数编辑页面:
现在开始逐步减少参数,把没必要的随机数去掉,先去掉大致判断无用的:
很好,正常请求不错:
接下来逐一尝试,最后发现这些参数都需要,除了请求后显示的Content-Length,有r有s有X-Client-Data更有Cookie中的7个参数,现在来调试Cookie,逐个减少,最后发现除了前4个参数一定,后面可以不需要:
虽然还有不少,但比之前7个强,现在浏览器开发者工具选择ALL,往上找Headers中的Set-Cookie参数,毕竟Cookie不会无故出现的。
找了半天找到两个网页提供了Set-Cookie参数:
加上一个应该不会变的zzz0821=1,应该就能组成完整的Cookie了。然后通过python代码来看一下Cookie全不全:
import requests

# Browser-like headers sent with every request made through the session.
headers = {
    "Origin": "https://bilibili.iiilab.com",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36",
}

session = requests.Session()
session.headers = headers

# Visit the two URLs whose responses carry Set-Cookie, so that the session's
# cookie jar gets populated; the response bodies themselves are not used.
session.get('https://bilibili.iiilab.com/')
session.post('https://service0.iiilab.com/sponsor/getByPage', data={'page': 'bilibili'})

# Show which cookies the server handed out.
print(session.cookies)
返回结果:
<RequestsCookieJar[<Cookie PHPSESSIID=413027716334 for .iiilab.com/>, <Cookie _gsp=GA9a9e0483fa7c11e0 for .iiilab.com/>, <Cookie iii_Session=daag2idlsdmg63g2kq77rko3g1 for .iiilab.com/>]>
然后加上zzz0821=1(注意,requests.session里的Cookie是requests.cookies.RequestsCookieJar类,是name/value类,不是普通的字符串,所以不能当成字符串加进去)
这里我们用requests.sessions.merge_cookies()方法来管理添加Cookie:
import requests

# Browser-like headers sent with every request made through the session.
headers = {
    "Origin": "https://bilibili.iiilab.com",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36",
}

session = requests.Session()
session.headers = headers

# Visit the two URLs whose responses carry Set-Cookie, so that the session's
# cookie jar gets populated; the response bodies themselves are not used.
session.get('https://bilibili.iiilab.com/')
session.post('https://service0.iiilab.com/sponsor/getByPage', data={'page': 'bilibili'})

# The jar is a RequestsCookieJar, not a string, so the fixed zzz0821 cookie is
# merged in through the helper instead of being concatenated.
extra_cookies = {'zzz0821': '1'}
session.cookies = requests.sessions.merge_cookies(session.cookies, extra_cookies)

print(session.cookies)
返回结果:
<RequestsCookieJar[<Cookie zzz0821=1 for />, <Cookie PHPSESSIID=415867716334 for .iiilab.com/>, <Cookie _gsp=GAe85ade5653953aa8 for .iiilab.com/>, <Cookie iii_Session=1kamm7aukmkbt8llqv4cna3605 for .iiilab.com/>]>
现在4个Cookie都全了,接下来来考虑其他参数。
点击刚才的/bilibili网址,在右侧点击Initiator,查看发送请求的对象:
jquery只是JavaScript扩展,并不是请求主体,所以跳过它,从上往下找到第一个不是jquery的vue-resource…,看它左边的名字就知道跟网站的功能有关,parseVideo差不多就是从网页中提取视频的意思。
点击后自动跳转至Source界面,正常显示一行代码,当然这肯定不是给人看的,我们点击蓝色按钮或左下角的一对花括号(如果不显示蓝色按钮提示就手动点击代码格式化):
此处正好一个ajax请求,我们选中u(t, site)并右击添加到watches,方便观察变量值的变化
并在该行左侧456行数字处点一下作为断点,然后再次点击按钮:
现在在右下角watches里能看到这是一串值,点击上面蓝色向右小箭头结束调试,再返回Network选项卡里查看最新的一次/bilibili访问:
没错,发现这竟然正是X-Client-Data的值,现在选中u并右击添加watches,再次点击按钮执行,发现这是一个函数:右击可以定位到函数位置
现在开始对里面每一个变量或函数添加观察并定位函数,直到找出所有相关函数。
由于里面有不少相关函数,就不在此全篇给出寻找函数的过程了(PS:找个文本文档,后缀名改成html,文件里写上一对标签<script></script>并在其中粘贴相关函数,其中md5函数是由e(t, e, n)函数命名而成的,所以可以把md5(…)函数写成e(…)),下面我贴上这段JavaScript代码(由于有两个u函数,所以我将主函数名改成了uu):
/**
 * Adds two integers with 32-bit wrap-around and returns the signed
 * 32-bit result.
 *
 * The sum is built from separate 16-bit halves so that the carry out of the
 * low half is propagated by hand.
 *
 * @param {number} t - First addend.
 * @param {number} e - Second addend.
 * @returns {number} (t + e) reduced to a signed 32-bit integer.
 */
function d(t, e) {
    var lowSum = (t & 0xffff) + (e & 0xffff);
    var highSum = (t >> 16) + (e >> 16) + (lowSum >> 16);
    return (highSum << 16) | (lowSum & 0xffff);
}
/**
 * Shared core of one MD5 step: adds the four inputs, rotates the sum left
 * and adds the result to `n`.
 *
 * The debug `console.log` that used to run on every call was removed; the
 * function is pure again and prints nothing.
 *
 * @param {number} t - Output of the round's bit-mixing expression.
 * @param {number} e - State word being replaced by this step.
 * @param {number} n - State word added after the rotation.
 * @param {number} r - Message word for this step.
 * @param {number} i - Left-rotation amount in bits.
 * @param {number} o - Additive constant for this step.
 * @returns {number} New value of the state word as a signed 32-bit integer.
 */
function s(t, e, n, r, i, o) {
    var sum = d(d(e, t), d(r, o));
    // Rotate left by i bits: the bits shifted out on the left re-enter on the right.
    var rotated = sum << i | sum >>> 32 - i;
    return d(rotated, n);
}
/**
 * MD5 step used for steps 0-15: mixes the state words as
 * (e AND n) OR (NOT e AND r) and hands the result to s().
 *
 * @param {number} t - State word being replaced.
 * @param {number} e - Second state word.
 * @param {number} n - Third state word.
 * @param {number} r - Fourth state word.
 * @param {number} i - Message word.
 * @param {number} o - Left-rotation amount.
 * @param {number} a - Additive constant.
 * @returns {number} New value for the replaced state word.
 */
function h(t, e, n, r, i, o, a) {
    var mixed = (e & n) | (~e & r);
    return s(mixed, t, e, i, o, a);
}
/**
 * MD5 step used for steps 16-31: mixes the state words as
 * (e AND r) OR (n AND NOT r) and hands the result to s().
 *
 * @param {number} t - State word being replaced.
 * @param {number} e - Second state word.
 * @param {number} n - Third state word.
 * @param {number} r - Fourth state word.
 * @param {number} i - Message word.
 * @param {number} o - Left-rotation amount.
 * @param {number} a - Additive constant.
 * @returns {number} New value for the replaced state word.
 */
function f(t, e, n, r, i, o, a) {
    var mixed = (e & r) | (n & ~r);
    return s(mixed, t, e, i, o, a);
}
/**
 * MD5 step used for steps 32-47: mixes the state words as
 * e XOR n XOR r and hands the result to s().
 *
 * @param {number} t - State word being replaced.
 * @param {number} e - Second state word.
 * @param {number} n - Third state word.
 * @param {number} r - Fourth state word.
 * @param {number} i - Message word.
 * @param {number} o - Left-rotation amount.
 * @param {number} a - Additive constant.
 * @returns {number} New value for the replaced state word.
 */
function g(t, e, n, r, i, o, a) {
    var mixed = e ^ n ^ r;
    return s(mixed, t, e, i, o, a);
}
/**
 * MD5 step used for steps 48-63: mixes the state words as
 * n XOR (e OR NOT r) and hands the result to s().
 *
 * @param {number} t - State word being replaced.
 * @param {number} e - Second state word.
 * @param {number} n - Third state word.
 * @param {number} r - Fourth state word.
 * @param {number} i - Message word.
 * @param {number} o - Left-rotation amount.
 * @param {number} a - Additive constant.
 * @returns {number} New value for the replaced state word.
 */
function p(t, e, n, r, i, o, a) {
    var mixed = n ^ (e | ~r);
    return s(mixed, t, e, i, o, a);
}
/**
 * Converts a string into a "byte string": every character of the result
 * has a char code in 0-255 and represents one UTF-8 byte of the input.
 *
 * encodeURIComponent() percent-encodes each UTF-8 byte as %XX with uppercase
 * hex digits (and encodes "%" itself), so turning every %XX back into a
 * single character yields the raw byte sequence.
 *
 * @param {string} t - Text to encode.
 * @returns {string} One character per UTF-8 byte of t.
 */
function n(t) {
    return encodeURIComponent(t).replace(/%([0-9A-F]{2})/g, function (match, hex) {
        return String.fromCharCode(parseInt(hex, 16));
    });
}
/**
 * Serialises an array of 32-bit words into a byte string, least significant
 * byte of each word first.
 *
 * @param {number[]} t - Words to serialise.
 * @returns {string} Four characters (char codes 0-255) per word.
 */
function c(t) {
    var pieces = [];
    for (var wordIndex = 0; wordIndex < t.length; wordIndex += 1) {
        for (var shift = 0; shift < 32; shift += 8) {
            pieces.push(String.fromCharCode(t[wordIndex] >>> shift & 255));
        }
    }
    return pieces.join("");
}
/**
 * Packs a byte string into an array of 32-bit words, four characters per
 * word with the first character in the least significant byte.
 *
 * @param {string} t - Byte string; only the low 8 bits of each char code are used.
 * @returns {number[]} Packed words.
 */
function l(t) {
    var words = [];
    // Pre-size the array to the number of complete words, then zero it.
    words[(t.length >> 2) - 1] = void 0;
    for (var w = 0; w < words.length; w += 1) {
        words[w] = 0;
    }
    for (var pos = 0; pos < t.length; pos += 1) {
        // A trailing partial word starts out undefined; OR-ing treats it as 0.
        words[pos >> 2] |= (t.charCodeAt(pos) & 255) << (pos % 4) * 8;
    }
    return words;
}
/**
 * MD5 compression over an array of little-endian 32-bit words.
 *
 * Pads `t` in place (0x80 terminator bit plus the bit length in the length
 * slot of the last block) and then runs 64 steps per 16-word block:
 * steps 0-15 use h(), 16-31 use f(), 32-47 use g() and 48-63 use p().
 *
 * @param {number[]} t - Message words; modified in place by the padding.
 * @param {number} e - Message length in bits.
 * @returns {number[]} The four state words as signed 32-bit integers.
 */
function u(t, e) {
    t[e >> 5] |= 128 << e % 32;
    t[14 + (e + 64 >>> 9 << 4)] = e;

    var mixers = [h, f, g, p];
    var rotations = [
        [7, 12, 17, 22],
        [5, 9, 14, 20],
        [4, 11, 16, 23],
        [6, 10, 15, 21]
    ];
    var constants = [
        -680876936, -389564586, 606105819, -1044525330,
        -176418897, 1200080426, -1473231341, -45705983,
        1770035416, -1958414417, -42063, -1990404162,
        1804603682, -40341101, -1502002290, 1236535329,
        -165796510, -1069501632, 643717713, -373897302,
        -701558691, 38016083, -660478335, -405537848,
        568446438, -1019803690, -187363961, 1163531501,
        -1444681467, -51403784, 1735328473, -1926607734,
        -378558, -2022574463, 1839030562, -35309556,
        -1530992060, 1272893353, -155497632, -1094730640,
        681279174, -358537222, -722521979, 76029189,
        -640364487, -421815835, 530742520, -995338651,
        -198630844, 1126891415, -1416354905, -57434055,
        1700485571, -1894986606, -1051523, -2054922799,
        1873313359, -30611744, -1560198380, 1309151649,
        -145523070, -1120210379, 718787259, -343485551
    ];

    var wordA = 1732584193;
    var wordB = -271733879;
    var wordC = -1732584194;
    var wordD = 271733878;

    for (var base = 0; base < t.length; base += 16) {
        var startA = wordA;
        var startB = wordB;
        var startC = wordC;
        var startD = wordD;

        for (var step = 0; step < 64; step += 1) {
            var round = step >> 4;
            // Which message word of the block this step consumes.
            var wordIndex;
            if (round === 0) {
                wordIndex = step;
            } else if (round === 1) {
                wordIndex = (5 * step + 1) % 16;
            } else if (round === 2) {
                wordIndex = (3 * step + 5) % 16;
            } else {
                wordIndex = (7 * step) % 16;
            }
            var updated = mixers[round](
                wordA, wordB, wordC, wordD,
                t[base + wordIndex], rotations[round][step % 4], constants[step]
            );
            // Rotate the roles so the next step replaces the next state word.
            wordA = wordD;
            wordD = wordC;
            wordC = wordB;
            wordB = updated;
        }

        wordA = d(wordA, startA);
        wordB = d(wordB, startB);
        wordC = d(wordC, startC);
        wordD = d(wordD, startD);
    }
    return [wordA, wordB, wordC, wordD];
}
/**
 * Raw digest of a string: encodes it with n(), packs the bytes with l(),
 * runs u() over them and serialises the resulting words with c().
 *
 * @param {string} t - Text to hash.
 * @returns {string} Digest as a byte string.
 */
function a(t) {
    var bytes = n(t);
    var words = l(bytes);
    // u() expects the message length in bits.
    var state = u(words, 8 * bytes.length);
    return c(state);
}
/**
 * Encodes a byte string as two hex digits per character.
 *
 * NOTE: the first digit is taken from bits 2-5 of the char code (shift by
 * 2), not from the high nibble (shift by 4), so the output is not the
 * conventional hex representation of the bytes.
 *
 * @param {string} t - Byte string to encode.
 * @returns {string} Two lowercase hex digits per input character.
 */
function o(t) {
    var digits = "0123456789abcdef";
    var encoded = "";
    for (var pos = 0; pos < t.length; pos += 1) {
        var code = t.charCodeAt(pos);
        encoded += digits.charAt(code >>> 2 & 15) + digits.charAt(code & 15);
    }
    return encoded;
}
/**
 * Keyed digest in the HMAC layout: hashes (key XOR inner pad) followed by
 * the message, then hashes (key XOR outer pad) followed by that result.
 *
 * @param {string} t - Key.
 * @param {string} e - Message.
 * @returns {string} Raw keyed digest as produced by c().
 */
function m(t, e) {
    var key = n(t);
    var message = n(e);
    var keyWords = l(key);
    var innerPad = [];
    var outerPad = [];
    innerPad[15] = outerPad[15] = void 0;

    // Keys longer than one 16-word block are hashed down first.
    if (keyWords.length > 16) {
        keyWords = u(keyWords, 8 * key.length);
    }
    for (var k = 0; k < 16; k += 1) {
        innerPad[k] = 909522486 ^ keyWords[k];
        outerPad[k] = 1549556828 ^ keyWords[k];
    }

    var innerDigest = u(innerPad.concat(l(message)), 512 + 8 * message.length);
    return c(u(outerPad.concat(innerDigest), 640));
}
/**
 * Digest entry point.
 *
 * @param {string} t - Text to hash.
 * @param {string} [e] - Optional key; when truthy the keyed variant m() is used.
 * @param {boolean} [n] - When truthy the raw byte string is returned instead
 *     of the o()-encoded text.
 * @returns {string} Digest in the requested form.
 */
function md5(t, e, n) {
    if (e) {
        var keyed = m(e, t);
        return n ? keyed : o(keyed);
    }
    var plain = a(t);
    return n ? plain : o(plain);
}
/**
 * Builds the X-Client-Data value for the string `t`.
 *
 * When the page appears to be driven by automation (webdriver flag or
 * attribute, PhantomJS globals), `t` is wrapped in the text of `o` instead
 * of the salt characters, which yields a different digest.
 *
 * @param {string} t - Value to sign.
 * @param {string} e - Salt source; two of its characters are picked using
 *     the first and last char codes of `t`.
 * @returns {string} md5() of the wrapped string.
 */
function uu(t, e) {
    var looksAutomated =
        window.navigator.webdriver === true ||
        window.document.documentElement.getAttribute("webdriver") ||
        window.callPhantom ||
        window._phantom;
    if (looksAutomated) {
        return md5(o + t + o);
    }
    var prefix = e.charAt(t.charCodeAt(0) % e.length);
    var suffix = e.charAt(t.charCodeAt(t.length - 1) % e.length);
    return md5(prefix + t + suffix);
}
代码挺长的,看着都眼花,其中u(t, e)for循环中的第一行特别长,一行将近2700字符,那行经过我自己的格式化后长这样:
u = p(
u = p(
u = p(
u = p(
u = g(
u = g(
u = g(
u = g(
u = f(
u = f(
u = f(
u = f(
u = h(
u = h(
u = h(
u = h(
i = u, c = h(
o = c, l = h(
a = l, s = h(
r = s, u, c, l, t[n], 7, -680876936), u, c, t[n + 1], 12, -389564586)
, s, u, t[n + 2], 17, 606105819)
, l, s, t[n + 3], 22, -1044525330)
, c = h(
c, l = h(
l, s = h(s, u, c, l, t[n + 4], 7, -176418897), u, c, t[n + 5], 12, 1200080426)
, s, u, t[n + 6], 17, -1473231341), l, s, t[n + 7], 22, -45705983)
, c = h(
c, l = h(
l, s = h(s, u, c, l, t[n + 8], 7, 1770035416), u, c, t[n + 9], 12, -1958414417)
, s, u, t[n + 10], 17, -42063)
, l, s, t[n + 11], 22, -1990404162)
, c = h(
c, l = h(l, s = h(s, u, c, l, t[n + 12], 7, 1804603682), u, c, t[n + 13], 12, -40341101)
, s, u, t[n + 14], 17, -1502002290)
, l, s, t[n + 15], 22, 1236535329)
, c = f(
c, l = f(
l, s = f(s, u, c, l, t[n + 1], 5, -165796510), u, c, t[n + 6], 9, -1069501632)
, s, u, t[n + 11], 14, 643717713)
, l, s, t[n], 20, -373897302)
, c = f(
c, l = f(
l, s = f(s, u, c, l, t[n + 5], 5, -701558691), u, c, t[n + 10], 9, 38016083)
, s, u, t[n + 15], 14, -660478335)
, l, s, t[n + 4], 20, -405537848)
, c = f(
c, l = f(
l, s = f(s, u, c, l, t[n + 9], 5, 568446438), u, c, t[n + 14], 9, -1019803690)
, s, u, t[n + 3], 14, -187363961)
, l, s, t[n + 8], 20, 1163531501)
, c = f(
c, l = f(
l, s = f(s, u, c, l, t[n + 13], 5, -1444681467), u, c, t[n + 2], 9, -51403784)
, s, u, t[n + 7], 14, 1735328473)
, l, s, t[n + 12], 20, -1926607734)
, c = g(
c, l = g(
l, s = g(s, u, c, l, t[n + 5], 4, -378558), u, c, t[n + 8], 11, -2022574463)
, s, u, t[n + 11], 16, 1839030562)
, l, s, t[n + 14], 23, -35309556)
, c = g(
c, l = g(
l, s = g(s, u, c, l, t[n + 1], 4, -1530992060), u, c, t[n + 4], 11, 1272893353)
, s, u, t[n + 7], 16, -155497632)
, l, s, t[n + 10], 23, -1094730640)
, c = g(
c, l = g(
l, s = g(s, u, c, l, t[n + 13], 4, 681279174), u, c, t[n], 11, -358537222)
, s, u, t[n + 3], 16, -722521979)
, l, s, t[n + 6], 23, 76029189)
, c = g(
c, l = g(
l, s = g(s, u, c, l, t[n + 9], 4, -640364487), u, c, t[n + 12], 11, -421815835)
, s, u, t[n + 15], 16, 530742520)
, l, s, t[n + 2], 23, -995338651)
, c = p(
c, l = p(
l, s = p(s, u, c, l, t[n], 6, -198630844), u, c, t[n + 7], 10, 1126891415)
, s, u, t[n + 14], 15, -1416354905)
, l, s, t[n + 5], 21, -57434055)
, c = p(
c, l = p(
l, s = p(s, u, c, l, t[n + 12], 6, 1700485571), u, c, t[n + 3], 10, -1894986606)
, s, u, t[n + 10], 15, -1051523)
, l, s, t[n + 1], 21, -2054922799)
, c = p(
c, l = p(
l, s = p(s, u, c, l, t[n + 8], 6, 1873313359), u, c, t[n + 15], 10, -30611744)
, s, u, t[n + 6], 15, -1560198380)
, l, s, t[n + 13], 21, 1309151649)
, c = p(
c, l = p(
l, s = p(s, u, c, l, t[n + 4], 6, -145523070), u, c, t[n + 11], 10, -1120210379)
, s, u, t[n + 2], 15, 718787259)
, l, s, t[n + 9], 21, -343485551),
这么一看还真不错,看的我果断关掉了这个界面,连仔细看的勇气都没了…不过我还是看了,用了好几十分钟,看到我眼睛贴到屏幕上,终于把这整段代码翻译成了python函数,虽然它可能不太好看,我也没有对它优化的想法,纯粹是为了不想多用一个execjs库,说到这里,JavaScript有个32位无符号右移运算的操作>>>,这是python没有的。我为了节省脑细胞,从网上找了一个方法:
def gt3(_, __):
    """Emulate JavaScript's unsigned right shift ``_ >>> __``.

    ``_`` is first reduced to its unsigned 32-bit value, then shifted right
    by ``__`` bits.
    """
    return (_ & 0xFFFFFFFF) >> __
这就完成了>>>运算,看着挺不可思议的,html中>是gt,又有三个>,所以我用gt3来命名这个函数。
翻译后的python函数如下:
def x_client_data(t, e='bilibili'):
    """Compute the ``X-Client-Data`` header value for the form field ``s``.

    The JavaScript routine this mirrors is an ordinary MD5 (same initial
    state, rotation amounts and additive constants) whose hex encoder takes
    ``(byte >> 2) & 15`` for the first digit of every byte instead of the
    high nibble. The digest therefore comes from ``hashlib`` and only the
    unusual encoder is reproduced by hand.

    :param t: non-empty string to sign (the ``s`` form field).
    :param e: salt source; two of its characters are picked with the code
        points of the first and last character of ``t``.
    :return: 32-character header value.
    :raises IndexError: if ``t`` or ``e`` is empty.
    """
    # Imported locally so the function works wherever it is pasted.
    import hashlib

    prefix = e[ord(t[0]) % len(e)]
    suffix = e[ord(t[-1]) % len(e)]
    # The JavaScript side hashes the UTF-8 bytes of the salted string.
    digest = hashlib.md5((prefix + t + suffix).encode('utf-8')).digest()

    alphabet = '0123456789abcdef'
    # Not regular hex: the first digit uses a shift of 2, exactly like the
    # JavaScript encoder.
    return ''.join(alphabet[(byte >> 2) & 15] + alphabet[byte & 15] for byte in digest)
这段代码在PyCharm里还是有很多波浪线的,作为强迫症的我还是比较倾向于遵守PEP8编码规范的,至少没有下划线,还整体好看。如果你们谁有能力让这里面编码规范标准化的话可以留言私信我。
这里面e='bilibili'是我通过反复观察watches里的变量得出的固定值,这里传入的参数t是Form Data中的s参数。
经过对变量的观察,很显然这个s是由第451行n = this.generateStr(this.link + "@" + e).toString(10);生成的,其中的随机数e是在刚才那个Source界面第450行e = Math.random().toString(10).substring(2)随机生成出来的,这也是Form Data中的r随机数参数(猜测是random缩写)。随机数简单,所以现在就剩一个Form Data中的s参数了。观察watches里的变量值,传入的字符串是bilibili视频地址@随机数,里面的generateStr函数经过查找找到了如下JavaScript代码:
/**
 * CRC-32 checksum of a string, computed over its UTF-8 bytes.
 *
 * Declared as a function instead of being assigned to an undeclared
 * identifier, so it also loads in strict mode and in ES modules.
 * The polynomial constant -306674912 is 0xEDB88320 as a signed 32-bit value.
 *
 * @param {string} t - Text to checksum.
 * @returns {number} Checksum as an unsigned 32-bit integer.
 */
function generateStr(t) {
    // Lookup table: entry n is n pushed through eight shift/XOR rounds.
    var table = [];
    for (var n = 0; n < 256; n += 1) {
        var entry = n;
        for (var bit = 0; bit < 8; bit += 1) {
            entry = entry & 1 ? -306674912 ^ entry >>> 1 : entry >>> 1;
        }
        table[n] = entry;
    }

    var crc = -1;
    function feed(byte) {
        crc = crc >>> 8 ^ table[255 & (crc ^ byte)];
    }

    for (var i = 0; i < t.length;) {
        var code = t.charCodeAt(i++);
        if (code < 128) {
            feed(code);
        } else if (code < 2048) {
            feed(192 | code >> 6 & 31);
            feed(128 | code & 63);
        } else if (code >= 55296 && code < 57344) {
            // Surrogate range: combine with the next code unit into four bytes.
            code = 64 + (code & 1023);
            var low = t.charCodeAt(i++) & 1023;
            feed(240 | code >> 8 & 7);
            feed(128 | code >> 2 & 63);
            feed(128 | low >> 6 & 15 | (code & 3) << 4);
            feed(128 | low & 63);
        } else {
            feed(224 | code >> 12 & 15);
            feed(128 | code >> 6 & 63);
            feed(128 | code & 63);
        }
    }
    return (-1 ^ crc) >>> 0;
}
第一个for循环里的参数很有意思,经过我的展开:
t = 1 & (
t = 1 & (
t = 1 & (
t = 1 & (
t = 1 & (
t = 1 & (
t = 1 & (
t = 1 & (
t = n) ?
-306674912 ^ t >>> 1 :
t >>> 1) ?
-306674912 ^ t >>> 1 :
t >>> 1) ?
-306674912 ^ t >>> 1 :
t >>> 1) ?
-306674912 ^ t >>> 1 :
t >>> 1) ?
-306674912 ^ t >>> 1 :
t >>> 1) ?
- 306674912 ^ t >>> 1 :
t >>> 1) ?
-306674912 ^ t >>> 1 :
t >>> 1) ?
-306674912 ^ t >>> 1 :
t >>> 1
真治愈我的强迫症啊,不过比上面的简单多了,经过我的转化,Python代码如下:
def generate_str(t):
    """Return the CRC-32 of ``t`` (UTF-8 encoded) as a decimal string.

    The JavaScript ``generateStr`` this mirrors builds the usual 256-entry
    table for the polynomial constant -306674912 (0xEDB88320) and feeds it
    the UTF-8 bytes of the string, which is what ``zlib.crc32`` computes.
    Using the library also gives correct results for non-ASCII text.

    :param t: text to checksum (``"<video url>@<random digits>"``).
    :return: unsigned 32-bit checksum rendered in base 10.
    """
    # Imported locally so the function works wherever it is pasted.
    import zlib

    checksum = zlib.crc32(t.encode('utf-8')) & 0xFFFFFFFF
    return str(checksum)
这个随机数根据JavaScript代码的观察,是先获取一个0到1之间的小数,然后去掉0.这两个字符,换句话说,他就是一个随机正整数,在Python中简简单单ran = str(random.random())[2:]就得到了。
同样获取参数r、s、x_client_data,使用JavaScript执行库获取参数值的代码如下:
import execjs

# Alternative to the pure-Python port: let a JavaScript runtime evaluate the
# site's own functions through execjs.
link = 'https://www.bilibili.com/video/BV1Xt41157R4/?spm_id_from=autoNext'

# Form field r: the digits of Math.random() that follow the leading "0.".
r = execjs.eval("Math.random().toString(10).substring(2)")

# Form field s: generateStr("<link>@<r>"), kept as a string.
s = str(execjs.compile("""
generateStr = function(t) {
var a = function() {
for (var t = 0, e = new Array(256), n = 0; 256 != n; ++n)
t = 1 & (t = 1 & (t = 1 & (t = 1 & (t = 1 & (t = 1 & (t = 1 & (t = 1 & (t = n) ? -306674912 ^ t >>>
1 : t >>> 1) ? -306674912 ^ t >>> 1 : t >>> 1) ? -306674912 ^ t >>> 1 :
t >>> 1) ? -306674912 ^ t >>> 1 : t >>> 1) ? -306674912 ^ t >>> 1 : t >>> 1) ? -
306674912 ^ t >>> 1 : t >>> 1) ? -306674912 ^ t >>> 1 : t >>> 1) ? -306674912 ^ t >>> 1 : t >>>
1,
e[n] = t;
return "undefined" != typeof Int32Array ? new Int32Array(e) : e
}();
return function(t) {
for (var e, n, r = -1, i = 0, o = t.length; i < o;)
r = (e = t.charCodeAt(i++)) < 128 ? r >>> 8 ^ a[255 & (r ^ e)] : e < 2048 ? (r = r >>> 8 ^ a[255 &
(r ^ (192 | e >> 6 & 31))]) >>> 8 ^ a[255 & (r ^ (128 | 63 & e))] : 55296 <= e && e < 57344 ? (
e = 64 + (1023 & e),
n = 1023 & t.charCodeAt(i++),
(r = (r = (r = r >>> 8 ^ a[255 & (r ^ (240 | e >> 8 & 7))]) >>> 8 ^ a[255 & (r ^ (128 | e >>
2 & 63))]) >>> 8 ^ a[255 & (r ^ (128 | n >> 6 & 15 | (3 & e) << 4))]) >>> 8 ^ a[255 & (r ^
(128 | 63 & n))]) : (r = (r = r >>> 8 ^ a[255 & (r ^ (224 | e >> 12 & 15))]) >>> 8 ^ a[
255 & (r ^ (128 | e >> 6 & 63))]) >>> 8 ^ a[255 & (r ^ (128 | 63 & e))];
return -1 ^ r
}(t) >>> 0
}
""").call("generateStr", f"{link}@{r}"))

# X-Client-Data header: uu(s, 'bilibili'). The embedded source is the
# site's code with the browser-automation check left out of uu() and without
# the debug console.log that used to sit in s(): it printed on every step,
# and a `console` object may not exist in every runtime execjs can select.
x_client_data = execjs.compile("""
function d(t, e) {
var n = (65535 & t) + (65535 & e);
return (t >> 16) + (e >> 16) + (n >> 16) << 16 | 65535 & n
}
function s(t, e, n, r, i, o) {
return d(function a(t, e) {
return t << e | t >>> 32 - e
}(d(d(e, t), d(r, o)), i), n)
}
function h(t, e, n, r, i, o, a) {
return s(e & n | ~e & r, t, e, i, o, a)
}
function f(t, e, n, r, i, o, a) {
return s(e & r | n & ~r, t, e, i, o, a)
}
function g(t, e, n, r, i, o, a) {
return s(e ^ n ^ r, t, e, i, o, a)
}
function p(t, e, n, r, i, o, a) {
return s(n ^ (e | ~r), t, e, i, o, a)
}
function n(t) {
return unescape(encodeURIComponent(t))
}
function c(t) {
var e, n = "", r = 32 * t.length;
for (e = 0; e < r; e += 8)
n += String.fromCharCode(t[e >> 5] >>> e % 32 & 255);
return n
}
function l(t) {
var e, n = [];
for (n[(t.length >> 2) - 1] = void 0,
e = 0; e < n.length; e += 1)
n[e] = 0;
var r = 8 * t.length;
for (e = 0; e < r; e += 8)
n[e >> 5] |= (255 & t.charCodeAt(e / 8)) << e % 32;
return n
}
function u(t, e) {
t[e >> 5] |= 128 << e % 32,
t[14 + (e + 64 >>> 9 << 4)] = e;
var n, r, i, o, a, s = 1732584193, u = -271733879, c = -1732584194, l = 271733878;
// console.log(h(s, u, c, l, t[0], 7, -680876936))
for (n = 0; n < t.length; n += 16)
u = p(u = p(u = p(u = p(u = g(u = g(u = g(u = g(u = f(u = f(u = f(u = f(u = h(u = h(u = h(u = h(i = u, c = h(o = c, l = h(a = l, s = h(r = s, u, c, l, t[n], 7, -680876936), u, c, t[n + 1], 12, -389564586), s, u, t[n + 2], 17, 606105819), l, s, t[n + 3], 22, -1044525330), c = h(c, l = h(l, s = h(s, u, c, l, t[n + 4], 7, -176418897), u, c, t[n + 5], 12, 1200080426), s, u, t[n + 6], 17, -1473231341), l, s, t[n + 7], 22, -45705983), c = h(c, l = h(l, s = h(s, u, c, l, t[n + 8], 7, 1770035416), u, c, t[n + 9], 12, -1958414417), s, u, t[n + 10], 17, -42063), l, s, t[n + 11], 22, -1990404162), c = h(c, l = h(l, s = h(s, u, c, l, t[n + 12], 7, 1804603682), u, c, t[n + 13], 12, -40341101), s, u, t[n + 14], 17, -1502002290), l, s, t[n + 15], 22, 1236535329), c = f(c, l = f(l, s = f(s, u, c, l, t[n + 1], 5, -165796510), u, c, t[n + 6], 9, -1069501632), s, u, t[n + 11], 14, 643717713), l, s, t[n], 20, -373897302), c = f(c, l = f(l, s = f(s, u, c, l, t[n + 5], 5, -701558691), u, c, t[n + 10], 9, 38016083), s, u, t[n + 15], 14, -660478335), l, s, t[n + 4], 20, -405537848), c = f(c, l = f(l, s = f(s, u, c, l, t[n + 9], 5, 568446438), u, c, t[n + 14], 9, -1019803690), s, u, t[n + 3], 14, -187363961), l, s, t[n + 8], 20, 1163531501), c = f(c, l = f(l, s = f(s, u, c, l, t[n + 13], 5, -1444681467), u, c, t[n + 2], 9, -51403784), s, u, t[n + 7], 14, 1735328473), l, s, t[n + 12], 20, -1926607734), c = g(c, l = g(l, s = g(s, u, c, l, t[n + 5], 4, -378558), u, c, t[n + 8], 11, -2022574463), s, u, t[n + 11], 16, 1839030562), l, s, t[n + 14], 23, -35309556), c = g(c, l = g(l, s = g(s, u, c, l, t[n + 1], 4, -1530992060), u, c, t[n + 4], 11, 1272893353), s, u, t[n + 7], 16, -155497632), l, s, t[n + 10], 23, -1094730640), c = g(c, l = g(l, s = g(s, u, c, l, t[n + 13], 4, 681279174), u, c, t[n], 11, -358537222), s, u, t[n + 3], 16, -722521979), l, s, t[n + 6], 23, 76029189), c = g(c, l = g(l, s = g(s, u, c, l, t[n + 9], 4, -640364487), u, c, t[n + 12], 11, -421815835), s, u, t[n + 15], 16, 
530742520), l, s, t[n + 2], 23, -995338651), c = p(c, l = p(l, s = p(s, u, c, l, t[n], 6, -198630844), u, c, t[n + 7], 10, 1126891415), s, u, t[n + 14], 15, -1416354905), l, s, t[n + 5], 21, -57434055), c = p(c, l = p(l, s = p(s, u, c, l, t[n + 12], 6, 1700485571), u, c, t[n + 3], 10, -1894986606), s, u, t[n + 10], 15, -1051523), l, s, t[n + 1], 21, -2054922799), c = p(c, l = p(l, s = p(s, u, c, l, t[n + 8], 6, 1873313359), u, c, t[n + 15], 10, -30611744), s, u, t[n + 6], 15, -1560198380), l, s, t[n + 13], 21, 1309151649), c = p(c, l = p(l, s = p(s, u, c, l, t[n + 4], 6, -145523070), u, c, t[n + 11], 10, -1120210379), s, u, t[n + 2], 15, 718787259), l, s, t[n + 9], 21, -343485551),
s = d(s, r),
u = d(u, i),
c = d(c, o),
l = d(l, a);
return [s, u, c, l]
}
function a(t) {
return function e(t) {
return c(u(l(t), 8 * t.length))
}(n(t))
}
function o(t) {
var e, n, r = "";
for (n = 0; n < t.length; n += 1)
e = t.charCodeAt(n),
r += "0123456789abcdef".charAt(e >>> 2 & 15) + "0123456789abcdef".charAt(15 & e);
return r
}
function m(t, e) {
return function s(t, e) {
var n, r, i = l(t), o = [], a = [];
for (o[15] = a[15] = void 0,
16 < i.length && (i = u(i, 8 * t.length)),
n = 0; n < 16; n += 1)
o[n] = 909522486 ^ i[n],
a[n] = 1549556828 ^ i[n];
return r = u(o.concat(l(e)), 512 + 8 * e.length),
c(u(a.concat(r), 640))
}(n(t), n(e))
}
function md5(t, e, n) {
return e ? n ? m(e, t) : function r(t, e) {
return o(m(t, e))
}(e, t) : n ? a(t) : function i(t) {
return o(a(t))
}(t)
}
function uu(t, e) {
var n = e.charAt(t.charCodeAt(0) % e.length),
r = e.charAt(t.charCodeAt(t.length - 1) % e.length);
return md5(n + t + r)
}
""").call("uu", s, 'bilibili')

# The three values needed for the request: form fields r and s, and the header.
print(r)
print(s)
print(x_client_data)
import html
import ctypes
import random
import requests
from urllib import parse
# 32-bit unsigned right shift (JavaScript's >>> operator).
def gt3(_, __):
    """Reduce ``_`` to its unsigned 32-bit value and shift it right by ``__`` bits."""
    return (_ & 0xFFFFFFFF) >> __
def x_client_data(t, e='bilibili'):
    """Compute the ``X-Client-Data`` header value for the form field ``s``.

    The JavaScript routine this mirrors is an ordinary MD5 (same initial
    state, rotation amounts and additive constants) whose hex encoder takes
    ``(byte >> 2) & 15`` for the first digit of every byte instead of the
    high nibble. The digest therefore comes from ``hashlib`` and only the
    unusual encoder is reproduced by hand.

    :param t: non-empty string to sign (the ``s`` form field).
    :param e: salt source; two of its characters are picked with the code
        points of the first and last character of ``t``.
    :return: 32-character header value.
    :raises IndexError: if ``t`` or ``e`` is empty.
    """
    # Imported locally so the function works wherever it is pasted.
    import hashlib

    prefix = e[ord(t[0]) % len(e)]
    suffix = e[ord(t[-1]) % len(e)]
    # The JavaScript side hashes the UTF-8 bytes of the salted string.
    digest = hashlib.md5((prefix + t + suffix).encode('utf-8')).digest()

    alphabet = '0123456789abcdef'
    # Not regular hex: the first digit uses a shift of 2, exactly like the
    # JavaScript encoder.
    return ''.join(alphabet[(byte >> 2) & 15] + alphabet[byte & 15] for byte in digest)
def generate_str(t):
    """Return the CRC-32 of ``t`` (UTF-8 encoded) as a decimal string.

    The JavaScript ``generateStr`` this mirrors builds the usual 256-entry
    table for the polynomial constant -306674912 (0xEDB88320) and feeds it
    the UTF-8 bytes of the string, which is what ``zlib.crc32`` computes.
    Using the library also gives correct results for non-ASCII text.

    :param t: text to checksum (``"<video url>@<random digits>"``).
    :return: unsigned 32-bit checksum rendered in base 10.
    """
    # Imported locally so the function works wherever it is pasted.
    import zlib

    checksum = zlib.crc32(t.encode('utf-8')) & 0xFFFFFFFF
    return str(checksum)
# Browser-like headers sent with every request made through the session.
headers = {
    "Origin": "https://bilibili.iiilab.com",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36",
}

session = requests.Session()
session.headers = headers

# Warm-up requests: their responses set the cookies the API call needs.
session.get('https://bilibili.iiilab.com/')
session.post('https://service0.iiilab.com/sponsor/getByPage', data={'page': 'bilibili'})

# Headers that only the API call carries.
session.headers["Referer"] = "https://bilibili.iiilab.com/"
session.headers["Content-Type"] = "application/x-www-form-urlencoded; charset=UTF-8"

# The jar is a RequestsCookieJar, so the fixed zzz0821 cookie is merged in
# through the helper rather than appended as text.
session.cookies = requests.sessions.merge_cookies(session.cookies, {'zzz0821': '1'})

link = 'https://www.bilibili.com/video/BV1Xt41157R4/?spm_id_from=autoNext'
# r: the digits of a random float after the leading "0.".
ran = str(random.random())[2:]
# s: checksum of "<link>@<r>"; the header is derived from s.
s = generate_str(f"{link}@{ran}")
session.headers["X-Client-Data"] = x_client_data(s)

form = {'link': link, 'r': ran, 's': s}
res = session.post('https://service0.iiilab.com/video/web/bilibili', data=form)
if res.ok:
    print(res.json())
以上就是完整的Python脚本代码,希望你不是直接划到这边来复制的,当然一般人可能觉得没必要爬这个,不管你怎么想都没有关系,对我来说这只是一次即兴训练(不过看JavaScript也看的头昏眼花的,眼球都要凸出来了,如果不是即兴,我还是想选择JavaScript执行库的…)。