地址:
第一题地址

题目:
抓取所有(5页)机票的价格,并计算所有机票价格的平均值,填入答案。

初探:
进入网站,惯例先开F12康康

这里被 debugger 卡住了;

你可以 选中行号点右键点 Edit breakpoint

输入 Conditional breakpoints=true,然后点回车,刷新下

如果你比较懒,你也可以在行号点右键,
点击 Add conditional breakpoint,输入 false,然后点回车,刷新下

但是如果你跟我一样懒的话,我们直接点 Never pause here 就行

抓包分析:
点击 Network ,康康接口参数,只有一个参数m,长成这样应该是被加密了;

刷新下,发现m一直在变,但后面有一串乱码变化不大;

用 Chrome 自带的 视图解析 功能解析下url:视图解码后发现是 | 后面跟着一串数值,每次刷新都在小幅上涨,盲猜应该是时间。

也就是说只要解开m的构造逻辑,我们就可以开始爬数据了;

关键点定位:
既然知道 m 是唯一的请求参数,那就好办了。

先直接在源码里简单搜索下,看看有没有思路。

点击 Elements 跳到页面的源码页,ctrl + f 唤醒搜索框,直接搜索 m=

Bingo!一发入魂,我们来看看找到了什么

一个名为 oo0O0 的方法,看起来这个命名不是特别规范;不管它,我们直接看变量 m

var m = “abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/=”,
1
这个 m 初始值,不是我们要找那个 m ;我们看看下一个

window.url = “/api/match/1”;
1
这不是我们的 url 吗?我们找到 m 确认下

var m = oo0O0(timestamp.toString()) + window.f;
1
没错,这就是我们要找的那个 m,oo0O0 这个函数我们刚刚也见过;在浏览器里看太伤眼了,我们把这两段 js 脚本拷出来美化下;

w();
dd();
function oo0O0(mw) {
window.b = “”;
for (var i = 0, len = window.a.length; i < len; i++) {
console.log(window.a[i]);
window.b += String[document.e + document.g](
window.a[i][document.f + document.h]() – i – window.c
);
}
var U = [“W5r5W6VdIHZcT8kU”, “WQ8CWRaxWQirAW==”];
var J = function (o, E) {
o = o – 0x0;
var N = U[o];
if (J[“bSSGte”] === undefined) {
var Y = function (w) {
var m =
“abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/=”,
T = String(w)[“replace”](/=+$/, “”);
var A = “”;
for (
var C = 0x0, b, W, l = 0x0;
(W = T[“charAt”](l++));
~W && ((b = C % 0x4 ? b * 0x40 + W : W), C++ % 0x4)
? (A += String[“fromCharCode”](0xff & (b >> ((-0x2 * C) & 0x6))))
: 0x0
) {
W = m[“indexOf”](W);
}
return A;
};
var t = function (w, m) {
var T = [],
A = 0x0,
C,
b = “”,
W = “”;
w = Y(w);
for (var R = 0x0, v = w[“length”]; R < v; R++) {
W +=
“%” + (“00” + w[“charCodeAt”](R)[“toString”](0x10))[“slice”](-0x2);
}
w = decodeURIComponent(W);
var l;
for (l = 0x0; l < 0x100; l++) {
T[l] = l;
}
for (l = 0x0; l < 0x100; l++) {
(A = (A + T[l] + m[“charCodeAt”](l % m[“length”])) % 0x100),
(C = T[l]),
(T[l] = T[A]),
(T[A] = C);
}
(l = 0x0), (A = 0x0);
for (var L = 0x0; L < w[“length”]; L++) {
(l = (l + 0x1) % 0x100),
(A = (A + T[l]) % 0x100),
(C = T[l]),
(T[l] = T[A]),
(T[A] = C),
(b += String[“fromCharCode”](
w[“charCodeAt”](L) ^ T[(T[l] + T[A]) % 0x100]
));
}
return b;
};
(J[“luAabU”] = t), (J[“qlVPZg”] = {}), (J[“bSSGte”] = !![]);
}
var H = J[“qlVPZg”][o];
return (
H === undefined
? (J[“TUDBIJ”] === undefined && (J[“TUDBIJ”] = !![]),
(N = J[“luAabU”](N, E)),
(J[“qlVPZg”][o] = N))
: (N = H),
N
);
};
eval(
atob(window[“b”])[J(“0x0”, “]dQW”)](J(“0x1”, “GTu!”), “\x27” + mw + “\x27”)
);
return “”;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
window.url = “/api/match/1”;
request = function () {
var timestamp = Date.parse(new Date()) + 100000000;
var m = oo0O0(timestamp.toString()) + window.f;
var list = { page: window.page, m: m + “丨” + timestamp / 1000 };
$.ajax({
url: window.url,
dataType: “json”,
async: false,
data: list,
type: “GET”,
beforeSend: function (request) { },
success: function (data) {
data = data.data;
let html = “”;
let us_sign = `<div class=”b-airfly”><div class=”e-airfly”data-reactid=”.1.3.3.2.0.$KN5911.0″><div class=”col-trip”data-reactid=”.1.3.3.2.0.$KN5911.0.0″><div class=”s-trip”data-reactid=”.1.3.3.2.0.$KN5911.0.0.0″><div class=”col-airline”data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.0″><div class=”d-air”data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.0.0:$0″><div class=”air”data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.0.0:$0.0″><span data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.0.0:$0.0.1″>中国联合航空</span></div><div class=”num”data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.0.0:$0.1″><span class=”n”data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.0.0:$0.1.0″>KN5911</span><span class=”n”data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.0.0:$0.1.1″>波音737(中)</span><noscript data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.0.0:$0.1.2″></noscript></div></div><noscript data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.0.1″></noscript></div><div class=”col-time”data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.1″><div class=”sep-lf”data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.1.0″><h2 data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.1.0.0″>13:50</h2><p class=”airport”data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.1.0.1″><span data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.1.0.1.0″>大兴国际机场</span><span data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.1.0.1.1″></span></p></div><div class=”sep-ct”data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.1.1″><div class=”range”data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.1.1.0″>3小时40分钟</div><div class=”line”data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.1.1.1″></div></div><div class=”sep-rt”data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.1.2″><noscript data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.1.2.0″></noscript><h2 data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.1.2.1″>17:30</h2><p class=”airport”data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.1.2.2″><span data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.1.2.2.0″>宝安机场</span></p></div><noscript data-reactid=”.1.3.3.2.0.$KN5911.0.0.0.1.3″></noscript></div></div></div><div class=”col-price”data-reactid=”.1.3.3.2.0.$KN5911.0.1″><p class=”prc”data-reactid=”.1.3.3.2.0.$KN5911.0.1.0″><span 
data-reactid=”.1.3.3.2.0.$KN5911.0.1.0.0″><i class=”rmb”data-reactid=”.1.3.3.2.0.$KN5911.0.1.0.0.0″>&yen;</i><span class=”fix_price”data-reactid=”.1.3.3.2.0.$KN5911.0.1.0.0.1″><span class=”prc_wp”style=”width:48px”>price_sole</span></span></span></p><div class=”vim”data-reactid=”.1.3.3.2.0.$KN5911.0.1.1″><span class=”v dis”data-reactid=”.1.3.3.2.0.$KN5911.0.1.1.$0″></span></div></div><div class=”col-fold”data-reactid=”.1.3.3.2.0.$KN5911.0.2″><p class=”fd”data-reactid=”.1.3.3.2.0.$KN5911.0.2.0″>收起</p></div></div><noscript data-reactid=”.1.3.3.2.0.$KN5911.1″></noscript></div>`;
let choice = [
“中国南方航空”,
“吉祥航空”,
“奥凯航空”,
“九元航空”,
“长龙航空”,
“东方航空”,
“中国国际航空”,
“深圳航空”,
“海南航空”,
“春秋航空”,
“上海航空”,
“西部航空”,
“重庆航空”,
“西藏航空”,
“中国联合航空”,
“云南祥鹏航空”,
“云南英安航空”,
“厦门航空”,
“天津航空”,
“山东航空”,
“四川航空”,
“华夏航空”,
“长城航空”,
“成都航空有”,
“北京首都航空”,
“中华航空”,
“意大利国家航空公司”,
“印度百捷航空”,
“越南航空”,
“远东航空”,
“印度航空公司”,
“印度捷特航空有限公司”,
“以色列航空公司”,
“意大利航空”,
“伊朗航空公司”,
“印度尼西亚鹰航空公司”,
“英国航空公司”,
“西方天空航空”,
“西捷航空”,
“西班牙欧洲航空公司”,
“西班牙航空公司”,
“中国南方航空”,
“吉祥航空”,
“奥凯航空”,
“九元航空”,
“长龙航空”,
“东方航空”,
“中国国际航空”,
“深圳航空”,
“海南航空”,
“春秋航空”,
“上海航空”,
“西部航空”,
“重庆航空”,
“西藏航空”,
“中国联合航空”,
“云南祥鹏航空”,
“云南英安航空”,
“厦门航空”,
“天津航空”,
“山东航空”,
“四川航空”,
“华夏航空”,
“长城航空”,
“成都航空有”,
“北京首都航空”,
“中华航空”,
“意大利国家航空公司”,
“印度百捷航空”,
“越南航空”,
“远东航空”,
“印度航空公司”,
“印度捷特航空有限公司”,
“以色列航空公司”,
“意大利航空”,
“伊朗航空公司”,
“印度尼西亚鹰航空公司”,
“英国航空公司”,
“西方天空航空”,
“西捷航空”,
“西班牙欧洲航空公司”,
“西班牙航空公司”,
];
let op = 1;
let jic = [
“北京首都国际机场”,
“上海虹桥国际机场”,
“上海浦东国际机场”,
“天津滨海国际机场”,
“太原武宿机场”,
“呼和浩特白塔机场”,
“沈阳桃仙国际机场”,
“大连周水子国际机场”,
“长春大房身机场”,
“哈尔滨阎家岗国际机场”,
“齐齐哈尔三家子机场”,
“佳木斯东郊机场”,
“厦门高崎国际机场”,
“福州长乐国际机场”,
“杭州萧山国际机场”,
“合肥骆岗机场”,
“宁波栎社机场”,
“南京禄口国际机场”,
“广州白云国际机场”,
“深圳宝安国际机场”,
“长沙黄花机场”,
“海口美亚机场”,
“武汉天河机场”,
“济南遥墙机场”,
“青岛流亭机场”,
“南宁吴墟机场”,
“三亚凤凰国际机场”,
“重庆江北国际机场”,
“成都双流国际机场”,
“昆明巫家坝国际机场”,
“昆明长水国际机场”,
“桂林两江国际机场”,
“西安咸阳国际机场”,
“兰州中川机场”,
“贵阳龙洞堡机场”,
“拉萨贡嘎机场”,
“乌鲁木齐地窝堡机场”,
“南昌向塘机场”,
“郑州新郑机场”,
“北京首都国际机场”,
“上海虹桥国际机场”,
“上海浦东国际机场”,
“天津滨海国际机场”,
“太原武宿机场”,
“呼和浩特白塔机场”,
“沈阳桃仙国际机场”,
“大连周水子国际机场”,
“长春大房身机场”,
“哈尔滨阎家岗国际机场”,
“齐齐哈尔三家子机场”,
“佳木斯东郊机场”,
“厦门高崎国际机场”,
“福州长乐国际机场”,
“杭州萧山国际机场”,
“合肥骆岗机场”,
“宁波栎社机场”,
“南京禄口国际机场”,
“广州白云国际机场”,
“深圳宝安国际机场”,
“长沙黄花机场”,
“海口美亚机场”,
“武汉天河机场”,
“济南遥墙机场”,
“青岛流亭机场”,
“南宁吴墟机场”,
“三亚凤凰国际机场”,
“重庆江北国际机场”,
“成都双流国际机场”,
“昆明巫家坝国际机场”,
“昆明长水国际机场”,
“桂林两江国际机场”,
“西安咸阳国际机场”,
“兰州中川机场”,
“贵阳龙洞堡机场”,
“拉萨贡嘎机场”,
“乌鲁木齐地窝堡机场”,
“南昌向塘机场”,
“郑州新郑机场”,
];
if (window.page) {
} else {
window.page = 1;
}
$.each(data, function (index, val) {
html += us_sign
.replace(“price_sole”, val.value)
.replace(“中国联合航空”, choice[op * window.page])
.replace(“大兴国际”, jic[parseInt((op * window.page) / 2) + 1])
.replace(
“宝安机场”,
jic[jic.length – parseInt((op * window.page) / 2) – 1]
);
op += 1;
});
$(“.m-airfly-lst”).text(“”).append(html);
},
complete: function () { },
error: function () {
alert(
“数据拉取失败。可能是触发了风控系统,若您是正常访问,请使用谷歌浏览器无痕模式,并且校准电脑的系统时间重新尝试”
);
alert(“生而为虫,我很抱歉,请刷新页面,查看问题是否存在”);
$(“.page-message”).eq(0).addClass(“active”);
$(“.page-message”).removeClass(“active”);
},
});
};
request();
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
加密分析:
把无关紧要的代码缩进下

为了你们方便观看,我再贴个删除了缩进代码后的代码版出来

w();
dd();
function oo0O0(mw) {
window.b = “”;
for (var i = 0, len = window.a.length; i < len; i++) {
console.log(window.a[i]);
window.b += String[document.e + document.g](
window.a[i][document.f + document.h]() – i – window.c
);
}
var U = [“W5r5W6VdIHZcT8kU”, “WQ8CWRaxWQirAW==”];
var J = function (o, E) {缩进};
eval(
atob(window[“b”])[J(“0x0”, “]dQW”)](J(“0x1”, “GTu!”), “\x27” + mw + “\x27”)
);
return “”;
}

window.url = “/api/match/1”;
request = function () {
var timestamp = Date.parse(new Date()) + 100000000;
var m = oo0O0(timestamp.toString()) + window.f;
var list = { page: window.page, m: m + “丨” + timestamp / 1000 };
$.ajax({
url: window.url,
dataType: “json”,
async: false,
data: list,
type: “GET”,
beforeSend: function (request) { },
success: function (data) {缩进},
complete: function () { },
error: function () {缩进},
});
};
request();
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
我们主要看看 m

var timestamp = Date.parse(new Date()) + 100000000;
var m = oo0O0(timestamp.toString()) + window.f;
var list = { page: window.page, m: m + “丨” + timestamp / 1000 };
1
2
3
timestamp是个时间戳;
m 是 时间戳 timestamp 通过 oo0O0函数 后加上 window.f 的值;

通过观察我们发现 函数oo0O0 返回的是个空值。
那 m 就是 window.f,四舍五入一下就是

var m = window.f;
1
验证下,回到浏览器,点击 Console,在底下蓝色箭头处输入 window.f

点击 Network,我们可以看到 window.f 和 m 是一样的

结论成立 window.f 就是 m 的一部分,但 | 后面那串数值是怎么回事呢?
我们再看看前面的代码

后面那串数值就是 时间戳 ,拼接数据的时候把 m 和 时间戳 拼在了一起;
那么 window.f 是什么?又是在哪赋的值呢?

找了下拷下来的两个方法和网页的源码,都没找到,应该是藏起来了;
我们回去看看拷下来的方法里面有没有什么看漏了的

eval(atob(window[“b”])[J(“0x0”, “]dQW”)](J(“0x1”, “GTu!”), “\x27” + mw + “\x27”));
1
eval() 函数可计算某个字符串,并执行其中的 JavaScript 代码。
atob() 方法用于解码使用 base-64 编码的字符串。
1
2
好家伙,果然看漏了。在 oo0O0 函数中,有个 eval 很可疑,window.f 应该就是在这里被加密的,我们来看下到底是个什么玩意,
回到浏览器控制台,点击 Console 在底部蓝色箭头输入 atob(window[“b”])

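好家伙,直接就抛了个方法出来,看着熟悉的 md5,应该就是个 md5 的加密算法,我们拷下来美化下。注意:这份实现和标准 MD5 并不完全一致(chrsz 是 16 而不是常见的 8,部分轮常量也和标准值不同),所以后面不能直接用 Python 的 hashlib.md5 代替,必须原样使用这段 JS。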

// Format switches shared by the digest helpers below.
var hexcase = 0; // truthy selects the upper-case hex alphabet in binl2hex
var b64pad = ""; // appended by binl2b64 once output runs past the input's bits
var chrsz = 16; // bits taken from each input character by str2binl / binl2str
// Digest of string `a`, rendered through binl2hex.
function hex_md5(a) {
  var words = str2binl(a);
  var bitLength = a.length * chrsz;
  return binl2hex(core_md5(words, bitLength));
}
// Digest of string `a`, rendered through binl2b64.
function b64_md5(a) {
  var words = str2binl(a);
  var bitLength = a.length * chrsz;
  return binl2b64(core_md5(words, bitLength));
}
// Digest of string `a`, rendered through binl2str.
function str_md5(a) {
  var words = str2binl(a);
  var bitLength = a.length * chrsz;
  return binl2str(core_md5(words, bitLength));
}
// Keyed digest (key `a`, data `b`), rendered through binl2hex.
function hex_hmac_md5(a, b) {
  var digestWords = core_hmac_md5(a, b);
  return binl2hex(digestWords);
}
// Keyed digest (key `a`, data `b`), rendered through binl2b64.
function b64_hmac_md5(a, b) {
  var digestWords = core_hmac_md5(a, b);
  return binl2b64(digestWords);
}
// Keyed digest (key `a`, data `b`), rendered through binl2str.
function str_hmac_md5(a, b) {
  var digestWords = core_hmac_md5(a, b);
  return binl2str(digestWords);
}
// Self-check: true when hex_md5("abc") equals the digest literal below.
// NOTE(review): the literal is the well-known standard MD5 of "abc"; with
// chrsz = 16 in this listing the check presumably fails — verify before
// relying on it.
function md5_vm_test() {
  return hex_md5("abc") === "900150983cd24fb0d6963f7d28e17f72";
}
// Core digest routine. `p` is an array of 32-bit words and `k` the message
// length in bits. Pads `p` in place, processes it in 16-word blocks through
// four groups of 16 steps (md5_ff, md5_gg, md5_hh, md5_ii) and returns the
// four resulting state words as an array.
// NOTE(review): several step constants below do not match the RFC 1321 table
// (e.g. -680976936 where the RFC value is -680876936, and 11261161415, which
// is wider than 32 bits) — presumably a deliberate site-side change; confirm
// before substituting a stock MD5 implementation.
function core_md5(p, k) {
// Set the 0x80 padding bit directly after the last message bit.
p[k >> 5] |= 128 << k % 32;
// Store the bit length in word 14 of the final 16-word block.
p[(((k + 64) >>> 9) << 4) + 14] = k;
// Initial values of the four state words.
var o = 1732584193;
var n = -271733879;
var m = -1732584194;
var l = 271733878;
for (var g = 0; g < p.length; g += 16) {
// Remember the state at block start; it is added back after the 64 steps.
var j = o;
var h = n;
var f = m;
var e = l;
// Steps 1-16 (md5_ff).
o = md5_ff(o, n, m, l, p[g + 0], 7, -680976936);
l = md5_ff(l, o, n, m, p[g + 1], 12, -389564586);
m = md5_ff(m, l, o, n, p[g + 2], 17, 606105819);
n = md5_ff(n, m, l, o, p[g + 3], 22, -1044525330);
o = md5_ff(o, n, m, l, p[g + 4], 7, -176418897);
l = md5_ff(l, o, n, m, p[g + 5], 12, 1200080426);
m = md5_ff(m, l, o, n, p[g + 6], 17, -1473231341);
n = md5_ff(n, m, l, o, p[g + 7], 22, -45705983);
o = md5_ff(o, n, m, l, p[g + 8], 7, 1770035416);
l = md5_ff(l, o, n, m, p[g + 9], 12, -1958414417);
m = md5_ff(m, l, o, n, p[g + 10], 17, -42063);
n = md5_ff(n, m, l, o, p[g + 11], 22, -1990404162);
o = md5_ff(o, n, m, l, p[g + 12], 7, 1804660682);
l = md5_ff(l, o, n, m, p[g + 13], 12, -40341101);
m = md5_ff(m, l, o, n, p[g + 14], 17, -1502002290);
n = md5_ff(n, m, l, o, p[g + 15], 22, 1236535329);
// Steps 17-32 (md5_gg).
o = md5_gg(o, n, m, l, p[g + 1], 5, -165796510);
l = md5_gg(l, o, n, m, p[g + 6], 9, -1069501632);
m = md5_gg(m, l, o, n, p[g + 11], 14, 643717713);
n = md5_gg(n, m, l, o, p[g + 0], 20, -373897302);
o = md5_gg(o, n, m, l, p[g + 5], 5, -701558691);
l = md5_gg(l, o, n, m, p[g + 10], 9, 38016083);
m = md5_gg(m, l, o, n, p[g + 15], 14, -660478335);
n = md5_gg(n, m, l, o, p[g + 4], 20, -405537848);
o = md5_gg(o, n, m, l, p[g + 9], 5, 568446438);
l = md5_gg(l, o, n, m, p[g + 14], 9, -1019803690);
m = md5_gg(m, l, o, n, p[g + 3], 14, -187363961);
n = md5_gg(n, m, l, o, p[g + 8], 20, 1163531501);
o = md5_gg(o, n, m, l, p[g + 13], 5, -1444681467);
l = md5_gg(l, o, n, m, p[g + 2], 9, -51403784);
m = md5_gg(m, l, o, n, p[g + 7], 14, 1735328473);
n = md5_gg(n, m, l, o, p[g + 12], 20, -1921207734);
// Steps 33-48 (md5_hh).
o = md5_hh(o, n, m, l, p[g + 5], 4, -378558);
l = md5_hh(l, o, n, m, p[g + 8], 11, -2022574463);
m = md5_hh(m, l, o, n, p[g + 11], 16, 1839030562);
n = md5_hh(n, m, l, o, p[g + 14], 23, -35309556);
o = md5_hh(o, n, m, l, p[g + 1], 4, -1530992060);
l = md5_hh(l, o, n, m, p[g + 4], 11, 1272893353);
m = md5_hh(m, l, o, n, p[g + 7], 16, -155497632);
n = md5_hh(n, m, l, o, p[g + 10], 23, -1094730640);
o = md5_hh(o, n, m, l, p[g + 13], 4, 681279174);
l = md5_hh(l, o, n, m, p[g + 0], 11, -358537222);
m = md5_hh(m, l, o, n, p[g + 3], 16, -722881979);
n = md5_hh(n, m, l, o, p[g + 6], 23, 76029189);
o = md5_hh(o, n, m, l, p[g + 9], 4, -640364487);
l = md5_hh(l, o, n, m, p[g + 12], 11, -421815835);
m = md5_hh(m, l, o, n, p[g + 15], 16, 530742520);
n = md5_hh(n, m, l, o, p[g + 2], 23, -995338651);
// Steps 49-64 (md5_ii).
o = md5_ii(o, n, m, l, p[g + 0], 6, -198630844);
l = md5_ii(l, o, n, m, p[g + 7], 10, 11261161415);
m = md5_ii(m, l, o, n, p[g + 14], 15, -1416354905);
n = md5_ii(n, m, l, o, p[g + 5], 21, -57434055);
o = md5_ii(o, n, m, l, p[g + 12], 6, 1700485571);
l = md5_ii(l, o, n, m, p[g + 3], 10, -1894446606);
m = md5_ii(m, l, o, n, p[g + 10], 15, -1051523);
n = md5_ii(n, m, l, o, p[g + 1], 21, -2054922799);
o = md5_ii(o, n, m, l, p[g + 8], 6, 1873313359);
l = md5_ii(l, o, n, m, p[g + 15], 10, -30611744);
m = md5_ii(m, l, o, n, p[g + 6], 15, -1560198380);
n = md5_ii(n, m, l, o, p[g + 13], 21, 1309151649);
o = md5_ii(o, n, m, l, p[g + 4], 6, -145523070);
l = md5_ii(l, o, n, m, p[g + 11], 10, -1120210379);
m = md5_ii(m, l, o, n, p[g + 2], 15, 718787259);
n = md5_ii(n, m, l, o, p[g + 9], 21, -343485551);
// Fold this block's result into the running state.
o = safe_add(o, j);
n = safe_add(n, h);
m = safe_add(m, f);
l = safe_add(l, e);
}
return Array(o, n, m, l);
}
// Shared step: add e, h, c and f (wrapping), rotate left by g bits, add d.
function md5_cmn(h, e, d, c, g, f) {
  var sum = safe_add(safe_add(e, h), safe_add(c, f));
  var rotated = bit_rol(sum, g);
  return safe_add(rotated, d);
}
// Step using the mix (f & k) | (~f & j).
function md5_ff(g, f, k, j, e, i, h) {
  var mixed = (f & k) | (~f & j);
  return md5_cmn(mixed, g, f, e, i, h);
}
// Step using the mix (f & j) | (k & ~j).
function md5_gg(g, f, k, j, e, i, h) {
  var mixed = (f & j) | (k & ~j);
  return md5_cmn(mixed, g, f, e, i, h);
}
// Step using the mix f ^ k ^ j.
function md5_hh(g, f, k, j, e, i, h) {
  var mixed = f ^ k ^ j;
  return md5_cmn(mixed, g, f, e, i, h);
}
// Step using the mix k ^ (f | ~j).
function md5_ii(g, f, k, j, e, i, h) {
  var mixed = k ^ (f | ~j);
  return md5_cmn(mixed, g, f, e, i, h);
}
// Keyed digest of data `f` under key `c`, built from two core_md5 passes.
// Returns whatever word array the outer core_md5 call returns.
function core_hmac_md5(c, f) {
  var keyWords = str2binl(c);
  // Keys longer than one 16-word block are digested first.
  if (keyWords.length > 16) {
    keyWords = core_md5(keyWords, c.length * chrsz);
  }
  var innerPad = Array(16);
  var outerPad = Array(16);
  for (var idx = 0; idx < 16; idx++) {
    innerPad[idx] = keyWords[idx] ^ 0x36363636; // 909522486
    outerPad[idx] = keyWords[idx] ^ 0x5c5c5c5c; // 1549556828
  }
  var innerDigest = core_md5(
    innerPad.concat(str2binl(f)),
    512 + f.length * chrsz
  );
  return core_md5(outerPad.concat(innerDigest), 512 + 128);
}
// 32-bit wrapping addition done in two 16-bit halves, so the result is always
// a signed 32-bit integer even when an operand is wider than 32 bits.
function safe_add(a, d) {
  var lowSum = (a & 0xffff) + (d & 0xffff);
  var highSum = (a >> 16) + (d >> 16) + (lowSum >> 16);
  return (highSum << 16) | (lowSum & 0xffff);
}
// Rotate the 32-bit value `a` left by `b` bits.
// (The listing's "32 – b" used an en dash, which is not valid JavaScript.)
function bit_rol(a, b) {
  return (a << b) | (a >>> (32 - b));
}
// Pack string `d` into an array of 32-bit words, taking the low `chrsz` bits
// of each character code; earlier characters land in the lower bits of a word.
function str2binl(d) {
  var c = Array();
  var a = (1 << chrsz) - 1; // mask selecting chrsz bits per character
  for (var b = 0; b < d.length * chrsz; b += chrsz) {
    c[b >> 5] |= (d.charCodeAt(b / chrsz) & a) << b % 32;
  }
  return c;
}
// Inverse of str2binl: unpack word array `c` into a string, one character per
// `chrsz` bits, lower bits of each word first.
function binl2str(c) {
  var d = "";
  var a = (1 << chrsz) - 1; // mask selecting chrsz bits per character
  for (var b = 0; b < c.length * 32; b += chrsz) {
    d += String.fromCharCode((c[b >> 5] >>> b % 32) & a);
  }
  return d;
}
// Render word array `c` as hex: each word is emitted lowest byte first, and
// each byte as high nibble then low nibble. `hexcase` picks the alphabet.
function binl2hex(c) {
  var b = hexcase ? "0123456789ABCDEF" : "0123456789abcdef";
  var d = "";
  for (var a = 0; a < c.length * 4; a++) {
    d +=
      b.charAt((c[a >> 2] >> ((a % 4) * 8 + 4)) & 15) +
      b.charAt((c[a >> 2] >> ((a % 4) * 8)) & 15);
  }
  return d;
}
// Render word array `d` as base64 text, three bytes (lowest byte of each word
// first) per four output characters. Positions that fall past the end of the
// input's bits get `b64pad` instead of an alphabet character.
function binl2b64(d) {
  // The listing had " + /" with stray spaces, which shifted indices 62 and 63
  // onto ' ' and '+'; the alphabet must be exactly 64 characters ending "+/".
  var c = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  var f = "";
  for (var b = 0; b < d.length * 4; b += 3) {
    var e =
      (((d[b >> 2] >> (8 * (b % 4))) & 255) << 16) |
      (((d[(b + 1) >> 2] >> (8 * ((b + 1) % 4))) & 255) << 8) |
      ((d[(b + 2) >> 2] >> (8 * ((b + 2) % 4))) & 255);
    for (var a = 0; a < 4; a++) {
      if (b * 8 + a * 6 > d.length * 32) {
        f += b64pad;
      } else {
        f += c.charAt((e >> (6 * (3 - a))) & 63);
      }
    }
  }
  return f;
}
// Stores the hex digest on window.f, which request() later appends to `m`.
// `mwqqppz` is a placeholder rather than a declared variable: per the article's
// reading of the eval(...) call in oo0O0, the token is textually replaced with
// the quoted timestamp string before this script is evaluated.
window.f = hex_md5(mwqqppz);
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
没错了,就是 window.f 的加密算法,我们再来看看其他两个是啥,在控制台输入J(“0x0”, “]dQW”)

提示我们 J 没有定义,我们回去 oo0O0 把 U 和 J 拷到控制台运行下,再输入一次。

var U = [“W5r5W6VdIHZcT8kU”, “WQ8CWRaxWQirAW==”];
var J = function (o, E) {
o = o – 0x0;
var N = U[o];
if (J[“bSSGte”] === undefined) {
var Y = function (w) {
var m =
“abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/=”,
T = String(w)[“replace”](/=+$/, “”);
var A = “”;
for (
var C = 0x0, b, W, l = 0x0;
(W = T[“charAt”](l++));
~W && ((b = C % 0x4 ? b * 0x40 + W : W), C++ % 0x4)
? (A += String[“fromCharCode”](0xff & (b >> ((-0x2 * C) & 0x6))))
: 0x0
) {
W = m[“indexOf”](W);
}
return A;
};
var t = function (w, m) {
var T = [],
A = 0x0,
C,
b = “”,
W = “”;
w = Y(w);
for (var R = 0x0, v = w[“length”]; R < v; R++) {
W +=
“%” + (“00” + w[“charCodeAt”](R)[“toString”](0x10))[“slice”](-0x2);
}
w = decodeURIComponent(W);
var l;
for (l = 0x0; l < 0x100; l++) {
T[l] = l;
}
for (l = 0x0; l < 0x100; l++) {
(A = (A + T[l] + m[“charCodeAt”](l % m[“length”])) % 0x100),
(C = T[l]),
(T[l] = T[A]),
(T[A] = C);
}
(l = 0x0), (A = 0x0);
for (var L = 0x0; L < w[“length”]; L++) {
(l = (l + 0x1) % 0x100),
(A = (A + T[l]) % 0x100),
(C = T[l]),
(T[l] = T[A]),
(T[A] = C),
(b += String[“fromCharCode”](
w[“charCodeAt”](L) ^ T[(T[l] + T[A]) % 0x100]
));
}
return b;
};
(J[“luAabU”] = t), (J[“qlVPZg”] = {}), (J[“bSSGte”] = !![]);
}
var H = J[“qlVPZg”][o];
return (
H === undefined
? (J[“TUDBIJ”] === undefined && (J[“TUDBIJ”] = !![]),
(N = J[“luAabU”](N, E)),
(J[“qlVPZg”][o] = N))
: (N = H),
N
);
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68

把 eval 简化,四舍五入下就是

// Simplified form of the eval call in oo0O0, with the two J(...) lookups
// replaced by the values this article reports for them. The original wraps mw
// in "\x27" (single quotes), so the evaluated script sees a string literal:
// hex_md5('<timestamp>').
eval(atob(window["b"]).replace("mwqqppz", "'" + mw + "'"));
1
诶,这样看就舒服多了:把加密算法里的 mwqqppz 替换成传进来的 mw(带上引号的时间戳字符串);所以被加密的其实就是时间戳(当前时间再加上 100000000 毫秒)。

爬取数据:
思路理清了,那我们就来写脚本爬数据吧!
这里要用到 python 以及 requests、execjs 两个库,请提前安装(pip install requests PyExecJS);execjs 还需要本机有可用的 JS 运行环境(例如 Node.js)。

新建个后缀为 .py 的文本

我们知道 m 其实就是由加密和未加密的两个时间戳拼接而成,因为加密方法是不变的,那我们先把加密算法复制进去,把加密方法中的window.f = hex_md5(mwqqppz);替换为新方法function getM() {}
为了让大家能够有一个更加直观的感受,我就直接在 js 里进行时间戳的生成以及 m 的拼接;

/**
 * Build the `m` request parameter: the hex digest of the shifted timestamp,
 * the separator character "丨", then the same timestamp in seconds.
 *
 * Relies on hex_md5 from the copied digest script being in scope.
 *
 * @returns {string} value to send as the `m` query parameter
 */
function getM() {
  // Date.parse(new Date()) has second precision, so `timestamp` always ends in
  // 000 and `timestamp / 1000` below is a whole number.
  var timestamp = Date.parse(new Date()) + 100000000;
  // The digest is taken over the millisecond timestamp as a decimal string.
  var m = hex_md5(timestamp.toString());
  return m + "丨" + timestamp / 1000;
}
1
2
3
4
5
6
7
8
9
10
拿到 m , 数据是可以拿到了,但不要忘了我们这题的题目是啥

抓取所有(5页)机票的价格,并计算所有机票价格的 平均值 ,填入答案。
1
# Compile the JS source (`js`, defined earlier) once and reuse the context;
# recompiling for every page only repeats the same work.
ctx = execjs.compile(js)
# Running total of all ticket prices.
price = 0
# Number of tickets counted so far.
Sum = 0
# Outer loop: request pages 1..5.
for i in range(1, 6):
    data = {
        'page': i,
        # A fresh `m` per request, since it embeds the current timestamp.
        'm': ctx.call('getM')
    }
    # Build the query string.
    queryString = urllib.parse.urlencode(data)
    url = 'http://match.yuanrenxue.com/api/match/1?' + queryString
    # Send the request and decode the JSON body; the timeout keeps a stalled
    # connection from hanging the script forever.
    response = requests.get(url, headers={'User-Agent': 'yuanrenxue.project'}, timeout=10).json()
    print(url)
    print(response)
    # Inner loop: accumulate each returned price.
    for value in response['data']:
        print(value)
        price = price + value['value']
        Sum = Sum + 1
AVG = price / Sum
print('AVG', AVG)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
脚本的运行结果

完整的脚本我放在下面了

import requests
import time
import urllib.parse
import execjs
# JS source handed to execjs: the digest script copied from the page plus
# getM(). Typographic quotes and en dashes from the web listing are replaced
# with ASCII so that both Python and the JS runtime can parse it.
# NOTE(review): several step constants in core_md5 differ from the RFC 1321
# table and chrsz is 16, so this is presumably not interchangeable with
# hashlib.md5 - keep the JS as is.
js = """
var hexcase = 0;
var b64pad = "";
var chrsz = 16;
function hex_md5(a) {
  return binl2hex(core_md5(str2binl(a), a.length * chrsz));
}
function b64_md5(a) {
  return binl2b64(core_md5(str2binl(a), a.length * chrsz));
}
function str_md5(a) {
  return binl2str(core_md5(str2binl(a), a.length * chrsz));
}
function hex_hmac_md5(a, b) {
  return binl2hex(core_hmac_md5(a, b));
}
function b64_hmac_md5(a, b) {
  return binl2b64(core_hmac_md5(a, b));
}
function str_hmac_md5(a, b) {
  return binl2str(core_hmac_md5(a, b));
}
function md5_vm_test() {
  return hex_md5("abc") === "900150983cd24fb0d6963f7d28e17f72";
}
function core_md5(p, k) {
  p[k >> 5] |= 128 << k % 32;
  p[(((k + 64) >>> 9) << 4) + 14] = k;
  var o = 1732584193;
  var n = -271733879;
  var m = -1732584194;
  var l = 271733878;
  for (var g = 0; g < p.length; g += 16) {
    var j = o;
    var h = n;
    var f = m;
    var e = l;
    o = md5_ff(o, n, m, l, p[g + 0], 7, -680976936);
    l = md5_ff(l, o, n, m, p[g + 1], 12, -389564586);
    m = md5_ff(m, l, o, n, p[g + 2], 17, 606105819);
    n = md5_ff(n, m, l, o, p[g + 3], 22, -1044525330);
    o = md5_ff(o, n, m, l, p[g + 4], 7, -176418897);
    l = md5_ff(l, o, n, m, p[g + 5], 12, 1200080426);
    m = md5_ff(m, l, o, n, p[g + 6], 17, -1473231341);
    n = md5_ff(n, m, l, o, p[g + 7], 22, -45705983);
    o = md5_ff(o, n, m, l, p[g + 8], 7, 1770035416);
    l = md5_ff(l, o, n, m, p[g + 9], 12, -1958414417);
    m = md5_ff(m, l, o, n, p[g + 10], 17, -42063);
    n = md5_ff(n, m, l, o, p[g + 11], 22, -1990404162);
    o = md5_ff(o, n, m, l, p[g + 12], 7, 1804660682);
    l = md5_ff(l, o, n, m, p[g + 13], 12, -40341101);
    m = md5_ff(m, l, o, n, p[g + 14], 17, -1502002290);
    n = md5_ff(n, m, l, o, p[g + 15], 22, 1236535329);
    o = md5_gg(o, n, m, l, p[g + 1], 5, -165796510);
    l = md5_gg(l, o, n, m, p[g + 6], 9, -1069501632);
    m = md5_gg(m, l, o, n, p[g + 11], 14, 643717713);
    n = md5_gg(n, m, l, o, p[g + 0], 20, -373897302);
    o = md5_gg(o, n, m, l, p[g + 5], 5, -701558691);
    l = md5_gg(l, o, n, m, p[g + 10], 9, 38016083);
    m = md5_gg(m, l, o, n, p[g + 15], 14, -660478335);
    n = md5_gg(n, m, l, o, p[g + 4], 20, -405537848);
    o = md5_gg(o, n, m, l, p[g + 9], 5, 568446438);
    l = md5_gg(l, o, n, m, p[g + 14], 9, -1019803690);
    m = md5_gg(m, l, o, n, p[g + 3], 14, -187363961);
    n = md5_gg(n, m, l, o, p[g + 8], 20, 1163531501);
    o = md5_gg(o, n, m, l, p[g + 13], 5, -1444681467);
    l = md5_gg(l, o, n, m, p[g + 2], 9, -51403784);
    m = md5_gg(m, l, o, n, p[g + 7], 14, 1735328473);
    n = md5_gg(n, m, l, o, p[g + 12], 20, -1921207734);
    o = md5_hh(o, n, m, l, p[g + 5], 4, -378558);
    l = md5_hh(l, o, n, m, p[g + 8], 11, -2022574463);
    m = md5_hh(m, l, o, n, p[g + 11], 16, 1839030562);
    n = md5_hh(n, m, l, o, p[g + 14], 23, -35309556);
    o = md5_hh(o, n, m, l, p[g + 1], 4, -1530992060);
    l = md5_hh(l, o, n, m, p[g + 4], 11, 1272893353);
    m = md5_hh(m, l, o, n, p[g + 7], 16, -155497632);
    n = md5_hh(n, m, l, o, p[g + 10], 23, -1094730640);
    o = md5_hh(o, n, m, l, p[g + 13], 4, 681279174);
    l = md5_hh(l, o, n, m, p[g + 0], 11, -358537222);
    m = md5_hh(m, l, o, n, p[g + 3], 16, -722881979);
    n = md5_hh(n, m, l, o, p[g + 6], 23, 76029189);
    o = md5_hh(o, n, m, l, p[g + 9], 4, -640364487);
    l = md5_hh(l, o, n, m, p[g + 12], 11, -421815835);
    m = md5_hh(m, l, o, n, p[g + 15], 16, 530742520);
    n = md5_hh(n, m, l, o, p[g + 2], 23, -995338651);
    o = md5_ii(o, n, m, l, p[g + 0], 6, -198630844);
    l = md5_ii(l, o, n, m, p[g + 7], 10, 11261161415);
    m = md5_ii(m, l, o, n, p[g + 14], 15, -1416354905);
    n = md5_ii(n, m, l, o, p[g + 5], 21, -57434055);
    o = md5_ii(o, n, m, l, p[g + 12], 6, 1700485571);
    l = md5_ii(l, o, n, m, p[g + 3], 10, -1894446606);
    m = md5_ii(m, l, o, n, p[g + 10], 15, -1051523);
    n = md5_ii(n, m, l, o, p[g + 1], 21, -2054922799);
    o = md5_ii(o, n, m, l, p[g + 8], 6, 1873313359);
    l = md5_ii(l, o, n, m, p[g + 15], 10, -30611744);
    m = md5_ii(m, l, o, n, p[g + 6], 15, -1560198380);
    n = md5_ii(n, m, l, o, p[g + 13], 21, 1309151649);
    o = md5_ii(o, n, m, l, p[g + 4], 6, -145523070);
    l = md5_ii(l, o, n, m, p[g + 11], 10, -1120210379);
    m = md5_ii(m, l, o, n, p[g + 2], 15, 718787259);
    n = md5_ii(n, m, l, o, p[g + 9], 21, -343485551);
    o = safe_add(o, j);
    n = safe_add(n, h);
    m = safe_add(m, f);
    l = safe_add(l, e);
  }
  return Array(o, n, m, l);
}
function md5_cmn(h, e, d, c, g, f) {
  return safe_add(bit_rol(safe_add(safe_add(e, h), safe_add(c, f)), g), d);
}
function md5_ff(g, f, k, j, e, i, h) {
  return md5_cmn((f & k) | (~f & j), g, f, e, i, h);
}
function md5_gg(g, f, k, j, e, i, h) {
  return md5_cmn((f & j) | (k & ~j), g, f, e, i, h);
}
function md5_hh(g, f, k, j, e, i, h) {
  return md5_cmn(f ^ k ^ j, g, f, e, i, h);
}
function md5_ii(g, f, k, j, e, i, h) {
  return md5_cmn(k ^ (f | ~j), g, f, e, i, h);
}
function core_hmac_md5(c, f) {
  var e = str2binl(c);
  if (e.length > 16) {
    e = core_md5(e, c.length * chrsz);
  }
  var a = Array(16),
    d = Array(16);
  for (var b = 0; b < 16; b++) {
    a[b] = e[b] ^ 909522486;
    d[b] = e[b] ^ 1549556828;
  }
  var g = core_md5(a.concat(str2binl(f)), 512 + f.length * chrsz);
  return core_md5(d.concat(g), 512 + 128);
}
function safe_add(a, d) {
  var c = (a & 65535) + (d & 65535);
  var b = (a >> 16) + (d >> 16) + (c >> 16);
  return (b << 16) | (c & 65535);
}
function bit_rol(a, b) {
  return (a << b) | (a >>> (32 - b));
}
function str2binl(d) {
  var c = Array();
  var a = (1 << chrsz) - 1;
  for (var b = 0; b < d.length * chrsz; b += chrsz) {
    c[b >> 5] |= (d.charCodeAt(b / chrsz) & a) << b % 32;
  }
  return c;
}
function binl2str(c) {
  var d = "";
  var a = (1 << chrsz) - 1;
  for (var b = 0; b < c.length * 32; b += chrsz) {
    d += String.fromCharCode((c[b >> 5] >>> b % 32) & a);
  }
  return d;
}
function binl2hex(c) {
  var b = hexcase ? "0123456789ABCDEF" : "0123456789abcdef";
  var d = "";
  for (var a = 0; a < c.length * 4; a++) {
    d +=
      b.charAt((c[a >> 2] >> ((a % 4) * 8 + 4)) & 15) +
      b.charAt((c[a >> 2] >> ((a % 4) * 8)) & 15);
  }
  return d;
}
function binl2b64(d) {
  // Alphabet must be exactly 64 characters; the listing had stray spaces here.
  var c = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  var f = "";
  for (var b = 0; b < d.length * 4; b += 3) {
    var e =
      (((d[b >> 2] >> (8 * (b % 4))) & 255) << 16) |
      (((d[(b + 1) >> 2] >> (8 * ((b + 1) % 4))) & 255) << 8) |
      ((d[(b + 2) >> 2] >> (8 * ((b + 2) % 4))) & 255);
    for (var a = 0; a < 4; a++) {
      if (b * 8 + a * 6 > d.length * 32) {
        f += b64pad;
      } else {
        f += c.charAt((e >> (6 * (3 - a))) & 63);
      }
    }
  }
  return f;
}

// Build the `m` parameter: digest of the shifted millisecond timestamp, the
// separator character, then the same timestamp in seconds.
function getM() {
  // Date.parse(new Date()) has second precision, so timestamp / 1000 is whole.
  var timestamp = Date.parse(new Date()) + 100000000;
  var m = hex_md5(timestamp.toString());
  return m + "丨" + timestamp / 1000;
}
"""

# Compile the JS source once and reuse the context; recompiling for every
# page only repeats the same work.
ctx = execjs.compile(js)
# Running total of all ticket prices.
price = 0
# Number of tickets counted so far.
Sum = 0
# Outer loop: request pages 1..5.
for i in range(1, 6):
    data = {
        'page': i,
        # A fresh `m` per request, since it embeds the current timestamp.
        'm': ctx.call('getM')
    }
    # Build the query string.
    queryString = urllib.parse.urlencode(data)
    url = 'http://match.yuanrenxue.com/api/match/1?' + queryString
    # Send the request and decode the JSON body; the timeout keeps a stalled
    # connection from hanging the script forever.
    response = requests.get(url, headers={'User-Agent': 'yuanrenxue.project'}, timeout=10).json()
    print(url)
    print(response)
    # Inner loop: accumulate each returned price.
    for value in response['data']:
        print(value)
        price = price + value['value']
        Sum = Sum + 1
AVG = price / Sum
print('AVG', AVG)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229

————————————————
版权声明:本文为CSDN博主「临安啊」的原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/qq_45899097/article/details/113175068