python3爬虫-知乎登陆
2019-04-28 08:24:34来源:博客园 阅读 ()
py文件:
import base64
import hashlib
import hmac
import json
import time
from http import cookiejar
from urllib import parse

import execjs
import requests
from fake_useragent import UserAgent
from PIL import Image

# Shared source of random User-Agent strings for every request header below.
ua = UserAgent()


class MyException(Exception):
    """Error that carries a status code together with a message."""

    def __init__(self, status, msg):
        super().__init__(status, msg)
        self.status = status
        self.msg = msg


class ZhiHu:
    """Sign in to zhihu.com through a ``requests`` session.

    Cookies are persisted to ``./cookies.txt`` so that later runs can reuse
    the authenticated session instead of signing in again.
    """

    def __init__(self, username=None, password=None):
        """Create the HTTP session and the base sign-in form.

        :param username: account name (phone number or e-mail); prompted for
            at login time when omitted.
        :param password: account password; prompted for when omitted.
        """
        self.username = username
        self.password = password
        self.session = requests.Session()
        self.session.headers = {
            "user-agent": ua.random,
            "referer": "https://www.zhihu.com/",
            'host': 'www.zhihu.com',
        }
        self.session.cookies = cookiejar.LWPCookieJar(filename="./cookies.txt")
        # Form fields posted to the sign-in API; login() fills in the rest.
        self.login_param = {
            "client_id": "c3cef7c66a1843f8b3a9e6a1e3160e20",
            "grant_type": "password",
            "source": "com.zhihu.web",
            "username": "",
            "password": "",
            "ref_source": "homepage",
            "utm_source": "baidu",
        }

    def load_cookies(self) -> bool:
        """Load the saved cookies into the session.

        :return: ``True`` when the cookie file was read, ``False`` when it is
            missing or cannot be parsed.
        """
        try:
            self.session.cookies.load(ignore_discard=True, ignore_expires=True)
        except (FileNotFoundError, cookiejar.LoadError):
            # A missing or corrupt file simply means "no reusable session".
            return False
        return True

    def login(self, captcha_lang: str = "en", is_load_cookies: bool = True) -> bool:
        """Sign in, reusing saved cookies when allowed and still valid.

        :param captcha_lang: captcha type; ``"en"`` asks for the characters
            shown in an image, any other value asks the user to click the
            upside-down Chinese characters.
        :param is_load_cookies: whether to try the saved cookies first.
        :return: ``True`` when the session ends up signed in, else ``False``.
        """
        # Test the flag first so the cookie file is not loaded into the
        # session when the caller asked for a fresh sign-in.
        if is_load_cookies and self.load_cookies():
            print("读取cookies文件")
            if self.check__login():
                print("登陆成功")
                return True
            print("cookies已经失效")

        # Not signed in yet: make sure credentials are available.
        self.check_user_input()

        # Fetch the xsrf token and build the headers for the sign-in request.
        headers = self.session.headers.copy()
        xsrf = self.get_xsrf()
        headers.update({
            "content-type": "application/x-www-form-urlencoded",
            "x-xsrftoken": xsrf,
            "x-zse-83": "3_1.1",
        })

        # "lang" must be stored before get_captcha() runs because it picks
        # the captcha endpoint from login_param.
        self.login_param.update({
            "username": self.username,
            "password": self.password,
            "lang": captcha_lang
        })

        # Complete the form data; the signature is derived from the timestamp.
        timestamp = int(time.time() * 1000)
        self.login_param.update({
            "timestamp": timestamp,
            "captcha": self.get_captcha() or "",
            "signature": self.get_signature(timestamp)
        })
        formdata = self.__encrypt(self.login_param)

        url = "https://www.zhihu.com/api/v3/oauth/sign_in"
        self.session.post(url=url, headers=headers, data=formdata)
        if self.check__login():
            # Save with the same flags load_cookies() reads with, so session
            # cookies are persisted as well.
            self.session.cookies.save(ignore_discard=True, ignore_expires=True)
            print("cookies以写入文件")
            print("登录成功")
            return True
        print("登录失败")
        return False

    def check__login(self) -> bool:
        """Report whether the session is signed in.

        The home page is requested without following redirects; only a 200
        answer counts as signed in (a 302 or any other status does not).
        """
        url = "https://www.zhihu.com/"
        response = self.session.get(url=url, allow_redirects=False)
        return response.status_code == 200

    def check_user_input(self) -> None:
        """Prompt for missing credentials and normalise the phone number."""
        if not self.username:
            self.username = input("请输入手机号>>:").strip()
        # An all-digit user name is treated as a phone number, which gets the
        # +86 prefix whether it was typed in or passed to the constructor.
        if self.username.isdigit():
            self.username = "+86" + self.username
        if not self.password:
            self.password = input("请输入密码>>:").strip()

    def _captcha_api(self) -> str:
        """Return the captcha endpoint matching ``login_param["lang"]``."""
        lang = "en" if self.login_param.get("lang") == "en" else "cn"
        return "https://www.zhihu.com/api/v3/oauth/captcha?lang=" + lang

    def get_captcha(self) -> str:
        """Fetch and solve the captcha when the server demands one.

        The endpoint is always queried once with GET; when a captcha is
        required the image is fetched with PUT and the answer sent with POST.

        :return: the captcha answer, or ``""`` when none is required.
        """
        lang = self.login_param.get("lang")
        captcha_api = self._captcha_api()

        response = self.session.get(captcha_api)
        if not response.json().get("show_captcha", False):
            return ""

        # Captcha required: fetch the image as base64 first.
        response = self.session.put(captcha_api)
        # Drops literal backslash-n sequences; real newline characters are
        # ignored by b64decode anyway.
        base64_img = response.json()['img_base64'].replace(r'\n', '')
        with open("./captcha.png", "wb") as f:
            f.write(base64.b64decode(base64_img))
        img = Image.open("./captcha.png")

        if lang == "en":
            img.show()
            code = input("请输入图片中的验证码>>:").strip()
        else:
            # Imported lazily: only the click captcha needs matplotlib.
            import matplotlib.pyplot as plt
            plt.imshow(img)
            print('点击所有倒立的汉字,在命令行中按回车提交')
            points = plt.ginput(7)
            # Clicked coordinates are halved before being sent with the
            # fixed 200x44 image size.
            code = json.dumps({
                'img_size': [200, 44],
                'input_points': [[i[0] / 2, i[1] / 2] for i in points],
            })

        self.session.post(captcha_api, data={"input_text": code},
                          headers={"user-agent": ua.random, })
        return code

    def get_no_captch(self) -> str:
        """Poll the captcha endpoint until the server stops asking for one.

        :return: always ``""`` (the empty captcha answer).
        """
        captcha_api = self._captcha_api()
        while True:
            print("正在请求验证码....")
            time.sleep(0.5)
            response = self.session.get(captcha_api)
            show_captcha = response.json().get("show_captcha")
            # The JSON boolean arrives as Python False; comparing its str()
            # with 'false' never matched and made this loop endless.
            if not show_captcha or str(show_captcha).lower() == "false":
                return ""
            print("继续...")

    def get_signature(self, timestamp) -> str:
        """Return the HMAC-SHA1 signature of the sign-in form.

        :param timestamp: the millisecond timestamp also sent in the form.
        :return: hex digest over grant_type + client_id + source + timestamp.
        """
        ha = hmac.new(key=b"d1b964811afb40118a12068ff74a12f4", digestmod=hashlib.sha1)
        client_id = self.login_param.get("client_id")
        grant_type = self.login_param.get("grant_type")
        source = self.login_param.get("source")
        message = grant_type + client_id + source + str(timestamp)
        ha.update(message.encode("utf-8"))
        return ha.hexdigest()

    def get_xsrf(self):
        """Request the sign-in page and return its ``_xsrf`` cookie.

        :return: the cookie value, or ``None`` when the response sets none.
        """
        url = "https://www.zhihu.com/signin"
        response = self.session.get(url=url, headers=self.session.headers, allow_redirects=False)
        return response.cookies.get("_xsrf")

    def __encrypt(self, data: dict):
        """Url-encode ``data`` and pass it through ``Q`` from ``./01.js``.

        :param data: the sign-in form fields.
        :return: whatever the JavaScript function ``Q`` returns.
        """
        data = parse.urlencode(data)
        with open("./01.js", "r", encoding="utf-8") as f:
            js_code = f.read()
        ctx = execjs.compile(js_code)
        return ctx.call("Q", data)


if __name__ == '__main__':
    zhihu = ZhiHu()
    zhihu.login()
js文件:
// Script evaluated by the accompanying Python code through execjs, which calls
// Q(data) with the url-encoded sign-in form and posts the returned value.
//
// The file is a small bytecode interpreter plus one embedded program:
//   * window / navigator: stand-ins for browser globals; only
//     encodeURIComponent and a fixed userAgent string are provided.
//   * i, h, A, n, e, a, c, o, r, k, B, f, u, C, b, g: one constructor per
//     instruction kind. Each unpacks bit fields from a numeric instruction
//     word; its prototype.M(vm) applies the instruction to a G instance.
//   * G: interpreter state and run loop.
//   * The trailing (new G).J(...) call runs the embedded program at load time.
//
// NOTE(review): Buffer is used for base64 decoding, so this presumably needs a
// Node.js runtime; `new Buffer(...)` is deprecated there in favour of
// `Buffer.from(...)` - confirm before upgrading Node.
window = { "encodeURIComponent": encodeURIComponent }
navigator = { "userAgent": "5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36" }
// typeof helper that reports "symbol" for Symbol values; it replaces itself
// with the chosen implementation on first call.
function s(e) { return (s = "function" == typeof Symbol && "symbol" == typeof Symbol.t ? function (e) { return typeof e } : function (e) { return e && "function" == typeof Symbol && e.constructor === Symbol && e !== Symbol.prototype ? "symbol" : typeof e } )(e) }
// __g starts empty; Q() at the bottom expects it to have _encrypt by then.
var t = "1.1" , __g = {};
// Instruction decoders: `e` is the instruction word, the masks and shifts
// pull out the operand fields used by the matching prototype.M below.
function i() { }
function h(e) { this.s = (2048 & e) >> 11, this.i = (1536 & e) >> 9, this.h = 511 & e, this.A = 511 & e }
function A(e) { this.i = (3072 & e) >> 10, this.A = 1023 & e }
function n(e) { this.n = (3072 & e) >> 10, this.e = (768 & e) >> 8, this.a = (192 & e) >> 6, this.s = 63 & e }
function e(e) { this.i = e >> 10 & 3, this.h = 1023 & e }
function a() { }
function c(e) { this.n = (3072 & e) >> 10, this.e = (768 & e) >> 8, this.a = (192 & e) >> 6, this.s = 63 & e }
function o(e) { this.A = (4095 & e) >> 2, this.s = 3 & e }
function r(e) { this.i = e >> 10 & 3, this.h = e >> 2 & 255, this.s = 3 & e }
function k(e) { this.s = (4095 & e) >> 10, this.i = (1023 & e) >> 8, this.h = 1023 & e, this.A = 63 & e }
function B(e) { this.s = (4095 & e) >> 10, this.n = (1023 & e) >> 8, this.e = (255 & e) >> 6 }
function f(e) { this.i = (3072 & e) >> 10, this.A = 1023 & e }
function u(e) { this.A = 4095 & e }
function C(e) { this.i = (3072 & e) >> 10 }
function b(e) { this.A = 4095 & e }
function g(e) { this.s = (3840 & e) >> 8, this.i = (192 & e) >> 6, this.h = 63 & e }
// Interpreter state:
//   c  four value slots read and written by most instructions
//   o  index of the next instruction word in b
//   b  instruction words;  g  constant table
//   f  stack of pending values (pushed by k, popped by e/r/B/g)
//   k  slots of the current call;  r, B, u  saved o, k, f of outer calls
//   C  condition flag set by c and consumed by o
//   G  run flag, cleared by i
//   Q  instruction index to jump to when an instruction throws;  R  last thrown value
//   x  timestamp of the previous instruction (used by J's 500 ms guard)
//   D  opcode (top four bits of the word) -> instruction constructor
//   w  initialised to [] and never written in this file
function G() { this.c = [0, 0, 0, 0], this.o = 0, this.r = [], this.k = [], this.B = [], this.f = [], this.u = [], this.C = !1, this.b = [], this.g = [], this.G = !1, this.Q = null, this.R = null, this.w = [], this.x = 0, this.D = { 0: i, 1: h, 2: A, 3: n, 4: e, 5: a, 6: c, 7: o, 8: r, 9: k, 10: B, 11: f, 12: u, 13: C, 14: b, 15: g } }
// Everything from here to the (new G).J(...) call is one comma expression.
// i: stop the run loop.
i.prototype.M = function (e) { e.G = !1 }
// h: load into c[i] either the immediate h (0) or call slot k[A] (1).
, h.prototype.M = function (e) { switch (this.s) { case 0: e.c[this.i] = this.h; break; case 1: e.c[this.i] = e.k[this.A] } }
// A: store c[i] into call slot k[A].
, A.prototype.M = function (e) { e.k[this.A] = e.c[this.i] }
// n: apply the operator selected by s to c[e] (and c[a]); result goes to c[n].
, n.prototype.M = function (e) { switch (this.s) { case 0: e.c[this.n] = e.c[this.e] + e.c[this.a]; break; case 1: e.c[this.n] = e.c[this.e] - e.c[this.a]; break; case 2: e.c[this.n] = e.c[this.e] * e.c[this.a]; break; case 3: e.c[this.n] = e.c[this.e] / e.c[this.a]; break; case 4: e.c[this.n] = e.c[this.e] % e.c[this.a]; break; case 5: e.c[this.n] = e.c[this.e] == e.c[this.a]; break; case 6: e.c[this.n] = e.c[this.e] >= e.c[this.a]; break; case 7: e.c[this.n] = e.c[this.e] || e.c[this.a]; break; case 8: e.c[this.n] = e.c[this.e] && e.c[this.a]; break; case 9: e.c[this.n] = e.c[this.e] !== e.c[this.a]; break; case 10: e.c[this.n] = s(e.c[this.e]); break; case 11: e.c[this.n] = e.c[this.e] in e.c[this.a]; break; case 12: e.c[this.n] = e.c[this.e] > e.c[this.a]; break; case 13: e.c[this.n] = -e.c[this.e]; break; case 14: e.c[this.n] = e.c[this.e] < e.c[this.a]; break; case 15: e.c[this.n] = e.c[this.e] & e.c[this.a]; break; case 16: e.c[this.n] = e.c[this.e] ^ e.c[this.a]; break; case 17: e.c[this.n] = e.c[this.e] << e.c[this.a]; break; case 18: e.c[this.n] = e.c[this.e] >>> e.c[this.a]; break; case 19: e.c[this.n] = e.c[this.e] | e.c[this.a] } }
// e: enter a call - save o and k, jump to c[i], move h values from f into a
// fresh k, then save f and start an empty one.
, e.prototype.M = function (e) { e.r.push(e.o), e.B.push(e.k), e.o = e.c[this.i], e.k = []; for (var t = 0; t < this.h; t++) e.k.unshift(e.f.pop()); e.u.push(e.f), e.f = [] }
// a: leave a call - restore o, k and f saved by the `e` instruction.
, a.prototype.M = function (e) { e.o = e.r.pop(), e.k = e.B.pop(), e.f = e.u.pop() }
// c: set the condition flag C from a comparison or truthiness test.
, c.prototype.M = function (e) { switch (this.s) { case 0: e.C = e.c[this.n] >= e.c[this.e]; break; case 1: e.C = e.c[this.n] <= e.c[this.e]; break; case 2: e.C = e.c[this.n] > e.c[this.e]; break; case 3: e.C = e.c[this.n] < e.c[this.e]; break; case 4: e.C = e.c[this.n] == e.c[this.e]; break; case 5: e.C = e.c[this.n] != e.c[this.e]; break; case 6: e.C = e.c[this.n]; break; case 7: e.C = !e.c[this.n] } }
// o: jump to A - always (0), if C (1), if not C (2), or always while also
// clearing the handler target Q (3). C is reset afterwards in every case.
, o.prototype.M = function (e) { switch (this.s) { case 0: e.o = this.A; break; case 1: e.C && (e.o = this.A); break; case 2: e.C || (e.o = this.A); break; case 3: e.o = this.A, e.Q = null } e.C = !1 }
// r: invoke a host value with values popped from f - plain call (0), method
// call by popped name (1) or `new` (2). Only the first two popped arguments
// are forwarded; the result lands in c[3].
, r.prototype.M = function (e) { switch (this.s) { case 0: for (var t = [], n = 0; n < this.h; n++) t.unshift(e.f.pop()); e.c[3] = e.c[this.i](t[0], t[1]); break; case 1: for (var r = e.f.pop(), o = [], i = 0; i < this.h; i++) o.unshift(e.f.pop()); e.c[3] = e.c[this.i][r](o[0], o[1]); break; case 2: for (var a = [], c = 0; c < this.h; c++) a.unshift(e.f.pop()); e.c[3] = new e.c[this.i](a[0], a[1]) } }
// k: push onto f - c[i] (0), the immediate h (1), k[A] (2) or constant g[A] (3).
, k.prototype.M = function (e) { switch (this.s) { case 0: e.f.push(e.c[this.i]); break; case 1: e.f.push(this.h); break; case 2: e.f.push(e.k[this.A]); break; case 3: e.f.push(e.g[this.A]) } }
// B: property read (0), property write (1) or eval of a popped value (2).
// NOTE(review): case 2 passes interpreter data to eval().
, B.prototype.M = function (t) { switch (this.s) { case 0: var s = t.f.pop(); t.c[this.n] = t.c[this.e][s]; break; case 1: var i = t.f.pop() , h = t.f.pop(); t.c[this.e][i] = h; break; case 2: var A = t.f.pop(); t.c[this.n] = eval(A) } }
// f: load constant g[A] into c[i].
, f.prototype.M = function (e) { e.c[this.i] = e.g[this.A] }
// u: set Q, the instruction index J jumps to when an instruction throws.
, u.prototype.M = function (e) { e.Q = this.A }
// C: throw c[i].
, C.prototype.M = function (e) { throw e.c[this.i] }
// b: put a host-callable function into c[3]. Calling it runs a new G over the
// same instruction words and constants starting at A, with the argument in
// k[0] followed by a copy of the current k, and returns that G's c[3].
// toString is overridden to return a native-code-looking string.
, b.prototype.M = function (e) { var t = this , n = [0]; e.k.forEach(function (e) { n.push(e) }); var r = function (r) { var o = new G; return o.k = n, o.k[0] = r, o.J(e.b, t.A, e.g, e.w), o.c[3] }; r.toString = function () { return "() { [native code] }" } , e.c[3] = r }
// g: build an object from h popped value/key pairs (0) or an array from h
// popped values (1) and store it in c[i].
, g.prototype.M = function (e) { switch (this.s) { case 0: for (var t = {}, n = 0; n < this.h; n++) { var r = e.f.pop(); t[e.f.pop()] = r } e.c[this.i] = t; break; case 1: for (var o = [], i = 0; i < this.h; i++) o.unshift(e.f.pop()); e.c[this.i] = o } }
// v: base64-decode the program and pack each byte pair (high byte first)
// into one instruction word of b.
, G.prototype.v = function (e) { for (var t = new Buffer(e, "base64").toString("binary"), n = [], r = 0; r < t.length - 1; r += 2) n.push(t.charCodeAt(r) << 8 | t.charCodeAt(r + 1)); this.b = n }
// y: decode one constant string - base64 decode, then each output code is
// 24 ^ input code ^ previous output code, seeded with 66.
, G.prototype.y = function (e) { for (var t = new Buffer(e, "base64").toString("binary"), n = 66, r = [], o = 0; o < t.length; o++) { var i = 24 ^ t.charCodeAt(o) ^ n; r.push(String.fromCharCode(i)), n = i } return r.join("") }
// F: decode every string entry of the constant table with y.
, G.prototype.F = function (e) { var t = this; this.g = e.map(function (e) { return "string" == typeof e ? t.y(e) : e }) }
// J(program, start, constants): run loop. A string program is decoded first;
// an array is used as is. The loop ends when G is cleared, when the next word
// is not a number, or - silently - when more than 500 ms pass between two
// instructions. A thrown value is kept in R; with no handler target Q it is
// rethrown as a string, otherwise execution continues at Q.
, G.prototype.J = function (e, t, n) { for (t = t || 0, n = n || [], this.o = t, "string" == typeof e ? (this.F(n), this.v(e)) : (this.b = e, this.g = n), this.G = !0, this.x = Date.now(); this.G;) { var r = this.b[this.o++]; if ("number" != typeof r) break; var o = Date.now(); if (500 < o - this.x) return; this.x = o; try { this.M(r) } catch (e) { if (this.R = e, !this.Q) throw "execption at " + this.o + ": " + e; this.o = this.Q } } }
// M: dispatch one word - the top four bits select the constructor from D.
, G.prototype.M = function (e) { var t = (61440 & e) >> 12; new this.D[t](e).M(this) }
// Run the embedded program (base64 instruction words plus encoded constant
// table) once at load time.
, 1 && (new G).J("4AeTAJwAqACcAaQAAAAYAJAAnAKoAJwDgAWTACwAnAKoACACGAESOTRHkQAkAbAEIAMYAJwFoAASAzREJAQYBBIBNEVkBnCiGAC0BjRAJAAYBBICNEVkBnDGGAC0BzRAJACwCJAAnAmoAJwKoACcC4ABnAyMBRAAMwZgBnESsA0aADRAkQAkABgCnA6gABoCnA+hQDRHGAKcEKAAMQdgBnFasBEaADRAkQAkABgCnBKgABoCnBOhQDRHZAZxkrAUGgA0QJEAJAAYApwVoABgBnG6sBYaADRAkQAkABgCnBegAGAGceKwGBoANECRACQAnAmoAJwZoABgBnIOsBoaADRAkQAkABgCnBugABoCnByhQDRHZAZyRrAdGgA0QJEAJAAQACAFsB4gBhgAnAWgABIBNEEkBxgHEgA0RmQGdJoQCBoFFAE5gCgFFAQ5hDSCJAgYB5AAGACcH4AFGAEaCDRSEP8xDzMQIAkQCBoFFAE5gCgFFAQ5hDSCkQAkCBgBGgg0UhD/MQ+QACAIGAkaBxQBOYGSABoAnB+EBRoIN1AUCDmRNJMkCRAIGgUUATmAKAUUBDmENIKRACQIGAEaCDRSEP8xD5AAIAgYCRoHFAI5gZIAGgCcH4QFGgg3UBQQOZE0kyQJGAMaCRQ/OY+SABoGnCCEBTTAJAMYAxoJFAY5khI/Nk+RABoGnCCEBTTAJAMYAxoJFAw5khI/Nk+RABoGnCCEBTTAJAMYAxoJFBI5khI/Nk+RABoGnCCEBTTAJAMYBxIDNEEkB3JsHgNQAA==", 0, ["BRgg", "BSITFQkTERw=", "LQYfEhMA", "PxMVFBMZKB8DEjQaBQcZExMC", "", "NhETEQsE", "Whg=", "Wg==", "MhUcHRARDhg=", "NBcPBxYeDQMF", "Lx4ODys+GhMC", "LgM7OwAKDyk6Cg4=", "Mx8SGQUvMQ==", "SA==", "ORoVGCQgERcCAxo=", "BTcAERcCAxo=", "BRg3ABEXAgMaFAo=", "SQ==", "OA8LGBsP", "GC8LGBsP", "Tg==", "PxAcBQ==", "Tw==", "KRsJDgE=", "TA==", "LQofHg4DBwsP", "TQ==", "PhMaNCwZAxoUDQUeGQ==", "PhMaNCwZAxoUDQUeGTU0GQIeBRsYEQ8=", "Qg==", "BWpUGxkfGRsZFxkbGR8ZGxkHGRsZHxkbGRcZG1MbGR8ZGxkXGRFpGxkfGRsZFxkbGR8ZGxkHGRsZHxkbGRcZGw==", "ORMRCyk0Exk8LQ==", "ORMRCyst"]);
// Entry point called from Python. No plain statement in this file assigns
// __g._encrypt - presumably the program above installs it; confirm.
var Q = function (e) { return __g._encrypt(e) };
参考的是这位博主的博客:https://home.cnblogs.com/u/zkqiang
原文链接:https://www.cnblogs.com/zhuchunyu/p/10782248.html
如有疑问请与原作者联系
标签:
版权声明:本站文章部分来自网络,如有侵权,请联系:west999com@outlook.com
特别注意:本站所有转载文章言论不代表本站观点,本站所提供的摄影照片,插画,设计作品,如需使用,请与原作者联系,版权归原作者所有
- python3基础之“术语表(2)” 2019-08-13
- python3 之 字符串编码小结(Unicode、utf-8、gbk、gb2312等) 2019-08-13
- Python3安装impala 2019-08-13
- 小白如何入门 Python 爬虫? 2019-08-13
- python day2-爬虫实现github登录 2019-08-13
IDC资讯: 主机资讯 注册资讯 托管资讯 vps资讯 网站建设
网站运营: 建站经验 策划盈利 搜索优化 网站推广 免费资源
网络编程: Asp.Net编程 Asp编程 Php编程 Xml编程 Access Mssql Mysql 其它
服务器技术: Web服务器 Ftp服务器 Mail服务器 Dns服务器 安全防护
软件技巧: 其它软件 Word Excel Powerpoint Ghost Vista QQ空间 QQ FlashGet 迅雷
网页制作: FrontPages Dreamweaver Javascript css photoshop fireworks Flash