Python3 爬虫 - 知乎登录

2019-04-28 08:24:34来源:博客园 阅读 ()

新老客户大回馈,云服务器低至5折

py文件:

from fake_useragent import UserAgent
import requests
from http import cookiejar
import base64
from PIL import Image
import time, json
import hashlib, hmac
import execjs
from urllib import parse

# Module-wide fake_useragent generator; `ua.random` is read wherever a request
# needs a user-agent header value.
ua = UserAgent()


class MyException(Exception):
    '''Error carrying a status code and a human-readable message.

    :param status: status code describing the failure
    :param msg: description of the failure
    '''

    def __init__(self, status, msg):
        # Forward both values to Exception so that ``args``, ``str()`` and
        # tracebacks are populated instead of being empty.
        super().__init__(status, msg)
        self.status = status
        self.msg = msg


class ZhiHu:
    '''Sign in to zhihu.com and persist the session cookies to ./cookies.txt.

    The sign-in form is signed with HMAC-SHA1 (``get_signature``) and then
    passed through the ``Q`` function of ./01.js (``__encrypt``) before it is
    posted to the sign_in endpoint.
    '''

    def __init__(self, username=None, password=None):
        '''Create the HTTP session and the base sign-in form.

        :param username: phone number; asked for interactively at login when empty
        :param password: account password; asked for interactively at login when empty
        '''
        self.username = username
        self.password = password
        self.session = requests.Session()
        self.session.headers = {
            "user-agent": ua.random,
            "referer": "https://www.zhihu.com/",
            'host': 'www.zhihu.com',
        }

        self.session.cookies = cookiejar.LWPCookieJar(filename="./cookies.txt")

        # Static part of the form; username, password, lang, timestamp, captcha
        # and signature are filled in by login().
        self.login_param = {
            "client_id": "c3cef7c66a1843f8b3a9e6a1e3160e20",
            "grant_type": "password",
            "source": "com.zhihu.web",
            "username": "",
            "password": "",
            "ref_source": "homepage",
            "utm_source": "baidu",

        }

    def load_cookies(self) -> bool:
        '''Load the saved cookies into the session.

        :return: True when the cookie file was read, False when it is missing
                 or cannot be parsed
        '''
        try:
            self.session.cookies.load(ignore_discard=True, ignore_expires=True)
            return True
        except (FileNotFoundError, cookiejar.LoadError):
            # A missing or corrupt cookie file simply means "log in from scratch".
            return False

    def login(self, captcha_lang: str = "en", is_load_cookies: bool = True):
        '''Log in, preferring saved cookies over a fresh password sign-in.

        :param captcha_lang: captcha type; "en" is a text captcha, anything else
                             is the "click the upside-down characters" captcha
        :param is_load_cookies: whether to try the saved cookies first
        :return: True when the session ends up logged in, False otherwise
        '''

        # Check the flag first so the cookie file is not loaded into the
        # session when the caller asked to skip it.
        if is_load_cookies and self.load_cookies():
            print("读取cookies文件")
            if self.check__login():
                print("登陆成功")
                return True
            print("cookies已经失效")

        # Not logged in at this point: do a full password sign-in.

        # Make sure a username and password are available.
        self.check_user_input()

        # Fetch the xsrf token and build the request headers.
        headers = self.session.headers.copy()
        xsrf = self.get_xsrf()
        headers.update({
            "content-type": "application/x-www-form-urlencoded",
            "x-xsrftoken": xsrf,
            "x-zse-83": "3_1.1",
        })

        self.login_param.update({
            "username": self.username,
            "password": self.password,
            "lang": captcha_lang
        })

        # Build the form data; the signature covers the same timestamp that is sent.
        timestamp = int(time.time() * 1000)
        self.login_param.update({
            "timestamp": timestamp,
            "captcha": self.get_captcha() or "",
            "signature": self.get_signature(timestamp)
        })

        formdata = self.__encrypt(self.login_param)

        url = "https://www.zhihu.com/api/v3/oauth/sign_in"

        # Submit the sign-in request, then verify through the home page.
        self.session.post(url=url, headers=headers, data=formdata)
        if self.check__login():
            self.session.cookies.save()
            print("cookies以写入文件")
            print("登录成功")
            return True
        print("登录失败")
        return False

    def check__login(self) -> bool:
        '''Report whether the session is logged in.

        The home page is requested without following redirects; only a 200
        response counts as logged in (a 302 or any other status does not).
        '''
        url = "https://www.zhihu.com/"
        response = self.session.get(url=url, allow_redirects=False)
        return response.status_code == 200

    def check_user_input(self):
        '''Prompt for any missing credential and prefix bare phone numbers with +86.'''
        if not self.username:
            self.username = input("请输入手机号>>:").strip()
        # An all-digit string cannot already start with "+86", so isdigit()
        # alone decides whether the country code must be added.
        if self.username.isdigit():
            self.username = "+86" + self.username

        if not self.password:
            self.password = input("请输入密码>>:").strip()

    def _captcha_api(self) -> str:
        '''Return the captcha endpoint matching the "lang" entry of login_param.'''
        lang = "en" if self.login_param.get("lang") == "en" else "cn"
        return "https://www.zhihu.com/api/v3/oauth/captcha?lang=" + lang

    def get_captcha(self):
        '''Fetch and solve the captcha when the server requires one.

        The endpoint is always queried once with GET; when a captcha is
        required the sequence continues with PUT (fetch image) and POST
        (submit answer).

        :return: the submitted captcha answer, or None when no captcha is required
        '''
        lang = self.login_param.get("lang")
        captcha_api = self._captcha_api()
        response = self.session.get(captcha_api)
        if not response.json().get("show_captcha", False):
            return None

        # A captcha is required: PUT returns the image as base64.
        response = self.session.put(captcha_api)
        base64_img = response.json()['img_base64'].replace(r'\n', '')
        with open("./captcha.png", "wb") as f:
            f.write(base64.b64decode(base64_img))
        img = Image.open("./captcha.png")
        if lang == "en":
            img.show()
            code = input("请输入图片中的验证码>>:").strip()
        else:
            import matplotlib.pyplot as plt
            plt.imshow(img)
            print('点击所有倒立的汉字,在命令行中按回车提交')
            points = plt.ginput(7)
            # Clicked coordinates are halved before being reported.
            code = json.dumps({'img_size': [200, 44],
                               'input_points': [[i[0] / 2, i[1] / 2] for i in points]})

        self.session.post(captcha_api, data={"input_text": code}, headers={"user-agent": ua.random, })
        return code

    def get_no_captch(self):
        '''Poll the captcha endpoint until the server stops asking for a captcha.

        :return: an empty string once ``show_captcha`` is falsy
        '''
        captcha_api = self._captcha_api()
        while True:
            print("正在请求验证码....")
            response = self.session.get(captcha_api)
            # show_captcha is a JSON boolean; test its truthiness directly
            # (str(False) is 'False', which never equals 'false').
            if not response.json().get("show_captcha", False):
                return ""
            print("继续...")
            # Pause between retries only.
            time.sleep(0.5)

    def get_signature(self, timestamp):
        '''Compute the form signature.

        HMAC-SHA1 over grant_type + client_id + source + timestamp.

        :param timestamp: the millisecond timestamp sent in the same form
        :return: the signature as a hex string
        '''
        message = "".join((
            self.login_param.get("grant_type"),
            self.login_param.get("client_id"),
            self.login_param.get("source"),
            str(timestamp),
        ))
        ha = hmac.new(key=b"d1b964811afb40118a12068ff74a12f4", digestmod=hashlib.sha1)
        ha.update(message.encode("utf-8"))
        return ha.hexdigest()

    def get_xsrf(self):
        '''Request the sign-in page and return its ``_xsrf`` cookie (None when absent).'''
        url = "https://www.zhihu.com/signin"
        response = self.session.get(url=url, headers=self.session.headers, allow_redirects=False)
        return response.cookies.get("_xsrf")

    def __encrypt(self, data: dict):
        '''URL-encode ``data`` and pass it through the ``Q`` function of ./01.js.

        :param data: the sign-in form fields
        :return: whatever ``Q`` returns, used as the request body
        '''
        data = parse.urlencode(data)
        with open("./01.js", "r", encoding="utf-8") as f:
            js_code = f.read()
        ctx = execjs.compile(js_code)
        return ctx.call("Q", data)


# Script entry point: log in with the default settings; credentials are asked
# for interactively when no usable cookies are available.
if __name__ == '__main__':
    client = ZhiHu()
    client.login()

 

js文件:

// Stand-ins for the browser globals `window` and `navigator`. They are created
// as implicit globals (no var/let) and expose only encodeURIComponent and a
// fixed userAgent string.
// NOTE(review): presumably looked up by name from the bytecode program below — confirm.
window = {
    "encodeURIComponent": encodeURIComponent
}
navigator = {
    "userAgent": "5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"
}

// typeof helper that reports "symbol" for Symbol values. On first use it
// replaces itself with one of two implementations and then delegates to it.
function s(e) {
    var trustNativeTypeof = "function" == typeof Symbol && "symbol" == typeof Symbol.t;
    if (trustNativeTypeof) {
        s = function (value) {
            return typeof value;
        };
    } else {
        s = function (value) {
            var looksLikeSymbol = value && "function" == typeof Symbol &&
                value.constructor === Symbol && value !== Symbol.prototype;
            return looksLikeSymbol ? "symbol" : typeof value;
        };
    }
    return s(e);
}

// Version string of this script.
var t = "1.1";
// Namespace object; Q() at the bottom of the file calls __g._encrypt.
var __g = {};

// Instruction type registered as opcode 0 in G's dispatch table. It decodes no
// operand fields; its handler (i.prototype.M) clears the VM's running flag.
function i() {
}

// Opcode 1 decoder: splits the instruction word into a mode bit, a register
// index and a 9-bit operand (stored twice, as `h` and `A`).
function h(e) {
    var low9 = e & 0x1FF;
    this.s = (e & 0x800) >> 11;  // bit 11: mode
    this.i = (e & 0x600) >> 9;   // bits 9-10: register index
    this.h = low9;               // bits 0-8
    this.A = low9;
}

// Opcode 2 decoder: a 2-bit register index and a 10-bit slot number.
function A(e) {
    this.i = (e & 0xC00) >> 10;  // bits 10-11
    this.A = e & 0x3FF;          // bits 0-9
}

// Opcode 3 decoder: three 2-bit register indices and a 6-bit operation selector.
function n(e) {
    this.n = (e & 0xC00) >> 10;  // bits 10-11: destination register
    this.e = (e & 0x300) >> 8;   // bits 8-9: first operand register
    this.a = (e & 0xC0) >> 6;    // bits 6-7: second operand register
    this.s = e & 0x3F;           // bits 0-5: operation selector
}

// Opcode 4 decoder: a 2-bit register index and a 10-bit count.
function e(e) {
    this.i = (e >> 10) & 3;  // bits 10-11
    this.h = e & 0x3FF;      // bits 0-9
}

// Instruction type registered as opcode 5 in G's dispatch table. It decodes no
// operand fields; its handler (a.prototype.M) pops the saved o/k/f state.
function a() {
}

// Opcode 6 decoder: same field layout as the opcode 3 decoder — three 2-bit
// register indices and a 6-bit selector.
function c(e) {
    this.n = (e & 0xC00) >> 10;  // bits 10-11
    this.e = (e & 0x300) >> 8;   // bits 8-9
    this.a = (e & 0xC0) >> 6;    // bits 6-7
    this.s = e & 0x3F;           // bits 0-5: comparison selector
}

// Opcode 7 decoder: a 10-bit jump target and a 2-bit jump mode.
function o(e) {
    this.A = (e & 0xFFF) >> 2;  // bits 2-11: target offset
    this.s = e & 3;             // bits 0-1: mode
}

// Opcode 8 decoder: a 2-bit register index, an 8-bit argument count and a
// 2-bit call mode.
function r(e) {
    this.i = (e >> 10) & 3;     // bits 10-11
    this.h = (e >> 2) & 0xFF;   // bits 2-9
    this.s = e & 3;             // bits 0-1
}

// Opcode 9 decoder: a 2-bit mode plus three overlapping views of the low bits
// (register index, 10-bit immediate, 6-bit slot number).
function k(e) {
    this.s = (e & 0xFFF) >> 10;  // bits 10-11: mode
    this.i = (e & 0x3FF) >> 8;   // bits 8-9
    this.h = e & 0x3FF;          // bits 0-9
    this.A = e & 0x3F;           // bits 0-5
}

// Opcode 10 decoder: a 2-bit mode and two 2-bit register indices.
function B(e) {
    this.s = (e & 0xFFF) >> 10;  // bits 10-11: mode
    this.n = (e & 0x3FF) >> 8;   // bits 8-9
    this.e = (e & 0xFF) >> 6;    // bits 6-7
}

// Opcode 11 decoder: a 2-bit register index and a 10-bit constant-table index.
function f(e) {
    this.i = (e & 0xC00) >> 10;  // bits 10-11
    this.A = e & 0x3FF;          // bits 0-9
}

// Opcode 12 decoder: the whole 12-bit operand is a single value.
function u(e) {
    this.A = e & 0xFFF;
}

// Opcode 13 decoder: only a 2-bit register index is used.
function C(e) {
    this.i = (e & 0xC00) >> 10;  // bits 10-11
}

// Opcode 14 decoder: the whole 12-bit operand is a single value.
function b(e) {
    this.A = e & 0xFFF;
}

// Opcode 15 decoder: a 4-bit mode, a 2-bit register index and a 6-bit count.
function g(e) {
    this.s = (e & 0xF00) >> 8;  // bits 8-11: mode
    this.i = (e & 0xC0) >> 6;   // bits 6-7: destination register
    this.h = e & 0x3F;          // bits 0-5: element count
}

// Virtual-machine state. Instruction words are dispatched on their top four
// bits through the table `D`.
function G() {
    this.c = [0, 0, 0, 0];  // four registers
    this.o = 0;             // program counter (index into b)
    this.r = [];            // saved program counters
    this.k = [];            // current locals
    this.B = [];            // saved locals
    this.f = [];            // operand stack
    this.u = [];            // saved operand stacks
    this.C = false;         // condition flag
    this.b = [];            // instruction words
    this.g = [];            // constant table
    this.G = false;         // running flag
    this.Q = null;          // handler offset used after a caught exception
    this.R = null;          // last caught exception
    this.w = [];
    this.x = 0;             // timestamp of the previous instruction
    // opcode -> instruction constructor
    this.D = {
        0: i, 1: h, 2: A, 3: n,
        4: e, 5: a, 6: c, 7: o,
        8: r, 9: k, 10: B, 11: f,
        12: u, 13: C, 14: b, 15: g
    };
}

// Opcode 0: halt — clear the VM's running flag.
i.prototype.M = function (vm) {
    vm.G = false;
}
    ,
    // Opcode 1: load a register from an immediate (mode 0) or from a local slot (mode 1).
    h.prototype.M = function (vm) {
        if (this.s === 0) {
            vm.c[this.i] = this.h;
        } else if (this.s === 1) {
            vm.c[this.i] = vm.k[this.A];
        }
    }
    ,
    // Opcode 2: store a register into a local slot.
    A.prototype.M = function (vm) {
        var value = vm.c[this.i];
        vm.k[this.A] = value;
    }
    ,
    // Opcode 3: unary/binary operation on registers. `this.s` selects the
    // operator, `this.e` and `this.a` index the operand registers and the
    // result is written to register `this.n`. Selectors other than 0-19 do nothing.
    n.prototype.M = function (e) {
        switch (this.s) {
            case 0:
                e.c[this.n] = e.c[this.e] + e.c[this.a];
                break;
            case 1:
                e.c[this.n] = e.c[this.e] - e.c[this.a];
                break;
            case 2:
                e.c[this.n] = e.c[this.e] * e.c[this.a];
                break;
            case 3:
                e.c[this.n] = e.c[this.e] / e.c[this.a];
                break;
            case 4:
                e.c[this.n] = e.c[this.e] % e.c[this.a];
                break;
            case 5:
                // loose equality
                e.c[this.n] = e.c[this.e] == e.c[this.a];
                break;
            case 6:
                e.c[this.n] = e.c[this.e] >= e.c[this.a];
                break;
            case 7:
                e.c[this.n] = e.c[this.e] || e.c[this.a];
                break;
            case 8:
                e.c[this.n] = e.c[this.e] && e.c[this.a];
                break;
            case 9:
                // strict inequality
                e.c[this.n] = e.c[this.e] !== e.c[this.a];
                break;
            case 10:
                // typeof, via the s() helper; only the first operand is used
                e.c[this.n] = s(e.c[this.e]);
                break;
            case 11:
                e.c[this.n] = e.c[this.e] in e.c[this.a];
                break;
            case 12:
                e.c[this.n] = e.c[this.e] > e.c[this.a];
                break;
            case 13:
                // unary minus; only the first operand is used
                e.c[this.n] = -e.c[this.e];
                break;
            case 14:
                e.c[this.n] = e.c[this.e] < e.c[this.a];
                break;
            case 15:
                e.c[this.n] = e.c[this.e] & e.c[this.a];
                break;
            case 16:
                e.c[this.n] = e.c[this.e] ^ e.c[this.a];
                break;
            case 17:
                e.c[this.n] = e.c[this.e] << e.c[this.a];
                break;
            case 18:
                // unsigned right shift
                e.c[this.n] = e.c[this.e] >>> e.c[this.a];
                break;
            case 19:
                e.c[this.n] = e.c[this.e] | e.c[this.a]
        }
    }
    ,
    // Opcode 4: enter a subroutine. Saves the program counter and locals, jumps
    // to the offset held in register `this.i`, moves `this.h` values from the
    // operand stack into fresh locals (keeping their stack order), then saves
    // the operand stack and starts an empty one.
    e.prototype.M = function (vm) {
        vm.r.push(vm.o);
        vm.B.push(vm.k);
        vm.o = vm.c[this.i];
        vm.k = [];
        var remaining = this.h;
        for (var moved = 0; moved < remaining; moved++) {
            vm.k.unshift(vm.f.pop());
        }
        vm.u.push(vm.f);
        vm.f = [];
    }
    ,
    // Opcode 5: return from a subroutine — restore the saved program counter,
    // locals and operand stack.
    a.prototype.M = function (vm) {
        vm.o = vm.r.pop();
        vm.k = vm.B.pop();
        vm.f = vm.u.pop();
    }
    ,
    // Opcode 6: compare registers `this.n` and `this.e` (or test `this.n` alone
    // for selectors 6 and 7) and store the outcome in the condition flag C.
    c.prototype.M = function (vm) {
        var lhs = vm.c[this.n],
            rhs = vm.c[this.e];
        switch (this.s) {
            case 0: vm.C = lhs >= rhs; break;
            case 1: vm.C = lhs <= rhs; break;
            case 2: vm.C = lhs > rhs; break;
            case 3: vm.C = lhs < rhs; break;
            case 4: vm.C = lhs == rhs; break;
            case 5: vm.C = lhs != rhs; break;
            case 6: vm.C = lhs; break;
            case 7: vm.C = !lhs;
        }
    }
    ,
    // Opcode 7: jump to `this.A`. Mode 0 always jumps, mode 1 jumps when the
    // condition flag is set, mode 2 when it is clear, mode 3 always jumps and
    // also clears the exception-handler offset. The flag is reset afterwards.
    o.prototype.M = function (vm) {
        var target = this.A;
        if (this.s === 0) {
            vm.o = target;
        } else if (this.s === 1) {
            if (vm.C) {
                vm.o = target;
            }
        } else if (this.s === 2) {
            if (!vm.C) {
                vm.o = target;
            }
        } else if (this.s === 3) {
            vm.o = target;
            vm.Q = null;
        }
        vm.C = false;
    }
    ,
    // Opcode 8: invoke a callable and store the result in register 3. In every
    // mode `this.h` values are popped from the operand stack (original order is
    // kept), but only the first two are passed on.
    r.prototype.M = function (e) {
        switch (this.s) {
            case 0:
                // Plain call of the function held in register `this.i`. It is
                // written as a member call on e.c, so `this` inside the callee
                // is the register array.
                for (var t = [], n = 0; n < this.h; n++)
                    t.unshift(e.f.pop());
                e.c[3] = e.c[this.i](t[0], t[1]);
                break;
            case 1:
                // Method call: the method name is popped first, then the
                // arguments; the receiver is the object in register `this.i`.
                for (var r = e.f.pop(), o = [], i = 0; i < this.h; i++)
                    o.unshift(e.f.pop());
                e.c[3] = e.c[this.i][r](o[0], o[1]);
                break;
            case 2:
                // Constructor call with `new`.
                for (var a = [], c = 0; c < this.h; c++)
                    a.unshift(e.f.pop());
                e.c[3] = new e.c[this.i](a[0], a[1])
        }
    }
    ,
    // Opcode 9: push onto the operand stack — a register (mode 0), an immediate
    // (mode 1), a local slot (mode 2) or a constant-table entry (mode 3).
    k.prototype.M = function (vm) {
        var stack = vm.f;
        if (this.s === 0) {
            stack.push(vm.c[this.i]);
        } else if (this.s === 1) {
            stack.push(this.h);
        } else if (this.s === 2) {
            stack.push(vm.k[this.A]);
        } else if (this.s === 3) {
            stack.push(vm.g[this.A]);
        }
    }
    ,
    // Opcode 10: property access and eval.
    // NOTE(review): case 2 is a direct eval, so the evaluated string resolves
    // names in this function's scope first; the parameter and local names here
    // (t, s, i, h, A) shadow the same-named top-level definitions. Do not
    // rename them without checking what the bytecode evaluates.
    B.prototype.M = function (t) {
        switch (this.s) {
            case 0:
                // get: register[n] = register[e][key], key popped from the stack
                var s = t.f.pop();
                t.c[this.n] = t.c[this.e][s];
                break;
            case 1:
                // set: register[e][key] = value; key is popped first, then value
                var i = t.f.pop()
                    , h = t.f.pop();
                t.c[this.e][i] = h;
                break;
            case 2:
                // evaluate a string popped from the stack as JavaScript
                var A = t.f.pop();
                t.c[this.n] = eval(A)
        }
    }
    ,
    // Opcode 11: load a constant-table entry into a register.
    f.prototype.M = function (vm) {
        var constant = vm.g[this.A];
        vm.c[this.i] = constant;
    }
    ,
    // Opcode 12: record the offset to resume at after a caught exception.
    u.prototype.M = function (vm) {
        var handlerOffset = this.A;
        vm.Q = handlerOffset;
    }
    ,
    // Opcode 13: throw the value held in register `this.i`.
    C.prototype.M = function (vm) {
        var thrown = vm.c[this.i];
        throw thrown;
    }
    ,
    // Opcode 14: create a one-argument JavaScript function backed by bytecode
    // and store it in register 3.
    b.prototype.M = function (e) {
        var t = this
            , n = [0];
        // Copy the current locals after a placeholder in slot 0; slot 0 receives
        // the argument on each call.
        e.k.forEach(function (e) {
            n.push(e)
        });
        var r = function (r) {
            // Each call runs a fresh VM over the same instruction words and
            // constants, starting at offset t.A. The locals array `n` is shared
            // between calls. J declares three parameters, so the fourth
            // argument (e.w) is not read by it.
            var o = new G;
            return o.k = n,
                o.k[0] = r,
                o.J(e.b, t.A, e.g, e.w),
                o.c[3]
        };
        // Make the function stringify as if it were native code.
        r.toString = function () {
            return "() { [native code] }"
        }
            ,
            e.c[3] = r
    }
    ,
    // Opcode 15: build an object (mode 0) or an array (mode 1) from `this.h`
    // entries popped off the operand stack and store it in register `this.i`.
    g.prototype.M = function (vm) {
        var count = this.h,
            idx;
        if (this.s === 0) {
            var obj = {};
            for (idx = 0; idx < count; idx++) {
                // the value is popped before its key
                var value = vm.f.pop();
                var key = vm.f.pop();
                obj[key] = value;
            }
            vm.c[this.i] = obj;
        } else if (this.s === 1) {
            var list = [];
            for (idx = 0; idx < count; idx++) {
                list.unshift(vm.f.pop());
            }
            vm.c[this.i] = list;
        }
    }
    ,
    // Decode a base64 program into 16-bit big-endian instruction words and
    // store them in this.b. A trailing odd byte is ignored.
    G.prototype.v = function (e) {
        // Buffer.from replaces the deprecated `new Buffer(string, encoding)`.
        var bytes = Buffer.from(e, "base64").toString("binary"),
            words = [];
        for (var pos = 0; pos < bytes.length - 1; pos += 2)
            words.push(bytes.charCodeAt(pos) << 8 | bytes.charCodeAt(pos + 1));
        this.b = words
    }
    ,
    // Decode one obfuscated constant: base64-decode it, then undo the chained
    // XOR (each byte is XORed with 24 and with the previously decoded
    // character code, starting from 66). Returns the plain string.
    G.prototype.y = function (e) {
        // Buffer.from replaces the deprecated `new Buffer(string, encoding)`.
        var bytes = Buffer.from(e, "base64").toString("binary"),
            previous = 66,
            chars = [];
        for (var pos = 0; pos < bytes.length; pos++) {
            var code = 24 ^ bytes.charCodeAt(pos) ^ previous;
            chars.push(String.fromCharCode(code));
            previous = code
        }
        return chars.join("")
    }
    ,
    // Build the constant table: string entries are decoded with y(), every
    // other entry is kept as it is.
    G.prototype.F = function (e) {
        this.g = e.map(function (entry) {
            if ("string" == typeof entry) {
                return this.y(entry);
            }
            return entry;
        }, this);
    }
    ,
    // Run loop.
    //   e: a base64 program string (then n is an encoded constant table and
    //      both are decoded first), or an array of instruction words (then n is
    //      used as the constant table directly).
    //   t: start offset, defaults to 0.
    //   n: constant table, defaults to [].
    G.prototype.J = function (e, t, n) {
        for (t = t || 0,
                 n = n || [],
                 this.o = t,
                 "string" == typeof e ? (this.F(n),
                     this.v(e)) : (this.b = e,
                     this.g = n),
                 this.G = !0,
                 this.x = Date.now(); this.G;) {
            var r = this.b[this.o++];
            // Running past the end of the program stops execution.
            if ("number" != typeof r)
                break;
            // Abort silently when more than 500 ms passed since the previous
            // instruction was fetched.
            var o = Date.now();
            if (500 < o - this.x)
                return;
            this.x = o;
            try {
                this.M(r)
            } catch (e) {
                // Remember the exception; without a handler offset (Q) it is
                // rethrown as a string, otherwise execution resumes at Q.
                if (this.R = e,
                    !this.Q)
                    throw "execption at " + this.o + ": " + e;
                this.o = this.Q
            }
        }
    }
    ,
    // Execute one instruction word: the top four bits pick the instruction
    // constructor from D, which decodes the word; its M handler then runs
    // against this VM.
    G.prototype.M = function (e) {
        var opcode = (e & 0xF000) >> 12;
        var Instruction = this.D[opcode];
        new Instruction(e).M(this);
    }
    ,
// Boot the VM: run the embedded base64 program from offset 0 with the encoded
// constant table. The first two constants decode (see G.prototype.y) to "__g"
// and "_encrypt".
// NOTE(review): presumably this is what installs __g._encrypt, which Q calls — confirm.
1 && (new G).J("4AeTAJwAqACcAaQAAAAYAJAAnAKoAJwDgAWTACwAnAKoACACGAESOTRHkQAkAbAEIAMYAJwFoAASAzREJAQYBBIBNEVkBnCiGAC0BjRAJAAYBBICNEVkBnDGGAC0BzRAJACwCJAAnAmoAJwKoACcC4ABnAyMBRAAMwZgBnESsA0aADRAkQAkABgCnA6gABoCnA+hQDRHGAKcEKAAMQdgBnFasBEaADRAkQAkABgCnBKgABoCnBOhQDRHZAZxkrAUGgA0QJEAJAAYApwVoABgBnG6sBYaADRAkQAkABgCnBegAGAGceKwGBoANECRACQAnAmoAJwZoABgBnIOsBoaADRAkQAkABgCnBugABoCnByhQDRHZAZyRrAdGgA0QJEAJAAQACAFsB4gBhgAnAWgABIBNEEkBxgHEgA0RmQGdJoQCBoFFAE5gCgFFAQ5hDSCJAgYB5AAGACcH4AFGAEaCDRSEP8xDzMQIAkQCBoFFAE5gCgFFAQ5hDSCkQAkCBgBGgg0UhD/MQ+QACAIGAkaBxQBOYGSABoAnB+EBRoIN1AUCDmRNJMkCRAIGgUUATmAKAUUBDmENIKRACQIGAEaCDRSEP8xD5AAIAgYCRoHFAI5gZIAGgCcH4QFGgg3UBQQOZE0kyQJGAMaCRQ/OY+SABoGnCCEBTTAJAMYAxoJFAY5khI/Nk+RABoGnCCEBTTAJAMYAxoJFAw5khI/Nk+RABoGnCCEBTTAJAMYAxoJFBI5khI/Nk+RABoGnCCEBTTAJAMYBxIDNEEkB3JsHgNQAA==", 0, ["BRgg", "BSITFQkTERw=", "LQYfEhMA", "PxMVFBMZKB8DEjQaBQcZExMC", "", "NhETEQsE", "Whg=", "Wg==", "MhUcHRARDhg=", "NBcPBxYeDQMF", "Lx4ODys+GhMC", "LgM7OwAKDyk6Cg4=", "Mx8SGQUvMQ==", "SA==", "ORoVGCQgERcCAxo=", "BTcAERcCAxo=", "BRg3ABEXAgMaFAo=", "SQ==", "OA8LGBsP", "GC8LGBsP", "Tg==", "PxAcBQ==", "Tw==", "KRsJDgE=", "TA==", "LQofHg4DBwsP", "TQ==", "PhMaNCwZAxoUDQUeGQ==", "PhMaNCwZAxoUDQUeGTU0GQIeBRsYEQ8=", "Qg==", "BWpUGxkfGRsZFxkbGR8ZGxkHGRsZHxkbGRcZG1MbGR8ZGxkXGRFpGxkfGRsZFxkbGR8ZGxkHGRsZHxkbGRcZGw==", "ORMRCyk0Exk8LQ==", "ORMRCyst"]);
// Public entry point (called from Python as "Q"): forwards its argument to
// __g._encrypt and returns the result.
var Q = function (payload) {
    var encrypted = __g._encrypt(payload);
    return encrypted;
};

 

参考的是这位博主的博客:https://home.cnblogs.com/u/zkqiang

 


原文链接:https://www.cnblogs.com/zhuchunyu/p/10782248.html
如有疑问请与原作者联系

标签:

版权声明:本站部分文章来自网络,如有侵权,请联系:west999com@outlook.com
特别注意:本站所有转载文章言论不代表本站观点,本站所提供的摄影照片,插画,设计作品,如需使用,请与原作者联系,版权归原作者所有

上一篇:爬虫框架Scrapy 之(二) --- scrapy文件

下一篇:lambda表达式,map函数