Python爬虫之Requests库的基本使用

2018-11-27 08:31:27来源:博客园 阅读 ()

新老客户大回馈,云服务器低至5折

  1 import requests
  2 response = requests.get('http://www.baidu.com/')  # send a GET request and keep the returned response object
  3 print(type(response))  # type of the response object
  4 print(response.status_code)  # HTTP status code of the reply
  5 print(type(response.text))  # type of the body when read as text
  6 print(response.text)  # response body as text
  7 print(response.cookies)  # cookies carried by the response
  8 
  9 # The other HTTP request methods (return values are discarded here)
 10 import requests
 11 requests.post('http://httpbin.org/post')
 12 requests.put('http://httpbin.org/put')
 13 requests.delete('http://httpbin.org/delete')
 14 requests.head('http://httpbin.org/get')  # HEAD is sent to the /get endpoint
 15 requests.options('http://httpbin.org/get')  # OPTIONS is sent to the /get endpoint as well
 16 
 17 # Basic GET request
 18 import requests
 19 response = requests.get('http://httpbin.org/get')
 20 print(response.text)  # print the response body as text
 21 
 22 # GET request with parameters (here written directly into the URL's query string)
 23 import requests
 24 response = requests.get('http://httpbin.org/get?name=germey&age=22')
 25 print(response.text)
 26 
 27 import requests
 28 data = {
 29     'name': 'germey',
 30     'age': 22
 31 }
 32 response = requests.get('http://httpbin.org/get', params = data)
 33 print(response.text)
 34 
 35 # Parsing a JSON response
 36 import requests
 37 import json
 38 response = requests.get('http://httpbin.org/get')
 39 print(type(response.text))  # type of the raw body text
 40 print(response.json())  # body parsed by the response's own json() method
 41 print(json.loads(response.text))  # the same body text parsed with the stdlib json module
 42 print(type(response.json()))  # type of the parsed result
 43 
 44 # Fetching binary data
 45 import requests
 46 response = requests.get('http://github.com/favicon.ico')
 47 print(type(response.text), type(response.content))  # compare the types of .text and .content
 48 print(response.text)  # the icon's body viewed as text
 49 print(response.content)  # the icon's body as raw content
 50 
 51 # 保存图片
 52 import requests
 53 response = requests.get('http://github.com/favicon.ico')
 54 with open('1.ico', 'wb') as f:
 55     f.write(response.content)
 56     f.close()
 57 
 58 # Adding headers -- the original author notes that the request fails without them
 59 import requests
 60 response = requests.get('http://www.zhihu.com/explore')  # sent with no custom headers
 61 print(response.text)
 62 
 63 import requests
 64 headers = {  # custom request headers: a desktop Chrome User-Agent string
 65     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
 66 }
 67 response = requests.get('http://zhihu.com/explore', headers = headers)  # note: the host is written without "www." here
 68 print(response.text)
 69 
 70 # 基本的POST请求
 71 import requests
 72 data = {'name': 'germey', 'age': 22}
 73 response = requests.post('http://httpbin.org/post', data = data)
 74 print(response.text)
 75 
 76 import requests
 77 data = {'name':'germey', 'age':22}
 78 headers = {
 79     'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
 80 }
 81 response = requests.post('http://httpbin.org/post', data = data, headers = headers)
 82 print(response.json())
 83 
 84 # Response attributes: print each attribute's type next to its value
 85 import requests
 86 response = requests.get('http://www.jianshu.com')
 87 print(type(response.status_code), response.status_code)  # HTTP status code
 88 print(type(response.headers), response.headers)  # response headers
 89 print(type(response.cookies), response.cookies)  # cookies
 90 print(type(response.url), response.url)  # URL of the response
 91 print(type(response.history), response.history)  # history of earlier responses (e.g. redirects)
 92 
 93 # 文件上传
 94 import requests
 95 files = {'file':open('1.ico', 'rb')}
 96 response = requests.post('http://httpbin.org/post', files = files)
 97 print(response.text)
 98 
 99 # 获取cookie
100 import requests
101 response = requests.get('http://www.baidu.com')
102 print(response.cookies)
103 for key, value in response.cookies.items():
104     print(key + ' = ' + value)
105 
106 # Session persistence (simulating a login)
107 import requests
108 requests.get('http://httpbin.org/cookies/set/number/123456789')  # standalone request to the cookie-setting endpoint
109 response = requests.get('http://httpbin.org/cookies')  # second, independent request made without a shared session
110 print(response.text)
111 
112 import requests
113 s = requests.session()
114 s.get('http://httpbin.org/cookies/set/number/123456789')
115 response = s.get('http://httpbin.org/cookies')
116 print(response.text)
117 
118 # Certificate verification
119 import requests
120 response = requests.get('https://www.12306.cn')  # HTTPS request; no `verify` argument is passed, so the library default applies
121 print(response.status_code)

 

标签:

版权声明:本站文章部分来自网络,如有侵权,请联系:west999com@outlook.com
特别注意:本站所有转载文章言论不代表本站观点,本站所提供的摄影照片,插画,设计作品,如需使用,请与原作者联系,版权归原作者所有

上一篇:利用uWSGI和nginx进行服务器部署

下一篇:django-基于中间件实现限制ip频繁访问