一、引用模块
import requests
from bs4 import BeautifulSoup
requests模块用于发送post请求完成登录
beautifulsoup模块用于对网页进行解析
二、登录获取cookie
# 1. Log in and capture the session cookies
def Get_Cookie(url):
    """POST to the login URL and return the cookies set on the session.

    Parameters
    ----------
    url : str
        The login endpoint to POST to.

    Returns
    -------
    dict
        Cookie name -> value pairs held by the session after the request.
    """
    session = requests.Session()
    headers = {
        'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
    }
    # NOTE(review): no form payload is posted — a real login normally needs
    # `data={'username': ..., 'password': ...}`; confirm against the site.
    # timeout added so a dead server cannot hang the caller forever.
    session.post(url, headers=headers, timeout=10)
    # Flatten the CookieJar into a plain dict for reuse in later requests.
    return dict(session.cookies)
三、使用登录后获取的cookie进行一些操作
# 2. Use the login cookies to scrape data from a page
def Get_Shop_Lise():
    """Fetch a page with the login cookies and collect table-cell text.

    Returns
    -------
    list of str
        The text of the first <td> in every <tr> on the fetched page;
        rows without a <td> (e.g. header-only rows) are skipped.
    """
    # Accumulator for the scraped cell texts (a list, not a dict).
    result = []
    # Obtain cookies by logging in first.
    cookies = Get_Cookie("http://XXX.com")  # login URL
    # Request headers — adjust Host/Referer to match the real target site.
    header = {
        "Accept": "text/plain, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9,zh-TW;q=0.8,en-US;q=0.7,en;q=0.6",
        "Connection": "keep-alive",
        "Host": "XXXX.com",  # change Host as needed
        "Referer": "/index.htm",  # change Referer as needed
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"
    }
    get_url = 'http://xxx.com'  # URL visited after login
    # timeout prevents an unresponsive server from blocking indefinitely.
    res = requests.post(get_url, headers=header, cookies=cookies, timeout=10)
    soup = BeautifulSoup(res.text, 'lxml')
    for tr in soup.find_all('tr'):
        td = tr.find('td')  # first cell of the row, or None
        if td:
            result.append(td.getText())
    return result