import requests, os, re
from fake_useragent import FakeUserAgent
class BQBSpider(object):
    """Crawl the adoutu.com picture listing pages and save every image found.

    The crawl is a four-step pipeline: prepare the request data
    (``__init__``), fetch each listing page (``parse_start_url``), extract
    and download the image URLs (``parse_response_data``) and write the
    bytes to disk (``save_data``).
    """

    # Download directory below the current working directory. It is created
    # as soon as the class is defined; ``exist_ok`` avoids the race between
    # an existence check and the creation.
    os_path = os.getcwd()+'/爱斗图/'
    os.makedirs(os_path, exist_ok=True)

    # Seconds to wait on the server before a request is abandoned, so that
    # one stalled connection cannot hang the whole crawl.
    request_timeout = 10

    def __init__(self):
        """Step 1: prepare the listing URL template and the request headers."""
        user_agent = FakeUserAgent().random
        self.start_url = 'https://adoutu.com/picture/list/{}'
        self.headers = {
            # Send the generated value itself, not the literal text
            # 'user_agent'.
            'User-Agent': user_agent
        }

    def parse_start_url(self, first_page=1, last_page=2776):
        """Step 2: request every listing page and hand the response on.

        Args:
            first_page: Number of the first listing page to fetch.
            last_page: Number of the last listing page to fetch (inclusive).

        A page that cannot be fetched is reported and skipped so that the
        remaining pages are still crawled.
        """
        # Walk the page numbers to simulate paging through the listing.
        for page in range(first_page, last_page + 1):
            # Fill the page number into the URL template.
            start_url = self.start_url.format(page)
            try:
                response = requests.get(
                    start_url,
                    headers=self.headers,
                    timeout=self.request_timeout,
                )
            except requests.RequestException as exc:
                print(f'page {page}: request failed ({exc})')
                continue
            if not response.ok:
                print(f'page {page}: HTTP {response.status_code}')
                continue
            self.parse_response_data(response, page)

    def parse_response_data(self, response, page):
        """Step 3: extract the image URLs from a page and download each one.

        Args:
            response: Response object of a listing page; only its
                ``content`` bytes are read.
            page: Number of the listing page, used in progress messages.
        """
        # Undecodable bytes are replaced rather than aborting the crawl.
        resp_data = response.content.decode(errors='replace')
        for img_url in self._extract_image_urls(resp_data):
            img_name = self._image_name(img_url)
            if not img_name:
                # Nothing usable as a file name (URL without a path).
                continue
            try:
                img_response = requests.get(
                    img_url,
                    headers=self.headers,
                    timeout=self.request_timeout,
                )
            except requests.RequestException as exc:
                print(f'page {page}: {img_url} failed ({exc})')
                continue
            if not img_response.ok:
                # Do not store an error page under an image file name.
                print(f'page {page}: {img_url} HTTP {img_response.status_code}')
                continue
            self.save_data(img_response.content, img_name)

    @staticmethod
    def _extract_image_urls(html):
        """Return the absolute http(s) URLs found in ``src="..."`` attributes."""
        return [
            url
            for url in re.findall(r'src="(.*?)"', html)
            if url.startswith(('http://', 'https://'))
        ]

    @staticmethod
    def _image_name(img_url):
        """Return the last path segment of ``img_url`` ('' if it has no path).

        This replaces the fixed slice ``img_url[30:]``, which only produced a
        file name for URLs whose prefix was exactly 30 characters long.
        """
        _, _, remainder = img_url.partition('://')
        _, _, path = remainder.partition('/')
        # Query string and fragment are not part of the file name.
        path = path.split('?', 1)[0].split('#', 1)[0]
        return path.rstrip('/').rsplit('/', 1)[-1]

    def save_data(self, data, img_name):
        """Step 4: write the downloaded bytes into the download directory.

        Args:
            data: Raw bytes of the image.
            img_name: File name to create inside ``os_path``.
        """
        with open(self.os_path + img_name, 'wb') as f:
            f.write(data)
        print(f'{img_name}===========保存完成')
if __name__ == '__main__':
    # Script entry point: build the spider, then crawl the listing pages.
    spider = BQBSpider()
    spider.parse_start_url()