The page to crawl:
猫猫图 (cat pictures): http://p.ik123.com/zt/maomi/68_1.html
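One step worth understanding up front: ik123.com serves legacy GBK-family HTML, which is why the script below sets res.encoding = "gb2312" before reading res.text; without it the Chinese text, including the 末页 ("last page") pager link that the pagination regex matches, decodes as mojibake. A minimal fetch sketch, assuming the site still serves that encoding:

import requests

# A minimal User-Agent; the full script below uses a complete browser string.
HEADERS = {"User-Agent": "Mozilla/5.0"}

# Fetch the first list page and decode it explicitly as gb2312
# rather than trusting requests' guessed encoding.
res = requests.get("http://p.ik123.com/zt/maomi/68_1.html", headers=HEADERS, timeout=5)
res.encoding = "gb2312"
print(res.text[:300])  # peek at the decoded HTML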
import os
import re

import requests
from bs4 import BeautifulSoup

HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                         "Chrome/99.0.4844.51 Safari/537.36 Edg/99.0.1150.30"}


# Read the last page number out of the "末页" (last page) pager link.
def get_pagesize(html):
    print("Getting page count")
    findend_page = re.findall(r"<a href='68_(\d+)\.html'>末页</a>", html)
    if findend_page:  # re.findall returns a list, never None, so test for emptiness
        return findend_page[0]
    else:
        print("Could not find the page count")


# Fetch a URL: return decoded HTML by default, raw bytes when key == "img".
def analyzedata(url, key):
    if key is None:
        print("Fetching page")
        res = requests.get(url=url, headers=HEADERS, timeout=2)
        res.encoding = "gb2312"  # the site serves GBK-family HTML
        return res.text
    elif key == "img":
        res = requests.get(url=url, headers=HEADERS)
        return res.content  # binary image data, no decoding needed


# Build the link for every paginated list page.
def get_href(baseurl):
    url_list = []
    print("Collecting list-page links")
    html = analyzedata(baseurl, key=None)
    pagesize = int(get_pagesize(html))
    if pagesize > 0:
        for i in range(1, pagesize + 1):
            url_list.append(f"http://p.ik123.com/zt/maomi/68_{i}.html")
    else:
        print("Bad page count")
    return url_list


# Collect the detail-page links from every list page.
def get_content_url(url_list):
    hrefs = []
    print("Collecting detail-page links")
    for href in url_list:
        html = analyzedata(href, key=None)
        soup = BeautifulSoup(html, "html.parser")
        for img in soup.find_all("li", class_="img"):
            href = re.findall(r'class="preview" href="(.*?)"', str(img))
            hrefs.append(href)
    return hrefs


# Download every image on every detail page.
def getimg(hrefs):
    print("Downloading images")
    num = 0
    s = 0
    for href in hrefs:
        num += 1
        html = analyzedata(href[0], key=None)
        soup = BeautifulSoup(html, "html.parser")
        for img in soup.find_all("img"):
            s += 1
            src = re.findall(r'img alt=".*" src="(.*?)"', str(img))[0]
            title = re.findall(r'img alt="(.*?)" src=".*"', str(img))[0]
            save_img(src, title, num, s)


# Write one image into the local img directory.
def save_img(src, title, num, s):
    content = analyzedata(src, "img")
    os.makedirs("img", exist_ok=True)  # make sure the target directory exists
    with open(fr".\img\{title}{s}.jpg", "wb") as f:
        f.write(content)
    print(f"Saving photos from page {num}")
    print(f"{s} photos saved so far")


def main():
    getimg(get_content_url(get_href("http://p.ik123.com/zt/maomi/68_1.html")))


if __name__ == '__main__':
    main()

# getimg('http://www.ik123.com/q/tuku/keai/62324.html')
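The pager regex is easy to get wrong: d+ without a backslash matches a literal letter d, and an unescaped . matches any character, so a mistyped pattern silently finds nothing and get_pagesize falls through. A quick sanity check is to run the pattern against a hand-written fragment shaped like the site's pager markup (the snippet below is made up for illustration, not captured from the live site):

import re

# Hypothetical pager fragment in the shape the regex targets.
sample = "<a href='68_25.html'>下页</a> <a href='68_26.html'>末页</a>"

# \d+ captures the page number and \. escapes the literal dot.
last_page = re.findall(r"<a href='68_(\d+)\.html'>末页</a>", sample)
print(last_page)  # ['26']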
Finally
That wraps up 怡然飞鸟's notes on scraping cat pictures with Python; for more on the topic, search the other articles on 靠谱客.