# 带检索功能的爬虫

代码如下

1
import re
2
import urllib3
3
import urllib
4
import requests
5
import os.path
6
# Keyword-search downloader: query the search endpoint for a keyword, list the
# returned works, then download every page of the work the user picks into
# the local ``img`` directory.

url = 'https://www.pixiv.net/'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0",
    "Cookie": "填入你的cookie",
}
# Image requests send the same headers as the search request plus a Referer.
headers_down = {**headers, "Referer": "https://www.pixiv.net/"}

IMAGE_DIR = 'img'
REQUEST_TIMEOUT = 30  # seconds; prevents a stalled connection from hanging forever


def build_search_url(keyword):
    """Return the ajax search URL for *keyword*.

    The keyword is percent-encoded and placed both in the URL path and in the
    ``word`` query parameter.
    """
    # Imported here so the submodule is guaranteed to be loaded; a bare
    # ``import urllib`` does not make ``urllib.parse`` available by itself.
    from urllib.parse import quote

    # safe='' also escapes '/', which would otherwise split the path segment
    # for keywords such as "Fate/GO".
    encoded = quote(keyword, safe='')
    return f"https://www.pixiv.net/ajax/search/artworks/{encoded}?word={encoded}&order=date_d&mode=all&p=1&csw=0&s_mode=s_tag&type=all&lang=zh"


def date_path(create_date):
    """Turn a timestamp such as ``2024-05-01T12:34:56+09:00`` into
    ``2024/05/01/12/34/56``.

    Everything from the first ``+`` onwards is dropped, then ``-``, ``T`` and
    ``:`` are each replaced by ``/``.
    """
    timestamp, _, _ = create_date.partition('+')
    return timestamp.translate(str.maketrans('-T:', '///'))


def build_page_urls(illust_id, create_date, page_count):
    """Return one image URL per page of a work, for pages ``0..page_count-1``.

    NOTE(review): assumes the image path is derived from ``createDate`` and
    that page indices are zero-based -- confirm against a real response.
    """
    prefix = f'https://i.pximg.net/img-master/img/{date_path(create_date)}/{illust_id}'
    return [f'{prefix}_p{page}_square1200.jpg' for page in range(page_count)]


def main():
    """Run the interactive search -> choose -> download flow."""
    os.makedirs(IMAGE_DIR, exist_ok=True)

    # NOTE(review): TLS verification is disabled for the search request (kept
    # from the original script); the warning is silenced to keep output clean.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    keywords = str(input('输入关键词进行搜索'))
    resp = requests.get(
        url=build_search_url(keywords),
        headers=headers,
        verify=False,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    resp.raise_for_status()
    result = resp.json()

    # Defensive: skip entries that carry no 'id'.
    # NOTE(review): confirm whether the payload can contain such entries.
    works = [
        entry
        for entry in result['body']['illustManga']['data']
        if 'id' in entry
    ]
    if not works:
        raise SystemExit('没有找到相关作品')

    for index, work in enumerate(works):
        illust_id = work['id']
        title = work['title']
        page_count = work['pageCount']
        create_date = work['createDate']
        print(f'作品编号:{index},作品标题:{title},作品id:{illust_id},作品页数:{page_count},创作时间:{create_date}')

    try:
        choose = int(input('请输入需要爬取的图片编号'))
    except ValueError:
        raise SystemExit('编号必须是整数') from None
    if not 0 <= choose < len(works):
        raise SystemExit('编号超出范围')

    # Read every field from the chosen work; the loop variables above still
    # hold the values of the last listed work.
    chosen = works[choose]
    illust_id = chosen['id']
    page_urls = build_page_urls(illust_id, chosen['createDate'], chosen['pageCount'])

    for page, page_url in enumerate(page_urls):
        image = requests.get(url=page_url, headers=headers_down, timeout=REQUEST_TIMEOUT)
        # Do not write an error body to disk as if it were an image.
        image.raise_for_status()
        # The requested resource is a .jpg, so keep that extension on disk.
        with open(f'{IMAGE_DIR}/{illust_id}_p{page}.jpg', 'wb') as f:
            f.write(image.content)


if __name__ == '__main__':
    main()