-
Notifications
You must be signed in to change notification settings - Fork 0
/
pixiv.py
83 lines (75 loc) · 2.96 KB
/
pixiv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from datetime import datetime
import requests
import json
import re
import os
class Pixiv:
    """Small Pixiv client: fetch the daily ranking and download its images.

    Every request goes through a single ``requests.Session`` that carries a
    proxy configuration, browser-like headers and a cookie string loaded
    from a local file.
    """

    # Proxy settings used when the caller does not pass any.
    DEFAULT_PROXIES = {
        'http': "http://127.0.0.1:1080",
        'https': "http://127.0.0.1:1080"
    }

    # Captures the value of the "original" key embedded in an artwork page.
    ORIGINAL_URL_PATTERN = re.compile(r'(?<="original":").*?(?="\})')

    # Extensions recognised directly at the end of an image URL.
    EXTENSION_PATTERN = re.compile(r"[.](jpg|png|jpeg)$")

    def __init__(self, proxies=None, cookie_path='.cookies'):
        """Create the session and load the cookie header.

        Args:
            proxies: Mapping assigned to ``session.proxies``. ``None`` (the
                default) selects ``DEFAULT_PROXIES``; pass ``{}`` to use no
                proxy.
            cookie_path: File whose contents become the ``cookie`` header.
        """
        self.session = requests.Session()
        self.session.proxies = dict(
            self.DEFAULT_PROXIES if proxies is None else proxies)
        self.session.headers = {
            "accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "user-agent":
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36",
            "cookie": ""
        }
        self.get_cookies(cookie_path)

    def get_cookies(self, path='.cookies'):
        """Load the cookie string from *path* into the session headers.

        Surrounding whitespace is stripped so that a trailing newline in the
        file does not end up inside the header value.

        Raises:
            OSError: If the file cannot be opened.
        """
        with open(path, 'r') as f:
            self.session.headers["cookie"] = f.read().strip()

    def get_ranklist(self, restrict=False):
        """Return the ``contents`` list of page 1 of the daily illust ranking.

        Args:
            restrict: When true, request mode ``daily_r18`` instead of
                ``daily``.
        """
        url = "https://www.pixiv.net/ranking.php?mode={mode}&content=illust&p=1&format=json".format(
            mode="daily_r18" if restrict else "daily")
        res = self.session.get(url)
        data = json.loads(res.text)
        return data["contents"]

    def get_images(self, illust_id):
        """Return the original-image URL for every given artwork id.

        Args:
            illust_id: An iterable of artwork ids, or a single ``int``/``str``
                id.

        Returns:
            A list of URLs, one per id, in the same order as the input.

        Raises:
            ValueError: If an artwork page contains no ``"original"`` URL.
        """
        img_urls = []
        for i in self._as_list(illust_id):
            page_url = "https://www.pixiv.net/artworks/" + str(i)
            res = self.session.get(page_url)
            match = self.ORIGINAL_URL_PATTERN.search(res.text)
            if match is None:
                # Fail with a message that names the page instead of an
                # AttributeError on None.
                raise ValueError(
                    "no original image URL found on " + page_url)
            img_urls.append(match.group())
        return img_urls

    def dl_images(self, dir_name, urls, illust_id):
        """Download *urls* into ``<script dir>/<dir_name>/<YYYYMMDD>/``.

        Files are named by their 1-based position in *urls* plus the image
        extension (``1.jpg``, ``2.png``, ...).

        Args:
            dir_name: Folder name, joined onto this file's directory.
            urls: Iterable of image URLs, or a single URL string.
            illust_id: Either one artwork id used for every request, or a
                list/iterable of ids paired with *urls* by position. The id
                forms the ``referer`` header; URLs without a matching id get
                the bare ``https://www.pixiv.net/artworks/`` referer.
        """
        date = datetime.now().strftime('%Y%m%d')
        file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 dir_name, date)
        # exist_ok avoids the check-then-create race of a separate exists().
        os.makedirs(file_path, exist_ok=True)

        urls = self._as_list(urls)
        if isinstance(illust_id, (str, int)):
            ids = [illust_id] * len(urls)
        else:
            ids = list(illust_id)

        for index, u in enumerate(urls, start=1):
            artwork = str(ids[index - 1]) if index <= len(ids) else ""
            res = self.session.get(
                u,
                headers={
                    "referer": "https://www.pixiv.net/artworks/" + artwork
                })
            name = str(index) + self._image_suffix(u)
            with open(os.path.join(file_path, name), "wb") as img:
                img.write(res.content)

    @staticmethod
    def _as_list(value):
        """Wrap a lone int/str in a list; copy any other iterable to a list."""
        if isinstance(value, (str, int)):
            return [value]
        return list(value)

    @classmethod
    def _image_suffix(cls, url):
        """Return the file extension of *url*, including the leading dot.

        Falls back to ``os.path.splitext`` on the URL without its query
        string when the extension is not jpg/png/jpeg; the result may be
        an empty string.
        """
        match = cls.EXTENSION_PATTERN.search(url)
        if match:
            return match.group()
        return os.path.splitext(url.split("?", 1)[0])[1]
if __name__ == "__main__":
    # Script entry: pull both daily rankings, resolve each artwork's image
    # URL, then save the images into per-ranking folders.
    client = Pixiv()

    # Ranking entries: first the default list, then the one requested with
    # the restrict flag set.
    daily_entries = client.get_ranklist()
    restricted_entries = client.get_ranklist(True)

    # Artwork ids, kept in ranking order.
    daily_ids = []
    for entry in daily_entries:
        daily_ids.append(entry["illust_id"])
    restricted_ids = []
    for entry in restricted_entries:
        restricted_ids.append(entry["illust_id"])

    # Image URLs for each ranking.
    daily_urls = client.get_images(daily_ids)
    restricted_urls = client.get_images(restricted_ids)

    # Download into one folder per ranking.
    client.dl_images("rank_img", daily_urls, daily_ids)
    client.dl_images("rank_img_r18", restricted_urls, restricted_ids)