-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path一键爬取XX阁小说.py
More file actions
44 lines (41 loc) · 1.47 KB
/
一键爬取XX阁小说.py
File metadata and controls
44 lines (41 loc) · 1.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os
import requests
from lxml import etree
def download_txt(name):
    """Search the site for a novel and save its chapters as UTF-8 text files.

    The first search hit for ``name`` is taken as the novel. Its chapter
    index page is fetched, and every chapter link after the first 12 is
    downloaded and written to ``<name>/<chapter title>.txt`` relative to the
    current working directory. Characters that are illegal in file names are
    removed from both the directory name and the chapter file names.

    Args:
        name: Novel title, used as the search keyword and as the name of the
            output directory.

    Raises:
        IndexError: If the search page yields no matching novel.
        requests.RequestException: If an HTTP request fails or times out.
        UnicodeDecodeError: If a response body is not valid UTF-8.
    """
    host = "https://www.1biqug.com"
    # Seconds; without a timeout a stalled connection blocks forever.
    timeout = 30
    # Deletes the characters Windows forbids in file names. Applied to each
    # path component separately so the directory that gets created and the
    # directory the files are written into are always the same one.
    strip_illegal = str.maketrans("", "", '\\/:*?"<>|')

    resp = requests.get(
        host + "/searchbook.php", params={"keyword": name}, timeout=timeout
    )
    search_page = etree.HTML(resp.content.decode())
    hits = search_page.xpath("//li/span[@class='s2']/a/@href")
    if not hits:
        raise IndexError("no search result for {!r}".format(name))
    detail_url = host + hits[0]

    resp = requests.get(detail_url, timeout=timeout)
    index_page = etree.HTML(resp.content.decode())
    chapter_links = index_page.xpath("//div[@id='list']//dd//a/@href")
    print(chapter_links)

    folder = name.translate(strip_illegal)
    if folder:  # an empty name writes into the current directory
        os.makedirs(folder, exist_ok=True)

    # Paths already written during this run. The first write to a path
    # truncates it, so re-running the script replaces old output instead of
    # appending to it; a repeated chapter title within one run still appends.
    written = set()

    # NOTE(review): the first 12 links are skipped - presumably a "latest
    # chapters" box that duplicates entries further down; confirm on the site.
    for link in chapter_links[12:]:
        url = host + link
        resp = requests.get(url, timeout=timeout)
        page = etree.HTML(resp.content.decode())
        title = page.xpath("//h1/text()")
        if not title:
            # Without an <h1> there is nothing to name the file after.
            print("跳过无标题章节:", url)
            continue
        path = os.path.join(folder, title[0].translate(strip_illegal) + ".txt")
        content = page.xpath("//div[@id='content']//text()")
        mode = "a" if path in written else "w"
        written.add(path)
        # newline="" keeps the explicit "\r\n" from becoming "\r\r\n" on
        # Windows through text-mode newline translation.
        with open(path, mode, encoding="utf-8", newline="") as f_content:
            for con in content:
                # These markers start the site's own trailer text, so
                # everything from here on is dropped.
                if "chaptererror();" in con or "本站最新域名:" in con:
                    break
                f_content.write(con + "\r\n")
        print(title[0])
    print(name, "下载完成了")
if __name__ == "__main__":
    # Script entry point: prompt for a novel title and download it.
    download_txt(input("请输入小说名:"))