# Python web crawler: scraping a novel

import requests
import re
import os

# Path of the output file that is inspected (and emptied) before scraping.
filename = '1.txt'

# Report whether the file is present; when it is, wipe whatever it holds so
# the run starts from an empty file. A missing file is only reported.
if not os.path.exists(filename):
    print(f"文件 '{filename}' 不存在。")
else:
    print(f"文件 '{filename}' 存在。")
    with open(filename, "r+") as handle:
        # Truncating to size 0 discards the entire previous content.
        handle.truncate(0)


# pip install -r requirements.txt

# pip freeze > requirements.txt

# Inspect the page source in a browser with the view-source: prefix
# url="https://www.xzmncy.com/list/24980/11627904.html"
url = "https://www.xzmncy.com/list/24980/"

# NOTE(review): the HTML markup inside the three literals below was stripped
# when this script was published; only the capture groups survived. They are
# reconstructed from how the groups are used further down (two groups for a
# chapter link, one group for the chapter body, one tag replaced by a
# newline). Confirm each of them against the real page source (view-source:)
# before relying on this script.
CHAPTER_LINK_PATTERN = r'<dd><a href="(.*?)">(.*?)</a></dd>'
CHAPTER_BODY_PATTERN = r'<div id="htmlContent">(.*?)</div>'
LINE_BREAK_TAG = "<br>"

# 1. Send the request for the chapter index page.
response = requests.get(url)
# 2. Read the page source.
mainText = response.text
# Each match is a (relative link, chapter title) tuple; only the first five
# chapters are processed.
chapters = re.findall(CHAPTER_LINK_PATTERN, mainText)[0:5]

# Open the output file once in append mode; the context manager guarantees it
# is flushed and closed even if a request fails part-way through.
with open(filename, mode='a', encoding='utf-8') as f:
    for info in chapters:
        chapter_url = "https://www.xzmncy.com" + info[0]
        print(info)
        htm_data = requests.get(chapter_url).text
        # 3. Extract the chapter body; `.*?` lazily matches any characters.
        # No re.S flag is passed, so `.` does not cross line breaks.
        bodies = re.findall(CHAPTER_BODY_PATTERN, htm_data)
        if not bodies:
            # Skip pages whose layout does not match instead of raising
            # IndexError and aborting the remaining chapters.
            print(f"No chapter body found at {chapter_url}; skipping.")
            continue
        # Title framed by blank lines, then the body with the markup line
        # breaks turned into real newlines.
        text = '\n\n' + info[1] + '\n\n' + ' ' + bodies[0].replace(LINE_BREAK_TAG, '\n')
        f.write(text)
        print(text)

# Related tags: python, web crawler, windows