Python 爬虫示例

需求:读取 wangzhi.txt 文件内的网址,抓取每个页面中想要的内容,并输出保存到文本文件 result.txt

import requests
from bs4 import BeautifulSoup

# Load the URL list from wangzhi.txt: one entry per line, with surrounding
# whitespace trimmed and blank lines dropped.
with open('wangzhi.txt', 'r', encoding='utf-8') as url_file:
    stripped_lines = (raw_line.strip() for raw_line in url_file)
    # Consumed inside the `with` so the file is still open while iterating.
    url_list = [candidate for candidate in stripped_lines if candidate]

# Fetch every URL in url_list and write the text of each <h1> element and
# each <div id="content"> element to result.txt, followed by a separator line.
REQUEST_TIMEOUT = 10  # seconds; without it a stalled host blocks forever
SEPARATOR = '=' * 50 + '\n'

with open('result.txt', 'w', encoding='utf-8') as out_file:
    for inurl in url_list:
        try:
            req = requests.get(url=inurl, timeout=REQUEST_TIMEOUT)
            # Treat HTTP 4xx/5xx as failures so error pages are not scraped
            # as if they were real content.
            req.raise_for_status()
        except requests.RequestException as exc:
            # One bad URL should not discard the results for the others.
            print(f"Skipping {inurl}: {exc}")
            continue

        # Decode the body as UTF-8 regardless of what the server declared.
        req.encoding = "utf-8"
        soup = BeautifulSoup(req.text, features="html.parser")
        # All <h1> matches come first, then all <div id="content"> matches.
        pinzhong_items = soup.find_all(
            "h1") + soup.find_all("div", id="content")
        for pinzhong_item in pinzhong_items:
            dd = pinzhong_item.text.strip()
            # Write the extracted text and a separator line.
            out_file.write(dd + '\n')
            out_file.write(SEPARATOR)
print("结果已保存到 result.txt 文件")
最后修改于:2023年08月18日 18:11