Python 爬虫获取小说代码示例
# Third-party dependencies: install with `pip install requests beautifulsoup4`.
import requests
from bs4 import BeautifulSoup
def get_novel_content(url):
    """Fetch a novel page and extract its text content.

    Args:
        url: URL of the novel page to scrape.

    Returns:
        The extracted novel text, or None if the HTTP request fails
        or the expected content element is not found on the page.
    """
    # 发送请求获取页面内容; timeout 防止在服务器无响应时永久挂起
    response = requests.get(url, timeout=10)
    # 检查响应状态码
    if response.status_code == 200:
        # 使用BeautifulSoup解析页面内容
        soup = BeautifulSoup(response.text, 'html.parser')
        # 在这里根据网页结构提取小说内容的元素,并解析出文本
        # 这里只是一个示例,实际情况需要根据网站结构具体调整
        content_div = soup.find('div', class_='novel-content')
        # find() 返回 None 时直接取 .text 会抛 AttributeError,先显式检查
        if content_div is None:
            print("Failed to locate novel content element on the page.")
            return None
        return content_div.text
    else:
        print("Failed to retrieve content. Status code:", response.status_code)
        return None
if __name__ == "__main__":
    # 在此处填入要爬取的小说页面地址
    target_url = "https://example.com/novel"
    content = get_novel_content(target_url)
    if content:
        print(content)
在这个示例中,我们使用了 `requests` 库来发送 HTTP 请求并获取网页内容,`BeautifulSoup` 库用于解析 HTML。你需要安装这两个库,可以通过 `pip install requests` 和 `pip install beautifulsoup4` 来安装。
在实际应用中,你需要根据具体的小说网站结构来调整代码中的选择器,以确保能够正确地提取小说内容。同时,请确保你的爬取行为符合目标网站的使用条款和 robots.txt 规则,避免对服务器造成过大压力。
# Third-party dependencies: install with `pip install requests beautifulsoup4`.
import requests
from bs4 import BeautifulSoup
def get_novel_content(url):
    """Fetch a novel page and extract its text content, with error handling.

    Args:
        url: URL of the novel page to scrape.

    Returns:
        The extracted novel text, or None if the request fails or the
        expected content element is not found on the page.
    """
    try:
        # 发送请求获取页面内容; timeout 防止在服务器无响应时永久挂起
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # 如果请求不成功,会抛出异常
        # 使用BeautifulSoup解析页面内容
        soup = BeautifulSoup(response.text, 'html.parser')
        # 在这里根据网页结构提取小说内容的元素,并解析出文本
        # 这里只是一个示例,实际情况需要根据网站结构具体调整
        content_div = soup.find('div', class_='novel-content')
        # find() 返回 None 时取 .text 会抛 AttributeError;显式处理,
        # 而不是让它落入下面宽泛的 except Exception 分支
        if content_div is None:
            print("Failed to locate novel content element on the page.")
            return None
        return content_div.text
    except requests.exceptions.RequestException as e:
        print("Error fetching content:", e)
        return None
    except Exception as e:
        print("An error occurred:", e)
        return None
def save_novel_to_file(novel_content, filename):
    """Write the novel text to a local file using UTF-8 encoding.

    Args:
        novel_content: Text to write.
        filename: Destination file path (created or overwritten).
    """
    try:
        with open(filename, 'w', encoding='utf-8') as file:
            file.write(novel_content)
        print("Novel content saved to", filename)
    # 只捕获文件系统相关错误,避免宽泛的 except Exception 掩盖编程错误
    except OSError as e:
        print("Error saving novel content:", e)
if __name__ == "__main__":
    # 在此处填入要爬取的小说页面地址
    source_url = "https://example.com/novel"
    text = get_novel_content(source_url)
    if text:
        save_novel_to_file(text, "novel.txt")
在这个更新的示例中,我添加了对请求和文件操作的错误处理,以及保存小说内容到本地文件的功能。你可以在save_novel_to_file函数中指定保存小说内容的文件名,并在novel_url变量中指定你要爬取的小说网站的URL。