import re
from bs4 import BeautifulSoup
def cleartxt(
    input_path: str = r"C:\Users\admin\Desktop\2\fengshui_ok.txt",
    output_path: str = r"C:\Users\admin\Desktop\2\cleaned_file.txt",
) -> None:
    """Strip HTML markup and punctuation from a text file and save the result.

    The input is parsed with BeautifulSoup's "html.parser" backend, reduced
    to its text content, and then every character that is neither a word
    character nor whitespace is removed.

    Args:
        input_path: File to read, decoded as UTF-8. Defaults to the path
            that was previously hard-coded in this function.
        output_path: File to write the cleaned text to, encoded as UTF-8.
            Opened in "w" mode, so existing content is replaced. Defaults
            to the path that was previously hard-coded in this function.

    Raises:
        OSError: If either file cannot be opened.
    """
    # Read the whole source file into memory.
    with open(input_path, "r", encoding="utf8") as src:
        raw = src.read()

    # Use BeautifulSoup to drop the HTML markup and keep only the text.
    plain = BeautifulSoup(raw, "html.parser").get_text()

    # Remove punctuation: anything that is not a word character or whitespace.
    # NOTE: the "\w" class matches "_", so underscores are kept.
    cleaned = re.sub(r"[^\w\s]", "", plain)

    # Save the cleaned text.
    with open(output_path, "w", encoding="utf8") as dst:
        dst.write(cleaned)
# © Copyright notice
# Copyright of this article belongs to the author; do not reproduce without permission.
# THE END