init commit
This commit is contained in:
BIN
utils/__pycache__/io.cpython-312.pyc
Normal file
BIN
utils/__pycache__/io.cpython-312.pyc
Normal file
Binary file not shown.
55
utils/filter_comments.py
Normal file
55
utils/filter_comments.py
Normal file
@@ -0,0 +1,55 @@
|
||||
import argparse
|
||||
import csv
|
||||
import os
|
||||
|
||||
def filter_comments(csv_in, csv_out, keywords):
    """Copy the rows of ``csv_in`` whose text column contains a keyword.

    The second column of every row is lower-cased and searched for each
    lower-cased keyword as a plain substring. Matching rows are written
    unchanged to ``csv_out`` below a fixed ``username,text`` header, and a
    one-line summary is printed to stdout.

    Args:
        csv_in: Path of the UTF-8 CSV file to read. If its first record
            starts with ``username`` (any letter case) that record is
            treated as a header and skipped.
        csv_out: Path of the UTF-8 CSV file to write. Missing parent
            directories are created; a bare filename is written to the
            current working directory.
        keywords: Iterable of keyword strings. Falsy entries are ignored
            and duplicates (after lower-casing) are collapsed.
    """
    needles = {k.lower() for k in keywords if k}
    matched = []
    with open(csv_in, 'r', encoding='utf-8', newline='') as src:
        for index, row in enumerate(csv.reader(src)):
            # Only the very first record may be the header line.
            if index == 0 and row and row[0].lower() == 'username':
                continue
            if not row:
                continue
            # Rows with a single column have no text to search.
            text = row[1] if len(row) > 1 else ''
            haystack = text.lower()
            if any(k in haystack for k in needles):
                matched.append(row)

    # os.path.dirname() returns '' for a bare filename and os.makedirs('')
    # raises FileNotFoundError, so only create a directory when one is named.
    out_dir = os.path.dirname(csv_out)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(csv_out, 'w', encoding='utf-8', newline='') as dst:
        writer = csv.writer(dst)
        writer.writerow(['username', 'text'])
        writer.writerows(matched)
    print(f"input={csv_in} keywords={len(needles)} matched_rows={len(matched)} out={csv_out}")
|
||||
|
||||
def main():
    """Command-line entry point: gather keywords and filter the comment CSV.

    Keywords are read from the two keyword files named on the command line
    (one keyword per line, blank lines ignored). A file that cannot be read
    contributes no keywords. The keyword ``pen`` is always appended before
    ``filter_comments`` is called with the input and output CSV paths.
    """
    parser = argparse.ArgumentParser()
    option_defaults = (
        ('--extern-keywords', r'd:\work\test\test\all_keywords.txt'),
        ('--local-keywords', r'data\keyword.txt'),
        ('--csv-in', r'data\comments.csv'),
        ('--csv-out', r'data\key_comment.csv'),
    )
    for flag, default in option_defaults:
        parser.add_argument(flag, default=default)
    args = parser.parse_args()

    def _load(path):
        # Best effort: any failure while opening or reading yields no keywords.
        try:
            with open(path, 'r', encoding='utf-8') as handle:
                stripped = (line.strip() for line in handle)
                return [word for word in stripped if word]
        except Exception:
            return []

    kws = _load(args.extern_keywords) + _load(args.local_keywords) + ['pen']
    filter_comments(args.csv_in, args.csv_out, kws)


if __name__ == '__main__':
    main()
|
||||
54
utils/io.py
Normal file
54
utils/io.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import json
|
||||
import os
|
||||
import csv
|
||||
|
||||
def load_keywords_from_file(path):
    """Read a keyword file line by line, skip blank lines, and return a list.

    Each line is stripped of surrounding whitespace. If opening or reading
    the file raises any exception, an empty list is returned instead.
    """
    try:
        with open(path, 'r', encoding='utf-8') as handle:
            return [word for word in map(str.strip, handle) if word]
    except Exception:
        return []
|
||||
|
||||
def write_json(path, obj):
    """Write ``obj`` to ``path`` as UTF-8 JSON.

    Non-ASCII characters are kept as-is and the output is indented by two
    spaces.
    """
    with open(path, mode='w', encoding='utf-8') as out:
        json.dump(obj, out, indent=2, ensure_ascii=False)
|
||||
|
||||
def read_json(path):
    """Read a UTF-8 JSON file; return an empty dict if anything fails."""
    try:
        with open(path, encoding='utf-8') as src:
            parsed = json.load(src)
    except Exception:
        return {}
    return parsed
|
||||
|
||||
def ensure_csv_header(path, headers):
    """Create the CSV at ``path`` with a header row if it does not exist.

    Returns immediately when ``path`` is empty or the file already exists;
    an existing file is never modified.
    """
    if not path or os.path.exists(path):
        return
    with open(path, 'w', newline='', encoding='utf-8') as out:
        csv.writer(out).writerow(headers)
|
||||
|
||||
def append_csv_rows(path, rows):
    """Append ``rows`` to the CSV at ``path``; each row is a list of cells.

    Does nothing when ``path`` is empty.
    """
    if path:
        with open(path, 'a', newline='', encoding='utf-8') as out:
            csv.writer(out).writerows(rows)
|
||||
"""通用 IO 工具
|
||||
|
||||
提供:
|
||||
- 关键词文件加载
|
||||
- JSON 读写
|
||||
- CSV 文件写入(确保表头、追加行)
|
||||
"""
|
||||
Reference in New Issue
Block a user