211 lines
7.2 KiB
Python
211 lines
7.2 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
"""导入小学英语核心词汇到数据库"""
|
|||
|
|
|
|||
|
|
import pandas as pd
|
|||
|
|
import mysql.connector
|
|||
|
|
from datetime import datetime
|
|||
|
|
import uuid
|
|||
|
|
|
|||
|
|
# 数据库配置
|
|||
|
|
db_config = {
|
|||
|
|
'host': 'localhost',
|
|||
|
|
'port': 3306,
|
|||
|
|
'user': 'root',
|
|||
|
|
'password': 'JKjk20011115',
|
|||
|
|
'database': 'ai_english_learning',
|
|||
|
|
'charset': 'utf8mb4'
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
# 词汇书ID
|
|||
|
|
BOOK_ID = 'primary_core_1000'
|
|||
|
|
|
|||
|
|
def generate_uuid():
|
|||
|
|
"""生成UUID"""
|
|||
|
|
return str(uuid.uuid4())
|
|||
|
|
|
|||
|
|
def import_words_from_excel(file_path):
|
|||
|
|
"""从Excel导入单词"""
|
|||
|
|
try:
|
|||
|
|
# 读取Excel文件
|
|||
|
|
print(f"📖 正在读取文件: {file_path}")
|
|||
|
|
df = pd.read_excel(file_path)
|
|||
|
|
|
|||
|
|
print(f"📊 文件列名: {df.columns.tolist()}")
|
|||
|
|
print(f"📊 总行数: {len(df)}")
|
|||
|
|
print(f"\n前5行数据预览:")
|
|||
|
|
print(df.head())
|
|||
|
|
|
|||
|
|
# 连接数据库
|
|||
|
|
conn = mysql.connector.connect(**db_config)
|
|||
|
|
cursor = conn.cursor()
|
|||
|
|
|
|||
|
|
# 准备SQL语句
|
|||
|
|
insert_vocab_sql = """
|
|||
|
|
INSERT INTO ai_vocabulary
|
|||
|
|
(word, phonetic, level, frequency, is_active, created_at, updated_at)
|
|||
|
|
VALUES (%s, %s, %s, %s, %s, %s, %s)
|
|||
|
|
ON DUPLICATE KEY UPDATE
|
|||
|
|
id = LAST_INSERT_ID(id),
|
|||
|
|
phonetic = VALUES(phonetic),
|
|||
|
|
level = VALUES(level),
|
|||
|
|
frequency = VALUES(frequency),
|
|||
|
|
updated_at = VALUES(updated_at)
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
insert_definition_sql = """
|
|||
|
|
INSERT INTO ai_vocabulary_definitions
|
|||
|
|
(vocabulary_id, part_of_speech, definition_en, definition_cn, sort_order, created_at)
|
|||
|
|
VALUES (%s, %s, %s, %s, %s, %s)
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
insert_example_sql = """
|
|||
|
|
INSERT INTO ai_vocabulary_examples
|
|||
|
|
(vocabulary_id, sentence_en, sentence_cn, sort_order, created_at)
|
|||
|
|
VALUES (%s, %s, %s, %s, %s)
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
insert_book_word_sql = """
|
|||
|
|
INSERT INTO ai_vocabulary_book_words
|
|||
|
|
(book_id, vocabulary_id, sort_order, created_at)
|
|||
|
|
VALUES (%s, %s, %s, %s)
|
|||
|
|
ON DUPLICATE KEY UPDATE sort_order = VALUES(sort_order)
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
success_count = 0
|
|||
|
|
error_count = 0
|
|||
|
|
|
|||
|
|
# 遍历每一行
|
|||
|
|
for index, row in df.iterrows():
|
|||
|
|
try:
|
|||
|
|
# 提取数据(根据实际Excel列名调整)
|
|||
|
|
word = str(row.get('Word', '')).strip()
|
|||
|
|
if not word or word == 'nan':
|
|||
|
|
continue
|
|||
|
|
|
|||
|
|
# 优先使用美式音标
|
|||
|
|
phonetic = str(row.get('美式音标', '')).strip()
|
|||
|
|
if phonetic == 'nan' or not phonetic:
|
|||
|
|
phonetic = str(row.get('英式音标', '')).strip()
|
|||
|
|
if phonetic == 'nan':
|
|||
|
|
phonetic = None
|
|||
|
|
|
|||
|
|
translation = str(row.get('中文含义', '')).strip()
|
|||
|
|
if translation == 'nan':
|
|||
|
|
translation = ''
|
|||
|
|
|
|||
|
|
# 从中文含义中提取词性(如果有的话)
|
|||
|
|
part_of_speech = 'noun' # 默认为名词
|
|||
|
|
if translation:
|
|||
|
|
if 'v.' in translation or '动' in translation:
|
|||
|
|
part_of_speech = 'verb'
|
|||
|
|
elif 'adj.' in translation or '形' in translation:
|
|||
|
|
part_of_speech = 'adjective'
|
|||
|
|
elif 'adv.' in translation or '副' in translation:
|
|||
|
|
part_of_speech = 'adverb'
|
|||
|
|
elif 'prep.' in translation or '介' in translation:
|
|||
|
|
part_of_speech = 'preposition'
|
|||
|
|
elif 'conj.' in translation or '连' in translation:
|
|||
|
|
part_of_speech = 'conjunction'
|
|||
|
|
|
|||
|
|
example_en = str(row.get('例句', '')).strip()
|
|||
|
|
if example_en == 'nan' or not example_en:
|
|||
|
|
example_en = None
|
|||
|
|
|
|||
|
|
example_cn = str(row.get('例句中文翻译', '')).strip()
|
|||
|
|
if example_cn == 'nan' or not example_cn:
|
|||
|
|
example_cn = None
|
|||
|
|
|
|||
|
|
# 插入词汇
|
|||
|
|
now = datetime.now()
|
|||
|
|
cursor.execute(insert_vocab_sql, (
|
|||
|
|
word,
|
|||
|
|
phonetic,
|
|||
|
|
'beginner', # 小学词汇难度为beginner
|
|||
|
|
index + 1, # 使用行号作为频率
|
|||
|
|
True,
|
|||
|
|
now,
|
|||
|
|
now
|
|||
|
|
))
|
|||
|
|
|
|||
|
|
# 获取插入的ID
|
|||
|
|
vocab_id = cursor.lastrowid
|
|||
|
|
|
|||
|
|
# 插入释义
|
|||
|
|
if translation:
|
|||
|
|
cursor.execute(insert_definition_sql, (
|
|||
|
|
vocab_id,
|
|||
|
|
part_of_speech,
|
|||
|
|
word, # 英文定义暂时用单词本身
|
|||
|
|
translation,
|
|||
|
|
0,
|
|||
|
|
now
|
|||
|
|
))
|
|||
|
|
|
|||
|
|
# 插入例句(只取第一个例句)
|
|||
|
|
if example_en and example_cn:
|
|||
|
|
# 如果有多个例句,用分号分隔,只取第一个
|
|||
|
|
first_example_en = example_en.split(';')[0] if ';' in example_en else example_en
|
|||
|
|
first_example_cn = example_cn.split(';')[0] if ';' in example_cn else example_cn
|
|||
|
|
|
|||
|
|
cursor.execute(insert_example_sql, (
|
|||
|
|
vocab_id,
|
|||
|
|
first_example_en,
|
|||
|
|
first_example_cn,
|
|||
|
|
0,
|
|||
|
|
now
|
|||
|
|
))
|
|||
|
|
|
|||
|
|
# 关联到词汇书
|
|||
|
|
cursor.execute(insert_book_word_sql, (
|
|||
|
|
BOOK_ID,
|
|||
|
|
vocab_id,
|
|||
|
|
index,
|
|||
|
|
now
|
|||
|
|
))
|
|||
|
|
|
|||
|
|
success_count += 1
|
|||
|
|
if success_count % 50 == 0:
|
|||
|
|
print(f"✅ 已导入 {success_count} 个单词...")
|
|||
|
|
conn.commit()
|
|||
|
|
|
|||
|
|
except Exception as e:
|
|||
|
|
error_count += 1
|
|||
|
|
print(f"❌ 导入第 {index + 1} 行失败: {e}")
|
|||
|
|
print(f" 数据: {row.to_dict()}")
|
|||
|
|
|
|||
|
|
# 提交事务
|
|||
|
|
conn.commit()
|
|||
|
|
|
|||
|
|
# 更新词汇书的总单词数
|
|||
|
|
cursor.execute(
|
|||
|
|
"UPDATE ai_vocabulary_books SET total_words = %s WHERE id = %s",
|
|||
|
|
(success_count, BOOK_ID)
|
|||
|
|
)
|
|||
|
|
conn.commit()
|
|||
|
|
|
|||
|
|
print(f"\n🎉 导入完成!")
|
|||
|
|
print(f"✅ 成功: {success_count} 个单词")
|
|||
|
|
print(f"❌ 失败: {error_count} 个单词")
|
|||
|
|
|
|||
|
|
# 验证数据
|
|||
|
|
cursor.execute(
|
|||
|
|
"SELECT COUNT(*) FROM ai_vocabulary_book_words WHERE book_id = %s",
|
|||
|
|
(BOOK_ID,)
|
|||
|
|
)
|
|||
|
|
count = cursor.fetchone()[0]
|
|||
|
|
print(f"📊 词汇书中共有 {count} 个单词")
|
|||
|
|
|
|||
|
|
except Exception as e:
|
|||
|
|
print(f"❌ 导入失败: {e}")
|
|||
|
|
import traceback
|
|||
|
|
traceback.print_exc()
|
|||
|
|
finally:
|
|||
|
|
if cursor:
|
|||
|
|
cursor.close()
|
|||
|
|
if conn:
|
|||
|
|
conn.close()
|
|||
|
|
|
|||
|
|
if __name__ == '__main__':
|
|||
|
|
import_words_from_excel('data/小学.xlsx')
|