#!/usr/bin/env python3 # -*- coding: utf-8 -*- """导入小学英语核心词汇到数据库""" import pandas as pd import mysql.connector from datetime import datetime import uuid # 数据库配置 db_config = { 'host': 'localhost', 'port': 3306, 'user': 'root', 'password': 'JKjk20011115', 'database': 'ai_english_learning', 'charset': 'utf8mb4' } # 词汇书ID BOOK_ID = 'primary_core_1000' def generate_uuid(): """生成UUID""" return str(uuid.uuid4()) def import_words_from_excel(file_path): """从Excel导入单词""" try: # 读取Excel文件 print(f"📖 正在读取文件: {file_path}") df = pd.read_excel(file_path) print(f"📊 文件列名: {df.columns.tolist()}") print(f"📊 总行数: {len(df)}") print(f"\n前5行数据预览:") print(df.head()) # 连接数据库 conn = mysql.connector.connect(**db_config) cursor = conn.cursor() # 准备SQL语句 insert_vocab_sql = """ INSERT INTO ai_vocabulary (word, phonetic, level, frequency, is_active, created_at, updated_at) VALUES (%s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE id = LAST_INSERT_ID(id), phonetic = VALUES(phonetic), level = VALUES(level), frequency = VALUES(frequency), updated_at = VALUES(updated_at) """ insert_definition_sql = """ INSERT INTO ai_vocabulary_definitions (vocabulary_id, part_of_speech, definition_en, definition_cn, sort_order, created_at) VALUES (%s, %s, %s, %s, %s, %s) """ insert_example_sql = """ INSERT INTO ai_vocabulary_examples (vocabulary_id, sentence_en, sentence_cn, sort_order, created_at) VALUES (%s, %s, %s, %s, %s) """ insert_book_word_sql = """ INSERT INTO ai_vocabulary_book_words (book_id, vocabulary_id, sort_order, created_at) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE sort_order = VALUES(sort_order) """ success_count = 0 error_count = 0 # 遍历每一行 for index, row in df.iterrows(): try: # 提取数据(根据实际Excel列名调整) word = str(row.get('Word', '')).strip() if not word or word == 'nan': continue # 优先使用美式音标 phonetic = str(row.get('美式音标', '')).strip() if phonetic == 'nan' or not phonetic: phonetic = str(row.get('英式音标', '')).strip() if phonetic == 'nan': phonetic = None translation = str(row.get('中文含义', '')).strip() if translation == 'nan': translation = '' # 从中文含义中提取词性(如果有的话) part_of_speech = 'noun' # 默认为名词 if translation: if 'v.' in translation or '动' in translation: part_of_speech = 'verb' elif 'adj.' in translation or '形' in translation: part_of_speech = 'adjective' elif 'adv.' in translation or '副' in translation: part_of_speech = 'adverb' elif 'prep.' in translation or '介' in translation: part_of_speech = 'preposition' elif 'conj.' in translation or '连' in translation: part_of_speech = 'conjunction' example_en = str(row.get('例句', '')).strip() if example_en == 'nan' or not example_en: example_en = None example_cn = str(row.get('例句中文翻译', '')).strip() if example_cn == 'nan' or not example_cn: example_cn = None # 插入词汇 now = datetime.now() cursor.execute(insert_vocab_sql, ( word, phonetic, 'beginner', # 小学词汇难度为beginner index + 1, # 使用行号作为频率 True, now, now )) # 获取插入的ID vocab_id = cursor.lastrowid # 插入释义 if translation: cursor.execute(insert_definition_sql, ( vocab_id, part_of_speech, word, # 英文定义暂时用单词本身 translation, 0, now )) # 插入例句(只取第一个例句) if example_en and example_cn: # 如果有多个例句,用分号分隔,只取第一个 first_example_en = example_en.split(';')[0] if ';' in example_en else example_en first_example_cn = example_cn.split(';')[0] if ';' in example_cn else example_cn cursor.execute(insert_example_sql, ( vocab_id, first_example_en, first_example_cn, 0, now )) # 关联到词汇书 cursor.execute(insert_book_word_sql, ( BOOK_ID, vocab_id, index, now )) success_count += 1 if success_count % 50 == 0: print(f"✅ 已导入 {success_count} 个单词...") conn.commit() except Exception as e: error_count += 1 print(f"❌ 导入第 {index + 1} 行失败: {e}") print(f" 数据: {row.to_dict()}") # 提交事务 conn.commit() # 更新词汇书的总单词数 cursor.execute( "UPDATE ai_vocabulary_books SET total_words = %s WHERE id = %s", (success_count, BOOK_ID) ) conn.commit() print(f"\n🎉 导入完成!") print(f"✅ 成功: {success_count} 个单词") print(f"❌ 失败: {error_count} 个单词") # 验证数据 cursor.execute( "SELECT COUNT(*) FROM ai_vocabulary_book_words WHERE book_id = %s", (BOOK_ID,) ) count = cursor.fetchone()[0] print(f"📊 词汇书中共有 {count} 个单词") except Exception as e: print(f"❌ 导入失败: {e}") import traceback traceback.print_exc() finally: if cursor: cursor.close() if conn: conn.close() if __name__ == '__main__': import_words_from_excel('data/小学.xlsx')