#!/usr/bin/env python3 # -*- coding: utf-8 -*- """完整导入小学英语核心词汇到数据库(包含所有字段)""" import pandas as pd import mysql.connector from datetime import datetime import re # 数据库配置 db_config = { 'host': 'localhost', 'port': 3306, 'user': 'root', 'password': 'JKjk20011115', 'database': 'ai_english_learning', 'charset': 'utf8mb4' } # 词汇书ID BOOK_ID = 'primary_core_1000' def clean_text(text): """清理文本,处理nan和空值""" if pd.isna(text) or str(text).strip() == '' or str(text).strip() == 'nan': return None return str(text).strip() def extract_part_of_speech(translation): """从中文翻译中提取词性""" if not translation: return 'noun' pos_map = { 'v.': 'verb', 'n.': 'noun', 'adj.': 'adjective', 'adv.': 'adverb', 'prep.': 'preposition', 'conj.': 'conjunction', 'pron.': 'pronoun', 'interj.': 'interjection' } for abbr, full in pos_map.items(): if abbr in translation or abbr.replace('.', '') in translation: return full # 中文词性判断 if '动' in translation: return 'verb' elif '形' in translation or '容' in translation: return 'adjective' elif '副' in translation: return 'adverb' elif '介' in translation: return 'preposition' elif '连' in translation: return 'conjunction' return 'noun' # 默认名词 def import_words_from_excel(file_path): """从Excel导入单词""" try: # 读取Excel文件 print(f"📖 正在读取文件: {file_path}") df = pd.read_excel(file_path) print(f"📊 文件列名: {df.columns.tolist()}") print(f"📊 总行数: {len(df)}") # 连接数据库 conn = mysql.connector.connect(**db_config) cursor = conn.cursor() # 先清空旧数据 print("\n清理旧数据...") cursor.execute("DELETE FROM ai_vocabulary_book_words WHERE book_id = %s", (BOOK_ID,)) cursor.execute(""" DELETE v FROM ai_vocabulary v LEFT JOIN ai_vocabulary_book_words bw ON bw.vocabulary_id = v.id WHERE bw.id IS NULL """) conn.commit() # 准备SQL语句 insert_vocab_sql = """ INSERT INTO ai_vocabulary (word, phonetic_us, phonetic_uk, phonetic, level, frequency, is_active, word_root, synonyms, antonyms, derivatives, collocations, created_at, updated_at) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) """ insert_definition_sql = """ INSERT INTO ai_vocabulary_definitions (vocabulary_id, part_of_speech, definition_en, definition_cn, sort_order, created_at) VALUES (%s, %s, %s, %s, %s, %s) """ insert_example_sql = """ INSERT INTO ai_vocabulary_examples (vocabulary_id, sentence_en, sentence_cn, sort_order, created_at) VALUES (%s, %s, %s, %s, %s) """ insert_book_word_sql = """ INSERT INTO ai_vocabulary_book_words (book_id, vocabulary_id, sort_order, created_at) VALUES (%s, %s, %s, %s) """ success_count = 0 error_count = 0 # 遍历每一行 for index, row in df.iterrows(): try: # 提取基本数据 word = clean_text(row.get('Word')) if not word: continue # 音标 phonetic_us = clean_text(row.get('美式音标')) phonetic_uk = clean_text(row.get('英式音标')) phonetic = phonetic_us or phonetic_uk # 释义 translation_cn = clean_text(row.get('中文含义')) translation_en = clean_text(row.get('英文翻译(对应中文含义)')) if not translation_cn: print(f"⚠️ 跳过 {word}:缺少中文含义") continue # 如果没有英文翻译,使用单词本身 if not translation_en: translation_en = word # 提取词性 part_of_speech = extract_part_of_speech(translation_cn) # 例句 example_en = clean_text(row.get('例句')) example_cn = clean_text(row.get('例句中文翻译')) # 词根 word_root = clean_text(row.get('词根')) # 同义词(处理为JSON) synonyms_text = clean_text(row.get('同义词(含义)')) synonyms_json = '[]' if synonyms_text: # 分号分隔,格式:word1(含义1);word2(含义2) import json syn_list = [] for syn in synonyms_text.split(';'): syn = syn.strip() if syn: syn_list.append(syn) synonyms_json = json.dumps(syn_list, ensure_ascii=False) # 反义词(处理为JSON) antonyms_text = clean_text(row.get('反义词(含义)')) antonyms_json = '[]' if antonyms_text: import json ant_list = [] for ant in antonyms_text.split(';'): ant = ant.strip() if ant: ant_list.append(ant) antonyms_json = json.dumps(ant_list, ensure_ascii=False) # 派生词(处理为JSON) derivatives_text = clean_text(row.get('派生词(含义)')) derivatives_json = '[]' if derivatives_text: import json der_list = [] for der in derivatives_text.split(';'): der = der.strip() if der: der_list.append(der) derivatives_json = json.dumps(der_list, ensure_ascii=False) # 词组搭配(处理为JSON) phrases_text = clean_text(row.get('词组搭配(中文含义)')) collocations_json = '[]' if phrases_text: import json col_list = [] for phrase in phrases_text.split(';'): phrase = phrase.strip() if phrase: col_list.append(phrase) collocations_json = json.dumps(col_list, ensure_ascii=False) # 插入词汇 now = datetime.now() cursor.execute(insert_vocab_sql, ( word, phonetic_us, phonetic_uk, phonetic, 'beginner', index + 1, True, word_root, synonyms_json, antonyms_json, derivatives_json, collocations_json, now, now )) vocab_id = cursor.lastrowid # 插入主要释义 cursor.execute(insert_definition_sql, ( vocab_id, part_of_speech, translation_en, # ✅ 使用正确的英文翻译 translation_cn, 0, now )) # 插入例句 if example_en and example_cn: # 处理多个例句(用分号分隔) examples_en = example_en.split(';') examples_cn = example_cn.split(';') for i, (ex_en, ex_cn) in enumerate(zip(examples_en, examples_cn)): ex_en = ex_en.strip() ex_cn = ex_cn.strip() if ex_en and ex_cn: cursor.execute(insert_example_sql, ( vocab_id, ex_en, ex_cn, i, now )) # 关联到词汇书 cursor.execute(insert_book_word_sql, ( BOOK_ID, vocab_id, index, now )) success_count += 1 if success_count % 50 == 0: print(f"✅ 已导入 {success_count} 个单词...") conn.commit() except Exception as e: error_count += 1 print(f"❌ 导入第 {index + 1} 行失败: {e}") print(f" 单词: {word if 'word' in locals() else 'N/A'}") # 提交事务 conn.commit() # 更新词汇书的总单词数 cursor.execute( "UPDATE ai_vocabulary_books SET total_words = %s WHERE id = %s", (success_count, BOOK_ID) ) conn.commit() print(f"\n🎉 导入完成!") print(f"✅ 成功: {success_count} 个单词") print(f"❌ 失败: {error_count} 个单词") # 验证数据 cursor.execute( "SELECT COUNT(*) FROM ai_vocabulary_book_words WHERE book_id = %s", (BOOK_ID,) ) count = cursor.fetchone()[0] print(f"📊 词汇书中共有 {count} 个单词") # 检查释义数量 cursor.execute(""" SELECT COUNT(DISTINCT d.vocabulary_id) FROM ai_vocabulary_book_words bw JOIN ai_vocabulary_definitions d ON d.vocabulary_id = bw.vocabulary_id WHERE bw.book_id = %s """, (BOOK_ID,)) def_count = cursor.fetchone()[0] print(f"📊 有释义的单词: {def_count} 个") # 检查例句数量 cursor.execute(""" SELECT COUNT(DISTINCT e.vocabulary_id) FROM ai_vocabulary_book_words bw JOIN ai_vocabulary_examples e ON e.vocabulary_id = bw.vocabulary_id WHERE bw.book_id = %s """, (BOOK_ID,)) ex_count = cursor.fetchone()[0] print(f"📊 有例句的单词: {ex_count} 个") except Exception as e: print(f"❌ 导入失败: {e}") import traceback traceback.print_exc() finally: if cursor: cursor.close() if conn: conn.close() if __name__ == '__main__': import_words_from_excel('data/小学.xlsx')