init
This commit is contained in:
266
data/import_college_words.py
Normal file
266
data/import_college_words.py
Normal file
@@ -0,0 +1,266 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""导入大学英语教材词汇到数据库"""
|
||||
|
||||
import pandas as pd
|
||||
import mysql.connector
|
||||
from datetime import datetime
|
||||
import json
|
||||
|
||||
# Database connection settings.
# NOTE(review): credentials are hardcoded here — move them to environment
# variables or a config file before sharing/deploying this script.
db_config = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'password': 'JKjk20011115',
    'database': 'ai_english_learning',
    'charset': 'utf8mb4',
}

# Target vocabulary-book id every imported word is linked to.
BOOK_ID = 'college_textbook'
|
||||
|
||||
def clean_text(text):
    """Normalize a spreadsheet cell value.

    Returns the stripped string form of *text*, or None when the cell is
    NaN, empty, or the literal string 'nan' (pandas' stringified missing value).
    """
    if pd.isna(text):
        return None
    stripped = str(text).strip()
    if stripped in ('', 'nan'):
        return None
    return stripped
|
||||
|
||||
def extract_part_of_speech(translation):
    """Infer an English part-of-speech tag from a Chinese gloss string.

    Dotted abbreviations (e.g. 'adj.') are matched first; Chinese keyword
    hints ('动', '形', ...) are the fallback.  Returns 'noun' when nothing
    matches or *translation* is falsy.
    """
    if not translation:
        return 'noun'

    pos_map = {
        'v.': 'verb',
        'n.': 'noun',
        'adj.': 'adjective',
        'adv.': 'adverb',
        'prep.': 'preposition',
        'conj.': 'conjunction',
        'pron.': 'pronoun',
        'interj.': 'interjection'
    }

    # Check longer abbreviations first and require that the match is not the
    # tail of a longer ASCII token.  The original `'v.' in translation` test
    # classified every 'adv.' gloss as a verb ('adv.' contains 'v.'), and
    # its dot-less fallback ('n' in translation) matched stray letters
    # anywhere in the string — both fixed here.
    for abbr in sorted(pos_map, key=len, reverse=True):
        idx = translation.find(abbr)
        if idx == -1:
            continue
        prev = translation[idx - 1] if idx > 0 else ''
        if not (prev.isascii() and prev.isalpha()):
            return pos_map[abbr]

    # Chinese part-of-speech keyword hints.
    if '动' in translation:
        return 'verb'
    elif '形' in translation or '容' in translation:
        return 'adjective'
    elif '副' in translation:
        return 'adverb'
    elif '介' in translation:
        return 'preposition'
    elif '连' in translation:
        return 'conjunction'

    return 'noun'
|
||||
|
||||
def _split_to_json(text):
    """Serialize a semicolon-separated spreadsheet cell into a JSON array
    string.  Returns '[]' for empty/None input so the value can be stored
    directly in a JSON column."""
    if not text:
        return '[]'
    items = [part.strip() for part in text.split(';') if part.strip()]
    return json.dumps(items, ensure_ascii=False)


def import_words_from_excel(file_path):
    """Import college-textbook vocabulary from an Excel file into MySQL.

    Wipes this book's previous links (and orphaned vocabulary rows), then
    for each sheet row: reuse the word if it already exists, otherwise
    insert it with phonetics, root and JSON list columns plus its main
    definition and example sentences; finally link the word to BOOK_ID and
    refresh the book's total_words counter.  Errors are reported per row;
    the function never raises.
    """
    # Pre-bind so the finally block is safe even when pd.read_excel or the
    # DB connection fails before these names are assigned.
    conn = None
    cursor = None
    try:
        print(f"📖 正在读取文件: {file_path}")
        df = pd.read_excel(file_path)

        print(f"📊 文件列名: {df.columns.tolist()}")
        print(f"📊 总行数: {len(df)}")

        conn = mysql.connector.connect(**db_config)
        cursor = conn.cursor()

        # Drop this book's old links, then vocabulary rows no longer
        # referenced by any book.
        print("\n清理旧数据...")
        cursor.execute("DELETE FROM ai_vocabulary_book_words WHERE book_id = %s", (BOOK_ID,))
        cursor.execute("""
            DELETE v FROM ai_vocabulary v
            LEFT JOIN ai_vocabulary_book_words bw ON bw.vocabulary_id = v.id
            WHERE bw.id IS NULL
        """)
        conn.commit()

        insert_vocab_sql = """
            INSERT INTO ai_vocabulary
            (word, phonetic_us, phonetic_uk, phonetic, level, frequency, is_active,
             word_root, synonyms, antonyms, derivatives, collocations, created_at, updated_at)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """

        insert_definition_sql = """
            INSERT INTO ai_vocabulary_definitions
            (vocabulary_id, part_of_speech, definition_en, definition_cn, sort_order, created_at)
            VALUES (%s, %s, %s, %s, %s, %s)
        """

        insert_example_sql = """
            INSERT INTO ai_vocabulary_examples
            (vocabulary_id, sentence_en, sentence_cn, sort_order, created_at)
            VALUES (%s, %s, %s, %s, %s)
        """

        insert_book_word_sql = """
            INSERT INTO ai_vocabulary_book_words
            (book_id, vocabulary_id, sort_order, created_at)
            VALUES (%s, %s, %s, %s)
        """

        success_count = 0
        error_count = 0

        for index, row in df.iterrows():
            try:
                # The source sheets are inconsistent about the word column name.
                word = clean_text(row.get('Word') or row.get('单词(Word)') or row.get('单词'))
                if not word:
                    continue

                # Reuse an existing vocabulary row when the word is already known.
                cursor.execute("SELECT id FROM ai_vocabulary WHERE word = %s", (word,))
                existing = cursor.fetchone()

                if existing:
                    vocab_id = existing[0]
                else:
                    phonetic_us = clean_text(row.get('美式音标'))
                    phonetic_uk = clean_text(row.get('英式音标'))
                    phonetic = phonetic_us or phonetic_uk

                    translation_cn = clean_text(row.get('中文含义'))
                    translation_en = clean_text(row.get('英文翻译(对应中文含义)'))

                    if not translation_cn:
                        print(f"⚠️ 跳过 {word}:缺少中文含义")
                        continue

                    if not translation_en:
                        translation_en = word  # fall back to the headword itself

                    part_of_speech = extract_part_of_speech(translation_cn)

                    example_en = clean_text(row.get('例句'))
                    example_cn = clean_text(row.get('例句中文翻译'))

                    word_root = clean_text(row.get('词根') or row.get('词根/词源'))

                    # Semicolon-separated cells → JSON array strings.
                    synonyms_json = _split_to_json(clean_text(row.get('同义词(含义)')))
                    antonyms_json = _split_to_json(clean_text(row.get('反义词(含义)')))
                    derivatives_json = _split_to_json(clean_text(row.get('派生词(含义)')))
                    collocations_json = _split_to_json(clean_text(row.get('词组搭配(中文含义)')))

                    now = datetime.now()
                    cursor.execute(insert_vocab_sql, (
                        word, phonetic_us, phonetic_uk, phonetic,
                        'intermediate',  # college-level difficulty
                        index + 1, True,  # row number doubles as frequency rank
                        word_root, synonyms_json, antonyms_json,
                        derivatives_json, collocations_json,
                        now, now
                    ))

                    vocab_id = cursor.lastrowid

                    cursor.execute(insert_definition_sql, (
                        vocab_id, part_of_speech, translation_en,
                        translation_cn, 0, now
                    ))

                    # Examples are semicolon-separated and paired positionally.
                    if example_en and example_cn:
                        for i, (ex_en, ex_cn) in enumerate(
                                zip(example_en.split(';'), example_cn.split(';'))):
                            ex_en = ex_en.strip()
                            ex_cn = ex_cn.strip()
                            if ex_en and ex_cn:
                                cursor.execute(insert_example_sql, (
                                    vocab_id, ex_en, ex_cn, i, now
                                ))

                # Link to the book whether the word was new or reused.
                now = datetime.now()
                try:
                    cursor.execute(insert_book_word_sql, (
                        BOOK_ID, vocab_id, index, now
                    ))
                except Exception as link_error:
                    # Ignore only MySQL duplicate-key errors (ER_DUP_ENTRY,
                    # errno 1062).  Prefer the errno attribute over the
                    # original fragile substring test on str(error), which
                    # could misfire on any message containing "1062".
                    if getattr(link_error, 'errno', None) != 1062 and '1062' not in str(link_error):
                        raise

                success_count += 1
                if success_count % 100 == 0:
                    print(f"✅ 已处理 {success_count} 个单词...")
                    conn.commit()

            except Exception as e:
                error_count += 1
                print(f"❌ 导入第 {index + 1} 行失败: {e}")

        conn.commit()

        # Refresh the book's cached word count.
        cursor.execute(
            "UPDATE ai_vocabulary_books SET total_words = %s WHERE id = %s",
            (success_count, BOOK_ID)
        )
        conn.commit()

        print(f"\n🎉 大学英语教材词汇导入完成!")
        print(f"✅ 成功: {success_count} 个单词")
        print(f"❌ 失败: {error_count} 个单词")

        # Sanity check: count the words actually linked to the book.
        cursor.execute(
            "SELECT COUNT(*) FROM ai_vocabulary_book_words WHERE book_id = %s",
            (BOOK_ID,)
        )
        print(f"📊 词汇书中共有 {cursor.fetchone()[0]} 个单词")

    except Exception as e:
        print(f"❌ 导入失败: {e}")
        import traceback
        traceback.print_exc()
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
|
||||
|
||||
if __name__ == '__main__':
    # CLI entry point: import the bundled college-textbook spreadsheet.
    excel_path = 'data/大学英语教材词汇.xlsx'
    import_words_from_excel(excel_path)
|
||||
210
data/import_primary_words.py
Normal file
210
data/import_primary_words.py
Normal file
@@ -0,0 +1,210 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""导入小学英语核心词汇到数据库"""
|
||||
|
||||
import pandas as pd
|
||||
import mysql.connector
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
|
||||
# Database connection settings.
# NOTE(review): credentials are hardcoded here — move them to environment
# variables or a config file before sharing/deploying this script.
db_config = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'password': 'JKjk20011115',
    'database': 'ai_english_learning',
    'charset': 'utf8mb4',
}

# Target vocabulary-book id every imported word is linked to.
BOOK_ID = 'primary_core_1000'
|
||||
|
||||
def generate_uuid():
    """Return a freshly generated random (version-4) UUID as a string."""
    return f"{uuid.uuid4()}"
|
||||
|
||||
def _cell(row, column):
    """Return the stripped value of *column* in *row*, or None for
    missing/blank/'nan' cells (pandas stringifies NaN as 'nan')."""
    value = str(row.get(column, '')).strip()
    return None if not value or value == 'nan' else value


def _guess_part_of_speech(translation):
    """Map a Chinese gloss to a part-of-speech tag (default 'noun').

    Dotted abbreviations are matched as standalone tokens so that e.g.
    'adv.' is not mistaken for 'v.' — the original inline substring test
    tagged every adverb gloss as a verb.  Chinese keyword hints are the
    fallback, in the original precedence order.
    """
    abbrs = (
        ('prep.', 'preposition'),
        ('conj.', 'conjunction'),
        ('adj.', 'adjective'),
        ('adv.', 'adverb'),
        ('v.', 'verb'),
    )
    for abbr, tag in abbrs:
        idx = translation.find(abbr)
        if idx != -1:
            prev = translation[idx - 1] if idx > 0 else ''
            # Reject matches that are the tail of a longer ASCII token.
            if not (prev.isascii() and prev.isalpha()):
                return tag
    for hint, tag in (('动', 'verb'), ('形', 'adjective'), ('副', 'adverb'),
                      ('介', 'preposition'), ('连', 'conjunction')):
        if hint in translation:
            return tag
    return 'noun'


def import_words_from_excel(file_path):
    """Import primary-school core vocabulary from an Excel file into MySQL.

    Upserts each word (ON DUPLICATE KEY UPDATE), stores one definition and
    the first example sentence, links the word to BOOK_ID, and refreshes
    the book's total_words counter.  Errors are reported per row; the
    function never raises.
    """
    # Pre-bind so the finally block cannot raise NameError when reading the
    # file or connecting fails before cursor/conn exist — the original's
    # bare `if cursor:` did exactly that on early failure.
    conn = None
    cursor = None
    try:
        # Read the Excel file.
        print(f"📖 正在读取文件: {file_path}")
        df = pd.read_excel(file_path)

        print(f"📊 文件列名: {df.columns.tolist()}")
        print(f"📊 总行数: {len(df)}")
        print(f"\n前5行数据预览:")
        print(df.head())

        conn = mysql.connector.connect(**db_config)
        cursor = conn.cursor()

        insert_vocab_sql = """
            INSERT INTO ai_vocabulary
            (word, phonetic, level, frequency, is_active, created_at, updated_at)
            VALUES (%s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
                id = LAST_INSERT_ID(id),
                phonetic = VALUES(phonetic),
                level = VALUES(level),
                frequency = VALUES(frequency),
                updated_at = VALUES(updated_at)
        """

        insert_definition_sql = """
            INSERT INTO ai_vocabulary_definitions
            (vocabulary_id, part_of_speech, definition_en, definition_cn, sort_order, created_at)
            VALUES (%s, %s, %s, %s, %s, %s)
        """

        insert_example_sql = """
            INSERT INTO ai_vocabulary_examples
            (vocabulary_id, sentence_en, sentence_cn, sort_order, created_at)
            VALUES (%s, %s, %s, %s, %s)
        """

        insert_book_word_sql = """
            INSERT INTO ai_vocabulary_book_words
            (book_id, vocabulary_id, sort_order, created_at)
            VALUES (%s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE sort_order = VALUES(sort_order)
        """

        success_count = 0
        error_count = 0

        for index, row in df.iterrows():
            try:
                word = _cell(row, 'Word')
                if not word:
                    continue

                # Prefer the American phonetic, fall back to the British one.
                phonetic = _cell(row, '美式音标') or _cell(row, '英式音标')

                translation = _cell(row, '中文含义') or ''

                part_of_speech = _guess_part_of_speech(translation) if translation else 'noun'

                example_en = _cell(row, '例句')
                example_cn = _cell(row, '例句中文翻译')

                now = datetime.now()
                cursor.execute(insert_vocab_sql, (
                    word,
                    phonetic,
                    'beginner',   # primary-school difficulty
                    index + 1,    # row number doubles as frequency rank
                    True,
                    now,
                    now
                ))

                vocab_id = cursor.lastrowid

                if translation:
                    cursor.execute(insert_definition_sql, (
                        vocab_id,
                        part_of_speech,
                        word,  # no English gloss in the sheet; reuse the headword
                        translation,
                        0,
                        now
                    ))

                # Only the first semicolon-separated example is stored.
                if example_en and example_cn:
                    cursor.execute(insert_example_sql, (
                        vocab_id,
                        example_en.split(';')[0],
                        example_cn.split(';')[0],
                        0,
                        now
                    ))

                cursor.execute(insert_book_word_sql, (
                    BOOK_ID,
                    vocab_id,
                    index,
                    now
                ))

                success_count += 1
                if success_count % 50 == 0:
                    print(f"✅ 已导入 {success_count} 个单词...")
                    conn.commit()

            except Exception as e:
                error_count += 1
                print(f"❌ 导入第 {index + 1} 行失败: {e}")
                print(f"   数据: {row.to_dict()}")

        conn.commit()

        # Refresh the book's cached word count.
        cursor.execute(
            "UPDATE ai_vocabulary_books SET total_words = %s WHERE id = %s",
            (success_count, BOOK_ID)
        )
        conn.commit()

        print(f"\n🎉 导入完成!")
        print(f"✅ 成功: {success_count} 个单词")
        print(f"❌ 失败: {error_count} 个单词")

        # Sanity check: count the words actually linked to the book.
        cursor.execute(
            "SELECT COUNT(*) FROM ai_vocabulary_book_words WHERE book_id = %s",
            (BOOK_ID,)
        )
        count = cursor.fetchone()[0]
        print(f"📊 词汇书中共有 {count} 个单词")

    except Exception as e:
        print(f"❌ 导入失败: {e}")
        import traceback
        traceback.print_exc()
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
|
||||
|
||||
if __name__ == '__main__':
    # CLI entry point: import the bundled primary-school spreadsheet.
    excel_path = 'data/小学.xlsx'
    import_words_from_excel(excel_path)
|
||||
320
data/import_primary_words_complete.py
Normal file
320
data/import_primary_words_complete.py
Normal file
@@ -0,0 +1,320 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""完整导入小学英语核心词汇到数据库(包含所有字段)"""
|
||||
|
||||
import pandas as pd
|
||||
import mysql.connector
|
||||
from datetime import datetime
|
||||
import re
|
||||
|
||||
# Database connection settings.
# NOTE(review): credentials are hardcoded here — move them to environment
# variables or a config file before sharing/deploying this script.
db_config = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'password': 'JKjk20011115',
    'database': 'ai_english_learning',
    'charset': 'utf8mb4',
}

# Target vocabulary-book id every imported word is linked to.
BOOK_ID = 'primary_core_1000'
|
||||
|
||||
def clean_text(text):
    """Normalize a spreadsheet cell value.

    Returns the stripped string form of *text*, or None when the cell is
    NaN, empty, or the literal string 'nan' (pandas' stringified missing value).
    """
    if pd.isna(text):
        return None
    stripped = str(text).strip()
    if stripped in ('', 'nan'):
        return None
    return stripped
|
||||
|
||||
def extract_part_of_speech(translation):
    """Infer an English part-of-speech tag from a Chinese gloss string.

    Dotted abbreviations (e.g. 'adj.') are matched first; Chinese keyword
    hints ('动', '形', ...) are the fallback.  Returns 'noun' when nothing
    matches or *translation* is falsy.
    """
    if not translation:
        return 'noun'

    pos_map = {
        'v.': 'verb',
        'n.': 'noun',
        'adj.': 'adjective',
        'adv.': 'adverb',
        'prep.': 'preposition',
        'conj.': 'conjunction',
        'pron.': 'pronoun',
        'interj.': 'interjection'
    }

    # Check longer abbreviations first and require that the match is not the
    # tail of a longer ASCII token.  The original `'v.' in translation` test
    # classified every 'adv.' gloss as a verb ('adv.' contains 'v.'), and
    # its dot-less fallback ('n' in translation) matched stray letters
    # anywhere in the string — both fixed here.
    for abbr in sorted(pos_map, key=len, reverse=True):
        idx = translation.find(abbr)
        if idx == -1:
            continue
        prev = translation[idx - 1] if idx > 0 else ''
        if not (prev.isascii() and prev.isalpha()):
            return pos_map[abbr]

    # Chinese part-of-speech keyword hints.
    if '动' in translation:
        return 'verb'
    elif '形' in translation or '容' in translation:
        return 'adjective'
    elif '副' in translation:
        return 'adverb'
    elif '介' in translation:
        return 'preposition'
    elif '连' in translation:
        return 'conjunction'

    return 'noun'  # default part of speech
|
||||
|
||||
def _split_to_json(text):
    """Serialize a semicolon-separated spreadsheet cell into a JSON array
    string ('[]' for empty/None).  Replaces the copy-pasted split loops —
    and the `import json` the original re-executed inside the row loop."""
    import json
    if not text:
        return '[]'
    items = [part.strip() for part in text.split(';') if part.strip()]
    return json.dumps(items, ensure_ascii=False)


def import_words_from_excel(file_path):
    """Import primary-school vocabulary (all columns) from Excel into MySQL.

    Wipes this book's previous data, then inserts each word with phonetics,
    root, JSON-encoded synonym/antonym/derivative/collocation lists, its
    main definition and example sentences, links it to BOOK_ID, and prints
    verification counts at the end.  Errors are reported per row; the
    function never raises.
    """
    # Pre-bind so the finally block cannot raise NameError when reading the
    # file or connecting fails before cursor/conn exist — the original's
    # bare `if cursor:` did exactly that on early failure.
    conn = None
    cursor = None
    try:
        # Read the Excel file.
        print(f"📖 正在读取文件: {file_path}")
        df = pd.read_excel(file_path)

        print(f"📊 文件列名: {df.columns.tolist()}")
        print(f"📊 总行数: {len(df)}")

        conn = mysql.connector.connect(**db_config)
        cursor = conn.cursor()

        # Drop this book's old links, then vocabulary rows no longer
        # referenced by any book.
        print("\n清理旧数据...")
        cursor.execute("DELETE FROM ai_vocabulary_book_words WHERE book_id = %s", (BOOK_ID,))
        cursor.execute("""
            DELETE v FROM ai_vocabulary v
            LEFT JOIN ai_vocabulary_book_words bw ON bw.vocabulary_id = v.id
            WHERE bw.id IS NULL
        """)
        conn.commit()

        insert_vocab_sql = """
            INSERT INTO ai_vocabulary
            (word, phonetic_us, phonetic_uk, phonetic, level, frequency, is_active,
             word_root, synonyms, antonyms, derivatives, collocations, created_at, updated_at)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """

        insert_definition_sql = """
            INSERT INTO ai_vocabulary_definitions
            (vocabulary_id, part_of_speech, definition_en, definition_cn, sort_order, created_at)
            VALUES (%s, %s, %s, %s, %s, %s)
        """

        insert_example_sql = """
            INSERT INTO ai_vocabulary_examples
            (vocabulary_id, sentence_en, sentence_cn, sort_order, created_at)
            VALUES (%s, %s, %s, %s, %s)
        """

        insert_book_word_sql = """
            INSERT INTO ai_vocabulary_book_words
            (book_id, vocabulary_id, sort_order, created_at)
            VALUES (%s, %s, %s, %s)
        """

        success_count = 0
        error_count = 0

        for index, row in df.iterrows():
            try:
                word = clean_text(row.get('Word'))
                if not word:
                    continue

                phonetic_us = clean_text(row.get('美式音标'))
                phonetic_uk = clean_text(row.get('英式音标'))
                phonetic = phonetic_us or phonetic_uk

                translation_cn = clean_text(row.get('中文含义'))
                translation_en = clean_text(row.get('英文翻译(对应中文含义)'))

                if not translation_cn:
                    print(f"⚠️ 跳过 {word}:缺少中文含义")
                    continue

                if not translation_en:
                    translation_en = word  # fall back to the headword itself

                part_of_speech = extract_part_of_speech(translation_cn)

                example_en = clean_text(row.get('例句'))
                example_cn = clean_text(row.get('例句中文翻译'))

                word_root = clean_text(row.get('词根'))

                # Semicolon-separated cells → JSON array strings.
                synonyms_json = _split_to_json(clean_text(row.get('同义词(含义)')))
                antonyms_json = _split_to_json(clean_text(row.get('反义词(含义)')))
                derivatives_json = _split_to_json(clean_text(row.get('派生词(含义)')))
                collocations_json = _split_to_json(clean_text(row.get('词组搭配(中文含义)')))

                now = datetime.now()
                cursor.execute(insert_vocab_sql, (
                    word,
                    phonetic_us,
                    phonetic_uk,
                    phonetic,
                    'beginner',   # primary-school difficulty
                    index + 1,    # row number doubles as frequency rank
                    True,
                    word_root,
                    synonyms_json,
                    antonyms_json,
                    derivatives_json,
                    collocations_json,
                    now,
                    now
                ))

                vocab_id = cursor.lastrowid

                # Main definition (uses the sheet's English translation).
                cursor.execute(insert_definition_sql, (
                    vocab_id,
                    part_of_speech,
                    translation_en,
                    translation_cn,
                    0,
                    now
                ))

                # Examples are semicolon-separated and paired positionally.
                if example_en and example_cn:
                    for i, (ex_en, ex_cn) in enumerate(
                            zip(example_en.split(';'), example_cn.split(';'))):
                        ex_en = ex_en.strip()
                        ex_cn = ex_cn.strip()
                        if ex_en and ex_cn:
                            cursor.execute(insert_example_sql, (
                                vocab_id,
                                ex_en,
                                ex_cn,
                                i,
                                now
                            ))

                cursor.execute(insert_book_word_sql, (
                    BOOK_ID,
                    vocab_id,
                    index,
                    now
                ))

                success_count += 1
                if success_count % 50 == 0:
                    print(f"✅ 已导入 {success_count} 个单词...")
                    conn.commit()

            except Exception as e:
                error_count += 1
                print(f"❌ 导入第 {index + 1} 行失败: {e}")
                print(f"   单词: {word if 'word' in locals() else 'N/A'}")

        conn.commit()

        # Refresh the book's cached word count.
        cursor.execute(
            "UPDATE ai_vocabulary_books SET total_words = %s WHERE id = %s",
            (success_count, BOOK_ID)
        )
        conn.commit()

        print(f"\n🎉 导入完成!")
        print(f"✅ 成功: {success_count} 个单词")
        print(f"❌ 失败: {error_count} 个单词")

        # Sanity check: count the words actually linked to the book.
        cursor.execute(
            "SELECT COUNT(*) FROM ai_vocabulary_book_words WHERE book_id = %s",
            (BOOK_ID,)
        )
        count = cursor.fetchone()[0]
        print(f"📊 词汇书中共有 {count} 个单词")

        # How many linked words have at least one definition.
        cursor.execute("""
            SELECT COUNT(DISTINCT d.vocabulary_id)
            FROM ai_vocabulary_book_words bw
            JOIN ai_vocabulary_definitions d ON d.vocabulary_id = bw.vocabulary_id
            WHERE bw.book_id = %s
        """, (BOOK_ID,))
        def_count = cursor.fetchone()[0]
        print(f"📊 有释义的单词: {def_count} 个")

        # How many linked words have at least one example sentence.
        cursor.execute("""
            SELECT COUNT(DISTINCT e.vocabulary_id)
            FROM ai_vocabulary_book_words bw
            JOIN ai_vocabulary_examples e ON e.vocabulary_id = bw.vocabulary_id
            WHERE bw.book_id = %s
        """, (BOOK_ID,))
        ex_count = cursor.fetchone()[0]
        print(f"📊 有例句的单词: {ex_count} 个")

    except Exception as e:
        print(f"❌ 导入失败: {e}")
        import traceback
        traceback.print_exc()
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
|
||||
|
||||
if __name__ == '__main__':
    # CLI entry point: import the bundled primary-school spreadsheet.
    excel_path = 'data/小学.xlsx'
    import_words_from_excel(excel_path)
|
||||
265
data/import_senior_high_words.py
Normal file
265
data/import_senior_high_words.py
Normal file
@@ -0,0 +1,265 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""导入高中英语词汇到数据库"""
|
||||
|
||||
import pandas as pd
|
||||
import mysql.connector
|
||||
from datetime import datetime
|
||||
import json
|
||||
|
||||
# Database connection settings.
# NOTE(review): credentials are hardcoded here — move them to environment
# variables or a config file before sharing/deploying this script.
db_config = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'password': 'JKjk20011115',
    'database': 'ai_english_learning',
    'charset': 'utf8mb4',
}

# Target vocabulary-book id every imported word is linked to.
BOOK_ID = 'senior_high_3500'
|
||||
|
||||
def clean_text(text):
    """Normalize a spreadsheet cell value.

    Returns the stripped string form of *text*, or None when the cell is
    NaN, empty, or the literal string 'nan' (pandas' stringified missing value).
    """
    if pd.isna(text):
        return None
    stripped = str(text).strip()
    if stripped in ('', 'nan'):
        return None
    return stripped
|
||||
|
||||
def extract_part_of_speech(translation):
    """Infer an English part-of-speech tag from a Chinese gloss string.

    Dotted abbreviations (e.g. 'adj.') are matched first; Chinese keyword
    hints ('动', '形', ...) are the fallback.  Returns 'noun' when nothing
    matches or *translation* is falsy.
    """
    if not translation:
        return 'noun'

    pos_map = {
        'v.': 'verb',
        'n.': 'noun',
        'adj.': 'adjective',
        'adv.': 'adverb',
        'prep.': 'preposition',
        'conj.': 'conjunction',
        'pron.': 'pronoun',
        'interj.': 'interjection'
    }

    # Check longer abbreviations first and require that the match is not the
    # tail of a longer ASCII token.  The original `'v.' in translation` test
    # classified every 'adv.' gloss as a verb ('adv.' contains 'v.'), and
    # its dot-less fallback ('n' in translation) matched stray letters
    # anywhere in the string — both fixed here.
    for abbr in sorted(pos_map, key=len, reverse=True):
        idx = translation.find(abbr)
        if idx == -1:
            continue
        prev = translation[idx - 1] if idx > 0 else ''
        if not (prev.isascii() and prev.isalpha()):
            return pos_map[abbr]

    # Chinese part-of-speech keyword hints.
    if '动' in translation:
        return 'verb'
    elif '形' in translation or '容' in translation:
        return 'adjective'
    elif '副' in translation:
        return 'adverb'
    elif '介' in translation:
        return 'preposition'
    elif '连' in translation:
        return 'conjunction'

    return 'noun'
|
||||
|
||||
def _split_to_json(text):
    """Serialize a semicolon-separated spreadsheet cell into a JSON array
    string.  Returns '[]' for empty/None input so the value can be stored
    directly in a JSON column."""
    if not text:
        return '[]'
    items = [part.strip() for part in text.split(';') if part.strip()]
    return json.dumps(items, ensure_ascii=False)


def import_words_from_excel(file_path):
    """Import senior-high vocabulary from an Excel file into MySQL.

    Wipes this book's previous links (and orphaned vocabulary rows), then
    for each sheet row: reuse the word if it already exists, otherwise
    insert it with phonetics, root and JSON list columns plus its main
    definition and example sentences; finally link the word to BOOK_ID and
    refresh the book's total_words counter.  Errors are reported per row;
    the function never raises.
    """
    # Pre-bind so the finally block is safe even when pd.read_excel or the
    # DB connection fails before these names are assigned.
    conn = None
    cursor = None
    try:
        print(f"📖 正在读取文件: {file_path}")
        df = pd.read_excel(file_path)

        print(f"📊 文件列名: {df.columns.tolist()}")
        print(f"📊 总行数: {len(df)}")

        conn = mysql.connector.connect(**db_config)
        cursor = conn.cursor()

        # Drop this book's old links, then vocabulary rows no longer
        # referenced by any book.
        print("\n清理旧数据...")
        cursor.execute("DELETE FROM ai_vocabulary_book_words WHERE book_id = %s", (BOOK_ID,))
        cursor.execute("""
            DELETE v FROM ai_vocabulary v
            LEFT JOIN ai_vocabulary_book_words bw ON bw.vocabulary_id = v.id
            WHERE bw.id IS NULL
        """)
        conn.commit()

        insert_vocab_sql = """
            INSERT INTO ai_vocabulary
            (word, phonetic_us, phonetic_uk, phonetic, level, frequency, is_active,
             word_root, synonyms, antonyms, derivatives, collocations, created_at, updated_at)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """

        insert_definition_sql = """
            INSERT INTO ai_vocabulary_definitions
            (vocabulary_id, part_of_speech, definition_en, definition_cn, sort_order, created_at)
            VALUES (%s, %s, %s, %s, %s, %s)
        """

        insert_example_sql = """
            INSERT INTO ai_vocabulary_examples
            (vocabulary_id, sentence_en, sentence_cn, sort_order, created_at)
            VALUES (%s, %s, %s, %s, %s)
        """

        insert_book_word_sql = """
            INSERT INTO ai_vocabulary_book_words
            (book_id, vocabulary_id, sort_order, created_at)
            VALUES (%s, %s, %s, %s)
        """

        success_count = 0
        error_count = 0

        for index, row in df.iterrows():
            try:
                word = clean_text(row.get('Word'))
                if not word:
                    continue

                # Reuse an existing vocabulary row when the word is already known.
                cursor.execute("SELECT id FROM ai_vocabulary WHERE word = %s", (word,))
                existing = cursor.fetchone()

                if existing:
                    vocab_id = existing[0]
                else:
                    phonetic_us = clean_text(row.get('美式音标'))
                    phonetic_uk = clean_text(row.get('英式音标'))
                    phonetic = phonetic_us or phonetic_uk

                    translation_cn = clean_text(row.get('中文含义'))
                    translation_en = clean_text(row.get('英文翻译(对应中文含义)'))

                    if not translation_cn:
                        print(f"⚠️ 跳过 {word}:缺少中文含义")
                        continue

                    if not translation_en:
                        translation_en = word  # fall back to the headword itself

                    part_of_speech = extract_part_of_speech(translation_cn)

                    example_en = clean_text(row.get('例句'))
                    example_cn = clean_text(row.get('例句中文翻译'))

                    word_root = clean_text(row.get('词根'))

                    # Semicolon-separated cells → JSON array strings.
                    synonyms_json = _split_to_json(clean_text(row.get('同义词(含义)')))
                    antonyms_json = _split_to_json(clean_text(row.get('反义词(含义)')))
                    derivatives_json = _split_to_json(clean_text(row.get('派生词(含义)')))
                    collocations_json = _split_to_json(clean_text(row.get('词组搭配(中文含义)')))

                    now = datetime.now()
                    cursor.execute(insert_vocab_sql, (
                        word, phonetic_us, phonetic_uk, phonetic,
                        'intermediate',  # senior-high difficulty
                        index + 1, True,  # row number doubles as frequency rank
                        word_root, synonyms_json, antonyms_json,
                        derivatives_json, collocations_json,
                        now, now
                    ))

                    vocab_id = cursor.lastrowid

                    cursor.execute(insert_definition_sql, (
                        vocab_id, part_of_speech, translation_en,
                        translation_cn, 0, now
                    ))

                    # Examples are semicolon-separated and paired positionally.
                    if example_en and example_cn:
                        for i, (ex_en, ex_cn) in enumerate(
                                zip(example_en.split(';'), example_cn.split(';'))):
                            ex_en = ex_en.strip()
                            ex_cn = ex_cn.strip()
                            if ex_en and ex_cn:
                                cursor.execute(insert_example_sql, (
                                    vocab_id, ex_en, ex_cn, i, now
                                ))

                # Link to the book whether the word was new or reused.
                now = datetime.now()
                try:
                    cursor.execute(insert_book_word_sql, (
                        BOOK_ID, vocab_id, index, now
                    ))
                except Exception as link_error:
                    # Ignore only MySQL duplicate-key errors (ER_DUP_ENTRY,
                    # errno 1062).  Prefer the errno attribute over the
                    # original fragile substring test on str(error), which
                    # could misfire on any message containing "1062".
                    if getattr(link_error, 'errno', None) != 1062 and '1062' not in str(link_error):
                        raise

                success_count += 1
                if success_count % 100 == 0:
                    print(f"✅ 已处理 {success_count} 个单词...")
                    conn.commit()

            except Exception as e:
                error_count += 1
                print(f"❌ 导入第 {index + 1} 行失败: {e}")

        conn.commit()

        # Refresh the book's cached word count.
        cursor.execute(
            "UPDATE ai_vocabulary_books SET total_words = %s WHERE id = %s",
            (success_count, BOOK_ID)
        )
        conn.commit()

        print(f"\n🎉 高中词汇导入完成!")
        print(f"✅ 成功: {success_count} 个单词")
        print(f"❌ 失败: {error_count} 个单词")

        # Sanity check: count the words actually linked to the book.
        cursor.execute(
            "SELECT COUNT(*) FROM ai_vocabulary_book_words WHERE book_id = %s",
            (BOOK_ID,)
        )
        print(f"📊 词汇书中共有 {cursor.fetchone()[0]} 个单词")

    except Exception as e:
        print(f"❌ 导入失败: {e}")
        import traceback
        traceback.print_exc()
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
|
||||
|
||||
if __name__ == '__main__':
    # CLI entry point: import the bundled senior-high spreadsheet.
    excel_path = 'data/高中英语词汇.xlsx'
    import_words_from_excel(excel_path)
|
||||
458
data/insert_all_vocabulary_books.py
Normal file
458
data/insert_all_vocabulary_books.py
Normal file
@@ -0,0 +1,458 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""插入所有词汇书数据到数据库"""
|
||||
|
||||
import mysql.connector
|
||||
from datetime import datetime
|
||||
|
||||
# Database connection settings.
# NOTE(review): credentials are hard-coded in source — consider loading the
# password from an environment variable or config file before sharing/deploying.
db_config = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'password': 'JKjk20011115',
    'database': 'ai_english_learning',
    'charset': 'utf8mb4'  # full Unicode (CJK text and emoji icons)
}
|
||||
|
||||
# All vocabulary-book records to seed into the database.
# Every record carries the same fixed field set; _book() keeps entries compact.

def _book(book_id, name, description, category, level, total_words,
          icon, color, sort_order):
    """Return one vocabulary-book record dict with the standard field names."""
    return {
        'id': book_id,
        'name': name,
        'description': description,
        'category': category,
        'level': level,
        'total_words': total_words,
        'icon': icon,
        'color': color,
        'sort_order': sort_order,
    }


vocabulary_books = [
    # Stage-based core vocabulary
    _book('primary_core_1000', '小学英语核心词汇',
          '小学阶段必备的1000个核心词汇,涵盖日常生活场景',
          '学段基础词汇', 'beginner', 728, '🎈', '#E91E63', 1),  # 728: already imported
    _book('junior_high_1500', '初中英语词汇',
          '初中阶段1500-2500词汇,结合教材要求',
          '学段基础词汇', 'elementary', 1500, '📝', '#00BCD4', 2),
    _book('senior_high_3500', '高中英语词汇',
          '高中阶段2500-3500词汇,涵盖课标与高考高频词',
          '学段基础词汇', 'intermediate', 3500, '📕', '#FF5722', 3),
    _book('college_textbook', '大学英语教材词汇',
          '大学英语精读/泛读配套词汇',
          '学段基础词汇', 'intermediate', 2000, '📚', '#3F51B5', 4),

    # Domestic exam vocabulary
    _book('cet4_core_2500', '大学英语四级核心词汇',
          '涵盖CET-4考试核心词汇2500个',
          '国内应试类词汇', 'intermediate', 2500, '📚', '#4CAF50', 11),
    _book('cet6_core_3000', '大学英语六级核心词汇',
          '涵盖CET-6考试核心词汇3000个',
          '国内应试类词汇', 'advanced', 3000, '📖', '#2196F3', 12),
    _book('postgraduate_vocabulary', '考研英语核心词汇',
          '考研英语必备核心词汇',
          '国内应试类词汇', 'advanced', 5500, '🎓', '#9C27B0', 13),
    _book('tem4_vocabulary', '专四词汇(TEM-4)',
          '英语专业四级考试词汇',
          '国内应试类词汇', 'advanced', 8000, '📘', '#FF9800', 14),
    _book('tem8_vocabulary', '专八词汇(TEM-8)',
          '英语专业八级考试词汇',
          '国内应试类词汇', 'expert', 12000, '📙', '#F44336', 15),

    # Study-abroad exam vocabulary
    _book('ielts_high_3500', '雅思高频词汇',
          '雅思考试高频词汇3500个',
          '出国考试类词汇', 'advanced', 3500, '🌟', '#9C27B0', 21),
    _book('ielts_general', '雅思通用词汇(IELTS General)',
          '雅思通用类考试核心词汇',
          '出国考试类词汇', 'intermediate', 6000, '⭐', '#673AB7', 22),
    _book('toefl_high_3500', '托福高频词汇',
          '托福考试高频词汇3500个',
          '出国考试类词汇', 'advanced', 3500, '🎓', '#FF9800', 23),
    _book('toeic_vocabulary', '托业词汇(TOEIC)',
          '托业考试职场应用词汇',
          '出国考试类词汇', 'intermediate', 6000, '💼', '#00BCD4', 24),
    _book('gre_vocabulary', 'GRE词汇',
          'GRE学术/研究生申请词汇',
          '出国考试类词汇', 'expert', 15000, '🔬', '#E91E63', 25),
    _book('gmat_vocabulary', 'GMAT词汇',
          'GMAT商科/管理类研究生词汇',
          '出国考试类词汇', 'advanced', 8000, '📊', '#4CAF50', 26),
    _book('sat_vocabulary', 'SAT词汇',
          'SAT美本申请词汇',
          '出国考试类词汇', 'intermediate', 5000, '🎯', '#FF5722', 27),

    # Professional / specialist vocabulary
    _book('business_core_1000', '商务英语核心词汇',
          '商务场景常用核心词汇1000个',
          '职业与专业类词汇', 'intermediate', 1000, '💼', '#607D8B', 31),
    _book('bec_preliminary', '商务英语初级(BEC Preliminary)',
          'BEC初级商务英语词汇',
          '职业与专业类词汇', 'intermediate', 3000, '📋', '#00BCD4', 32),
    _book('bec_vantage', '商务英语中级(BEC Vantage)',
          'BEC中级商务英语词汇',
          '职业与专业类词汇', 'intermediate', 4000, '📊', '#2196F3', 33),
    _book('bec_higher', '商务英语高级(BEC Higher)',
          'BEC高级商务英语词汇',
          '职业与专业类词汇', 'advanced', 5000, '📈', '#4CAF50', 34),
    _book('mba_finance', 'MBA/金融词汇',
          'MBA、金融、会计、经济学专业词汇',
          '职业与专业类词汇', 'advanced', 6000, '💰', '#FF9800', 35),
    _book('medical_english', '医学英语词汇',
          '医学专业英语词汇',
          '职业与专业类词汇', 'advanced', 8000, '⚕️', '#F44336', 36),
    _book('legal_english', '法律英语词汇',
          '法律专业英语词汇',
          '职业与专业类词汇', 'advanced', 5000, '⚖️', '#9C27B0', 37),
    _book('it_engineering', '工程与IT英语',
          '计算机科学、人工智能、软件工程词汇',
          '职业与专业类词汇', 'intermediate', 4000, '💻', '#3F51B5', 38),
    _book('academic_english', '学术英语(EAP)',
          '学术英语写作/阅读/科研常用词汇',
          '职业与专业类词汇', 'advanced', 6000, '🔬', '#00BCD4', 39),

    # Function-oriented word banks
    _book('word_roots_affixes', '词根词缀词汇',
          '帮助记忆与扩展的词根词缀词汇',
          '功能型词库', 'intermediate', 3000, '🌱', '#4CAF50', 41),
    _book('synonyms_antonyms', '同义词/反义词库',
          '同义词、反义词、近义搭配库',
          '功能型词库', 'intermediate', 2500, '🔄', '#2196F3', 42),
    _book('daily_spoken_collocations', '日常口语搭配库',
          '日常口语常用搭配库',
          '功能型词库', 'beginner', 1500, '💬', '#FF9800', 43),
    _book('academic_spoken_collocations', '学术口语搭配库',
          '学术口语常用搭配库',
          '功能型词库', 'advanced', 2000, '🎤', '#9C27B0', 44),
    _book('academic_writing_collocations', '学术写作搭配库',
          '学术写作常用搭配库(Collocations)',
          '功能型词库', 'advanced', 2500, '✍️', '#E91E63', 45),
    _book('daily_life_english', '日常生活英语',
          '旅游、点餐、购物、出行、租房等日常生活英语',
          '功能型词库', 'beginner', 2000, '🏠', '#00BCD4', 46),
]
|
||||
|
||||
def main():
    """Seed/refresh all vocabulary-book rows in ai_vocabulary_books.

    Upserts every record in ``vocabulary_books`` (INSERT ... ON DUPLICATE
    KEY UPDATE), then prints a per-category summary.  ``total_words`` is
    deliberately NOT refreshed on duplicate, so counts already set by the
    word importers are preserved.
    """
    # Bug fix: bind these before the try block so the finally clause cannot
    # raise NameError when mysql.connector.connect() itself fails.
    conn = None
    cursor = None
    try:
        # Connect to the database.
        conn = mysql.connector.connect(**db_config)
        cursor = conn.cursor()

        print(f"📚 准备插入 {len(vocabulary_books)} 个词汇书...")

        # Upsert SQL: inserts a new system book, or refreshes the mutable
        # display fields of an existing one (total_words intentionally kept).
        insert_sql = """
        INSERT INTO ai_vocabulary_books
        (id, name, description, category, level, total_words, icon, color, is_system, is_active, sort_order, created_at, updated_at)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, TRUE, TRUE, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
        name = VALUES(name),
        description = VALUES(description),
        category = VALUES(category),
        level = VALUES(level),
        icon = VALUES(icon),
        color = VALUES(color),
        sort_order = VALUES(sort_order),
        updated_at = VALUES(updated_at)
        """

        success_count = 0
        update_count = 0

        for book in vocabulary_books:
            now = datetime.now()

            # Probe first so insert vs. update can be reported accurately.
            cursor.execute("SELECT id FROM ai_vocabulary_books WHERE id = %s", (book['id'],))
            exists = cursor.fetchone()

            cursor.execute(insert_sql, (
                book['id'],
                book['name'],
                book['description'],
                book['category'],
                book['level'],
                book['total_words'],
                book['icon'],
                book['color'],
                book['sort_order'],
                now,
                now
            ))

            if exists:
                update_count += 1
                print(f"🔄 更新词汇书: {book['name']} ({book['category']})")
            else:
                success_count += 1
                print(f"✅ 插入词汇书: {book['name']} ({book['category']})")

        conn.commit()

        print(f"\n🎉 完成!")
        print(f"✅ 新增: {success_count} 个词汇书")
        print(f"🔄 更新: {update_count} 个词汇书")
        print(f"📊 总计: {success_count + update_count} 个词汇书")

        # Per-category summary of all active system books.
        cursor.execute("""
            SELECT category, COUNT(*) as count
            FROM ai_vocabulary_books
            WHERE is_system = TRUE AND is_active = TRUE
            GROUP BY category
            ORDER BY MIN(sort_order)
        """)

        print(f"\n📋 分类统计:")
        for row in cursor.fetchall():
            print(f"  {row[0]}: {row[1]} 个词汇书")

    except mysql.connector.Error as err:
        print(f"❌ 数据库错误: {err}")
        import traceback
        traceback.print_exc()
    finally:
        # Bug fix: explicit None checks instead of bare truthiness on names
        # that may never have been bound (NameError in the original when the
        # connection attempt failed).
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
|
||||
|
||||
# Script entry point: seed all vocabulary books when run directly.
if __name__ == '__main__':
    main()
|
||||
165
data/insert_vocabulary_books.py
Normal file
165
data/insert_vocabulary_books.py
Normal file
@@ -0,0 +1,165 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""插入词汇书数据"""
|
||||
|
||||
import mysql.connector
|
||||
from datetime import datetime
|
||||
|
||||
# Database connection settings.
# NOTE(review): credentials are hard-coded in source — consider loading the
# password from an environment variable or config file before sharing/deploying.
db_config = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'password': 'JKjk20011115',
    'database': 'ai_english_learning',
    'charset': 'utf8mb4'  # full Unicode (CJK text and emoji icons)
}
|
||||
|
||||
# Vocabulary-book seed data.  Rows are kept as flat tuples and zipped with a
# shared field-name tuple to build the record dicts.
_FIELDS = ('id', 'name', 'description', 'category', 'level',
           'total_words', 'icon', 'color', 'sort_order')

_ROWS = [
    ('cet4_core_2500', '大学英语四级核心词汇', '涵盖CET-4考试核心词汇2500个',
     'CET-4核心词汇', 'intermediate', 2500, '📚', '#4CAF50', 1),
    ('cet6_core_3000', '大学英语六级核心词汇', '涵盖CET-6考试核心词汇3000个',
     'CET-6核心词汇', 'advanced', 3000, '📖', '#2196F3', 2),
    ('toefl_high_3500', '托福高频词汇', '托福考试高频词汇3500个',
     'TOEFL高频词汇', 'advanced', 3500, '🎓', '#FF9800', 3),
    ('ielts_high_3500', '雅思高频词汇', '雅思考试高频词汇3500个',
     'IELTS高频词汇', 'advanced', 3500, '🌟', '#9C27B0', 4),
    ('primary_core_1000', '小学英语核心词汇', '小学阶段必备核心词汇1000个',
     '小学核心词汇', 'beginner', 1000, '🎈', '#E91E63', 5),
    ('junior_core_1500', '初中英语核心词汇', '初中阶段必备核心词汇1500个',
     '初中核心词汇', 'elementary', 1500, '📝', '#00BCD4', 6),
    ('senior_core_3500', '高中英语核心词汇', '高中阶段必备核心词汇3500个',
     '高中核心词汇', 'intermediate', 3500, '📕', '#FF5722', 7),
    ('business_core_1000', '商务英语核心词汇', '商务场景常用核心词汇1000个',
     '商务英语', 'intermediate', 1000, '💼', '#607D8B', 8),
]

vocabulary_books = [dict(zip(_FIELDS, row)) for row in _ROWS]
|
||||
|
||||
def main():
    """Upsert the seed vocabulary books into ai_vocabulary_books.

    Assumes the table already exists (created elsewhere, e.g. by GORM
    auto-migration).  Each record is inserted or, on duplicate key, has all
    of its mutable fields refreshed (this variant DOES overwrite total_words).
    """
    # Bug fix: bind these before the try block so the finally clause cannot
    # raise NameError when mysql.connector.connect() itself fails.
    conn = None
    cursor = None
    try:
        # Connect to the database.
        conn = mysql.connector.connect(**db_config)
        cursor = conn.cursor()

        print("⏩ 跳过表创建,直接插入数据(表应该已由GORM自动创建)")

        # Upsert SQL for the vocabulary-book records.
        insert_sql = """
        INSERT INTO `ai_vocabulary_books`
        (`id`, `name`, `description`, `category`, `level`, `total_words`, `icon`, `color`, `is_system`, `is_active`, `sort_order`)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, TRUE, TRUE, %s)
        ON DUPLICATE KEY UPDATE
        `name` = VALUES(`name`),
        `description` = VALUES(`description`),
        `category` = VALUES(`category`),
        `level` = VALUES(`level`),
        `total_words` = VALUES(`total_words`),
        `icon` = VALUES(`icon`),
        `color` = VALUES(`color`),
        `sort_order` = VALUES(`sort_order`)
        """

        for book in vocabulary_books:
            cursor.execute(insert_sql, (
                book['id'],
                book['name'],
                book['description'],
                book['category'],
                book['level'],
                book['total_words'],
                book['icon'],
                book['color'],
                book['sort_order']
            ))
            print(f"✅ 插入词汇书: {book['name']}")

        conn.commit()
        print(f"\n🎉 成功插入 {len(vocabulary_books)} 个词汇书!")

        # Sanity check: report the total row count after the upsert.
        cursor.execute("SELECT COUNT(*) FROM ai_vocabulary_books")
        count = cursor.fetchone()[0]
        print(f"📊 当前数据库中共有 {count} 个词汇书")

    except mysql.connector.Error as err:
        print(f"❌ 数据库错误: {err}")
    finally:
        # Bug fix: explicit None checks instead of bare truthiness on names
        # that may never have been bound (NameError in the original when the
        # connection attempt failed).
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
|
||||
|
||||
# Script entry point: seed the vocabulary books when run directly.
if __name__ == '__main__':
    main()
|
||||
BIN
data/logo.png
Normal file
BIN
data/logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.3 MiB |
Reference in New Issue
Block a user