Files
baijiahao_text_to_image/split_sql_tables.py

297 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
将包含多个表的SQL文件拆分为单个表的SQL文件
"""
import os
import re
from pathlib import Path
def split_sql_tables(input_file_path):
    """
    Split every CREATE TABLE statement of a SQL file into its own file.

    A statement is taken to run from its ``CREATE TABLE`` keyword up to the
    next line that starts with ``CREATE TABLE`` (or to the end of the file),
    so anything in between is written together with the table. Each
    statement goes to ``split_tables/<table>.sql`` next to the input file,
    preceded by a short comment header.

    Args:
        input_file_path: Path of the SQL file to split (read as UTF-8).
    """
    with open(input_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # The optional IF NOT EXISTS clause is skipped explicitly; otherwise the
    # word "IF" would be tried as the table name and the statement would not
    # match at all.
    create_table_pattern = (
        r'(CREATE TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?`?(\w+)`?\s*\(.+?)'
        r'(?=\nCREATE TABLE|\Z)'
    )
    # With two capture groups, findall yields (statement, table_name) tuples.
    tables = re.findall(create_table_pattern, content, re.DOTALL | re.IGNORECASE)

    # Fallback when the pattern above found nothing: cut the text in front of
    # every line starting with CREATE TABLE and read the name from each piece.
    if not tables:
        name_pattern = re.compile(
            r'CREATE TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?`?(\w+)`?',
            re.IGNORECASE,
        )
        parts = re.split(r'\n(?=CREATE TABLE)', content, flags=re.IGNORECASE)
        for part in parts:
            statement = part.strip()
            if not statement.upper().startswith('CREATE TABLE'):
                continue
            name_match = name_pattern.search(statement)
            if name_match:
                tables.append((statement, name_match.group(1)))

    # The output directory lives next to the input file.
    output_dir = Path(input_file_path).parent / "split_tables"
    output_dir.mkdir(exist_ok=True)

    for table_sql, table_name in tables:
        # Replace anything that is not a word character, '-', '_' or '.' so
        # the table name can be used as a file name.
        clean_table_name = re.sub(r'[^\w\-_\.]', '_', table_name)
        output_file_path = output_dir / f"{clean_table_name}.sql"

        with open(output_file_path, 'w', encoding='utf-8') as f:
            f.write("-- SQL table definition\n")
            f.write("-- Generated from splitting a larger SQL file\n")
            f.write("\n")
            f.write(table_sql.strip())
            f.write("\n")

        print(f"已创建表文件: {output_file_path}")
def split_sql_tables_advanced(input_file_path):
    """
    Split the CREATE TABLE statements of a SQL file line by line.

    A statement starts on a line beginning with ``CREATE TABLE`` and ends on
    the first line (possibly the same one) whose stripped text ends with
    ``;``. Lines outside of a statement are ignored. Each statement is passed
    to ``save_table_to_file`` with ``split_tables/`` next to the input file
    as the output directory.

    Args:
        input_file_path: Path of the SQL file to split (read as UTF-8).
    """
    with open(input_file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    output_dir = Path(input_file_path).parent / "split_tables"
    output_dir.mkdir(exist_ok=True)

    # Skip an optional IF NOT EXISTS clause so that "IF" is never taken as
    # the table name.
    name_pattern = re.compile(
        r'CREATE TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?`?(\w+)`?',
        re.IGNORECASE,
    )

    current_table_lines = []
    in_table_definition = False
    current_table_name = ""

    for raw_line in lines:
        line = raw_line.strip()

        if line.upper().startswith('CREATE TABLE'):
            # A new statement began before the previous one was terminated:
            # flush what has been collected so far.
            if in_table_definition and current_table_lines:
                save_table_to_file(current_table_name, current_table_lines, output_dir)

            in_table_definition = True
            current_table_lines = [raw_line]

            # NOTE: when no name can be parsed, the previous name is kept.
            name_match = name_pattern.search(line)
            if name_match:
                current_table_name = name_match.group(1)
        elif in_table_definition:
            current_table_lines.append(raw_line)
        else:
            # Not inside a table definition: nothing to collect.
            continue

        # A trailing semicolon closes the statement, whether it sits on the
        # CREATE TABLE line itself or on a later line.
        if line.endswith(';'):
            save_table_to_file(current_table_name, current_table_lines, output_dir)
            current_table_lines = []
            in_table_definition = False

    # The file ended while a statement was still open.
    if in_table_definition and current_table_lines:
        save_table_to_file(current_table_name, current_table_lines, output_dir)
def save_table_to_file(table_name, table_lines, output_dir):
    """
    Write one table definition to ``<output_dir>/<sanitized name>.sql``.

    The file starts with a three-line comment header (including the table
    name) and a blank line; every entry of ``table_lines`` follows with its
    trailing whitespace removed and a single newline appended.

    Args:
        table_name: Table name; used for the header and the file name.
        table_lines: Iterable of strings making up the definition.
        output_dir: Directory path object supporting the ``/`` operator.
    """
    # Anything other than word characters, '-', '_' and '.' becomes '_'.
    safe_name = re.sub(r'[^\w\-_\.]', '_', table_name)
    target_path = output_dir / f"{safe_name}.sql"

    header = (
        "-- SQL table definition\n",
        "-- Generated from splitting a larger SQL file\n",
        "-- Table: " + table_name + "\n",
        "\n",
    )

    with open(target_path, 'w', encoding='utf-8') as out:
        out.writelines(header)
        out.writelines(row.rstrip() + '\n' for row in table_lines)

    print(f"已创建表文件: {target_path}")
def extract_create_table_statements(input_file_path):
    """
    Extract all CREATE TABLE statements of a SQL file into separate files.

    The file is scanned line by line. A statement starts on a line beginning
    with ``CREATE TABLE`` and ends on the first line (possibly the same one)
    whose stripped text ends with ``;``. Lines outside of a statement are
    skipped. Each statement is passed to ``save_table_to_file`` with
    ``split_tables/`` next to the input file as the output directory.

    Args:
        input_file_path: Path of the SQL file to read (UTF-8).
    """
    with open(input_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    lines = content.split('\n')

    output_dir = Path(input_file_path).parent / "split_tables"
    output_dir.mkdir(exist_ok=True)

    # Skip an optional IF NOT EXISTS clause so that "IF" is never taken as
    # the table name.
    name_pattern = re.compile(
        r'CREATE TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?`?(\w+)`?',
        re.IGNORECASE,
    )

    current_table_lines = []
    in_table_definition = False
    current_table_name = ""

    for line in lines:
        stripped_line = line.strip()

        if stripped_line.upper().startswith('CREATE TABLE'):
            # Flush a previous statement that never saw its terminator.
            if in_table_definition and current_table_lines:
                save_table_to_file(current_table_name, current_table_lines, output_dir)

            in_table_definition = True
            # NOTE: when no name can be parsed, the previous name is kept.
            name_match = name_pattern.search(stripped_line)
            if name_match:
                current_table_name = name_match.group(1)
            current_table_lines = [line]
        elif in_table_definition:
            current_table_lines.append(line)
        else:
            # Not inside a table definition: skip the line.
            continue

        # Checked for the CREATE TABLE line as well, so a one-line statement
        # does not swallow the statements that follow it.
        if stripped_line.endswith(';'):
            save_table_to_file(current_table_name, current_table_lines, output_dir)
            current_table_lines = []
            in_table_definition = False

    # The file ended while a statement was still open.
    if in_table_definition and current_table_lines:
        save_table_to_file(current_table_name, current_table_lines, output_dir)
def parse_sql_file(input_file_path):
    """
    Parse a SQL file and write each CREATE TABLE statement to its own file.

    The file is scanned line by line. A statement starts on a line beginning
    with ``CREATE TABLE``. It ends on the first line (possibly the same one)
    that ends with ``;`` and either mentions ``ENGINE``, ``CHARACTER SET`` or
    ``ROW_FORMAT`` or contains no more ``(`` than ``)``. Lines outside of a
    statement are skipped. Each statement is passed to
    ``save_table_to_file_simple`` with ``split_tables/`` next to the input
    file as the output directory.

    Args:
        input_file_path: Path of the SQL file to read (UTF-8).
    """
    with open(input_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    lines = content.split('\n')

    output_dir = Path(input_file_path).parent / "split_tables"
    output_dir.mkdir(exist_ok=True)

    # The optional IF NOT EXISTS clause is skipped before the name.
    name_pattern = re.compile(
        r'CREATE TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?`?(\w+)`?',
        re.IGNORECASE,
    )
    closing_keywords = ('ENGINE', 'CHARACTER SET', 'ROW_FORMAT')

    current_table_lines = []
    in_table_definition = False
    current_table_name = ""

    for line in lines:
        stripped_line = line.strip()

        if stripped_line.upper().startswith('CREATE TABLE'):
            # Flush a previous statement that never saw its terminator.
            if in_table_definition and current_table_lines:
                save_table_to_file_simple(current_table_name, current_table_lines, output_dir)

            in_table_definition = True
            # NOTE: when no name can be parsed, the previous name is kept.
            table_name_match = name_pattern.search(stripped_line)
            if table_name_match:
                current_table_name = table_name_match.group(1)
            current_table_lines = [line]
        elif in_table_definition:
            current_table_lines.append(line)
        else:
            # Not inside a table definition: skip the line.
            continue

        # Applied to the CREATE TABLE line too, so a one-line statement does
        # not swallow the statements that follow it.
        if stripped_line.endswith(';') and (
            any(keyword in stripped_line for keyword in closing_keywords)
            or line.count('(') <= line.count(')')
        ):
            save_table_to_file_simple(current_table_name, current_table_lines, output_dir)
            current_table_lines = []
            in_table_definition = False

    # The file ended while a statement was still open.
    if in_table_definition and current_table_lines:
        save_table_to_file_simple(current_table_name, current_table_lines, output_dir)
def save_table_to_file_simple(table_name, table_lines, output_dir):
    """
    Write one table definition to ``<output_dir>/<sanitized name>.sql``.

    The file starts with a four-line comment header (including the table
    name) and a blank line; every entry of ``table_lines`` follows unchanged
    with a newline appended.

    Args:
        table_name: Table name; used for the header and the file name.
        table_lines: Iterable of strings without trailing newlines.
        output_dir: Directory path object supporting the ``/`` operator.
    """
    # Anything other than word characters, '-', '_' and '.' becomes '_'.
    file_stem = re.sub(r'[^\w\-_\.]', '_', table_name)
    destination = output_dir / f"{file_stem}.sql"

    preamble = (
        "-- SQL table definition\n"
        "-- Generated from splitting a larger SQL file\n"
        f"-- Table: {table_name}\n"
        "--\n\n"
    )

    with open(destination, 'w', encoding='utf-8') as handle:
        handle.write(preamble)
        for row in table_lines:
            handle.write(row + '\n')

    print(f"已创建表文件: {destination}")
if __name__ == "__main__":
    import sys

    # The path comes from the first command-line argument, or from an
    # interactive prompt when none is given. Surrounding single or double
    # quotes are stripped in both cases.
    if len(sys.argv) < 2:
        input_file = input("请输入SQL文件路径: ").strip().strip('"\'')
    else:
        input_file = sys.argv[1].strip('"\'')

    # isfile() rather than exists(): a directory also "exists" but would
    # make the open() call inside parse_sql_file fail.
    if not os.path.isfile(input_file):
        print(f"错误: 文件 {input_file} 不存在")
        sys.exit(1)

    print(f"正在拆分SQL文件: {input_file}")
    parse_sql_file(input_file)
    print("拆分完成!")