297 lines
11 KiB
Python
297 lines
11 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
Split a SQL file that contains multiple tables into one SQL file per table.
|
||
"""
|
||
|
||
import os
|
||
import re
|
||
from pathlib import Path
|
||
|
||
|
||
def split_sql_tables(input_file_path):
    """Split every ``CREATE TABLE`` section of a SQL file into its own file.

    The input is read as UTF-8.  A section starts at a ``CREATE TABLE``
    statement and runs up to (but not including) the next line that begins
    with ``CREATE TABLE``, or to the end of the file, so any statements that
    follow a table definition stay in that table's file.  Matching is
    case-insensitive and accepts an optional ``IF NOT EXISTS`` clause.

    Each section is written to ``split_tables/<table_name>.sql`` inside the
    directory that contains the input file; the directory is created when it
    does not exist.  A table that appears twice overwrites its earlier file.

    Args:
        input_file_path: Path of the SQL file to split.

    Returns:
        None. The created file paths are printed to stdout.
    """
    with open(input_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Without the optional IF NOT EXISTS group, ``\w+`` would capture "IF"
    # and the statement would then fail to match at the opening parenthesis.
    create_table_pattern = (
        r'(CREATE TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?`?(\w+)`?\s*\(.+?)'
        r'(?=\nCREATE TABLE|\Z)'
    )
    tables = re.findall(create_table_pattern, content, re.DOTALL | re.IGNORECASE)

    if not tables:
        # Fallback for statements without a column list right after the name
        # (the primary pattern requires "("): cut the text at every line that
        # starts with CREATE TABLE and take the name from the start of each part.
        name_pattern = re.compile(
            r'CREATE TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?`?(\w+)`?',
            re.IGNORECASE,
        )
        for part in re.split(r'\n(?=CREATE TABLE)', content, flags=re.IGNORECASE):
            part = part.strip()
            name_match = name_pattern.match(part)
            if name_match:
                tables.append((part, name_match.group(1)))

    output_dir = Path(input_file_path).parent / "split_tables"
    output_dir.mkdir(exist_ok=True)

    for table_sql, table_name in tables:
        # Replace anything that is not a word character, "-", "_" or "." so
        # the table name is usable as a file name.
        clean_table_name = re.sub(r'[^\w\-_\.]', '_', table_name)
        output_file_path = output_dir / f"{clean_table_name}.sql"

        with open(output_file_path, 'w', encoding='utf-8') as f:
            f.write(
                "-- SQL table definition\n"
                "-- Generated from splitting a larger SQL file\n"
                "\n"
                f"{table_sql.strip()}\n"
            )

        print(f"已创建表文件: {output_file_path}")
|
||
|
||
|
||
def split_sql_tables_advanced(input_file_path):
    """Split a SQL file into per-table files by scanning it line by line.

    A table definition starts at a line whose stripped text begins with
    ``CREATE TABLE`` (case-insensitive) and ends at the first line, the
    starting line included, whose stripped text ends with ``;``.  Lines
    outside a table definition are ignored.  Each definition is handed to
    ``save_table_to_file`` together with the ``split_tables`` directory that
    is created next to the input file.

    Args:
        input_file_path: Path of the UTF-8 encoded SQL file to split.

    Returns:
        None.
    """
    with open(input_file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    output_dir = Path(input_file_path).parent / "split_tables"
    output_dir.mkdir(exist_ok=True)

    # The optional IF NOT EXISTS group keeps "IF" from being captured as the
    # table name.
    name_pattern = re.compile(
        r'CREATE TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?`?(\w+)`?',
        re.IGNORECASE,
    )

    current_table_lines = []
    in_table_definition = False
    current_table_name = ""
    unnamed_count = 0

    for raw_line in lines:
        line = raw_line.strip()

        if line.upper().startswith('CREATE TABLE'):
            # A new statement began before the previous one was terminated:
            # flush what was collected so far.
            if in_table_definition and current_table_lines:
                save_table_to_file(current_table_name, current_table_lines, output_dir)

            in_table_definition = True
            current_table_lines = [raw_line]

            name_match = name_pattern.search(line)
            if name_match:
                current_table_name = name_match.group(1)
            else:
                # Never reuse the previous table's name: that would overwrite
                # the file that was just written for it.
                unnamed_count += 1
                current_table_name = f"unnamed_table_{unnamed_count}"
        elif in_table_definition:
            current_table_lines.append(raw_line)
        else:
            # Not part of any table definition.
            continue

        # A trailing semicolon closes the statement, whether it is on the
        # CREATE TABLE line itself or on a later line.
        if line.endswith(';'):
            save_table_to_file(current_table_name, current_table_lines, output_dir)
            current_table_lines = []
            in_table_definition = False

    # Flush a final definition that was never terminated by a semicolon.
    if in_table_definition and current_table_lines:
        save_table_to_file(current_table_name, current_table_lines, output_dir)
|
||
|
||
|
||
def save_table_to_file(table_name, table_lines, output_dir):
    """Write one table definition to ``<output_dir>/<table_name>.sql``.

    The file is written as UTF-8 and starts with a three-line comment header
    followed by a blank line.  Every entry of ``table_lines`` is written with
    its trailing whitespace removed and a single newline appended.  An
    existing file with the same name is overwritten.

    Args:
        table_name: Table name; characters other than word characters, ``-``,
            ``_`` and ``.`` are replaced by ``_`` to build the file name.  An
            empty name falls back to ``unnamed_table`` instead of producing a
            file called ``.sql``.
        table_lines: Iterable of the lines that make up the definition.
        output_dir: Existing directory to write into, as ``str`` or ``Path``.

    Returns:
        None. The created file path is printed to stdout.
    """
    clean_table_name = re.sub(r'[^\w\-_\.]', '_', table_name) or "unnamed_table"

    # Path() lets callers pass the directory as a plain string as well.
    output_file_path = Path(output_dir) / f"{clean_table_name}.sql"

    header = (
        "-- SQL table definition\n"
        "-- Generated from splitting a larger SQL file\n"
        "-- Table: " + table_name + "\n"
        "\n"
    )
    body = "".join(line.rstrip() + '\n' for line in table_lines)

    with open(output_file_path, 'w', encoding='utf-8') as f:
        f.write(header + body)

    print(f"已创建表文件: {output_file_path}")
|
||
|
||
|
||
def extract_create_table_statements(input_file_path):
    """Extract every ``CREATE TABLE`` statement of a SQL file into its own file.

    The file is read as UTF-8 and scanned line by line.  A statement starts at
    a line whose stripped text begins with ``CREATE TABLE`` (case-insensitive)
    and ends at the first line, the starting line included, whose stripped
    text ends with ``;``.  Lines outside a statement are skipped.  Each
    statement is passed to ``save_table_to_file`` together with the
    ``split_tables`` directory that is created next to the input file.

    Args:
        input_file_path: Path of the SQL file to process.

    Returns:
        None.
    """
    with open(input_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    lines = content.split('\n')

    output_dir = Path(input_file_path).parent / "split_tables"
    output_dir.mkdir(exist_ok=True)

    # The optional IF NOT EXISTS group keeps "IF" from being captured as the
    # table name.
    name_pattern = re.compile(
        r'CREATE TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?`?(\w+)`?',
        re.IGNORECASE,
    )

    current_table_lines = []
    in_table_definition = False
    current_table_name = ""
    unnamed_count = 0

    for line in lines:
        stripped_line = line.strip()

        if stripped_line.upper().startswith('CREATE TABLE'):
            # Flush a previous statement that was never closed by a semicolon.
            if in_table_definition and current_table_lines:
                save_table_to_file(current_table_name, current_table_lines, output_dir)

            in_table_definition = True
            current_table_lines = [line]

            name_match = name_pattern.search(stripped_line)
            if name_match:
                current_table_name = name_match.group(1)
            else:
                # Never reuse the previous table's name: that would overwrite
                # the file that was just written for it.
                unnamed_count += 1
                current_table_name = f"unnamed_table_{unnamed_count}"
        elif in_table_definition:
            current_table_lines.append(line)
        else:
            # Not part of any table definition.
            continue

        # Checked for the CREATE TABLE line too, so a one-line statement is
        # closed immediately instead of absorbing the statements after it.
        if stripped_line.endswith(';'):
            save_table_to_file(current_table_name, current_table_lines, output_dir)
            current_table_lines = []
            in_table_definition = False

    # Flush a final statement that was never terminated by a semicolon.
    if in_table_definition and current_table_lines:
        save_table_to_file(current_table_name, current_table_lines, output_dir)
|
||
|
||
|
||
def parse_sql_file(input_file_path):
    """Parse a SQL file and write each table definition to its own file.

    The file is read as UTF-8 and scanned line by line.  A definition starts
    at a line whose stripped text begins with ``CREATE TABLE``
    (case-insensitive).  It ends at the first line, the starting line
    included, that ends with ``;`` and either mentions ``ENGINE``,
    ``CHARACTER SET`` or ``ROW_FORMAT`` or has no more opening than closing
    parentheses.  Lines outside a definition are skipped.  Each definition is
    passed to ``save_table_to_file_simple`` together with the ``split_tables``
    directory that is created next to the input file.

    Args:
        input_file_path: Path of the SQL file to split.

    Returns:
        None.
    """
    with open(input_file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    lines = content.split('\n')

    output_dir = Path(input_file_path).parent / "split_tables"
    output_dir.mkdir(exist_ok=True)

    name_pattern = re.compile(
        r'CREATE TABLE\s+(?:IF NOT EXISTS\s+)?`?(\w+)`?',
        re.IGNORECASE,
    )
    end_keywords = ('ENGINE', 'CHARACTER SET', 'ROW_FORMAT')

    current_table_lines = []
    in_table_definition = False
    current_table_name = ""
    unnamed_count = 0

    for line in lines:
        stripped_line = line.strip()
        upper_line = stripped_line.upper()

        if upper_line.startswith('CREATE TABLE'):
            # Flush a previous definition that was never closed.
            if in_table_definition and current_table_lines:
                save_table_to_file_simple(current_table_name, current_table_lines, output_dir)

            in_table_definition = True
            current_table_lines = [line]

            name_match = name_pattern.search(stripped_line)
            if name_match:
                current_table_name = name_match.group(1)
            else:
                # Never reuse the previous table's name: that would overwrite
                # the file that was just written for it.
                unnamed_count += 1
                current_table_name = f"unnamed_table_{unnamed_count}"
        elif in_table_definition:
            current_table_lines.append(line)
        else:
            # Not part of any table definition.
            continue

        if not stripped_line.endswith(';'):
            continue

        # Heuristic end-of-definition test.  Keywords are compared in upper
        # case so lower-case dumps are recognised as well.  It also runs for
        # the CREATE TABLE line itself so one-line statements are closed.
        has_table_option = any(keyword in upper_line for keyword in end_keywords)
        if has_table_option or line.count('(') <= line.count(')'):
            save_table_to_file_simple(current_table_name, current_table_lines, output_dir)
            current_table_lines = []
            in_table_definition = False

    # Flush a final definition that was never terminated.
    if in_table_definition and current_table_lines:
        save_table_to_file_simple(current_table_name, current_table_lines, output_dir)
|
||
|
||
|
||
def save_table_to_file_simple(table_name, table_lines, output_dir):
    """Write one table definition to ``<output_dir>/<table_name>.sql``.

    The file is written as UTF-8 and starts with a four-line comment header
    followed by a blank line.  Every entry of ``table_lines`` is written
    followed by exactly one newline; a line terminator already present on an
    entry is dropped first so it is not doubled.  Other trailing whitespace
    is kept.  An existing file with the same name is overwritten.

    Args:
        table_name: Table name; characters other than word characters, ``-``,
            ``_`` and ``.`` are replaced by ``_`` to build the file name.  An
            empty name falls back to ``unnamed_table`` instead of producing a
            file called ``.sql``.
        table_lines: Iterable of the lines that make up the definition.
        output_dir: Existing directory to write into, as ``str`` or ``Path``.

    Returns:
        None. The created file path is printed to stdout.
    """
    clean_table_name = re.sub(r'[^\w\-_\.]', '_', table_name) or "unnamed_table"

    # Path() lets callers pass the directory as a plain string as well.
    output_file_path = Path(output_dir) / f"{clean_table_name}.sql"

    header = (
        "-- SQL table definition\n"
        "-- Generated from splitting a larger SQL file\n"
        f"-- Table: {table_name}\n"
        "--\n\n"
    )
    body = "".join(line.rstrip('\r\n') + '\n' for line in table_lines)

    with open(output_file_path, 'w', encoding='utf-8') as f:
        f.write(header + body)

    print(f"已创建表文件: {output_file_path}")
|
||
|
||
|
||
if __name__ == "__main__":
    import sys

    # Take the SQL file path from the first command-line argument, or prompt
    # for it when none is given.  Surrounding quote characters are stripped
    # in both cases.
    if len(sys.argv) < 2:
        input_file = input("请输入SQL文件路径: ").strip().strip('"\'')
    else:
        input_file = sys.argv[1].strip('"\'')

    # isfile() rather than exists(): a directory path must be rejected here
    # instead of failing later when the file is opened.
    if not os.path.isfile(input_file):
        print(f"错误: 文件 {input_file} 不存在")
        sys.exit(1)

    print(f"正在拆分SQL文件: {input_file}")
    parse_sql_file(input_file)
    print("拆分完成!")