# -*- coding: utf-8 -*-
"""
CSV persistence module (simplified implementation based on DATA_SCHEMA.md)

- symbols.csv
- bars_1m.csv
- signals.csv

Notes:
- No true upsert (CSV is ill-suited for it); existing rows are read into an
  in-memory index to avoid writing duplicate key fields.
- Ratio fields (e.g. percent change) are stored as decimals, e.g. 4.02% -> 0.0402.
"""
import csv
import json  # used by the signal writers below; imported up front so the dependency is visible
import os
from datetime import datetime, timezone
from typing import Iterable, Dict, Any, List, Tuple

from utils_id import stable_symbol_id

# All CSV files live in a `data` directory next to this module.
DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
SYMBOLS_CSV = os.path.join(DATA_DIR, "symbols.csv")
BARS_1M_CSV = os.path.join(DATA_DIR, "bars_1m.csv")
SIGNALS_CSV = os.path.join(DATA_DIR, "signals.csv")
FEATURES_1M_CSV = os.path.join(DATA_DIR, "features_1m.csv")
ETL_RUNS_CSV = os.path.join(DATA_DIR, "etl_runs.csv")
PREMARKET_BARS_CSV = os.path.join(DATA_DIR, "premarket_bars.csv")
PREMARKET_SIGNALS_CSV = os.path.join(DATA_DIR, "premarket_signals.csv")

# Ensure the data directory exists at import time.
os.makedirs(DATA_DIR, exist_ok=True)
def _utc_now_iso() -> str:
|
|||
|
|
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|||
|
|
|
|||
|
|
def _floor_minute(dt: datetime) -> datetime:
|
|||
|
|
return dt.replace(second=0, microsecond=0, tzinfo=timezone.utc)
|
|||
|
|
|
|||
|
|
# ---------- symbols.csv ----------
|
|||
|
|
|
|||
|
|
_SYMBOLS_HEADER = [
|
|||
|
|
"id","symbol","name","exchange","currency",
|
|||
|
|
"tick_size","lot_size","sector","industry",
|
|||
|
|
"is_active","first_seen_utc","last_seen_utc"
|
|||
|
|
]
|
|||
|
|
|
|||
|
|
def write_symbols(stocks: Iterable[Dict[str, Any]]) -> Dict[str, int]:
    """Write basic stock info to symbols.csv and return a symbol -> symbol_id map.

    stocks: dicts with keys: symbol, name, exchange, currency.

    The whole file is rewritten on every call: existing rows are loaded into an
    in-memory index keyed by (symbol, exchange), so known symbols only get their
    ``last_seen_utc`` refreshed while new ones are appended.

    Fixes:
    - entries without a symbol are skipped instead of producing rows keyed on
      None/"";
    - ``extrasaction="ignore"`` so legacy files carrying extra columns do not
      crash the rewrite.
    """
    existing: Dict[Tuple[str, str], Dict[str, str]] = {}
    if os.path.exists(SYMBOLS_CSV):
        with open(SYMBOLS_CSV, "r", encoding="utf-8-sig") as f:
            for row in csv.DictReader(f):
                existing[(row["symbol"], row["exchange"])] = row

    now = _utc_now_iso()
    # Create new rows / refresh last_seen on existing ones.
    for s in stocks:
        symbol = s.get("symbol")
        if not symbol:
            continue
        exchange = (s.get("exchange") or "US").upper()
        currency = (s.get("currency") or "USD").upper()
        key = (symbol, exchange)
        if key not in existing:
            existing[key] = {
                "id": str(stable_symbol_id(symbol, exchange)),
                "symbol": symbol,
                "name": s.get("name") or "",
                "exchange": exchange,
                "currency": currency,
                "tick_size": "",
                "lot_size": "",
                "sector": "",
                "industry": "",
                "is_active": "1",
                "first_seen_utc": now,
                "last_seen_utc": now,
            }
        else:
            existing[key]["last_seen_utc"] = now

    # Rewrite the whole file.
    with open(SYMBOLS_CSV, "w", newline="", encoding="utf-8-sig") as f:
        writer = csv.DictWriter(f, fieldnames=_SYMBOLS_HEADER, extrasaction="ignore")
        writer.writeheader()
        writer.writerows(existing.values())

    # Map plain symbol -> id, guarding against rows whose key drifted from the row content.
    return {k[0]: int(v["id"]) for k, v in existing.items() if k[0] == v["symbol"]}
# ---------- bars_1m.csv ----------
|
|||
|
|
|
|||
|
|
_BARS_1M_HEADER = [
|
|||
|
|
"symbol_id","symbol","ts_utc","open","high","low","close",
|
|||
|
|
"volume","vwap","trades_count","source","session"
|
|||
|
|
]
|
|||
|
|
|
|||
|
|
def _upgrade_bars_file_if_needed():
    """One-time schema upgrade: if a legacy bars_1m.csv lacks the ``session``
    column, rewrite the file with session='regular' appended to every row.

    Fix: the old code indexed rows with ``r[idx_map.get(col, '')]``, so a
    missing column produced ``r['']`` — a TypeError — and a short row raised
    IndexError. Missing columns and short rows now degrade to empty cells.
    """
    if not os.path.exists(BARS_1M_CSV):
        return
    try:
        with open(BARS_1M_CSV, 'r', encoding='utf-8-sig') as f:
            rows = list(csv.reader(f))
        if not rows:
            return
        header = rows[0]
        if 'session' in header:
            return  # already upgraded
        col_index = {col: i for i, col in enumerate(header)}

        def cell(record, col):
            # Blank cell when the column is absent or the row is short.
            i = col_index.get(col)
            return record[i] if i is not None and i < len(record) else ''

        new_rows: List[List[str]] = [_BARS_1M_HEADER]
        for r in rows[1:]:
            if not r:
                continue
            # Rebuild the row in the new column order, defaulting session to 'regular'.
            new_rows.append([
                cell(r, 'symbol_id'), cell(r, 'symbol'), cell(r, 'ts_utc'),
                cell(r, 'open'), cell(r, 'high'), cell(r, 'low'), cell(r, 'close'),
                cell(r, 'volume'), cell(r, 'vwap'), cell(r, 'trades_count'),
                cell(r, 'source'),
                'regular',
            ])
        # Rewrite the upgraded file in place.
        with open(BARS_1M_CSV, 'w', newline='', encoding='utf-8-sig') as f:
            csv.writer(f).writerows(new_rows)
    except Exception as e:
        # Best effort: a failed upgrade must not take down the caller.
        print(f"⚠️ bars_1m.csv 升级失败: {e}")
def append_bars_1m(stocks: Iterable[Dict[str, Any]], symbol_id_map: Dict[str, int], source: str = "eastmoney") -> List[Dict[str, Any]]:
    """Approximate the current snapshot as 1-minute bars and append to bars_1m.csv.

    Only a snapshot is available, so open/high/low/close all use the current
    price; volume/vwap/trades_count stay blank.

    Returns the row dicts that were written (consumed by append_features_1m).

    Fixes:
    - a price of 0/0.0 no longer falls through to ``current_price`` (the old
      ``or``-chain treated 0 as missing); explicit None/"" checks are used;
    - non-numeric prices are skipped instead of being written into the CSV,
      consistent with append_bars_session.
    """
    now = _floor_minute(datetime.now(timezone.utc)).strftime("%Y-%m-%dT%H:%M:%SZ")
    rows: List[Dict[str, Any]] = []
    _upgrade_bars_file_if_needed()
    for s in stocks:
        symbol = s.get("symbol")
        price = s.get("eastmoney_price")
        if price in (None, ""):
            price = s.get("current_price")
        if price in (None, ""):
            continue
        try:
            float(price)  # validate only; keep the original representation in the CSV
        except (TypeError, ValueError):
            continue
        sid = symbol_id_map.get(symbol) or stable_symbol_id(symbol)
        rows.append({
            "symbol_id": sid,
            "symbol": symbol,
            "ts_utc": now,
            "open": price,
            "high": price,
            "low": price,
            "close": price,
            "volume": "",
            "vwap": "",
            "trades_count": "",
            "source": source,
            "session": "regular",
        })
    # Append; write the header only when the file is first created.
    file_exists = os.path.exists(BARS_1M_CSV)
    with open(BARS_1M_CSV, "a", newline="", encoding="utf-8-sig") as f:
        writer = csv.DictWriter(f, fieldnames=_BARS_1M_HEADER)
        if not file_exists:
            writer.writeheader()
        writer.writerows(rows)
    return rows
def append_bars_session(stocks: Iterable[Dict[str, Any]], symbol_id_map: Dict[str, int], source: str = "futu", session: str = "pre") -> List[Dict[str, Any]]:
    """Append snapshots for a specific trading session (e.g. pre/after-hours).

    Session bars share bars_1m.csv with regular bars and are distinguished by
    the ``session`` column. Returns the row dicts that were written.
    """
    _upgrade_bars_file_if_needed()
    minute_ts = _floor_minute(datetime.now(timezone.utc)).strftime("%Y-%m-%dT%H:%M:%SZ")
    written: List[Dict[str, Any]] = []
    for stock in stocks:
        sym = stock.get("symbol")
        # First non-empty candidate among the session-price fields.
        raw_price = (
            stock.get("premarket_price")
            or stock.get("after_hours_price")
            or stock.get("futu_before_open_price")
        )
        if raw_price in (None, ""):
            continue
        try:
            px = float(raw_price)
        except Exception:
            continue
        sid = symbol_id_map.get(sym) or stable_symbol_id(sym)
        written.append({
            "symbol_id": sid,
            "symbol": sym,
            "ts_utc": minute_ts,
            "open": px,
            "high": px,
            "low": px,
            "close": px,
            "volume": "",
            "vwap": "",
            "trades_count": "",
            "source": source,
            "session": session,
        })
    header_needed = not os.path.exists(BARS_1M_CSV)
    with open(BARS_1M_CSV, "a", newline="", encoding="utf-8-sig") as f:
        writer = csv.DictWriter(f, fieldnames=_BARS_1M_HEADER)
        if header_needed:
            writer.writeheader()
        writer.writerows(written)
    return written
# ---------- premarket 专用快照与信号 ----------
|
|||
|
|
|
|||
|
|
_PREMARKET_BARS_HEADER = [
|
|||
|
|
'symbol_id','symbol','name','ts_utc','ts_et','price','change','change_ratio','volume','source','session','raw_file'
|
|||
|
|
]
|
|||
|
|
|
|||
|
|
_PREMARKET_SIGNALS_HEADER = [
|
|||
|
|
'id','symbol_id','symbol','generated_at_utc','generated_at_et','signal_type','direction','score','reason','params_json','model_name','version','expires_at_utc'
|
|||
|
|
]
|
|||
|
|
|
|||
|
|
def append_premarket_bars(rows: List[Dict[str, Any]], symbol_id_map: Dict[str, int], source: str = 'futu') -> None:
    """Append premarket capture rows to premarket_bars.csv.

    rows: dicts with symbol, name, premarket_price, premarket_change,
    premarket_change_ratio (raw percent or decimal string), ts (ET "HH:MM").

    Ratio normalization: "3.21%" -> 0.0321; bare numbers with |x| > 1 are
    treated as percent values (3.21 -> 0.0321), otherwise as decimals.

    Fix: the old guard ``abs(num) > 1 and abs(num) >= 2`` left values in
    (1, 2) — e.g. "1.5" meaning 1.5% — stored as a 150% decimal, contradicting
    its own comment; the redundant second clause is removed so any |num| > 1
    is treated as a percent.
    """
    if not rows:
        return
    file_exists = os.path.exists(PREMARKET_BARS_CSV)
    now_utc = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
    # Human-friendly Eastern-time stamp (best effort; blank if zoneinfo is unavailable).
    try:
        from zoneinfo import ZoneInfo
        ts_et_full = datetime.now(ZoneInfo('America/New_York')).strftime('%Y-%m-%dT%H:%M:%S')
    except Exception:
        ts_et_full = ''
    with open(PREMARKET_BARS_CSV, 'a', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=_PREMARKET_BARS_HEADER)
        if not file_exists:
            writer.writeheader()
        for r in rows:
            symbol = r.get('symbol')
            if not symbol:
                continue
            price = r.get('premarket_price')
            if price in (None, '', '-'):
                continue
            try:
                price_f = float(price)
            except Exception:
                continue
            # Raw ratio may look like "3.21%" / "-3.21%" / "0.0321" / "".
            ratio_raw = r.get('premarket_change_ratio')
            ratio_val = 0.0
            if ratio_raw not in (None, ''):
                txt = str(ratio_raw).strip()
                try:
                    if txt.endswith('%'):
                        ratio_val = float(txt.replace('%', '')) / 100.0
                    else:
                        num = float(txt)
                        # |num| > 1 is assumed to be a percent value; otherwise a decimal.
                        ratio_val = num / 100.0 if abs(num) > 1 else num
                except Exception:
                    ratio_val = 0.0
            sid = symbol_id_map.get(symbol) or stable_symbol_id(symbol)
            writer.writerow({
                'symbol_id': sid,
                'symbol': symbol,
                'name': r.get('name', ''),
                'ts_utc': now_utc,
                'ts_et': ts_et_full,
                'price': price_f,
                'change': r.get('premarket_change', ''),
                'change_ratio': ratio_val,
                'volume': '',
                'source': source,
                'session': 'pre',
                'raw_file': '',
            })
def append_premarket_signals(signals: List[Dict[str, Any]], symbol_id_map: Dict[str, int]) -> None:
    """Append premarket signals to premarket_signals.csv.

    signals: dicts with symbol, direction (BUY/SELL), reason, optional params.
    Dedup: a (symbol, direction, generated-at-UTC-second) triple already present
    in the file is skipped.
    """
    if not signals:
        return
    file_exists = os.path.exists(PREMARKET_SIGNALS_CSV)
    model_name, version = _def_model
    now_utc = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
    # Eastern-time stamp for human inspection (blank if zoneinfo is unavailable).
    try:
        from zoneinfo import ZoneInfo
        now_et = datetime.now(ZoneInfo('America/New_York')).strftime('%Y-%m-%dT%H:%M:%S')
    except Exception:
        now_et = ''
    # Build the dedup index from rows already on disk.
    previously_written = set()
    if file_exists:
        with open(PREMARKET_SIGNALS_CSV, 'r', encoding='utf-8-sig') as f:
            for existing in csv.DictReader(f):
                previously_written.add(
                    (existing['symbol'], existing['direction'], existing['generated_at_utc'])
                )
    with open(PREMARKET_SIGNALS_CSV, 'a', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=_PREMARKET_SIGNALS_HEADER)
        if not file_exists:
            writer.writeheader()
        for sig in signals:
            symbol = sig.get('symbol')
            direction = sig.get('direction')
            if not symbol or not direction:
                continue
            if (symbol, direction, now_utc) in previously_written:
                continue
            sid = symbol_id_map.get(symbol) or stable_symbol_id(symbol)
            writer.writerow({
                'id': f'{sid}-{now_utc}',
                'symbol_id': sid,
                'symbol': symbol,
                'generated_at_utc': now_utc,
                'generated_at_et': now_et,
                'signal_type': sig.get('signal_type', 'premarket_alert'),
                'direction': direction,
                'score': sig.get('score', ''),
                'reason': sig.get('reason', ''),
                'params_json': json.dumps(sig.get('params') or {}, ensure_ascii=False),
                'model_name': model_name,
                'version': version,
                'expires_at_utc': '',
            })
# ---------- signals.csv ----------
|
|||
|
|
|
|||
|
|
_SIGNALS_HEADER = [
|
|||
|
|
"id","symbol_id","symbol","generated_at_utc",
|
|||
|
|
"signal_type","direction","score","horizon",
|
|||
|
|
"params_json","model_name","version","expires_at_utc"
|
|||
|
|
]
|
|||
|
|
|
|||
|
|
_def_model = ("rule_threshold", "v1")
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
|
|||
|
|
def append_signals(signals: Iterable[Dict[str, Any]], symbol_id_map: Dict[str, int]) -> None:
    """Append strategy signals to signals.csv with approximate (time+symbol) dedup.

    Each signal should carry: symbol, type (BUY/SELL); reason/score optional.

    Fix: signals missing a symbol or a direction are now skipped (consistent
    with append_premarket_signals) instead of being written as unusable rows.
    """
    file_exists = os.path.exists(SIGNALS_CSV)
    # Dedup index from rows already on disk: (symbol, generated_at, direction).
    seen_keys = set()
    if file_exists:
        with open(SIGNALS_CSV, "r", encoding="utf-8-sig") as f:
            for row in csv.DictReader(f):
                seen_keys.add((row["symbol"], row["generated_at_utc"], row.get("direction")))

    model_name, version = _def_model

    with open(SIGNALS_CSV, "a", newline="", encoding="utf-8-sig") as f:
        writer = csv.DictWriter(f, fieldnames=_SIGNALS_HEADER)
        if not file_exists:
            writer.writeheader()
        for sig in signals:
            symbol = sig.get("symbol")
            # Accept either "type" (legacy) or "direction" for the signal side.
            direction = sig.get("type") or sig.get("direction")
            if not symbol or not direction:
                continue
            gen_at = sig.get('generated_at_utc') or _utc_now_iso()
            key = (symbol, gen_at, direction)
            if key in seen_keys:
                continue
            sid = symbol_id_map.get(symbol) or stable_symbol_id(symbol)
            writer.writerow({
                "id": f"{sid}-{gen_at}",
                "symbol_id": sid,
                "symbol": symbol,
                "generated_at_utc": gen_at,
                "signal_type": "momentum",
                "direction": direction,
                "score": sig.get("confidence", ""),
                "horizon": "intraday",
                "params_json": json.dumps({"reason": sig.get("reason", "")}, ensure_ascii=False),
                "model_name": model_name,
                "version": version,
                "expires_at_utc": "",
            })
# ---------- features_1m.csv ----------
|
|||
|
|
|
|||
|
|
_FEATURES_1M_HEADER = [
|
|||
|
|
'symbol_id','symbol','ts_utc','price','return_1m','ma_5','ma_15','vol_15'
|
|||
|
|
]
|
|||
|
|
|
|||
|
|
def _load_existing_prices() -> Dict[str, List[Tuple[str, float]]]:
    """Load per-symbol (ts_utc, close) series from bars_1m.csv, sorted by timestamp."""
    history: Dict[str, List[Tuple[str, float]]] = {}
    if not os.path.exists(BARS_1M_CSV):
        return history
    with open(BARS_1M_CSV, 'r', encoding='utf-8-sig') as f:
        for record in csv.DictReader(f):
            try:
                close_px = float(record['close'])
            except Exception:
                continue  # skip blank/non-numeric closes (e.g. rows with empty price cells)
            history.setdefault(record['symbol'], []).append((record['ts_utc'], close_px))
    # Appends should already be chronological; sort defensively anyway.
    for series in history.values():
        series.sort(key=lambda pair: pair[0])
    return history
def append_features_1m(new_bar_rows: List[Dict[str, Any]]) -> None:
    """Compute simple per-minute features for freshly appended bars and append
    them to features_1m.csv.

    Features: return_1m (vs previous close), ma_5 / ma_15 (simple moving
    averages) and vol_15 (sample standard deviation over the 15-bar window).
    """
    if not new_bar_rows:
        return
    history = _load_existing_prices()
    out_rows: List[Dict[str, Any]] = []
    for bar in new_bar_rows:
        sym = bar['symbol']
        sid = bar['symbol_id']
        ts = bar['ts_utc']
        try:
            px = float(bar['close'])
        except Exception:
            continue
        series = history.get(sym, [])
        # The new bar is normally already the last element (it was just written
        # to the CSV); if not, append it so the windows include the current price.
        if not series or series[-1][0] != ts:
            series.append((ts, px))
        last = len(series) - 1
        # 1-minute return relative to the previous close (0.0 when unavailable).
        ret_1m = 0.0
        if last >= 1 and series[last - 1][1] != 0:
            ret_1m = (px / series[last - 1][1]) - 1
        # Trailing windows ending at the current bar.
        win5 = [p for _, p in series[max(0, last - 4):last + 1]]
        win15 = [p for _, p in series[max(0, last - 14):last + 1]]
        ma5 = sum(win5) / len(win5) if win5 else px
        ma15 = sum(win15) / len(win15) if win15 else px
        # Sample standard deviation over the 15-bar window.
        vol15 = 0.0
        if len(win15) > 1:
            variance = sum((p - ma15) ** 2 for p in win15) / (len(win15) - 1)
            vol15 = variance ** 0.5
        out_rows.append({
            'symbol_id': sid,
            'symbol': sym,
            'ts_utc': ts,
            'price': px,
            'return_1m': ret_1m,
            'ma_5': ma5,
            'ma_15': ma15,
            'vol_15': vol15,
        })
    header_needed = not os.path.exists(FEATURES_1M_CSV)
    with open(FEATURES_1M_CSV, 'a', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=_FEATURES_1M_HEADER)
        if header_needed:
            writer.writeheader()
        writer.writerows(out_rows)
# ---------- etl_runs.csv ----------
|
|||
|
|
|
|||
|
|
_ETL_RUNS_HEADER = [
|
|||
|
|
'run_ts_utc','loop','fetched_count','signal_count','duration_seconds','errors'
|
|||
|
|
]
|
|||
|
|
|
|||
|
|
def append_etl_run(loop: int, fetched: int, signals: int, duration: float, errors: int = 0) -> None:
    """Append one ETL-loop run record to etl_runs.csv.

    duration is rendered with 3 decimal places; the header is written only when
    the file is first created.
    """
    header_needed = not os.path.exists(ETL_RUNS_CSV)
    record = {
        'run_ts_utc': _utc_now_iso(),
        'loop': loop,
        'fetched_count': fetched,
        'signal_count': signals,
        'duration_seconds': f'{duration:.3f}',
        'errors': errors,
    }
    with open(ETL_RUNS_CSV, 'a', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=_ETL_RUNS_HEADER)
        if header_needed:
            writer.writeheader()
        writer.writerow(record)