fix: make label_backfill.py use signal_feature_events as its source (instead of signal_indicators) to match the FK constraint

This commit is contained in:
root 2026-03-04 13:02:34 +00:00
parent 9eeb9a2434
commit 18d628359c

View File

@ -3,9 +3,9 @@
label_backfill.py V5.3 信号标签回填脚本 label_backfill.py V5.3 信号标签回填脚本
功能 功能
- 遍历 signal_indicators 中有 signal 的历史记录 - 遍历 signal_feature_events 中有 side 的历史记录
- 根据 signal 时点后 15/30/60 分钟的 agg_trades 价格计算标签 - 根据 side 时点后 15/30/60 分钟的 agg_trades 价格计算标签
- 写入 signal_label_events(event_id 复用 signal_indicators.id) - 写入 signal_label_events(event_id 复用 signal_feature_events.event_id)
标签定义严格按 Mark Price + 时间序列方向 标签定义严格按 Mark Price + 时间序列方向
y_binary_60m = 1 if price_60m_later > price_at_signal (LONG) y_binary_60m = 1 if price_60m_later > price_at_signal (LONG)
@ -64,31 +64,33 @@ def ensure_label_table(conn):
def fetch_unlabeled_signals(conn, symbol=None, since_ms=None, limit=BATCH_SIZE): def fetch_unlabeled_signals(conn, symbol=None, since_ms=None, limit=BATCH_SIZE):
"""取尚未回填标签的 signal_indicators有 signal 且 60m 已过期)""" """取尚未回填标签的 signal_feature_events有 side 且 60m 已过期)"""
cutoff_ms = int(time.time() * 1000) - HORIZONS_MS["60m"] - 60_000 # 多留1分钟缓冲 cutoff_ms = int(time.time() * 1000) - HORIZONS_MS["60m"] - 60_000 # 多留1分钟缓冲
with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
params = [] params = []
conds = [ conds = [
"si.signal IS NOT NULL", "sfe.side IS NOT NULL",
"si.ts < %s", "sfe.side != ''",
"sfe.ts < %s",
"sle.event_id IS NULL", # 尚未回填 "sle.event_id IS NULL", # 尚未回填
] ]
params.append(cutoff_ms) params.append(cutoff_ms)
if symbol: if symbol:
conds.append("si.symbol = %s") conds.append("sfe.symbol = %s")
params.append(symbol.upper()) params.append(symbol.upper())
if since_ms: if since_ms:
conds.append("si.ts >= %s") conds.append("sfe.ts >= %s")
params.append(since_ms) params.append(since_ms)
where = " AND ".join(conds) where = " AND ".join(conds)
params.append(limit) params.append(limit)
cur.execute( cur.execute(
f""" f"""
SELECT si.id, si.ts, si.symbol, si.signal, si.price, si.atr_value SELECT sfe.event_id AS id, sfe.ts, sfe.symbol, sfe.side AS signal,
FROM signal_indicators si sfe.price, sfe.atr_value
LEFT JOIN {LABEL_TABLE} sle ON sle.event_id = si.id FROM signal_feature_events sfe
LEFT JOIN {LABEL_TABLE} sle ON sle.event_id = sfe.event_id
WHERE {where} WHERE {where}
ORDER BY si.ts ASC ORDER BY sfe.ts ASC
LIMIT %s LIMIT %s
""", """,
params, params,