From 18d628359ccf39968c815d06c50e6b6af0fdb8f7 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 4 Mar 2026 13:02:34 +0000 Subject: [PATCH] fix: label_backfill.py use signal_feature_events as source (not signal_indicators) to match FK constraint --- scripts/label_backfill.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/scripts/label_backfill.py b/scripts/label_backfill.py index 2e10191..c813845 100644 --- a/scripts/label_backfill.py +++ b/scripts/label_backfill.py @@ -3,9 +3,9 @@ label_backfill.py — V5.3 信号标签回填脚本 功能: - - 遍历 signal_indicators 中有 signal 的历史记录 - - 根据 signal 时点后 15/30/60 分钟的 agg_trades 价格计算标签 - - 写入 signal_label_events 表(event_id 复用 signal_indicators.id) + - 遍历 signal_feature_events 中有 side 的历史记录 + - 根据 side 时点后 15/30/60 分钟的 agg_trades 价格计算标签 + - 写入 signal_label_events 表(event_id 复用 signal_feature_events.event_id) 标签定义(严格按 Mark Price + 时间序列方向): y_binary_60m = 1 if price_60m_later > price_at_signal (LONG) @@ -64,31 +64,33 @@ def ensure_label_table(conn): def fetch_unlabeled_signals(conn, symbol=None, since_ms=None, limit=BATCH_SIZE): - """取尚未回填标签的 signal_indicators(有 signal 且 60m 已过期)""" + """取尚未回填标签的 signal_feature_events(有 side 且 60m 已过期)""" cutoff_ms = int(time.time() * 1000) - HORIZONS_MS["60m"] - 60_000 # 多留1分钟缓冲 with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: params = [] conds = [ - "si.signal IS NOT NULL", - "si.ts < %s", + "sfe.side IS NOT NULL", + "sfe.side != ''", + "sfe.ts < %s", "sle.event_id IS NULL", # 尚未回填 ] params.append(cutoff_ms) if symbol: - conds.append("si.symbol = %s") + conds.append("sfe.symbol = %s") params.append(symbol.upper()) if since_ms: - conds.append("si.ts >= %s") + conds.append("sfe.ts >= %s") params.append(since_ms) where = " AND ".join(conds) params.append(limit) cur.execute( f""" - SELECT si.id, si.ts, si.symbol, si.signal, si.price, si.atr_value - FROM signal_indicators si - LEFT JOIN {LABEL_TABLE} sle ON sle.event_id = si.id + SELECT sfe.event_id AS id, sfe.ts, sfe.symbol, sfe.side AS signal, + sfe.price, sfe.atr_value + FROM signal_feature_events sfe + LEFT JOIN {LABEL_TABLE} sle ON sle.event_id = sfe.event_id WHERE {where} - ORDER BY si.ts ASC + ORDER BY sfe.ts ASC LIMIT %s """, params,