arbitrage-engine/backend/market_data_collector.py
fanziqi ad60a53262 review: add code audit annotations and REVIEW.md for v5.1
P0 issues annotated (critical, must fix before live trading):
- signal_engine.py: cooldown blocks reverse-signal position close
- paper_monitor.py + signal_engine.py: pnl_r 2x inflated for TP scenarios
- signal_engine.py: entry price uses 30min VWAP instead of real-time price
- paper_monitor.py + signal_engine.py: concurrent write race on paper_trades

P1 issues annotated (long-term stability):
- db.py: ensure_partitions uses timedelta(30d) causing missed monthly partitions
- signal_engine.py: float precision drift in buy_vol/sell_vol accumulation
- market_data_collector.py: single bare connection with no reconnect logic
- db.py: get_sync_pool initialization not thread-safe
- signal_engine.py: recent_large_trades deque has no maxlen

P2/P3 issues annotated across backend and frontend:
- coinbase_premium KeyError for XRP/SOL symbols
- liquidation_collector: redundant elif condition in aggregation logic
- auth.py: JWT secret hardcoded default, login rate-limit absent
- Frontend: concurrent refresh token race, AuthContext not synced on failure
- Frontend: universal catch{} swallows all API errors silently
- Frontend: serial API requests in LatestSignals, market-indicators over-polling

docs/REVIEW.md: comprehensive audit report with all 34 issues (P0×4, P1×5,
P2×6, P3×4 backend + FE-P1×4, FE-P2×8, FE-P3×3 frontend), fix suggestions
and prioritized remediation roadmap.
2026-03-01 17:14:52 +08:00

204 lines
8.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import asyncio
import json
import logging
import os
import time
from typing import Any
import aiohttp
import psycopg2
from psycopg2.extras import Json
# Symbols polled every round; keep in sync with the symbol universe used by
# the signal engine downstream.
SYMBOLS = ["BTCUSDT", "ETHUSDT", "XRPUSDT", "SOLUSDT"]
# Seconds between collection rounds (5 minutes, matching the "5m" API period
# requested from the Binance futures-data endpoints).
INTERVAL_SECONDS = 300
# Postgres connection settings, overridable via environment variables.
# NOTE(review): hardcoded default credentials — acceptable for local dev only;
# confirm production deployments always supply PG_PASS via the environment.
PG_HOST = os.getenv("PG_HOST", "127.0.0.1")
PG_PORT = int(os.getenv("PG_PORT", "5432"))
PG_DB = os.getenv("PG_DB", "arb_engine")
PG_USER = os.getenv("PG_USER", "arb")
PG_PASS = os.getenv("PG_PASS", "arb_engine_2026")
# DDL: one row per (symbol, indicator_type, timestamp_ms). The UNIQUE
# constraint is what backs the ON CONFLICT clause in UPSERT_SQL below.
TABLE_SQL = """
CREATE TABLE IF NOT EXISTS market_indicators (
id SERIAL PRIMARY KEY,
symbol VARCHAR(20) NOT NULL,
indicator_type VARCHAR(50) NOT NULL,
timestamp_ms BIGINT NOT NULL,
value JSONB NOT NULL,
created_at TIMESTAMP DEFAULT NOW(),
UNIQUE(symbol, indicator_type, timestamp_ms)
);
CREATE INDEX IF NOT EXISTS idx_market_indicators_lookup
ON market_indicators(symbol, indicator_type, timestamp_ms DESC);
"""
# Idempotent upsert keyed on (symbol, indicator_type, timestamp_ms): re-polling
# the same period overwrites the stored value instead of duplicating rows.
UPSERT_SQL = """
INSERT INTO market_indicators (symbol, indicator_type, timestamp_ms, value)
VALUES (%s, %s, %s, %s)
ON CONFLICT (symbol, indicator_type, timestamp_ms)
DO UPDATE SET value = EXCLUDED.value;
"""
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
logger = logging.getLogger("market_data_collector")
class MarketDataCollector:
    """Polls market indicators from Binance/Coinbase and upserts them into Postgres.

    Each collection round fetches five indicator types per symbol and writes
    them to the ``market_indicators`` table via an idempotent upsert.

    Fixes applied relative to the reviewed version:

    * P1 — the single bare psycopg2 connection had no reconnect logic, so a
      network blip / DB restart / failover left the collector permanently
      broken (and an aborted transaction poisoned every later ``commit``).
      ``save_indicator`` now rolls back on failure and reconnects + retries
      once on connection-level errors.
    * P2 — ``collect_coinbase_premium`` raised ``KeyError`` for symbols
      without a Coinbase pair mapping (XRPUSDT, SOLUSDT). Those symbols are
      now skipped gracefully.
    """

    # Coinbase spot pairs used for the premium calculation. Symbols missing
    # from this map are skipped (no Coinbase market mapping configured).
    COINBASE_PAIRS = {
        "BTCUSDT": "BTC-USD",
        "ETHUSDT": "ETH-USD",
    }

    def __init__(self) -> None:
        self.conn = self._connect()

    def _connect(self):
        """Open a new non-autocommit psycopg2 connection from the env-configured DSN."""
        conn = psycopg2.connect(
            host=PG_HOST,
            port=PG_PORT,
            dbname=PG_DB,
            user=PG_USER,
            password=PG_PASS,
        )
        conn.autocommit = False
        return conn

    def _reconnect(self) -> None:
        """Discard the current connection (best effort) and open a fresh one."""
        self.close()
        self.conn = self._connect()

    def close(self) -> None:
        """Close the DB connection, swallowing errors (shutdown/reconnect path)."""
        try:
            self.conn.close()
        except Exception:
            pass

    def ensure_table(self) -> None:
        """Create the market_indicators table and lookup index if absent."""
        with self.conn.cursor() as cur:
            cur.execute(TABLE_SQL)
        self.conn.commit()
        logger.info("market_indicators table ensured")

    def save_indicator(self, symbol: str, indicator_type: str, timestamp_ms: int, payload: Any) -> None:
        """Upsert one indicator row.

        On a connection-level failure (OperationalError / InterfaceError —
        e.g. network blip, DB restart, failover) the connection is rebuilt and
        the write retried once; a second failure propagates so the per-task
        error handling in collect_symbol can log it. Any other failure rolls
        the transaction back (keeping the connection usable) and re-raises.
        """
        for attempt in (1, 2):
            try:
                with self.conn.cursor() as cur:
                    cur.execute(UPSERT_SQL, (symbol, indicator_type, timestamp_ms, Json(payload)))
                self.conn.commit()
                return
            except (psycopg2.OperationalError, psycopg2.InterfaceError) as exc:
                logger.warning(
                    "DB connection error saving %s/%s (attempt %d): %s",
                    symbol, indicator_type, attempt, exc,
                )
                if attempt == 2:
                    raise
                self._reconnect()
            except Exception:
                # Data-level failure: roll back so the connection is not stuck
                # in an aborted transaction for the next indicator.
                try:
                    self.conn.rollback()
                except Exception:
                    pass
                raise

    async def fetch_json(self, session: aiohttp.ClientSession, url: str, params: dict[str, Any] | None = None) -> Any:
        """GET *url* and return the decoded JSON body.

        Raises RuntimeError (with a truncated body excerpt) on any non-200
        status. A 20-second total timeout bounds each request.
        """
        async with session.get(url, params=params, timeout=aiohttp.ClientTimeout(total=20)) as resp:
            if resp.status != 200:
                text = await resp.text()
                raise RuntimeError(f"HTTP {resp.status} {url} {text[:200]}")
            return await resp.json()

    async def collect_long_short_ratio(self, session: aiohttp.ClientSession, symbol: str) -> None:
        """Fetch the latest 5m global long/short account ratio and persist it."""
        endpoint = "https://fapi.binance.com/futures/data/globalLongShortAccountRatio"
        data = await self.fetch_json(session, endpoint, {"symbol": symbol, "period": "5m", "limit": 1})
        if not data:
            raise RuntimeError("empty response")
        item = data[0]
        # Fall back to wall-clock ms if the API omits a timestamp.
        ts = int(item.get("timestamp") or int(time.time() * 1000))
        self.save_indicator(symbol, "long_short_ratio", ts, item)

    async def collect_top_trader_position(self, session: aiohttp.ClientSession, symbol: str) -> None:
        """Fetch the latest 5m top-trader long/short position ratio and persist it."""
        endpoint = "https://fapi.binance.com/futures/data/topLongShortPositionRatio"
        data = await self.fetch_json(session, endpoint, {"symbol": symbol, "period": "5m", "limit": 1})
        if not data:
            raise RuntimeError("empty response")
        item = data[0]
        ts = int(item.get("timestamp") or int(time.time() * 1000))
        self.save_indicator(symbol, "top_trader_position", ts, item)

    async def collect_open_interest_hist(self, session: aiohttp.ClientSession, symbol: str) -> None:
        """Fetch the latest 5m open-interest history point and persist it."""
        endpoint = "https://fapi.binance.com/futures/data/openInterestHist"
        data = await self.fetch_json(session, endpoint, {"symbol": symbol, "period": "5m", "limit": 1})
        if not data:
            raise RuntimeError("empty response")
        item = data[0]
        ts = int(item.get("timestamp") or int(time.time() * 1000))
        self.save_indicator(symbol, "open_interest_hist", ts, item)

    async def collect_coinbase_premium(self, session: aiohttp.ClientSession, symbol: str) -> None:
        """Compute the Coinbase-vs-Binance spot premium (%) and persist it.

        Symbols without an entry in COINBASE_PAIRS (e.g. XRPUSDT, SOLUSDT)
        are skipped instead of raising KeyError; per the review notes the
        downstream scorer treats the missing indicator as neutral.
        """
        coinbase_pair = self.COINBASE_PAIRS.get(symbol)
        if coinbase_pair is None:
            logger.debug("[%s] no Coinbase pair mapping, skipping coinbase_premium", symbol)
            return
        binance_url = "https://api.binance.com/api/v3/ticker/price"
        coinbase_url = f"https://api.coinbase.com/v2/prices/{coinbase_pair}/spot"
        binance_data = await self.fetch_json(session, binance_url, {"symbol": symbol})
        coinbase_data = await self.fetch_json(session, coinbase_url)
        binance_price = float(binance_data["price"])
        coinbase_price = float(coinbase_data["data"]["amount"])
        premium_pct = (coinbase_price - binance_price) / binance_price * 100.0
        ts = int(time.time() * 1000)
        payload = {
            "symbol": symbol,
            "coinbase_pair": coinbase_pair,
            "binance": binance_data,
            "coinbase": coinbase_data,
            "premium_pct": premium_pct,
        }
        self.save_indicator(symbol, "coinbase_premium", ts, payload)

    async def collect_funding_rate(self, session: aiohttp.ClientSession, symbol: str) -> None:
        """Fetch the most recent funding-rate record and persist it."""
        endpoint = "https://fapi.binance.com/fapi/v1/fundingRate"
        data = await self.fetch_json(session, endpoint, {"symbol": symbol, "limit": 1})
        if not data:
            raise RuntimeError("empty response")
        item = data[0]
        ts = int(item.get("fundingTime") or int(time.time() * 1000))
        self.save_indicator(symbol, "funding_rate", ts, item)

    async def collect_symbol(self, session: aiohttp.ClientSession, symbol: str) -> None:
        """Run all five collectors for one symbol concurrently.

        return_exceptions=True isolates failures: one failing indicator is
        logged but does not abort the others or the round.
        """
        tasks = [
            ("long_short_ratio", self.collect_long_short_ratio(session, symbol)),
            ("top_trader_position", self.collect_top_trader_position(session, symbol)),
            ("open_interest_hist", self.collect_open_interest_hist(session, symbol)),
            ("coinbase_premium", self.collect_coinbase_premium(session, symbol)),
            ("funding_rate", self.collect_funding_rate(session, symbol)),
        ]
        results = await asyncio.gather(*(t[1] for t in tasks), return_exceptions=True)
        for i, result in enumerate(results):
            name = tasks[i][0]
            if isinstance(result, Exception):
                logger.error("[%s] %s failed: %s", symbol, name, result)
            else:
                logger.info("[%s] %s collected", symbol, name)

    async def run_forever(self) -> None:
        """Main loop: ensure the table, then collect all symbols every INTERVAL_SECONDS.

        The sleep subtracts the round's elapsed time (floored at 1s) so rounds
        stay roughly on the 5-minute cadence.
        """
        self.ensure_table()
        headers = {"User-Agent": "ArbEngine/market-data-collector"}
        async with aiohttp.ClientSession(headers=headers) as session:
            while True:
                started = time.time()
                logger.info("start collection round")
                await asyncio.gather(*(self.collect_symbol(session, s) for s in SYMBOLS))
                elapsed = time.time() - started
                sleep_for = max(1, INTERVAL_SECONDS - int(elapsed))
                logger.info("round done in %.2fs, sleep %ss", elapsed, sleep_for)
                await asyncio.sleep(sleep_for)
async def main() -> None:
    """Entry point: run the collector until cancelled, always closing the DB connection."""
    mdc = MarketDataCollector()
    try:
        await mdc.run_forever()
    finally:
        mdc.close()
if __name__ == "__main__":
    # Run the async entry point; Ctrl-C exits cleanly instead of dumping a traceback.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        logger.info("stopped by keyboard interrupt")