From ec425999b0ae9b8993f4a16e1fd2b21a3126cf9e Mon Sep 17 00:00:00 2001 From: zsb Date: Tue, 7 Apr 2026 00:40:53 -0700 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=88=86=E5=8C=BA=E5=BF=AB?= =?UTF-8?q?=E7=85=A7=E6=95=B0=E6=8D=AE=E5=85=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 32 ++++++++++++ crates/bt-demo/src/main.rs | 11 +++- crates/fidc-core/src/data.rs | 45 ++++++++++++++++ crates/fidc-core/tests/partitioned_loader.rs | 55 ++++++++++++++++++++ 4 files changed, 141 insertions(+), 2 deletions(-) create mode 100644 crates/fidc-core/tests/partitioned_loader.rs diff --git a/README.md b/README.md index d9d8511..a2d4f81 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,7 @@ ## Phase 2 新增内容 - `DataSet::bundle_on(date)`:引入按日 snapshot bundle 视图,方便未来直接对接 FiDataCenter / FiDataScraper 预计算快照 +- `DataSet::from_partitioned_dir(path)`:新增按日分区 snapshot 目录读取入口,为真实回测数据源接入打基础 - 策略诊断输出:equity curve 里新增 `diagnostics` 字段,记录市值带、候选样本、退出原因等信息 - 候选资格快照扩展:补入 `is_kcb`、`is_one_yuan` - 增加策略选择行为测试 @@ -122,10 +123,41 @@ ## 运行方式 +默认跑仓库内置 flat demo CSV: + ```bash cargo run --bin bt-demo ``` +如果要接更接近真实数据面的按日分区 snapshot 目录: + +```bash +FIDC_BT_DATA_LAYOUT=partitioned \ +FIDC_BT_DATA_DIR=/path/to/snapshots \ +cargo run --bin bt-demo +``` + +约定目录结构: + +```text +snapshots/ +├── instruments.csv +├── benchmark/ +│ ├── 2024-01-02.csv +│ └── ... +├── market/ +├── factors/ +└── candidates/ +``` + +其中: +- `market/`:日级行情快照 +- `factors/`:估值/因子快照 +- `candidates/`:候选资格/过滤标记快照 +- `benchmark/`:指数快照 + +这层接口是为后续对接 `FiDataCenter / FiDataScraper` 的预计算 snapshot 数据准备的。 + 运行后会生成: - `output/demo/equity_curve.csv` diff --git a/crates/bt-demo/src/main.rs b/crates/bt-demo/src/main.rs index 4e121e4..aa1cfac 100644 --- a/crates/bt-demo/src/main.rs +++ b/crates/bt-demo/src/main.rs @@ -20,12 +20,19 @@ use fidc_core::{ fn main() -> Result<(), Box> { let root = workspace_root(); - let data_dir = root.join("data/demo"); + let data_dir = std::env::var("FIDC_BT_DATA_DIR") + .map(PathBuf::from) + .unwrap_or_else(|_| root.join("data/demo")); + let data_layout = std::env::var("FIDC_BT_DATA_LAYOUT").unwrap_or_else(|_| "flat".to_string()); let output_dir = root.join("output/demo"); fs::create_dir_all(&output_dir)?; - let data = DataSet::from_csv_dir(&data_dir)?; + let data = if data_layout == "partitioned" { + DataSet::from_partitioned_dir(&data_dir)? + } else { + DataSet::from_csv_dir(&data_dir)? + }; let mut strategy_cfg = CnSmallCapRotationConfig::demo(); strategy_cfg.base_index_level = 3000.0; strategy_cfg.base_cap_floor = 38.0; diff --git a/crates/fidc-core/src/data.rs b/crates/fidc-core/src/data.rs index b098115..d14f4bc 100644 --- a/crates/fidc-core/src/data.rs +++ b/crates/fidc-core/src/data.rs @@ -156,7 +156,25 @@ impl DataSet { let factors = read_factors(&path.join("factors.csv"))?; let candidates = read_candidates(&path.join("candidate_flags.csv"))?; let benchmarks = read_benchmarks(&path.join("benchmark.csv"))?; + Self::from_components(instruments, market, factors, candidates, benchmarks) + } + pub fn from_partitioned_dir(path: &Path) -> Result { + let instruments = read_instruments(&path.join("instruments.csv"))?; + let benchmarks = read_partitioned_dir(&path.join("benchmark"), read_benchmarks)?; + let market = read_partitioned_dir(&path.join("market"), read_market)?; + let factors = read_partitioned_dir(&path.join("factors"), read_factors)?; + let candidates = read_partitioned_dir(&path.join("candidates"), read_candidates)?; + Self::from_components(instruments, market, factors, candidates, benchmarks) + } + + fn from_components( + instruments: Vec, + market: Vec, + factors: Vec, + candidates: Vec, + benchmarks: Vec, + ) -> Result { let benchmark_code = collect_benchmark_code(&benchmarks)?; let calendar = TradingCalendar::new(benchmarks.iter().map(|item| item.date).collect()); @@ -456,6 +474,33 @@ impl CsvRow { } } +fn read_partitioned_dir(dir: &Path, mut loader: F) -> Result, DataSetError> +where + F: FnMut(&Path) -> Result, DataSetError>, +{ + let mut files = fs::read_dir(dir) + .map_err(|source| DataSetError::Io { + path: dir.display().to_string(), + source, + })? + .collect::, _>>() + .map_err(|source| DataSetError::Io { + path: dir.display().to_string(), + source, + })?; + files.sort_by_key(|entry| entry.path()); + + let mut rows = Vec::new(); + for entry in files { + let path = entry.path(); + if path.extension().and_then(|x| x.to_str()) != Some("csv") { + continue; + } + rows.extend(loader(&path)?); + } + Ok(rows) +} + fn read_rows(path: &Path) -> Result, DataSetError> { let content = fs::read_to_string(path).map_err(|source| DataSetError::Io { path: path.display().to_string(), diff --git a/crates/fidc-core/tests/partitioned_loader.rs b/crates/fidc-core/tests/partitioned_loader.rs new file mode 100644 index 0000000..77b5945 --- /dev/null +++ b/crates/fidc-core/tests/partitioned_loader.rs @@ -0,0 +1,55 @@ +use fidc_core::DataSet; +use std::fs; +use std::path::PathBuf; +use std::time::{SystemTime, UNIX_EPOCH}; + +fn temp_dir() -> PathBuf { + let uniq = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("clock") + .as_nanos(); + let dir = std::env::temp_dir().join(format!("fidc-bt-partitioned-{uniq}")); + fs::create_dir_all(&dir).expect("mkdir temp"); + dir +} + +#[test] +fn can_load_partitioned_snapshot_dir() { + let dir = temp_dir(); + fs::create_dir_all(dir.join("benchmark")).unwrap(); + fs::create_dir_all(dir.join("market")).unwrap(); + fs::create_dir_all(dir.join("factors")).unwrap(); + fs::create_dir_all(dir.join("candidates")).unwrap(); + + fs::write( + dir.join("instruments.csv"), + "symbol,name,exchange,lot_size\n000001.SZ,PingAn,SZ,100\n", + ) + .unwrap(); + fs::write( + dir.join("benchmark/2024-01-02.csv"), + "date,benchmark,open,close,prev_close,volume\n2024-01-02,CSI300.DEMO,2990,3000,2980,100000000\n", + ) + .unwrap(); + fs::write( + dir.join("market/2024-01-02.csv"), + "date,symbol,open,high,low,close,prev_close,volume,paused,upper_limit,lower_limit\n2024-01-02,000001.SZ,10,10.5,9.9,10.2,10,100000,false,11,9\n", + ) + .unwrap(); + fs::write( + dir.join("factors/2024-01-02.csv"), + "date,symbol,market_cap_bn,free_float_cap_bn,pe_ttm\n2024-01-02,000001.SZ,40,35,12\n", + ) + .unwrap(); + fs::write( + dir.join("candidates/2024-01-02.csv"), + "date,symbol,is_st,is_new_listing,is_paused,allow_buy,allow_sell,is_kcb,is_one_yuan\n2024-01-02,000001.SZ,false,false,false,true,true,false,false\n", + ) + .unwrap(); + + let data = DataSet::from_partitioned_dir(&dir).expect("partitioned dataset"); + assert_eq!(data.benchmark_code(), "CSI300.DEMO"); + assert!(data.market_snapshots_on(chrono::NaiveDate::from_ymd_opt(2024, 1, 2).unwrap()).len() == 1); + + let _ = fs::remove_dir_all(&dir); +}