增加分区快照数据入口

This commit is contained in:
zsb
2026-04-07 00:40:53 -07:00
parent d039c4e741
commit ec425999b0
4 changed files with 141 additions and 2 deletions

View File

@@ -103,6 +103,7 @@
## Phase 2 新增内容
- `DataSet::bundle_on(date)`:引入按日 snapshot bundle 视图,方便未来直接对接 FiDataCenter / FiDataScraper 预计算快照
- `DataSet::from_partitioned_dir(path)`:新增按日分区 snapshot 目录读取入口,为真实回测数据源接入打基础
- 策略诊断输出equity curve 里新增 `diagnostics` 字段,记录市值带、候选样本、退出原因等信息
- 候选资格快照扩展:补入 `is_kcb``is_one_yuan`
- 增加策略选择行为测试
@@ -122,10 +123,41 @@
## 运行方式
默认跑仓库内置 flat demo CSV
```bash
cargo run --bin bt-demo
```
如果要接更接近真实数据面的按日分区 snapshot 目录:
```bash
FIDC_BT_DATA_LAYOUT=partitioned \
FIDC_BT_DATA_DIR=/path/to/snapshots \
cargo run --bin bt-demo
```
约定目录结构:
```text
snapshots/
├── instruments.csv
├── benchmark/
│ ├── 2024-01-02.csv
│ └── ...
├── market/
├── factors/
└── candidates/
```
其中:
- `market/`:日级行情快照
- `factors/`:估值/因子快照
- `candidates/`:候选资格/过滤标记快照
- `benchmark/`:指数快照
这层接口是为后续对接 `FiDataCenter / FiDataScraper` 的预计算 snapshot 数据准备的。
运行后会生成:
- `output/demo/equity_curve.csv`

View File

@@ -20,12 +20,19 @@ use fidc_core::{
fn main() -> Result<(), Box<dyn Error>> {
let root = workspace_root();
let data_dir = root.join("data/demo");
let data_dir = std::env::var("FIDC_BT_DATA_DIR")
.map(PathBuf::from)
.unwrap_or_else(|_| root.join("data/demo"));
let data_layout = std::env::var("FIDC_BT_DATA_LAYOUT").unwrap_or_else(|_| "flat".to_string());
let output_dir = root.join("output/demo");
fs::create_dir_all(&output_dir)?;
let data = DataSet::from_csv_dir(&data_dir)?;
let data = if data_layout == "partitioned" {
DataSet::from_partitioned_dir(&data_dir)?
} else {
DataSet::from_csv_dir(&data_dir)?
};
let mut strategy_cfg = CnSmallCapRotationConfig::demo();
strategy_cfg.base_index_level = 3000.0;
strategy_cfg.base_cap_floor = 38.0;

View File

@@ -156,7 +156,25 @@ impl DataSet {
let factors = read_factors(&path.join("factors.csv"))?;
let candidates = read_candidates(&path.join("candidate_flags.csv"))?;
let benchmarks = read_benchmarks(&path.join("benchmark.csv"))?;
Self::from_components(instruments, market, factors, candidates, benchmarks)
}
pub fn from_partitioned_dir(path: &Path) -> Result<Self, DataSetError> {
let instruments = read_instruments(&path.join("instruments.csv"))?;
let benchmarks = read_partitioned_dir(&path.join("benchmark"), read_benchmarks)?;
let market = read_partitioned_dir(&path.join("market"), read_market)?;
let factors = read_partitioned_dir(&path.join("factors"), read_factors)?;
let candidates = read_partitioned_dir(&path.join("candidates"), read_candidates)?;
Self::from_components(instruments, market, factors, candidates, benchmarks)
}
fn from_components(
instruments: Vec<Instrument>,
market: Vec<DailyMarketSnapshot>,
factors: Vec<DailyFactorSnapshot>,
candidates: Vec<CandidateEligibility>,
benchmarks: Vec<BenchmarkSnapshot>,
) -> Result<Self, DataSetError> {
let benchmark_code = collect_benchmark_code(&benchmarks)?;
let calendar = TradingCalendar::new(benchmarks.iter().map(|item| item.date).collect());
@@ -456,6 +474,33 @@ impl CsvRow {
}
}
fn read_partitioned_dir<T, F>(dir: &Path, mut loader: F) -> Result<Vec<T>, DataSetError>
where
F: FnMut(&Path) -> Result<Vec<T>, DataSetError>,
{
let mut files = fs::read_dir(dir)
.map_err(|source| DataSetError::Io {
path: dir.display().to_string(),
source,
})?
.collect::<Result<Vec<_>, _>>()
.map_err(|source| DataSetError::Io {
path: dir.display().to_string(),
source,
})?;
files.sort_by_key(|entry| entry.path());
let mut rows = Vec::new();
for entry in files {
let path = entry.path();
if path.extension().and_then(|x| x.to_str()) != Some("csv") {
continue;
}
rows.extend(loader(&path)?);
}
Ok(rows)
}
fn read_rows(path: &Path) -> Result<Vec<CsvRow>, DataSetError> {
let content = fs::read_to_string(path).map_err(|source| DataSetError::Io {
path: path.display().to_string(),

View File

@@ -0,0 +1,55 @@
use fidc_core::DataSet;
use std::fs;
use std::path::PathBuf;
use std::time::{SystemTime, UNIX_EPOCH};
fn temp_dir() -> PathBuf {
let uniq = SystemTime::now()
.duration_since(UNIX_EPOCH)
.expect("clock")
.as_nanos();
let dir = std::env::temp_dir().join(format!("fidc-bt-partitioned-{uniq}"));
fs::create_dir_all(&dir).expect("mkdir temp");
dir
}
#[test]
fn can_load_partitioned_snapshot_dir() {
let dir = temp_dir();
fs::create_dir_all(dir.join("benchmark")).unwrap();
fs::create_dir_all(dir.join("market")).unwrap();
fs::create_dir_all(dir.join("factors")).unwrap();
fs::create_dir_all(dir.join("candidates")).unwrap();
fs::write(
dir.join("instruments.csv"),
"symbol,name,exchange,lot_size\n000001.SZ,PingAn,SZ,100\n",
)
.unwrap();
fs::write(
dir.join("benchmark/2024-01-02.csv"),
"date,benchmark,open,close,prev_close,volume\n2024-01-02,CSI300.DEMO,2990,3000,2980,100000000\n",
)
.unwrap();
fs::write(
dir.join("market/2024-01-02.csv"),
"date,symbol,open,high,low,close,prev_close,volume,paused,upper_limit,lower_limit\n2024-01-02,000001.SZ,10,10.5,9.9,10.2,10,100000,false,11,9\n",
)
.unwrap();
fs::write(
dir.join("factors/2024-01-02.csv"),
"date,symbol,market_cap_bn,free_float_cap_bn,pe_ttm\n2024-01-02,000001.SZ,40,35,12\n",
)
.unwrap();
fs::write(
dir.join("candidates/2024-01-02.csv"),
"date,symbol,is_st,is_new_listing,is_paused,allow_buy,allow_sell,is_kcb,is_one_yuan\n2024-01-02,000001.SZ,false,false,false,true,true,false,false\n",
)
.unwrap();
let data = DataSet::from_partitioned_dir(&dir).expect("partitioned dataset");
assert_eq!(data.benchmark_code(), "CSI300.DEMO");
assert!(data.market_snapshots_on(chrono::NaiveDate::from_ymd_opt(2024, 1, 2).unwrap()).len() == 1);
let _ = fs::remove_dir_all(&dir);
}