增加分区快照数据入口

This commit is contained in:
zsb
2026-04-07 00:40:53 -07:00
parent d039c4e741
commit ec425999b0
4 changed files with 141 additions and 2 deletions

View File

@@ -156,7 +156,25 @@ impl DataSet {
let factors = read_factors(&path.join("factors.csv"))?;
let candidates = read_candidates(&path.join("candidate_flags.csv"))?;
let benchmarks = read_benchmarks(&path.join("benchmark.csv"))?;
Self::from_components(instruments, market, factors, candidates, benchmarks)
}
/// Builds a data set from a partitioned snapshot directory.
///
/// `path` is expected to hold a single `instruments.csv` file next to the
/// `benchmark`, `market`, `factors` and `candidates` sub-directories. Each
/// sub-directory is read through `read_partitioned_dir` with the matching
/// reader, and the combined rows are handed to `from_components`.
///
/// # Errors
///
/// Returns the first error produced while reading any of the inputs, or the
/// error returned by `from_components`.
pub fn from_partitioned_dir(path: &Path) -> Result<Self, DataSetError> {
    let instrument_rows = read_instruments(&path.join("instruments.csv"))?;

    // The inputs are read in this fixed order, so the earliest failing input
    // is the one that gets reported.
    let benchmark_dir = path.join("benchmark");
    let benchmark_rows = read_partitioned_dir(&benchmark_dir, read_benchmarks)?;

    let market_dir = path.join("market");
    let market_rows = read_partitioned_dir(&market_dir, read_market)?;

    let factor_dir = path.join("factors");
    let factor_rows = read_partitioned_dir(&factor_dir, read_factors)?;

    let candidate_dir = path.join("candidates");
    let candidate_rows = read_partitioned_dir(&candidate_dir, read_candidates)?;

    Self::from_components(
        instrument_rows,
        market_rows,
        factor_rows,
        candidate_rows,
        benchmark_rows,
    )
}
fn from_components(
instruments: Vec<Instrument>,
market: Vec<DailyMarketSnapshot>,
factors: Vec<DailyFactorSnapshot>,
candidates: Vec<CandidateEligibility>,
benchmarks: Vec<BenchmarkSnapshot>,
) -> Result<Self, DataSetError> {
let benchmark_code = collect_benchmark_code(&benchmarks)?;
let calendar = TradingCalendar::new(benchmarks.iter().map(|item| item.date).collect());
@@ -456,6 +474,33 @@ impl CsvRow {
}
}
/// Loads every `.csv` partition found directly inside `dir` and concatenates
/// the rows produced by `loader`.
///
/// Partition files are visited in ascending path order, so the returned rows
/// follow the sorted file order. Entries whose extension is not exactly
/// `csv` (the comparison is case-sensitive) are ignored. A directory without
/// any matching file yields an empty vector.
///
/// # Errors
///
/// Returns `DataSetError::Io` carrying the display form of `dir` when the
/// directory cannot be listed or one of its entries cannot be read, and
/// propagates the first error returned by `loader`. The whole directory is
/// listed before `loader` is called for the first time.
fn read_partitioned_dir<T, F>(dir: &Path, mut loader: F) -> Result<Vec<T>, DataSetError>
where
    F: FnMut(&Path) -> Result<Vec<T>, DataSetError>,
{
    // Listing failures are always reported against the directory itself.
    let io_error = |source| DataSetError::Io {
        path: dir.display().to_string(),
        source,
    };

    // Resolve each entry's path exactly once and keep only the CSV partitions,
    // so sorting neither re-allocates paths nor orders files that are skipped.
    let mut partitions = Vec::new();
    for entry in fs::read_dir(dir).map_err(io_error)? {
        let path = entry.map_err(io_error)?.path();
        if path.extension().and_then(|ext| ext.to_str()) == Some("csv") {
            partitions.push(path);
        }
    }
    // Paths within one directory are distinct, so stability is irrelevant.
    partitions.sort_unstable();

    let mut rows = Vec::new();
    for partition in &partitions {
        rows.extend(loader(partition)?);
    }
    Ok(rows)
}
fn read_rows(path: &Path) -> Result<Vec<CsvRow>, DataSetError> {
let content = fs::read_to_string(path).map_err(|source| DataSetError::Io {
path: path.display().to_string(),