From 47988cd7e769c7a23cf18730371d0e8539362ca4 Mon Sep 17 00:00:00 2001 From: boris Date: Thu, 23 Apr 2026 23:05:43 -0700 Subject: [PATCH] Add string factor support --- crates/fidc-core/src/data.rs | 316 +++++++++++++++++- crates/fidc-core/src/lib.rs | 5 +- .../fidc-core/src/platform_expr_strategy.rs | 245 +++++++++++++- crates/fidc-core/src/strategy.rs | 25 +- crates/fidc-core/src/strategy_ai.rs | 20 +- docs/engine-capability-roadmap.md | 7 +- 6 files changed, 587 insertions(+), 31 deletions(-) diff --git a/crates/fidc-core/src/data.rs b/crates/fidc-core/src/data.rs index 8e4d3a8..c3c409b 100644 --- a/crates/fidc-core/src/data.rs +++ b/crates/fidc-core/src/data.rs @@ -411,6 +411,15 @@ pub struct FactorValue { pub value: f64, } +#[derive(Debug, Clone, Serialize)] +pub struct FactorTextValue { + #[serde(with = "date_format")] + pub date: NaiveDate, + pub symbol: String, + pub field: String, + pub value: String, +} + #[derive(Debug, Clone, Serialize)] pub struct SecuritiesMarginRecord { #[serde(with = "date_format")] @@ -694,6 +703,8 @@ pub struct DataSet { market_index: HashMap<(NaiveDate, String), DailyMarketSnapshot>, factor_by_date: BTreeMap>, factor_index: HashMap<(NaiveDate, String), DailyFactorSnapshot>, + factor_text_by_date: BTreeMap>, + factor_text_index: HashMap<(NaiveDate, String, String), FactorTextValue>, candidate_by_date: BTreeMap>, candidate_index: HashMap<(NaiveDate, String), CandidateEligibility>, corporate_actions_by_date: BTreeMap>, @@ -712,6 +723,7 @@ impl DataSet { let instruments = read_instruments(&path.join("instruments.csv"))?; let market = read_market(&path.join("market.csv"))?; let factors = read_factors(&path.join("factors.csv"))?; + let factor_texts = read_factor_texts(&path.join("factors.csv"))?; let candidates = read_candidates(&path.join("candidate_flags.csv"))?; let benchmarks = read_benchmarks(&path.join("benchmark.csv"))?; let corporate_actions_path = path.join("corporate_actions.csv"); @@ -738,7 +750,7 @@ impl DataSet { } else { Vec::new() }; - Self::from_components_with_actions_quotes_futures_and_depth( + Self::from_components_with_actions_quotes_futures_depth_and_factor_texts( instruments, market, factors, @@ -748,6 +760,7 @@ impl DataSet { execution_quotes, futures_params, order_book_depth, + factor_texts, ) } @@ -756,6 +769,7 @@ impl DataSet { let benchmarks = read_partitioned_dir(&path.join("benchmark"), read_benchmarks)?; let market = read_partitioned_dir(&path.join("market"), read_market)?; let factors = read_partitioned_dir(&path.join("factors"), read_factors)?; + let factor_texts = read_partitioned_dir(&path.join("factors"), read_factor_texts)?; let candidates = read_partitioned_dir(&path.join("candidates"), read_candidates)?; let corporate_actions_dir = path.join("corporate_actions"); let corporate_actions = if corporate_actions_dir.exists() { @@ -781,7 +795,7 @@ impl DataSet { } else { Vec::new() }; - Self::from_components_with_actions_quotes_futures_and_depth( + Self::from_components_with_actions_quotes_futures_depth_and_factor_texts( instruments, market, factors, @@ -791,6 +805,7 @@ impl DataSet { execution_quotes, futures_params, order_book_depth, + factor_texts, ) } @@ -885,6 +900,54 @@ impl DataSet { execution_quotes: Vec, futures_params: Vec, order_book_depth: Vec, + ) -> Result { + Self::from_components_with_actions_quotes_futures_depth_and_factor_texts( + instruments, + market, + factors, + candidates, + benchmarks, + corporate_actions, + execution_quotes, + futures_params, + order_book_depth, + Vec::new(), + ) + } + + pub fn from_components_with_factor_texts( + instruments: Vec, + market: Vec, + factors: Vec, + candidates: Vec, + benchmarks: Vec, + factor_texts: Vec, + ) -> Result { + Self::from_components_with_actions_quotes_futures_depth_and_factor_texts( + instruments, + market, + factors, + candidates, + benchmarks, + Vec::new(), + Vec::new(), + Vec::new(), + Vec::new(), + factor_texts, + ) + } + + pub fn from_components_with_actions_quotes_futures_depth_and_factor_texts( + instruments: Vec, + market: Vec, + factors: Vec, + candidates: Vec, + benchmarks: Vec, + corporate_actions: Vec, + execution_quotes: Vec, + futures_params: Vec, + order_book_depth: Vec, + factor_texts: Vec, ) -> Result { let benchmark_code = collect_benchmark_code(&benchmarks)?; let calendar = TradingCalendar::new(benchmarks.iter().map(|item| item.date).collect()); @@ -905,6 +968,22 @@ impl DataSet { .into_iter() .map(|item| ((item.date, item.symbol.clone()), item)) .collect::>(); + let factor_texts = factor_texts + .into_iter() + .filter_map(|mut item| { + item.field = normalize_field(&item.field); + if item.field.is_empty() { + None + } else { + Some(item) + } + }) + .collect::>(); + let factor_text_by_date = group_by_date(factor_texts.clone(), |item| item.date); + let factor_text_index = factor_texts + .into_iter() + .map(|item| ((item.date, item.symbol.clone(), item.field.clone()), item)) + .collect::>(); let candidate_by_date = group_by_date(candidates.clone(), |item| item.date); let candidate_index = candidates @@ -933,6 +1012,8 @@ impl DataSet { market_index, factor_by_date, factor_index, + factor_text_by_date, + factor_text_index, candidate_by_date, candidate_index, corporate_actions_by_date, @@ -1271,6 +1352,30 @@ impl DataSet { rows } + pub fn get_factor_text( + &self, + symbol: &str, + start: NaiveDate, + end: NaiveDate, + field: &str, + ) -> Vec { + if start > end { + return Vec::new(); + } + let field = normalize_field(field); + let mut rows = self + .factor_text_by_date + .range(start..=end) + .flat_map(|(_, snapshots)| snapshots.iter()) + .filter(|snapshot| { + snapshot.symbol == symbol && normalize_field(&snapshot.field) == field + }) + .cloned() + .collect::>(); + rows.sort_by_key(|row| row.date); + rows + } + pub fn get_yield_curve( &self, start: NaiveDate, @@ -1555,6 +1660,33 @@ impl DataSet { None } + pub fn get_industry_name( + &self, + symbol: &str, + date: NaiveDate, + source: &str, + level: usize, + ) -> Option { + let fields = industry_name_factor_aliases(source, level); + for (factor_date, snapshots) in self.factor_text_by_date.range(..=date).rev() { + for snapshot in snapshots { + if snapshot.symbol != symbol { + continue; + } + let normalized = normalize_field(&snapshot.field); + if fields.iter().any(|field| field == &normalized) { + return Some(FactorTextValue { + date: *factor_date, + symbol: snapshot.symbol.clone(), + field: snapshot.field.clone(), + value: snapshot.value.clone(), + }); + } + } + } + None + } + pub fn get_dominant_future(&self, underlying_symbol: &str, date: NaiveDate) -> Option { let underlying = normalize_field(underlying_symbol); let mut candidates = self @@ -1656,6 +1788,13 @@ impl DataSet { .unwrap_or_default() } + pub fn factor_text_snapshots_on(&self, date: NaiveDate) -> Vec<&FactorTextValue> { + self.factor_text_by_date + .get(&date) + .map(|rows| rows.iter().collect()) + .unwrap_or_default() + } + pub fn market_snapshots_on(&self, date: NaiveDate) -> Vec<&DailyMarketSnapshot> { self.market_by_date .get(&date) @@ -1796,6 +1935,12 @@ impl DataSet { .and_then(|snapshot| factor_numeric_value(snapshot, field)) } + pub fn factor_text_value(&self, date: NaiveDate, symbol: &str, field: &str) -> Option { + self.factor_text_index + .get(&(date, symbol.to_string(), normalize_field(field))) + .map(|row| row.value.clone()) + } + fn get_first_available_factor_series( &self, symbol: &str, @@ -2034,6 +2179,7 @@ fn read_factors(path: &Path) -> Result, DataSetError> { let rows = read_rows(path)?; let mut snapshots = Vec::new(); for row in rows { + let (extra_factors, _) = parse_extra_factor_maps(&row); snapshots.push(DailyFactorSnapshot { date: row.parse_date(0)?, symbol: row.get(1)?.to_string(), @@ -2042,17 +2188,76 @@ fn read_factors(path: &Path) -> Result, DataSetError> { pe_ttm: row.parse_f64(4)?, turnover_ratio: row.parse_optional_f64(5), effective_turnover_ratio: row.parse_optional_f64(6), - extra_factors: row - .fields - .get(7) - .filter(|value| !value.trim().is_empty()) - .and_then(|value| serde_json::from_str::>(value).ok()) - .unwrap_or_default(), + extra_factors, }); } Ok(snapshots) } +fn read_factor_texts(path: &Path) -> Result, DataSetError> { + let rows = read_rows(path)?; + let mut text_values = Vec::new(); + for row in rows { + let date = row.parse_date(0)?; + let symbol = row.get(1)?.to_string(); + let (_, extra_text_factors) = parse_extra_factor_maps(&row); + for (field, value) in extra_text_factors { + text_values.push(FactorTextValue { + date, + symbol: symbol.clone(), + field, + value, + }); + } + } + Ok(text_values) +} + +fn parse_extra_factor_maps(row: &CsvRow) -> (BTreeMap, BTreeMap) { + let mut numeric = BTreeMap::new(); + let mut text = BTreeMap::new(); + for value in row.fields.get(7).into_iter().chain(row.fields.get(8)) { + merge_extra_factor_json(value, &mut numeric, &mut text); + } + (numeric, text) +} + +fn merge_extra_factor_json( + raw: &str, + numeric: &mut BTreeMap, + text: &mut BTreeMap, +) { + let trimmed = raw.trim(); + if trimmed.is_empty() { + return; + } + let Ok(serde_json::Value::Object(map)) = serde_json::from_str::(trimmed) + else { + return; + }; + for (key, value) in map { + let key = normalize_field(&key); + if key.is_empty() { + continue; + } + match value { + serde_json::Value::Number(number) => { + if let Some(value) = number.as_f64().filter(|value| value.is_finite()) { + numeric.insert(key, value); + } + } + serde_json::Value::String(value) => { + text.insert(key, value); + } + serde_json::Value::Bool(value) => { + numeric.insert(key.clone(), if value { 1.0 } else { 0.0 }); + text.insert(key, value.to_string()); + } + _ => {} + } + } +} + fn normalized_aliases(values: &[String]) -> Vec { let mut aliases = Vec::new(); for value in values { @@ -2191,6 +2396,21 @@ fn industry_factor_aliases(source: &str, level: usize) -> Vec { ]) } +fn industry_name_factor_aliases(source: &str, level: usize) -> Vec { + let source = normalize_field(source); + normalized_aliases(&[ + format!("industry_{source}_l{level}_name"), + format!("industry_{source}_{level}_name"), + format!("industry_{source}_name_l{level}"), + format!("{source}_industry_l{level}_name"), + format!("{source}_industry_{level}_name"), + format!("{source}_industry_name_l{level}"), + format!("industry_l{level}_name"), + format!("industry_{level}_name"), + "industry_name".to_string(), + ]) +} + fn factor_numeric_value(snapshot: &DailyFactorSnapshot, field: &str) -> Option { match field { "market_cap" | "market_cap_bn" => Some(snapshot.market_cap_bn), @@ -2653,16 +2873,39 @@ fn read_rows(path: &Path) -> Result, DataSetError> { rows.push(CsvRow { path: path.display().to_string(), line: line_no, - fields: line - .split(',') - .map(|field| field.trim().to_string()) - .collect(), + fields: split_csv_line(line), }); } Ok(rows) } +fn split_csv_line(line: &str) -> Vec { + let mut fields = Vec::new(); + let mut field = String::new(); + let mut chars = line.trim_start_matches('\u{feff}').chars().peekable(); + let mut in_quotes = false; + + while let Some(ch) = chars.next() { + match ch { + '"' if in_quotes && chars.peek() == Some(&'"') => { + field.push('"'); + chars.next(); + } + '"' => { + in_quotes = !in_quotes; + } + ',' if !in_quotes => { + fields.push(field.trim().to_string()); + field.clear(); + } + _ => field.push(ch), + } + } + fields.push(field.trim().to_string()); + fields +} + fn group_by_date(rows: Vec, mut date_of: F) -> BTreeMap> where F: FnMut(&T) -> NaiveDate, @@ -2854,3 +3097,52 @@ fn build_eligible_universe( per_date } + +#[cfg(test)] +mod tests { + use super::*; + use std::time::{SystemTime, UNIX_EPOCH}; + + fn temp_csv_path(name: &str) -> std::path::PathBuf { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_nanos(); + std::env::temp_dir().join(format!("{}_{}_{}.csv", name, std::process::id(), nanos)) + } + + #[test] + fn reads_mixed_numeric_and_text_extra_factors_from_quoted_csv_json() { + let path = temp_csv_path("mixed_factor_maps"); + fs::write( + &path, + concat!( + "date,symbol,market_cap_bn,free_float_cap_bn,pe_ttm,turnover_ratio,effective_turnover_ratio,extra_factors\n", + "2025-01-02,000001.SZ,12,10,8,1,1,\"{\"\"custom_alpha\"\":7,\"\"industry_name\"\":\"\"electronics,hardware\"\",\"\"flag\"\":true}\"\n" + ), + ) + .unwrap(); + + let factors = read_factors(&path).unwrap(); + let text_factors = read_factor_texts(&path).unwrap(); + fs::remove_file(&path).ok(); + + assert_eq!(factors.len(), 1); + assert_eq!( + factors[0].extra_factors.get("custom_alpha").copied(), + Some(7.0) + ); + assert_eq!(factors[0].extra_factors.get("flag").copied(), Some(1.0)); + assert_eq!(text_factors.len(), 2); + assert!( + text_factors + .iter() + .any(|row| row.field == "industry_name" && row.value == "electronics,hardware") + ); + assert!( + text_factors + .iter() + .any(|row| row.field == "flag" && row.value == "true") + ); + } +} diff --git a/crates/fidc-core/src/lib.rs b/crates/fidc-core/src/lib.rs index c414d46..2506df1 100644 --- a/crates/fidc-core/src/lib.rs +++ b/crates/fidc-core/src/lib.rs @@ -22,8 +22,9 @@ pub use cost::{ChinaAShareCostModel, CostModel, TradingCost}; pub use data::{ BenchmarkSnapshot, CandidateEligibility, CorporateAction, DailyFactorSnapshot, DailyMarketSnapshot, DailySnapshotBundle, DataSet, DataSetError, DividendRecord, - EligibleUniverseSnapshot, FactorValue, IntradayExecutionQuote, IntradayOrderBookDepthLevel, - PriceBar, PriceField, SecuritiesMarginRecord, SplitRecord, YieldCurvePoint, + EligibleUniverseSnapshot, FactorTextValue, FactorValue, IntradayExecutionQuote, + IntradayOrderBookDepthLevel, PriceBar, PriceField, SecuritiesMarginRecord, SplitRecord, + YieldCurvePoint, }; pub use engine::{ AnalyzerMonthlyReturnRow, AnalyzerPositionRow, AnalyzerReport, AnalyzerRiskSummary, diff --git a/crates/fidc-core/src/platform_expr_strategy.rs b/crates/fidc-core/src/platform_expr_strategy.rs index feaee71..9fb4eda 100644 --- a/crates/fidc-core/src/platform_expr_strategy.rs +++ b/crates/fidc-core/src/platform_expr_strategy.rs @@ -327,6 +327,7 @@ struct DayExpressionState { is_month_start: bool, is_month_end: bool, available_factor_names: BTreeSet, + available_text_factor_names: BTreeSet, } #[derive(Debug, Clone)] @@ -373,6 +374,7 @@ struct StockExpressionState { stock_volume_ma20: f64, stock_volume_ma60: f64, extra_factors: BTreeMap, + extra_text_factors: BTreeMap, } #[derive(Debug, Clone)] @@ -636,6 +638,8 @@ impl PlatformExprStrategy { | "sma" | "factor_value" | "get_factor_value" + | "factor_text" + | "get_factor_text" | "dividend_cash" | "has_dividend" | "split_ratio" @@ -659,6 +663,8 @@ impl PlatformExprStrategy { | "get_pit_financials_value" | "industry_code" | "get_industry_code" + | "industry_name" + | "get_industry_name" | "yield_curve" | "get_yield_curve_value" | "is_margin_stock" @@ -1196,6 +1202,12 @@ impl PlatformExprStrategy { .into_iter() .flat_map(|row| row.extra_factors.keys().cloned()) .collect(), + available_text_factor_names: ctx + .data + .factor_text_snapshots_on(date) + .into_iter() + .map(|row| row.field.clone()) + .collect(), }) } @@ -1315,6 +1327,13 @@ impl PlatformExprStrategy { stock_volume_ma20, stock_volume_ma60, extra_factors: factor.extra_factors.clone(), + extra_text_factors: ctx + .data + .factor_text_snapshots_on(date) + .into_iter() + .filter(|row| row.symbol == symbol) + .map(|row| (row.field.clone(), row.value.clone())) + .collect(), }) } @@ -1833,6 +1852,9 @@ impl PlatformExprStrategy { for (key, value) in &stock.extra_factors { factors.insert(key.clone().into(), Dynamic::from(*value)); } + for (key, value) in &stock.extra_text_factors { + factors.insert(key.clone().into(), Dynamic::from(value.clone())); + } scope.push("factors", factors); let reserved_names = Self::reserved_scope_names(); for (key, value) in &stock.extra_factors { @@ -1840,6 +1862,11 @@ impl PlatformExprStrategy { scope.push_dynamic(key.clone(), Dynamic::from(*value)); } } + for (key, value) in &stock.extra_text_factors { + if Self::is_expression_identifier(key) && !reserved_names.contains(key.as_str()) { + scope.push_dynamic(key.clone(), Dynamic::from(value.clone())); + } + } for key in &day.available_factor_names { if Self::is_expression_identifier(key) && !reserved_names.contains(key.as_str()) @@ -1848,6 +1875,14 @@ impl PlatformExprStrategy { scope.push_dynamic(key.clone(), Dynamic::from(0.0)); } } + for key in &day.available_text_factor_names { + if Self::is_expression_identifier(key) + && !reserved_names.contains(key.as_str()) + && !stock.extra_text_factors.contains_key(key) + { + scope.push_dynamic(key.clone(), Dynamic::from(String::new())); + } + } } if let Some(position) = position { scope.push("order_book_id", position.order_book_id.clone()); @@ -1918,12 +1953,17 @@ impl PlatformExprStrategy { for identifier in Self::extract_identifier_candidates(&expanded_expr) { if reserved_names.contains(identifier.as_str()) || prelude_declared_identifiers.contains(&identifier) - || !day.available_factor_names.contains(&identifier) + || (!day.available_factor_names.contains(&identifier) + && !day.available_text_factor_names.contains(&identifier)) { continue; } - let value = item.extra_factors.get(&identifier).copied().unwrap_or(0.0); - scope.push_dynamic(identifier, Dynamic::from(value)); + if let Some(value) = item.extra_text_factors.get(&identifier) { + scope.push_dynamic(identifier, Dynamic::from(value.clone())); + } else { + let value = item.extra_factors.get(&identifier).copied().unwrap_or(0.0); + scope.push_dynamic(identifier, Dynamic::from(value)); + } } } let factor_alias_prelude = stock @@ -1931,6 +1971,7 @@ impl PlatformExprStrategy { let reserved_names = Self::reserved_scope_names(); item.extra_factors .keys() + .chain(item.extra_text_factors.keys()) .filter(|key| { Self::is_expression_identifier(key) && !reserved_names.contains(key.as_str()) @@ -2068,15 +2109,15 @@ impl PlatformExprStrategy { let args = Self::split_top_level_args(args_src); match helper { "factor" => { - let key = Self::parse_string_or_identifier( + let key = Self::normalize_runtime_factor_key(&Self::parse_string_or_identifier( args.first().map(String::as_str).unwrap_or_default(), - )?; + )?); Ok(format!("factors[{}]", Self::quote_rhai_string(&key))) } "day_factor" => { - let key = Self::parse_string_or_identifier( + let key = Self::normalize_runtime_factor_key(&Self::parse_string_or_identifier( args.first().map(String::as_str).unwrap_or_default(), - )?; + )?); Ok(format!("day_factors[{}]", Self::quote_rhai_string(&key))) } "rolling_mean" | "sma" => { @@ -2109,6 +2150,25 @@ impl PlatformExprStrategy { .unwrap_or(0.0); Ok(Self::format_rhai_float(value)) } + "factor_text" | "get_factor_text" => { + if args.is_empty() || args.len() > 2 { + return Err(BacktestError::Execution(format!( + "{helper} expects field and optional lookback" + ))); + } + let stock = stock.ok_or_else(|| { + BacktestError::Execution(format!("{helper} requires stock context")) + })?; + let field = Self::parse_string_or_identifier(&args[0])?; + let lookback = Self::parse_optional_positive_usize(args.get(1), 1)?; + let start = self.helper_start_date(ctx, day.date, lookback); + let value = ctx + .get_factor_text(&stock.symbol, start, day.date, &field) + .last() + .map(|row| row.value.clone()) + .unwrap_or_default(); + Ok(Self::quote_rhai_string(&value)) + } "dividend_cash" | "has_dividend" => { let (symbol, lookback) = self.parse_symbol_lookback_helper_args(helper, &args, stock, 1, 2)?; @@ -2297,6 +2357,27 @@ impl PlatformExprStrategy { .unwrap_or(0.0); Ok(Self::format_rhai_float(value)) } + "industry_name" | "get_industry_name" => { + if args.len() > 2 { + return Err(BacktestError::Execution(format!( + "{helper} expects optional source and optional level" + ))); + } + let stock = stock.ok_or_else(|| { + BacktestError::Execution(format!("{helper} requires stock context")) + })?; + let source = args + .first() + .map(|arg| Self::parse_string_or_identifier(arg)) + .transpose()? + .unwrap_or_else(|| "citics".to_string()); + let level = Self::parse_optional_positive_usize(args.get(1), 1)?; + let value = ctx + .get_industry_name(&stock.symbol, &source, level) + .map(|row| row.value) + .unwrap_or_default(); + Ok(Self::quote_rhai_string(&value)) + } "yield_curve" | "get_yield_curve_value" => { if args.is_empty() || args.len() > 2 { return Err(BacktestError::Execution(format!( @@ -2610,6 +2691,13 @@ impl PlatformExprStrategy { ))) } + fn normalize_runtime_factor_key(key: &str) -> String { + key.trim() + .trim_matches('"') + .trim_matches('\'') + .to_ascii_lowercase() + } + fn parse_positive_usize(raw: &str) -> Result { let trimmed = raw.trim(); let value = trimmed.parse::().map_err(|_| { @@ -4378,7 +4466,7 @@ mod tests { }; use crate::{ AlgoOrderStyle, BenchmarkSnapshot, CandidateEligibility, CorporateAction, - DailyFactorSnapshot, DailyMarketSnapshot, DataSet, FuturesCommissionType, + DailyFactorSnapshot, DailyMarketSnapshot, DataSet, FactorTextValue, FuturesCommissionType, FuturesTradingParameter, Instrument, OpenOrderView, PortfolioState, ProcessEvent, ProcessEventKind, ScheduleStage, ScheduleTimeRule, Strategy, StrategyContext, TargetPortfolioOrderPricing, TradingCalendar, default_stage_time, @@ -4841,6 +4929,147 @@ mod tests { } } + #[test] + fn platform_strategy_can_filter_with_string_factors() { + let date = d(2025, 2, 3); + let data = DataSet::from_components_with_factor_texts( + vec![Instrument { + symbol: "000001.SZ".to_string(), + name: "Text Factor Stock".to_string(), + board: "SZSE".to_string(), + round_lot: 100, + listed_at: Some(d(2010, 1, 1)), + delisted_at: None, + status: "active".to_string(), + }], + vec![DailyMarketSnapshot { + date, + symbol: "000001.SZ".to_string(), + timestamp: Some("10:18:00".to_string()), + day_open: 10.0, + open: 10.0, + high: 10.2, + low: 9.9, + close: 10.1, + last_price: 10.05, + bid1: 10.04, + ask1: 10.05, + prev_close: 9.95, + volume: 1_000_000, + tick_volume: 5_000, + bid1_volume: 1_000, + ask1_volume: 1_000, + trading_phase: Some("continuous".to_string()), + paused: false, + upper_limit: 10.94, + lower_limit: 8.96, + price_tick: 0.01, + }], + vec![DailyFactorSnapshot { + date, + symbol: "000001.SZ".to_string(), + market_cap_bn: 12.0, + free_float_cap_bn: 10.0, + pe_ttm: 8.0, + turnover_ratio: Some(22.0), + effective_turnover_ratio: Some(18.0), + extra_factors: BTreeMap::new(), + }], + vec![CandidateEligibility { + date, + symbol: "000001.SZ".to_string(), + is_st: false, + is_new_listing: false, + is_paused: false, + allow_buy: true, + allow_sell: true, + is_kcb: false, + is_one_yuan: false, + }], + vec![BenchmarkSnapshot { + date, + benchmark: "000852.SH".to_string(), + open: 1000.0, + close: 1002.0, + prev_close: 998.0, + volume: 1_000_000, + }], + vec![ + FactorTextValue { + date, + symbol: "000001.SZ".to_string(), + field: "industry_name".to_string(), + value: "electronics".to_string(), + }, + FactorTextValue { + date, + symbol: "000001.SZ".to_string(), + field: "industry_citics_l1_name".to_string(), + value: "electronics".to_string(), + }, + FactorTextValue { + date, + symbol: "000001.SZ".to_string(), + field: "concept".to_string(), + value: "ai_chip".to_string(), + }, + ], + ) + .expect("dataset"); + let portfolio = PortfolioState::new(1_000_000.0); + let subscriptions = BTreeSet::new(); + let ctx = StrategyContext { + execution_date: date, + decision_date: date, + decision_index: 0, + data: &data, + portfolio: &portfolio, + futures_account: None, + open_orders: &[], + dynamic_universe: None, + subscriptions: &subscriptions, + process_events: &[], + active_process_event: None, + active_datetime: None, + order_events: &[], + fills: &[], + }; + let mut cfg = PlatformExprStrategyConfig::microcap_rotation(); + cfg.signal_symbol = "000001.SZ".to_string(); + cfg.rotation_enabled = false; + cfg.benchmark_short_ma_days = 1; + cfg.benchmark_long_ma_days = 1; + cfg.explicit_actions = vec![PlatformTradeAction::Order { + kind: PlatformExplicitOrderKind::Value, + symbol: "000001.SZ".to_string(), + amount_expr: "1000".to_string(), + limit_price_expr: None, + start_time_expr: None, + end_time_expr: None, + when_expr: Some( + concat!( + "factor(\"industry_name\") == \"electronics\"", + " && factor_text(\"concept\") == \"ai_chip\"", + " && concept == \"ai_chip\"", + " && industry_name(\"citics\", 1) == \"electronics\"" + ) + .to_string(), + ), + reason: "string_factor_entry".to_string(), + }]; + let mut strategy = PlatformExprStrategy::new(cfg); + + let decision = strategy.on_day(&ctx).expect("platform decision"); + + assert_eq!(decision.order_intents.len(), 1); + match &decision.order_intents[0] { + crate::strategy::OrderIntent::Value { reason, .. } => { + assert_eq!(reason, "string_factor_entry"); + } + other => panic!("unexpected string factor intent: {other:?}"), + } + } + #[test] fn platform_strategy_emits_target_shares_explicit_action() { let date = d(2025, 2, 3); diff --git a/crates/fidc-core/src/strategy.rs b/crates/fidc-core/src/strategy.rs index ac216cf..7b9a16b 100644 --- a/crates/fidc-core/src/strategy.rs +++ b/crates/fidc-core/src/strategy.rs @@ -8,8 +8,9 @@ use chrono::{Datelike, Duration, NaiveDate, NaiveDateTime, NaiveTime}; use crate::cost::ChinaAShareCostModel; use crate::data::{ - DailyMarketSnapshot, DataSet, DividendRecord, FactorValue, IntradayExecutionQuote, PriceBar, - PriceField, SecuritiesMarginRecord, SplitRecord, YieldCurvePoint, + DailyMarketSnapshot, DataSet, DividendRecord, FactorTextValue, FactorValue, + IntradayExecutionQuote, PriceBar, PriceField, SecuritiesMarginRecord, SplitRecord, + YieldCurvePoint, }; use crate::engine::BacktestError; use crate::events::{FillEvent, OrderEvent, OrderSide, OrderStatus, ProcessEvent}; @@ -630,6 +631,16 @@ impl StrategyContext<'_> { self.data.get_factor(symbol, start, end, field) } + pub fn get_factor_text( + &self, + symbol: &str, + start: NaiveDate, + end: NaiveDate, + field: &str, + ) -> Vec { + self.data.get_factor_text(symbol, start, end, field) + } + pub fn get_yield_curve( &self, start: NaiveDate, @@ -738,6 +749,16 @@ impl StrategyContext<'_> { .get_industry(symbol, self.execution_date, source, level) } + pub fn get_industry_name( + &self, + symbol: &str, + source: &str, + level: usize, + ) -> Option { + self.data + .get_industry_name(symbol, self.execution_date, source, level) + } + pub fn get_dominant_future(&self, underlying_symbol: &str) -> Option { self.data .get_dominant_future(underlying_symbol, self.execution_date) diff --git a/crates/fidc-core/src/strategy_ai.rs b/crates/fidc-core/src/strategy_ai.rs index ec5ddc8..2ff8c51 100644 --- a/crates/fidc-core/src/strategy_ai.rs +++ b/crates/fidc-core/src/strategy_ai.rs @@ -83,6 +83,7 @@ pub fn built_in_strategy_manual() -> StrategyAiManual { overview: vec![ "平台策略脚本采用声明式 DSL + 表达式执行模型。".to_string(), "支持 let 变量、fn 自定义函数、when/unless/else 条件块、数据库字段因子映射。".to_string(), + "支持数值型和字符串型数据库因子,字符串字段可用于行业、概念、标签、板块等分类过滤。".to_string(), "禁止自由 Python/JavaScript 命令式语句,最终必须输出平台 DSL。".to_string(), ], statement_blocks: vec![ @@ -178,6 +179,7 @@ pub fn built_in_strategy_manual() -> StrategyAiManual { ManualField { name: "in_dynamic_universe/is_subscribed".to_string(), field_type: "bool".to_string(), detail: "当前证券是否在动态 universe 内,以及是否仍在订阅集合中。".to_string() }, ManualField { name: "stock_ma5/stock_ma10/stock_ma20/stock_ma30".to_string(), field_type: "float".to_string(), detail: "个股价格均线内建别名。只内建这几个窗口;15 日、45 日等任意窗口请改用 sma(\"close\", n)。".to_string() }, ManualField { name: "stock_volume_ma5/stock_volume_ma10/stock_volume_ma20/stock_volume_ma60".to_string(), field_type: "float".to_string(), detail: "个股成交量均线内建别名。只内建这几个窗口;任意窗口请改用 rolling_mean(\"volume\", n)。".to_string() }, + ManualField { name: "factors[\"field\"] / factor(\"field\")".to_string(), field_type: "float/string".to_string(), detail: "当前证券当日数据库因子。数值字段返回数字,字符串字段返回字符串;字符串字段名如果是合法标识符,也可直接写字段名,例如 concept == \"ai_chip\"。".to_string() }, ManualField { name: "listed_days".to_string(), field_type: "int".to_string(), detail: "上市天数。".to_string() }, ], }, @@ -201,7 +203,7 @@ pub fn built_in_strategy_manual() -> StrategyAiManual { }, ], functions: vec![ - ManualFunction { name: "factor".to_string(), signature: "factor(\"column_name\")".to_string(), detail: "读取当前股票的数据库因子列。".to_string() }, + ManualFunction { name: "factor".to_string(), signature: "factor(\"column_name\")".to_string(), detail: "读取当前股票当日数据库因子列。数值因子返回 float,字符串因子返回 string;缺失字段默认返回 0 或空字符串,建议重要条件配合 diagnostics 查看候选过滤数量。".to_string() }, ManualFunction { name: "day_factor".to_string(), signature: "day_factor(\"field_name\")".to_string(), detail: "读取日级/指数级字段映射。".to_string() }, ManualFunction { name: "history_bars".to_string(), signature: "ctx.history_bars(symbol, count, \"1d\" | \"1m\" | \"tick\", \"close\", include_now)".to_string(), detail: "回测内核策略上下文数据 API,返回指定证券最近 N 条数值序列。日线字段支持 open/high/low/close/last/prev_close/volume/upper_limit/lower_limit;分钟或 tick 字段支持 last/bid1/ask1/volume_delta/amount_delta。日线 include_now=false 排除当前交易日;分钟/tick 会按当前 on_bar、on_tick 或调度时刻截断,include_now=false 排除当前 bar/tick,避免未来函数。".to_string() }, ManualFunction { name: "current_snapshot".to_string(), signature: "ctx.current_snapshot(symbol)".to_string(), detail: "读取当前交易日指定证券的日级快照,可用于获得当日 open/close/last/upper_limit/lower_limit 等字段。".to_string() }, @@ -212,7 +214,8 @@ pub fn built_in_strategy_manual() -> StrategyAiManual { ManualFunction { name: "get_price".to_string(), signature: "ctx.get_price(symbol, start_date, end_date, \"1d\" | \"1m\" | \"tick\")".to_string(), detail: "按日期区间读取统一 PriceBar 序列。日线返回 open/high/low/close/last/volume/盘口字段;分钟或 tick 返回按 timestamp 排序的 last/bid1/ask1/volume_delta/amount_delta 映射,便于服务层转成表格或前端明细。".to_string() }, ManualFunction { name: "get_dividend / dividend_cash / has_dividend".to_string(), signature: "dividend_cash(lookback) / has_dividend(lookback)".to_string(), detail: "高级数据 风格分红 API。Rust Context 可用 ctx.get_dividend(symbol, start_date) 读取明细;平台表达式可用 dividend_cash(lookback) 汇总当前股票最近 N 个交易日现金分红,用 has_dividend(lookback) 判断是否发生分红,也支持 dividend_cash(\"600000.SH\", lookback)。".to_string() }, ManualFunction { name: "get_split / split_ratio / has_split".to_string(), signature: "split_ratio(lookback) / has_split(lookback)".to_string(), detail: "高级数据 风格拆分/送转 API。Rust Context 可用 ctx.get_split(symbol, start_date) 读取明细;平台表达式可用 split_ratio(lookback) 计算当前股票最近 N 个交易日累计拆分比例,has_split(lookback) 判断是否发生送转。".to_string() }, - ManualFunction { name: "get_factor / factor_value".to_string(), signature: "factor_value(\"field\", lookback=1)".to_string(), detail: "因子 API。factor(\"field\") 读取当前股票当日因子;factor_value(\"field\", lookback) 会在最近 N 个交易日内取该字段最新值,适合读取任意数据库指标或自定义因子。Rust Context 可用 ctx.get_factor(symbol, start, end, field) 读取完整序列。".to_string() }, + ManualFunction { name: "get_factor / factor_value".to_string(), signature: "factor_value(\"field\", lookback=1)".to_string(), detail: "数值因子 API。factor(\"field\") 读取当前股票当日因子;factor_value(\"field\", lookback) 会在最近 N 个交易日内取该字段最新数值,适合读取任意数据库指标或自定义数值因子。Rust Context 可用 ctx.get_factor(symbol, start, end, field) 读取完整数值序列。".to_string() }, + ManualFunction { name: "get_factor_text / factor_text".to_string(), signature: "factor_text(\"field\", lookback=1)".to_string(), detail: "字符串因子 API。读取最近 N 个交易日内指定字段的最新字符串值,适合行业名称、概念、标签、风格分类、自定义文本信号等字段。示例:factor_text(\"concept\") == \"ai_chip\" 或 factor(\"industry_name\") == \"电子\"。Rust Context 可用 ctx.get_factor_text(symbol, start, end, field) 读取完整字符串序列。".to_string() }, ManualFunction { name: "get_yield_curve / yield_curve".to_string(), signature: "yield_curve(\"1y\", lookback=1)".to_string(), detail: "收益率曲线 API。平台表达式从 factors 中的 yield_curve_1y / yc_1y 等字段读取最近值;Rust Context 可用 ctx.get_yield_curve(start, end, Some(\"1y\")) 读取序列。".to_string() }, ManualFunction { name: "get_margin_stocks / is_margin_stock".to_string(), signature: "is_margin_stock(\"all\" | \"stock\" | \"cash\")".to_string(), detail: "融资融券标的 API。平台表达式用 is_margin_stock(...) 判断当前股票是否在 margin_all/margin_stock/margin_cash 标记中;Rust Context 可用 ctx.get_margin_stocks(type) 返回标的列表。".to_string() }, ManualFunction { name: "get_securities_margin / securities_margin".to_string(), signature: "securities_margin(\"field\", lookback=1)".to_string(), detail: "融资融券明细 API。平台表达式读取当前股票最近 N 个交易日指定融资融券字段最新值;Rust Context 可用 ctx.get_securities_margin(symbol, start, end, field) 读取序列。".to_string() }, @@ -221,7 +224,7 @@ pub fn built_in_strategy_manual() -> StrategyAiManual { ManualFunction { name: "get_price_change_rate / price_change_rate".to_string(), signature: "price_change_rate(lookback=1)".to_string(), detail: "涨跌幅 API,默认按日行情 close / prev_close - 1 计算,缺少行情时回退 factors 中的 price_change_rate/change_rate/pct_change。返回小数,例如 0.1 表示上涨 10%。".to_string() }, ManualFunction { name: "get_stock_connect / stock_connect".to_string(), signature: "stock_connect(\"north_bound\" | \"south_bound\" | \"all\", lookback=1)".to_string(), detail: "陆股通/互联互通标记 API,从 stock_connect_north_bound、north_bound、stock_connect_south_bound 等因子读取,返回数值标记。".to_string() }, ManualFunction { name: "current_performance / fundamental / financial / pit_financial".to_string(), signature: "fundamental(\"net_profit\", lookback=1)".to_string(), detail: "财务与基本面 API。它们都是对 factors 的通用映射:fundamental(field) 会依次读取 fundamental_field / fundamentals_field / field,financial(field) 读取 financial_field / financials_field / field,pit_financial(field) 读取 pit_financial_field / pit_financials_field / field,current_performance(field) 读取 current_performance_field / current_performances_field / field。".to_string() }, - ManualFunction { name: "get_industry / industry_code".to_string(), signature: "industry_code(\"citics\", 1)".to_string(), detail: "行业 API。当前 core 的 factors 仅承载数值字段,因此行业先支持数值 code:按 industry_citics_l1、industry_citics_1、citics_industry_l1、industry_code 等字段读取最近可用值;字符串行业名称需要数据链路扩展字符串型因子后再暴露。".to_string() }, + ManualFunction { name: "get_industry / industry_code / industry_name".to_string(), signature: "industry_code(\"citics\", 1) / industry_name(\"citics\", 1)".to_string(), detail: "行业 API。industry_code 读取数值行业代码,按 industry_citics_l1、industry_citics_1、citics_industry_l1、industry_code 等别名查找;industry_name 读取字符串行业名称,按 industry_citics_l1_name、citics_industry_l1_name、industry_name 等别名查找。".to_string() }, ManualFunction { name: "get_dominant_future / dominant_future / dominant_future_price".to_string(), signature: "dominant_future(\"IF\") / dominant_future_price(\"IF\", \"close\", lookback=1)".to_string(), detail: "主力合约 API。dominant_future 返回当前日期匹配前缀的主力期货合约代码;dominant_future_price 读取该主力合约最近 N 个交易日指定字段的最新价格。Rust Context 可用 ctx.get_dominant_future(...) 和 ctx.get_dominant_future_price(...)。".to_string() }, ManualFunction { name: "order/order_status/order_avg_price/order_transaction_cost".to_string(), signature: "ctx.order(order_id)".to_string(), detail: "按订单 id 查询运行时订单对象,支持已结束订单和当前挂单。返回字段包括 status、filled_quantity、unfilled_quantity、avg_price、transaction_cost、symbol、side、reason;可用便捷函数读取状态、成交均价和费用,对齐 平台内核 Order 的核心属性。".to_string() }, ManualFunction { name: "account/portfolio_view/accounts".to_string(), signature: "ctx.account()".to_string(), detail: "返回当前股票账户/组合运行时视图,字段包括 account_type、cash、available_cash、frozen_cash、market_value、total_value、unit_net_value、daily_pnl、daily_returns、total_returns、transaction_cost、trading_pnl、position_pnl 等;DSL 中同名字段可直接使用。也可用 ctx.stock_account()、ctx.account_by_type(\"STOCK\")、ctx.accounts() 按账户类型读取;当前股票回测路径不会把 FUTURE 虚假映射成 STOCK。".to_string() }, @@ -245,7 +248,12 @@ pub fn built_in_strategy_manual() -> StrategyAiManual { }, ManualFactorSource { table: "fi_data_center.stock_indicator_factors_v1".to_string(), - detail: "股票指标因子原表,可映射进 factors[...]。股本、换手率、财务、陆股通、行业 code 等 高级数据 风格 API 均优先从这里或 bt_daily_features_v1 的 extra_factors 中读取。".to_string(), + detail: "股票指标因子原表,可映射进 factors[...]。股本、换手率、财务、陆股通、行业 code/name、概念、标签等数据 API 均优先从这里或 bt_daily_features_v1 的 extra_factors 中读取;数值 JSON 字段进入数值因子,字符串 JSON 字段进入字符串因子。".to_string(), + fields: vec![], + }, + ManualFactorSource { + table: "factors.csv / factors/".to_string(), + detail: "引擎离线数据源。基础列为 date,symbol,market_cap_bn,free_float_cap_bn,pe_ttm,turnover_ratio,effective_turnover_ratio,后续 JSON 扩展列可混合数值和字符串字段,例如 {\"custom_alpha\": 7, \"industry_name\": \"电子\", \"concept\": \"ai_chip\"}。字段名会去除首尾空白并转小写;建议统一使用小写下划线。".to_string(), fields: vec![], }, ManualFactorSource { @@ -268,6 +276,10 @@ pub fn built_in_strategy_manual() -> StrategyAiManual { title: "涨停触达后满仓,否则半仓".to_string(), code: "allocation.buy_scale(touched_upper_limit ? 1.0 : 0.5)".to_string(), }, + ManualExample { + title: "字符串因子过滤".to_string(), + code: "filter.stock_expr(industry_name(\"citics\", 1) == \"电子\" && factor_text(\"concept\") == \"ai_chip\")".to_string(), + }, ManualExample { title: "next tick 撮合 + tick 滑点".to_string(), code: "execution.matching_type(\"next_tick_last\")\nexecution.slippage(\"tick_size\", 1)".to_string(), diff --git a/docs/engine-capability-roadmap.md b/docs/engine-capability-roadmap.md index 8a558c8..6ddf3cb 100644 --- a/docs/engine-capability-roadmap.md +++ b/docs/engine-capability-roadmap.md @@ -41,7 +41,7 @@ futures path. Confirmed aligned areas: | P1 | Futures transaction cost decider | Closed. `FuturesTransactionCostModel` calculates by-money/by-volume open/close/close-today costs from trading parameters. | None. | | P1 | Futures settlement price mode | Closed. Engine supports configurable settlement price mode and resolves settlement/prev-settlement from factor fields with close/prev_close fallback. | Add dedicated settlement columns if the storage layer later separates them from factors. | | P1 | Frontend risk validators for futures | Closed for zero quantity, invalid limit price, active-contract, trading-phase, tick-aligned limit price, price-limit, self-trade crossing risk, paused/no executable price, margin, and close-position rejection diagnostics. | Add more exchange metadata columns only when source data exposes them. | -| P2 | Advanced data helper APIs | Closed for the engine-native data model. `DataSet` and `StrategyContext` expose dividends, splits, yield curves, generic factors, margin lists, securities margin, shares, turnover, price change, industry code, stock-connect, fundamentals, financials, point-in-time financials, current performance, dominant future, and dominant future price helpers. | Add string factor support only if source data exposes non-numeric categories. | +| P2 | Advanced data helper APIs | Closed for the engine-native data model. `DataSet` and `StrategyContext` expose dividends, splits, yield curves, numeric and string generic factors, margin lists, securities margin, shares, turnover, price change, industry code/name, stock-connect, fundamentals, financials, point-in-time financials, current performance, dominant future, and dominant future price helpers. | None. | | P2 | Analyzer/report output | Closed for normalized trades, positions, monthly returns, risk summary, equity curve, benchmark series, metrics, and JSON report bundle via `BacktestResult::analyzer_report(_json)`. | UI/service download endpoints can serialize this report directly. | | P3 | Mod/config/plugin architecture | Closed for a lightweight engine-native model: `BacktestProcessMod`, `BacktestProcessModLoader`, enabled-name installation, and event-bus lifecycle hooks. | Add concrete production mods/toggles as requirements appear. | @@ -88,6 +88,8 @@ futures path. Confirmed aligned areas: margin, shares, turnover, price change, stock-connect, industry code, fundamentals, financials, point-in-time financials, current performance, dominant future, and dominant future price helpers. +- [x] String-valued factor ingestion and expression helpers for category fields + such as industry names, concepts, labels, and custom text factors. - [x] Normalized trades, positions, benchmark, monthly returns, risk summary, equity curve, metrics, and downloadable analyzer bundle. - [x] Event-bus process listeners and installable process mods. @@ -97,5 +99,4 @@ futures path. Confirmed aligned areas: P0-P2 capability gaps are closed in the engine core. P3 has a lightweight event-driven extension loader. Remaining work should be driven by concrete production strategy or UI requirements: optional vendor-specific depth fields, -additional exchange metadata columns, string-valued factor categories, and -service/UI download wiring. +additional exchange metadata columns, and service/UI download wiring.