Improve jq microcap execution semantics

This commit is contained in:
boris
2026-04-18 18:02:50 +08:00
parent 9f4165e689
commit 0e2c25e4c4
26 changed files with 5058 additions and 362 deletions

View File

@@ -1,7 +1,7 @@
use chrono::NaiveDate;
use serde::Serialize;
use crate::data::{BenchmarkSnapshot, DataSet};
use crate::data::{BenchmarkSnapshot, DataSet, EligibleUniverseSnapshot};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BandRegime {
@@ -48,7 +48,10 @@ pub struct SelectionContext<'a> {
pub trait UniverseSelector {
fn select(&self, ctx: &SelectionContext<'_>) -> Vec<UniverseCandidate>;
fn select_with_diagnostics(&self, ctx: &SelectionContext<'_>) -> (Vec<UniverseCandidate>, SelectionDiagnostics);
fn select_with_diagnostics(
&self,
ctx: &SelectionContext<'_>,
) -> (Vec<UniverseCandidate>, SelectionDiagnostics);
}
#[derive(Debug, Clone)]
@@ -103,7 +106,10 @@ impl UniverseSelector for DynamicMarketCapBandSelector {
self.select_with_diagnostics(ctx).0
}
fn select_with_diagnostics(&self, ctx: &SelectionContext<'_>) -> (Vec<UniverseCandidate>, SelectionDiagnostics) {
fn select_with_diagnostics(
&self,
ctx: &SelectionContext<'_>,
) -> (Vec<UniverseCandidate>, SelectionDiagnostics) {
let _regime = self.regime(ctx.reference_level);
let (min_cap, max_cap) = self.band_for_level(ctx.reference_level);
let mut diagnostics = SelectionDiagnostics {
@@ -125,78 +131,24 @@ impl UniverseSelector for DynamicMarketCapBandSelector {
rejection_examples: Vec::new(),
};
diagnostics.factor_total = ctx.data.factor_snapshots_on(ctx.decision_date).len();
diagnostics.market_cap_missing_count = diagnostics
.factor_total
.saturating_sub(ctx.data.eligible_universe_on(ctx.decision_date).len());
let eligible = ctx.data.eligible_universe_on(ctx.decision_date);
let start_idx = lower_bound_by_market_cap(eligible, min_cap);
let mut selected = Vec::new();
for factor in ctx.data.factor_snapshots_on(ctx.decision_date) {
diagnostics.factor_total += 1;
if factor.market_cap_bn <= 0.0 || !factor.market_cap_bn.is_finite() {
diagnostics.market_cap_missing_count += 1;
if diagnostics.missing_market_cap_symbols.len() < 8 {
diagnostics.missing_market_cap_symbols.push(factor.symbol.clone());
}
if diagnostics.rejection_examples.len() < 12 {
diagnostics.rejection_examples.push(format!("{}: market_cap missing_or_non_positive", factor.symbol));
}
continue;
for factor in eligible.iter().skip(start_idx) {
if factor.market_cap_bn > max_cap {
break;
}
let Some(candidate) = ctx.data.candidate(ctx.decision_date, &factor.symbol) else {
diagnostics.candidate_missing_count += 1;
if diagnostics.rejection_examples.len() < 12 {
diagnostics.rejection_examples.push(format!("{}: candidate snapshot missing", factor.symbol));
}
continue;
};
let Some(market) = ctx.data.market(ctx.decision_date, &factor.symbol) else {
diagnostics.market_missing_count += 1;
if diagnostics.rejection_examples.len() < 12 {
diagnostics.rejection_examples.push(format!("{}: market snapshot missing", factor.symbol));
}
continue;
};
if !candidate.eligible_for_selection() {
diagnostics.not_eligible_count += 1;
if diagnostics.rejection_examples.len() < 12 {
diagnostics.rejection_examples.push(format!("{}: candidate flags rejected", factor.symbol));
}
continue;
}
if market.paused {
diagnostics.paused_count += 1;
if diagnostics.rejection_examples.len() < 12 {
diagnostics.rejection_examples.push(format!("{}: market paused", factor.symbol));
}
continue;
}
if factor.market_cap_bn < min_cap || factor.market_cap_bn > max_cap {
diagnostics.out_of_band_count += 1;
if diagnostics.rejection_examples.len() < 12 {
diagnostics.rejection_examples.push(format!(
"{}: market_cap {:.2} out_of_band {:.2}-{:.2}",
factor.symbol, factor.market_cap_bn, min_cap, max_cap
));
}
continue;
}
selected.push(UniverseCandidate {
symbol: factor.symbol.clone(),
market_cap_bn: factor.market_cap_bn,
free_float_cap_bn: factor.free_float_cap_bn,
band_low: min_cap,
band_high: max_cap,
});
selected.push(to_universe_candidate(factor, min_cap, max_cap));
}
selected.sort_by(|left, right| {
left.market_cap_bn
.partial_cmp(&right.market_cap_bn)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| left.symbol.cmp(&right.symbol))
});
diagnostics.out_of_band_count = eligible.len().saturating_sub(selected.len());
diagnostics.selected_before_limit = selected.len();
if selected.len() > self.top_n {
selected.truncate(self.top_n);
@@ -206,3 +158,31 @@ impl UniverseSelector for DynamicMarketCapBandSelector {
(selected, diagnostics)
}
}
/// Returns the index of the first row whose `market_cap_bn` is not less than `target`.
///
/// The result is `rows.len()` when every row is below `target`, and `0` for an empty
/// slice. Because the predicate is `market_cap_bn < target`, a NaN `target` compares
/// false against every row and therefore yields `0`.
///
/// NOTE(review): this assumes `rows` is ordered ascending by `market_cap_bn`; the
/// ordering is established outside this function — confirm against the producer of
/// the eligible-universe slice. On a slice that is not partitioned by the predicate
/// the returned index is meaningless.
fn lower_bound_by_market_cap(rows: &[EligibleUniverseSnapshot], target: f64) -> usize {
    // `partition_point` is the standard-library lower-bound search: it returns the
    // index of the first element for which the predicate is false.
    rows.partition_point(|row| row.market_cap_bn < target)
}
/// Builds a `UniverseCandidate` from an eligible-universe row and the band limits.
///
/// The symbol is cloned from `factor`; both cap values are copied from it unchanged.
/// `band_low` and `band_high` are stored on the candidate exactly as passed in.
fn to_universe_candidate(
    factor: &EligibleUniverseSnapshot,
    band_low: f64,
    band_high: f64,
) -> UniverseCandidate {
    let symbol = factor.symbol.clone();
    let market_cap_bn = factor.market_cap_bn;
    let free_float_cap_bn = factor.free_float_cap_bn;

    UniverseCandidate {
        symbol,
        market_cap_bn,
        free_float_cap_bn,
        band_low,
        band_high,
    }
}