Skip to main content

xlog_logic/hypergraph/
eligibility.rs

1//! Eligibility analysis: decide whether a [`HypergraphRule`] can be
2//! planned as a multiway join, or must fall back to the existing
3//! binary-join lowering.
4//!
5//! "Eligible" here means **could be planned as multiway** — it does
6//! NOT mean "must use multiway". A rule with exactly two positive
7//! atoms is eligible because the planner could legally choose either
8//! a multiway plan or a binary plan; the choice is the planner's, not
9//! the analyzer's. The CPU reference evaluator (PR 2) and later GPU
10//! kernels (PR 3) consume both shapes.
11//!
12//! "Ineligible" means at least one [`Boundary`] makes multiway
13//! planning either impossible (negation, aggregation in head) or
14//! unsupported by the executor context under consideration. Each
15//! boundary is reported separately so the explain output and tests
16//! can lock in *why* a rule fell back.
17
18use super::ir::{HypergraphRule, VertexId};
19use std::collections::BTreeMap;
20use xlog_core::ScalarType;
21
22/// The maximum number of distinct join-key variables supported by
23/// the existing binary-fallback executor. Borrowed verbatim from the
24/// `pack_keys_gpu_on_stream` constraint in
25/// `xlog-cuda/src/provider/relational.rs`.
26///
27/// This is a **binary-fallback** constraint, not a hypergraph
28/// property. WCOJ eligibility uses [`ExecutorContext::WcojEligible`]
29/// and a separate context limit; hash fallback continues to use
30/// this value verbatim.
31pub const BINARY_FALLBACK_KEY_LIMIT: usize = 4;
32
33/// The widest K-clique shape the WCOJ planner architecture admits at
34/// the eligibility layer. K=7 and K=8 are accepted here so the
35/// Phase-2 templates can inherit the same planner contract.
36pub const WCOJ_ELIGIBLE_KEY_LIMIT: usize = 8;
37
38/// Executor capability context for join-key width checks.
39#[derive(Debug, Clone, Copy, PartialEq, Eq)]
40pub enum ExecutorContext {
41    /// Existing hash/binary fallback executor. The 4-key
42    /// `pack_keys_gpu_on_stream` limit remains binding.
43    HashFallback,
44    /// WCOJ-capable planner path. K5 through K8 are admissible;
45    /// K9+ remains outside the current executor contract.
46    WcojEligible,
47}
48
49impl ExecutorContext {
50    fn join_key_limit(self) -> usize {
51        match self {
52            Self::HashFallback => BINARY_FALLBACK_KEY_LIMIT,
53            Self::WcojEligible => WCOJ_ELIGIBLE_KEY_LIMIT,
54        }
55    }
56}
57
58/// Verdict for a single rule.
59#[derive(Debug, Clone, PartialEq, Eq)]
60pub enum Eligibility {
61    /// The rule could be planned as a multiway join. Note: this is
62    /// *eligibility*, not *requirement* — the eventual planner is
63    /// free to lower an Eligible rule as a binary join if its cost
64    /// model says so. Two-atom rules are Eligible: both multiway
65    /// and binary are valid lowerings.
66    Eligible,
67    /// The rule must fall back to binary-join lowering. Each
68    /// [`Boundary`] in the vector is one independent reason; the
69    /// list is non-empty.
70    Ineligible(Vec<Boundary>),
71}
72
73impl Eligibility {
74    /// Iterate over boundaries (empty for [`Eligibility::Eligible`]).
75    pub fn boundaries(&self) -> &[Boundary] {
76        match self {
77            Eligibility::Eligible => &[],
78            Eligibility::Ineligible(bs) => bs,
79        }
80    }
81}
82
83/// One reason a rule is ineligible for multiway planning.
84///
85/// Boundaries are reported independently — a rule that is *both*
86/// negated *and* over the key limit produces two boundaries, not one
87/// "first-failed" boundary. This makes the eligibility report stable
88/// against ordering changes in future analyzer passes.
89#[derive(Debug, Clone, PartialEq, Eq)]
90pub enum Boundary {
91    /// The rule is a ground fact (no body to plan).
92    GroundFact,
93    /// The rule head contains an aggregation expression. Multiway
94    /// joins underneath an aggregation re-introduce the
95    /// aggregation-boundary problem the binary lowering already
96    /// handles via `GroupBy + Project`; we leave that machinery
97    /// intact rather than re-derive it inside multiway.
98    HeadAggregation,
99    /// The body contains a [`crate::ast::BodyLiteral::Negated`]
100    /// literal. Negated literals lower to set difference (`Diff`)
101    /// in the binary path — multiway has no equivalent at this
102    /// stage of the planner foundation.
103    BodyNegation,
104    /// The body contains a [`crate::ast::BodyLiteral::IsExpr`]
105    /// computed binding. Binding-via-expression introduces a
106    /// dependency between body literals that the join graph alone
107    /// does not capture.
108    BodyIsExpr,
109    /// The body has fewer than two positive atoms. Multiway needs
110    /// at least two atoms to be meaningful; one-atom or zero-atom
111    /// bodies are not multiway candidates regardless of executor.
112    InsufficientPositiveAtoms {
113        /// Observed number of positive body atoms.
114        positive_count: usize,
115    },
116    /// The total number of distinct join-key variables exceeds the
117    /// binary-fallback executor's `pack_keys` limit. Carried as a
118    /// runtime value so the explain output can name the count.
119    /// See [`BINARY_FALLBACK_KEY_LIMIT`].
120    JoinKeysExceedBinaryFallbackLimit {
121        /// Executor context whose key-width cap was exceeded.
122        context: ExecutorContext,
123        /// Observed distinct join-key count.
124        count: usize,
125        /// Hard limit for the selected executor context.
126        limit: usize,
127    },
128    /// A join-key variable has a [`ScalarType`] not supported by
129    /// the executor under consideration.
130    ///
131    /// Emitted by [`analyze_typed`] when a join-key vertex has a
132    /// known type (derived from a body atom's relation schema —
133    /// see [`crate::hypergraph::typed::evaluate_rule_typed`] and
134    /// the typed fixpoint variants) that is outside
135    /// [`WCOJ_SUPPORTED_KEY_TYPES`]. Structural [`analyze`] never
136    /// emits this variant.
137    ///
138    /// **Locked policy (PR 5):** unknown ≠ unsupported. A vertex
139    /// whose type cannot be derived from the supplied
140    /// [`crate::hypergraph::RefRelationStore`] is **not** flagged
141    /// here. Transitive type propagation across recursive SCC
142    /// predicates is a follow-up slice.
143    UnsupportedKeyType {
144        /// Source name of the variable whose type is unsupported.
145        var: String,
146        /// Inferred scalar type that the executor cannot handle.
147        ty: ScalarType,
148    },
149}
150
151/// Analyze a [`HypergraphRule`] and return the eligibility verdict.
152///
153/// All boundaries are checked; the returned vector is in a stable
154/// order matching the [`Boundary`] enum's declaration order (modulo
155/// the one-or-zero-positive-atoms check, which is reported with the
156/// observed `positive_count`). Order stability matters for the
157/// explain-output snapshot tests.
158pub fn analyze(hg: &HypergraphRule, context: ExecutorContext) -> Eligibility {
159    let mut boundaries = Vec::new();
160
161    if hg.is_fact {
162        boundaries.push(Boundary::GroundFact);
163    }
164    if hg.head_has_aggregation {
165        boundaries.push(Boundary::HeadAggregation);
166    }
167    if hg.has_negation {
168        boundaries.push(Boundary::BodyNegation);
169    }
170    if hg.has_is_expr {
171        boundaries.push(Boundary::BodyIsExpr);
172    }
173    let positive_count = hg.hyperedge_count();
174    if !hg.is_fact && positive_count < 2 {
175        boundaries.push(Boundary::InsufficientPositiveAtoms { positive_count });
176    }
177
178    // Count distinct join-key variables. A "join-key variable" is
179    // any vertex shared by two or more hyperedges. Variables that
180    // appear in only one atom contribute to projection / output
181    // schema but are not join keys.
182    let join_key_count = count_join_keys(hg);
183    let join_key_limit = context.join_key_limit();
184    if join_key_count > join_key_limit {
185        boundaries.push(Boundary::JoinKeysExceedBinaryFallbackLimit {
186            context,
187            count: join_key_count,
188            limit: join_key_limit,
189        });
190    }
191
192    if boundaries.is_empty() {
193        Eligibility::Eligible
194    } else {
195        Eligibility::Ineligible(boundaries)
196    }
197}
198
199/// Return true when [`analyze`] says the rule is eligible in the
200/// selected executor context.
201pub fn is_eligible(hg: &HypergraphRule, context: ExecutorContext) -> bool {
202    matches!(analyze(hg, context), Eligibility::Eligible)
203}
204
205/// Count vertices that appear in two or more hyperedges. These are
206/// the variables the planner must equi-join across atoms; vertices
207/// in only one hyperedge do not constrain the cross-atom plan and
208/// are not join keys.
209fn count_join_keys(hg: &HypergraphRule) -> usize {
210    let mut occurrences: Vec<usize> = vec![0; hg.vertex_count()];
211    for edge in &hg.hyperedges {
212        // Count each vertex AT MOST ONCE per hyperedge — a self-join
213        // within a single atom (e.g. `p(X, X)`) is not a multi-atom
214        // join key.
215        for vid in edge.vertices() {
216            let VertexId(idx) = vid;
217            occurrences[idx] += 1;
218        }
219    }
220    occurrences.iter().filter(|c| **c >= 2).count()
221}
222
223/// Scalar types that the WCOJ reference evaluator supports as
224/// join-key types. Membership is checked by [`analyze_typed`] when
225/// emitting [`Boundary::UnsupportedKeyType`].
226///
227/// The set is intentionally narrow and not configurable in PR 2:
228/// only `U32`, `U64`, and `Symbol` are supported. Future PRs may
229/// widen the set as the reference evaluator and (later) GPU
230/// kernels grow type coverage; widening is a deliberate
231/// configuration change, not a parameter to this function.
232pub const WCOJ_SUPPORTED_KEY_TYPES: &[ScalarType] =
233    &[ScalarType::U32, ScalarType::U64, ScalarType::Symbol];
234
235/// Typed eligibility analysis.
236///
237/// Same as [`analyze`], but additionally consults `vertex_types` —
238/// a map from variable name to inferred [`ScalarType`] — to emit
239/// [`Boundary::UnsupportedKeyType`] for join-key vertices whose
240/// type is outside [`WCOJ_SUPPORTED_KEY_TYPES`].
241///
242/// "Join-key vertex" matches the same definition used by [`analyze`]:
243/// a vertex that appears in two or more hyperedges. Projection-only
244/// vertices (those appearing in exactly one hyperedge) are NOT
245/// checked — their types do not affect WCOJ planning.
246///
247/// **Locked policy (PR 5): unknown ≠ unsupported.** Vertices
248/// missing from `vertex_types` are NOT flagged. The
249/// [`crate::hypergraph::typed`] gate populates this map via
250/// schema-driven derivation from a
251/// [`crate::hypergraph::RefRelationStore`]; vertices anchored only
252/// through predicates absent from that store (e.g. an SCC
253/// predicate referenced recursively before its first iteration)
254/// stay absent and pass through. Transitive type propagation
255/// across recursive predicates is a follow-up slice.
256pub fn analyze_typed(
257    hg: &HypergraphRule,
258    vertex_types: &BTreeMap<String, ScalarType>,
259    context: ExecutorContext,
260) -> Eligibility {
261    // Start from the structural verdict so structural boundaries
262    // (negation, aggregation, etc.) carry through.
263    let base = analyze(hg, context);
264    let mut boundaries: Vec<Boundary> = base.boundaries().to_vec();
265
266    let join_key_ids = join_key_vertex_ids(hg);
267    for vid in join_key_ids {
268        let name = &hg.vertex(vid).name;
269        if let Some(&ty) = vertex_types.get(name) {
270            if !WCOJ_SUPPORTED_KEY_TYPES.contains(&ty) {
271                boundaries.push(Boundary::UnsupportedKeyType {
272                    var: name.clone(),
273                    ty,
274                });
275            }
276        }
277    }
278
279    if boundaries.is_empty() {
280        Eligibility::Eligible
281    } else {
282        Eligibility::Ineligible(boundaries)
283    }
284}
285
286/// Return the [`VertexId`]s of vertices that appear in two or more
287/// hyperedges (the "join key" set), in vertex-id order.
288fn join_key_vertex_ids(hg: &HypergraphRule) -> Vec<VertexId> {
289    let mut occurrences: Vec<usize> = vec![0; hg.vertex_count()];
290    for edge in &hg.hyperedges {
291        for vid in edge.vertices() {
292            let VertexId(idx) = vid;
293            occurrences[idx] += 1;
294        }
295    }
296    occurrences
297        .iter()
298        .enumerate()
299        .filter(|(_, c)| **c >= 2)
300        .map(|(i, _)| VertexId(i))
301        .collect()
302}