Skip to main content

subcog/storage/traits/
graph.rs

1//! Graph backend trait for knowledge graph storage.
2//!
3//! The graph layer provides entity and relationship storage with temporal tracking,
4//! enabling knowledge graph construction and traversal.
5//!
6//! # Available Implementations
7//!
8//! | Backend | Use Case | Features |
9//! |---------|----------|----------|
10//! | `SqliteGraphBackend` | Default; embedded | Recursive CTEs for traversal |
11//! | `InMemoryGraphBackend` | Testing | Fast, no persistence |
12//! | `Neo4jGraphBackend` | Future | Native graph operations |
13//!
14//! # Error Modes and Guarantees
15//!
16//! All backends return `Result<T>` with errors propagated via [`crate::Error`].
17//!
18//! ## Entity Operations
19//!
20//! | Operation | Complexity | Notes |
21//! |-----------|------------|-------|
22//! | `store_entity` | O(1) | Insert or update |
23//! | `get_entity` | O(1) | By ID lookup |
24//! | `query_entities` | O(log n) | With filters |
25//! | `delete_entity` | O(k) | k = relationship count |
26//!
27//! ## Relationship Operations
28//!
29//! | Operation | Complexity | Notes |
30//! |-----------|------------|-------|
31//! | `store_relationship` | O(1) | Insert or update |
32//! | `query_relationships` | O(log n) | With filters |
33//! | `traverse` | O(b^d) | b = branching, d = depth |
34//!
35//! ## Temporal Queries
36//!
37//! Bitemporal queries filter by both `valid_time` and `transaction_time`:
38//! - `valid_at`: Filter entities/relationships valid at a point in time
39//! - `as_of`: Filter by when records were known to the system
40//!
41//! # Example
42//!
43//! ```rust,ignore
44//! use subcog::storage::traits::GraphBackend;
45//! use subcog::models::graph::{Entity, EntityType, EntityQuery};
46//!
47//! // Query all Person entities with confidence > 0.8
48//! let query = EntityQuery::new()
49//!     .with_type(EntityType::Person)
50//!     .with_min_confidence(0.8)
51//!     .with_limit(20);
52//!
53//! let people = backend.query_entities(&query)?;
54//! ```
55
56use crate::Result;
57use crate::models::graph::{
58    Entity, EntityId, EntityMention, EntityQuery, EntityType, Relationship, RelationshipQuery,
59    RelationshipType, TraversalResult,
60};
61use crate::models::temporal::BitemporalPoint;
62use crate::models::{Domain, MemoryId};
63
64/// Trait for graph layer backends.
65///
66/// Graph backends provide entity and relationship storage for knowledge graph
67/// construction, with support for temporal queries and graph traversal.
68///
69/// # Implementor Notes
70///
71/// - Methods use `&self` to enable sharing via `Arc<dyn GraphBackend>`
72/// - Use interior mutability (e.g., `Mutex<Connection>`) for mutable state
73/// - Implement `traverse()` with recursive CTEs or similar for efficient multi-hop queries
74/// - Support bitemporal filtering on all query methods
75/// - Entity deletion should cascade to relationships (or return error if referenced)
76pub trait GraphBackend: Send + Sync {
77    // ========================================================================
78    // Entity CRUD Operations
79    // ========================================================================
80
81    /// Stores an entity in the graph.
82    ///
83    /// If an entity with the same ID exists, it is updated.
84    /// Uses interior mutability for thread-safe concurrent access.
85    ///
86    /// # Errors
87    ///
88    /// Returns an error if the storage operation fails.
89    fn store_entity(&self, entity: &Entity) -> Result<()>;
90
91    /// Retrieves an entity by ID.
92    ///
93    /// # Errors
94    ///
95    /// Returns an error if the lookup operation fails.
96    fn get_entity(&self, id: &EntityId) -> Result<Option<Entity>>;
97
98    /// Queries entities with optional filters.
99    ///
100    /// Returns entities matching the query criteria, ordered by relevance
101    /// (mention count, confidence, or recency depending on query).
102    ///
103    /// # Errors
104    ///
105    /// Returns an error if the query operation fails.
106    fn query_entities(&self, query: &EntityQuery) -> Result<Vec<Entity>>;
107
108    /// Deletes an entity by ID.
109    ///
110    /// This also removes all relationships involving the entity and
111    /// all entity mentions.
112    ///
113    /// Returns `true` if the entity was deleted, `false` if not found.
114    ///
115    /// # Errors
116    ///
117    /// Returns an error if the deletion operation fails.
118    fn delete_entity(&self, id: &EntityId) -> Result<bool>;
119
120    /// Merges multiple entities into a canonical entity.
121    ///
122    /// The first entity ID becomes the canonical entity. All relationships
123    /// from the other entities are re-pointed to the canonical entity,
124    /// and the other entities are deleted.
125    ///
126    /// # Arguments
127    ///
128    /// * `entity_ids` - Entity IDs to merge (first is canonical)
129    /// * `canonical_name` - New canonical name for the merged entity
130    ///
131    /// # Errors
132    ///
133    /// Returns an error if any entity is not found or the merge fails.
134    fn merge_entities(&self, entity_ids: &[EntityId], canonical_name: &str) -> Result<Entity>;
135
136    /// Finds entities by name using fuzzy matching.
137    ///
138    /// Searches both canonical names and aliases.
139    ///
140    /// # Errors
141    ///
142    /// Returns an error if the search operation fails.
143    fn find_entities_by_name(
144        &self,
145        name: &str,
146        entity_type: Option<EntityType>,
147        domain: Option<&Domain>,
148        limit: usize,
149    ) -> Result<Vec<Entity>>;
150
151    // ========================================================================
152    // Relationship CRUD Operations
153    // ========================================================================
154
155    /// Stores a relationship in the graph.
156    ///
157    /// If a relationship between the same entities with the same type exists,
158    /// it may be updated or a new version created (depending on temporal settings).
159    ///
160    /// # Errors
161    ///
162    /// Returns an error if the storage operation fails or if either entity
163    /// referenced by the relationship does not exist.
164    fn store_relationship(&self, relationship: &Relationship) -> Result<()>;
165
166    /// Queries relationships with optional filters.
167    ///
168    /// Returns relationships matching the query criteria.
169    ///
170    /// # Errors
171    ///
172    /// Returns an error if the query operation fails.
173    fn query_relationships(&self, query: &RelationshipQuery) -> Result<Vec<Relationship>>;
174
175    /// Deletes relationships matching the query.
176    ///
177    /// Returns the number of relationships deleted.
178    ///
179    /// # Errors
180    ///
181    /// Returns an error if the deletion operation fails.
182    fn delete_relationships(&self, query: &RelationshipQuery) -> Result<usize>;
183
184    /// Gets all relationship types between two entities.
185    ///
186    /// # Errors
187    ///
188    /// Returns an error if the query operation fails.
189    fn get_relationship_types(
190        &self,
191        from_entity: &EntityId,
192        to_entity: &EntityId,
193    ) -> Result<Vec<RelationshipType>>;
194
195    // ========================================================================
196    // Entity Mention Operations
197    // ========================================================================
198
199    /// Stores an entity mention (link between entity and memory).
200    ///
201    /// # Errors
202    ///
203    /// Returns an error if the storage operation fails.
204    fn store_mention(&self, mention: &EntityMention) -> Result<()>;
205
206    /// Gets all mentions of an entity.
207    ///
208    /// Returns memory IDs where the entity was mentioned, with confidence scores.
209    ///
210    /// # Errors
211    ///
212    /// Returns an error if the query operation fails.
213    fn get_mentions_for_entity(&self, entity_id: &EntityId) -> Result<Vec<EntityMention>>;
214
215    /// Gets all entities mentioned in a memory.
216    ///
217    /// # Errors
218    ///
219    /// Returns an error if the query operation fails.
220    fn get_entities_in_memory(&self, memory_id: &MemoryId) -> Result<Vec<Entity>>;
221
222    /// Deletes all mentions of an entity.
223    ///
224    /// Returns the number of mentions deleted.
225    ///
226    /// # Errors
227    ///
228    /// Returns an error if the deletion operation fails.
229    fn delete_mentions_for_entity(&self, entity_id: &EntityId) -> Result<usize>;
230
231    /// Deletes all entity mentions for a memory.
232    ///
233    /// Returns the number of mentions deleted.
234    ///
235    /// # Errors
236    ///
237    /// Returns an error if the deletion operation fails.
238    fn delete_mentions_for_memory(&self, memory_id: &MemoryId) -> Result<usize>;
239
240    // ========================================================================
241    // Graph Traversal Operations
242    // ========================================================================
243
244    /// Traverses the graph from a starting entity.
245    ///
246    /// Performs breadth-first traversal up to `max_depth` hops, collecting
247    /// all reachable entities and the relationships used to reach them.
248    ///
249    /// # Arguments
250    ///
251    /// * `start` - Starting entity ID
252    /// * `max_depth` - Maximum traversal depth (1 = immediate neighbors)
253    /// * `relationship_types` - Optional filter for relationship types
254    /// * `min_confidence` - Minimum confidence threshold for relationships
255    ///
256    /// # Errors
257    ///
258    /// Returns an error if the traversal operation fails.
259    fn traverse(
260        &self,
261        start: &EntityId,
262        max_depth: u32,
263        relationship_types: Option<&[RelationshipType]>,
264        min_confidence: Option<f32>,
265    ) -> Result<TraversalResult>;
266
267    /// Finds the shortest path between two entities.
268    ///
269    /// Returns `None` if no path exists within `max_depth`.
270    ///
271    /// # Errors
272    ///
273    /// Returns an error if the operation fails.
274    fn find_path(
275        &self,
276        from: &EntityId,
277        to: &EntityId,
278        max_depth: u32,
279    ) -> Result<Option<TraversalResult>>;
280
281    /// Gets entities related to a given entity within N hops.
282    ///
283    /// Convenience method combining traversal with entity extraction.
284    ///
285    /// # Errors
286    ///
287    /// Returns an error if the operation fails.
288    fn get_related_entities(
289        &self,
290        entity_id: &EntityId,
291        max_depth: u32,
292        limit: usize,
293    ) -> Result<Vec<Entity>> {
294        let result = self.traverse(entity_id, max_depth, None, None)?;
295        let entities = result
296            .entities
297            .into_iter()
298            .filter(|e| e.id != *entity_id) // Exclude starting entity
299            .take(limit)
300            .collect();
301        Ok(entities)
302    }
303
304    // ========================================================================
305    // Temporal Query Operations
306    // ========================================================================
307
308    /// Queries entities at a specific point in bitemporal space.
309    ///
310    /// Returns entities that were valid at `point.valid_at` and were known
311    /// to the system as of `point.as_of`.
312    ///
313    /// # Errors
314    ///
315    /// Returns an error if the query operation fails.
316    fn query_entities_at(
317        &self,
318        query: &EntityQuery,
319        point: &BitemporalPoint,
320    ) -> Result<Vec<Entity>>;
321
322    /// Queries relationships at a specific point in bitemporal space.
323    ///
324    /// # Errors
325    ///
326    /// Returns an error if the query operation fails.
327    fn query_relationships_at(
328        &self,
329        query: &RelationshipQuery,
330        point: &BitemporalPoint,
331    ) -> Result<Vec<Relationship>>;
332
333    /// Closes (ends) an entity's valid time at the given timestamp.
334    ///
335    /// This marks the entity as no longer valid from the given time forward,
336    /// without deleting historical data.
337    ///
338    /// # Errors
339    ///
340    /// Returns an error if the entity is not found or the operation fails.
341    fn close_entity_valid_time(&self, id: &EntityId, end_time: i64) -> Result<()>;
342
343    /// Closes (ends) a relationship's valid time at the given timestamp.
344    ///
345    /// # Errors
346    ///
347    /// Returns an error if the relationship is not found or the operation fails.
348    fn close_relationship_valid_time(
349        &self,
350        from_entity: &EntityId,
351        to_entity: &EntityId,
352        relationship_type: RelationshipType,
353        end_time: i64,
354    ) -> Result<()>;
355
356    // ========================================================================
357    // Utility Operations
358    // ========================================================================
359
360    /// Returns statistics about the graph.
361    ///
362    /// # Errors
363    ///
364    /// Returns an error if the operation fails.
365    fn get_stats(&self) -> Result<GraphStats>;
366
367    /// Clears all graph data.
368    ///
369    /// Use with caution - this removes all entities, relationships, and mentions.
370    ///
371    /// # Errors
372    ///
373    /// Returns an error if the operation fails.
374    fn clear(&self) -> Result<()>;
375}
376
377/// Statistics about the knowledge graph.
378#[derive(Debug, Clone, Default)]
379pub struct GraphStats {
380    /// Total number of entities.
381    pub entity_count: usize,
382    /// Number of entities by type.
383    pub entities_by_type: std::collections::HashMap<EntityType, usize>,
384    /// Total number of relationships.
385    pub relationship_count: usize,
386    /// Number of relationships by type.
387    pub relationships_by_type: std::collections::HashMap<RelationshipType, usize>,
388    /// Total number of entity mentions.
389    pub mention_count: usize,
390    /// Average relationships per entity.
391    pub avg_relationships_per_entity: f32,
392}
393
394impl GraphStats {
395    /// Creates empty stats.
396    #[must_use]
397    pub fn new() -> Self {
398        Self::default()
399    }
400}
401
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every field of `stats` is in its empty state: zero
    /// counters, no per-type buckets, and a zero average.
    fn assert_stats_empty(stats: &GraphStats) {
        assert_eq!(stats.entity_count, 0);
        assert_eq!(stats.relationship_count, 0);
        assert_eq!(stats.mention_count, 0);
        assert!(stats.entities_by_type.is_empty());
        assert!(stats.relationships_by_type.is_empty());
        // Tolerance check instead of `==` to avoid a direct float comparison.
        assert!(stats.avg_relationships_per_entity.abs() < f32::EPSILON);
    }

    /// `GraphStats::default()` must leave all six fields empty, not only the
    /// three scalar counters.
    #[test]
    fn test_graph_stats_default() {
        let stats = GraphStats::default();
        assert_stats_empty(&stats);
    }

    /// `GraphStats::new()` is documented as creating empty stats, so it must
    /// produce the same empty state as `Default`.
    #[test]
    fn test_graph_stats_new_is_empty() {
        let stats = GraphStats::new();
        assert_stats_empty(&stats);
    }
}