Skip to main content

subcog/storage/traits/
index.rs

1//! Index backend trait.
2//!
3//! The index layer provides full-text search capabilities using BM25 or similar algorithms.
4//! It enables keyword-based retrieval of memories.
5//!
6//! # Available Implementations
7//!
8//! | Backend | Use Case | Features |
9//! |---------|----------|----------|
10//! | `SqliteBackend` | Default; embedded | FTS5 with BM25 ranking |
11//! | `PostgresBackend` | Multi-user | `ts_rank` with stemming |
12//! | `RediSearchBackend` | High throughput | Prefix/fuzzy matching |
13//!
14//! # Error Modes and Guarantees
15//!
16//! All backends return `Result<T>` with errors propagated via [`crate::Error`].
17//!
18//! ## Indexing Behavior
19//!
20//! | Backend | Atomicity | Index Lag | Rebuild Cost |
21//! |---------|-----------|-----------|--------------|
22//! | `SQLite` | Transactional | Immediate | O(n) |
23//! | PostgreSQL | Transactional | Immediate | O(n log n) |
24//! | `RediSearch` | Eventual | <100ms | O(n) |
25//!
26//! ## Error Recovery
27//!
28//! | Error Type | Recovery Strategy |
29//! |------------|-------------------|
30//! | `Error::Storage` | Check DB connection; retry |
31//! | `Error::InvalidInput` | Query syntax error; validate before calling |
32//! | `Error::OperationFailed` | Index corruption; call `reindex()` |
33//!
34//! ## Consistency with Persistence Layer
35//!
36//! The index is a **derived view** of the persistence layer. If the index becomes
37//! stale or corrupted, call `reindex()` to rebuild from the authoritative persistence store.
38//!
39//! ## Performance Characteristics
40//!
41//! - **Search complexity**: O(log n) for indexed queries
42//! - **Batch efficiency**: `get_memories_batch()` avoids N+1 query pattern
43//! - **FTS tokenization**: Whitespace + punctuation split (`SQLite`), language-aware (`PostgreSQL`)
44
45use crate::Result;
46use crate::models::{Memory, MemoryId, SearchFilter};
47
48/// Trait for index layer backends.
49///
50/// Index backends provide full-text search capabilities using BM25 or similar algorithms.
51///
52/// # Implementor Notes
53///
54/// - Methods use `&self` to enable sharing via `Arc<dyn IndexBackend>`
55/// - Use interior mutability (e.g., `Mutex<Connection>`) for mutable state
56/// - Implement `get_memories_batch()` with an optimized query (e.g., SQL `IN` clause)
57/// - Use FTS ranking scores for the `f32` score in search results
58/// - Ensure `clear()` does not affect the persistence layer
59pub trait IndexBackend: Send + Sync {
60    /// Indexes a memory for full-text search.
61    ///
62    /// Uses interior mutability for thread-safe concurrent access.
63    ///
64    /// # Errors
65    ///
66    /// Returns an error if the indexing operation fails.
67    fn index(&self, memory: &Memory) -> Result<()>;
68
69    /// Removes a memory from the index.
70    ///
71    /// Uses interior mutability for thread-safe concurrent access.
72    ///
73    /// # Errors
74    ///
75    /// Returns an error if the removal operation fails.
76    fn remove(&self, id: &MemoryId) -> Result<bool>;
77
78    /// Searches for memories matching a text query.
79    ///
80    /// Returns memory IDs with their BM25 scores, ordered by relevance.
81    ///
82    /// # Errors
83    ///
84    /// Returns an error if the search operation fails.
85    fn search(
86        &self,
87        query: &str,
88        filter: &SearchFilter,
89        limit: usize,
90    ) -> Result<Vec<(MemoryId, f32)>>;
91
92    /// Re-indexes all memories.
93    ///
94    /// Uses interior mutability for thread-safe concurrent access.
95    ///
96    /// # Errors
97    ///
98    /// Returns an error if any memory fails to index.
99    fn reindex(&self, memories: &[Memory]) -> Result<()> {
100        for memory in memories {
101            self.index(memory)?;
102        }
103        Ok(())
104    }
105
106    /// Clears the entire index.
107    ///
108    /// Uses interior mutability for thread-safe concurrent access.
109    ///
110    /// # Errors
111    ///
112    /// Returns an error if the clear operation fails.
113    fn clear(&self) -> Result<()>;
114
115    /// Lists all indexed memories, optionally filtered.
116    ///
117    /// Unlike `search`, this doesn't require a query and returns all entries.
118    ///
119    /// # Errors
120    ///
121    /// Returns an error if the operation fails.
122    fn list_all(&self, filter: &SearchFilter, limit: usize) -> Result<Vec<(MemoryId, f32)>>;
123
124    /// Retrieves a memory by ID.
125    ///
126    /// # Errors
127    ///
128    /// Returns an error if the operation fails.
129    fn get_memory(&self, id: &MemoryId) -> Result<Option<Memory>>;
130
131    /// Retrieves multiple memories by their IDs in a single batch query.
132    ///
133    /// This is more efficient than calling `get_memory` in a loop (N+1 query pattern).
134    /// Returns memories in the same order as the input IDs, with None for missing IDs.
135    ///
136    /// # Errors
137    ///
138    /// Returns an error if the operation fails.
139    fn get_memories_batch(&self, ids: &[MemoryId]) -> Result<Vec<Option<Memory>>> {
140        // Default implementation falls back to individual queries
141        ids.iter().map(|id| self.get_memory(id)).collect()
142    }
143}