subcog/storage/traits/index.rs
1//! Index backend trait.
2//!
3//! The index layer provides full-text search capabilities using BM25 or similar algorithms.
4//! It enables keyword-based retrieval of memories.
5//!
6//! # Available Implementations
7//!
8//! | Backend | Use Case | Features |
9//! |---------|----------|----------|
10//! | `SqliteBackend` | Default; embedded | FTS5 with BM25 ranking |
11//! | `PostgresBackend` | Multi-user | `ts_rank` with stemming |
12//! | `RediSearchBackend` | High throughput | Prefix/fuzzy matching |
13//!
14//! # Error Modes and Guarantees
15//!
16//! All backends return `Result<T>` with errors propagated via [`crate::Error`].
17//!
18//! ## Indexing Behavior
19//!
20//! | Backend | Atomicity | Index Lag | Rebuild Cost |
21//! |---------|-----------|-----------|--------------|
22//! | `SQLite` | Transactional | Immediate | O(n) |
23//! | PostgreSQL | Transactional | Immediate | O(n log n) |
24//! | `RediSearch` | Eventual | <100ms | O(n) |
25//!
26//! ## Error Recovery
27//!
28//! | Error Type | Recovery Strategy |
29//! |------------|-------------------|
30//! | `Error::Storage` | Check DB connection; retry |
31//! | `Error::InvalidInput` | Query syntax error; validate before calling |
32//! | `Error::OperationFailed` | Index corruption; call `reindex()` |
33//!
34//! ## Consistency with Persistence Layer
35//!
36//! The index is a **derived view** of the persistence layer. If the index becomes
37//! stale or corrupted, call `reindex()` to rebuild from the authoritative persistence store.
38//!
39//! ## Performance Characteristics
40//!
41//! - **Search complexity**: O(log n) for indexed queries
42//! - **Batch efficiency**: `get_memories_batch()` avoids N+1 query pattern
43//! - **FTS tokenization**: Whitespace + punctuation split (`SQLite`), language-aware (`PostgreSQL`)
44
45use crate::Result;
46use crate::models::{Memory, MemoryId, SearchFilter};
47
48/// Trait for index layer backends.
49///
50/// Index backends provide full-text search capabilities using BM25 or similar algorithms.
51///
52/// # Implementor Notes
53///
54/// - Methods use `&self` to enable sharing via `Arc<dyn IndexBackend>`
55/// - Use interior mutability (e.g., `Mutex<Connection>`) for mutable state
56/// - Implement `get_memories_batch()` with an optimized query (e.g., SQL `IN` clause)
57/// - Use FTS ranking scores for the `f32` score in search results
58/// - Ensure `clear()` does not affect the persistence layer
59pub trait IndexBackend: Send + Sync {
60 /// Indexes a memory for full-text search.
61 ///
62 /// Uses interior mutability for thread-safe concurrent access.
63 ///
64 /// # Errors
65 ///
66 /// Returns an error if the indexing operation fails.
67 fn index(&self, memory: &Memory) -> Result<()>;
68
69 /// Removes a memory from the index.
70 ///
71 /// Uses interior mutability for thread-safe concurrent access.
72 ///
73 /// # Errors
74 ///
75 /// Returns an error if the removal operation fails.
76 fn remove(&self, id: &MemoryId) -> Result<bool>;
77
78 /// Searches for memories matching a text query.
79 ///
80 /// Returns memory IDs with their BM25 scores, ordered by relevance.
81 ///
82 /// # Errors
83 ///
84 /// Returns an error if the search operation fails.
85 fn search(
86 &self,
87 query: &str,
88 filter: &SearchFilter,
89 limit: usize,
90 ) -> Result<Vec<(MemoryId, f32)>>;
91
92 /// Re-indexes all memories.
93 ///
94 /// Uses interior mutability for thread-safe concurrent access.
95 ///
96 /// # Errors
97 ///
98 /// Returns an error if any memory fails to index.
99 fn reindex(&self, memories: &[Memory]) -> Result<()> {
100 for memory in memories {
101 self.index(memory)?;
102 }
103 Ok(())
104 }
105
106 /// Clears the entire index.
107 ///
108 /// Uses interior mutability for thread-safe concurrent access.
109 ///
110 /// # Errors
111 ///
112 /// Returns an error if the clear operation fails.
113 fn clear(&self) -> Result<()>;
114
115 /// Lists all indexed memories, optionally filtered.
116 ///
117 /// Unlike `search`, this doesn't require a query and returns all entries.
118 ///
119 /// # Errors
120 ///
121 /// Returns an error if the operation fails.
122 fn list_all(&self, filter: &SearchFilter, limit: usize) -> Result<Vec<(MemoryId, f32)>>;
123
124 /// Retrieves a memory by ID.
125 ///
126 /// # Errors
127 ///
128 /// Returns an error if the operation fails.
129 fn get_memory(&self, id: &MemoryId) -> Result<Option<Memory>>;
130
131 /// Retrieves multiple memories by their IDs in a single batch query.
132 ///
133 /// This is more efficient than calling `get_memory` in a loop (N+1 query pattern).
134 /// Returns memories in the same order as the input IDs, with None for missing IDs.
135 ///
136 /// # Errors
137 ///
138 /// Returns an error if the operation fails.
139 fn get_memories_batch(&self, ids: &[MemoryId]) -> Result<Vec<Option<Memory>>> {
140 // Default implementation falls back to individual queries
141 ids.iter().map(|id| self.get_memory(id)).collect()
142 }
143}