subcog/storage/traits/vector.rs
1//! Vector backend trait (DOC-H4).
2//!
3//! Provides the abstraction layer for vector similarity search backends.
4//! Implementations use HNSW or similar algorithms for approximate nearest neighbor search.
5//!
6//! # Available Implementations
7//!
8//! | Backend | Use Case | Configuration |
9//! |---------|----------|---------------|
10//! | `UsearchBackend` | Local HNSW index | Default, no external deps |
11//! | `PgVectorBackend` | PostgreSQL with pgvector | Requires PostgreSQL + pgvector extension |
12//! | `RedisVectorBackend` | Redis Vector Search | Requires Redis Stack |
13//!
14//! # Usage Example
15//!
16//! ```rust,ignore
17//! use subcog::storage::vector::UsearchBackend;
18//! use subcog::storage::traits::{VectorBackend, VectorFilter};
19//! use subcog::models::MemoryId;
20//!
21//! // Create a 384-dimensional vector backend (MiniLM-L6 embeddings)
//! let backend = UsearchBackend::new(384)?;
23//!
24//! // Insert embeddings
25//! let embedding: Vec<f32> = generate_embedding("Use PostgreSQL for storage");
26//! backend.upsert(&MemoryId::new("mem-001"), &embedding)?;
27//!
28//! // Search for similar vectors
29//! let query_embedding: Vec<f32> = generate_embedding("database storage choice");
30//! let results = backend.search(&query_embedding, &VectorFilter::new(), 10)?;
31//!
32//! for (id, similarity) in results {
33//! println!("{}: {:.2}% similar", id.as_str(), similarity * 100.0);
34//! }
35//! ```
36//!
37//! # Hybrid Search
38//!
39//! Vector search is typically combined with BM25 text search using Reciprocal Rank Fusion:
40//!
41//! ```rust,ignore
//! use subcog::models::SearchFilter;
//! use subcog::services::RecallService;
43//!
44//! // RecallService automatically combines vector + BM25 results
45//! let service = RecallService::new(config)?;
46//! let results = service.search("database decisions", SearchFilter::new(), 10)?;
47//! ```
48
49use crate::Result;
50use crate::models::{Domain, MemoryId, Namespace, SearchFilter};
51
52/// Filter criteria specific to vector similarity search.
53///
54/// This type provides a subset of [`SearchFilter`] fields that are applicable
55/// to vector search operations, making the API more type-safe and explicit.
56///
57/// # Fields
58///
59/// | Field | Description |
60/// |-------|-------------|
61/// | `namespaces` | Filter by memory namespaces |
62/// | `domains` | Filter by memory domains |
63/// | `min_score` | Minimum cosine similarity threshold (0.0 to 1.0) |
64///
65/// # Example
66///
67/// ```rust
68/// use subcog::storage::traits::VectorFilter;
69/// use subcog::models::Namespace;
70///
71/// let filter = VectorFilter::new()
72/// .with_namespace(Namespace::Decisions)
73/// .with_min_score(0.7);
74/// ```
75#[derive(Debug, Clone, Default)]
76pub struct VectorFilter {
77 /// Filter by namespaces.
78 pub namespaces: Vec<Namespace>,
79 /// Filter by domains.
80 pub domains: Vec<Domain>,
81 /// Minimum similarity score (0.0 to 1.0).
82 pub min_score: Option<f32>,
83}
84
85impl VectorFilter {
86 /// Creates an empty filter (matches all).
87 #[must_use]
88 pub const fn new() -> Self {
89 Self {
90 namespaces: Vec::new(),
91 domains: Vec::new(),
92 min_score: None,
93 }
94 }
95
96 /// Adds a namespace filter.
97 #[must_use]
98 pub fn with_namespace(mut self, namespace: Namespace) -> Self {
99 self.namespaces.push(namespace);
100 self
101 }
102
103 /// Adds a domain filter.
104 #[must_use]
105 pub fn with_domain(mut self, domain: Domain) -> Self {
106 self.domains.push(domain);
107 self
108 }
109
110 /// Sets the minimum score threshold.
111 #[must_use]
112 pub const fn with_min_score(mut self, score: f32) -> Self {
113 self.min_score = Some(score);
114 self
115 }
116
117 /// Returns true if the filter is empty (matches all).
118 #[must_use]
119 pub const fn is_empty(&self) -> bool {
120 self.namespaces.is_empty() && self.domains.is_empty() && self.min_score.is_none()
121 }
122}
123
124impl From<&SearchFilter> for VectorFilter {
125 /// Converts a [`SearchFilter`] to a [`VectorFilter`], extracting only
126 /// the fields applicable to vector search.
127 fn from(filter: &SearchFilter) -> Self {
128 Self {
129 namespaces: filter.namespaces.clone(),
130 domains: filter.domains.clone(),
131 min_score: filter.min_score,
132 }
133 }
134}
135
136impl From<SearchFilter> for VectorFilter {
137 fn from(filter: SearchFilter) -> Self {
138 Self::from(&filter)
139 }
140}
141
142/// Trait for vector layer backends.
143///
144/// Vector backends provide similarity search using embedding vectors.
145/// Implementations should be thread-safe (`Send + Sync`).
146///
147/// # Implementor Notes
148///
149/// - Methods use `&self` to enable sharing via `Arc<dyn VectorBackend>`
150/// - Use interior mutability (e.g., `Mutex<HashMap<K,V>>`) for mutable state
151///
152/// # Dimensionality
153///
154/// All embeddings must match the backend's [`dimensions()`](VectorBackend::dimensions).
155/// The default `FastEmbed` model (`all-MiniLM-L6-v2`) produces 384-dimensional vectors.
156pub trait VectorBackend: Send + Sync {
157 /// The dimensionality of embedding vectors.
158 fn dimensions(&self) -> usize;
159
160 /// Inserts or updates an embedding for a memory.
161 ///
162 /// Uses interior mutability for thread-safe concurrent access.
163 ///
164 /// # Errors
165 ///
166 /// Returns an error if the upsert operation fails.
167 fn upsert(&self, id: &MemoryId, embedding: &[f32]) -> Result<()>;
168
169 /// Removes an embedding by memory ID.
170 ///
171 /// Uses interior mutability for thread-safe concurrent access.
172 ///
173 /// # Errors
174 ///
175 /// Returns an error if the removal operation fails.
176 fn remove(&self, id: &MemoryId) -> Result<bool>;
177
178 /// Searches for similar embeddings.
179 ///
180 /// Returns memory IDs with their cosine similarity scores (0.0 to 1.0),
181 /// ordered by descending similarity.
182 ///
183 /// # Arguments
184 ///
185 /// * `query_embedding` - The query vector to find similar embeddings for
186 /// * `filter` - A [`VectorFilter`] for namespace/domain filtering and min score threshold
187 /// * `limit` - Maximum number of results to return
188 ///
189 /// # Errors
190 ///
191 /// Returns an error if the search operation fails.
192 fn search(
193 &self,
194 query_embedding: &[f32],
195 filter: &VectorFilter,
196 limit: usize,
197 ) -> Result<Vec<(MemoryId, f32)>>;
198
199 /// Returns the total count of indexed embeddings.
200 ///
201 /// # Errors
202 ///
203 /// Returns an error if the count operation fails.
204 fn count(&self) -> Result<usize>;
205
206 /// Clears all embeddings.
207 ///
208 /// Uses interior mutability for thread-safe concurrent access.
209 ///
210 /// # Errors
211 ///
212 /// Returns an error if the clear operation fails.
213 fn clear(&self) -> Result<()>;
214}