Skip to main content

subcog/storage/traits/
vector.rs

1//! Vector backend trait (DOC-H4).
2//!
3//! Provides the abstraction layer for vector similarity search backends.
4//! Implementations use HNSW or similar algorithms for approximate nearest neighbor search.
5//!
6//! # Available Implementations
7//!
8//! | Backend | Use Case | Configuration |
9//! |---------|----------|---------------|
10//! | `UsearchBackend` | Local HNSW index | Default, no external deps |
11//! | `PgVectorBackend` | PostgreSQL with pgvector | Requires PostgreSQL + pgvector extension |
12//! | `RedisVectorBackend` | Redis Vector Search | Requires Redis Stack |
13//!
14//! # Usage Example
15//!
16//! ```rust,ignore
17//! use subcog::storage::vector::UsearchBackend;
18//! use subcog::storage::traits::{VectorBackend, VectorFilter};
19//! use subcog::models::MemoryId;
20//!
21//! // Create a 384-dimensional vector backend (MiniLM-L6 embeddings)
//! let backend = UsearchBackend::new(384)?;
23//!
24//! // Insert embeddings
25//! let embedding: Vec<f32> = generate_embedding("Use PostgreSQL for storage");
26//! backend.upsert(&MemoryId::new("mem-001"), &embedding)?;
27//!
28//! // Search for similar vectors
29//! let query_embedding: Vec<f32> = generate_embedding("database storage choice");
30//! let results = backend.search(&query_embedding, &VectorFilter::new(), 10)?;
31//!
32//! for (id, similarity) in results {
33//!     println!("{}: {:.2}% similar", id.as_str(), similarity * 100.0);
34//! }
35//! ```
36//!
37//! # Hybrid Search
38//!
39//! Vector search is typically combined with BM25 text search using Reciprocal Rank Fusion:
40//!
41//! ```rust,ignore
42//! use subcog::services::RecallService;
43//!
44//! // RecallService automatically combines vector + BM25 results
45//! let service = RecallService::new(config)?;
46//! let results = service.search("database decisions", SearchFilter::new(), 10)?;
47//! ```
48
49use crate::Result;
50use crate::models::{Domain, MemoryId, Namespace, SearchFilter};
51
52/// Filter criteria specific to vector similarity search.
53///
54/// This type provides a subset of [`SearchFilter`] fields that are applicable
55/// to vector search operations, making the API more type-safe and explicit.
56///
57/// # Fields
58///
59/// | Field | Description |
60/// |-------|-------------|
61/// | `namespaces` | Filter by memory namespaces |
62/// | `domains` | Filter by memory domains |
63/// | `min_score` | Minimum cosine similarity threshold (0.0 to 1.0) |
64///
65/// # Example
66///
67/// ```rust
68/// use subcog::storage::traits::VectorFilter;
69/// use subcog::models::Namespace;
70///
71/// let filter = VectorFilter::new()
72///     .with_namespace(Namespace::Decisions)
73///     .with_min_score(0.7);
74/// ```
75#[derive(Debug, Clone, Default)]
76pub struct VectorFilter {
77    /// Filter by namespaces.
78    pub namespaces: Vec<Namespace>,
79    /// Filter by domains.
80    pub domains: Vec<Domain>,
81    /// Minimum similarity score (0.0 to 1.0).
82    pub min_score: Option<f32>,
83}
84
85impl VectorFilter {
86    /// Creates an empty filter (matches all).
87    #[must_use]
88    pub const fn new() -> Self {
89        Self {
90            namespaces: Vec::new(),
91            domains: Vec::new(),
92            min_score: None,
93        }
94    }
95
96    /// Adds a namespace filter.
97    #[must_use]
98    pub fn with_namespace(mut self, namespace: Namespace) -> Self {
99        self.namespaces.push(namespace);
100        self
101    }
102
103    /// Adds a domain filter.
104    #[must_use]
105    pub fn with_domain(mut self, domain: Domain) -> Self {
106        self.domains.push(domain);
107        self
108    }
109
110    /// Sets the minimum score threshold.
111    #[must_use]
112    pub const fn with_min_score(mut self, score: f32) -> Self {
113        self.min_score = Some(score);
114        self
115    }
116
117    /// Returns true if the filter is empty (matches all).
118    #[must_use]
119    pub const fn is_empty(&self) -> bool {
120        self.namespaces.is_empty() && self.domains.is_empty() && self.min_score.is_none()
121    }
122}
123
124impl From<&SearchFilter> for VectorFilter {
125    /// Converts a [`SearchFilter`] to a [`VectorFilter`], extracting only
126    /// the fields applicable to vector search.
127    fn from(filter: &SearchFilter) -> Self {
128        Self {
129            namespaces: filter.namespaces.clone(),
130            domains: filter.domains.clone(),
131            min_score: filter.min_score,
132        }
133    }
134}
135
136impl From<SearchFilter> for VectorFilter {
137    fn from(filter: SearchFilter) -> Self {
138        Self::from(&filter)
139    }
140}
141
142/// Trait for vector layer backends.
143///
144/// Vector backends provide similarity search using embedding vectors.
145/// Implementations should be thread-safe (`Send + Sync`).
146///
147/// # Implementor Notes
148///
149/// - Methods use `&self` to enable sharing via `Arc<dyn VectorBackend>`
150/// - Use interior mutability (e.g., `Mutex<HashMap<K,V>>`) for mutable state
151///
152/// # Dimensionality
153///
154/// All embeddings must match the backend's [`dimensions()`](VectorBackend::dimensions).
155/// The default `FastEmbed` model (`all-MiniLM-L6-v2`) produces 384-dimensional vectors.
156pub trait VectorBackend: Send + Sync {
157    /// The dimensionality of embedding vectors.
158    fn dimensions(&self) -> usize;
159
160    /// Inserts or updates an embedding for a memory.
161    ///
162    /// Uses interior mutability for thread-safe concurrent access.
163    ///
164    /// # Errors
165    ///
166    /// Returns an error if the upsert operation fails.
167    fn upsert(&self, id: &MemoryId, embedding: &[f32]) -> Result<()>;
168
169    /// Removes an embedding by memory ID.
170    ///
171    /// Uses interior mutability for thread-safe concurrent access.
172    ///
173    /// # Errors
174    ///
175    /// Returns an error if the removal operation fails.
176    fn remove(&self, id: &MemoryId) -> Result<bool>;
177
178    /// Searches for similar embeddings.
179    ///
180    /// Returns memory IDs with their cosine similarity scores (0.0 to 1.0),
181    /// ordered by descending similarity.
182    ///
183    /// # Arguments
184    ///
185    /// * `query_embedding` - The query vector to find similar embeddings for
186    /// * `filter` - A [`VectorFilter`] for namespace/domain filtering and min score threshold
187    /// * `limit` - Maximum number of results to return
188    ///
189    /// # Errors
190    ///
191    /// Returns an error if the search operation fails.
192    fn search(
193        &self,
194        query_embedding: &[f32],
195        filter: &VectorFilter,
196        limit: usize,
197    ) -> Result<Vec<(MemoryId, f32)>>;
198
199    /// Returns the total count of indexed embeddings.
200    ///
201    /// # Errors
202    ///
203    /// Returns an error if the count operation fails.
204    fn count(&self) -> Result<usize>;
205
206    /// Clears all embeddings.
207    ///
208    /// Uses interior mutability for thread-safe concurrent access.
209    ///
210    /// # Errors
211    ///
212    /// Returns an error if the clear operation fails.
213    fn clear(&self) -> Result<()>;
214}