Skip to main content

subcog/embedding/
mod.rs

1//! Embedding generation.
2//!
3//! Provides embedding generation using fastembed or fallback to BM25-only.
4
5// Allow cast precision loss for hash-based embedding calculations.
6#![allow(clippy::cast_precision_loss)]
7// Allow cast possible truncation for hash index calculations on 32-bit platforms.
8#![allow(clippy::cast_possible_truncation)]
9
10mod bulkhead;
11mod fallback;
12mod fastembed;
13
14pub use bulkhead::{BulkheadEmbedder, EmbeddingBulkheadConfig};
15pub use fallback::FallbackEmbedder;
16pub use fastembed::{FastEmbedEmbedder, cosine_similarity};
17
/// Number of embedding dimensions used by default (all-MiniLM-L6-v2 model).
///
/// Treat this constant as the single authoritative definition of the embedding
/// width for the whole codebase (QUAL-H2); vector backends should refer to it
/// rather than repeating the literal, so that every backend stays consistent.
pub const DEFAULT_DIMENSIONS: usize = 384;
23
24use crate::Result;
25
26/// Trait for embedding generators.
27pub trait Embedder: Send + Sync {
28    /// Returns the embedding dimensions.
29    fn dimensions(&self) -> usize;
30
31    /// Generates an embedding for the given text.
32    ///
33    /// # Errors
34    ///
35    /// Returns an error if embedding generation fails.
36    fn embed(&self, text: &str) -> Result<Vec<f32>>;
37
38    /// Generates embeddings for multiple texts.
39    ///
40    /// # Errors
41    ///
42    /// Returns an error if embedding generation fails.
43    fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
44        texts.iter().map(|t| self.embed(t)).collect()
45    }
46}