subcog/services/
migration.rs

1//! Migration service.
2//!
3//! Provides functionality for migrating existing memories to use new features,
4//! primarily generating embeddings for memories that lack them.
5
6use std::sync::Arc;
7
8use crate::Result;
9use crate::embedding::Embedder;
10use crate::models::{Memory, MemoryId};
11use crate::storage::{IndexBackend, VectorBackend};
12
/// Statistics from a migration operation.
///
/// All counters start at zero. `MigrationService::migrate_embeddings` sets
/// `total` to the number of memories it listed and then increments exactly one
/// of `migrated`, `skipped`, or `errors` for each of them.
///
/// The type is comparable with `==` so that callers and tests can check a whole
/// result at once instead of field by field.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct MigrationStats {
    /// Number of memories that were migrated.
    ///
    /// In a dry run this counts the memories that *would* have been migrated.
    pub migrated: usize,
    /// Number of memories that were skipped.
    pub skipped: usize,
    /// Number of memories whose migration returned an error.
    pub errors: usize,
    /// Total memories processed.
    pub total: usize,
}

impl MigrationStats {
    /// Creates a new stats instance with every counter set to zero.
    ///
    /// Produces the same value as [`Default::default`], but can also be used
    /// in `const` contexts.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            migrated: 0,
            skipped: 0,
            errors: 0,
            total: 0,
        }
    }
}
38
/// Options for migration.
///
/// Build these with [`MigrationOptions::new`] and the `with_*` methods.
///
/// Note that the derived [`Default`] and `new()` disagree on `limit`:
/// `MigrationOptions::default()` yields `limit == 0`, whereas
/// `MigrationOptions::new()` yields `limit == 10000`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct MigrationOptions {
    /// If true, don't actually modify anything.
    pub dry_run: bool,
    /// If true, re-generate embeddings even for memories that already have them.
    pub force: bool,
    /// Maximum number of memories to process.
    ///
    /// Handed to the index backend as the listing limit.
    /// NOTE(review): how the backend interprets a limit of `0` (the derived
    /// `Default` value) is not visible from this module — confirm before
    /// relying on `MigrationOptions::default()`.
    pub limit: usize,
}

impl MigrationOptions {
    /// Creates options with `dry_run` and `force` disabled and a `limit` of 10000.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            dry_run: false,
            force: false,
            limit: 10000,
        }
    }

    /// Returns these options with `dry_run` replaced; other fields are kept.
    #[must_use]
    pub const fn with_dry_run(self, dry_run: bool) -> Self {
        Self { dry_run, ..self }
    }

    /// Returns these options with `force` replaced; other fields are kept.
    #[must_use]
    pub const fn with_force(self, force: bool) -> Self {
        Self { force, ..self }
    }

    /// Returns these options with `limit` replaced; other fields are kept.
    #[must_use]
    pub const fn with_limit(self, limit: usize) -> Self {
        Self { limit, ..self }
    }
}
82
83/// Migration service for generating embeddings for existing memories.
84pub struct MigrationService<I, E, V>
85where
86    I: IndexBackend,
87    E: Embedder,
88    V: VectorBackend,
89{
90    index: Arc<I>,
91    embedder: Arc<E>,
92    vector: Arc<V>,
93}
94
95impl<I, E, V> MigrationService<I, E, V>
96where
97    I: IndexBackend,
98    E: Embedder,
99    V: VectorBackend,
100{
101    /// Creates a new migration service.
102    ///
103    /// # Arguments
104    ///
105    /// * `index` - The index backend to read memories from
106    /// * `embedder` - The embedder to generate embeddings
107    /// * `vector` - The vector backend to store embeddings
108    pub const fn new(index: Arc<I>, embedder: Arc<E>, vector: Arc<V>) -> Self {
109        Self {
110            index,
111            embedder,
112            vector,
113        }
114    }
115
116    /// Migrates embeddings for all memories in the index.
117    ///
118    /// # Arguments
119    ///
120    /// * `options` - Migration options
121    ///
122    /// # Returns
123    ///
124    /// Statistics about the migration.
125    ///
126    /// # Errors
127    ///
128    /// Returns an error if the migration fails.
129    pub fn migrate_embeddings(&self, options: &MigrationOptions) -> Result<MigrationStats> {
130        let filter = crate::SearchFilter::new();
131        let memories = self.index.list_all(&filter, options.limit)?;
132
133        let mut stats = MigrationStats {
134            total: memories.len(),
135            ..Default::default()
136        };
137
138        for (memory_id, _score) in memories {
139            let result = self.migrate_single(&memory_id, options);
140            match result {
141                Ok(true) => stats.migrated += 1,
142                Ok(false) => stats.skipped += 1,
143                Err(e) => {
144                    tracing::warn!("Failed to migrate memory {}: {e}", memory_id.as_str());
145                    stats.errors += 1;
146                },
147            }
148        }
149
150        Ok(stats)
151    }
152
153    /// Migrates a single memory.
154    ///
155    /// # Arguments
156    ///
157    /// * `memory_id` - The ID of the memory to migrate
158    /// * `options` - Migration options
159    ///
160    /// # Returns
161    ///
162    /// `true` if the memory was migrated, `false` if it was skipped.
163    ///
164    /// # Errors
165    ///
166    /// Returns an error if the migration fails.
167    pub fn migrate_single(&self, memory_id: &MemoryId, options: &MigrationOptions) -> Result<bool> {
168        // Get the full memory
169        let memory = match self.index.get_memory(memory_id)? {
170            Some(m) => m,
171            None => return Ok(false),
172        };
173
174        // Check if already has embedding (unless force)
175        if !options.force && memory.embedding.is_some() {
176            return Ok(false);
177        }
178
179        if options.dry_run {
180            tracing::debug!("Would migrate: {}", memory_id.as_str());
181            return Ok(true);
182        }
183
184        // Generate embedding
185        let embedding = self.embedder.embed(&memory.content)?;
186
187        // Store in vector backend
188        self.vector.upsert(memory_id, &embedding)?;
189
190        Ok(true)
191    }
192
193    /// Checks if a memory needs migration.
194    ///
195    /// # Arguments
196    ///
197    /// * `memory` - The memory to check
198    /// * `force` - Whether to force re-migration
199    ///
200    /// # Returns
201    ///
202    /// `true` if the memory needs migration.
203    #[must_use]
204    pub const fn needs_migration(memory: &Memory, force: bool) -> bool {
205        force || memory.embedding.is_none()
206    }
207}
208
#[cfg(test)]
mod tests {
    use super::*;

    /// Concrete instantiation used only to reach the associated
    /// `needs_migration` function; no service instance is ever built.
    type TestService = MigrationService<
        crate::storage::index::SqliteBackend,
        crate::embedding::FastEmbedEmbedder,
        crate::storage::vector::UsearchBackend,
    >;

    /// Asserts that every counter in `stats` is zero.
    fn assert_all_zero(stats: &MigrationStats) {
        assert_eq!(
            (stats.migrated, stats.skipped, stats.errors, stats.total),
            (0, 0, 0, 0)
        );
    }

    #[test]
    fn test_migration_stats_new() {
        assert_all_zero(&MigrationStats::new());
    }

    #[test]
    fn test_migration_stats_default() {
        assert_all_zero(&MigrationStats::default());
    }

    #[test]
    fn test_migration_options_new() {
        let MigrationOptions {
            dry_run,
            force,
            limit,
        } = MigrationOptions::new();
        assert!(!dry_run);
        assert!(!force);
        assert_eq!(limit, 10000);
    }

    #[test]
    fn test_migration_options_default() {
        // The derived `Default` zeroes `limit`, unlike `new()`.
        let MigrationOptions {
            dry_run,
            force,
            limit,
        } = MigrationOptions::default();
        assert!(!dry_run);
        assert!(!force);
        assert_eq!(limit, 0);
    }

    #[test]
    fn test_migration_options_with_dry_run() {
        let opts = MigrationOptions::new().with_dry_run(true);
        assert_eq!((opts.dry_run, opts.force), (true, false));
    }

    #[test]
    fn test_migration_options_with_force() {
        let opts = MigrationOptions::new().with_force(true);
        assert_eq!((opts.dry_run, opts.force), (false, true));
    }

    #[test]
    fn test_migration_options_with_limit() {
        assert_eq!(MigrationOptions::new().with_limit(100).limit, 100);
    }

    #[test]
    fn test_migration_options_chaining() {
        let opts = MigrationOptions::new()
            .with_dry_run(true)
            .with_force(true)
            .with_limit(50);
        assert_eq!((opts.dry_run, opts.force, opts.limit), (true, true, 50));
    }

    /// Builds a minimal memory fixture, optionally carrying a small embedding.
    fn create_test_memory(id: &str, with_embedding: bool) -> Memory {
        let embedding = with_embedding.then(|| vec![0.1, 0.2, 0.3]);

        Memory {
            id: MemoryId::new(id),
            namespace: crate::Namespace::Decisions,
            content: String::from("test content"),
            domain: crate::Domain::new(),
            project_id: None,
            branch: None,
            file_path: None,
            status: crate::MemoryStatus::Active,
            created_at: 0,
            updated_at: 0,
            tombstoned_at: None,
            expires_at: None,
            embedding,
            tags: Vec::new(),
            #[cfg(feature = "group-scope")]
            group_id: None,
            source: None,
            is_summary: false,
            source_memory_ids: None,
            consolidation_timestamp: None,
        }
    }

    #[test]
    fn test_needs_migration_no_embedding() {
        let without_embedding = create_test_memory("test", false);
        assert!(TestService::needs_migration(&without_embedding, false));
    }

    #[test]
    fn test_needs_migration_has_embedding() {
        let with_embedding = create_test_memory("test", true);
        assert!(!TestService::needs_migration(&with_embedding, false));
    }

    #[test]
    fn test_needs_migration_force() {
        let with_embedding = create_test_memory("test", true);
        assert!(TestService::needs_migration(&with_embedding, true));
    }
}