1use crate::context::GitContext;
6use crate::storage::traits::IndexBackend;
7use crate::{Error, Result};
8use chrono::{TimeZone, Utc};
9use git2::Repository;
10use std::collections::HashSet;
11use std::path::Path;
12use std::sync::Arc;
13use std::time::{Duration, Instant};
14use tracing::{debug, info, info_span, instrument, warn};
15
/// Converts a [`Duration`] into whole milliseconds.
///
/// `Duration::as_millis` yields a `u128`; values that do not fit into a `u64`
/// saturate at `u64::MAX` instead of wrapping or panicking.
#[inline]
fn duration_to_millis(duration: Duration) -> u64 {
    let millis = duration.as_millis();
    u64::try_from(millis).unwrap_or(u64::MAX)
}
21
/// Converts a `usize` into an `f64` for metrics reporting.
///
/// The value is first narrowed to `u32` (saturating at `u32::MAX`) so the
/// final conversion is the lossless `f64::from(u32)` rather than an `as` cast.
#[inline]
fn usize_to_f64(value: usize) -> f64 {
    f64::from(u32::try_from(value).unwrap_or(u32::MAX))
}
31
/// Converts a `u64` into an `f64` for metrics reporting.
///
/// The value is first narrowed to `u32` (saturating at `u32::MAX`) so the
/// final conversion is the lossless `f64::from(u32)` rather than an `as` cast.
#[inline]
fn u64_to_f64(value: u64) -> f64 {
    f64::from(u32::try_from(value).unwrap_or(u32::MAX))
}
41
/// Outcome of one branch garbage-collection run.
#[derive(Debug, Clone, Default)]
pub struct GcResult {
    /// Number of distinct indexed branches that were compared against the repository.
    pub branches_checked: usize,

    /// Indexed branches that were not found among the repository's branches.
    pub stale_branches: Vec<String>,

    /// Memories tombstoned by the run; in a dry run, the number that would have been.
    pub memories_tombstoned: usize,

    /// Whether the run only reported what it would do.
    pub dry_run: bool,

    /// Elapsed time of the run in milliseconds.
    pub duration_ms: u64,
}

impl GcResult {
    /// Returns `true` when at least one stale branch was recorded.
    #[must_use]
    pub const fn has_stale_branches(&self) -> bool {
        !self.stale_branches.is_empty()
    }

    /// Builds a one-line, human-readable description of the run.
    ///
    /// The wording distinguishes dry runs ("would tombstone") from real runs
    /// ("tombstoned"); when nothing was stale, only the number of branches
    /// checked and the duration are reported.
    #[must_use]
    pub fn summary(&self) -> String {
        let stale_count = self.stale_branches.len();

        // Nothing stale: the tombstone wording is irrelevant, so bail out early.
        if stale_count == 0 {
            return format!(
                "No stale branches found ({} branches checked in {}ms)",
                self.branches_checked, self.duration_ms
            );
        }

        let verb = if self.dry_run {
            "would tombstone"
        } else {
            "tombstoned"
        };

        format!(
            "Found {} stale branches, {} {} memories ({}ms)",
            stale_count, verb, self.memories_tombstoned, self.duration_ms
        )
    }
}
96
97pub struct BranchGarbageCollector<I: IndexBackend> {
125 index: Arc<I>,
127
128 repo_path: Option<std::path::PathBuf>,
131}
132
133impl<I: IndexBackend> BranchGarbageCollector<I> {
134 #[must_use]
151 pub fn new(index: Arc<I>) -> Self {
152 let _ = Arc::strong_count(&index);
154 Self {
155 index,
156 repo_path: None,
157 }
158 }
159
160 #[must_use]
182 pub fn with_repo_path(index: Arc<I>, repo_path: &Path) -> Self {
183 Self {
184 index,
185 repo_path: Some(repo_path.to_path_buf()),
186 }
187 }
188
189 #[instrument(
229 name = "subcog.gc.branches",
230 skip(self),
231 fields(
232 request_id = tracing::field::Empty,
233 component = "gc",
234 operation = "stale_branches",
235 project_id = %project_id,
236 dry_run = dry_run
237 )
238 )]
239 pub fn gc_stale_branches(&self, project_id: &str, dry_run: bool) -> Result<GcResult> {
240 let start = Instant::now();
241 if let Some(request_id) = crate::observability::current_request_id() {
242 tracing::Span::current().record("request_id", request_id.as_str());
243 }
244
245 let repo = {
247 let _span = info_span!("subcog.gc.branches.discover_repo").entered();
248 self.discover_repository()?
249 };
250
251 let current_branches = {
253 let _span = info_span!("subcog.gc.branches.list_repo").entered();
254 Self::get_current_branches(&repo)?
255 };
256 debug!(
257 branch_count = current_branches.len(),
258 "Discovered current branches"
259 );
260
261 let indexed_branches = {
263 let _span = info_span!("subcog.gc.branches.list_index").entered();
264 self.get_indexed_branches(project_id)?
265 };
266 let branches_checked = indexed_branches.len();
267 debug!(
268 branch_count = branches_checked,
269 "Found indexed branches for project"
270 );
271
272 let stale_branches: Vec<String> = indexed_branches
274 .into_iter()
275 .filter(|branch| !current_branches.contains(branch))
276 .collect();
277
278 if stale_branches.is_empty() {
279 info!("No stale branches found");
280 return Ok(GcResult {
281 branches_checked,
282 stale_branches: Vec::new(),
283 memories_tombstoned: 0,
284 dry_run,
285 duration_ms: duration_to_millis(start.elapsed()),
286 });
287 }
288
289 info!(
290 stale_count = stale_branches.len(),
291 branches = ?stale_branches,
292 "Found stale branches"
293 );
294
295 let memories_tombstoned = if dry_run {
297 self.count_memories_for_branches(project_id, &stale_branches)?
298 } else {
299 self.tombstone_memories_for_branches(project_id, &stale_branches)
300 };
301
302 let duration_ms = duration_to_millis(start.elapsed());
303
304 metrics::counter!(
306 "gc_stale_branches_total",
307 "dry_run" => dry_run.to_string()
308 )
309 .increment(1);
310 metrics::gauge!("gc_stale_branch_count").set(usize_to_f64(stale_branches.len()));
311 metrics::gauge!("gc_memories_tombstoned").set(usize_to_f64(memories_tombstoned));
312 metrics::histogram!("gc_duration_ms").record(u64_to_f64(duration_ms));
313 metrics::histogram!(
314 "memory_lifecycle_duration_ms",
315 "component" => "gc",
316 "operation" => "stale_branches"
317 )
318 .record(u64_to_f64(duration_ms));
319
320 Ok(GcResult {
321 branches_checked,
322 stale_branches,
323 memories_tombstoned,
324 dry_run,
325 duration_ms,
326 })
327 }
328
329 fn discover_repository(&self) -> Result<Repository> {
331 let path = self.repo_path.as_deref().map_or_else(
332 || {
333 std::env::current_dir().map_err(|e| Error::OperationFailed {
334 operation: "get_cwd".to_string(),
335 cause: e.to_string(),
336 })
337 },
338 |p| Ok(p.to_path_buf()),
339 )?;
340
341 Repository::discover(&path).map_err(|e| Error::OperationFailed {
342 operation: "discover_repository".to_string(),
343 cause: format!(
344 "Failed to discover git repository at {}: {}",
345 path.display(),
346 e
347 ),
348 })
349 }
350
351 fn get_current_branches(repo: &Repository) -> Result<HashSet<String>> {
353 let mut branches = HashSet::new();
354
355 let local_branches =
357 repo.branches(Some(git2::BranchType::Local))
358 .map_err(|e| Error::OperationFailed {
359 operation: "list_branches".to_string(),
360 cause: e.to_string(),
361 })?;
362
363 for branch_result in local_branches {
364 let (branch, _) = branch_result.map_err(|e| Error::OperationFailed {
365 operation: "get_branch".to_string(),
366 cause: e.to_string(),
367 })?;
368
369 if let Ok(Some(name)) = branch.name() {
370 branches.insert(name.to_string());
371 }
372 }
373
374 let remote_branches =
377 repo.branches(Some(git2::BranchType::Remote))
378 .map_err(|e| Error::OperationFailed {
379 operation: "list_remote_branches".to_string(),
380 cause: e.to_string(),
381 })?;
382
383 for branch_result in remote_branches {
384 let (branch, _) = branch_result.map_err(|e| Error::OperationFailed {
385 operation: "get_remote_branch".to_string(),
386 cause: e.to_string(),
387 })?;
388
389 let branch_name = branch
391 .name()
392 .ok()
393 .flatten()
394 .and_then(|name| name.split('/').nth(1))
395 .map(String::from);
396
397 if let Some(name) = branch_name {
398 branches.insert(name);
399 }
400 }
401
402 Ok(branches)
403 }
404
405 fn get_indexed_branches(&self, project_id: &str) -> Result<Vec<String>> {
409 use crate::models::SearchFilter;
410
411 let filter = SearchFilter::new()
412 .with_project_id(project_id)
413 .with_include_tombstoned(false);
414
415 let results = self.index.list_all(&filter, 10000)?;
416
417 let ids: Vec<_> = results.into_iter().map(|(id, _)| id).collect();
419 let memories = self.index.get_memories_batch(&ids)?;
420
421 let branches: HashSet<String> = memories
422 .into_iter()
423 .flatten()
424 .filter_map(|memory| memory.branch)
425 .collect();
426
427 Ok(branches.into_iter().collect())
428 }
429
430 fn count_memories_for_branches(&self, project_id: &str, branches: &[String]) -> Result<usize> {
432 use crate::models::SearchFilter;
433
434 let mut total = 0;
435 for branch in branches {
436 let filter = SearchFilter::new()
437 .with_project_id(project_id)
438 .with_branch(branch)
439 .with_include_tombstoned(false);
440
441 let results = self.index.list_all(&filter, 10000)?;
442 total += results.len();
443 }
444
445 Ok(total)
446 }
447
448 fn tombstone_memories_for_branches(&self, project_id: &str, branches: &[String]) -> usize {
453 let now = crate::current_timestamp();
457
458 let total: usize = branches
459 .iter()
460 .map(|branch| self.tombstone_branch_memories(project_id, branch, now))
461 .sum();
462
463 info!(count = total, "Tombstoned memories from stale branches");
464 total
465 }
466
467 fn tombstone_branch_memories(&self, project_id: &str, branch: &str, now: u64) -> usize {
471 use crate::models::SearchFilter;
472
473 let filter = SearchFilter::new()
474 .with_project_id(project_id)
475 .with_branch(branch)
476 .with_include_tombstoned(false);
477
478 let results = self.index.list_all(&filter, 10000).unwrap_or_default();
479
480 let ids: Vec<_> = results.iter().map(|(id, _)| id.clone()).collect();
482 let memories = self.index.get_memories_batch(&ids).unwrap_or_default();
483
484 results
485 .into_iter()
486 .zip(memories)
487 .filter_map(|((id, _), mem_opt)| mem_opt.map(|m| (id, m)))
488 .filter(|(id, memory)| self.try_tombstone_memory(id, memory.clone(), now))
489 .count()
490 }
491
492 fn try_tombstone_memory(
494 &self,
495 id: &crate::models::MemoryId,
496 mut memory: crate::models::Memory,
497 now: u64,
498 ) -> bool {
499 let now_i64 = i64::try_from(now).unwrap_or(i64::MAX);
500 let now_dt = Utc
501 .timestamp_opt(now_i64, 0)
502 .single()
503 .unwrap_or_else(Utc::now);
504 memory.tombstoned_at = Some(now_dt);
505 match self.index.index(&memory) {
506 Ok(()) => true,
507 Err(e) => {
508 warn!(memory_id = %id.as_str(), error = %e, "Failed to tombstone memory");
509 false
510 },
511 }
512 }
513}
514
515#[must_use]
528pub fn branch_exists(branch: &str) -> bool {
529 let ctx = GitContext::from_cwd();
530
531 if !ctx.is_git_repo() {
533 return true;
534 }
535
536 if ctx
538 .branch
539 .as_deref()
540 .is_some_and(|current| current == branch)
541 {
542 return true;
543 }
544
545 let Ok(cwd) = std::env::current_dir() else {
547 return true;
548 };
549
550 let Ok(repo) = Repository::discover(&cwd) else {
551 return true;
552 };
553
554 let in_local = repo
556 .branches(Some(git2::BranchType::Local))
557 .ok()
558 .is_some_and(|branches| {
559 branches
560 .flatten()
561 .filter_map(|(b, _)| b.name().ok().flatten().map(String::from))
562 .any(|name| name == branch)
563 });
564
565 if in_local {
566 return true;
567 }
568
569 repo.branches(Some(git2::BranchType::Remote))
572 .ok()
573 .is_some_and(|branches| {
574 branches
575 .flatten()
576 .filter_map(|(b, _)| b.name().ok().flatten().map(String::from))
577 .filter_map(|name| name.split_once('/').map(|(_, branch)| branch.to_string()))
578 .any(|name| name == branch)
579 })
580}
581
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::{Domain, Memory, MemoryId, MemoryStatus, Namespace};
    use crate::storage::index::SqliteBackend;
    use git2::Signature;
    use tempfile::TempDir;

    /// Project identifier used by the single-project GC tests.
    const PROJECT: &str = "github.com/test/repo";

    /// Builds an active, non-tombstoned memory on `branch` of `project_id`.
    fn create_test_memory(id: &str, project_id: &str, branch: &str) -> Memory {
        Memory {
            id: MemoryId::new(id),
            content: format!("Test memory for {branch}"),
            namespace: Namespace::Decisions,
            domain: Domain::new(),
            project_id: Some(project_id.to_string()),
            branch: Some(branch.to_string()),
            file_path: None,
            status: MemoryStatus::Active,
            created_at: 1_234_567_890,
            updated_at: 1_234_567_890,
            tombstoned_at: None,
            expires_at: None,
            embedding: None,
            tags: vec!["test".to_string()],
            #[cfg(feature = "group-scope")]
            group_id: None,
            source: None,
            is_summary: false,
            source_memory_ids: None,
            consolidation_timestamp: None,
        }
    }

    /// Initializes a repository in a temp dir with one empty initial commit.
    fn create_test_repo() -> (TempDir, Repository) {
        let dir = TempDir::new().expect("Failed to create temp dir");
        let repo = Repository::init(dir.path()).expect("Failed to init repo");

        // Scoped so the borrows of `repo` end before it is moved out.
        {
            let sig = Signature::now("test", "test@test.com").expect("Failed to create signature");
            let mut index = repo.index().expect("Failed to get index");
            let tree_id = index.write_tree().expect("Failed to write tree");
            let tree = repo.find_tree(tree_id).expect("Failed to find tree");
            repo.commit(Some("HEAD"), &sig, &sig, "Initial commit", &tree, &[])
                .expect("Failed to create commit");
        }

        (dir, repo)
    }

    /// Returns the commit that `HEAD` currently points at.
    fn head_commit(repo: &Repository) -> git2::Commit<'_> {
        let head = repo.head().expect("Failed to get HEAD");
        let target = head.target().expect("Failed to get target");
        repo.find_commit(target).expect("Failed to find commit")
    }

    /// Creates a fresh in-memory index backend.
    fn new_backend() -> Arc<SqliteBackend> {
        Arc::new(SqliteBackend::in_memory().expect("Failed to create backend"))
    }

    /// Indexes a test memory with the given id, project and branch.
    fn seed_memory(backend: &SqliteBackend, id: &str, project_id: &str, branch: &str) {
        let memory = create_test_memory(id, project_id, branch);
        backend.index(&memory).expect("Failed to index memory");
    }

    /// Loads a memory that is expected to be present in the backend.
    fn load_memory(backend: &SqliteBackend, id: &str) -> Memory {
        backend
            .get_memory(&MemoryId::new(id))
            .expect("Failed to get memory")
            .expect("Memory should exist")
    }

    /// Runs the collector against the repository in `repo_dir`.
    fn run_gc(
        backend: &Arc<SqliteBackend>,
        repo_dir: &TempDir,
        project_id: &str,
        dry_run: bool,
    ) -> GcResult {
        BranchGarbageCollector::with_repo_path(Arc::clone(backend), repo_dir.path())
            .gc_stale_branches(project_id, dry_run)
            .expect("GC should succeed")
    }

    #[test]
    fn test_gc_result_summary_no_stale() {
        let result = GcResult {
            branches_checked: 5,
            stale_branches: Vec::new(),
            memories_tombstoned: 0,
            dry_run: false,
            duration_ms: 100,
        };
        let summary = result.summary();

        assert!(!result.has_stale_branches());
        assert!(summary.contains("No stale branches"));
        assert!(summary.contains("5 branches checked"));
    }

    #[test]
    fn test_gc_result_summary_with_stale_dry_run() {
        let result = GcResult {
            branches_checked: 5,
            stale_branches: vec!["old-feature".to_string()],
            memories_tombstoned: 3,
            dry_run: true,
            duration_ms: 150,
        };
        let summary = result.summary();

        assert!(result.has_stale_branches());
        assert!(summary.contains("would tombstone"));
        assert!(summary.contains("1 stale branches"));
        assert!(summary.contains('3'));
    }

    #[test]
    fn test_gc_result_summary_with_stale() {
        let result = GcResult {
            branches_checked: 5,
            stale_branches: vec!["old-feature".to_string(), "deleted-branch".to_string()],
            memories_tombstoned: 7,
            dry_run: false,
            duration_ms: 200,
        };
        let summary = result.summary();

        assert!(result.has_stale_branches());
        assert!(summary.contains("tombstoned"));
        assert!(!summary.contains("would tombstone"));
        assert!(summary.contains("2 stale branches"));
        assert!(summary.contains('7'));
    }

    #[test]
    fn test_get_current_branches() {
        let (dir, repo) = create_test_repo();

        let commit = head_commit(&repo);
        for name in ["feature-a", "feature-b"] {
            repo.branch(name, &commit, false)
                .expect("Failed to create branch");
        }

        let backend = new_backend();
        let _gc = BranchGarbageCollector::with_repo_path(backend, dir.path());

        let branches = BranchGarbageCollector::<SqliteBackend>::get_current_branches(&repo)
            .expect("Failed to get branches");

        // The default branch from `init` may also be present, hence `>=`.
        assert!(branches.len() >= 2);
        assert!(branches.contains("feature-a"));
        assert!(branches.contains("feature-b"));
    }

    #[test]
    fn test_gc_with_no_stale_branches() {
        let (dir, repo) = create_test_repo();

        let commit = head_commit(&repo);
        repo.branch("feature-a", &commit, false)
            .expect("Failed to create branch");

        let backend = new_backend();
        seed_memory(&backend, "mem1", PROJECT, "feature-a");

        let result = run_gc(&backend, &dir, PROJECT, true);

        assert!(!result.has_stale_branches());
        assert_eq!(result.memories_tombstoned, 0);
    }

    #[test]
    fn test_gc_with_stale_branch_dry_run() {
        let (dir, _repo) = create_test_repo();

        let backend = new_backend();
        seed_memory(&backend, "mem1", PROJECT, "deleted-branch");

        let result = run_gc(&backend, &dir, PROJECT, true);

        assert!(result.has_stale_branches());
        assert!(
            result
                .stale_branches
                .contains(&"deleted-branch".to_string())
        );
        assert_eq!(result.memories_tombstoned, 1);
        assert!(result.dry_run);

        // A dry run must leave the stored memory untouched.
        let stored = load_memory(&backend, "mem1");
        assert!(stored.tombstoned_at.is_none());
    }

    #[test]
    fn test_gc_with_stale_branch_actual() {
        let (dir, _repo) = create_test_repo();

        let backend = new_backend();
        seed_memory(&backend, "mem1", PROJECT, "deleted-branch");

        let result = run_gc(&backend, &dir, PROJECT, false);

        assert!(result.has_stale_branches());
        assert_eq!(result.memories_tombstoned, 1);
        assert!(!result.dry_run);

        // A real run must have tombstoned the stored memory.
        let stored = load_memory(&backend, "mem1");
        assert!(stored.tombstoned_at.is_some());
    }

    #[test]
    fn test_gc_multiple_memories_same_stale_branch() {
        let (dir, _repo) = create_test_repo();

        let backend = new_backend();
        for i in 0..5 {
            seed_memory(&backend, &format!("mem{i}"), PROJECT, "old-feature");
        }

        let result = run_gc(&backend, &dir, PROJECT, false);

        assert_eq!(result.stale_branches.len(), 1);
        assert_eq!(result.memories_tombstoned, 5);
    }

    #[test]
    fn test_gc_preserves_other_project_memories() {
        let (dir, _repo) = create_test_repo();

        let backend = new_backend();
        seed_memory(&backend, "memA", "github.com/org/project-a", "deleted-branch");
        seed_memory(&backend, "memB", "github.com/org/project-b", "deleted-branch");

        let result = run_gc(&backend, &dir, "github.com/org/project-a", false);

        assert_eq!(result.memories_tombstoned, 1);

        // Only the memory of the collected project may be tombstoned.
        let mem_a = load_memory(&backend, "memA");
        assert!(mem_a.tombstoned_at.is_some());

        let mem_b = load_memory(&backend, "memB");
        assert!(mem_b.tombstoned_at.is_none());
    }

    #[test]
    fn test_branch_exists_current_branch() {
        let ctx = GitContext::from_cwd();
        // Only meaningful when the test runs on a checked-out branch.
        if let Some(ref branch) = ctx.branch {
            assert!(branch_exists(branch));
        }
    }

    #[test]
    fn test_branch_exists_nonexistent() {
        let fake_branch = "definitely-does-not-exist-12345";
        // The answer depends on the environment (outside a repository the
        // function reports `true`), so this only checks the call completes.
        let _ = branch_exists(fake_branch);
    }
}