subcog/services/mod.rs
1//! Business logic services.
2//!
3//! Services orchestrate storage backends and provide high-level operations.
4//!
5//! # Examples
6//!
7//! Create a service container and capture a memory:
8//!
9//! ```rust,ignore
10//! use subcog::services::ServiceContainer;
11//! use subcog::models::{CaptureRequest, Namespace, Domain};
12//!
13//! let container = ServiceContainer::from_current_dir_or_user()?;
14//!
15//! let request = CaptureRequest {
16//! content: "Use PostgreSQL for production storage".to_string(),
17//! namespace: Namespace::Decisions,
18//! domain: Domain::default(),
19//! tags: vec!["database".to_string(), "architecture".to_string()],
20//! source: Some("ARCHITECTURE.md".to_string()),
21//! skip_security_check: false,
22//! };
23//!
24//! let result = container.capture().capture(request)?;
25//! println!("Captured: {}", result.urn);
26//! # Ok::<(), subcog::Error>(())
27//! ```
28//!
29//! Search for memories with the recall service:
30//!
31//! ```rust,ignore
32//! use subcog::services::ServiceContainer;
33//! use subcog::models::{SearchFilter, SearchMode};
34//!
35//! let container = ServiceContainer::from_current_dir_or_user()?;
36//! let recall = container.recall()?;
37//!
38//! let filter = SearchFilter::new().with_namespace(subcog::models::Namespace::Decisions);
39//! let results = recall.search("database storage", SearchMode::Hybrid, &filter, 10)?;
40//!
41//! for hit in &results.memories {
42//! println!("{}: {:.2}", hit.memory.id.as_str(), hit.score);
43//! }
44//! # Ok::<(), subcog::Error>(())
45//! ```
46//!
47//! # Clippy Lints
48//!
49//! The following lints are allowed at module level due to their pervasive nature
50//! in service code. Each has a documented rationale:
51//!
52//! | Lint | Rationale |
53//! |------|-----------|
54//! | `cast_precision_loss` | Metrics/score calculations don't require exact precision |
55//! | `unused_self` | Methods retained for API consistency or future extension |
56//! | `option_if_let_else` | If-let chains often clearer than nested `map_or_else` |
57//! | `manual_let_else` | Match patterns with logging clearer than `let...else` |
58//! | `unnecessary_wraps` | Result types for API consistency across trait impls |
59//! | `or_fun_call` | Entry API with closures for lazy initialization |
60//! | `significant_drop_tightening` | Drop timing not critical for correctness |
61
62// Metrics and scoring calculations don't require exact precision
63#![allow(clippy::cast_precision_loss)]
64// Methods kept for API consistency or future self usage
65#![allow(clippy::unused_self)]
66// If-let chains often clearer than nested map_or_else
67#![allow(clippy::option_if_let_else)]
68// Match patterns with logging are clearer than let-else
69#![allow(clippy::manual_let_else)]
70// Result types maintained for API consistency across trait implementations
71#![allow(clippy::unnecessary_wraps)]
72// Entry API with closures for lazy initialization
73#![allow(clippy::or_fun_call)]
74// Drop timing not critical for correctness in service code
75#![allow(clippy::significant_drop_tightening)]
76
77pub mod auth;
78mod backend_factory;
79mod capture;
80mod consolidation;
81mod context;
82mod context_template;
83mod data_subject;
84pub mod deduplication;
85mod enrichment;
86mod entity_extraction;
87mod graph;
88mod graph_rag;
89pub mod migration;
90mod path_manager;
91mod prompt;
92mod prompt_enrichment;
93mod prompt_parser;
94mod query_parser;
95mod recall;
96mod sync;
97mod tombstone;
98mod topic_index;
99
100#[cfg(feature = "group-scope")]
101pub mod group;
102
103pub use auth::{AuthContext, AuthContextBuilder, Permission};
104pub use backend_factory::{BackendFactory, BackendSet};
105pub use capture::{CaptureService, EntityExtractionCallback, EntityExtractionStats};
106pub use consolidation::{ConsolidationService, ConsolidationStats};
107pub use context::{ContextBuilderService, MemoryStatistics};
108pub use context_template::{
109 ContextTemplateFilter, ContextTemplateService, RenderResult, ValidationIssue, ValidationResult,
110 ValidationSeverity,
111};
112pub use data_subject::{
113 ConsentPurpose, ConsentRecord, ConsentStatus, DataSubjectService, DeletionResult,
114 ExportMetadata, ExportedMemory, UserDataExport,
115};
116pub use deduplication::{
117 DeduplicationConfig, DeduplicationService, Deduplicator, DuplicateCheckResult, DuplicateReason,
118};
119pub use enrichment::{EnrichmentResult, EnrichmentService, EnrichmentStats};
120pub use entity_extraction::{
121 EntityExtractorService, ExtractedEntity, ExtractedRelationship, ExtractionResult,
122 InferenceResult, InferredRelationship,
123};
124pub use graph::GraphService;
125pub use graph_rag::{
126 ExpansionConfig, GraphRAGConfig, GraphRAGService, GraphSearchHit, GraphSearchResults,
127 SearchProvenance,
128};
129pub use path_manager::{
130 GRAPH_DB_NAME, INDEX_DB_NAME, PathManager, SUBCOG_DIR_NAME, VECTOR_INDEX_NAME,
131};
132pub use prompt::{PromptFilter, PromptService, SaveOptions, SaveResult};
133pub use prompt_enrichment::{
134 ENRICHMENT_TIMEOUT, EnrichmentRequest, EnrichmentStatus, PROMPT_ENRICHMENT_SYSTEM_PROMPT,
135 PartialMetadata, PromptEnrichmentResult, PromptEnrichmentService,
136};
137pub use prompt_parser::{PromptFormat, PromptParser};
138pub use query_parser::parse_filter_query;
139pub use recall::RecallService;
140pub use sync::SyncService;
141pub use tombstone::TombstoneService;
142pub use topic_index::{TopicIndexService, TopicInfo};
143
144// Group service (feature-gated)
145#[cfg(feature = "group-scope")]
146pub use group::GroupService;
147
148use crate::cli::build_llm_provider_for_entity_extraction;
149use crate::config::SubcogConfig;
150use crate::context::GitContext;
151use crate::embedding::Embedder;
152use crate::models::{Memory, MemoryId, SearchFilter};
153use crate::storage::index::{
154 DomainIndexConfig, DomainIndexManager, DomainScope, OrgIndexConfig, SqliteBackend,
155 find_repo_root, get_user_data_dir,
156};
157use crate::storage::traits::{IndexBackend, VectorBackend};
158use crate::{Error, Result};
159use std::path::{Path, PathBuf};
160use std::sync::{Arc, Mutex};
161
162// ============================================================================
163// Service Factory Functions
164// ============================================================================
165
166/// Creates a [`PromptService`] for the given repository path.
167///
168/// This is the canonical way to create a `PromptService` from MCP or CLI layers.
169/// Configuration is loaded from the default location and merged with repo settings.
170///
171/// # Arguments
172///
173/// * `repo_path` - Path to or within a git repository
174///
175/// # Returns
176///
177/// A fully configured `PromptService` with storage backends initialized.
178///
179/// # Example
180///
181/// ```rust,ignore
182/// use subcog::services::prompt_service_for_repo;
183///
184/// let service = prompt_service_for_repo("/path/to/repo")?;
185/// let prompts = service.list(PromptFilter::new())?;
186/// ```
187#[must_use]
188pub fn prompt_service_for_repo(repo_path: impl AsRef<Path>) -> PromptService {
189 let repo_path = repo_path.as_ref();
190 let config = SubcogConfig::load_default().with_repo_path(repo_path);
191 PromptService::with_subcog_config(config).with_repo_path(repo_path)
192}
193
194/// Creates a [`PromptService`] for the current working directory.
195///
196/// This is a convenience function for CLI commands that operate on the current directory.
197///
198/// # Errors
199///
200/// Returns an error if the current working directory cannot be determined.
201///
202/// # Example
203///
204/// ```rust,ignore
205/// use subcog::services::prompt_service_for_cwd;
206///
207/// let service = prompt_service_for_cwd()?;
208/// let prompts = service.list(PromptFilter::new())?;
209/// ```
210pub fn prompt_service_for_cwd() -> Result<PromptService> {
211 let cwd = std::env::current_dir().map_err(|e| Error::OperationFailed {
212 operation: "get_current_dir".to_string(),
213 cause: e.to_string(),
214 })?;
215 Ok(prompt_service_for_repo(&cwd))
216}
217
218// ============================================================================
219// Service Container
220// ============================================================================
221
222/// Container for initialized services with configured backends.
223///
224/// Unlike the previous singleton design, this can be instantiated per-context
225/// with domain-scoped indices.
226///
227/// # `DomainIndexManager` Complexity
228///
229/// The `index_manager` field uses [`DomainIndexManager`] to provide multi-domain
230/// index support with lazy initialization. Key complexity points:
231///
232/// ## Architecture
233///
234/// ```text
235/// ServiceContainer
236/// └── Mutex<DomainIndexManager>
237/// ├── Project index (<user-data>/index.db) // faceted by project/branch/path
238/// ├── User index (<user-data>/index.db) // user-wide
239/// └── Org index (configured path) // optional
240/// ```
241///
242/// ## Lazy Initialization
243///
244/// Indices are created on-demand via `index_for_scope()`:
245/// 1. Lock the `Mutex<DomainIndexManager>`
246/// 2. Check if index exists for requested `DomainScope`
247/// 3. If missing, create `SQLite` database at scope-specific path
248/// 4. Return reference to the index
249///
250/// ## Thread Safety
251///
252/// - `Mutex` guards the manager, not individual indices
253/// - Each index has its own internal locking via `SqliteBackend`
254/// - Callers should minimize lock hold time
255///
256/// ## Path Resolution
257///
258/// | Scope | Path |
259/// |-------|------|
260/// | Project | `<user-data>/index.db` |
261/// | User | `<user-data>/index.db` |
262/// | Org | Configured via `OrgIndexConfig` |
263///
264/// ## Error Handling
265///
266/// - Missing repo returns `Error::OperationFailed`
267/// - `SQLite` initialization errors propagate as `Error::OperationFailed`
268/// - Index creation is idempotent (safe to call multiple times)
269pub struct ServiceContainer {
270 /// Capture service.
271 capture: CaptureService,
272 /// Sync service.
273 sync: SyncService,
274 /// Domain index manager for multi-domain indices.
275 ///
276 /// See struct-level documentation for complexity notes.
277 index_manager: Mutex<DomainIndexManager>,
278 /// Repository path (if known).
279 repo_path: Option<PathBuf>,
280 /// User data directory (from config, used for graph and other user-scoped data).
281 user_data_dir: PathBuf,
282 /// Shared embedder for both capture and recall.
283 embedder: Option<Arc<dyn Embedder>>,
284 /// Shared vector backend for both capture and recall.
285 vector: Option<Arc<dyn VectorBackend + Send + Sync>>,
286}
287
288impl ServiceContainer {
289 /// Creates a new service container for a repository.
290 ///
291 /// # Arguments
292 ///
293 /// * `repo_path` - Path to or within a git repository
294 /// * `org_config` - Optional organization index configuration
295 ///
296 /// # Errors
297 ///
298 /// Returns an error if the repository cannot be found or backends fail to initialize.
299 pub fn for_repo(
300 repo_path: impl Into<PathBuf>,
301 org_config: Option<OrgIndexConfig>,
302 ) -> Result<Self> {
303 let repo_path = repo_path.into();
304
305 // Find repository root
306 let repo_root = find_repo_root(&repo_path)?;
307
308 if org_config.is_some() {
309 let config = SubcogConfig::load_default().with_repo_path(&repo_root);
310 if !(config.features.org_scope_enabled || cfg!(feature = "org-scope")) {
311 tracing::warn!(
312 "Org-scope config provided but org-scope is disabled. \
313 Set SUBCOG_ORG_SCOPE_ENABLED=true or build with --features org-scope."
314 );
315 return Err(Error::FeatureNotEnabled("org-scope".to_string()));
316 }
317 }
318
319 // Load config FIRST to get user's configured data_dir (respects config.toml)
320 // This must happen before creating DomainIndexManager so all components use the same path.
321 let subcog_config = SubcogConfig::load_default();
322
323 let config = DomainIndexConfig {
324 repo_path: Some(repo_root.clone()),
325 org_config,
326 user_data_dir: Some(subcog_config.data_dir.clone()),
327 };
328
329 let index_manager = DomainIndexManager::new(config)?;
330
331 // Create CaptureService with repo_path for project-scoped storage
332 // Propagate auto_extract_entities from loaded config
333 let mut capture_config = crate::config::Config::new().with_repo_path(&repo_root);
334 capture_config.features.auto_extract_entities =
335 subcog_config.features.auto_extract_entities;
336 let user_data_dir = subcog_config.data_dir.clone();
337
338 std::fs::create_dir_all(&user_data_dir).map_err(|e| Error::OperationFailed {
339 operation: "create_user_data_dir".to_string(),
340 cause: format!(
341 "Cannot create {}: {}. Please create manually with: mkdir -p {}",
342 user_data_dir.display(),
343 e,
344 user_data_dir.display()
345 ),
346 })?;
347
348 // Create storage paths using user-level data directory (project facets)
349 let paths = PathManager::for_user(&user_data_dir);
350
351 // Create backends using factory (centralizes initialization logic)
352 let backends = BackendFactory::create_all(&paths.index_path(), &paths.vector_path());
353
354 // Build LLM provider for entity extraction with longer timeout (120s default)
355 let llm_provider = build_llm_provider_for_entity_extraction(&subcog_config);
356
357 // Create entity extraction callback if auto-extraction is enabled
358 let entity_extraction =
359 Self::create_entity_extraction_callback(&capture_config, &paths, llm_provider);
360
361 // Build CaptureService based on available backends
362 let capture = Self::build_capture_service(capture_config, &backends, entity_extraction);
363
364 Ok(Self {
365 capture,
366 sync: SyncService::default(),
367 index_manager: Mutex::new(index_manager),
368 repo_path: Some(repo_root),
369 user_data_dir,
370 embedder: backends.embedder,
371 vector: backends.vector,
372 })
373 }
374
375 /// Creates a service container from the current directory.
376 ///
377 /// # Errors
378 ///
379 /// Returns an error if not in a git repository.
380 pub fn from_current_dir() -> Result<Self> {
381 let cwd = std::env::current_dir().map_err(|e| Error::OperationFailed {
382 operation: "get_current_dir".to_string(),
383 cause: e.to_string(),
384 })?;
385
386 Self::for_repo(cwd, None)
387 }
388
389 /// Creates a service container for user-scoped storage.
390 ///
391 /// Used when operating outside a git repository. Stores memories in the
392 /// user's local data directory using `SQLite` persistence.
393 ///
394 /// # Storage Paths
395 ///
396 /// | Platform | Path |
397 /// |----------|------|
398 /// | macOS | `~/Library/Application Support/subcog/` |
399 /// | Linux | `~/.local/share/subcog/` |
400 /// | Windows | `C:\Users\<User>\AppData\Local\subcog\` |
401 ///
402 /// # Errors
403 ///
404 /// Returns an error if the user data directory cannot be created or
405 /// storage backends fail to initialize.
406 pub fn for_user() -> Result<Self> {
407 // Load config to get user's configured data_dir (respects config.toml)
408 let subcog_config = SubcogConfig::load_default();
409 let user_data_dir = subcog_config.data_dir.clone();
410
411 // Ensure user data directory exists
412 std::fs::create_dir_all(&user_data_dir).map_err(|e| Error::OperationFailed {
413 operation: "create_user_data_dir".to_string(),
414 cause: format!(
415 "Cannot create {}: {}. Please create manually with: mkdir -p {}",
416 user_data_dir.display(),
417 e,
418 user_data_dir.display()
419 ),
420 })?;
421
422 // Create storage paths using PathManager
423 let paths = PathManager::for_user(&user_data_dir);
424
425 // Create domain index config for user-only mode (no repo)
426 // Pass user_data_dir to ensure consistency with CaptureService paths
427 let config = DomainIndexConfig {
428 repo_path: None,
429 org_config: None,
430 user_data_dir: Some(user_data_dir.clone()),
431 };
432 let index_manager = DomainIndexManager::new(config)?;
433
434 // Create CaptureService WITHOUT repo_path (user scope)
435 // Propagate auto_extract_entities from loaded config
436 let mut capture_config = crate::config::Config::new();
437 capture_config.features.auto_extract_entities =
438 subcog_config.features.auto_extract_entities;
439
440 // Create backends using factory (centralizes initialization logic)
441 let backends = BackendFactory::create_all(&paths.index_path(), &paths.vector_path());
442
443 // Build LLM provider for entity extraction with longer timeout (120s default)
444 let llm_provider = build_llm_provider_for_entity_extraction(&subcog_config);
445
446 // Create entity extraction callback if auto-extraction is enabled
447 let entity_extraction =
448 Self::create_entity_extraction_callback(&capture_config, &paths, llm_provider);
449
450 // Build CaptureService based on available backends
451 let capture = Self::build_capture_service(capture_config, &backends, entity_extraction);
452
453 tracing::info!(
454 user_data_dir = %user_data_dir.display(),
455 "Created user-scoped service container"
456 );
457
458 Ok(Self {
459 capture,
460 sync: SyncService::no_op(),
461 index_manager: Mutex::new(index_manager),
462 repo_path: None,
463 user_data_dir,
464 embedder: backends.embedder,
465 vector: backends.vector,
466 })
467 }
468
469 /// Creates a service container from the current directory, falling back to user scope.
470 ///
471 /// This is the recommended factory method for CLI and MCP entry points:
472 /// - If in a git repository → uses project scope (user-level index + project facets)
473 /// - If NOT in a git repository → uses user scope (user-level index)
474 ///
475 /// # Examples
476 ///
477 /// ```rust,ignore
478 /// // Works in any directory
479 /// let container = ServiceContainer::from_current_dir_or_user()?;
480 ///
481 /// // In git repo: subcog://project/{namespace}/{id}
482 /// // Outside git: subcog://user/{namespace}/{id}
483 /// let result = container.capture().capture(request)?;
484 /// ```
485 ///
486 /// # Errors
487 ///
488 /// Returns an error only if both project and user scope fail to initialize.
489 pub fn from_current_dir_or_user() -> Result<Self> {
490 // Try project scope first
491 match Self::from_current_dir() {
492 Ok(container) => {
493 tracing::debug!("Using project-scoped service container");
494 Ok(container)
495 },
496 Err(e) => {
497 tracing::debug!(
498 error = %e,
499 "Not in git repository, falling back to user scope"
500 );
501 Self::for_user()
502 },
503 }
504 }
505
506 /// Returns whether this container is using user scope (no git repository).
507 #[must_use]
508 pub const fn is_user_scope(&self) -> bool {
509 self.repo_path.is_none()
510 }
511
512 /// Creates a recall service for a specific domain scope.
513 ///
514 /// The recall service is configured with:
515 /// - Index backend (`SQLite` FTS5) for text search
516 /// - Embedder for generating query embeddings (if available)
517 /// - Vector backend for similarity search (if available)
518 ///
519 /// # Errors
520 ///
521 /// Returns an error if the index cannot be initialized.
522 pub fn recall_for_scope(&self, scope: DomainScope) -> Result<RecallService> {
523 // Use high-level API that handles path resolution and directory creation
524 let index = {
525 let manager = self
526 .index_manager
527 .lock()
528 .map_err(|e| Error::OperationFailed {
529 operation: "lock_index_manager".to_string(),
530 cause: e.to_string(),
531 })?;
532 manager.create_backend(scope)?
533 }; // Lock released here
534
535 // Start with index-only service
536 let mut service = RecallService::with_index(index);
537
538 // Add embedder and vector backends if available
539 if let Some(ref embedder) = self.embedder {
540 service = service.with_embedder(Arc::clone(embedder));
541 }
542 if let Some(ref vector) = self.vector {
543 service = service.with_vector(Arc::clone(vector));
544 }
545
546 if matches!(scope, DomainScope::Project)
547 && let Some(filter) = self.project_scope_filter()
548 {
549 service = service.with_scope_filter(filter);
550 }
551
552 Ok(service)
553 }
554
555 fn project_scope_filter(&self) -> Option<SearchFilter> {
556 let repo_path = self.repo_path.as_ref()?;
557 let git_context = GitContext::from_path(repo_path);
558 git_context
559 .project_id
560 .map(|project_id| SearchFilter::new().with_project_id(project_id))
561 }
562
563 /// Creates a recall service for the appropriate scope.
564 ///
565 /// Uses user scope for user-scoped containers, project scope otherwise.
566 ///
567 /// # Errors
568 ///
569 /// Returns an error if the index cannot be initialized.
570 pub fn recall(&self) -> Result<RecallService> {
571 let scope = if self.is_user_scope() {
572 DomainScope::User
573 } else {
574 DomainScope::Project
575 };
576 self.recall_for_scope(scope)
577 }
578
579 /// Returns the capture service.
580 #[must_use]
581 pub const fn capture(&self) -> &CaptureService {
582 &self.capture
583 }
584
585 /// Returns the sync service.
586 #[must_use]
587 pub const fn sync(&self) -> &SyncService {
588 &self.sync
589 }
590
591 /// Returns the repository path.
592 #[must_use]
593 pub const fn repo_path(&self) -> Option<&PathBuf> {
594 self.repo_path.as_ref()
595 }
596
597 /// Returns a reference to the embedder if available.
598 #[must_use]
599 pub fn embedder(&self) -> Option<Arc<dyn Embedder>> {
600 self.embedder.clone()
601 }
602
603 /// Returns a reference to the vector backend if available.
604 #[must_use]
605 pub fn vector(&self) -> Option<Arc<dyn VectorBackend + Send + Sync>> {
606 self.vector.clone()
607 }
608
609 /// Creates an index backend for the project scope.
610 ///
611 /// # Errors
612 ///
613 /// Returns an error if the index cannot be initialized.
614 pub fn index(&self) -> Result<SqliteBackend> {
615 let manager = self
616 .index_manager
617 .lock()
618 .map_err(|e| Error::OperationFailed {
619 operation: "lock_index_manager".to_string(),
620 cause: e.to_string(),
621 })?;
622 manager.create_backend(DomainScope::Project)
623 }
624
625 /// Builds a `CaptureService` from available backends.
626 ///
627 /// Applies graceful degradation: uses whatever backends are available.
628 ///
629 /// # Arguments
630 ///
631 /// * `config` - The capture configuration
632 /// * `backends` - Available storage backends
633 /// * `entity_extraction` - Optional callback for entity extraction (Graph RAG)
634 fn build_capture_service(
635 config: crate::config::Config,
636 backends: &BackendSet,
637 entity_extraction: Option<capture::EntityExtractionCallback>,
638 ) -> CaptureService {
639 let mut service = CaptureService::new(config);
640
641 // Add embedder if available
642 if let Some(ref embedder) = backends.embedder {
643 service = service.with_embedder(Arc::clone(embedder));
644 }
645
646 // Add index backend if available
647 if let Some(ref index) = backends.index {
648 service = service.with_index(Arc::clone(index));
649 }
650
651 // Add vector backend if available
652 if let Some(ref vector) = backends.vector {
653 service = service.with_vector(Arc::clone(vector));
654 }
655
656 // Add entity extraction callback if provided
657 if let Some(callback) = entity_extraction {
658 service = service.with_entity_extraction(callback);
659 }
660
661 service
662 }
663
664 /// Creates an entity extraction callback if auto-extraction is enabled.
665 ///
666 /// The callback:
667 /// 1. Extracts entities from the memory content using [`EntityExtractorService`]
668 /// 2. Stores entities and relationships in the [`GraphService`]
669 /// 3. Records mentions linking memories to entities
670 ///
671 /// # Arguments
672 ///
673 /// * `config` - The capture configuration (checked for `auto_extract_entities` flag)
674 /// * `paths` - Path manager for locating the graph database
675 /// * `llm` - Optional LLM provider for intelligent extraction
676 ///
677 /// # Returns
678 ///
679 /// `Some(callback)` if auto-extraction is enabled and graph backend initializes,
680 /// `None` otherwise (graceful degradation).
681 #[allow(clippy::excessive_nesting)] // Callback closures require nested scopes
682 fn create_entity_extraction_callback(
683 config: &crate::config::Config,
684 paths: &PathManager,
685 llm: Option<Arc<dyn crate::llm::LlmProvider>>,
686 ) -> Option<capture::EntityExtractionCallback> {
687 // Check if auto-extraction is enabled
688 if !config.features.auto_extract_entities {
689 return None;
690 }
691
692 // Create graph backend (gracefully degrade if it fails)
693 let graph_path = paths.graph_path();
694 let graph_backend = match crate::storage::graph::SqliteGraphBackend::new(&graph_path) {
695 Ok(backend) => backend,
696 Err(e) => {
697 tracing::warn!(
698 error = %e,
699 "Failed to create graph backend for entity extraction, disabling"
700 );
701 return None;
702 },
703 };
704
705 // Create services wrapped in Arc for sharing
706 let graph_service = Arc::new(GraphService::new(graph_backend));
707 let domain = crate::models::Domain::new(); // Default domain for extraction
708
709 let entity_extractor = if let Some(llm) = llm {
710 Arc::new(EntityExtractorService::with_shared_llm(llm, domain))
711 } else {
712 Arc::new(EntityExtractorService::without_llm(domain))
713 };
714
715 // Create the callback that captures the services
716 let callback: capture::EntityExtractionCallback = Arc::new(move |content, memory_id| {
717 use crate::models::graph::{Entity, EntityType, Relationship, RelationshipType};
718 use std::collections::HashMap;
719
720 let mut stats = capture::EntityExtractionStats::default();
721
722 // Extract entities from content
723 let extraction = entity_extractor.extract(content)?;
724 stats.used_fallback = extraction.used_fallback;
725
726 // Map entity names to IDs for relationship resolution
727 let mut name_to_id: HashMap<String, crate::models::graph::EntityId> = HashMap::new();
728
729 // Store entities in graph
730 for extracted in &extraction.entities {
731 // Parse entity type, defaulting to Concept if unknown
732 let entity_type =
733 EntityType::parse(&extracted.entity_type).unwrap_or(EntityType::Concept);
734
735 // Create the Entity from ExtractedEntity
736 let entity =
737 Entity::new(entity_type, &extracted.name, crate::models::Domain::new())
738 .with_confidence(extracted.confidence)
739 .with_aliases(extracted.aliases.iter().cloned());
740
741 // Store entity with deduplication (returns actual ID, existing or new)
742 match graph_service.store_entity_deduped(&entity) {
743 Ok(actual_id) => {
744 stats.entities_stored += 1;
745
746 // Track name to ID mapping for relationship resolution
747 // Use the actual ID returned (may be existing entity's ID)
748 name_to_id.insert(extracted.name.clone(), actual_id.clone());
749 for alias in &extracted.aliases {
750 name_to_id.insert(alias.clone(), actual_id.clone());
751 }
752
753 // Record mention linking memory to entity
754 if let Err(e) = graph_service.record_mention(&actual_id, memory_id) {
755 tracing::debug!(
756 memory_id = %memory_id,
757 entity_id = %actual_id.as_ref(),
758 error = %e,
759 "Failed to record entity mention"
760 );
761 }
762 },
763 Err(e) => {
764 tracing::debug!(
765 entity_name = %extracted.name,
766 error = %e,
767 "Failed to store entity"
768 );
769 },
770 }
771 }
772
773 // Store relationships in graph
774 for extracted_rel in &extraction.relationships {
775 // Look up entity IDs by name - skip if either entity not found
776 let (Some(from), Some(to)) = (
777 name_to_id.get(&extracted_rel.from),
778 name_to_id.get(&extracted_rel.to),
779 ) else {
780 tracing::debug!(
781 from = %extracted_rel.from,
782 to = %extracted_rel.to,
783 "Skipping relationship: one or both entities not found"
784 );
785 continue;
786 };
787
788 // Parse relationship type, defaulting to RelatesTo if unknown
789 let rel_type = RelationshipType::parse(&extracted_rel.relationship_type)
790 .unwrap_or(RelationshipType::RelatesTo);
791
792 let relationship = Relationship::new(from.clone(), to.clone(), rel_type)
793 .with_confidence(extracted_rel.confidence);
794
795 if let Err(e) = graph_service.store_relationship(&relationship) {
796 tracing::debug!(
797 from = %extracted_rel.from,
798 to = %extracted_rel.to,
799 error = %e,
800 "Failed to store relationship"
801 );
802 } else {
803 stats.relationships_stored += 1;
804 }
805 }
806
807 Ok(stats)
808 });
809
810 Some(callback)
811 }
812
813 /// Creates a deduplication service without embedding support.
814 ///
815 /// This variant supports:
816 /// - Exact match (SHA256 hash comparison)
817 /// - Recent capture (LRU cache with TTL)
818 ///
819 /// For full semantic similarity support, create a `DeduplicationService`
820 /// directly with an embedder and vector backend.
821 ///
822 /// # Errors
823 ///
824 /// Returns an error if the recall service cannot be initialized.
825 pub fn deduplication(
826 &self,
827 ) -> Result<
828 deduplication::DeduplicationService<
829 crate::embedding::FastEmbedEmbedder,
830 crate::storage::vector::UsearchBackend,
831 >,
832 > {
833 let recall = std::sync::Arc::new(self.recall()?);
834 let config = deduplication::DeduplicationConfig::from_env();
835 Ok(deduplication::DeduplicationService::without_embeddings(
836 recall, config,
837 ))
838 }
839
840 /// Creates a deduplication service with custom configuration.
841 ///
842 /// # Arguments
843 ///
844 /// * `config` - Custom deduplication configuration
845 ///
846 /// # Errors
847 ///
848 /// Returns an error if the recall service cannot be initialized.
849 pub fn deduplication_with_config(
850 &self,
851 config: deduplication::DeduplicationConfig,
852 ) -> Result<
853 deduplication::DeduplicationService<
854 crate::embedding::FastEmbedEmbedder,
855 crate::storage::vector::UsearchBackend,
856 >,
857 > {
858 let recall = std::sync::Arc::new(self.recall()?);
859 Ok(deduplication::DeduplicationService::without_embeddings(
860 recall, config,
861 ))
862 }
863
864 /// Creates a data subject service for GDPR operations.
865 ///
866 /// Provides:
867 /// - `export_user_data()` - Export all user data (GDPR Article 20)
868 /// - `delete_user_data()` - Delete all user data (GDPR Article 17)
869 ///
870 /// # Errors
871 ///
872 /// Returns an error if the index backend cannot be initialized.
873 pub fn data_subject(&self) -> Result<DataSubjectService> {
874 let index = self.index()?;
875 let mut service = DataSubjectService::new(index);
876 if let Some(ref vector) = self.vector {
877 service = service.with_vector(Arc::clone(vector));
878 }
879 Ok(service)
880 }
881
882 /// Gets the index path for a domain scope.
883 ///
884 /// # Errors
885 ///
886 /// Returns an error if the path cannot be determined.
887 pub fn get_index_path(&self, scope: DomainScope) -> Result<PathBuf> {
888 let manager = self
889 .index_manager
890 .lock()
891 .map_err(|e| Error::OperationFailed {
892 operation: "lock_index_manager".to_string(),
893 cause: e.to_string(),
894 })?;
895 manager.get_index_path(scope)
896 }
897
898 /// Rebuilds the FTS index from `SQLite` data for a specific scope.
899 ///
900 /// Since `SQLite` is the authoritative storage, this function reads all memories
901 /// from the `SQLite` database and rebuilds the FTS5 full-text search index.
902 ///
903 /// # Arguments
904 ///
905 /// * `scope` - The domain scope to reindex
906 ///
907 /// # Returns
908 ///
909 /// The number of memories indexed.
910 ///
911 /// # Errors
912 ///
913 /// Returns an error if reading or indexing fails.
914 pub fn reindex_scope(&self, scope: DomainScope) -> Result<usize> {
915 use crate::models::SearchFilter;
916
917 // Create index backend using high-level API
918 let index = {
919 let manager = self
920 .index_manager
921 .lock()
922 .map_err(|e| Error::OperationFailed {
923 operation: "lock_index_manager".to_string(),
924 cause: e.to_string(),
925 })?;
926 manager.create_backend(scope)?
927 };
928
929 // Get all memory IDs from SQLite
930 let filter = SearchFilter::default();
931 let all_ids = index.list_all(&filter, usize::MAX)?;
932
933 if all_ids.is_empty() {
934 return Ok(0);
935 }
936
937 // Get full memories
938 let ids: Vec<MemoryId> = all_ids.into_iter().map(|(id, _)| id).collect();
939 let memories: Vec<Memory> = index
940 .get_memories_batch(&ids)?
941 .into_iter()
942 .flatten()
943 .collect();
944
945 if memories.is_empty() {
946 return Ok(0);
947 }
948
949 // Clear FTS and rebuild
950 index.clear()?;
951 let count = memories.len();
952 index.reindex(&memories)?;
953
954 Ok(count)
955 }
956
957 /// Reindexes memories for the project scope (default).
958 ///
959 /// # Errors
960 ///
961 /// Returns an error if notes cannot be read or indexing fails.
962 pub fn reindex(&self) -> Result<usize> {
963 self.reindex_scope(DomainScope::Project)
964 }
965
966 /// Reindexes all domain scopes.
967 ///
968 /// # Returns
969 ///
970 /// A map of scope to count of indexed memories.
971 ///
972 /// # Errors
973 ///
974 /// Returns an error if any scope fails to reindex.
975 pub fn reindex_all(&self) -> Result<std::collections::HashMap<DomainScope, usize>> {
976 let mut results = std::collections::HashMap::new();
977
978 for scope in [DomainScope::Project, DomainScope::User, DomainScope::Org] {
979 match self.reindex_scope(scope) {
980 Ok(count) => {
981 results.insert(scope, count);
982 },
983 Err(e) => {
984 tracing::warn!("Failed to reindex scope {:?}: {e}", scope);
985 },
986 }
987 }
988
989 Ok(results)
990 }
991
992 /// Creates a graph service for knowledge graph operations.
993 ///
994 /// The graph service stores entities and relationships in a dedicated
995 /// `SQLite` database (`graph.db`) in the user data directory.
996 ///
997 /// # Errors
998 ///
999 /// Returns an error if the graph backend cannot be initialized.
1000 ///
1001 /// # Example
1002 ///
1003 /// ```rust,ignore
1004 /// let container = ServiceContainer::from_current_dir_or_user()?;
1005 /// let graph = container.graph()?;
1006 ///
1007 /// let entity = graph.store_entity(Entity::new(EntityType::Technology, "Rust", domain))?;
1008 /// ```
1009 pub fn graph(&self) -> Result<GraphService<crate::storage::graph::SqliteGraphBackend>> {
1010 use crate::storage::graph::SqliteGraphBackend;
1011
1012 // Use the configured user_data_dir (respects config.toml data_dir setting)
1013 let paths = PathManager::for_user(&self.user_data_dir);
1014 let graph_path = paths.graph_path();
1015
1016 let backend = SqliteGraphBackend::new(&graph_path).map_err(|e| Error::OperationFailed {
1017 operation: "create_graph_backend".to_string(),
1018 cause: e.to_string(),
1019 })?;
1020
1021 Ok(GraphService::new(backend))
1022 }
1023
1024 /// Creates an entity extractor service for extracting entities from text.
1025 ///
1026 /// The extractor uses pattern-based fallback when no LLM is provided.
1027 /// For LLM-powered extraction, use [`Self::entity_extractor_with_llm`].
1028 ///
1029 /// # Returns
1030 ///
1031 /// An [`EntityExtractorService`] configured for the appropriate domain.
1032 #[must_use]
1033 pub fn entity_extractor(&self) -> EntityExtractorService {
1034 let domain = self.current_domain();
1035 EntityExtractorService::without_llm(domain)
1036 }
1037
1038 /// Creates an entity extractor service with LLM support.
1039 ///
1040 /// The extractor uses the provided LLM for intelligent entity extraction.
1041 /// Falls back to pattern-based extraction if LLM calls fail.
1042 ///
1043 /// # Arguments
1044 ///
1045 /// * `llm` - The LLM provider to use for extraction.
1046 ///
1047 /// # Returns
1048 ///
1049 /// An [`EntityExtractorService`] configured with LLM support.
1050 pub fn entity_extractor_with_llm(
1051 &self,
1052 llm: Arc<dyn crate::llm::LlmProvider>,
1053 ) -> EntityExtractorService {
1054 let domain = self.current_domain();
1055 EntityExtractorService::with_shared_llm(llm, domain)
1056 }
1057
1058 /// Returns the current domain based on scope.
1059 ///
1060 /// - If in a git repository: returns project-scoped domain (`Domain::new()`)
1061 /// - If NOT in a git repository: returns user-scoped domain (`Domain::for_user()`)
1062 fn current_domain(&self) -> crate::models::Domain {
1063 if self.repo_path.is_some() {
1064 // Project scope: uses user-level storage with project facets
1065 crate::models::Domain::new()
1066 } else {
1067 // User scope: uses user-level storage without project facets
1068 crate::models::Domain::for_user()
1069 }
1070 }
1071
1072 /// Creates a webhook service for event notifications.
1073 ///
1074 /// The webhook service subscribes to memory events and delivers them to
1075 /// configured webhook endpoints. Configuration is loaded from
1076 /// `~/.config/subcog/webhooks.yaml`.
1077 ///
1078 /// Returns `Ok(None)` if no webhooks are configured.
1079 ///
1080 /// # Errors
1081 ///
1082 /// Returns an error if the configuration is invalid or the audit database
1083 /// cannot be created.
1084 ///
1085 /// # Example
1086 ///
1087 /// ```rust,ignore
1088 /// let container = ServiceContainer::from_current_dir_or_user()?;
1089 /// if let Some(webhook_service) = container.webhook_service()? {
1090 /// // Start webhook dispatcher as background task
1091 /// let _handle = webhook_service.start();
1092 /// }
1093 /// ```
1094 pub fn webhook_service(&self) -> Result<Option<crate::webhooks::WebhookService>> {
1095 let scope = if self.is_user_scope() {
1096 crate::storage::index::DomainScope::User
1097 } else {
1098 crate::storage::index::DomainScope::Project
1099 };
1100
1101 let user_data_dir = get_user_data_dir()?;
1102 crate::webhooks::WebhookService::from_config_file(scope, &user_data_dir)
1103 }
1104}