Skip to main content

subcog/services/
mod.rs

1//! Business logic services.
2//!
3//! Services orchestrate storage backends and provide high-level operations.
4//!
5//! # Examples
6//!
7//! Create a service container and capture a memory:
8//!
9//! ```rust,ignore
10//! use subcog::services::ServiceContainer;
11//! use subcog::models::{CaptureRequest, Namespace, Domain};
12//!
13//! let container = ServiceContainer::from_current_dir_or_user()?;
14//!
15//! let request = CaptureRequest {
16//!     content: "Use PostgreSQL for production storage".to_string(),
17//!     namespace: Namespace::Decisions,
18//!     domain: Domain::default(),
19//!     tags: vec!["database".to_string(), "architecture".to_string()],
20//!     source: Some("ARCHITECTURE.md".to_string()),
21//!     skip_security_check: false,
22//! };
23//!
24//! let result = container.capture().capture(request)?;
25//! println!("Captured: {}", result.urn);
26//! # Ok::<(), subcog::Error>(())
27//! ```
28//!
29//! Search for memories with the recall service:
30//!
31//! ```rust,ignore
32//! use subcog::services::ServiceContainer;
33//! use subcog::models::{SearchFilter, SearchMode};
34//!
35//! let container = ServiceContainer::from_current_dir_or_user()?;
36//! let recall = container.recall()?;
37//!
38//! let filter = SearchFilter::new().with_namespace(subcog::models::Namespace::Decisions);
39//! let results = recall.search("database storage", SearchMode::Hybrid, &filter, 10)?;
40//!
41//! for hit in &results.memories {
42//!     println!("{}: {:.2}", hit.memory.id.as_str(), hit.score);
43//! }
44//! # Ok::<(), subcog::Error>(())
45//! ```
46//!
47//! # Clippy Lints
48//!
49//! The following lints are allowed at module level due to their pervasive nature
50//! in service code. Each has a documented rationale:
51//!
52//! | Lint | Rationale |
53//! |------|-----------|
54//! | `cast_precision_loss` | Metrics/score calculations don't require exact precision |
55//! | `unused_self` | Methods retained for API consistency or future extension |
56//! | `option_if_let_else` | If-let chains often clearer than nested `map_or_else` |
57//! | `manual_let_else` | Match patterns with logging clearer than `let...else` |
58//! | `unnecessary_wraps` | Result types for API consistency across trait impls |
59//! | `or_fun_call` | Entry API with closures for lazy initialization |
60//! | `significant_drop_tightening` | Drop timing not critical for correctness |
61
62// Metrics and scoring calculations don't require exact precision
63#![allow(clippy::cast_precision_loss)]
64// Methods kept for API consistency or future self usage
65#![allow(clippy::unused_self)]
66// If-let chains often clearer than nested map_or_else
67#![allow(clippy::option_if_let_else)]
68// Match patterns with logging are clearer than let-else
69#![allow(clippy::manual_let_else)]
70// Result types maintained for API consistency across trait implementations
71#![allow(clippy::unnecessary_wraps)]
72// Entry API with closures for lazy initialization
73#![allow(clippy::or_fun_call)]
74// Drop timing not critical for correctness in service code
75#![allow(clippy::significant_drop_tightening)]
76
77pub mod auth;
78mod backend_factory;
79mod capture;
80mod consolidation;
81mod context;
82mod context_template;
83mod data_subject;
84pub mod deduplication;
85mod enrichment;
86mod entity_extraction;
87mod graph;
88mod graph_rag;
89pub mod migration;
90mod path_manager;
91mod prompt;
92mod prompt_enrichment;
93mod prompt_parser;
94mod query_parser;
95mod recall;
96mod sync;
97mod tombstone;
98mod topic_index;
99
100#[cfg(feature = "group-scope")]
101pub mod group;
102
103pub use auth::{AuthContext, AuthContextBuilder, Permission};
104pub use backend_factory::{BackendFactory, BackendSet};
105pub use capture::{CaptureService, EntityExtractionCallback, EntityExtractionStats};
106pub use consolidation::{ConsolidationService, ConsolidationStats};
107pub use context::{ContextBuilderService, MemoryStatistics};
108pub use context_template::{
109    ContextTemplateFilter, ContextTemplateService, RenderResult, ValidationIssue, ValidationResult,
110    ValidationSeverity,
111};
112pub use data_subject::{
113    ConsentPurpose, ConsentRecord, ConsentStatus, DataSubjectService, DeletionResult,
114    ExportMetadata, ExportedMemory, UserDataExport,
115};
116pub use deduplication::{
117    DeduplicationConfig, DeduplicationService, Deduplicator, DuplicateCheckResult, DuplicateReason,
118};
119pub use enrichment::{EnrichmentResult, EnrichmentService, EnrichmentStats};
120pub use entity_extraction::{
121    EntityExtractorService, ExtractedEntity, ExtractedRelationship, ExtractionResult,
122    InferenceResult, InferredRelationship,
123};
124pub use graph::GraphService;
125pub use graph_rag::{
126    ExpansionConfig, GraphRAGConfig, GraphRAGService, GraphSearchHit, GraphSearchResults,
127    SearchProvenance,
128};
129pub use path_manager::{
130    GRAPH_DB_NAME, INDEX_DB_NAME, PathManager, SUBCOG_DIR_NAME, VECTOR_INDEX_NAME,
131};
132pub use prompt::{PromptFilter, PromptService, SaveOptions, SaveResult};
133pub use prompt_enrichment::{
134    ENRICHMENT_TIMEOUT, EnrichmentRequest, EnrichmentStatus, PROMPT_ENRICHMENT_SYSTEM_PROMPT,
135    PartialMetadata, PromptEnrichmentResult, PromptEnrichmentService,
136};
137pub use prompt_parser::{PromptFormat, PromptParser};
138pub use query_parser::parse_filter_query;
139pub use recall::RecallService;
140pub use sync::SyncService;
141pub use tombstone::TombstoneService;
142pub use topic_index::{TopicIndexService, TopicInfo};
143
144// Group service (feature-gated)
145#[cfg(feature = "group-scope")]
146pub use group::GroupService;
147
148use crate::cli::build_llm_provider_for_entity_extraction;
149use crate::config::SubcogConfig;
150use crate::context::GitContext;
151use crate::embedding::Embedder;
152use crate::models::{Memory, MemoryId, SearchFilter};
153use crate::storage::index::{
154    DomainIndexConfig, DomainIndexManager, DomainScope, OrgIndexConfig, SqliteBackend,
155    find_repo_root, get_user_data_dir,
156};
157use crate::storage::traits::{IndexBackend, VectorBackend};
158use crate::{Error, Result};
159use std::path::{Path, PathBuf};
160use std::sync::{Arc, Mutex};
161
162// ============================================================================
163// Service Factory Functions
164// ============================================================================
165
166/// Creates a [`PromptService`] for the given repository path.
167///
168/// This is the canonical way to create a `PromptService` from MCP or CLI layers.
169/// Configuration is loaded from the default location and merged with repo settings.
170///
171/// # Arguments
172///
173/// * `repo_path` - Path to or within a git repository
174///
175/// # Returns
176///
177/// A fully configured `PromptService` with storage backends initialized.
178///
179/// # Example
180///
181/// ```rust,ignore
182/// use subcog::services::prompt_service_for_repo;
183///
184/// let service = prompt_service_for_repo("/path/to/repo")?;
185/// let prompts = service.list(PromptFilter::new())?;
186/// ```
187#[must_use]
188pub fn prompt_service_for_repo(repo_path: impl AsRef<Path>) -> PromptService {
189    let repo_path = repo_path.as_ref();
190    let config = SubcogConfig::load_default().with_repo_path(repo_path);
191    PromptService::with_subcog_config(config).with_repo_path(repo_path)
192}
193
194/// Creates a [`PromptService`] for the current working directory.
195///
196/// This is a convenience function for CLI commands that operate on the current directory.
197///
198/// # Errors
199///
200/// Returns an error if the current working directory cannot be determined.
201///
202/// # Example
203///
204/// ```rust,ignore
205/// use subcog::services::prompt_service_for_cwd;
206///
207/// let service = prompt_service_for_cwd()?;
208/// let prompts = service.list(PromptFilter::new())?;
209/// ```
210pub fn prompt_service_for_cwd() -> Result<PromptService> {
211    let cwd = std::env::current_dir().map_err(|e| Error::OperationFailed {
212        operation: "get_current_dir".to_string(),
213        cause: e.to_string(),
214    })?;
215    Ok(prompt_service_for_repo(&cwd))
216}
217
218// ============================================================================
219// Service Container
220// ============================================================================
221
222/// Container for initialized services with configured backends.
223///
224/// Unlike the previous singleton design, this can be instantiated per-context
225/// with domain-scoped indices.
226///
227/// # `DomainIndexManager` Complexity
228///
229/// The `index_manager` field uses [`DomainIndexManager`] to provide multi-domain
230/// index support with lazy initialization. Key complexity points:
231///
232/// ## Architecture
233///
234/// ```text
235/// ServiceContainer
236///   └── Mutex<DomainIndexManager>
237///         ├── Project index (<user-data>/index.db) // faceted by project/branch/path
238///         ├── User index (<user-data>/index.db)    // user-wide
239///         └── Org index (configured path)          // optional
240/// ```
241///
242/// ## Lazy Initialization
243///
244/// Indices are created on-demand via `index_for_scope()`:
245/// 1. Lock the `Mutex<DomainIndexManager>`
246/// 2. Check if index exists for requested `DomainScope`
247/// 3. If missing, create `SQLite` database at scope-specific path
248/// 4. Return reference to the index
249///
250/// ## Thread Safety
251///
252/// - `Mutex` guards the manager, not individual indices
253/// - Each index has its own internal locking via `SqliteBackend`
254/// - Callers should minimize lock hold time
255///
256/// ## Path Resolution
257///
258/// | Scope | Path |
259/// |-------|------|
260/// | Project | `<user-data>/index.db` |
261/// | User | `<user-data>/index.db` |
262/// | Org | Configured via `OrgIndexConfig` |
263///
264/// ## Error Handling
265///
266/// - Missing repo returns `Error::OperationFailed`
267/// - `SQLite` initialization errors propagate as `Error::OperationFailed`
268/// - Index creation is idempotent (safe to call multiple times)
pub struct ServiceContainer {
    /// Capture service, assembled by `build_capture_service` from whichever
    /// backends were available when the container was constructed.
    capture: CaptureService,
    /// Sync service: `SyncService::default()` for repository containers
    /// (`for_repo`), `SyncService::no_op()` for user-scoped ones (`for_user`).
    sync: SyncService,
    /// Domain index manager for multi-domain indices.
    ///
    /// Locked by `recall_for_scope` and `index` while they create a backend.
    /// See struct-level documentation for complexity notes.
    index_manager: Mutex<DomainIndexManager>,
    /// Repository root resolved by `for_repo`; `None` for user-scoped
    /// containers (this is what `is_user_scope` checks).
    repo_path: Option<PathBuf>,
    /// User data directory (from config, used for graph and other user-scoped data).
    user_data_dir: PathBuf,
    /// Shared embedder for both capture and recall (`None` if unavailable).
    embedder: Option<Arc<dyn Embedder>>,
    /// Shared vector backend for both capture and recall (`None` if unavailable).
    vector: Option<Arc<dyn VectorBackend + Send + Sync>>,
}
287
288impl ServiceContainer {
289    /// Creates a new service container for a repository.
290    ///
291    /// # Arguments
292    ///
293    /// * `repo_path` - Path to or within a git repository
294    /// * `org_config` - Optional organization index configuration
295    ///
296    /// # Errors
297    ///
298    /// Returns an error if the repository cannot be found or backends fail to initialize.
299    pub fn for_repo(
300        repo_path: impl Into<PathBuf>,
301        org_config: Option<OrgIndexConfig>,
302    ) -> Result<Self> {
303        let repo_path = repo_path.into();
304
305        // Find repository root
306        let repo_root = find_repo_root(&repo_path)?;
307
308        if org_config.is_some() {
309            let config = SubcogConfig::load_default().with_repo_path(&repo_root);
310            if !(config.features.org_scope_enabled || cfg!(feature = "org-scope")) {
311                tracing::warn!(
312                    "Org-scope config provided but org-scope is disabled. \
313                     Set SUBCOG_ORG_SCOPE_ENABLED=true or build with --features org-scope."
314                );
315                return Err(Error::FeatureNotEnabled("org-scope".to_string()));
316            }
317        }
318
319        // Load config FIRST to get user's configured data_dir (respects config.toml)
320        // This must happen before creating DomainIndexManager so all components use the same path.
321        let subcog_config = SubcogConfig::load_default();
322
323        let config = DomainIndexConfig {
324            repo_path: Some(repo_root.clone()),
325            org_config,
326            user_data_dir: Some(subcog_config.data_dir.clone()),
327        };
328
329        let index_manager = DomainIndexManager::new(config)?;
330
331        // Create CaptureService with repo_path for project-scoped storage
332        // Propagate auto_extract_entities from loaded config
333        let mut capture_config = crate::config::Config::new().with_repo_path(&repo_root);
334        capture_config.features.auto_extract_entities =
335            subcog_config.features.auto_extract_entities;
336        let user_data_dir = subcog_config.data_dir.clone();
337
338        std::fs::create_dir_all(&user_data_dir).map_err(|e| Error::OperationFailed {
339            operation: "create_user_data_dir".to_string(),
340            cause: format!(
341                "Cannot create {}: {}. Please create manually with: mkdir -p {}",
342                user_data_dir.display(),
343                e,
344                user_data_dir.display()
345            ),
346        })?;
347
348        // Create storage paths using user-level data directory (project facets)
349        let paths = PathManager::for_user(&user_data_dir);
350
351        // Create backends using factory (centralizes initialization logic)
352        let backends = BackendFactory::create_all(&paths.index_path(), &paths.vector_path());
353
354        // Build LLM provider for entity extraction with longer timeout (120s default)
355        let llm_provider = build_llm_provider_for_entity_extraction(&subcog_config);
356
357        // Create entity extraction callback if auto-extraction is enabled
358        let entity_extraction =
359            Self::create_entity_extraction_callback(&capture_config, &paths, llm_provider);
360
361        // Build CaptureService based on available backends
362        let capture = Self::build_capture_service(capture_config, &backends, entity_extraction);
363
364        Ok(Self {
365            capture,
366            sync: SyncService::default(),
367            index_manager: Mutex::new(index_manager),
368            repo_path: Some(repo_root),
369            user_data_dir,
370            embedder: backends.embedder,
371            vector: backends.vector,
372        })
373    }
374
375    /// Creates a service container from the current directory.
376    ///
377    /// # Errors
378    ///
379    /// Returns an error if not in a git repository.
380    pub fn from_current_dir() -> Result<Self> {
381        let cwd = std::env::current_dir().map_err(|e| Error::OperationFailed {
382            operation: "get_current_dir".to_string(),
383            cause: e.to_string(),
384        })?;
385
386        Self::for_repo(cwd, None)
387    }
388
389    /// Creates a service container for user-scoped storage.
390    ///
391    /// Used when operating outside a git repository. Stores memories in the
392    /// user's local data directory using `SQLite` persistence.
393    ///
394    /// # Storage Paths
395    ///
396    /// | Platform | Path |
397    /// |----------|------|
398    /// | macOS | `~/Library/Application Support/subcog/` |
399    /// | Linux | `~/.local/share/subcog/` |
400    /// | Windows | `C:\Users\<User>\AppData\Local\subcog\` |
401    ///
402    /// # Errors
403    ///
404    /// Returns an error if the user data directory cannot be created or
405    /// storage backends fail to initialize.
406    pub fn for_user() -> Result<Self> {
407        // Load config to get user's configured data_dir (respects config.toml)
408        let subcog_config = SubcogConfig::load_default();
409        let user_data_dir = subcog_config.data_dir.clone();
410
411        // Ensure user data directory exists
412        std::fs::create_dir_all(&user_data_dir).map_err(|e| Error::OperationFailed {
413            operation: "create_user_data_dir".to_string(),
414            cause: format!(
415                "Cannot create {}: {}. Please create manually with: mkdir -p {}",
416                user_data_dir.display(),
417                e,
418                user_data_dir.display()
419            ),
420        })?;
421
422        // Create storage paths using PathManager
423        let paths = PathManager::for_user(&user_data_dir);
424
425        // Create domain index config for user-only mode (no repo)
426        // Pass user_data_dir to ensure consistency with CaptureService paths
427        let config = DomainIndexConfig {
428            repo_path: None,
429            org_config: None,
430            user_data_dir: Some(user_data_dir.clone()),
431        };
432        let index_manager = DomainIndexManager::new(config)?;
433
434        // Create CaptureService WITHOUT repo_path (user scope)
435        // Propagate auto_extract_entities from loaded config
436        let mut capture_config = crate::config::Config::new();
437        capture_config.features.auto_extract_entities =
438            subcog_config.features.auto_extract_entities;
439
440        // Create backends using factory (centralizes initialization logic)
441        let backends = BackendFactory::create_all(&paths.index_path(), &paths.vector_path());
442
443        // Build LLM provider for entity extraction with longer timeout (120s default)
444        let llm_provider = build_llm_provider_for_entity_extraction(&subcog_config);
445
446        // Create entity extraction callback if auto-extraction is enabled
447        let entity_extraction =
448            Self::create_entity_extraction_callback(&capture_config, &paths, llm_provider);
449
450        // Build CaptureService based on available backends
451        let capture = Self::build_capture_service(capture_config, &backends, entity_extraction);
452
453        tracing::info!(
454            user_data_dir = %user_data_dir.display(),
455            "Created user-scoped service container"
456        );
457
458        Ok(Self {
459            capture,
460            sync: SyncService::no_op(),
461            index_manager: Mutex::new(index_manager),
462            repo_path: None,
463            user_data_dir,
464            embedder: backends.embedder,
465            vector: backends.vector,
466        })
467    }
468
469    /// Creates a service container from the current directory, falling back to user scope.
470    ///
471    /// This is the recommended factory method for CLI and MCP entry points:
472    /// - If in a git repository → uses project scope (user-level index + project facets)
473    /// - If NOT in a git repository → uses user scope (user-level index)
474    ///
475    /// # Examples
476    ///
477    /// ```rust,ignore
478    /// // Works in any directory
479    /// let container = ServiceContainer::from_current_dir_or_user()?;
480    ///
481    /// // In git repo: subcog://project/{namespace}/{id}
482    /// // Outside git: subcog://user/{namespace}/{id}
483    /// let result = container.capture().capture(request)?;
484    /// ```
485    ///
486    /// # Errors
487    ///
488    /// Returns an error only if both project and user scope fail to initialize.
489    pub fn from_current_dir_or_user() -> Result<Self> {
490        // Try project scope first
491        match Self::from_current_dir() {
492            Ok(container) => {
493                tracing::debug!("Using project-scoped service container");
494                Ok(container)
495            },
496            Err(e) => {
497                tracing::debug!(
498                    error = %e,
499                    "Not in git repository, falling back to user scope"
500                );
501                Self::for_user()
502            },
503        }
504    }
505
506    /// Returns whether this container is using user scope (no git repository).
507    #[must_use]
508    pub const fn is_user_scope(&self) -> bool {
509        self.repo_path.is_none()
510    }
511
512    /// Creates a recall service for a specific domain scope.
513    ///
514    /// The recall service is configured with:
515    /// - Index backend (`SQLite` FTS5) for text search
516    /// - Embedder for generating query embeddings (if available)
517    /// - Vector backend for similarity search (if available)
518    ///
519    /// # Errors
520    ///
521    /// Returns an error if the index cannot be initialized.
522    pub fn recall_for_scope(&self, scope: DomainScope) -> Result<RecallService> {
523        // Use high-level API that handles path resolution and directory creation
524        let index = {
525            let manager = self
526                .index_manager
527                .lock()
528                .map_err(|e| Error::OperationFailed {
529                    operation: "lock_index_manager".to_string(),
530                    cause: e.to_string(),
531                })?;
532            manager.create_backend(scope)?
533        }; // Lock released here
534
535        // Start with index-only service
536        let mut service = RecallService::with_index(index);
537
538        // Add embedder and vector backends if available
539        if let Some(ref embedder) = self.embedder {
540            service = service.with_embedder(Arc::clone(embedder));
541        }
542        if let Some(ref vector) = self.vector {
543            service = service.with_vector(Arc::clone(vector));
544        }
545
546        if matches!(scope, DomainScope::Project)
547            && let Some(filter) = self.project_scope_filter()
548        {
549            service = service.with_scope_filter(filter);
550        }
551
552        Ok(service)
553    }
554
555    fn project_scope_filter(&self) -> Option<SearchFilter> {
556        let repo_path = self.repo_path.as_ref()?;
557        let git_context = GitContext::from_path(repo_path);
558        git_context
559            .project_id
560            .map(|project_id| SearchFilter::new().with_project_id(project_id))
561    }
562
563    /// Creates a recall service for the appropriate scope.
564    ///
565    /// Uses user scope for user-scoped containers, project scope otherwise.
566    ///
567    /// # Errors
568    ///
569    /// Returns an error if the index cannot be initialized.
570    pub fn recall(&self) -> Result<RecallService> {
571        let scope = if self.is_user_scope() {
572            DomainScope::User
573        } else {
574            DomainScope::Project
575        };
576        self.recall_for_scope(scope)
577    }
578
579    /// Returns the capture service.
580    #[must_use]
581    pub const fn capture(&self) -> &CaptureService {
582        &self.capture
583    }
584
585    /// Returns the sync service.
586    #[must_use]
587    pub const fn sync(&self) -> &SyncService {
588        &self.sync
589    }
590
591    /// Returns the repository path.
592    #[must_use]
593    pub const fn repo_path(&self) -> Option<&PathBuf> {
594        self.repo_path.as_ref()
595    }
596
597    /// Returns a reference to the embedder if available.
598    #[must_use]
599    pub fn embedder(&self) -> Option<Arc<dyn Embedder>> {
600        self.embedder.clone()
601    }
602
603    /// Returns a reference to the vector backend if available.
604    #[must_use]
605    pub fn vector(&self) -> Option<Arc<dyn VectorBackend + Send + Sync>> {
606        self.vector.clone()
607    }
608
609    /// Creates an index backend for the project scope.
610    ///
611    /// # Errors
612    ///
613    /// Returns an error if the index cannot be initialized.
614    pub fn index(&self) -> Result<SqliteBackend> {
615        let manager = self
616            .index_manager
617            .lock()
618            .map_err(|e| Error::OperationFailed {
619                operation: "lock_index_manager".to_string(),
620                cause: e.to_string(),
621            })?;
622        manager.create_backend(DomainScope::Project)
623    }
624
625    /// Builds a `CaptureService` from available backends.
626    ///
627    /// Applies graceful degradation: uses whatever backends are available.
628    ///
629    /// # Arguments
630    ///
631    /// * `config` - The capture configuration
632    /// * `backends` - Available storage backends
633    /// * `entity_extraction` - Optional callback for entity extraction (Graph RAG)
634    fn build_capture_service(
635        config: crate::config::Config,
636        backends: &BackendSet,
637        entity_extraction: Option<capture::EntityExtractionCallback>,
638    ) -> CaptureService {
639        let mut service = CaptureService::new(config);
640
641        // Add embedder if available
642        if let Some(ref embedder) = backends.embedder {
643            service = service.with_embedder(Arc::clone(embedder));
644        }
645
646        // Add index backend if available
647        if let Some(ref index) = backends.index {
648            service = service.with_index(Arc::clone(index));
649        }
650
651        // Add vector backend if available
652        if let Some(ref vector) = backends.vector {
653            service = service.with_vector(Arc::clone(vector));
654        }
655
656        // Add entity extraction callback if provided
657        if let Some(callback) = entity_extraction {
658            service = service.with_entity_extraction(callback);
659        }
660
661        service
662    }
663
664    /// Creates an entity extraction callback if auto-extraction is enabled.
665    ///
666    /// The callback:
667    /// 1. Extracts entities from the memory content using [`EntityExtractorService`]
668    /// 2. Stores entities and relationships in the [`GraphService`]
669    /// 3. Records mentions linking memories to entities
670    ///
671    /// # Arguments
672    ///
673    /// * `config` - The capture configuration (checked for `auto_extract_entities` flag)
674    /// * `paths` - Path manager for locating the graph database
675    /// * `llm` - Optional LLM provider for intelligent extraction
676    ///
677    /// # Returns
678    ///
679    /// `Some(callback)` if auto-extraction is enabled and graph backend initializes,
680    /// `None` otherwise (graceful degradation).
681    #[allow(clippy::excessive_nesting)] // Callback closures require nested scopes
682    fn create_entity_extraction_callback(
683        config: &crate::config::Config,
684        paths: &PathManager,
685        llm: Option<Arc<dyn crate::llm::LlmProvider>>,
686    ) -> Option<capture::EntityExtractionCallback> {
687        // Check if auto-extraction is enabled
688        if !config.features.auto_extract_entities {
689            return None;
690        }
691
692        // Create graph backend (gracefully degrade if it fails)
693        let graph_path = paths.graph_path();
694        let graph_backend = match crate::storage::graph::SqliteGraphBackend::new(&graph_path) {
695            Ok(backend) => backend,
696            Err(e) => {
697                tracing::warn!(
698                    error = %e,
699                    "Failed to create graph backend for entity extraction, disabling"
700                );
701                return None;
702            },
703        };
704
705        // Create services wrapped in Arc for sharing
706        let graph_service = Arc::new(GraphService::new(graph_backend));
707        let domain = crate::models::Domain::new(); // Default domain for extraction
708
709        let entity_extractor = if let Some(llm) = llm {
710            Arc::new(EntityExtractorService::with_shared_llm(llm, domain))
711        } else {
712            Arc::new(EntityExtractorService::without_llm(domain))
713        };
714
715        // Create the callback that captures the services
716        let callback: capture::EntityExtractionCallback = Arc::new(move |content, memory_id| {
717            use crate::models::graph::{Entity, EntityType, Relationship, RelationshipType};
718            use std::collections::HashMap;
719
720            let mut stats = capture::EntityExtractionStats::default();
721
722            // Extract entities from content
723            let extraction = entity_extractor.extract(content)?;
724            stats.used_fallback = extraction.used_fallback;
725
726            // Map entity names to IDs for relationship resolution
727            let mut name_to_id: HashMap<String, crate::models::graph::EntityId> = HashMap::new();
728
729            // Store entities in graph
730            for extracted in &extraction.entities {
731                // Parse entity type, defaulting to Concept if unknown
732                let entity_type =
733                    EntityType::parse(&extracted.entity_type).unwrap_or(EntityType::Concept);
734
735                // Create the Entity from ExtractedEntity
736                let entity =
737                    Entity::new(entity_type, &extracted.name, crate::models::Domain::new())
738                        .with_confidence(extracted.confidence)
739                        .with_aliases(extracted.aliases.iter().cloned());
740
741                // Store entity with deduplication (returns actual ID, existing or new)
742                match graph_service.store_entity_deduped(&entity) {
743                    Ok(actual_id) => {
744                        stats.entities_stored += 1;
745
746                        // Track name to ID mapping for relationship resolution
747                        // Use the actual ID returned (may be existing entity's ID)
748                        name_to_id.insert(extracted.name.clone(), actual_id.clone());
749                        for alias in &extracted.aliases {
750                            name_to_id.insert(alias.clone(), actual_id.clone());
751                        }
752
753                        // Record mention linking memory to entity
754                        if let Err(e) = graph_service.record_mention(&actual_id, memory_id) {
755                            tracing::debug!(
756                                memory_id = %memory_id,
757                                entity_id = %actual_id.as_ref(),
758                                error = %e,
759                                "Failed to record entity mention"
760                            );
761                        }
762                    },
763                    Err(e) => {
764                        tracing::debug!(
765                            entity_name = %extracted.name,
766                            error = %e,
767                            "Failed to store entity"
768                        );
769                    },
770                }
771            }
772
773            // Store relationships in graph
774            for extracted_rel in &extraction.relationships {
775                // Look up entity IDs by name - skip if either entity not found
776                let (Some(from), Some(to)) = (
777                    name_to_id.get(&extracted_rel.from),
778                    name_to_id.get(&extracted_rel.to),
779                ) else {
780                    tracing::debug!(
781                        from = %extracted_rel.from,
782                        to = %extracted_rel.to,
783                        "Skipping relationship: one or both entities not found"
784                    );
785                    continue;
786                };
787
788                // Parse relationship type, defaulting to RelatesTo if unknown
789                let rel_type = RelationshipType::parse(&extracted_rel.relationship_type)
790                    .unwrap_or(RelationshipType::RelatesTo);
791
792                let relationship = Relationship::new(from.clone(), to.clone(), rel_type)
793                    .with_confidence(extracted_rel.confidence);
794
795                if let Err(e) = graph_service.store_relationship(&relationship) {
796                    tracing::debug!(
797                        from = %extracted_rel.from,
798                        to = %extracted_rel.to,
799                        error = %e,
800                        "Failed to store relationship"
801                    );
802                } else {
803                    stats.relationships_stored += 1;
804                }
805            }
806
807            Ok(stats)
808        });
809
810        Some(callback)
811    }
812
813    /// Creates a deduplication service without embedding support.
814    ///
815    /// This variant supports:
816    /// - Exact match (SHA256 hash comparison)
817    /// - Recent capture (LRU cache with TTL)
818    ///
819    /// For full semantic similarity support, create a `DeduplicationService`
820    /// directly with an embedder and vector backend.
821    ///
822    /// # Errors
823    ///
824    /// Returns an error if the recall service cannot be initialized.
825    pub fn deduplication(
826        &self,
827    ) -> Result<
828        deduplication::DeduplicationService<
829            crate::embedding::FastEmbedEmbedder,
830            crate::storage::vector::UsearchBackend,
831        >,
832    > {
833        let recall = std::sync::Arc::new(self.recall()?);
834        let config = deduplication::DeduplicationConfig::from_env();
835        Ok(deduplication::DeduplicationService::without_embeddings(
836            recall, config,
837        ))
838    }
839
840    /// Creates a deduplication service with custom configuration.
841    ///
842    /// # Arguments
843    ///
844    /// * `config` - Custom deduplication configuration
845    ///
846    /// # Errors
847    ///
848    /// Returns an error if the recall service cannot be initialized.
849    pub fn deduplication_with_config(
850        &self,
851        config: deduplication::DeduplicationConfig,
852    ) -> Result<
853        deduplication::DeduplicationService<
854            crate::embedding::FastEmbedEmbedder,
855            crate::storage::vector::UsearchBackend,
856        >,
857    > {
858        let recall = std::sync::Arc::new(self.recall()?);
859        Ok(deduplication::DeduplicationService::without_embeddings(
860            recall, config,
861        ))
862    }
863
864    /// Creates a data subject service for GDPR operations.
865    ///
866    /// Provides:
867    /// - `export_user_data()` - Export all user data (GDPR Article 20)
868    /// - `delete_user_data()` - Delete all user data (GDPR Article 17)
869    ///
870    /// # Errors
871    ///
872    /// Returns an error if the index backend cannot be initialized.
873    pub fn data_subject(&self) -> Result<DataSubjectService> {
874        let index = self.index()?;
875        let mut service = DataSubjectService::new(index);
876        if let Some(ref vector) = self.vector {
877            service = service.with_vector(Arc::clone(vector));
878        }
879        Ok(service)
880    }
881
882    /// Gets the index path for a domain scope.
883    ///
884    /// # Errors
885    ///
886    /// Returns an error if the path cannot be determined.
887    pub fn get_index_path(&self, scope: DomainScope) -> Result<PathBuf> {
888        let manager = self
889            .index_manager
890            .lock()
891            .map_err(|e| Error::OperationFailed {
892                operation: "lock_index_manager".to_string(),
893                cause: e.to_string(),
894            })?;
895        manager.get_index_path(scope)
896    }
897
898    /// Rebuilds the FTS index from `SQLite` data for a specific scope.
899    ///
900    /// Since `SQLite` is the authoritative storage, this function reads all memories
901    /// from the `SQLite` database and rebuilds the FTS5 full-text search index.
902    ///
903    /// # Arguments
904    ///
905    /// * `scope` - The domain scope to reindex
906    ///
907    /// # Returns
908    ///
909    /// The number of memories indexed.
910    ///
911    /// # Errors
912    ///
913    /// Returns an error if reading or indexing fails.
914    pub fn reindex_scope(&self, scope: DomainScope) -> Result<usize> {
915        use crate::models::SearchFilter;
916
917        // Create index backend using high-level API
918        let index = {
919            let manager = self
920                .index_manager
921                .lock()
922                .map_err(|e| Error::OperationFailed {
923                    operation: "lock_index_manager".to_string(),
924                    cause: e.to_string(),
925                })?;
926            manager.create_backend(scope)?
927        };
928
929        // Get all memory IDs from SQLite
930        let filter = SearchFilter::default();
931        let all_ids = index.list_all(&filter, usize::MAX)?;
932
933        if all_ids.is_empty() {
934            return Ok(0);
935        }
936
937        // Get full memories
938        let ids: Vec<MemoryId> = all_ids.into_iter().map(|(id, _)| id).collect();
939        let memories: Vec<Memory> = index
940            .get_memories_batch(&ids)?
941            .into_iter()
942            .flatten()
943            .collect();
944
945        if memories.is_empty() {
946            return Ok(0);
947        }
948
949        // Clear FTS and rebuild
950        index.clear()?;
951        let count = memories.len();
952        index.reindex(&memories)?;
953
954        Ok(count)
955    }
956
957    /// Reindexes memories for the project scope (default).
958    ///
959    /// # Errors
960    ///
961    /// Returns an error if notes cannot be read or indexing fails.
962    pub fn reindex(&self) -> Result<usize> {
963        self.reindex_scope(DomainScope::Project)
964    }
965
966    /// Reindexes all domain scopes.
967    ///
968    /// # Returns
969    ///
970    /// A map of scope to count of indexed memories.
971    ///
972    /// # Errors
973    ///
974    /// Returns an error if any scope fails to reindex.
975    pub fn reindex_all(&self) -> Result<std::collections::HashMap<DomainScope, usize>> {
976        let mut results = std::collections::HashMap::new();
977
978        for scope in [DomainScope::Project, DomainScope::User, DomainScope::Org] {
979            match self.reindex_scope(scope) {
980                Ok(count) => {
981                    results.insert(scope, count);
982                },
983                Err(e) => {
984                    tracing::warn!("Failed to reindex scope {:?}: {e}", scope);
985                },
986            }
987        }
988
989        Ok(results)
990    }
991
992    /// Creates a graph service for knowledge graph operations.
993    ///
994    /// The graph service stores entities and relationships in a dedicated
995    /// `SQLite` database (`graph.db`) in the user data directory.
996    ///
997    /// # Errors
998    ///
999    /// Returns an error if the graph backend cannot be initialized.
1000    ///
1001    /// # Example
1002    ///
1003    /// ```rust,ignore
1004    /// let container = ServiceContainer::from_current_dir_or_user()?;
1005    /// let graph = container.graph()?;
1006    ///
1007    /// let entity = graph.store_entity(Entity::new(EntityType::Technology, "Rust", domain))?;
1008    /// ```
1009    pub fn graph(&self) -> Result<GraphService<crate::storage::graph::SqliteGraphBackend>> {
1010        use crate::storage::graph::SqliteGraphBackend;
1011
1012        // Use the configured user_data_dir (respects config.toml data_dir setting)
1013        let paths = PathManager::for_user(&self.user_data_dir);
1014        let graph_path = paths.graph_path();
1015
1016        let backend = SqliteGraphBackend::new(&graph_path).map_err(|e| Error::OperationFailed {
1017            operation: "create_graph_backend".to_string(),
1018            cause: e.to_string(),
1019        })?;
1020
1021        Ok(GraphService::new(backend))
1022    }
1023
1024    /// Creates an entity extractor service for extracting entities from text.
1025    ///
1026    /// The extractor uses pattern-based fallback when no LLM is provided.
1027    /// For LLM-powered extraction, use [`Self::entity_extractor_with_llm`].
1028    ///
1029    /// # Returns
1030    ///
1031    /// An [`EntityExtractorService`] configured for the appropriate domain.
1032    #[must_use]
1033    pub fn entity_extractor(&self) -> EntityExtractorService {
1034        let domain = self.current_domain();
1035        EntityExtractorService::without_llm(domain)
1036    }
1037
1038    /// Creates an entity extractor service with LLM support.
1039    ///
1040    /// The extractor uses the provided LLM for intelligent entity extraction.
1041    /// Falls back to pattern-based extraction if LLM calls fail.
1042    ///
1043    /// # Arguments
1044    ///
1045    /// * `llm` - The LLM provider to use for extraction.
1046    ///
1047    /// # Returns
1048    ///
1049    /// An [`EntityExtractorService`] configured with LLM support.
1050    pub fn entity_extractor_with_llm(
1051        &self,
1052        llm: Arc<dyn crate::llm::LlmProvider>,
1053    ) -> EntityExtractorService {
1054        let domain = self.current_domain();
1055        EntityExtractorService::with_shared_llm(llm, domain)
1056    }
1057
1058    /// Returns the current domain based on scope.
1059    ///
1060    /// - If in a git repository: returns project-scoped domain (`Domain::new()`)
1061    /// - If NOT in a git repository: returns user-scoped domain (`Domain::for_user()`)
1062    fn current_domain(&self) -> crate::models::Domain {
1063        if self.repo_path.is_some() {
1064            // Project scope: uses user-level storage with project facets
1065            crate::models::Domain::new()
1066        } else {
1067            // User scope: uses user-level storage without project facets
1068            crate::models::Domain::for_user()
1069        }
1070    }
1071
1072    /// Creates a webhook service for event notifications.
1073    ///
1074    /// The webhook service subscribes to memory events and delivers them to
1075    /// configured webhook endpoints. Configuration is loaded from
1076    /// `~/.config/subcog/webhooks.yaml`.
1077    ///
1078    /// Returns `Ok(None)` if no webhooks are configured.
1079    ///
1080    /// # Errors
1081    ///
1082    /// Returns an error if the configuration is invalid or the audit database
1083    /// cannot be created.
1084    ///
1085    /// # Example
1086    ///
1087    /// ```rust,ignore
1088    /// let container = ServiceContainer::from_current_dir_or_user()?;
1089    /// if let Some(webhook_service) = container.webhook_service()? {
1090    ///     // Start webhook dispatcher as background task
1091    ///     let _handle = webhook_service.start();
1092    /// }
1093    /// ```
1094    pub fn webhook_service(&self) -> Result<Option<crate::webhooks::WebhookService>> {
1095        let scope = if self.is_user_scope() {
1096            crate::storage::index::DomainScope::User
1097        } else {
1098            crate::storage::index::DomainScope::Project
1099        };
1100
1101        let user_data_dir = get_user_data_dir()?;
1102        crate::webhooks::WebhookService::from_config_file(scope, &user_data_dir)
1103    }
1104}