// subcog/llm/mod.rs

1//! LLM client abstraction (DOC-H3).
2//!
3//! Provides a unified interface for different LLM providers including
4//! Anthropic Claude, `OpenAI` GPT, Ollama (local), and LM Studio (local).
5//!
6//! # Supported Providers
7//!
8//! | Provider | Client | Environment Variables |
9//! |----------|--------|----------------------|
10//! | Anthropic | [`AnthropicClient`] | `ANTHROPIC_API_KEY` |
11//! | `OpenAI` | [`OpenAiClient`] | `OPENAI_API_KEY` |
12//! | Ollama | [`OllamaClient`] | `OLLAMA_HOST`, `OLLAMA_MODEL` |
13//! | LM Studio | [`LmStudioClient`] | `LMSTUDIO_ENDPOINT`, `LMSTUDIO_MODEL` |
14//!
15//! # Usage Examples
16//!
17//! ## Basic Completion
18//!
19//! ```rust,ignore
20//! use subcog::llm::{LlmProvider, AnthropicClient};
21//!
22//! let client = AnthropicClient::new();
23//! let response = client.complete("Explain Rust ownership in one sentence")?;
24//! println!("{response}");
25//! ```
26//!
27//! ## With System Prompt
28//!
29//! ```rust,ignore
30//! use subcog::llm::{LlmProvider, OpenAiClient};
31//!
32//! let client = OpenAiClient::new();
33//! let response = client.complete_with_system(
34//!     "You are a helpful coding assistant.",
35//!     "How do I parse JSON in Rust?"
36//! )?;
37//! ```
38//!
39//! ## Capture Analysis
40//!
41//! ```rust,ignore
42//! use subcog::llm::{LlmProvider, OllamaClient};
43//!
44//! let client = OllamaClient::new();
45//! let analysis = client.analyze_for_capture(
46//!     "Decision: Use PostgreSQL for the primary database due to JSONB support"
47//! )?;
48//!
49//! if analysis.should_capture && analysis.confidence > 0.8 {
50//!     println!("Suggested namespace: {:?}", analysis.suggested_namespace);
51//!     println!("Suggested tags: {:?}", analysis.suggested_tags);
52//! }
53//! ```
54//!
55//! ## Resilient Provider with Circuit Breaker
56//!
57//! ```rust,ignore
58//! use subcog::llm::{LlmProvider, AnthropicClient, ResilientLlmProvider, LlmResilienceConfig};
59//!
60//! let base_client = AnthropicClient::new();
61//! let resilient = ResilientLlmProvider::new(
62//!     Box::new(base_client),
63//!     LlmResilienceConfig::default()
64//! );
65//!
66//! // Automatically retries with exponential backoff and circuit breaker
67//! let response = resilient.complete("Hello world")?;
68//! ```
69//!
70//! # Configuration
71//!
72//! Providers can be configured via environment variables or the config file:
73//!
74//! ```toml
75//! [llm]
76//! provider = "anthropic"  # or "openai", "ollama", "lmstudio"
77//! model = "claude-sonnet-4-20250514"
78//! timeout_ms = 30000
79//! max_retries = 3
80//! ```
81//!
82//! # Implementing a New Provider
83//!
84//! To add a new LLM provider:
85//!
86//! 1. Create a new module (e.g., `src/llm/newprovider.rs`)
87//! 2. Implement the [`LlmProvider`] trait
88//! 3. Export the client from this module
89//!
90//! ## Required Trait Methods
91//!
92//! | Method | Purpose |
93//! |--------|---------|
94//! | [`LlmProvider::name`] | Return provider identifier (e.g., "anthropic") |
95//! | [`LlmProvider::complete`] | Generate completion for a prompt |
96//! | [`LlmProvider::analyze_for_capture`] | Analyze content for memory capture |
97//!
98//! ## Optional Methods (with defaults)
99//!
100//! | Method | Default Behavior |
101//! |--------|------------------|
102//! | `complete_with_system` | Concatenates system and user prompts |
103//! | `analyze_for_capture_extended` | Uses unified system prompt |
104//! | `classify_search_intent` | Uses unified system prompt |
105//! | `analyze_for_consolidation` | Uses unified system prompt |
106//!
107//! ## Example Implementation
108//!
109//! ```rust,ignore
110//! use subcog::llm::{LlmProvider, CaptureAnalysis};
111//! use subcog::Result;
112//!
113//! pub struct MyProvider {
114//!     api_key: String,
115//!     model: String,
116//! }
117//!
118//! impl LlmProvider for MyProvider {
119//!     fn name(&self) -> &'static str {
120//!         "myprovider"
121//!     }
122//!
123//!     fn complete(&self, prompt: &str) -> Result<String> {
124//!         // Make API call to your provider
125//!         todo!()
126//!     }
127//!
128//!     fn analyze_for_capture(&self, content: &str) -> Result<CaptureAnalysis> {
129//!         // Use CAPTURE_ANALYSIS_PROMPT or custom prompt
130//!         let prompt = format!(
131//!             "{}\n\nContent: {content}",
132//!             subcog::llm::CAPTURE_ANALYSIS_PROMPT
133//!         );
134//!         let response = self.complete(&prompt)?;
135//!         // Parse JSON response into CaptureAnalysis
136//!         todo!()
137//!     }
138//! }
139//! ```
140//!
141//! ## HTTP Client Guidelines
142//!
143//! Use [`build_http_client`] with [`LlmHttpConfig`] for consistent timeout handling:
144//!
145//! ```rust,ignore
146//! use subcog::llm::{build_http_client, LlmHttpConfig};
147//!
148//! let config = LlmHttpConfig::from_env();
149//! let client = build_http_client(config);
150//! ```
151
152mod anthropic;
153mod lmstudio;
154mod ollama;
155mod openai;
156mod resilience;
157pub mod system_prompt;
158
159pub use anthropic::AnthropicClient;
160pub use lmstudio::LmStudioClient;
161pub use ollama::OllamaClient;
162pub use openai::OpenAiClient;
163pub use resilience::{LlmResilienceConfig, ResilientLlmProvider};
164pub use system_prompt::{
165    ArchiveCandidate, BASE_SYSTEM_PROMPT, CAPTURE_ANALYSIS_PROMPT, CONSOLIDATION_PROMPT,
166    ConsolidationAnalysis, ContradictionAssessment, ContradictionDetail, ENRICHMENT_PROMPT,
167    ExtendedCaptureAnalysis, ExtendedSearchIntent, MEMORY_SUMMARIZATION_PROMPT, MergeCandidate,
168    OperationMode, SEARCH_INTENT_PROMPT, SecurityAssessment, build_system_prompt,
169    build_system_prompt_with_config,
170};
171
172use crate::Result;
173use crate::security::{ContentRedactor, RedactionConfig};
174use std::sync::LazyLock;
175use std::time::Duration;
176
177/// Trait for LLM providers.
178pub trait LlmProvider: Send + Sync {
179    /// The provider name.
180    fn name(&self) -> &'static str;
181
182    /// Generates a completion for the given prompt.
183    ///
184    /// # Errors
185    ///
186    /// Returns an error if the completion fails.
187    fn complete(&self, prompt: &str) -> Result<String>;
188
189    /// Generates a completion with a system prompt.
190    ///
191    /// # Errors
192    ///
193    /// Returns an error if the completion fails.
194    ///
195    /// Default implementation concatenates system and user prompts.
196    /// Providers should override this to use native system prompt support.
197    fn complete_with_system(&self, system: &str, user: &str) -> Result<String> {
198        let combined = format!("{system}\n\n---\n\nUser message:\n{user}");
199        self.complete(&combined)
200    }
201
202    /// Analyzes content for memory capture.
203    ///
204    /// # Errors
205    ///
206    /// Returns an error if analysis fails.
207    fn analyze_for_capture(&self, content: &str) -> Result<CaptureAnalysis>;
208
209    /// Analyzes content for memory capture with extended security analysis.
210    ///
211    /// Uses the unified subcog system prompt for comprehensive analysis
212    /// including adversarial detection and contradiction checking.
213    ///
214    /// # Arguments
215    ///
216    /// * `content` - The content to analyze.
217    /// * `existing_memories` - Optional context of existing memories for contradiction detection.
218    ///
219    /// # Errors
220    ///
221    /// Returns an error if analysis fails.
222    fn analyze_for_capture_extended(
223        &self,
224        content: &str,
225        existing_memories: Option<&str>,
226    ) -> Result<ExtendedCaptureAnalysis> {
227        let system = build_system_prompt(OperationMode::CaptureAnalysis, existing_memories);
228        let user = format!("Analyze this content for capture:\n\n{content}");
229        let response = self.complete_with_system(&system, &user)?;
230        parse_extended_capture_analysis(&response)
231    }
232
233    /// Classifies search intent with namespace weights.
234    ///
235    /// Uses the unified subcog system prompt for intent classification
236    /// with enhanced topic extraction and namespace weighting.
237    ///
238    /// # Errors
239    ///
240    /// Returns an error if classification fails.
241    fn classify_search_intent(&self, prompt: &str) -> Result<ExtendedSearchIntent> {
242        let system = build_system_prompt(OperationMode::SearchIntent, None);
243        let user = format!("Classify the search intent of this prompt:\n\n{prompt}");
244        let response = self.complete_with_system(&system, &user)?;
245        parse_extended_search_intent(&response)
246    }
247
248    /// Analyzes memories for consolidation.
249    ///
250    /// Uses the unified subcog system prompt to identify merge candidates,
251    /// archive candidates, and contradictions.
252    ///
253    /// # Arguments
254    ///
255    /// * `memories` - JSON array of memories to analyze.
256    ///
257    /// # Errors
258    ///
259    /// Returns an error if analysis fails.
260    fn analyze_for_consolidation(&self, memories: &str) -> Result<ConsolidationAnalysis> {
261        let system = build_system_prompt(OperationMode::Consolidation, None);
262        let user = format!("Analyze these memories for consolidation:\n\n{memories}");
263        let response = self.complete_with_system(&system, &user)?;
264        parse_consolidation_analysis(&response)
265    }
266}
267
/// Analysis result for content capture.
#[derive(Debug, Clone, PartialEq)]
pub struct CaptureAnalysis {
    /// Whether the content should be captured.
    pub should_capture: bool,
    /// Confidence score (0.0 to 1.0).
    pub confidence: f32,
    /// Suggested namespace.
    pub suggested_namespace: Option<String>,
    /// Suggested tags.
    pub suggested_tags: Vec<String>,
    /// Reasoning for the decision.
    pub reasoning: String,
}
282
/// HTTP client configuration for LLM providers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LlmHttpConfig {
    /// Request timeout in milliseconds (0 to disable).
    pub timeout_ms: u64,
    /// Connect timeout in milliseconds (0 to disable).
    pub connect_timeout_ms: u64,
}
291
/// Maximum number of characters of an LLM response to embed in error text.
const MAX_LLM_ERROR_RESPONSE_CHARS: usize = 200;
/// Redactor applied to LLM responses before they are embedded in error
/// messages; configured with PII redaction (`with_pii`).
static LLM_ERROR_REDACTOR: LazyLock<ContentRedactor> =
    LazyLock::new(|| ContentRedactor::with_config(RedactionConfig::new().with_pii()));
295
296impl Default for LlmHttpConfig {
297    fn default() -> Self {
298        Self {
299            timeout_ms: 30_000,
300            connect_timeout_ms: 3_000,
301        }
302    }
303}
304
305impl LlmHttpConfig {
306    /// Loads HTTP configuration from environment variables.
307    #[must_use]
308    pub fn from_env() -> Self {
309        Self::default().with_env_overrides()
310    }
311
312    /// Loads HTTP configuration from config file settings.
313    #[must_use]
314    pub fn from_config(config: &crate::config::LlmConfig) -> Self {
315        let mut settings = Self::default();
316        if let Some(timeout_ms) = config.timeout_ms {
317            settings.timeout_ms = timeout_ms;
318        }
319        if let Some(connect_timeout_ms) = config.connect_timeout_ms {
320            settings.connect_timeout_ms = connect_timeout_ms;
321        }
322        settings
323    }
324
325    /// Applies environment variable overrides.
326    #[must_use]
327    pub fn with_env_overrides(mut self) -> Self {
328        if let Some(timeout_ms) = std::env::var("SUBCOG_LLM_TIMEOUT_MS")
329            .ok()
330            .and_then(|v| v.parse::<u64>().ok())
331        {
332            self.timeout_ms = timeout_ms;
333        }
334        if let Some(connect_timeout_ms) = std::env::var("SUBCOG_LLM_CONNECT_TIMEOUT_MS")
335            .ok()
336            .and_then(|v| v.parse::<u64>().ok())
337        {
338            self.connect_timeout_ms = connect_timeout_ms;
339        }
340        self
341    }
342}
343
344/// Builds a blocking HTTP client for LLM requests with configured timeouts.
345#[must_use]
346pub fn build_http_client(config: LlmHttpConfig) -> reqwest::blocking::Client {
347    let mut builder = reqwest::blocking::Client::builder();
348    if config.timeout_ms > 0 {
349        builder = builder.timeout(Duration::from_millis(config.timeout_ms));
350    }
351    if config.connect_timeout_ms > 0 {
352        builder = builder.connect_timeout(Duration::from_millis(config.connect_timeout_ms));
353    }
354
355    builder.build().unwrap_or_else(|err| {
356        tracing::warn!("Failed to build LLM HTTP client: {err}");
357        let mut fallback = reqwest::blocking::Client::builder();
358        if config.timeout_ms > 0 {
359            fallback = fallback.timeout(Duration::from_millis(config.timeout_ms));
360        }
361        if config.connect_timeout_ms > 0 {
362            fallback = fallback.connect_timeout(Duration::from_millis(config.connect_timeout_ms));
363        }
364        fallback.build().unwrap_or_else(|fallback_err| {
365            tracing::warn!("Failed to build LLM HTTP fallback client: {fallback_err}");
366            reqwest::blocking::Client::new()
367        })
368    })
369}
370
371pub(crate) fn sanitize_llm_response_for_error(response: &str) -> String {
372    let redacted = LLM_ERROR_REDACTOR.redact(response);
373    if redacted.chars().count() > MAX_LLM_ERROR_RESPONSE_CHARS {
374        let truncated: String = redacted
375            .chars()
376            .take(MAX_LLM_ERROR_RESPONSE_CHARS)
377            .collect();
378        format!("{truncated}...(truncated)")
379    } else {
380        redacted
381    }
382}
383
384/// Parses an extended capture analysis response from LLM output.
385///
386/// Handles various JSON formats and extracts from markdown code blocks.
387fn parse_extended_capture_analysis(response: &str) -> Result<ExtendedCaptureAnalysis> {
388    let json_str = extract_json_from_response(response);
389    let sanitized = sanitize_llm_response_for_error(response);
390    serde_json::from_str(json_str).map_err(|e| crate::Error::OperationFailed {
391        operation: "parse_extended_capture_analysis".to_string(),
392        cause: format!("Invalid JSON: {e}. Response: {sanitized}"),
393    })
394}
395
396/// Parses an extended search intent response from LLM output.
397fn parse_extended_search_intent(response: &str) -> Result<ExtendedSearchIntent> {
398    let json_str = extract_json_from_response(response);
399    let sanitized = sanitize_llm_response_for_error(response);
400    serde_json::from_str(json_str).map_err(|e| crate::Error::OperationFailed {
401        operation: "parse_extended_search_intent".to_string(),
402        cause: format!("Invalid JSON: {e}. Response: {sanitized}"),
403    })
404}
405
406/// Parses a consolidation analysis response from LLM output.
407fn parse_consolidation_analysis(response: &str) -> Result<ConsolidationAnalysis> {
408    let json_str = extract_json_from_response(response);
409    let sanitized = sanitize_llm_response_for_error(response);
410    serde_json::from_str(json_str).map_err(|e| crate::Error::OperationFailed {
411        operation: "parse_consolidation_analysis".to_string(),
412        cause: format!("Invalid JSON: {e}. Response: {sanitized}"),
413    })
414}
415
/// Extracts JSON from LLM response, handling markdown code blocks (CQ-H2).
///
/// This is a centralized utility for extracting JSON from LLM responses that may
/// include markdown formatting, prose, or other surrounding text.
///
/// # Handling
///
/// 1. Markdown code blocks with `json` language marker
/// 2. Markdown code blocks without language marker
/// 3. Raw JSON objects (first `{` to last `}`)
/// 4. JSON arrays (first `[` to last `]`); an array that encloses an object
///    (e.g. `[{"a":1}]`, as in enrichment responses) is preferred over the
///    inner object
///
/// # Arguments
///
/// * `response` - The raw LLM response text
///
/// # Returns
///
/// The extracted JSON string, or the trimmed input if no JSON found
#[must_use]
pub fn extract_json_from_response(response: &str) -> &str {
    let trimmed = response.trim();

    // Handle ```json ... ``` blocks
    if let Some((json_start, end)) = trimmed.find("```json").and_then(|start| {
        let json_start = start + 7;
        trimmed[json_start..]
            .find("```")
            .map(|end| (json_start, end))
    }) {
        return trimmed[json_start..json_start + end].trim();
    }

    // Handle ``` ... ``` blocks (without json marker)
    if let Some((json_start, end)) = trimmed.find("```").and_then(|start| {
        let content_start = start + 3;
        // Skip language identifier if present (e.g., "json\n")
        let after_marker = &trimmed[content_start..];
        let json_start = after_marker
            .find('{')
            .map_or(content_start, |pos| content_start + pos);
        trimmed[json_start..]
            .find("```")
            .map(|end| (json_start, end))
    }) {
        return trimmed[json_start..json_start + end].trim();
    }

    // Raw JSON: candidate spans for an object and an array. The `s <= e`
    // guard prevents a panicking slice on malformed input such as "} text {",
    // where the first `{` appears after the last `}`.
    let object = trimmed.find('{').zip(trimmed.rfind('}')).filter(|&(s, e)| s <= e);
    let array = trimmed.find('[').zip(trimmed.rfind(']')).filter(|&(s, e)| s <= e);

    match (object, array) {
        // An array that strictly encloses the object span is the outermost
        // structure (array of objects) — return it, not the inner object.
        (Some((o0, o1)), Some((a0, a1))) if a0 < o0 && a1 > o1 => &trimmed[a0..=a1],
        (Some((o0, o1)), _) => &trimmed[o0..=o1],
        (None, Some((a0, a1))) => &trimmed[a0..=a1],
        (None, None) => trimmed,
    }
}
476
// Unit tests for JSON extraction and the LLM response parsers.
#[cfg(test)]
mod tests {
    use super::*;

    // A bare JSON object passes through unchanged.
    #[test]
    fn test_extract_json_raw() {
        let response = r#"{"key": "value"}"#;
        let json = extract_json_from_response(response);
        assert_eq!(json, r#"{"key": "value"}"#);
    }

    // JSON inside a ```json fenced block is unwrapped.
    #[test]
    fn test_extract_json_markdown() {
        let response = "```json\n{\"key\": \"value\"}\n```";
        let json = extract_json_from_response(response);
        assert!(json.contains("\"key\""));
    }

    // Surrounding prose is stripped down to the object itself.
    #[test]
    fn test_extract_json_with_prefix() {
        let response = "Here is the result: {\"key\": \"value\"} hope this helps";
        let json = extract_json_from_response(response);
        assert_eq!(json, r#"{"key": "value"}"#);
    }

    // A top-level JSON array is recognized (used by enrichment responses).
    #[test]
    fn test_extract_json_array() {
        let response = r#"["tag1", "tag2", "tag3"]"#;
        let json = extract_json_from_response(response);
        assert_eq!(json, r#"["tag1", "tag2", "tag3"]"#);
    }

    // A fully-populated extended analysis payload deserializes successfully.
    #[test]
    fn test_parse_extended_capture_analysis_success() {
        let response = r#"{
            "should_capture": true,
            "confidence": 0.85,
            "suggested_namespace": "decisions",
            "suggested_tags": ["rust"],
            "reasoning": "Clear decision",
            "security_assessment": {
                "injection_risk": 0.0,
                "poisoning_risk": 0.0,
                "social_engineering_risk": 0.0,
                "flags": [],
                "recommendation": "capture"
            },
            "contradiction_assessment": {
                "has_contradictions": false,
                "contradiction_risk": 0.0
            }
        }"#;

        let result = parse_extended_capture_analysis(response);
        assert!(result.is_ok());
        let analysis = result.unwrap();
        assert!(analysis.should_capture);
        assert!((analysis.confidence - 0.85).abs() < f32::EPSILON);
    }

    // A well-formed search intent payload deserializes successfully.
    #[test]
    fn test_parse_extended_search_intent_success() {
        let response = r#"{
            "intent_type": "howto",
            "confidence": 0.9,
            "topics": ["authentication"],
            "reasoning": "User asking how to implement",
            "namespace_weights": {"patterns": 0.3}
        }"#;

        let result = parse_extended_search_intent(response);
        assert!(result.is_ok());
        let intent = result.unwrap();
        assert_eq!(intent.intent_type, "howto");
    }

    // An empty consolidation payload deserializes to empty candidate lists.
    #[test]
    fn test_parse_consolidation_analysis_success() {
        let response = r#"{
            "merge_candidates": [],
            "archive_candidates": [],
            "contradictions": [],
            "summary": "No consolidation needed"
        }"#;

        let result = parse_consolidation_analysis(response);
        assert!(result.is_ok());
        let analysis = result.unwrap();
        assert!(analysis.merge_candidates.is_empty());
    }
}