Skip to main content

subcog/io/
validation.rs

//! Import validation and normalization.
//!
//! Validates imported memory data and applies defaults before storage.
4
5use crate::models::{CaptureRequest, Domain, Namespace};
6use crate::services::deduplication::ContentHasher;
7
8use super::traits::ImportedMemory;
9
/// How serious a [`ValidationIssue`] is.
///
/// The severity tells the importer whether a record can still be stored
/// (`Warning`) or should be skipped (`Error`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValidationSeverity {
    /// The issue is recorded, but the import can proceed.
    Warning,
    /// The record should be skipped rather than imported.
    Error,
}
18
19/// A validation issue found during import.
20#[derive(Debug, Clone)]
21pub struct ValidationIssue {
22    /// The field that has an issue.
23    pub field: String,
24    /// Description of the issue.
25    pub message: String,
26    /// Severity of the issue.
27    pub severity: ValidationSeverity,
28}
29
30impl ValidationIssue {
31    /// Creates a warning issue.
32    #[must_use]
33    pub fn warning(field: impl Into<String>, message: impl Into<String>) -> Self {
34        Self {
35            field: field.into(),
36            message: message.into(),
37            severity: ValidationSeverity::Warning,
38        }
39    }
40
41    /// Creates an error issue.
42    #[must_use]
43    pub fn error(field: impl Into<String>, message: impl Into<String>) -> Self {
44        Self {
45            field: field.into(),
46            message: message.into(),
47            severity: ValidationSeverity::Error,
48        }
49    }
50}
51
52/// Result of validating an imported memory.
53#[derive(Debug, Clone)]
54pub struct ValidationResult {
55    /// Whether the memory is valid for import.
56    pub is_valid: bool,
57    /// Issues found during validation.
58    pub issues: Vec<ValidationIssue>,
59    /// Content hash for deduplication.
60    pub content_hash: String,
61}
62
63impl ValidationResult {
64    /// Creates a successful validation result.
65    #[must_use]
66    pub const fn valid(content_hash: String) -> Self {
67        Self {
68            is_valid: true,
69            issues: Vec::new(),
70            content_hash,
71        }
72    }
73
74    /// Creates a failed validation result.
75    #[must_use]
76    pub const fn invalid(issues: Vec<ValidationIssue>) -> Self {
77        Self {
78            is_valid: false,
79            issues,
80            content_hash: String::new(),
81        }
82    }
83
84    /// Adds a warning to the result.
85    #[must_use]
86    pub fn with_warning(mut self, field: impl Into<String>, message: impl Into<String>) -> Self {
87        self.issues.push(ValidationIssue::warning(field, message));
88        self
89    }
90}
91
92/// Validates and normalizes imported memory data.
93///
94/// Applies defaults for missing fields and validates required fields.
95///
96/// # Defaults
97///
98/// - `namespace`: Configurable, defaults to `Namespace::Decisions`
99/// - `domain`: Context-dependent (project if in git repo, else user)
100/// - `tags`: Empty vector
101/// - `source`: None
102pub struct ImportValidator {
103    /// Default namespace for memories without one.
104    default_namespace: Namespace,
105    /// Default domain for memories without one.
106    default_domain: Domain,
107    /// Maximum content length (bytes).
108    max_content_length: usize,
109}
110
111impl Default for ImportValidator {
112    fn default() -> Self {
113        Self {
114            default_namespace: Namespace::Decisions,
115            default_domain: Domain::new(),
116            max_content_length: 500_000, // 500KB, same as CaptureService
117        }
118    }
119}
120
121impl ImportValidator {
122    /// Creates a new validator with default settings.
123    #[must_use]
124    pub fn new() -> Self {
125        Self::default()
126    }
127
128    /// Sets the default namespace.
129    #[must_use]
130    pub const fn with_default_namespace(mut self, namespace: Namespace) -> Self {
131        self.default_namespace = namespace;
132        self
133    }
134
135    /// Sets the default domain.
136    #[must_use]
137    pub fn with_default_domain(mut self, domain: Domain) -> Self {
138        self.default_domain = domain;
139        self
140    }
141
142    /// Validates an imported memory.
143    ///
144    /// # Returns
145    ///
146    /// A [`ValidationResult`] indicating whether the memory is valid
147    /// and any issues found.
148    #[must_use]
149    pub fn validate(&self, imported: &ImportedMemory) -> ValidationResult {
150        let mut issues = Vec::new();
151
152        // Content is required and must not be empty
153        let trimmed = imported.content.trim();
154        if trimmed.is_empty() {
155            issues.push(ValidationIssue::error("content", "Content cannot be empty"));
156            return ValidationResult::invalid(issues);
157        }
158
159        // Check content length
160        if imported.content.len() > self.max_content_length {
161            issues.push(ValidationIssue::error(
162                "content",
163                format!(
164                    "Content exceeds maximum size of {} bytes (got {} bytes)",
165                    self.max_content_length,
166                    imported.content.len()
167                ),
168            ));
169            return ValidationResult::invalid(issues);
170        }
171
172        // Validate namespace if provided
173        if let Some(ref ns) = imported.namespace {
174            if Namespace::parse(ns).is_none() {
175                issues.push(ValidationIssue::warning(
176                    "namespace",
177                    format!("Unknown namespace '{ns}', using default"),
178                ));
179            }
180        } else {
181            issues.push(ValidationIssue::warning(
182                "namespace",
183                "Namespace not specified, using default",
184            ));
185        }
186
187        // Compute content hash for deduplication
188        let content_hash = ContentHasher::hash(&imported.content);
189
190        let mut result = ValidationResult::valid(content_hash);
191        result.issues = issues;
192        result
193    }
194
195    /// Converts an imported memory to a capture request.
196    ///
197    /// Applies defaults for missing fields.
198    #[must_use]
199    pub fn to_capture_request(&self, imported: ImportedMemory) -> CaptureRequest {
200        let namespace = imported
201            .namespace
202            .as_ref()
203            .and_then(|ns| Namespace::parse(ns))
204            .unwrap_or(self.default_namespace);
205
206        let domain = imported
207            .domain
208            .as_ref()
209            .map_or_else(|| self.default_domain.clone(), |d| parse_domain(d));
210
211        CaptureRequest {
212            content: imported.content,
213            namespace,
214            domain,
215            tags: imported.tags,
216            source: imported.source,
217            skip_security_check: false,
218            ttl_seconds: imported.ttl_seconds,
219            scope: None,
220            #[cfg(feature = "group-scope")]
221            group_id: None,
222        }
223    }
224
225    /// Returns the content hash tag for an imported memory.
226    ///
227    /// Used for duplicate detection before capture.
228    #[must_use]
229    pub fn content_hash_tag(&self, imported: &ImportedMemory) -> String {
230        ContentHasher::content_to_tag(&imported.content)
231    }
232}
233
234/// Parses a domain string into a Domain.
235fn parse_domain(s: &str) -> Domain {
236    match s.to_lowercase().as_str() {
237        "user" => Domain::for_user(),
238        "org" => Domain::for_org(),
239        _ => Domain::new(), // Default to project-scoped
240    }
241}
242
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed record passes and receives a content hash.
    #[test]
    fn test_validate_valid_memory() {
        let validator = ImportValidator::new();
        let imported = ImportedMemory::new("Valid content")
            .with_namespace("decisions")
            .with_tag("test");

        let result = validator.validate(&imported);
        assert!(result.is_valid);
        assert!(!result.content_hash.is_empty());
    }

    /// Whitespace-only content is rejected with an issue on `content`.
    #[test]
    fn test_validate_empty_content() {
        let validator = ImportValidator::new();
        let imported = ImportedMemory::new("   ");

        let result = validator.validate(&imported);
        assert!(!result.is_valid);
        assert!(result.issues.iter().any(|i| i.field == "content"));
    }

    /// Content above the default byte limit is rejected.
    #[test]
    fn test_validate_content_too_long() {
        let validator = ImportValidator::new();
        let imported = ImportedMemory::new("x".repeat(600_000));

        let result = validator.validate(&imported);
        assert!(!result.is_valid);
        assert!(
            result
                .issues
                .iter()
                .any(|i| i.message.contains("maximum size"))
        );
    }

    /// An unrecognized namespace only produces a warning.
    #[test]
    fn test_validate_unknown_namespace() {
        let validator = ImportValidator::new();
        let imported = ImportedMemory::new("Content").with_namespace("unknown-ns");

        let result = validator.validate(&imported);
        assert!(result.is_valid); // Warning, not error
        assert!(
            result
                .issues
                .iter()
                .any(|i| i.severity == ValidationSeverity::Warning)
        );
    }

    /// A record without a namespace stays valid but gets a namespace warning.
    #[test]
    fn test_validate_missing_namespace_warns() {
        let validator = ImportValidator::new();
        let imported = ImportedMemory::new("Content");

        let result = validator.validate(&imported);
        assert!(result.is_valid);
        assert!(
            result
                .issues
                .iter()
                .any(|i| i.field == "namespace" && i.severity == ValidationSeverity::Warning)
        );
    }

    /// `with_warning` appends an issue without touching validity or the hash.
    #[test]
    fn test_validation_result_with_warning() {
        let result = ValidationResult::valid("abc".to_string()).with_warning("field", "message");

        assert!(result.is_valid);
        assert_eq!(result.content_hash, "abc");
        assert_eq!(result.issues.len(), 1);
        assert_eq!(result.issues[0].field, "field");
        assert_eq!(result.issues[0].message, "message");
        assert_eq!(result.issues[0].severity, ValidationSeverity::Warning);
    }

    /// Missing namespace falls back to the configured default; other fields
    /// are carried over.
    #[test]
    fn test_to_capture_request() {
        let validator = ImportValidator::new().with_default_namespace(Namespace::Learnings);

        let imported = ImportedMemory::new("Test content")
            .with_tag("rust")
            .with_source("test.rs");

        let request = validator.to_capture_request(imported);
        assert_eq!(request.content, "Test content");
        assert_eq!(request.namespace, Namespace::Learnings);
        assert_eq!(request.tags, vec!["rust"]);
        assert_eq!(request.source, Some("test.rs".to_string()));
    }

    /// The hash tag carries the expected prefix.
    #[test]
    fn test_content_hash_tag() {
        let validator = ImportValidator::new();
        let imported = ImportedMemory::new("Test content");

        let tag = validator.content_hash_tag(&imported);
        assert!(tag.starts_with("hash:sha256:"));
    }
}