Skip to main content

subcog/storage/persistence/
filesystem.rs

1//! Filesystem-based persistence backend.
2//!
3//! A fallback backend that stores memories as individual JSON files.
4//! Useful for testing and environments without git.
5//!
6//! # Security
7//!
8//! This module includes protections against filesystem-based attacks:
9//! - **Path traversal**: Memory IDs are validated to prevent directory escape
10//! - **File size limits**: Maximum file size enforced to prevent memory exhaustion
11//! - **Encryption at rest**: Optional AES-256-GCM encryption (CRIT-005)
12//!
13//! # Encryption
14//!
15//! When the `encryption` feature is enabled and `SUBCOG_ENCRYPTION_KEY` is set,
16//! all memory files are encrypted with AES-256-GCM before writing to disk.
17//!
18//! ```bash
19//! # Generate a key
20//! openssl rand -base64 32
21//!
22//! # Enable encryption
23//! export SUBCOG_ENCRYPTION_KEY="your-base64-encoded-key"
24//! ```
25
26use crate::models::{Memory, MemoryId};
27use crate::security::encryption::is_encrypted;
28#[cfg(feature = "encryption")]
29use crate::security::encryption::{EncryptionConfig, Encryptor};
30use crate::storage::traits::PersistenceBackend;
31use crate::{Error, Result};
32use chrono::{TimeZone, Utc};
33use serde::{Deserialize, Serialize};
34use std::fs;
35use std::path::{Path, PathBuf};
36
/// Upper bound, in bytes, on the size of a single memory file (1 MiB).
///
/// Larger files are refused when read so that a maliciously large file
/// cannot exhaust memory.
const MAX_FILE_SIZE: u64 = 1 << 20;
40
/// Serializable memory format for filesystem storage.
///
/// A flat mirror of [`Memory`] built from plain strings and integers. It is what gets
/// serialized to JSON when a memory is stored and what is deserialized when one is read back.
///
/// Fields marked `#[serde(default)]` may be missing from a file; they then deserialize to
/// their default value (`None` / `false`).
///
/// NOTE: the field order below is the key order of the serialized JSON.
#[derive(Debug, Serialize, Deserialize)]
struct StoredMemory {
    /// Memory identifier in string form.
    id: String,
    /// The memory's content.
    content: String,
    /// Namespace label (e.g. `"decisions"`, `"tech-debt"`).
    namespace: String,
    /// Organization part of the memory's domain, if any.
    domain_org: Option<String>,
    /// Project part of the memory's domain, if any.
    domain_project: Option<String>,
    /// Repository part of the memory's domain, if any.
    domain_repo: Option<String>,
    /// Copied from `Memory::project_id`.
    project_id: Option<String>,
    /// Copied from `Memory::branch`.
    branch: Option<String>,
    /// Copied from `Memory::file_path`.
    file_path: Option<String>,
    /// Status label (e.g. `"active"`, `"archived"`).
    status: String,
    /// Copied from `Memory::created_at` (presumably Unix epoch seconds; confirm against `Memory`).
    created_at: u64,
    /// Copied from `Memory::updated_at` (presumably Unix epoch seconds; confirm against `Memory`).
    updated_at: u64,
    /// Tombstone time as Unix epoch seconds; absent in files written before this field existed.
    #[serde(default)]
    tombstoned_at: Option<u64>,
    /// Expiration timestamp (Unix epoch seconds).
    #[serde(default)]
    expires_at: Option<u64>,
    /// Embedding vector, if one was stored with the memory.
    embedding: Option<Vec<f32>>,
    /// Tags attached to the memory.
    tags: Vec<String>,
    /// Copied from `Memory::source`.
    source: Option<String>,
    /// Whether this memory is a consolidation summary node.
    #[serde(default)]
    is_summary: bool,
    /// IDs of memories that were consolidated into this summary.
    #[serde(default)]
    source_memory_ids: Option<Vec<String>>,
    /// Timestamp when this memory was consolidated.
    #[serde(default)]
    consolidation_timestamp: Option<u64>,
}
74
75impl From<&Memory> for StoredMemory {
76    fn from(m: &Memory) -> Self {
77        Self {
78            id: m.id.as_str().to_string(),
79            content: m.content.clone(),
80            namespace: m.namespace.as_str().to_string(),
81            domain_org: m.domain.organization.clone(),
82            domain_project: m.domain.project.clone(),
83            domain_repo: m.domain.repository.clone(),
84            project_id: m.project_id.clone(),
85            branch: m.branch.clone(),
86            file_path: m.file_path.clone(),
87            status: m.status.as_str().to_string(),
88            created_at: m.created_at,
89            updated_at: m.updated_at,
90            tombstoned_at: m
91                .tombstoned_at
92                .and_then(|ts| u64::try_from(ts.timestamp()).ok()),
93            expires_at: m.expires_at,
94            embedding: m.embedding.clone(),
95            tags: m.tags.clone(),
96            source: m.source.clone(),
97            is_summary: m.is_summary,
98            source_memory_ids: m
99                .source_memory_ids
100                .as_ref()
101                .map(|ids| ids.iter().map(|id| id.as_str().to_string()).collect()),
102            consolidation_timestamp: m.consolidation_timestamp,
103        }
104    }
105}
106
107impl StoredMemory {
108    fn to_memory(&self) -> Memory {
109        use crate::models::{Domain, MemoryStatus, Namespace};
110
111        let namespace = match self.namespace.as_str() {
112            "decisions" => Namespace::Decisions,
113            "patterns" => Namespace::Patterns,
114            "learnings" => Namespace::Learnings,
115            "context" => Namespace::Context,
116            "tech-debt" => Namespace::TechDebt,
117            "apis" => Namespace::Apis,
118            "config" => Namespace::Config,
119            "security" => Namespace::Security,
120            "performance" => Namespace::Performance,
121            "testing" => Namespace::Testing,
122            _ => Namespace::Decisions,
123        };
124
125        let status = match self.status.as_str() {
126            "active" => MemoryStatus::Active,
127            "archived" => MemoryStatus::Archived,
128            "superseded" => MemoryStatus::Superseded,
129            "pending" => MemoryStatus::Pending,
130            "deleted" => MemoryStatus::Deleted,
131            "tombstoned" => MemoryStatus::Tombstoned,
132            _ => MemoryStatus::Active,
133        };
134
135        Memory {
136            id: MemoryId::new(&self.id),
137            content: self.content.clone(),
138            namespace,
139            domain: Domain {
140                organization: self.domain_org.clone(),
141                project: self.domain_project.clone(),
142                repository: self.domain_repo.clone(),
143            },
144            project_id: self.project_id.clone(),
145            branch: self.branch.clone(),
146            file_path: self.file_path.clone(),
147            status,
148            created_at: self.created_at,
149            updated_at: self.updated_at,
150            tombstoned_at: self.tombstoned_at.and_then(|ts| {
151                let ts_i64 = i64::try_from(ts).unwrap_or(i64::MAX);
152                Utc.timestamp_opt(ts_i64, 0).single()
153            }),
154            expires_at: self.expires_at,
155            embedding: self.embedding.clone(),
156            tags: self.tags.clone(),
157            #[cfg(feature = "group-scope")]
158            group_id: None, // TODO: Add group_id to filesystem persistence
159            source: self.source.clone(),
160            is_summary: self.is_summary,
161            source_memory_ids: self
162                .source_memory_ids
163                .as_ref()
164                .map(|ids| ids.iter().map(MemoryId::new).collect()),
165            consolidation_timestamp: self.consolidation_timestamp,
166        }
167    }
168}
169
/// Filesystem-based persistence backend.
///
/// Each memory is kept in its own `<id>.json` file directly under `base_path`.
pub struct FilesystemBackend {
    /// Base directory for storage.
    base_path: PathBuf,
    /// Optional encryptor for encryption at rest.
    ///
    /// When this is `None`, files are written unencrypted.
    #[cfg(feature = "encryption")]
    encryptor: Option<Encryptor>,
}
178
179impl FilesystemBackend {
180    /// Creates a new filesystem backend.
181    ///
182    /// If the `encryption` feature is enabled and `SUBCOG_ENCRYPTION_KEY` is set,
183    /// encryption at rest is automatically enabled.
184    ///
185    /// # Errors
186    ///
187    /// Returns an error if the directory cannot be created.
188    pub fn new(base_path: impl Into<PathBuf>) -> Self {
189        let path = base_path.into();
190
191        // Try to create directory, ignore errors for now
192        let _ = fs::create_dir_all(&path);
193
194        #[cfg(feature = "encryption")]
195        let encryptor = Self::try_create_encryptor();
196
197        Self {
198            base_path: path,
199            #[cfg(feature = "encryption")]
200            encryptor,
201        }
202    }
203
204    /// Creates a new filesystem backend with checked directory creation.
205    ///
206    /// # Errors
207    ///
208    /// Returns an error if the directory cannot be created.
209    pub fn with_create(base_path: impl Into<PathBuf>) -> Result<Self> {
210        let base_path = base_path.into();
211
212        // Ensure directory exists
213        fs::create_dir_all(&base_path).map_err(|e| Error::OperationFailed {
214            operation: "create_storage_dir".to_string(),
215            cause: e.to_string(),
216        })?;
217
218        #[cfg(feature = "encryption")]
219        let encryptor = Self::try_create_encryptor();
220
221        Ok(Self {
222            base_path,
223            #[cfg(feature = "encryption")]
224            encryptor,
225        })
226    }
227
228    /// Tries to create an encryptor from environment configuration.
229    #[cfg(feature = "encryption")]
230    fn try_create_encryptor() -> Option<Encryptor> {
231        EncryptionConfig::try_from_env().map_or_else(
232            || {
233                tracing::debug!("Encryption key not configured, storing files unencrypted");
234                None
235            },
236            |config| match Encryptor::new(config) {
237                Ok(enc) => {
238                    tracing::info!("Encryption at rest enabled for filesystem backend");
239                    Some(enc)
240                },
241                Err(e) => {
242                    tracing::warn!("Failed to create encryptor: {e}");
243                    None
244                },
245            },
246        )
247    }
248
249    /// Decrypts data if it's encrypted, returns as-is otherwise.
250    ///
251    /// This helper reduces nesting in the `get` method (`clippy::excessive_nesting` fix).
252    #[cfg(feature = "encryption")]
253    fn decrypt_if_needed(&self, raw_data: Vec<u8>) -> Result<Vec<u8>> {
254        if !is_encrypted(&raw_data) {
255            return Ok(raw_data);
256        }
257        self.encryptor.as_ref().map_or_else(
258            || {
259                Err(Error::OperationFailed {
260                    operation: "decrypt_memory".to_string(),
261                    cause: "File is encrypted but no encryption key configured".to_string(),
262                })
263            },
264            |encryptor| encryptor.decrypt(&raw_data),
265        )
266    }
267
268    /// Decrypts data if it's encrypted, returns as-is otherwise.
269    ///
270    /// Non-encryption version - returns error if data is encrypted.
271    #[cfg(not(feature = "encryption"))]
272    fn decrypt_if_needed(&self, raw_data: Vec<u8>) -> Result<Vec<u8>> {
273        if is_encrypted(&raw_data) {
274            return Err(Error::OperationFailed {
275                operation: "decrypt_memory".to_string(),
276                cause: "File is encrypted but encryption feature not enabled".to_string(),
277            });
278        }
279        Ok(raw_data)
280    }
281
    /// Returns whether encryption is enabled.
    ///
    /// `true` only when an encryptor was successfully created for this backend; `false`
    /// means files are written unencrypted.
    #[cfg(feature = "encryption")]
    #[must_use]
    pub const fn encryption_enabled(&self) -> bool {
        self.encryptor.is_some()
    }

    /// Returns whether encryption is enabled.
    ///
    /// Always `false` in builds without the `encryption` feature.
    #[cfg(not(feature = "encryption"))]
    #[must_use]
    pub const fn encryption_enabled(&self) -> bool {
        false
    }
295
296    /// Returns the path for a memory file.
297    ///
298    /// # Security
299    ///
300    /// The memory ID is sanitized to prevent path traversal attacks.
301    /// Only alphanumeric characters, dashes, and underscores are allowed.
302    fn memory_path(&self, id: &MemoryId) -> Result<PathBuf> {
303        let id_str = id.as_str();
304
305        // Validate ID to prevent path traversal attacks (PEN-H2)
306        if !Self::is_safe_filename(id_str) {
307            return Err(Error::InvalidInput(format!(
308                "Memory ID contains invalid characters: {id_str}",
309            )));
310        }
311
312        let path = self.base_path.join(format!("{id_str}.json"));
313
314        // Double-check: ensure the resulting path is under base_path
315        // Note: We compare the non-canonical paths because:
316        // 1. The ID validation above prevents ".." and "/" in the filename
317        // 2. The file may not exist yet (for store operations)
318        // 3. Canonicalization would fail for non-existent files
319        // The is_safe_filename check is the primary security barrier
320        if !path.starts_with(&self.base_path) {
321            return Err(Error::InvalidInput(format!(
322                "Path traversal attempt detected for ID: {id_str}",
323            )));
324        }
325
326        Ok(path)
327    }
328
329    /// Checks if a filename is safe (no path traversal).
330    fn is_safe_filename(name: &str) -> bool {
331        // Only allow alphanumeric, dash, underscore
332        // Reject: .. / \ NUL and other special chars
333        !name.is_empty()
334            && name.len() <= 255
335            && name
336                .chars()
337                .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
338    }
339
340    /// Returns the base path.
341    #[must_use]
342    pub fn base_path(&self) -> &Path {
343        &self.base_path
344    }
345}
346
347impl PersistenceBackend for FilesystemBackend {
348    fn store(&self, memory: &Memory) -> Result<()> {
349        // Ensure directory exists before storing
350        let _ = fs::create_dir_all(&self.base_path);
351
352        let path = self.memory_path(&memory.id)?;
353        let stored = StoredMemory::from(memory);
354
355        let json = serde_json::to_string_pretty(&stored).map_err(|e| Error::OperationFailed {
356            operation: "serialize_memory".to_string(),
357            cause: e.to_string(),
358        })?;
359
360        // CRIT-005: Encrypt if encryption is enabled
361        #[cfg(feature = "encryption")]
362        let data = if let Some(ref encryptor) = self.encryptor {
363            encryptor.encrypt(json.as_bytes())?
364        } else {
365            json.into_bytes()
366        };
367
368        #[cfg(not(feature = "encryption"))]
369        let data = json.into_bytes();
370
371        fs::write(&path, data).map_err(|e| Error::OperationFailed {
372            operation: "write_memory_file".to_string(),
373            cause: e.to_string(),
374        })?;
375
376        Ok(())
377    }
378
379    fn get(&self, id: &MemoryId) -> Result<Option<Memory>> {
380        let path = match self.memory_path(id) {
381            Ok(p) => p,
382            Err(_) => return Ok(None), // Invalid ID means no memory
383        };
384
385        if !path.exists() {
386            return Ok(None);
387        }
388
389        // PEN-H4: Validate file size before reading to prevent memory exhaustion
390        let metadata = fs::metadata(&path).map_err(|e| Error::OperationFailed {
391            operation: "read_file_metadata".to_string(),
392            cause: e.to_string(),
393        })?;
394
395        if metadata.len() > MAX_FILE_SIZE {
396            return Err(Error::InvalidInput(format!(
397                "Memory file exceeds maximum size of {MAX_FILE_SIZE} bytes: {}",
398                path.display()
399            )));
400        }
401
402        // Read raw bytes first to detect encryption
403        let raw_data = fs::read(&path).map_err(|e| Error::OperationFailed {
404            operation: "read_memory_file".to_string(),
405            cause: e.to_string(),
406        })?;
407
408        // CRIT-005: Decrypt if file is encrypted (uses helper to reduce nesting)
409        let json_bytes = self.decrypt_if_needed(raw_data)?;
410
411        let json = String::from_utf8(json_bytes).map_err(|e| Error::OperationFailed {
412            operation: "decode_memory_file".to_string(),
413            cause: e.to_string(),
414        })?;
415
416        let stored: StoredMemory =
417            serde_json::from_str(&json).map_err(|e| Error::OperationFailed {
418                operation: "deserialize_memory".to_string(),
419                cause: e.to_string(),
420            })?;
421
422        Ok(Some(stored.to_memory()))
423    }
424
425    fn delete(&self, id: &MemoryId) -> Result<bool> {
426        let path = match self.memory_path(id) {
427            Ok(p) => p,
428            Err(_) => return Ok(false), // Invalid ID means nothing to delete
429        };
430
431        if !path.exists() {
432            return Ok(false);
433        }
434
435        fs::remove_file(&path).map_err(|e| Error::OperationFailed {
436            operation: "delete_memory_file".to_string(),
437            cause: e.to_string(),
438        })?;
439
440        Ok(true)
441    }
442
443    fn list_ids(&self) -> Result<Vec<MemoryId>> {
444        let mut ids = Vec::new();
445
446        // If directory doesn't exist, return empty list
447        if !self.base_path.exists() {
448            return Ok(ids);
449        }
450
451        let entries = fs::read_dir(&self.base_path).map_err(|e| Error::OperationFailed {
452            operation: "read_storage_dir".to_string(),
453            cause: e.to_string(),
454        })?;
455
456        for entry in entries {
457            let entry = entry.map_err(|e| Error::OperationFailed {
458                operation: "read_dir_entry".to_string(),
459                cause: e.to_string(),
460            })?;
461
462            if let Some(id) = extract_memory_id_from_path(&entry.path()) {
463                ids.push(id);
464            }
465        }
466
467        Ok(ids)
468    }
469}
470
471/// Extracts a memory ID from a JSON file path.
472fn extract_memory_id_from_path(path: &Path) -> Option<MemoryId> {
473    // Check if it's a JSON file
474    if path.extension().is_none_or(|ext| ext != "json") {
475        return None;
476    }
477
478    // Get the file stem (name without extension) and convert to string
479    let stem = path.file_stem()?;
480    let id_str = stem.to_str()?;
481
482    Some(MemoryId::new(id_str))
483}
484
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::{Domain, MemoryStatus, Namespace};
    use tempfile::TempDir;

    /// Builds a minimal active memory in the `Decisions` namespace with the given ID.
    fn create_test_memory(id: &str) -> Memory {
        Memory {
            id: MemoryId::new(id),
            content: "Test content".to_string(),
            namespace: Namespace::Decisions,
            domain: Domain::new(),
            project_id: None,
            branch: None,
            file_path: None,
            status: MemoryStatus::Active,
            created_at: 1_234_567_890,
            updated_at: 1_234_567_890,
            tombstoned_at: None,
            expires_at: None,
            embedding: None,
            tags: vec!["test".to_string()],
            #[cfg(feature = "group-scope")]
            group_id: None,
            source: Some("test.rs".to_string()),
            is_summary: false,
            source_memory_ids: None,
            consolidation_timestamp: None,
        }
    }

    #[test]
    fn test_store_and_get() {
        let dir = TempDir::new().unwrap();
        let backend = FilesystemBackend::new(dir.path());

        let memory = create_test_memory("test_id");
        backend.store(&memory).unwrap();

        let retrieved = backend.get(&MemoryId::new("test_id")).unwrap();
        assert!(retrieved.is_some());

        let retrieved = retrieved.unwrap();
        assert_eq!(retrieved.id.as_str(), "test_id");
        assert_eq!(retrieved.content, "Test content");
        assert_eq!(retrieved.namespace, Namespace::Decisions);
    }

    #[test]
    fn test_get_nonexistent() {
        let dir = TempDir::new().unwrap();
        let backend = FilesystemBackend::new(dir.path());

        let result = backend.get(&MemoryId::new("nonexistent")).unwrap();
        assert!(result.is_none());
    }

    /// An ID that cannot be a file name is treated as "not stored", not as an error.
    #[test]
    fn test_get_invalid_id_returns_none() {
        let dir = TempDir::new().unwrap();
        let backend = FilesystemBackend::new(dir.path());

        let result = backend.get(&MemoryId::new("../escape")).unwrap();
        assert!(result.is_none());
    }

    /// PEN-H4: a file larger than `MAX_FILE_SIZE` must be rejected on read.
    #[test]
    fn test_get_rejects_oversized_file() {
        let dir = TempDir::new().unwrap();
        let backend = FilesystemBackend::new(dir.path());

        let oversized = vec![b' '; usize::try_from(MAX_FILE_SIZE).unwrap() + 1];
        fs::write(dir.path().join("big.json"), oversized).unwrap();

        assert!(backend.get(&MemoryId::new("big")).is_err());
    }

    /// Storing under an ID that is not a safe file name must fail.
    #[test]
    fn test_store_invalid_id_fails() {
        let dir = TempDir::new().unwrap();
        let backend = FilesystemBackend::new(dir.path());

        let memory = create_test_memory("bad/id");
        assert!(backend.store(&memory).is_err());
    }

    #[test]
    fn test_delete() {
        let dir = TempDir::new().unwrap();
        let backend = FilesystemBackend::new(dir.path());

        let memory = create_test_memory("to_delete");
        backend.store(&memory).unwrap();

        let deleted = backend.delete(&MemoryId::new("to_delete")).unwrap();
        assert!(deleted);

        let retrieved = backend.get(&MemoryId::new("to_delete")).unwrap();
        assert!(retrieved.is_none());
    }

    #[test]
    fn test_deserialize_without_tombstoned_at() {
        let json = r#"{
            "id": "legacy-id",
            "content": "Legacy content",
            "namespace": "decisions",
            "domain_org": null,
            "domain_project": null,
            "domain_repo": null,
            "project_id": null,
            "branch": null,
            "file_path": null,
            "status": "active",
            "created_at": 123,
            "updated_at": 123,
            "embedding": null,
            "tags": [],
            "source": null
        }"#;

        let stored: StoredMemory = serde_json::from_str(json).unwrap();
        let memory = stored.to_memory();
        assert!(memory.tombstoned_at.is_none());
    }

    /// A stored tombstone time (epoch seconds) is restored as the matching UTC timestamp.
    #[test]
    fn test_deserialize_with_tombstoned_at() {
        let json = r#"{
            "id": "tombstoned-id",
            "content": "Old content",
            "namespace": "decisions",
            "domain_org": null,
            "domain_project": null,
            "domain_repo": null,
            "project_id": null,
            "branch": null,
            "file_path": null,
            "status": "tombstoned",
            "created_at": 123,
            "updated_at": 123,
            "tombstoned_at": 1700000000,
            "embedding": null,
            "tags": [],
            "source": null
        }"#;

        let stored: StoredMemory = serde_json::from_str(json).unwrap();
        let memory = stored.to_memory();
        assert_eq!(
            memory.tombstoned_at.map(|ts| ts.timestamp()),
            Some(1_700_000_000)
        );
    }

    #[test]
    fn test_delete_nonexistent() {
        let dir = TempDir::new().unwrap();
        let backend = FilesystemBackend::new(dir.path());

        let deleted = backend.delete(&MemoryId::new("nonexistent")).unwrap();
        assert!(!deleted);
    }

    #[test]
    fn test_list_ids() {
        let dir = TempDir::new().unwrap();
        let backend = FilesystemBackend::new(dir.path());

        backend.store(&create_test_memory("id1")).unwrap();
        backend.store(&create_test_memory("id2")).unwrap();
        backend.store(&create_test_memory("id3")).unwrap();

        let ids = backend.list_ids().unwrap();
        assert_eq!(ids.len(), 3);
    }

    /// Only `*.json` files count as memories; other files in the directory are ignored.
    #[test]
    fn test_list_ids_ignores_non_json_files() {
        let dir = TempDir::new().unwrap();
        let backend = FilesystemBackend::new(dir.path());

        backend.store(&create_test_memory("kept")).unwrap();
        fs::write(dir.path().join("notes.txt"), "not a memory").unwrap();

        let ids = backend.list_ids().unwrap();
        assert_eq!(ids.len(), 1);
        assert_eq!(ids[0].as_str(), "kept");
    }

    #[test]
    fn test_count() {
        let dir = TempDir::new().unwrap();
        let backend = FilesystemBackend::new(dir.path());

        assert_eq!(backend.count().unwrap(), 0);

        backend.store(&create_test_memory("id1")).unwrap();
        backend.store(&create_test_memory("id2")).unwrap();

        assert_eq!(backend.count().unwrap(), 2);
    }

    #[test]
    fn test_exists() {
        let dir = TempDir::new().unwrap();
        let backend = FilesystemBackend::new(dir.path());

        backend.store(&create_test_memory("exists")).unwrap();

        assert!(backend.exists(&MemoryId::new("exists")).unwrap());
        assert!(!backend.exists(&MemoryId::new("not_exists")).unwrap());
    }

    #[test]
    fn test_update_memory() {
        let dir = TempDir::new().unwrap();
        let backend = FilesystemBackend::new(dir.path());

        let mut memory = create_test_memory("update_test");
        backend.store(&memory).unwrap();

        memory.content = "Updated content".to_string();
        memory.updated_at = 9_999_999_999;
        backend.store(&memory).unwrap();

        let retrieved = backend.get(&MemoryId::new("update_test")).unwrap().unwrap();
        assert_eq!(retrieved.content, "Updated content");
        assert_eq!(retrieved.updated_at, 9_999_999_999);
    }

    #[test]
    fn test_path_traversal_protection() {
        let dir = TempDir::new().unwrap();
        let backend = FilesystemBackend::new(dir.path());

        // Attempt path traversal with ".."
        let result = backend.memory_path(&MemoryId::new("../../../etc/passwd"));
        assert!(result.is_err());

        // Attempt with forward slash
        let result = backend.memory_path(&MemoryId::new("dir/subdir/file"));
        assert!(result.is_err());

        // Attempt with backslash
        let result = backend.memory_path(&MemoryId::new("dir\\subdir\\file"));
        assert!(result.is_err());
    }

    #[test]
    fn test_safe_filename_validation() {
        // Valid filenames
        assert!(FilesystemBackend::is_safe_filename("valid_id"));
        assert!(FilesystemBackend::is_safe_filename("valid-id-123"));
        assert!(FilesystemBackend::is_safe_filename("abc123"));
        assert!(FilesystemBackend::is_safe_filename("UPPERCASE"));

        // Invalid filenames
        assert!(!FilesystemBackend::is_safe_filename(""));
        assert!(!FilesystemBackend::is_safe_filename("../path"));
        assert!(!FilesystemBackend::is_safe_filename("path/to/file"));
        assert!(!FilesystemBackend::is_safe_filename("path\\to\\file"));
        assert!(!FilesystemBackend::is_safe_filename("file.json"));
        assert!(!FilesystemBackend::is_safe_filename("file with space"));
    }

    #[test]
    fn test_with_create_success() {
        let dir = TempDir::new().unwrap();
        let subdir = dir.path().join("subdir");

        let backend = FilesystemBackend::with_create(&subdir);
        assert!(backend.is_ok());
        assert!(subdir.exists());
    }

    /// A directory cannot be created beneath a regular file, so `with_create` must fail.
    #[test]
    fn test_with_create_failure() {
        let dir = TempDir::new().unwrap();
        let blocker = dir.path().join("blocker");
        fs::write(&blocker, b"").unwrap();

        let backend = FilesystemBackend::with_create(blocker.join("subdir"));
        assert!(backend.is_err());
    }

    #[test]
    fn test_base_path_accessor() {
        let dir = TempDir::new().unwrap();
        let backend = FilesystemBackend::new(dir.path());

        assert_eq!(backend.base_path(), dir.path());
    }

    #[test]
    fn test_memory_roundtrip_all_namespaces() {
        let dir = TempDir::new().unwrap();
        let backend = FilesystemBackend::new(dir.path());

        let namespaces = [
            Namespace::Decisions,
            Namespace::Patterns,
            Namespace::Learnings,
            Namespace::Context,
            Namespace::TechDebt,
            Namespace::Apis,
            Namespace::Config,
            Namespace::Security,
            Namespace::Performance,
            Namespace::Testing,
        ];

        for (i, ns) in namespaces.iter().enumerate() {
            let id = format!("ns_test_{i}");
            let mut memory = create_test_memory(&id);
            memory.namespace = *ns;

            backend.store(&memory).unwrap();
            let retrieved = backend.get(&MemoryId::new(&id)).unwrap().unwrap();
            assert_eq!(retrieved.namespace, *ns);
        }
    }
}