subcog/io/formats/
json.rs

//! JSON format adapter for import/export.
//!
//! Import accepts both newline-delimited JSON (NDJSON/JSONL) and JSON arrays;
//! export always writes newline-delimited JSON.
4
5use crate::io::traits::{ExportSink, ExportableMemory, ImportSource, ImportedMemory};
6use crate::{Error, Result};
7use std::io::{BufRead, Write};
8
9/// JSON import source.
10///
11/// Automatically detects and handles both formats:
12/// - **NDJSON/JSONL**: One JSON object per line
13/// - **Array**: A JSON array of objects `[{...}, {...}]`
14pub struct JsonImportSource<R: BufRead> {
15    reader: R,
16    /// Buffered memories when parsing array format.
17    buffer: Vec<ImportedMemory>,
18    /// Current index into buffer (for array format).
19    buffer_index: usize,
20    /// Whether we've detected and started parsing.
21    started: bool,
22    /// Whether we're in array mode.
23    array_mode: bool,
24    /// Line number for error reporting.
25    line_number: usize,
26}
27
28impl<R: BufRead> JsonImportSource<R> {
29    /// Creates a new JSON import source.
30    #[must_use]
31    pub const fn new(reader: R) -> Self {
32        Self {
33            reader,
34            buffer: Vec::new(),
35            buffer_index: 0,
36            started: false,
37            array_mode: false,
38            line_number: 0,
39        }
40    }
41
42    /// Peeks at the first non-whitespace character to detect format.
43    fn detect_format(&mut self) -> Result<bool> {
44        // Read first line to detect format
45        let mut first_line = String::new();
46        let bytes_read =
47            self.reader
48                .read_line(&mut first_line)
49                .map_err(|e| Error::OperationFailed {
50                    operation: "read_json".to_string(),
51                    cause: e.to_string(),
52                })?;
53        if bytes_read == 0 {
54            return Ok(false); // Empty file
55        }
56        self.line_number = 1;
57
58        let trimmed = first_line.trim();
59        if trimmed.is_empty() {
60            return Ok(false);
61        }
62
63        // If starts with '[', it's an array
64        if trimmed.starts_with('[') {
65            self.array_mode = true;
66            // Read remaining content and prepend first line
67            let mut remaining = String::new();
68            self.reader
69                .read_to_string(&mut remaining)
70                .map_err(|e| Error::OperationFailed {
71                    operation: "read_json".to_string(),
72                    cause: e.to_string(),
73                })?;
74            let full_content = format!("{first_line}{remaining}");
75
76            let memories: Vec<ImportedMemory> = serde_json::from_str(&full_content)
77                .map_err(|e| Error::InvalidInput(format!("Failed to parse JSON array: {e}")))?;
78
79            self.buffer = memories;
80        } else {
81            // NDJSON mode - parse first line as object
82            let memory: ImportedMemory = serde_json::from_str(trimmed).map_err(|e| {
83                Error::InvalidInput(format!("Line 1: Failed to parse JSON object: {e}"))
84            })?;
85            self.buffer.push(memory);
86        }
87
88        self.buffer_index = 0;
89        self.started = true;
90        Ok(true)
91    }
92}
93
94impl<R: BufRead> ImportSource for JsonImportSource<R> {
95    fn next(&mut self) -> Result<Option<ImportedMemory>> {
96        // First call: detect format
97        if !self.started && !self.detect_format()? {
98            return Ok(None);
99        }
100
101        // Array mode: return from buffer
102        if self.array_mode {
103            if self.buffer_index < self.buffer.len() {
104                let memory = self.buffer[self.buffer_index].clone();
105                self.buffer_index += 1;
106                return Ok(Some(memory));
107            }
108            return Ok(None);
109        }
110
111        // NDJSON mode: return buffered first, then read lines
112        if self.buffer_index < self.buffer.len() {
113            let memory = self.buffer[self.buffer_index].clone();
114            self.buffer_index += 1;
115            return Ok(Some(memory));
116        }
117
118        // Read next line
119        let mut line = String::new();
120        loop {
121            line.clear();
122            let bytes_read =
123                self.reader
124                    .read_line(&mut line)
125                    .map_err(|e| Error::OperationFailed {
126                        operation: "read_json".to_string(),
127                        cause: e.to_string(),
128                    })?;
129            if bytes_read == 0 {
130                return Ok(None);
131            }
132            self.line_number += 1;
133
134            let trimmed = line.trim();
135            if !trimmed.is_empty() {
136                break;
137            }
138        }
139
140        let memory: ImportedMemory = serde_json::from_str(line.trim()).map_err(|e| {
141            Error::InvalidInput(format!(
142                "Line {}: Failed to parse JSON: {e}",
143                self.line_number
144            ))
145        })?;
146
147        Ok(Some(memory))
148    }
149
150    fn size_hint(&self) -> Option<usize> {
151        if self.array_mode {
152            Some(self.buffer.len())
153        } else {
154            None
155        }
156    }
157}
158
/// Export sink that serializes memories to JSON.
///
/// Output is newline-delimited JSON (NDJSON): one object per line.
pub struct JsonExportSink<W>
where
    W: Write,
{
    /// Destination the serialized records are written to.
    writer: W,
    /// Count of records written so far.
    count: usize,
}
167
168impl<W: Write> JsonExportSink<W> {
169    /// Creates a new JSON export sink.
170    #[must_use]
171    pub const fn new(writer: W) -> Self {
172        Self { writer, count: 0 }
173    }
174}
175
176impl<W: Write + Send> ExportSink for JsonExportSink<W> {
177    fn write(&mut self, memory: &ExportableMemory) -> Result<()> {
178        serde_json::to_writer(&mut self.writer, memory).map_err(|e| Error::OperationFailed {
179            operation: "write_json".to_string(),
180            cause: e.to_string(),
181        })?;
182        writeln!(self.writer).map_err(|e| Error::OperationFailed {
183            operation: "write_json".to_string(),
184            cause: e.to_string(),
185        })?;
186        self.count += 1;
187        Ok(())
188    }
189
190    fn finalize(mut self: Box<Self>) -> Result<()> {
191        self.writer.flush().map_err(|e| Error::OperationFailed {
192            operation: "flush_json".to_string(),
193            cause: e.to_string(),
194        })?;
195        Ok(())
196    }
197}
198
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// NDJSON input yields its records in order and then reports exhaustion.
    #[test]
    fn test_import_ndjson() {
        let ndjson = r#"{"content": "First memory", "namespace": "decisions"}
{"content": "Second memory", "tags": ["rust", "test"]}
"#;
        let mut importer = JsonImportSource::new(Cursor::new(ndjson));

        let record_one = importer.next().unwrap().unwrap();
        assert_eq!(record_one.content, "First memory");
        assert_eq!(record_one.namespace, Some("decisions".to_string()));

        let record_two = importer.next().unwrap().unwrap();
        assert_eq!(record_two.content, "Second memory");
        assert_eq!(record_two.tags, vec!["rust", "test"]);

        let after_last = importer.next().unwrap();
        assert!(after_last.is_none());
    }

    /// A JSON array spread over several lines yields each element in order.
    #[test]
    fn test_import_array() {
        let array_doc = r#"[
            {"content": "First memory"},
            {"content": "Second memory"}
        ]"#;
        let mut importer = JsonImportSource::new(Cursor::new(array_doc));

        let record_one = importer.next().unwrap().unwrap();
        assert_eq!(record_one.content, "First memory");

        let record_two = importer.next().unwrap().unwrap();
        assert_eq!(record_two.content, "Second memory");

        let after_last = importer.next().unwrap();
        assert!(after_last.is_none());
    }

    /// A written record appears as compact JSON terminated by a newline.
    #[test]
    fn test_export_ndjson() {
        let record = ExportableMemory {
            id: String::from("1"),
            content: String::from("Test"),
            namespace: String::from("decisions"),
            domain: String::from("project"),
            project_id: None,
            branch: None,
            file_path: None,
            status: String::from("active"),
            created_at: 0,
            updated_at: 0,
            tags: Vec::new(),
            source: None,
        };

        let mut bytes = Vec::new();
        let mut sink = JsonExportSink::new(&mut bytes);
        sink.write(&record).unwrap();
        // `finalize` consumes the sink, which releases the borrow of `bytes`.
        Box::new(sink).finalize().unwrap();

        let text = String::from_utf8(bytes).unwrap();
        assert!(text.contains("\"content\":\"Test\""));
        assert!(text.ends_with('\n'));
    }
}