// subcog/io/formats/mod.rs

//! Format adapters for import/export.
//!
//! Each format implements the [`ImportSource`] and/or [`ExportSink`] traits.

pub mod csv;
pub mod json;
#[cfg(feature = "parquet-export")]
pub mod parquet;
pub mod yaml;

use crate::{Error, Result};
use std::io::{BufRead, Write};
use std::path::Path;
use std::str::FromStr;

use super::traits::{ExportSink, ImportSource};

/// Supported file formats for import/export.
///
/// The `Parquet` variant is compiled in only when the crate is built with the
/// `parquet-export` feature.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Format {
    /// JSON format (newline-delimited or array).
    Json,
    /// YAML format (document stream).
    Yaml,
    /// CSV format with configurable column mapping.
    Csv,
    /// Apache Parquet columnar format (export only).
    #[cfg(feature = "parquet-export")]
    Parquet,
}

32impl Format {
33    /// Returns all available formats for import.
34    #[must_use]
35    pub fn import_formats() -> Vec<Self> {
36        vec![Self::Json, Self::Yaml, Self::Csv]
37    }
38
39    /// Returns all available formats for export.
40    #[must_use]
41    pub fn export_formats() -> Vec<Self> {
42        #[cfg(feature = "parquet-export")]
43        {
44            vec![Self::Json, Self::Yaml, Self::Csv, Self::Parquet]
45        }
46        #[cfg(not(feature = "parquet-export"))]
47        {
48            vec![Self::Json, Self::Yaml, Self::Csv]
49        }
50    }
51
52    /// Returns the file extension for this format.
53    #[must_use]
54    pub const fn extension(&self) -> &'static str {
55        match self {
56            Self::Json => "json",
57            Self::Yaml => "yaml",
58            Self::Csv => "csv",
59            #[cfg(feature = "parquet-export")]
60            Self::Parquet => "parquet",
61        }
62    }
63
64    /// Returns the MIME type for this format.
65    #[must_use]
66    pub const fn mime_type(&self) -> &'static str {
67        match self {
68            Self::Json => "application/json",
69            Self::Yaml => "application/x-yaml",
70            Self::Csv => "text/csv",
71            #[cfg(feature = "parquet-export")]
72            Self::Parquet => "application/vnd.apache.parquet",
73        }
74    }
75
76    /// Detects format from file extension.
77    ///
78    /// # Errors
79    ///
80    /// Returns an error if the extension is not recognized.
81    pub fn from_path(path: &Path) -> Result<Self> {
82        let ext = path
83            .extension()
84            .and_then(|e| e.to_str())
85            .map(str::to_lowercase);
86
87        match ext.as_deref() {
88            Some("json" | "ndjson" | "jsonl") => Ok(Self::Json),
89            Some("yaml" | "yml") => Ok(Self::Yaml),
90            Some("csv" | "tsv") => Ok(Self::Csv),
91            #[cfg(feature = "parquet-export")]
92            Some("parquet" | "pq") => Ok(Self::Parquet),
93            Some(ext) => Err(Error::InvalidInput(format!(
94                "Unsupported file extension: .{ext}"
95            ))),
96            None => Err(Error::InvalidInput(
97                "Cannot determine format: file has no extension".to_string(),
98            )),
99        }
100    }
101
102    /// Returns whether this format supports import.
103    #[must_use]
104    pub const fn supports_import(&self) -> bool {
105        match self {
106            Self::Json | Self::Yaml | Self::Csv => true,
107            #[cfg(feature = "parquet-export")]
108            Self::Parquet => false,
109        }
110    }
111
112    /// Returns whether this format supports export.
113    #[must_use]
114    pub const fn supports_export(&self) -> bool {
115        true // All formats support export
116    }
117}
118
119impl FromStr for Format {
120    type Err = Error;
121
122    fn from_str(s: &str) -> Result<Self> {
123        match s.to_lowercase().as_str() {
124            "json" | "ndjson" | "jsonl" => Ok(Self::Json),
125            "yaml" | "yml" => Ok(Self::Yaml),
126            "csv" | "tsv" => Ok(Self::Csv),
127            #[cfg(feature = "parquet-export")]
128            "parquet" | "pq" => Ok(Self::Parquet),
129            _ => Err(Error::InvalidInput(format!("Unknown format: {s}"))),
130        }
131    }
132}
133
134impl std::fmt::Display for Format {
135    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
136        match self {
137            Self::Json => write!(f, "json"),
138            Self::Yaml => write!(f, "yaml"),
139            Self::Csv => write!(f, "csv"),
140            #[cfg(feature = "parquet-export")]
141            Self::Parquet => write!(f, "parquet"),
142        }
143    }
144}
145
146/// Creates an import source for the given format and reader.
147///
148/// # Errors
149///
150/// Returns an error if the format doesn't support import.
151pub fn create_import_source<R: BufRead + 'static>(
152    reader: R,
153    format: Format,
154) -> Result<Box<dyn ImportSource>> {
155    match format {
156        Format::Json => Ok(Box::new(json::JsonImportSource::new(reader))),
157        Format::Yaml => Ok(Box::new(yaml::YamlImportSource::new(reader)?)),
158        Format::Csv => Ok(Box::new(csv::CsvImportSource::new(reader)?)),
159        #[cfg(feature = "parquet-export")]
160        Format::Parquet => Err(Error::NotImplemented(
161            "Parquet import is not supported".to_string(),
162        )),
163    }
164}
165
166/// Creates an export sink for the given format and writer.
167///
168/// # Errors
169///
170/// Returns an error if sink creation fails.
171pub fn create_export_sink<W: Write + Send + 'static>(
172    writer: W,
173    format: Format,
174) -> Result<Box<dyn ExportSink>> {
175    match format {
176        Format::Json => Ok(Box::new(json::JsonExportSink::new(writer))),
177        Format::Yaml => Ok(Box::new(yaml::YamlExportSink::new(writer))),
178        Format::Csv => Ok(Box::new(csv::CsvExportSink::new(writer)?)),
179        #[cfg(feature = "parquet-export")]
180        Format::Parquet => Ok(Box::new(parquet::ParquetExportSink::new(writer)?)),
181    }
182}
183
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_format_from_str() {
        assert_eq!(Format::from_str("json").unwrap(), Format::Json);
        assert_eq!(Format::from_str("YAML").unwrap(), Format::Yaml);
        assert_eq!(Format::from_str("csv").unwrap(), Format::Csv);
        assert!(Format::from_str("unknown").is_err());
    }

    // Aliases and mixed case must resolve to the same variants as the
    // canonical names.
    #[test]
    fn test_format_from_str_aliases() {
        for alias in ["ndjson", "jsonl", "JSONL"] {
            assert_eq!(Format::from_str(alias).unwrap(), Format::Json);
        }
        assert_eq!(Format::from_str("yml").unwrap(), Format::Yaml);
        assert_eq!(Format::from_str("tsv").unwrap(), Format::Csv);
        assert!(Format::from_str("").is_err());
    }

    #[test]
    fn test_format_from_path() {
        assert_eq!(
            Format::from_path(Path::new("test.json")).unwrap(),
            Format::Json
        );
        assert_eq!(
            Format::from_path(Path::new("test.yml")).unwrap(),
            Format::Yaml
        );
        assert_eq!(
            Format::from_path(Path::new("test.csv")).unwrap(),
            Format::Csv
        );
        assert!(Format::from_path(Path::new("test.txt")).is_err());
    }

    // Only the final extension counts, case is ignored, and paths without an
    // extension (including dot-files such as `.json`) are rejected.
    #[test]
    fn test_format_from_path_edge_cases() {
        assert_eq!(
            Format::from_path(Path::new("dir/data.JSONL")).unwrap(),
            Format::Json
        );
        assert_eq!(
            Format::from_path(Path::new("archive.tar.yaml")).unwrap(),
            Format::Yaml
        );
        assert!(Format::from_path(Path::new("README")).is_err());
        assert!(Format::from_path(Path::new(".json")).is_err());
    }

    #[test]
    fn test_format_extension() {
        assert_eq!(Format::Json.extension(), "json");
        assert_eq!(Format::Yaml.extension(), "yaml");
        assert_eq!(Format::Csv.extension(), "csv");
    }

    #[test]
    fn test_format_mime_type() {
        assert_eq!(Format::Json.mime_type(), "application/json");
        assert_eq!(Format::Csv.mime_type(), "text/csv");
    }

    #[test]
    fn test_format_supports() {
        assert!(Format::Json.supports_import());
        assert!(Format::Json.supports_export());
        assert!(Format::Yaml.supports_import());
        assert!(Format::Yaml.supports_export());
        assert!(Format::Csv.supports_import());
        assert!(Format::Csv.supports_export());
    }

    // The advertised format lists must agree with the capability predicates.
    #[test]
    fn test_format_lists_match_capabilities() {
        assert!(Format::import_formats().iter().all(|f| f.supports_import()));
        assert!(Format::export_formats().iter().all(|f| f.supports_export()));
    }

    // Display output must be parseable back into the same variant and must
    // match the canonical extension.
    #[test]
    fn test_format_display_round_trip() {
        for format in Format::export_formats() {
            assert_eq!(format.to_string(), format.extension());
            assert_eq!(Format::from_str(&format.to_string()).unwrap(), format);
        }
    }
}