Skip to main content

subcog/services/
prompt_parser.rs

1//! Prompt file parsing service.
2//!
3//! Supports parsing prompt templates from multiple formats:
4//! - Markdown with YAML front matter (`.md`)
5//! - YAML files (`.yaml`, `.yml`)
6//! - JSON files (`.json`)
7//! - Plain text (`.txt`); files with no extension or an unrecognized one are parsed as Markdown
8//!
9//! # Format Examples
10//!
11//! ## Markdown
12//! ```text
13//! ---
14//! name: code-review
15//! description: Review code for issues
16//! tags: [code, review]
17//! ---
18//! Please review the following code:
19//! {{code}}
20//! ```
21//!
22//! ## YAML
23//! ```yaml
24//! name: code-review
25//! description: Review code for issues
26//! content: |
27//!   Please review the following code:
28//!   {{code}}
29//! tags:
30//!   - code
31//!   - review
32//! ```
33//!
34//! ## JSON
35//! ```json
36//! {
37//!   "name": "code-review",
38//!   "description": "Review code for issues",
39//!   "content": "Please review {{code}}",
40//!   "tags": ["code", "review"]
41//! }
42//! ```
43
44use std::io::{self, Read};
45use std::path::Path;
46
47use crate::git::YamlFrontMatterParser;
48use crate::models::{PromptTemplate, PromptVariable, extract_variables};
49use crate::{Error, Result};
50
/// File formats a prompt template can be read from or written to.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum PromptFormat {
    /// Markdown body with an optional YAML front matter header.
    #[default]
    Markdown,
    /// A YAML document.
    Yaml,
    /// A JSON document.
    Json,
    /// Raw template text without any metadata.
    PlainText,
}

impl PromptFormat {
    /// Picks the format that matches the extension of `path`.
    ///
    /// # Arguments
    ///
    /// * `path` - File path whose extension is inspected
    ///
    /// # Returns
    ///
    /// The format mapped to the extension. A path with no extension, or with
    /// an extension that is not valid UTF-8, yields `Markdown`.
    #[must_use]
    pub fn from_extension(path: &Path) -> Self {
        match path.extension().and_then(std::ffi::OsStr::to_str) {
            Some(ext) => Self::from_extension_str(ext),
            None => Self::Markdown,
        }
    }

    /// Picks the format for a bare extension string (written without a dot).
    ///
    /// Matching ignores letter case. Anything other than `yaml`, `yml`,
    /// `json` or `txt` is treated as `Markdown`.
    #[must_use]
    pub fn from_extension_str(ext: &str) -> Self {
        let lowered = ext.to_lowercase();
        match lowered.as_str() {
            "json" => Self::Json,
            "txt" => Self::PlainText,
            "yml" | "yaml" => Self::Yaml,
            // `.md`, `.markdown` and every unrecognised extension land here.
            _ => Self::Markdown,
        }
    }

    /// Canonical file extension (without a dot) used when writing this format.
    #[must_use]
    pub const fn extension(&self) -> &'static str {
        match *self {
            Self::PlainText => "txt",
            Self::Json => "json",
            Self::Yaml => "yaml",
            Self::Markdown => "md",
        }
    }

    /// MIME type string associated with this format.
    #[must_use]
    pub const fn mime_type(&self) -> &'static str {
        match *self {
            Self::PlainText => "text/plain",
            Self::Json => "application/json",
            Self::Yaml => "application/x-yaml",
            Self::Markdown => "text/markdown",
        }
    }
}
116
/// Parser for prompt template files.
///
/// A unit struct that carries no state; it serves as the namespace for the
/// associated functions that parse and serialize prompt templates.
pub struct PromptParser;
119
120impl PromptParser {
121    /// Parses a prompt template from a file.
122    ///
123    /// The format is auto-detected from the file extension.
124    ///
125    /// # Arguments
126    ///
127    /// * `path` - Path to the prompt file
128    ///
129    /// # Errors
130    ///
131    /// Returns an error if the file cannot be read or parsed.
132    ///
133    /// # Example
134    ///
135    /// ```no_run
136    /// use subcog::services::PromptParser;
137    ///
138    /// let template = PromptParser::from_file("prompts/review.md")?;
139    /// println!("Loaded: {}", template.name);
140    /// # Ok::<(), subcog::Error>(())
141    /// ```
142    pub fn from_file(path: impl AsRef<Path>) -> Result<PromptTemplate> {
143        let path = path.as_ref();
144        let content = std::fs::read_to_string(path).map_err(|e| Error::OperationFailed {
145            operation: "read_prompt_file".to_string(),
146            cause: e.to_string(),
147        })?;
148
149        let format = PromptFormat::from_extension(path);
150        let mut template = Self::parse(&content, format)?;
151
152        // If no name was specified, derive from filename
153        if template.name.is_empty() {
154            template.name = path
155                .file_stem()
156                .and_then(std::ffi::OsStr::to_str)
157                .unwrap_or("unnamed")
158                .to_string();
159        }
160
161        Ok(template)
162    }
163
164    /// Parses a prompt template from stdin.
165    ///
166    /// # Arguments
167    ///
168    /// * `format` - The format to parse as
169    /// * `name` - Name for the template (required since stdin has no filename)
170    ///
171    /// # Errors
172    ///
173    /// Returns an error if stdin cannot be read or parsed.
174    pub fn from_stdin(format: PromptFormat, name: impl Into<String>) -> Result<PromptTemplate> {
175        let mut content = String::new();
176        io::stdin()
177            .read_to_string(&mut content)
178            .map_err(|e| Error::OperationFailed {
179                operation: "read_stdin".to_string(),
180                cause: e.to_string(),
181            })?;
182
183        let mut template = Self::parse(&content, format)?;
184        if template.name.is_empty() {
185            template.name = name.into();
186        }
187        Ok(template)
188    }
189
190    /// Parses a prompt template from string content.
191    ///
192    /// # Arguments
193    ///
194    /// * `content` - The raw content to parse
195    /// * `format` - The format to parse as
196    ///
197    /// # Errors
198    ///
199    /// Returns an error if parsing fails.
200    pub fn parse(content: &str, format: PromptFormat) -> Result<PromptTemplate> {
201        match format {
202            PromptFormat::Markdown => Self::parse_markdown(content),
203            PromptFormat::Yaml => Self::parse_yaml(content),
204            PromptFormat::Json => Self::parse_json(content),
205            PromptFormat::PlainText => Self::parse_plain_text(content),
206        }
207    }
208
209    /// Parses markdown with optional YAML front matter.
210    fn parse_markdown(content: &str) -> Result<PromptTemplate> {
211        let (metadata, body) = YamlFrontMatterParser::parse(content)?;
212
213        let name = metadata
214            .get("name")
215            .and_then(serde_json::Value::as_str)
216            .unwrap_or("")
217            .to_string();
218
219        let description = metadata
220            .get("description")
221            .and_then(serde_json::Value::as_str)
222            .unwrap_or("")
223            .to_string();
224
225        let tags = metadata
226            .get("tags")
227            .and_then(serde_json::Value::as_array)
228            .map_or_else(Vec::new, |arr| {
229                arr.iter()
230                    .filter_map(|v| v.as_str().map(String::from))
231                    .collect()
232            });
233
234        let author = metadata
235            .get("author")
236            .and_then(serde_json::Value::as_str)
237            .map(String::from);
238
239        // Parse explicit variable definitions from front matter
240        let explicit_variables = metadata
241            .get("variables")
242            .and_then(serde_json::Value::as_array)
243            .map_or_else(Vec::new, |arr| {
244                arr.iter().filter_map(parse_variable_def).collect()
245            });
246
247        // Extract variables from content and merge with explicit definitions
248        let extracted = extract_variables(&body);
249        let variables = merge_variables(explicit_variables, extracted);
250
251        Ok(PromptTemplate {
252            name,
253            description,
254            content: body,
255            variables,
256            tags,
257            author,
258            usage_count: 0,
259            created_at: 0,
260            updated_at: 0,
261        })
262    }
263
264    /// Parses YAML format.
265    fn parse_yaml(content: &str) -> Result<PromptTemplate> {
266        let value: serde_json::Value = serde_yaml_ng::from_str(content)
267            .map_err(|e| Error::InvalidInput(format!("Invalid YAML: {e}")))?;
268
269        Self::parse_structured(&value)
270    }
271
272    /// Parses JSON format.
273    fn parse_json(content: &str) -> Result<PromptTemplate> {
274        let value: serde_json::Value = serde_json::from_str(content)
275            .map_err(|e| Error::InvalidInput(format!("Invalid JSON: {e}")))?;
276
277        Self::parse_structured(&value)
278    }
279
280    /// Parses a structured value (JSON or YAML converted to JSON).
281    fn parse_structured(value: &serde_json::Value) -> Result<PromptTemplate> {
282        let name = value
283            .get("name")
284            .and_then(serde_json::Value::as_str)
285            .unwrap_or("")
286            .to_string();
287
288        let description = value
289            .get("description")
290            .and_then(serde_json::Value::as_str)
291            .unwrap_or("")
292            .to_string();
293
294        let content = value
295            .get("content")
296            .and_then(serde_json::Value::as_str)
297            .ok_or_else(|| Error::InvalidInput("Missing 'content' field".to_string()))?
298            .to_string();
299
300        let tags = value
301            .get("tags")
302            .and_then(serde_json::Value::as_array)
303            .map_or_else(Vec::new, |arr| {
304                arr.iter()
305                    .filter_map(|v| v.as_str().map(String::from))
306                    .collect()
307            });
308
309        let author = value
310            .get("author")
311            .and_then(serde_json::Value::as_str)
312            .map(String::from);
313
314        // Parse explicit variable definitions
315        let explicit_variables = value
316            .get("variables")
317            .and_then(serde_json::Value::as_array)
318            .map_or_else(Vec::new, |arr| {
319                arr.iter().filter_map(parse_variable_def).collect()
320            });
321
322        // Extract variables from content and merge with explicit definitions
323        let extracted = extract_variables(&content);
324        let variables = merge_variables(explicit_variables, extracted);
325
326        Ok(PromptTemplate {
327            name,
328            description,
329            content,
330            variables,
331            tags,
332            author,
333            usage_count: 0,
334            created_at: 0,
335            updated_at: 0,
336        })
337    }
338
339    /// Parses plain text (just content, no metadata).
340    fn parse_plain_text(content: &str) -> Result<PromptTemplate> {
341        let extracted = extract_variables(content);
342        let variables = extracted
343            .into_iter()
344            .map(|v| PromptVariable::new(v.name))
345            .collect();
346
347        Ok(PromptTemplate {
348            name: String::new(),
349            description: String::new(),
350            content: content.to_string(),
351            variables,
352            tags: Vec::new(),
353            author: None,
354            usage_count: 0,
355            created_at: 0,
356            updated_at: 0,
357        })
358    }
359
360    /// Serializes a prompt template to the specified format.
361    ///
362    /// # Arguments
363    ///
364    /// * `template` - The template to serialize
365    /// * `format` - The output format
366    ///
367    /// # Errors
368    ///
369    /// Returns an error if serialization fails.
370    pub fn serialize(template: &PromptTemplate, format: PromptFormat) -> Result<String> {
371        match format {
372            PromptFormat::Markdown => Self::serialize_markdown(template),
373            PromptFormat::Yaml => Self::serialize_yaml(template),
374            PromptFormat::Json => Self::serialize_json(template),
375            PromptFormat::PlainText => Ok(template.content.clone()),
376        }
377    }
378
379    /// Serializes to markdown with YAML front matter.
380    fn serialize_markdown(template: &PromptTemplate) -> Result<String> {
381        use serde_json::json;
382
383        let mut metadata = json!({
384            "name": template.name,
385        });
386
387        if !template.description.is_empty() {
388            metadata["description"] = json!(template.description);
389        }
390
391        if !template.tags.is_empty() {
392            metadata["tags"] = json!(template.tags);
393        }
394
395        if let Some(author) = &template.author {
396            metadata["author"] = json!(author);
397        }
398
399        // Add variable definitions if any have non-default settings
400        let has_custom_vars = template
401            .variables
402            .iter()
403            .any(|v| v.description.is_some() || v.default.is_some() || !v.required);
404
405        if has_custom_vars {
406            let vars: Vec<_> = template
407                .variables
408                .iter()
409                .map(serialize_variable_to_json)
410                .collect();
411            metadata["variables"] = json!(vars);
412        }
413
414        YamlFrontMatterParser::serialize(&metadata, &template.content)
415    }
416
417    /// Serializes to YAML.
418    fn serialize_yaml(template: &PromptTemplate) -> Result<String> {
419        serde_yaml_ng::to_string(template).map_err(|e| Error::OperationFailed {
420            operation: "serialize_yaml".to_string(),
421            cause: e.to_string(),
422        })
423    }
424
425    /// Serializes to JSON.
426    fn serialize_json(template: &PromptTemplate) -> Result<String> {
427        serde_json::to_string_pretty(template).map_err(|e| Error::OperationFailed {
428            operation: "serialize_json".to_string(),
429            cause: e.to_string(),
430        })
431    }
432}
433
434/// Serializes a `PromptVariable` to a JSON value for markdown front matter.
435fn serialize_variable_to_json(v: &PromptVariable) -> serde_json::Value {
436    use serde_json::json;
437
438    let mut var = json!({"name": v.name});
439    if let Some(desc) = &v.description {
440        var["description"] = json!(desc);
441    }
442    if let Some(default) = &v.default {
443        var["default"] = json!(default);
444    }
445    if !v.required {
446        var["required"] = json!(false);
447    }
448    var
449}
450
451/// Parses a variable definition from a JSON value.
452fn parse_variable_def(value: &serde_json::Value) -> Option<PromptVariable> {
453    // Support both object format and simple string format
454    if let Some(name) = value.as_str() {
455        return Some(PromptVariable::new(name));
456    }
457
458    let name = value.get("name")?.as_str()?;
459    let description = value
460        .get("description")
461        .and_then(serde_json::Value::as_str)
462        .map(String::from);
463    let default = value
464        .get("default")
465        .and_then(serde_json::Value::as_str)
466        .map(String::from);
467    let required = value
468        .get("required")
469        .and_then(serde_json::Value::as_bool)
470        .unwrap_or(true);
471
472    Some(PromptVariable {
473        name: name.to_string(),
474        description,
475        default,
476        required,
477    })
478}
479
480/// Merges explicit variable definitions with extracted variables.
481///
482/// Explicit definitions take precedence. Variables extracted from content
483/// that are not in explicit definitions are added with default settings.
484fn merge_variables(
485    explicit: Vec<PromptVariable>,
486    extracted: Vec<crate::models::ExtractedVariable>,
487) -> Vec<PromptVariable> {
488    use std::collections::HashSet;
489
490    // Collect names first, then transfer ownership
491    let explicit_names: HashSet<String> = explicit.iter().map(|v| v.name.clone()).collect();
492
493    let mut result = explicit;
494
495    // Add any extracted variables that weren't explicitly defined
496    for ext in extracted {
497        if !explicit_names.contains(&ext.name) {
498            result.push(PromptVariable::new(ext.name));
499        }
500    }
501
502    result
503}
504
// Unit tests for format detection, parsing each supported format,
// serialization, and the variable helper functions.
#[cfg(test)]
mod tests {
    use super::*;

    // Extension-to-format mapping, including the no-extension fallback.
    #[test]
    fn test_format_from_extension() {
        assert_eq!(
            PromptFormat::from_extension(Path::new("test.md")),
            PromptFormat::Markdown
        );
        assert_eq!(
            PromptFormat::from_extension(Path::new("test.yaml")),
            PromptFormat::Yaml
        );
        assert_eq!(
            PromptFormat::from_extension(Path::new("test.yml")),
            PromptFormat::Yaml
        );
        assert_eq!(
            PromptFormat::from_extension(Path::new("test.json")),
            PromptFormat::Json
        );
        assert_eq!(
            PromptFormat::from_extension(Path::new("test.txt")),
            PromptFormat::PlainText
        );
        // A path without an extension is treated as Markdown.
        assert_eq!(
            PromptFormat::from_extension(Path::new("test")),
            PromptFormat::Markdown
        );
    }

    // Canonical extension reported for each format.
    #[test]
    fn test_format_extension() {
        assert_eq!(PromptFormat::Markdown.extension(), "md");
        assert_eq!(PromptFormat::Yaml.extension(), "yaml");
        assert_eq!(PromptFormat::Json.extension(), "json");
        assert_eq!(PromptFormat::PlainText.extension(), "txt");
    }

    // Front matter supplies the metadata; the body supplies content and variables.
    #[test]
    fn test_parse_markdown_with_front_matter() {
        let content = r"---
name: code-review
description: Review code for issues
tags:
  - code
  - review
---
Please review this {{language}} code:
{{code}}
";

        let template = PromptParser::parse(content, PromptFormat::Markdown).unwrap();

        assert_eq!(template.name, "code-review");
        assert_eq!(template.description, "Review code for issues");
        assert_eq!(template.tags, vec!["code", "review"]);
        assert!(template.content.contains("{{language}}"));
        assert!(template.content.contains("{{code}}"));
        assert_eq!(template.variables.len(), 2);
    }

    // Markdown without front matter: empty name, whole input kept as content.
    #[test]
    fn test_parse_markdown_without_front_matter() {
        let content = "Hello {{name}}, welcome to {{place}}!";

        let template = PromptParser::parse(content, PromptFormat::Markdown).unwrap();

        assert!(template.name.is_empty());
        assert_eq!(template.content, content);
        assert_eq!(template.variables.len(), 2);
        assert_eq!(template.variables[0].name, "name");
        assert_eq!(template.variables[1].name, "place");
    }

    // Basic YAML document with metadata and a single variable in the content.
    #[test]
    fn test_parse_yaml() {
        let content = r#"
name: greeting
description: A friendly greeting
content: "Hello {{name}}!"
tags:
  - greeting
  - friendly
"#;

        let template = PromptParser::parse(content, PromptFormat::Yaml).unwrap();

        assert_eq!(template.name, "greeting");
        assert_eq!(template.description, "A friendly greeting");
        assert_eq!(template.content, "Hello {{name}}!");
        assert_eq!(template.tags, vec!["greeting", "friendly"]);
        assert_eq!(template.variables.len(), 1);
    }

    // Explicit variable definitions are kept and merged with extracted ones.
    #[test]
    fn test_parse_yaml_with_variables() {
        let content = r#"
name: email
content: "Dear {{recipient}}, {{body}} Regards, {{sender}}"
variables:
  - name: recipient
    description: Email recipient
    required: true
  - name: sender
    default: "Support Team"
    required: false
"#;

        let template = PromptParser::parse(content, PromptFormat::Yaml).unwrap();

        assert_eq!(template.variables.len(), 3);

        let recipient = template.variables.iter().find(|v| v.name == "recipient");
        assert!(recipient.is_some());
        assert_eq!(
            recipient.unwrap().description,
            Some("Email recipient".to_string())
        );

        let sender = template.variables.iter().find(|v| v.name == "sender");
        assert!(sender.is_some());
        assert!(!sender.unwrap().required);
        assert_eq!(sender.unwrap().default, Some("Support Team".to_string()));

        // body should be auto-extracted
        let body = template.variables.iter().find(|v| v.name == "body");
        assert!(body.is_some());
    }

    // JSON document with metadata and two variables in the content.
    #[test]
    fn test_parse_json() {
        let content = r#"{
            "name": "json-prompt",
            "description": "A JSON-defined prompt",
            "content": "Process {{input}} and return {{output}}",
            "tags": ["json", "test"]
        }"#;

        let template = PromptParser::parse(content, PromptFormat::Json).unwrap();

        assert_eq!(template.name, "json-prompt");
        assert_eq!(template.tags, vec!["json", "test"]);
        assert_eq!(template.variables.len(), 2);
    }

    // Plain text: no metadata, content preserved verbatim.
    #[test]
    fn test_parse_plain_text() {
        let content = "Simple {{variable}} template.";

        let template = PromptParser::parse(content, PromptFormat::PlainText).unwrap();

        assert!(template.name.is_empty());
        assert_eq!(template.content, content);
        assert_eq!(template.variables.len(), 1);
        assert_eq!(template.variables[0].name, "variable");
    }

    // A structured document without `content` is rejected, and the error mentions it.
    #[test]
    fn test_parse_json_missing_content() {
        let content = r#"{"name": "incomplete"}"#;

        let result = PromptParser::parse(content, PromptFormat::Json);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("content"));
    }

    // Markdown output contains the front matter fields and the body.
    #[test]
    fn test_serialize_markdown() {
        let template = PromptTemplate::new("test-prompt", "Hello {{name}}!")
            .with_description("A test prompt")
            .with_tags(vec!["test".to_string()]);

        let serialized = PromptParser::serialize(&template, PromptFormat::Markdown).unwrap();

        assert!(serialized.contains("---"));
        assert!(serialized.contains("name: test-prompt"));
        assert!(serialized.contains("description: A test prompt"));
        assert!(serialized.contains("Hello {{name}}!"));
    }

    // YAML output carries the name and a content key.
    #[test]
    fn test_serialize_yaml() {
        let template =
            PromptTemplate::new("yaml-test", "Content {{var}}").with_description("YAML test");

        let serialized = PromptParser::serialize(&template, PromptFormat::Yaml).unwrap();

        assert!(serialized.contains("name: yaml-test"));
        assert!(serialized.contains("content:"));
    }

    // JSON output is pretty-printed and parses back as valid JSON.
    #[test]
    fn test_serialize_json() {
        let template = PromptTemplate::new("json-test", "Content {{var}}");

        let serialized = PromptParser::serialize(&template, PromptFormat::Json).unwrap();

        assert!(serialized.contains("\"name\": \"json-test\""));
        let parsed: serde_json::Value = serde_json::from_str(&serialized).unwrap();
        assert_eq!(parsed["name"], "json-test");
    }

    // Plain text output is exactly the template content.
    #[test]
    fn test_serialize_plain_text() {
        let template = PromptTemplate::new("plain", "Just {{content}}");

        let serialized = PromptParser::serialize(&template, PromptFormat::PlainText).unwrap();

        assert_eq!(serialized, "Just {{content}}");
    }

    // Serializing to Markdown and parsing back preserves the core fields.
    #[test]
    fn test_roundtrip_markdown() {
        let original = PromptTemplate::new("roundtrip", "Test {{var}}")
            .with_description("Roundtrip test")
            .with_tags(vec!["test".to_string()]);

        let serialized = PromptParser::serialize(&original, PromptFormat::Markdown).unwrap();
        let parsed = PromptParser::parse(&serialized, PromptFormat::Markdown).unwrap();

        assert_eq!(parsed.name, original.name);
        assert_eq!(parsed.description, original.description);
        assert_eq!(parsed.content, original.content);
        assert_eq!(parsed.tags, original.tags);
    }

    // Explicit definitions win; extracted names not yet defined are appended.
    #[test]
    fn test_merge_variables() {
        let explicit = vec![
            PromptVariable::new("name").with_description("User name"),
            PromptVariable::optional("status", "active"),
        ];

        let extracted = vec![
            crate::models::ExtractedVariable {
                name: "name".to_string(),
                position: 0,
            },
            crate::models::ExtractedVariable {
                name: "extra".to_string(),
                position: 10,
            },
        ];

        let merged = merge_variables(explicit, extracted);

        assert_eq!(merged.len(), 3);
        // Explicit definitions preserved
        assert!(
            merged
                .iter()
                .any(|v| v.name == "name" && v.description == Some("User name".to_string()))
        );
        assert!(merged.iter().any(|v| v.name == "status" && !v.required));
        // Extra variable added with defaults
        assert!(merged.iter().any(|v| v.name == "extra" && v.required));
    }

    // Shorthand form: a bare string is the variable name, required by default.
    #[test]
    fn test_parse_variable_def_string() {
        let value = serde_json::json!("simple_var");
        let var = parse_variable_def(&value).unwrap();
        assert_eq!(var.name, "simple_var");
        assert!(var.required);
    }

    // Object form: every field is read from the JSON object.
    #[test]
    fn test_parse_variable_def_object() {
        let value = serde_json::json!({
            "name": "complex_var",
            "description": "A complex variable",
            "default": "default_value",
            "required": false
        });

        let var = parse_variable_def(&value).unwrap();
        assert_eq!(var.name, "complex_var");
        assert_eq!(var.description, Some("A complex variable".to_string()));
        assert_eq!(var.default, Some("default_value".to_string()));
        assert!(!var.required);
    }
}