Skip to main content

subcog/models/
prompt.rs

1//! Prompt template models.
2//!
3//! Provides data structures for user-defined prompt templates with variable substitution.
4//!
5//! # Code Block Detection Edge Cases
6//!
7//! Variable extraction automatically skips `{{variable}}` patterns inside fenced code blocks
8//! to avoid capturing documentation examples. This section documents edge cases and behaviors.
9//!
10//! ## Supported Code Block Syntaxes
11//!
12//! | Syntax | Supported | Notes |
13//! |--------|-----------|-------|
14//! | ` ```language ... ``` ` | ✓ | Standard fenced code block |
15//! | ` ``` ... ``` ` | ✓ | Code block without language |
16//! | ` ~~~ ... ~~~ ` | ✓ | Tilde fenced code block |
17//! | Indented code (4 spaces) | ✗ | Only fenced blocks detected |
18//!
19//! ## Edge Cases
20//!
21//! ### 1. Unclosed Code Blocks
22//!
23//! Input: triple-backtick rust, `let x = "{{var}}";`, no closing triple-backtick
24//!
25//! **Behavior**: Unclosed blocks are not detected, so variables inside ARE extracted.
26//! This is intentional - malformed content shouldn't silently exclude variables.
27//!
28//! ### 2. Nested Code Blocks (within markdown)
29//!
30//! Input: Outer tilde block containing inner backtick block with `{{inner_var}}`
31//!
32//! **Behavior**: Both tilde and backtick blocks are detected. Variables inside
33//! either syntax are excluded. Nested blocks are handled correctly.
34//!
35//! ### 3. Variables at Block Boundaries
36//!
37//! Input: `{{before}}` immediately before triple-backtick, `{{after}}` immediately after
38//!
39//! **Behavior**: Both `{{before}}` and `{{after}}` are extracted. Only content
40//! strictly between the opening and closing triple-backticks is excluded.
41//!
42//! ### 4. Inline Code (single backticks)
43//!
44//! Input: `Use {{var}} syntax for variables.` (single backticks around var)
45//!
//! **Behavior**: Single backticks DO NOT exclude variables. Only fenced blocks
//! (triple backticks or triple tildes) are detected. `{{var}}` IS extracted.
48//!
49//! ### 5. Empty Code Blocks
50//!
51//! Input: Empty triple-backtick block
52//!
53//! **Behavior**: Empty blocks are detected but contain no variables to exclude.
54//!
55//! ## Workarounds
56//!
57//! If you need a `{{variable}}` pattern in your actual prompt output (not as a variable):
58//!
59//! 1. **Escape it**: Use `\{\{literal\}\}` (will be preserved literally)
60//! 2. **Put it in a code block**: Variables in fenced blocks are not substituted
61//! 3. **Use a variable with literal value**: Define `open_brace`/`close_brace` variables
62
63use regex::Regex;
64use serde::{Deserialize, Serialize};
65use std::collections::{HashMap, HashSet};
66use std::hash::BuildHasher;
67use std::sync::LazyLock;
68
69use crate::{Error, Result};
70
/// Creates a lazily compiled [`Regex`] wrapped in [`LazyLock`].
///
/// The pattern is compiled by `Regex::new` the first time the static is
/// dereferenced; it is **not** checked at compile time. Every pattern passed
/// to this macro in this file is a fixed string literal, so a compilation
/// failure indicates a programming error rather than a recoverable condition.
///
/// # Panics
///
/// Panics on first access if `$pattern` is not a valid regular expression.
/// The panic message includes the offending pattern and the regex error so
/// the broken literal can be located immediately.
macro_rules! lazy_regex {
    ($pattern:expr) => {
        LazyLock::new(|| {
            Regex::new($pattern).unwrap_or_else(|err| {
                unreachable!(
                    "built-in regex {:?} failed to compile: {}",
                    $pattern,
                    err
                )
            })
        })
    };
}
82
/// Regex pattern for extracting template variables: `{{variable_name}}`.
/// Supports dots for context template auto-variables like `{{memory.id}}`.
///
/// Capture group 1 holds the variable name without the surrounding braces.
static VARIABLE_PATTERN: LazyLock<Regex> = lazy_regex!(r"\{\{([\w.]+)\}\}");

/// Regex pattern for detecting any content between `{{` and `}}`.
///
/// Capture group 1 holds everything between the braces (any run of
/// characters other than `}`, possibly empty), so malformed names can be
/// inspected and reported by validation.
static VALIDATION_PATTERN: LazyLock<Regex> = lazy_regex!(r"\{\{([^}]*)\}\}");

/// Regex pattern for detecting fenced code blocks (triple backticks with optional language identifier).
/// Matches: ``` followed by optional language, then content, then ```
///
/// Capture group 1 is the (possibly empty) language identifier and capture
/// group 2 is the block content, matched lazily up to the next closing fence.
static CODE_BLOCK_BACKTICK_PATTERN: LazyLock<Regex> =
    lazy_regex!(r"```([a-zA-Z0-9_-]*)\n?([\s\S]*?)```");

/// Regex pattern for detecting tilde fenced code blocks.
/// Matches: ~~~ followed by optional language, then content, then ~~~
///
/// Capture groups mirror the backtick pattern: group 1 is the language
/// identifier, group 2 is the lazily matched content.
static CODE_BLOCK_TILDE_PATTERN: LazyLock<Regex> =
    lazy_regex!(r"~~~([a-zA-Z0-9_-]*)\n?([\s\S]*?)~~~");
99
/// A half-open byte span `[start, end)` covering one fenced code block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CodeBlockRegion {
    /// Byte offset of the first byte of the block (inclusive).
    pub start: usize,
    /// Byte offset one past the last byte of the block (exclusive).
    pub end: usize,
    /// Language tag attached to the opening fence, if any (e.g., "rust").
    pub language: Option<String>,
}

impl CodeBlockRegion {
    /// Builds a region from its bounds and optional language tag.
    ///
    /// The `missing_const_for_fn` lint is silenced so this constructor stays
    /// a regular `fn`.
    #[allow(clippy::missing_const_for_fn)]
    #[must_use]
    pub fn new(start: usize, end: usize, language: Option<String>) -> Self {
        Self { start, end, language }
    }

    /// Returns `true` when `position` lies inside `[start, end)`.
    #[must_use]
    pub const fn contains(&self, position: usize) -> bool {
        self.start <= position && position < self.end
    }
}
131
132/// Detects fenced code blocks in content.
133///
134/// Returns regions sorted by start position. Handles:
135/// - Code blocks with language identifiers (```rust, ```markdown, ~~~rust, ~~~markdown)
136/// - Empty code blocks
137/// - Multiple code blocks
138/// - Both backtick (\`\`\`) and tilde (~~~) syntax
139///
140/// # Returns
141///
142/// A list of code block regions in order of appearance.
143#[must_use]
144pub fn detect_code_blocks(content: &str) -> Vec<CodeBlockRegion> {
145    let mut regions = Vec::new();
146
147    // Detect backtick code blocks (```)
148    for cap in CODE_BLOCK_BACKTICK_PATTERN.captures_iter(content) {
149        if let Some(full_match) = cap.get(0) {
150            let language = cap
151                .get(1)
152                .map(|m| m.as_str().trim())
153                .filter(|lang| !lang.is_empty())
154                .map(ToString::to_string);
155
156            regions.push(CodeBlockRegion::new(
157                full_match.start(),
158                full_match.end(),
159                language,
160            ));
161        }
162    }
163
164    // Detect tilde code blocks (~~~)
165    for cap in CODE_BLOCK_TILDE_PATTERN.captures_iter(content) {
166        if let Some(full_match) = cap.get(0) {
167            let language = cap
168                .get(1)
169                .map(|m| m.as_str().trim())
170                .filter(|lang| !lang.is_empty())
171                .map(ToString::to_string);
172
173            regions.push(CodeBlockRegion::new(
174                full_match.start(),
175                full_match.end(),
176                language,
177            ));
178        }
179    }
180
181    // Sort by start position (combines backtick and tilde blocks in order)
182    regions.sort_by_key(|r| r.start);
183    regions
184}
185
186/// Checks if a byte position falls within any exclusion region.
187///
188/// # Arguments
189///
190/// * `position` - The byte position to check.
191/// * `regions` - The list of exclusion regions (e.g., code blocks).
192///
193/// # Returns
194///
195/// `true` if the position is inside any exclusion region.
196#[must_use]
197pub fn is_in_exclusion(position: usize, regions: &[CodeBlockRegion]) -> bool {
198    regions.iter().any(|r| r.contains(position))
199}
200
201/// Extracts variables from prompt content, excluding those inside code blocks.
202///
203/// This is the internal implementation that takes pre-computed exclusion regions.
204fn extract_variables_with_exclusions(
205    content: &str,
206    exclusions: &[CodeBlockRegion],
207) -> Vec<ExtractedVariable> {
208    let mut seen = HashSet::new();
209    let mut variables = Vec::new();
210
211    for cap in VARIABLE_PATTERN.captures_iter(content) {
212        if let Some(name_match) = cap.get(1) {
213            let position = cap.get(0).map_or(0, |m| m.start());
214
215            // Skip variables inside exclusion regions (code blocks)
216            if is_in_exclusion(position, exclusions) {
217                continue;
218            }
219
220            let name = name_match.as_str().to_string();
221            if seen.insert(name.clone()) {
222                variables.push(ExtractedVariable { name, position });
223            }
224        }
225    }
226
227    variables
228}
229
230/// A user-defined prompt template.
231#[derive(Debug, Clone, Default, Serialize, Deserialize)]
232pub struct PromptTemplate {
233    /// Unique prompt name (kebab-case).
234    pub name: String,
235    /// Human-readable description.
236    #[serde(default)]
237    pub description: String,
238    /// The prompt content with `{{variable}}` placeholders.
239    pub content: String,
240    /// Extracted variables with optional metadata.
241    #[serde(default)]
242    pub variables: Vec<PromptVariable>,
243    /// Categorization tags.
244    #[serde(default)]
245    pub tags: Vec<String>,
246    /// Author identifier.
247    #[serde(default)]
248    pub author: Option<String>,
249    /// Usage count for popularity ranking.
250    #[serde(default)]
251    pub usage_count: u64,
252    /// Creation timestamp (Unix epoch seconds).
253    #[serde(default)]
254    pub created_at: u64,
255    /// Last update timestamp (Unix epoch seconds).
256    #[serde(default)]
257    pub updated_at: u64,
258}
259
260impl PromptTemplate {
261    /// Creates a new prompt template with the given name and content.
262    #[must_use]
263    pub fn new(name: impl Into<String>, content: impl Into<String>) -> Self {
264        let content = content.into();
265        let variables = extract_variables(&content)
266            .into_iter()
267            .map(|v| PromptVariable {
268                name: v.name,
269                description: None,
270                default: None,
271                required: true,
272            })
273            .collect();
274
275        Self {
276            name: name.into(),
277            description: String::new(),
278            content,
279            variables,
280            tags: Vec::new(),
281            author: None,
282            usage_count: 0,
283            created_at: 0,
284            updated_at: 0,
285        }
286    }
287
288    /// Sets the description.
289    #[must_use]
290    pub fn with_description(mut self, description: impl Into<String>) -> Self {
291        self.description = description.into();
292        self
293    }
294
295    /// Sets the tags.
296    #[must_use]
297    pub fn with_tags(mut self, tags: Vec<String>) -> Self {
298        self.tags = tags;
299        self
300    }
301
302    /// Sets the author.
303    #[must_use]
304    pub fn with_author(mut self, author: impl Into<String>) -> Self {
305        self.author = Some(author.into());
306        self
307    }
308
309    /// Sets explicit variable definitions, overriding auto-detected ones.
310    #[must_use]
311    pub fn with_variables(mut self, variables: Vec<PromptVariable>) -> Self {
312        self.variables = variables;
313        self
314    }
315
316    /// Returns the list of variable names in this template.
317    #[must_use]
318    pub fn variable_names(&self) -> Vec<&str> {
319        self.variables.iter().map(|v| v.name.as_str()).collect()
320    }
321
322    /// Populates the template with the given variable values.
323    ///
324    /// # Errors
325    ///
326    /// Returns an error if a required variable is missing and has no default.
327    pub fn populate(&self, values: &HashMap<String, String>) -> Result<String> {
328        substitute_variables(&self.content, values, &self.variables)
329    }
330}
331
332/// A template variable definition.
333#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
334pub struct PromptVariable {
335    /// Variable name (without braces).
336    pub name: String,
337    /// Human-readable description for elicitation.
338    #[serde(default)]
339    pub description: Option<String>,
340    /// Default value if not provided.
341    #[serde(default)]
342    pub default: Option<String>,
343    /// Whether the variable is required.
344    #[serde(default = "default_required")]
345    pub required: bool,
346}
347
348/// Default value for `required` field (true).
349const fn default_required() -> bool {
350    true
351}
352
353impl PromptVariable {
354    /// Creates a new required variable.
355    #[must_use]
356    pub fn new(name: impl Into<String>) -> Self {
357        Self {
358            name: name.into(),
359            description: None,
360            default: None,
361            required: true,
362        }
363    }
364
365    /// Creates a new optional variable with a default value.
366    #[must_use]
367    pub fn optional(name: impl Into<String>, default: impl Into<String>) -> Self {
368        Self {
369            name: name.into(),
370            description: None,
371            default: Some(default.into()),
372            required: false,
373        }
374    }
375
376    /// Sets the description.
377    #[must_use]
378    pub fn with_description(mut self, description: impl Into<String>) -> Self {
379        self.description = Some(description.into());
380        self
381    }
382}
383
384impl Default for PromptVariable {
385    fn default() -> Self {
386        Self {
387            name: String::new(),
388            description: None,
389            default: None,
390            required: true,
391        }
392    }
393}
394
/// Result of extracting a variable from prompt content.
///
/// Produced by the extraction functions in this module, one per distinct
/// variable name found outside of excluded regions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExtractedVariable {
    /// Variable name (without braces).
    pub name: String,
    /// Byte position in the content where the variable starts
    /// (the offset of the opening `{{` of the placeholder).
    pub position: usize,
}
403
404/// Extracts variables from prompt content.
405///
406/// Variables are identified by the pattern `{{variable_name}}` where `variable_name`
407/// consists of alphanumeric characters and underscores.
408///
409/// **Important**: Variables inside fenced code blocks (``` ```) are automatically
410/// excluded to avoid capturing example/documentation patterns.
411///
412/// # Returns
413///
414/// A list of extracted variables in order of first appearance, deduplicated.
415#[must_use]
416pub fn extract_variables(content: &str) -> Vec<ExtractedVariable> {
417    let code_blocks = detect_code_blocks(content);
418    extract_variables_with_exclusions(content, &code_blocks)
419}
420
/// Maximum length for variable values (64KB).
///
/// Values exceeding this limit are truncated to prevent denial-of-service attacks
/// via memory exhaustion.
pub const MAX_VARIABLE_VALUE_LENGTH: usize = 65_536;

/// Sanitizes a variable value to prevent template injection attacks.
///
/// Performs three safety transformations, in this order:
/// 1. **Length limiting**: Truncates the raw input to at most
///    [`MAX_VARIABLE_VALUE_LENGTH`] bytes, backing off to the nearest UTF-8
///    character boundary. Escaping happens afterwards, so the result can be
///    slightly longer than the limit (one extra space per escaped brace).
/// 2. **Remove control characters**: Strips ASCII control chars (0x00-0x1F
///    and 0x7F) except:
///    - Tab (0x09)
///    - Newline (0x0A)
///    - Carriage return (0x0D)
/// 3. **Escape nested patterns**: Inserts a space between adjacent identical
///    braces, so `{{` becomes `{ {` and `}}` becomes `} }`.
///
/// The escape decision is based on what has already been written to the
/// output, not on the raw input. This guarantees the result never contains
/// `{{` or `}}`, even for odd-length brace runs (`{{{`) or braces that only
/// become adjacent once a control character between them is removed
/// (`{\x00{`).
///
/// # Arguments
///
/// * `value` - The raw user-provided variable value.
///
/// # Returns
///
/// A sanitized string safe for template substitution.
///
/// # Examples
///
/// ```rust
/// use subcog::models::sanitize_variable_value;
///
/// // Nested patterns are escaped
/// assert_eq!(
///     sanitize_variable_value("prefix {{nested}} suffix"),
///     "prefix { {nested} } suffix"
/// );
///
/// // Control characters are removed
/// assert_eq!(
///     sanitize_variable_value("hello\x00world"),
///     "helloworld"
/// );
///
/// // Allowed whitespace is preserved
/// assert_eq!(
///     sanitize_variable_value("line1\nline2\ttabbed"),
///     "line1\nline2\ttabbed"
/// );
///
/// // Braces separated only by a stripped control character are still escaped
/// assert_eq!(
///     sanitize_variable_value("{\x00{secret}\x00}"),
///     "{ {secret} }"
/// );
/// ```
#[must_use]
pub fn sanitize_variable_value(value: &str) -> String {
    // Step 1: Truncate to maximum length
    let truncated = if value.len() > MAX_VARIABLE_VALUE_LENGTH {
        // Find a valid UTF-8 boundary at or below the limit
        let mut end = MAX_VARIABLE_VALUE_LENGTH;
        while end > 0 && !value.is_char_boundary(end) {
            end -= 1;
        }
        &value[..end]
    } else {
        value
    };

    // Steps 2 and 3: drop control characters, then escape adjacent braces
    let mut result = String::with_capacity(truncated.len());

    for c in truncated.chars() {
        // Remove control characters except tab, newline, carriage return.
        // This happens before the brace check so a stripped character cannot
        // hide a brace pair from the escaping logic.
        if c.is_ascii_control() && !matches!(c, '\t' | '\n' | '\r') {
            continue;
        }

        // Break up `{{` / `}}` by looking at the last character actually
        // emitted; this also covers runs of three or more braces.
        if matches!(c, '{' | '}') && result.ends_with(c) {
            result.push(' ');
        }

        result.push(c);
    }

    result
}
509
510/// Substitutes variables in prompt content.
511///
512/// # Arguments
513///
514/// * `content` - The template content with `{{variable}}` placeholders.
515/// * `values` - A map of variable names to their values.
516/// * `variables` - Variable definitions for defaults and required checks.
517///
518/// # Security
519///
520/// All user-provided variable values are sanitized via [`sanitize_variable_value`]
521/// to prevent template injection attacks:
522/// - Nested `{{...}}` patterns in values are escaped
523/// - Control characters are removed
524/// - Excessively long values are truncated
525///
526/// # Errors
527///
528/// Returns an error if a required variable is missing and has no default.
529pub fn substitute_variables<S: BuildHasher>(
530    content: &str,
531    values: &HashMap<String, String, S>,
532    variables: &[PromptVariable],
533) -> Result<String> {
534    // Build effective values map with defaults, sanitizing all values
535    let mut effective_values: HashMap<String, String> = HashMap::new();
536
537    // Add provided values (sanitized)
538    for (k, v) in values {
539        effective_values.insert(k.clone(), sanitize_variable_value(v));
540    }
541
542    // Apply defaults and check required (sanitize defaults for defense-in-depth)
543    for var in variables {
544        if !effective_values.contains_key(&var.name) {
545            if let Some(default) = &var.default {
546                effective_values.insert(var.name.clone(), sanitize_variable_value(default));
547            } else if var.required {
548                return Err(Error::InvalidInput(format!(
549                    "Missing required variable '{}'. Provide it with: --var {}=VALUE",
550                    var.name, var.name
551                )));
552            }
553        }
554    }
555
556    // Check for variables in content that aren't in the variables list
557    for extracted in extract_variables(content) {
558        if !effective_values.contains_key(&extracted.name) {
559            // Variable found in content but not provided and not in definitions
560            // For flexibility, we'll just leave it unreplaced or error
561            return Err(Error::InvalidInput(format!(
562                "Missing variable '{}'. Provide it with: --var {}=VALUE",
563                extracted.name, extracted.name
564            )));
565        }
566    }
567
568    // Perform substitution
569    let result = VARIABLE_PATTERN
570        .replace_all(content, |caps: &regex::Captures| {
571            caps.get(1)
572                .and_then(|m| effective_values.get(m.as_str()))
573                .map_or_else(|| caps[0].to_string(), String::clone)
574        })
575        .to_string();
576
577    Ok(result)
578}
579
580/// Validation result for prompt content.
581#[derive(Debug, Clone, Default)]
582pub struct ValidationResult {
583    /// Whether the prompt is valid.
584    pub is_valid: bool,
585    /// List of issues found.
586    pub issues: Vec<ValidationIssue>,
587}
588
589impl ValidationResult {
590    /// Creates a valid result with no issues.
591    #[must_use]
592    pub const fn valid() -> Self {
593        Self {
594            is_valid: true,
595            issues: Vec::new(),
596        }
597    }
598
599    /// Creates an invalid result with the given issues.
600    #[must_use]
601    pub const fn invalid(issues: Vec<ValidationIssue>) -> Self {
602        Self {
603            is_valid: false,
604            issues,
605        }
606    }
607
608    /// Adds an issue and marks the result as invalid.
609    pub fn add_issue(&mut self, issue: ValidationIssue) {
610        self.is_valid = false;
611        self.issues.push(issue);
612    }
613}
614
/// One problem detected while validating prompt content.
#[derive(Debug, Clone)]
pub struct ValidationIssue {
    /// How serious the problem is.
    pub severity: IssueSeverity,
    /// Human-readable explanation of the problem.
    pub message: String,
    /// Byte offset in the content where the problem was found, if known.
    pub position: Option<usize>,
}

impl ValidationIssue {
    /// Shared constructor: an issue of the given severity with no position.
    fn with_severity(severity: IssueSeverity, message: impl Into<String>) -> Self {
        Self {
            severity,
            message: message.into(),
            position: None,
        }
    }

    /// Builds an error-level issue without a position.
    #[must_use]
    pub fn error(message: impl Into<String>) -> Self {
        Self::with_severity(IssueSeverity::Error, message)
    }

    /// Builds a warning-level issue without a position.
    #[must_use]
    pub fn warning(message: impl Into<String>) -> Self {
        Self::with_severity(IssueSeverity::Warning, message)
    }

    /// Attaches a byte position to the issue and returns it.
    #[must_use]
    pub const fn at_position(mut self, position: usize) -> Self {
        self.position = Some(position);
        self
    }
}

/// How severe a validation issue is.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IssueSeverity {
    /// A critical problem that must be fixed.
    Error,
    /// A non-critical problem that should be addressed.
    Warning,
}
663
/// Prefixes that variable names are not allowed to start with.
const RESERVED_PREFIXES: &[&str] = &["subcog_", "system_", "__"];

/// Reports whether `name` starts with a reserved prefix.
///
/// The comparison is made against the lowercased name, so `SUBCOG_x` is
/// reserved just like `subcog_x`.
#[must_use]
pub fn is_reserved_variable_name(name: &str) -> bool {
    let normalized = name.to_lowercase();
    for prefix in RESERVED_PREFIXES {
        if normalized.starts_with(*prefix) {
            return true;
        }
    }
    false
}
675
676/// Validates prompt content for common issues.
677///
678/// Checks for:
679/// - Unclosed braces (e.g., `{{var` without closing `}}`)
680/// - Invalid variable names (non-alphanumeric characters)
681/// - Reserved variable names (e.g., `subcog_*`, `system_*`, `__*`)
682/// - Duplicate variable definitions
683///
684/// # Returns
685///
686/// A validation result indicating whether the content is valid.
687#[must_use]
688pub fn validate_prompt_content(content: &str) -> ValidationResult {
689    let mut result = ValidationResult::valid();
690    let mut seen_names: HashSet<String> = HashSet::new();
691
692    // Check for unclosed braces
693    let open_count = content.matches("{{").count();
694    let close_count = content.matches("}}").count();
695
696    if open_count != close_count {
697        result.add_issue(ValidationIssue::error(format!(
698            "Unbalanced braces: {open_count} opening '{{{{' vs {close_count} closing '}}}}'"
699        )));
700    }
701
702    // Check for single braces that might indicate typos
703    // Pattern: single { not followed by { or single } not followed by }
704    let mut i = 0;
705    let bytes = content.as_bytes();
706    while i < bytes.len() {
707        if bytes[i] == b'{' {
708            if i + 1 < bytes.len() && bytes[i + 1] == b'{' {
709                // Valid opening {{, skip both
710                i += 2;
711                continue;
712            }
713            // Single { - might be intentional (like in code blocks)
714            // Only warn if it looks like a malformed variable
715            if i + 1 < bytes.len() && bytes[i + 1].is_ascii_alphabetic() {
716                result.add_issue(
717                    ValidationIssue::warning("Single '{' found - did you mean '{{'?")
718                        .at_position(i),
719                );
720            }
721        } else if bytes[i] == b'}' {
722            // Check if this is the first } of a }} pair
723            if i + 1 < bytes.len() && bytes[i + 1] == b'}' {
724                // Valid closing }}, skip both
725                i += 2;
726                continue;
727            }
728            // Single } - warn if preceded by alphanumeric (likely typo)
729            if i > 0 && bytes[i - 1].is_ascii_alphanumeric() {
730                result.add_issue(
731                    ValidationIssue::warning("Single '}' found - did you mean '}}'?")
732                        .at_position(i),
733                );
734            }
735        }
736        i += 1;
737    }
738
739    // Check for invalid variable names (variables extracted but with issues)
740    // The regex only matches valid names, so this catches edge cases
741    // like {{123}} which wouldn't match \w+ starting with digit
742    for cap in VALIDATION_PATTERN.captures_iter(content) {
743        if let Some(inner) = cap.get(1) {
744            let name = inner.as_str();
745            if name.is_empty() {
746                result.add_issue(
747                    ValidationIssue::error("Empty variable name: {{}}").at_position(inner.start()),
748                );
749            } else if !name
750                .chars()
751                .next()
752                .is_some_and(|c| c.is_ascii_alphabetic() || c == '_')
753            {
754                result.add_issue(
755                    ValidationIssue::error(format!(
756                        "Invalid variable name '{name}': must start with letter or underscore"
757                    ))
758                    .at_position(inner.start()),
759                );
760            } else if !name.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
761                result.add_issue(
762                    ValidationIssue::error(format!(
763                        "Invalid variable name '{name}': contains invalid characters"
764                    ))
765                    .at_position(inner.start()),
766                );
767            } else if is_reserved_variable_name(name) {
768                result.add_issue(
769                    ValidationIssue::error(format!(
770                        "Reserved variable name '{name}': cannot use 'subcog_', 'system_', or '__' prefix"
771                    ))
772                    .at_position(inner.start()),
773                );
774            } else if !seen_names.insert(name.to_lowercase()) {
775                // Note: This is a warning, not an error, as duplicate variables
776                // are functionally valid (just redundant)
777                result.add_issue(
778                    ValidationIssue::warning(format!("Duplicate variable name: '{name}'"))
779                        .at_position(inner.start()),
780                );
781            }
782        }
783    }
784
785    result
786}
787
788#[cfg(test)]
789mod tests {
790    use super::*;
791
792    #[test]
793    fn test_extract_variables_simple() {
794        let content = "Hello {{name}}, your {{item}} is ready.";
795        let vars = extract_variables(content);
796
797        assert_eq!(vars.len(), 2);
798        assert_eq!(vars[0].name, "name");
799        assert_eq!(vars[1].name, "item");
800    }
801
802    #[test]
803    fn test_extract_variables_deduplicates() {
804        let content = "{{name}} and {{name}} again, plus {{other}}.";
805        let vars = extract_variables(content);
806
807        assert_eq!(vars.len(), 2);
808        assert_eq!(vars[0].name, "name");
809        assert_eq!(vars[1].name, "other");
810    }
811
812    #[test]
813    fn test_extract_variables_underscores() {
814        let content = "{{user_name}} and {{item_count}}.";
815        let vars = extract_variables(content);
816
817        assert_eq!(vars.len(), 2);
818        assert_eq!(vars[0].name, "user_name");
819        assert_eq!(vars[1].name, "item_count");
820    }
821
822    #[test]
823    fn test_extract_variables_empty() {
824        let content = "No variables here.";
825        let vars = extract_variables(content);
826
827        assert!(vars.is_empty());
828    }
829
830    #[test]
831    fn test_substitute_variables_complete() {
832        let content = "Hello {{name}}, your {{item}} is ready.";
833        let mut values = HashMap::new();
834        values.insert("name".to_string(), "Alice".to_string());
835        values.insert("item".to_string(), "order".to_string());
836
837        let result = substitute_variables(content, &values, &[]).unwrap();
838        assert_eq!(result, "Hello Alice, your order is ready.");
839    }
840
841    #[test]
842    fn test_substitute_variables_with_defaults() {
843        let content = "Hello {{name}}, status: {{status}}.";
844        let mut values = HashMap::new();
845        values.insert("name".to_string(), "Bob".to_string());
846
847        let variables = vec![
848            PromptVariable::new("name"),
849            PromptVariable::optional("status", "pending"),
850        ];
851
852        let result = substitute_variables(content, &values, &variables).unwrap();
853        assert_eq!(result, "Hello Bob, status: pending.");
854    }
855
856    #[test]
857    fn test_substitute_variables_missing_required() {
858        let content = "Hello {{name}}.";
859        let values = HashMap::new();
860
861        let variables = vec![PromptVariable::new("name")];
862
863        let result = substitute_variables(content, &values, &variables);
864        assert!(result.is_err());
865        let err_msg = result.unwrap_err().to_string();
866        assert!(err_msg.contains("Missing required variable"));
867        assert!(err_msg.contains("--var name=VALUE"));
868    }
869
870    #[test]
871    fn test_prompt_template_new() {
872        let template = PromptTemplate::new("greeting", "Hello {{name}}!");
873
874        assert_eq!(template.name, "greeting");
875        assert_eq!(template.content, "Hello {{name}}!");
876        assert_eq!(template.variables.len(), 1);
877        assert_eq!(template.variables[0].name, "name");
878    }
879
880    #[test]
881    fn test_prompt_template_populate() {
882        let template = PromptTemplate::new("greeting", "Hello {{name}}!");
883
884        let mut values = HashMap::new();
885        values.insert("name".to_string(), "World".to_string());
886
887        let result = template.populate(&values).unwrap();
888        assert_eq!(result, "Hello World!");
889    }
890
891    #[test]
892    fn test_prompt_template_serialization() {
893        let template = PromptTemplate::new("test", "{{var}}")
894            .with_description("A test prompt")
895            .with_tags(vec!["test".to_string()]);
896
897        let json = serde_json::to_string(&template).unwrap();
898        let parsed: PromptTemplate = serde_json::from_str(&json).unwrap();
899
900        assert_eq!(parsed.name, "test");
901        assert_eq!(parsed.description, "A test prompt");
902        assert_eq!(parsed.tags, vec!["test"]);
903    }
904
905    #[test]
906    fn test_validate_prompt_content_valid() {
907        let content = "Hello {{name}}, your {{item}} is ready.";
908        let result = validate_prompt_content(content);
909
910        assert!(result.is_valid);
911        assert!(result.issues.is_empty());
912    }
913
914    #[test]
915    fn test_validate_prompt_content_unclosed_braces() {
916        let content = "Hello {{name}, missing close.";
917        let result = validate_prompt_content(content);
918
919        assert!(!result.is_valid);
920        assert!(
921            result
922                .issues
923                .iter()
924                .any(|i| i.message.contains("Unbalanced"))
925        );
926    }
927
928    #[test]
929    fn test_validate_prompt_content_empty_variable() {
930        let content = "Hello {{}}, empty.";
931        let result = validate_prompt_content(content);
932
933        assert!(!result.is_valid);
934        assert!(
935            result
936                .issues
937                .iter()
938                .any(|i| i.message.contains("Empty variable"))
939        );
940    }
941
942    #[test]
943    fn test_validate_prompt_content_invalid_name() {
944        let content = "Hello {{123bad}}, invalid.";
945        let result = validate_prompt_content(content);
946
947        assert!(!result.is_valid);
948        assert!(
949            result
950                .issues
951                .iter()
952                .any(|i| i.message.contains("must start with letter"))
953        );
954    }
955
956    #[test]
957    fn test_prompt_variable_builders() {
958        let required = PromptVariable::new("name").with_description("User's name");
959        assert!(required.required);
960        assert_eq!(required.description, Some("User's name".to_string()));
961
962        let optional = PromptVariable::optional("status", "pending");
963        assert!(!optional.required);
964        assert_eq!(optional.default, Some("pending".to_string()));
965    }
966
967    #[test]
968    fn test_is_reserved_variable_name() {
969        // Reserved prefixes
970        assert!(is_reserved_variable_name("subcog_version"));
971        assert!(is_reserved_variable_name("SUBCOG_CONFIG"));
972        assert!(is_reserved_variable_name("system_path"));
973        assert!(is_reserved_variable_name("System_User"));
974        assert!(is_reserved_variable_name("__private"));
975        assert!(is_reserved_variable_name("__init"));
976
977        // Valid names
978        assert!(!is_reserved_variable_name("name"));
979        assert!(!is_reserved_variable_name("user_name"));
980        assert!(!is_reserved_variable_name("mySubcog"));
981        assert!(!is_reserved_variable_name("_underscore"));
982    }
983
984    #[test]
985    fn test_validate_prompt_content_reserved_name() {
986        let content = "Config: {{subcog_config}}";
987        let result = validate_prompt_content(content);
988
989        assert!(!result.is_valid);
990        assert!(result.issues.iter().any(|i| i.message.contains("Reserved")));
991    }
992
993    #[test]
994    fn test_validate_prompt_content_duplicate_variable() {
995        let content = "Hello {{name}} and {{name}} again";
996        let result = validate_prompt_content(content);
997
998        // Should have a warning for duplicate, but still be functionally valid
999        // (warnings don't make it invalid, only errors do)
1000        assert!(
1001            result
1002                .issues
1003                .iter()
1004                .any(|i| i.message.contains("Duplicate"))
1005        );
1006    }
1007
1008    #[test]
1009    fn test_validate_prompt_content_system_prefix() {
1010        let content = "Path: {{system_path}}";
1011        let result = validate_prompt_content(content);
1012
1013        assert!(!result.is_valid);
1014        assert!(result.issues.iter().any(|i| i.message.contains("system_")));
1015    }
1016
1017    #[test]
1018    fn test_validate_prompt_content_double_underscore() {
1019        let content = "Private: {{__internal}}";
1020        let result = validate_prompt_content(content);
1021
1022        assert!(!result.is_valid);
1023        assert!(result.issues.iter().any(|i| i.message.contains("__")));
1024    }
1025
1026    // ============================================================
1027    // Task 1.4: Unit Tests for Code Block Detection
1028    // ============================================================
1029
1030    #[test]
1031    fn test_detect_code_blocks_single() {
1032        let content = "Before\n```rust\nlet x = 1;\n```\nAfter";
1033        let blocks = detect_code_blocks(content);
1034
1035        assert_eq!(blocks.len(), 1);
1036        assert_eq!(blocks[0].language, Some("rust".to_string()));
1037        assert!(blocks[0].start < blocks[0].end);
1038    }
1039
1040    #[test]
1041    fn test_detect_code_blocks_multiple() {
1042        let content =
1043            "```python\nprint('hello')\n```\n\nSome text\n\n```javascript\nconsole.log('hi');\n```";
1044        let blocks = detect_code_blocks(content);
1045
1046        assert_eq!(blocks.len(), 2);
1047        assert_eq!(blocks[0].language, Some("python".to_string()));
1048        assert_eq!(blocks[1].language, Some("javascript".to_string()));
1049        assert!(blocks[0].end <= blocks[1].start);
1050    }
1051
1052    #[test]
1053    fn test_detect_code_blocks_with_language_identifier() {
1054        let content = "```markdown\n# Header\n```";
1055        let blocks = detect_code_blocks(content);
1056
1057        assert_eq!(blocks.len(), 1);
1058        assert_eq!(blocks[0].language, Some("markdown".to_string()));
1059    }
1060
1061    #[test]
1062    fn test_detect_code_blocks_empty() {
1063        let content = "```\n```";
1064        let blocks = detect_code_blocks(content);
1065
1066        assert_eq!(blocks.len(), 1);
1067        assert!(blocks[0].language.is_none());
1068    }
1069
1070    #[test]
1071    fn test_detect_code_blocks_no_language() {
1072        let content = "```\nplain code\n```";
1073        let blocks = detect_code_blocks(content);
1074
1075        assert_eq!(blocks.len(), 1);
1076        assert!(blocks[0].language.is_none());
1077    }
1078
1079    #[test]
1080    fn test_detect_code_blocks_none() {
1081        let content = "No code blocks here, just regular text.";
1082        let blocks = detect_code_blocks(content);
1083
1084        assert!(blocks.is_empty());
1085    }
1086
1087    #[test]
1088    fn test_detect_code_blocks_unclosed() {
1089        // Unclosed code blocks should not match (regex requires closing ```)
1090        let content = "```rust\nunclosed code block without ending";
1091        let blocks = detect_code_blocks(content);
1092
1093        assert!(blocks.is_empty());
1094    }
1095
1096    #[test]
1097    fn test_code_block_region_contains() {
1098        let region = CodeBlockRegion::new(10, 50, Some("rust".to_string()));
1099
1100        assert!(!region.contains(9)); // Before
1101        assert!(region.contains(10)); // Start (inclusive)
1102        assert!(region.contains(30)); // Middle
1103        assert!(region.contains(49)); // End - 1
1104        assert!(!region.contains(50)); // End (exclusive)
1105        assert!(!region.contains(51)); // After
1106    }
1107
1108    // ============================================================
1109    // Task 1.5: Unit Tests for Context-Aware Extraction
1110    // ============================================================
1111
1112    #[test]
1113    fn test_extract_variables_outside_code_block() {
1114        let content = "Process {{file}} for issues.";
1115        let vars = extract_variables(content);
1116
1117        assert_eq!(vars.len(), 1);
1118        assert_eq!(vars[0].name, "file");
1119    }
1120
1121    #[test]
1122    fn test_extract_variables_inside_code_block_not_extracted() {
1123        let content = "Text before\n```\n{{timestamp}}\n```\nText after";
1124        let vars = extract_variables(content);
1125
1126        // Variable inside code block should NOT be extracted
1127        assert!(vars.is_empty());
1128    }
1129
1130    #[test]
1131    fn test_extract_variables_mixed_inside_outside() {
1132        let content = "Scan {{PROJECT_ROOT_PATH}} for issues.\n\n## Example Output\n```markdown\n**Generated:** {{timestamp}}\n**Files:** {{count}}\n```";
1133        let vars = extract_variables(content);
1134
1135        // Only the variable OUTSIDE the code block should be extracted
1136        assert_eq!(vars.len(), 1);
1137        assert_eq!(vars[0].name, "PROJECT_ROOT_PATH");
1138    }
1139
1140    #[test]
1141    fn test_extract_variables_multiple_code_blocks() {
1142        let content = "Use {{var1}} here.\n\n```\n{{inside1}}\n```\n\nThen {{var2}}.\n\n```rust\n{{inside2}}\n```\n\nFinally {{var3}}.";
1143        let vars = extract_variables(content);
1144
1145        // Only variables outside code blocks should be extracted
1146        assert_eq!(vars.len(), 3);
1147        let names: Vec<&str> = vars.iter().map(|v| v.name.as_str()).collect();
1148        assert!(names.contains(&"var1"));
1149        assert!(names.contains(&"var2"));
1150        assert!(names.contains(&"var3"));
1151        assert!(!names.contains(&"inside1"));
1152        assert!(!names.contains(&"inside2"));
1153    }
1154
1155    #[test]
1156    fn test_extract_variables_at_boundary() {
1157        // Variable immediately before code block
1158        let content = "{{before}}```\ncode\n```{{after}}";
1159        let vars = extract_variables(content);
1160
1161        // Both should be extracted (they're outside the code block)
1162        assert_eq!(vars.len(), 2);
1163        let names: Vec<&str> = vars.iter().map(|v| v.name.as_str()).collect();
1164        assert!(names.contains(&"before"));
1165        assert!(names.contains(&"after"));
1166    }
1167
1168    #[test]
1169    fn test_extract_variables_backward_compatible_no_code_blocks() {
1170        let content = "Hello {{name}}, your {{item}} is ready for {{action}}.";
1171        let vars = extract_variables(content);
1172
1173        // Should work exactly as before when there are no code blocks
1174        assert_eq!(vars.len(), 3);
1175        assert_eq!(vars[0].name, "name");
1176        assert_eq!(vars[1].name, "item");
1177        assert_eq!(vars[2].name, "action");
1178    }
1179
1180    #[test]
1181    fn test_extract_variables_empty_content() {
1182        let content = "";
1183        let vars = extract_variables(content);
1184
1185        assert!(vars.is_empty());
1186    }
1187
1188    #[test]
1189    fn test_is_in_exclusion_helper() {
1190        let regions = vec![
1191            CodeBlockRegion::new(10, 20, None),
1192            CodeBlockRegion::new(50, 80, Some("rust".to_string())),
1193        ];
1194
1195        assert!(!is_in_exclusion(5, &regions)); // Before all
1196        assert!(is_in_exclusion(10, &regions)); // Start of first
1197        assert!(is_in_exclusion(15, &regions)); // Inside first
1198        assert!(!is_in_exclusion(20, &regions)); // End of first (exclusive)
1199        assert!(!is_in_exclusion(30, &regions)); // Between regions
1200        assert!(is_in_exclusion(50, &regions)); // Start of second
1201        assert!(is_in_exclusion(70, &regions)); // Inside second
1202        assert!(!is_in_exclusion(80, &regions)); // End of second (exclusive)
1203        assert!(!is_in_exclusion(100, &regions)); // After all
1204    }
1205
1206    #[test]
1207    fn test_is_in_exclusion_empty_regions() {
1208        let regions: Vec<CodeBlockRegion> = vec![];
1209
1210        assert!(!is_in_exclusion(0, &regions));
1211        assert!(!is_in_exclusion(100, &regions));
1212    }
1213
1214    #[test]
1215    fn test_prompt_template_with_code_blocks_extracts_correctly() {
1216        let content = "Review {{file}} for {{issue_type}} issues.\n\n```example\nOutput: {{example_var}}\n```";
1217        let template = PromptTemplate::new("review", content);
1218
1219        // Only variables outside code blocks should be in the template
1220        assert_eq!(template.variables.len(), 2);
1221        let var_names: Vec<&str> = template.variables.iter().map(|v| v.name.as_str()).collect();
1222        assert!(var_names.contains(&"file"));
1223        assert!(var_names.contains(&"issue_type"));
1224        assert!(!var_names.contains(&"example_var"));
1225    }
1226
1227    // ============================================================
1228    // Tilde Code Block Tests
1229    // ============================================================
1230
1231    #[test]
1232    fn test_detect_tilde_code_blocks_single() {
1233        let content = "Before\n~~~rust\nlet x = 1;\n~~~\nAfter";
1234        let blocks = detect_code_blocks(content);
1235
1236        assert_eq!(blocks.len(), 1);
1237        assert_eq!(blocks[0].language, Some("rust".to_string()));
1238        assert!(blocks[0].start < blocks[0].end);
1239    }
1240
1241    #[test]
1242    fn test_detect_tilde_code_blocks_no_language() {
1243        let content = "~~~\nplain code\n~~~";
1244        let blocks = detect_code_blocks(content);
1245
1246        assert_eq!(blocks.len(), 1);
1247        assert!(blocks[0].language.is_none());
1248    }
1249
1250    #[test]
1251    fn test_detect_tilde_code_blocks_empty() {
1252        let content = "~~~\n~~~";
1253        let blocks = detect_code_blocks(content);
1254
1255        assert_eq!(blocks.len(), 1);
1256        assert!(blocks[0].language.is_none());
1257    }
1258
1259    #[test]
1260    fn test_detect_mixed_backtick_and_tilde_blocks() {
1261        let content =
1262            "```python\nprint('hello')\n```\n\nSome text\n\n~~~javascript\nconsole.log('hi');\n~~~";
1263        let blocks = detect_code_blocks(content);
1264
1265        assert_eq!(blocks.len(), 2);
1266        assert_eq!(blocks[0].language, Some("python".to_string()));
1267        assert_eq!(blocks[1].language, Some("javascript".to_string()));
1268        assert!(blocks[0].end <= blocks[1].start);
1269    }
1270
1271    #[test]
1272    fn test_extract_variables_inside_tilde_block_not_extracted() {
1273        let content = "Text before\n~~~\n{{timestamp}}\n~~~\nText after";
1274        let vars = extract_variables(content);
1275
1276        // Variable inside tilde code block should NOT be extracted
1277        assert!(vars.is_empty());
1278    }
1279
1280    #[test]
1281    fn test_extract_variables_mixed_tilde_and_backtick() {
1282        let content = "Use {{var1}} here.\n\n~~~\n{{inside_tilde}}\n~~~\n\nThen {{var2}}.\n\n```rust\n{{inside_backtick}}\n```\n\nFinally {{var3}}.";
1283        let vars = extract_variables(content);
1284
1285        // Only variables outside both code block types should be extracted
1286        assert_eq!(vars.len(), 3);
1287        let names: Vec<&str> = vars.iter().map(|v| v.name.as_str()).collect();
1288        assert!(names.contains(&"var1"));
1289        assert!(names.contains(&"var2"));
1290        assert!(names.contains(&"var3"));
1291        assert!(!names.contains(&"inside_tilde"));
1292        assert!(!names.contains(&"inside_backtick"));
1293    }
1294
1295    #[test]
1296    fn test_detect_tilde_code_blocks_unclosed() {
1297        // Unclosed tilde code blocks should not match (regex requires closing ~~~)
1298        let content = "~~~rust\nunclosed code block without ending";
1299        let blocks = detect_code_blocks(content);
1300
1301        assert!(blocks.is_empty());
1302    }
1303
1304    #[test]
1305    fn test_extract_variables_at_tilde_boundary() {
1306        // Variable immediately before/after tilde code block
1307        let content = "{{before}}~~~\ncode\n~~~{{after}}";
1308        let vars = extract_variables(content);
1309
1310        // Both should be extracted (they're outside the code block)
1311        assert_eq!(vars.len(), 2);
1312        let names: Vec<&str> = vars.iter().map(|v| v.name.as_str()).collect();
1313        assert!(names.contains(&"before"));
1314        assert!(names.contains(&"after"));
1315    }
1316
1317    #[test]
1318    fn test_nested_tilde_within_backtick() {
1319        // Backtick block containing tilde syntax (tilde inside should be literal)
1320        let content = "{{outside}}\n```markdown\n~~~\n{{inside}}\n~~~\n```";
1321        let vars = extract_variables(content);
1322
1323        // Only outside variable should be extracted
1324        assert_eq!(vars.len(), 1);
1325        assert_eq!(vars[0].name, "outside");
1326    }
1327
1328    #[test]
1329    fn test_nested_backtick_within_tilde() {
1330        // Tilde block containing backtick syntax (backtick inside should be literal)
1331        let content = "{{outside}}\n~~~markdown\n```\n{{inside}}\n```\n~~~";
1332        let vars = extract_variables(content);
1333
1334        // Only outside variable should be extracted
1335        assert_eq!(vars.len(), 1);
1336        assert_eq!(vars[0].name, "outside");
1337    }
1338
1339    // ============================================================
1340    // Variable Value Sanitization Tests (Template Injection Prevention)
1341    // ============================================================
1342
1343    #[test]
1344    fn test_sanitize_variable_value_passthrough_normal() {
1345        // Normal values should pass through unchanged
1346        assert_eq!(sanitize_variable_value("hello world"), "hello world");
1347        assert_eq!(
1348            sanitize_variable_value("user@example.com"),
1349            "user@example.com"
1350        );
1351        assert_eq!(
1352            sanitize_variable_value("path/to/file.rs"),
1353            "path/to/file.rs"
1354        );
1355    }
1356
1357    #[test]
1358    fn test_sanitize_variable_value_escapes_nested_patterns() {
1359        // Nested {{...}} patterns should be escaped to prevent recursive substitution
1360        assert_eq!(
1361            sanitize_variable_value("prefix {{nested}} suffix"),
1362            "prefix { {nested} } suffix"
1363        );
1364        assert_eq!(
1365            sanitize_variable_value("{{start}} middle {{end}}"),
1366            "{ {start} } middle { {end} }"
1367        );
1368    }
1369
1370    #[test]
1371    fn test_sanitize_variable_value_escapes_only_double_braces() {
1372        // Single braces should pass through
1373        assert_eq!(sanitize_variable_value("{single}"), "{single}");
1374        assert_eq!(sanitize_variable_value("a { b } c"), "a { b } c");
1375    }
1376
1377    #[test]
1378    fn test_sanitize_variable_value_removes_control_chars() {
1379        // Control characters (except tab, newline, CR) should be removed
1380        assert_eq!(sanitize_variable_value("hello\x00world"), "helloworld");
1381        assert_eq!(sanitize_variable_value("a\x01b\x02c"), "abc");
1382        assert_eq!(sanitize_variable_value("\x1Fstart"), "start");
1383    }
1384
1385    #[test]
1386    fn test_sanitize_variable_value_preserves_allowed_whitespace() {
1387        // Tab, newline, and carriage return should be preserved
1388        assert_eq!(sanitize_variable_value("line1\nline2"), "line1\nline2");
1389        assert_eq!(sanitize_variable_value("col1\tcol2"), "col1\tcol2");
1390        assert_eq!(sanitize_variable_value("line1\r\nline2"), "line1\r\nline2");
1391    }
1392
1393    #[test]
1394    fn test_sanitize_variable_value_truncates_long_values() {
1395        // Values exceeding MAX_VARIABLE_VALUE_LENGTH should be truncated
1396        let long_value = "x".repeat(MAX_VARIABLE_VALUE_LENGTH + 1000);
1397        let sanitized = sanitize_variable_value(&long_value);
1398        assert!(sanitized.len() <= MAX_VARIABLE_VALUE_LENGTH);
1399    }
1400
1401    #[test]
1402    fn test_sanitize_variable_value_truncates_at_utf8_boundary() {
1403        // Truncation should not break UTF-8 characters
1404        // U+1F600 (😀) is 4 bytes
1405        let emoji = "😀";
1406        let value = format!("{}{}", "a".repeat(MAX_VARIABLE_VALUE_LENGTH - 2), emoji);
1407        let sanitized = sanitize_variable_value(&value);
1408        // Should truncate before the emoji rather than in the middle
1409        assert!(sanitized.is_char_boundary(sanitized.len()));
1410    }
1411
1412    #[test]
1413    fn test_sanitize_variable_value_empty() {
1414        assert_eq!(sanitize_variable_value(""), "");
1415    }
1416
1417    #[test]
1418    fn test_sanitize_variable_value_combined() {
1419        // Test multiple sanitization rules together
1420        let input = "{{injection}}\x00with\tcontrol\nchars";
1421        let expected = "{ {injection} }with\tcontrol\nchars";
1422        assert_eq!(sanitize_variable_value(input), expected);
1423    }
1424
1425    #[test]
1426    fn test_substitute_variables_sanitizes_user_input() {
1427        // User-provided values with injection attempts should be sanitized
1428        let content = "Hello {{name}}, your code: {{code}}";
1429        let mut values = HashMap::new();
1430        values.insert("name".to_string(), "{{malicious}}".to_string());
1431        values.insert("code".to_string(), "normal\x00value".to_string());
1432
1433        let result = substitute_variables(content, &values, &[]).unwrap();
1434
1435        // Nested patterns escaped, control chars removed
1436        assert_eq!(result, "Hello { {malicious} }, your code: normalvalue");
1437    }
1438
1439    #[test]
1440    fn test_substitute_variables_sanitizes_defaults() {
1441        // Even default values should be sanitized (defense-in-depth)
1442        let content = "Status: {{status}}";
1443        let values: HashMap<String, String> = HashMap::new();
1444
1445        let variables = vec![PromptVariable::optional("status", "{{default_injection}}")];
1446
1447        let result = substitute_variables(content, &values, &variables).unwrap();
1448        assert_eq!(result, "Status: { {default_injection} }");
1449    }
1450
1451    #[test]
1452    fn test_sanitize_prevents_recursive_substitution() {
1453        // Ensure that a value containing variable syntax doesn't get substituted again
1454        let content = "Result: {{output}}";
1455        let mut values = HashMap::new();
1456        // Attacker tries to inject another variable reference
1457        values.insert("output".to_string(), "{{secret}}".to_string());
1458
1459        let result = substitute_variables(content, &values, &[]).unwrap();
1460
1461        // The injected pattern should be escaped, not substituted
1462        assert_eq!(result, "Result: { {secret} }");
1463        assert!(!result.contains("{{secret}}"));
1464    }
1465}