Skip to main content

subcog/security/
secrets.rs

1//! Secret detection patterns.
2// Allow expect() on static regex patterns - these are guaranteed to compile
3#![allow(clippy::expect_used)]
4//!
5//! Detects common secret patterns in content to prevent accidental capture.
6
7use regex::Regex;
8use std::sync::LazyLock;
9
/// One occurrence of a secret found while scanning content.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SecretMatch {
    /// Name of the pattern that produced this match (e.g. `"Private Key"`).
    pub secret_type: String,
    /// Offset in the scanned content at which the match begins.
    pub start: usize,
    /// Offset in the scanned content at which the match ends.
    pub end: usize,
    /// The matched text (for debugging, will be redacted in production).
    pub matched_text: String,
}
22
23/// Pattern for detecting secrets.
24struct SecretPattern {
25    name: &'static str,
26    regex: &'static LazyLock<Regex>,
27}
28
29// Define regex patterns as separate statics
30// Note: These patterns are static and guaranteed to compile, so expect() is safe
31static AWS_ACCESS_KEY_REGEX: LazyLock<Regex> = LazyLock::new(|| {
32    Regex::new(r"(?i)AKIA[0-9A-Z]{16}").expect("static regex: AWS access key pattern")
33});
34
35static AWS_SECRET_KEY_REGEX: LazyLock<Regex> = LazyLock::new(|| {
36    Regex::new(r#"(?i)(?:aws_secret_access_key|aws_secret_key|secret_access_key)\s*[=:]\s*['"]?([A-Za-z0-9/+=]{40})['"]?"#).expect("static regex: AWS secret key pattern")
37});
38
39static GITHUB_TOKEN_REGEX: LazyLock<Regex> = LazyLock::new(|| {
40    Regex::new(r"gh[pousr]_[A-Za-z0-9_]{36,}").expect("static regex: GitHub token pattern")
41});
42
43static GITHUB_PAT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
44    Regex::new(r"github_pat_[A-Za-z0-9_]{22,}").expect("static regex: GitHub PAT pattern")
45});
46
47/// Generic API key pattern with reduced false positives.
48///
49/// Requires:
50/// - Assignment operator (= or :) with optional whitespace
51/// - Optional quotes around the value
52/// - Value must be at least 24 chars (not 20) to reduce UUIDs/short IDs
53/// - Placeholder filtering is handled in `detect()` method
54static GENERIC_API_KEY_REGEX: LazyLock<Regex> = LazyLock::new(|| {
55    Regex::new(r#"(?i)(?:api[_-]?key|apikey)\s*[=:]\s*['"]?([A-Za-z0-9_\-]{24,})['"]?"#)
56        .expect("static regex: generic API key pattern")
57});
58
59static GENERIC_SECRET_REGEX: LazyLock<Regex> = LazyLock::new(|| {
60    Regex::new(r#"(?i)(?:secret|password|passwd|pwd)\s*[=:]\s*['"]?([^\s'"]{8,})['"]?"#)
61        .expect("static regex: generic secret pattern")
62});
63
64static PRIVATE_KEY_REGEX: LazyLock<Regex> = LazyLock::new(|| {
65    Regex::new(r"-----BEGIN (?:RSA |DSA |EC |OPENSSH |PGP )?PRIVATE KEY-----")
66        .expect("static regex: private key pattern")
67});
68
69static JWT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
70    Regex::new(r"eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*")
71        .expect("static regex: JWT pattern")
72});
73
74static SLACK_TOKEN_REGEX: LazyLock<Regex> = LazyLock::new(|| {
75    Regex::new(r"xox[baprs]-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*")
76        .expect("static regex: Slack token pattern")
77});
78
79static SLACK_WEBHOOK_REGEX: LazyLock<Regex> = LazyLock::new(|| {
80    Regex::new(r"https://hooks\.slack\.com/services/T[A-Z0-9]+/B[A-Z0-9]+/[a-zA-Z0-9]+")
81        .expect("static regex: Slack webhook pattern")
82});
83
84static GOOGLE_API_KEY_REGEX: LazyLock<Regex> = LazyLock::new(|| {
85    Regex::new(r"AIza[0-9A-Za-z_-]{35}").expect("static regex: Google API key pattern")
86});
87
88static STRIPE_API_KEY_REGEX: LazyLock<Regex> = LazyLock::new(|| {
89    Regex::new(r"(?:sk|pk)_(?:live|test)_[A-Za-z0-9]{24,}")
90        .expect("static regex: Stripe API key pattern")
91});
92
93static DATABASE_URL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
94    Regex::new(r"(?i)(?:postgres|mysql|mongodb|redis)://[^:]+:[^@]+@[^\s]+")
95        .expect("static regex: database URL pattern")
96});
97
98/// Bearer token pattern with reduced false positives.
99///
100/// Requires:
101/// - "Bearer " prefix (case-insensitive)
102/// - Token must be at least 20 chars to exclude short strings
103/// - Placeholder filtering is handled in `detect()` method
104static BEARER_TOKEN_REGEX: LazyLock<Regex> = LazyLock::new(|| {
105    Regex::new(r"(?i)bearer\s+([A-Za-z0-9_\-.]{20,})").expect("static regex: bearer token pattern")
106});
107
108static OPENAI_API_KEY_REGEX: LazyLock<Regex> = LazyLock::new(|| {
109    Regex::new(r"sk-[A-Za-z0-9]{48}").expect("static regex: OpenAI API key pattern")
110});
111
112static ANTHROPIC_API_KEY_REGEX: LazyLock<Regex> = LazyLock::new(|| {
113    Regex::new(r"sk-ant-api[A-Za-z0-9_-]{90,}").expect("static regex: Anthropic API key pattern")
114});
115
116// HIGH-SEC-012: GCP/Azure/Twilio credentials
117static GCP_SERVICE_ACCOUNT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
118    Regex::new(r#"(?i)"type"\s*:\s*"service_account""#)
119        .expect("static regex: GCP service account pattern")
120});
121
122static AZURE_STORAGE_KEY_REGEX: LazyLock<Regex> = LazyLock::new(|| {
123    Regex::new(r"(?i)(?:AccountKey|SharedAccessSignature)\s*=\s*[A-Za-z0-9+/=]{44,}")
124        .expect("static regex: Azure storage key pattern")
125});
126
127static AZURE_AD_CLIENT_SECRET_REGEX: LazyLock<Regex> = LazyLock::new(|| {
128    Regex::new(
129        r#"(?i)(?:client_secret|azure_client_secret)\s*[=:]\s*['"]?([A-Za-z0-9~._-]{34,})['"]?"#,
130    )
131    .expect("static regex: Azure AD client secret pattern")
132});
133
134static TWILIO_API_KEY_REGEX: LazyLock<Regex> =
135    LazyLock::new(|| Regex::new(r"SK[a-f0-9]{32}").expect("static regex: Twilio API key pattern"));
136
137static TWILIO_AUTH_TOKEN_REGEX: LazyLock<Regex> = LazyLock::new(|| {
138    Regex::new(r#"(?i)(?:twilio_auth_token|auth_token)\s*[=:]\s*['"]?([a-f0-9]{32})['"]?"#)
139        .expect("static regex: Twilio auth token pattern")
140});
141
142static SENDGRID_API_KEY_REGEX: LazyLock<Regex> = LazyLock::new(|| {
143    Regex::new(r"SG\.[A-Za-z0-9_-]{22}\.[A-Za-z0-9_-]{43}")
144        .expect("static regex: SendGrid API key pattern")
145});
146
147static MAILGUN_API_KEY_REGEX: LazyLock<Regex> = LazyLock::new(|| {
148    Regex::new(r"key-[a-f0-9]{32}").expect("static regex: Mailgun API key pattern")
149});
150
151/// Returns the list of secret patterns to check.
152fn secret_patterns() -> Vec<SecretPattern> {
153    vec![
154        SecretPattern {
155            name: "AWS Access Key ID",
156            regex: &AWS_ACCESS_KEY_REGEX,
157        },
158        SecretPattern {
159            name: "AWS Secret Access Key",
160            regex: &AWS_SECRET_KEY_REGEX,
161        },
162        SecretPattern {
163            name: "GitHub Token",
164            regex: &GITHUB_TOKEN_REGEX,
165        },
166        SecretPattern {
167            name: "GitHub Personal Access Token (Classic)",
168            regex: &GITHUB_PAT_REGEX,
169        },
170        SecretPattern {
171            name: "Generic API Key",
172            regex: &GENERIC_API_KEY_REGEX,
173        },
174        SecretPattern {
175            name: "Generic Secret",
176            regex: &GENERIC_SECRET_REGEX,
177        },
178        SecretPattern {
179            name: "Private Key",
180            regex: &PRIVATE_KEY_REGEX,
181        },
182        SecretPattern {
183            name: "JWT Token",
184            regex: &JWT_REGEX,
185        },
186        SecretPattern {
187            name: "Slack Token",
188            regex: &SLACK_TOKEN_REGEX,
189        },
190        SecretPattern {
191            name: "Slack Webhook",
192            regex: &SLACK_WEBHOOK_REGEX,
193        },
194        SecretPattern {
195            name: "Google API Key",
196            regex: &GOOGLE_API_KEY_REGEX,
197        },
198        SecretPattern {
199            name: "Stripe API Key",
200            regex: &STRIPE_API_KEY_REGEX,
201        },
202        SecretPattern {
203            name: "Database URL with Credentials",
204            regex: &DATABASE_URL_REGEX,
205        },
206        SecretPattern {
207            name: "Bearer Token",
208            regex: &BEARER_TOKEN_REGEX,
209        },
210        SecretPattern {
211            name: "OpenAI API Key",
212            regex: &OPENAI_API_KEY_REGEX,
213        },
214        SecretPattern {
215            name: "Anthropic API Key",
216            regex: &ANTHROPIC_API_KEY_REGEX,
217        },
218        // HIGH-SEC-012: Cloud provider credentials
219        SecretPattern {
220            name: "GCP Service Account",
221            regex: &GCP_SERVICE_ACCOUNT_REGEX,
222        },
223        SecretPattern {
224            name: "Azure Storage Key",
225            regex: &AZURE_STORAGE_KEY_REGEX,
226        },
227        SecretPattern {
228            name: "Azure AD Client Secret",
229            regex: &AZURE_AD_CLIENT_SECRET_REGEX,
230        },
231        SecretPattern {
232            name: "Twilio API Key",
233            regex: &TWILIO_API_KEY_REGEX,
234        },
235        SecretPattern {
236            name: "Twilio Auth Token",
237            regex: &TWILIO_AUTH_TOKEN_REGEX,
238        },
239        SecretPattern {
240            name: "SendGrid API Key",
241            regex: &SENDGRID_API_KEY_REGEX,
242        },
243        SecretPattern {
244            name: "Mailgun API Key",
245            regex: &MAILGUN_API_KEY_REGEX,
246        },
247    ]
248}
249
/// Detector for secrets in content.
///
/// Construct one with `SecretDetector::new()` (or `Default`) and optionally
/// adjust it with `with_min_length`. The type derives `Debug` and `Clone` so
/// it can be logged and embedded in other configuration values.
#[derive(Debug, Clone)]
pub struct SecretDetector {
    /// Minimum length for generic secret values.
    min_secret_length: usize,
}
255
256impl SecretDetector {
257    /// Creates a new secret detector.
258    #[must_use]
259    pub const fn new() -> Self {
260        Self {
261            min_secret_length: 8,
262        }
263    }
264
265    /// Sets the minimum secret length for generic patterns.
266    #[must_use]
267    pub const fn with_min_length(mut self, length: usize) -> Self {
268        self.min_secret_length = length;
269        self
270    }
271
272    /// Checks if content contains any secrets.
273    #[must_use]
274    pub fn contains_secrets(&self, content: &str) -> bool {
275        !self.detect(content).is_empty()
276    }
277
278    /// Returns all detected secret matches.
279    #[must_use]
280    pub fn detect(&self, content: &str) -> Vec<SecretMatch> {
281        let mut matches = Vec::new();
282
283        for pattern in secret_patterns() {
284            self.collect_pattern_matches(pattern, content, &mut matches);
285        }
286
287        // Sort by position
288        matches.sort_by_key(|m| m.start);
289
290        // Remove overlapping matches (keep the first one)
291        Self::deduplicate_overlapping(matches)
292    }
293
294    /// Collects matches for a single pattern into the result vector.
295    fn collect_pattern_matches(
296        &self,
297        pattern: SecretPattern,
298        content: &str,
299        matches: &mut Vec<SecretMatch>,
300    ) {
301        for m in pattern.regex.find_iter(content) {
302            if let Some(secret_match) = self.process_match(&pattern, &m) {
303                matches.push(secret_match);
304            }
305        }
306    }
307
308    /// Processes a single regex match and returns a `SecretMatch` if it should be included.
309    fn process_match(&self, pattern: &SecretPattern, m: &regex::Match<'_>) -> Option<SecretMatch> {
310        let matched_text = m.as_str().to_string();
311
312        // Only apply placeholder filtering to generic patterns that are prone to false positives
313        // Specific patterns (AWS, GitHub, OpenAI, etc.) have precise formats that don't need filtering
314        let should_filter = pattern.name == "Generic API Key"
315            || pattern.name == "Generic Secret"
316            || pattern.name == "Bearer Token";
317
318        if should_filter && Self::is_placeholder(&matched_text) {
319            return None;
320        }
321
322        Some(SecretMatch {
323            secret_type: pattern.name.to_string(),
324            start: m.start(),
325            end: m.end(),
326            matched_text,
327        })
328    }
329
330    /// Removes overlapping matches, keeping the first occurrence.
331    fn deduplicate_overlapping(sorted_matches: Vec<SecretMatch>) -> Vec<SecretMatch> {
332        let mut result = Vec::new();
333        let mut last_end = 0;
334        for m in sorted_matches {
335            if m.start >= last_end {
336                last_end = m.end;
337                result.push(m);
338            }
339        }
340        result
341    }
342
343    /// Returns the types of secrets detected.
344    #[must_use]
345    pub fn detect_types(&self, content: &str) -> Vec<String> {
346        self.detect(content)
347            .into_iter()
348            .map(|m| m.secret_type)
349            .collect()
350    }
351
352    /// Returns the count of secrets detected.
353    #[must_use]
354    pub fn count(&self, content: &str) -> usize {
355        self.detect(content).len()
356    }
357
358    /// Checks if a matched value is a common placeholder (false positive).
359    ///
360    /// This reduces false positives for generic API key and bearer token patterns.
361    fn is_placeholder(value: &str) -> bool {
362        // Common placeholder prefixes/patterns (case-insensitive)
363        const PLACEHOLDER_PATTERNS: &[&str] = &[
364            "example",
365            "test",
366            "demo",
367            "your_",
368            "your-",
369            "my_",
370            "my-",
371            "placeholder",
372            "changeme",
373            "xxx",
374            "yyy",
375            "zzz",
376            "foo",
377            "bar",
378            "baz",
379            "sample",
380            "fake",
381            "dummy",
382            "mock",
383        ];
384
385        let lower = value.to_lowercase();
386        PLACEHOLDER_PATTERNS
387            .iter()
388            .any(|&pattern| lower.contains(pattern))
389    }
390}
391
392impl Default for SecretDetector {
393    fn default() -> Self {
394        Self::new()
395    }
396}
397
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans `content` with a default-configured detector.
    fn scan(content: &str) -> Vec<SecretMatch> {
        SecretDetector::new().detect(content)
    }

    /// Reports whether a default-configured detector flags `content`.
    fn flagged(content: &str) -> bool {
        SecretDetector::new().contains_secrets(content)
    }

    /// Asserts that `content` yields exactly one match, of type `secret_type`.
    #[track_caller]
    fn assert_sole_match(content: &str, secret_type: &str) {
        let found = scan(content);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].secret_type, secret_type);
    }

    /// Asserts that `content` yields at least one match of type `secret_type`.
    #[track_caller]
    fn assert_reports(content: &str, secret_type: &str) {
        let found = scan(content);
        assert!(!found.is_empty());
        assert!(found.iter().any(|m| m.secret_type == secret_type));
    }

    #[test]
    fn test_detect_aws_access_key() {
        assert_sole_match("My AWS key is AKIAIOSFODNN7EXAMPLE", "AWS Access Key ID");
    }

    #[test]
    fn test_detect_github_token() {
        // `ghp_`-prefixed token
        assert!(flagged(
            "GITHUB_TOKEN=ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
        ));

        // `github_pat_`-prefixed token
        assert!(flagged("token: github_pat_xxxxxxxxxxxxxxxxxxxxxx_yyyyyyyy"));
    }

    #[test]
    fn test_detect_private_key() {
        assert_sole_match("-----BEGIN RSA PRIVATE KEY-----\nMIIE...", "Private Key");
    }

    #[test]
    fn test_detect_jwt() {
        // No "Bearer" prefix here, so the Bearer Token pattern cannot overlap the JWT.
        assert_reports(
            "token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U",
            "JWT Token",
        );
    }

    #[test]
    fn test_detect_bearer_token() {
        // Token is 20+ characters and contains no placeholder fragment.
        assert_reports(
            "Authorization: Bearer eyJhbGciOiJIUzI1NiJ9ab",
            "Bearer Token",
        );
    }

    #[test]
    fn test_bearer_token_rejects_placeholders() {
        // Token shorter than 20 characters: the pattern itself does not match.
        assert!(!flagged("Authorization: Bearer shorttoken"));

        // Contains the "example" placeholder fragment.
        assert!(!flagged("Authorization: Bearer example_abcdefgh1234567890"));

        // Contains the "test" placeholder fragment.
        assert!(!flagged("Authorization: Bearer test_token_1234567890abc"));
    }

    #[test]
    fn test_detect_slack_webhook() {
        // The URL is assembled from pieces so that this source file does not
        // itself trip GitHub secret scanning.
        let base = "https://hooks.slack.com/services/";
        let fake_ids = [
            "T", "FAKE", "FAKE", "TEST/B", "FAKE", "FAKE", "TEST/", "fake", "token", "here",
        ];
        let path = fake_ids.concat();
        let content = format!("SLACK_WEBHOOK={base}{path}");

        assert_reports(&content, "Slack Webhook");
    }

    #[test]
    fn test_detect_stripe_key() {
        // `sk_test_` is the test-key prefix, not a live key.
        assert!(flagged(
            "STRIPE_KEY=sk_test_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
        ));
    }

    #[test]
    fn test_detect_database_url() {
        assert_reports(
            "DATABASE_URL=postgres://user:password@localhost:5432/db",
            "Database URL with Credentials",
        );
    }

    #[test]
    fn test_detect_openai_key() {
        assert!(flagged(
            "OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
        ));
    }

    #[test]
    fn test_no_secrets() {
        let content = "This is just regular text with no secrets.";
        assert!(!flagged(content));
        assert!(scan(content).is_empty());
    }

    #[test]
    fn test_multiple_secrets() {
        let found = scan(
            "AWS_KEY=AKIAIOSFODNN7EXAMPLE and GITHUB_TOKEN=ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
        );
        assert_eq!(found.len(), 2);
    }

    #[test]
    fn test_detect_types() {
        let types = SecretDetector::new().detect_types("AKIAIOSFODNN7EXAMPLE");
        assert!(types.contains(&"AWS Access Key ID".to_string()));
    }

    #[test]
    fn test_count() {
        let total = SecretDetector::new()
            .count("AKIAIOSFODNN7EXAMPLE and ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
        assert_eq!(total, 2);
    }

    // ============================================================================
    // Command Injection and Bypass Tests
    // ============================================================================

    #[test]
    fn test_bypass_null_byte_injection() {
        // A null byte inside the key breaks the pattern, so nothing is reported.
        assert!(!flagged("AKIA\0IOSFODNN7EXAMPLE"));

        // Null bytes next to the key do not prevent detection.
        assert!(flagged("\0AKIAIOSFODNN7EXAMPLE\0"));
    }

    #[test]
    fn test_bypass_unicode_homoglyphs() {
        // First character is U+0410 CYRILLIC CAPITAL LETTER A, not ASCII 'A',
        // so the ASCII-only pattern does not match.
        assert!(!flagged("\u{0410}KIAIOSFODNN7EXAMPLE"));

        // The plain ASCII spelling is still detected.
        assert!(flagged("AKIAIOSFODNN7EXAMPLE"));
    }

    #[test]
    fn test_bypass_invisible_characters() {
        // Zero-width space (U+200B) inside the key: not detected.
        assert!(!flagged("AKIA\u{200B}IOSFODNN7EXAMPLE"));

        // Zero-width joiner (U+200D) inside the key: not detected.
        assert!(!flagged("AKIA\u{200D}IOSFODNN7EXAMPLE"));
    }

    #[test]
    fn test_bypass_whitespace_variations() {
        // Value is 24+ characters and not a placeholder.
        // Non-breaking space (U+00A0) after the assignment operator.
        assert!(flagged("api_key =\u{00A0}k8s_prod_auth_1234567890abcdef"));

        // Tabs around the assignment operator.
        assert!(flagged("api_key\t=\tk8s_prod_auth_1234567890abcdef"));
    }

    #[test]
    fn test_case_insensitive_aws_detection() {
        // The AWS access key regex carries the (?i) flag, so altered casing is
        // deliberately still caught.
        let spellings = [
            "akiaiosfodnn7example", // all lowercase
            "AkIaIOSFODNN7EXAMPLE", // mixed case
            "AKIAIOSFODNN7EXAMPLE", // canonical uppercase
        ];
        for content in spellings {
            assert!(flagged(content));
        }
    }

    #[test]
    fn test_bypass_padding_and_wrapping() {
        // Key embedded in surrounding text.
        assert!(flagged("The key prefix-AKIAIOSFODNN7EXAMPLE-suffix is here"));

        // Key at the very end of the content.
        assert!(flagged("key: AKIAIOSFODNN7EXAMPLE"));

        // Key at the very start of the content.
        assert!(flagged("AKIAIOSFODNN7EXAMPLE is leaked"));
    }

    #[test]
    fn test_bypass_encoding_variations() {
        // URL-encoded character (%49 = 'I'): patterns match literal text only.
        assert!(!flagged("AKIA%49OSFODNN7EXAMPLE"));

        // Base64 text: the detector does not decode content before matching.
        assert!(!flagged("QUtJQUlPU0ZPRE5ON0VYQU1QTEU="));
    }

    #[test]
    fn test_bypass_comment_injection() {
        // Inside a code comment.
        assert!(flagged("// AKIAIOSFODNN7EXAMPLE"));

        // Inside an HTML comment.
        assert!(flagged("<!-- AKIAIOSFODNN7EXAMPLE -->"));

        // Inside a JSON string.
        assert!(flagged(r#"{"key": "AKIAIOSFODNN7EXAMPLE"}"#));
    }

    #[test]
    fn test_bypass_line_breaks() {
        // Key split by LF: not detected.
        assert!(!flagged("AKIA\nIOSFODNN7EXAMPLE"));

        // Key split by CRLF: not detected.
        assert!(!flagged("AKIA\r\nIOSFODNN7EXAMPLE"));

        // Key alone on its own line: detected.
        assert!(flagged("line1\nAKIAIOSFODNN7EXAMPLE\nline3"));
    }

    #[test]
    fn test_bypass_string_concatenation() {
        // The detector sees raw text, not evaluated code; neither half is a
        // complete key.
        assert!(!flagged(r#""AKIA" + "IOSFODNN7EXAMPLE""#));

        // The joined key is detected.
        assert!(flagged("key = AKIAIOSFODNN7EXAMPLE"));
    }

    #[test]
    fn test_near_miss_patterns() {
        // `AKIA` followed by only 12 characters; the pattern requires 16.
        assert!(!flagged("AKIAIOSFODNN7EXA"));

        // GitHub-like token with a prefix letter outside `[pousr]`.
        assert!(!flagged("ghx_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"));

        // JWT-like text with only two segments (no second dot).
        assert!(!flagged("eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0"));
    }

    #[test]
    fn test_false_positive_resistance() {
        // Ordinary prose mentioning "API" and "key".
        assert!(!flagged("The API documentation describes the key features"));

        // A UUID.
        assert!(!flagged("id: 550e8400-e29b-41d4-a716-446655440000"));

        // A version string.
        assert!(!flagged("version: 1.2.3-beta.4"));
    }

    #[test]
    fn test_nested_secrets() {
        // Two distinct secrets separated by a newline: an AWS access key and
        // a `ghp_` token with 36 trailing characters.
        let matches = scan("AKIAIOSFODNN7EXAMPLE\nghp_abcdefghijklmnopqrstuvwxyz0123456789");

        assert!(
            matches.len() >= 2,
            "Expected 2+ matches, got {}: {:?}",
            matches.len(),
            matches
        );
    }

    #[test]
    fn test_overlapping_patterns() {
        // Both the generic secret and the OpenAI key patterns can match here;
        // at least one match must survive de-duplication.
        let found = scan("password = sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
        assert!(!found.is_empty());
    }

    #[test]
    fn test_minimum_length_enforcement() {
        // Value shorter than 8 characters: not reported.
        assert!(!flagged("password = short"));

        // Value of exactly 8 characters: reported.
        assert!(flagged("password = 12345678"));
    }

    #[test]
    fn test_multiline_key_format() {
        // Header line on its own.
        assert!(flagged("-----BEGIN RSA PRIVATE KEY-----"));

        // Complete PEM-style block.
        let pem = "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASC\n-----END PRIVATE KEY-----";
        assert!(flagged(pem));
    }

    // ============================================================================
    // Cloud Provider Credential Tests (HIGH-SEC-012)
    // ============================================================================

    #[test]
    fn test_detect_gcp_service_account() {
        assert_reports(
            r#"{"type": "service_account", "project_id": "my-project"}"#,
            "GCP Service Account",
        );
    }

    #[test]
    fn test_detect_azure_storage_key() {
        // `AccountKey=` followed by a base64 string of 44+ characters.
        assert_reports(
            "AccountKey=dGhpc2lzYXRlc3RrZXl0aGF0aXNsb25nZW5vdWdodG9tYXRjaA==",
            "Azure Storage Key",
        );
    }

    #[test]
    fn test_detect_azure_sas_token() {
        // `SharedAccessSignature=` followed by a base64 string of 44+ characters.
        assert_reports(
            "SharedAccessSignature=dGhpc2lzYXRlc3RzaWduYXR1cmV0aGF0aXNsb25nZW5vdWdo",
            "Azure Storage Key",
        );
    }

    #[test]
    fn test_detect_azure_ad_client_secret() {
        // Quoted value of exactly 34 characters, the pattern's minimum.
        assert_reports(
            "client_secret = 'abcdefghijklmnopqrstuvwxyz12345678'",
            "Azure AD Client Secret",
        );
    }

    #[test]
    fn test_detect_twilio_api_key() {
        // `SK` + 32 hex characters; all zeros so the value is obviously fake.
        assert_reports(
            "TWILIO_SID=SK00000000000000000000000000000000",
            "Twilio API Key",
        );
    }

    #[test]
    fn test_detect_twilio_auth_token() {
        assert_reports(
            "twilio_auth_token = 'abcdef0123456789abcdef0123456789'",
            "Twilio Auth Token",
        );
    }

    #[test]
    fn test_detect_sendgrid_api_key() {
        // Shape: SG.<22 chars>.<43 chars>
        assert_reports(
            "SENDGRID_API_KEY=SG.abcdefghijklmnopqrstuv.abcdefghijklmnopqrstuvwxyz0123456789abcdefg",
            "SendGrid API Key",
        );
    }

    #[test]
    fn test_detect_mailgun_api_key() {
        // Shape: key-<32 hex chars>. The variable is named `MAILGUN_TOKEN`
        // (not `..._API_KEY`) so the Generic API Key pattern cannot overlap.
        assert_reports(
            "MAILGUN_TOKEN=key-abcdef0123456789abcdef0123456789",
            "Mailgun API Key",
        );
    }

    #[test]
    fn test_cloud_credentials_case_insensitive() {
        // GCP service-account marker in upper case.
        assert!(flagged(r#"{"TYPE": "SERVICE_ACCOUNT"}"#));

        // Azure account key with a lowercase key name.
        assert!(flagged(
            "accountkey=dGhpc2lzYXRlc3RrZXl0aGF0aXNsb25nZW5vdWdodG9tYXRjaA=="
        ));
    }
}
913        // Azure with different casing
914        let content2 = "accountkey=dGhpc2lzYXRlc3RrZXl0aGF0aXNsb25nZW5vdWdodG9tYXRjaA==";
915        assert!(detector.contains_secrets(content2));
916    }
917}