Skip to main content

subcog/hooks/
search_patterns.rs

1//! Search intent detection patterns.
2//!
3//! Static pattern data for keyword-based search intent detection.
4//! Extracted from `search_intent.rs` to reduce file size.
5// Allow expect() on static regex patterns - these are guaranteed to compile
6#![allow(clippy::expect_used)]
7
8use regex::Regex;
9use std::collections::HashSet;
10use std::sync::LazyLock;
11
12use super::search_intent::SearchIntentType;
13
14/// A search signal pattern with associated intent type.
15#[derive(Debug)]
16pub struct SearchSignal {
17    /// The regex pattern to match.
18    pub pattern: Regex,
19    /// The intent type this pattern indicates.
20    pub intent_type: SearchIntentType,
21    /// Human-readable description of the signal.
22    #[allow(dead_code)]
23    pub description: &'static str,
24}
25
26/// Static search signal patterns grouped by intent type.
27pub static SEARCH_SIGNALS: LazyLock<Vec<SearchSignal>> = LazyLock::new(|| {
28    vec![
29        // HowTo patterns
30        SearchSignal {
31            pattern: Regex::new(r"(?i)\bhow\s+(do|can|should|would)\s+(i|we|you)\b")
32                .expect("static regex: how do I"),
33            intent_type: SearchIntentType::HowTo,
34            description: "how do I/we/you",
35        },
36        SearchSignal {
37            pattern: Regex::new(r"(?i)\bhow\s+to\b").expect("static regex: how to"),
38            intent_type: SearchIntentType::HowTo,
39            description: "how to",
40        },
41        SearchSignal {
42            pattern: Regex::new(r"(?i)\b(implement|create|build|make|add|write)\s+a?\b")
43                .expect("static regex: implement/create"),
44            intent_type: SearchIntentType::HowTo,
45            description: "implement/create/build",
46        },
47        SearchSignal {
48            pattern: Regex::new(r"(?i)\bsteps?\s+(to|for)\b").expect("static regex: steps to"),
49            intent_type: SearchIntentType::HowTo,
50            description: "steps to/for",
51        },
52        SearchSignal {
53            pattern: Regex::new(r"(?i)\bguide\s+(me|us|to)\b").expect("static regex: guide me"),
54            intent_type: SearchIntentType::HowTo,
55            description: "guide me/us/to",
56        },
57        // Location patterns
58        SearchSignal {
59            pattern: Regex::new(r"(?i)\bwhere\s+(is|are|can\s+i\s+find)\b")
60                .expect("static regex: where is"),
61            intent_type: SearchIntentType::Location,
62            description: "where is/are",
63        },
64        SearchSignal {
65            pattern: Regex::new(r"(?i)\b(find|locate|show\s+me)\s+(the|a)?\b")
66                .expect("static regex: find/locate"),
67            intent_type: SearchIntentType::Location,
68            description: "find/locate/show me",
69        },
70        SearchSignal {
71            pattern: Regex::new(r"(?i)\b(which|what)\s+file\b").expect("static regex: which file"),
72            intent_type: SearchIntentType::Location,
73            description: "which/what file",
74        },
75        SearchSignal {
76            pattern: Regex::new(r"(?i)\blook\s+(for|at|up)\b").expect("static regex: look for"),
77            intent_type: SearchIntentType::Location,
78            description: "look for/at/up",
79        },
80        // Explanation patterns
81        SearchSignal {
82            pattern: Regex::new(r"(?i)\bwhat\s+(is|are|does)\b").expect("static regex: what is"),
83            intent_type: SearchIntentType::Explanation,
84            description: "what is/are/does",
85        },
86        SearchSignal {
87            pattern: Regex::new(r"(?i)\bexplain\b").expect("static regex: explain"),
88            intent_type: SearchIntentType::Explanation,
89            description: "explain",
90        },
91        SearchSignal {
92            pattern: Regex::new(r"(?i)\b(tell|help)\s+me\s+(about|understand)\b")
93                .expect("static regex: tell me about"),
94            intent_type: SearchIntentType::Explanation,
95            description: "tell me about/understand",
96        },
97        SearchSignal {
98            pattern: Regex::new(r"(?i)\bwhat('s|\s+is)\s+the\s+(purpose|meaning|role)\b")
99                .expect("static regex: what's the purpose"),
100            intent_type: SearchIntentType::Explanation,
101            description: "what's the purpose/meaning/role",
102        },
103        SearchSignal {
104            pattern: Regex::new(r"(?i)\bcan\s+you\s+describe\b")
105                .expect("static regex: can you describe"),
106            intent_type: SearchIntentType::Explanation,
107            description: "can you describe",
108        },
109        // Comparison patterns
110        SearchSignal {
111            pattern: Regex::new(r"(?i)\bdifference\s+between\b")
112                .expect("static regex: difference between"),
113            intent_type: SearchIntentType::Comparison,
114            description: "difference between",
115        },
116        SearchSignal {
117            pattern: Regex::new(r"(?i)\b(compare|vs\.?|versus)\b").expect("static regex: compare"),
118            intent_type: SearchIntentType::Comparison,
119            description: "compare/vs/versus",
120        },
121        SearchSignal {
122            pattern: Regex::new(r"(?i)\bwhich\s+(is|one|should)\s+(better|best|prefer)\b")
123                .expect("static regex: which is better"),
124            intent_type: SearchIntentType::Comparison,
125            description: "which is better",
126        },
127        SearchSignal {
128            pattern: Regex::new(r"(?i)\b(pros|cons|advantages|disadvantages)\b")
129                .expect("static regex: pros/cons"),
130            intent_type: SearchIntentType::Comparison,
131            description: "pros/cons/advantages/disadvantages",
132        },
133        // Troubleshoot patterns
134        SearchSignal {
135            pattern: Regex::new(r"(?i)\bwhy\s+(is|does|am|are)\b.*\b(error|fail|wrong|issue)\b")
136                .expect("static regex: why is error"),
137            intent_type: SearchIntentType::Troubleshoot,
138            description: "why is/does...error",
139        },
140        SearchSignal {
141            pattern: Regex::new(r"(?i)\b(error|exception|failure|crash|bug)\b")
142                .expect("static regex: error/exception"),
143            intent_type: SearchIntentType::Troubleshoot,
144            description: "error/exception/failure/crash/bug",
145        },
146        SearchSignal {
147            pattern: Regex::new(r"(?i)\b(not\s+working|doesn't\s+work|won't\s+work|broken)\b")
148                .expect("static regex: not working"),
149            intent_type: SearchIntentType::Troubleshoot,
150            description: "not working/doesn't work/broken",
151        },
152        SearchSignal {
153            pattern: Regex::new(r"(?i)\b(fix|solve|resolve|debug)\b")
154                .expect("static regex: fix/solve"),
155            intent_type: SearchIntentType::Troubleshoot,
156            description: "fix/solve/resolve/debug",
157        },
158        SearchSignal {
159            pattern: Regex::new(r"(?i)\b(issue|problem)\s+with\b")
160                .expect("static regex: issue with"),
161            intent_type: SearchIntentType::Troubleshoot,
162            description: "issue/problem with",
163        },
164        // General patterns
165        SearchSignal {
166            pattern: Regex::new(r"(?i)\b(search|find|lookup|query)\b")
167                .expect("static regex: search/find"),
168            intent_type: SearchIntentType::General,
169            description: "search/find/lookup",
170        },
171        SearchSignal {
172            pattern: Regex::new(r"(?i)\bshow\s+(me|us)\b").expect("static regex: show me"),
173            intent_type: SearchIntentType::General,
174            description: "show me/us",
175        },
176    ]
177});
178
/// Lowercase English filler words that are dropped during topic extraction.
///
/// The set is assembled once, on first access, and is read-only afterwards.
/// Lookups are exact string matches against the entries listed here.
pub static STOP_WORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    // Ten words per row keeps the list easy to scan; the order is irrelevant
    // once the words are hashed. Formatting is pinned so rustfmt does not
    // explode the list back to one word per line.
    #[rustfmt::skip]
    const WORDS: &[&str] = &[
        "a", "an", "the", "and", "or", "but", "in", "on", "at", "to",
        "for", "of", "with", "by", "from", "as", "is", "was", "are", "were",
        "been", "be", "have", "has", "had", "do", "does", "did", "will", "would",
        "could", "should", "may", "might", "must", "shall", "can", "need", "i", "you",
        "he", "she", "it", "we", "they", "me", "him", "her", "us", "them",
        "my", "your", "his", "its", "our", "their", "this", "that", "these", "those",
        "what", "which", "who", "whom", "how", "when", "where", "why", "all", "each",
        "every", "both", "few", "more", "most", "other", "some", "such", "no", "nor",
        "not", "only", "own", "same", "so", "than", "too", "very", "just", "about",
        "also", "now", "here", "there", "up", "down", "out", "if", "then", "into",
        "through", "during", "before", "after", "above", "below", "between", "under", "again", "further",
        "once", "any", "something", "anything", "nothing",
    ];

    WORDS.iter().copied().collect()
});
300});