1#![allow(clippy::expect_used)]
7
8use regex::Regex;
9use std::collections::HashSet;
10use std::sync::LazyLock;
11
12use super::search_intent::SearchIntentType;
13
14#[derive(Debug)]
16pub struct SearchSignal {
17 pub pattern: Regex,
19 pub intent_type: SearchIntentType,
21 #[allow(dead_code)]
23 pub description: &'static str,
24}
25
26pub static SEARCH_SIGNALS: LazyLock<Vec<SearchSignal>> = LazyLock::new(|| {
28 vec![
29 SearchSignal {
31 pattern: Regex::new(r"(?i)\bhow\s+(do|can|should|would)\s+(i|we|you)\b")
32 .expect("static regex: how do I"),
33 intent_type: SearchIntentType::HowTo,
34 description: "how do I/we/you",
35 },
36 SearchSignal {
37 pattern: Regex::new(r"(?i)\bhow\s+to\b").expect("static regex: how to"),
38 intent_type: SearchIntentType::HowTo,
39 description: "how to",
40 },
41 SearchSignal {
42 pattern: Regex::new(r"(?i)\b(implement|create|build|make|add|write)\s+a?\b")
43 .expect("static regex: implement/create"),
44 intent_type: SearchIntentType::HowTo,
45 description: "implement/create/build",
46 },
47 SearchSignal {
48 pattern: Regex::new(r"(?i)\bsteps?\s+(to|for)\b").expect("static regex: steps to"),
49 intent_type: SearchIntentType::HowTo,
50 description: "steps to/for",
51 },
52 SearchSignal {
53 pattern: Regex::new(r"(?i)\bguide\s+(me|us|to)\b").expect("static regex: guide me"),
54 intent_type: SearchIntentType::HowTo,
55 description: "guide me/us/to",
56 },
57 SearchSignal {
59 pattern: Regex::new(r"(?i)\bwhere\s+(is|are|can\s+i\s+find)\b")
60 .expect("static regex: where is"),
61 intent_type: SearchIntentType::Location,
62 description: "where is/are",
63 },
64 SearchSignal {
65 pattern: Regex::new(r"(?i)\b(find|locate|show\s+me)\s+(the|a)?\b")
66 .expect("static regex: find/locate"),
67 intent_type: SearchIntentType::Location,
68 description: "find/locate/show me",
69 },
70 SearchSignal {
71 pattern: Regex::new(r"(?i)\b(which|what)\s+file\b").expect("static regex: which file"),
72 intent_type: SearchIntentType::Location,
73 description: "which/what file",
74 },
75 SearchSignal {
76 pattern: Regex::new(r"(?i)\blook\s+(for|at|up)\b").expect("static regex: look for"),
77 intent_type: SearchIntentType::Location,
78 description: "look for/at/up",
79 },
80 SearchSignal {
82 pattern: Regex::new(r"(?i)\bwhat\s+(is|are|does)\b").expect("static regex: what is"),
83 intent_type: SearchIntentType::Explanation,
84 description: "what is/are/does",
85 },
86 SearchSignal {
87 pattern: Regex::new(r"(?i)\bexplain\b").expect("static regex: explain"),
88 intent_type: SearchIntentType::Explanation,
89 description: "explain",
90 },
91 SearchSignal {
92 pattern: Regex::new(r"(?i)\b(tell|help)\s+me\s+(about|understand)\b")
93 .expect("static regex: tell me about"),
94 intent_type: SearchIntentType::Explanation,
95 description: "tell me about/understand",
96 },
97 SearchSignal {
98 pattern: Regex::new(r"(?i)\bwhat('s|\s+is)\s+the\s+(purpose|meaning|role)\b")
99 .expect("static regex: what's the purpose"),
100 intent_type: SearchIntentType::Explanation,
101 description: "what's the purpose/meaning/role",
102 },
103 SearchSignal {
104 pattern: Regex::new(r"(?i)\bcan\s+you\s+describe\b")
105 .expect("static regex: can you describe"),
106 intent_type: SearchIntentType::Explanation,
107 description: "can you describe",
108 },
109 SearchSignal {
111 pattern: Regex::new(r"(?i)\bdifference\s+between\b")
112 .expect("static regex: difference between"),
113 intent_type: SearchIntentType::Comparison,
114 description: "difference between",
115 },
116 SearchSignal {
117 pattern: Regex::new(r"(?i)\b(compare|vs\.?|versus)\b").expect("static regex: compare"),
118 intent_type: SearchIntentType::Comparison,
119 description: "compare/vs/versus",
120 },
121 SearchSignal {
122 pattern: Regex::new(r"(?i)\bwhich\s+(is|one|should)\s+(better|best|prefer)\b")
123 .expect("static regex: which is better"),
124 intent_type: SearchIntentType::Comparison,
125 description: "which is better",
126 },
127 SearchSignal {
128 pattern: Regex::new(r"(?i)\b(pros|cons|advantages|disadvantages)\b")
129 .expect("static regex: pros/cons"),
130 intent_type: SearchIntentType::Comparison,
131 description: "pros/cons/advantages/disadvantages",
132 },
133 SearchSignal {
135 pattern: Regex::new(r"(?i)\bwhy\s+(is|does|am|are)\b.*\b(error|fail|wrong|issue)\b")
136 .expect("static regex: why is error"),
137 intent_type: SearchIntentType::Troubleshoot,
138 description: "why is/does...error",
139 },
140 SearchSignal {
141 pattern: Regex::new(r"(?i)\b(error|exception|failure|crash|bug)\b")
142 .expect("static regex: error/exception"),
143 intent_type: SearchIntentType::Troubleshoot,
144 description: "error/exception/failure/crash/bug",
145 },
146 SearchSignal {
147 pattern: Regex::new(r"(?i)\b(not\s+working|doesn't\s+work|won't\s+work|broken)\b")
148 .expect("static regex: not working"),
149 intent_type: SearchIntentType::Troubleshoot,
150 description: "not working/doesn't work/broken",
151 },
152 SearchSignal {
153 pattern: Regex::new(r"(?i)\b(fix|solve|resolve|debug)\b")
154 .expect("static regex: fix/solve"),
155 intent_type: SearchIntentType::Troubleshoot,
156 description: "fix/solve/resolve/debug",
157 },
158 SearchSignal {
159 pattern: Regex::new(r"(?i)\b(issue|problem)\s+with\b")
160 .expect("static regex: issue with"),
161 intent_type: SearchIntentType::Troubleshoot,
162 description: "issue/problem with",
163 },
164 SearchSignal {
166 pattern: Regex::new(r"(?i)\b(search|find|lookup|query)\b")
167 .expect("static regex: search/find"),
168 intent_type: SearchIntentType::General,
169 description: "search/find/lookup",
170 },
171 SearchSignal {
172 pattern: Regex::new(r"(?i)\bshow\s+(me|us)\b").expect("static regex: show me"),
173 intent_type: SearchIntentType::General,
174 description: "show me/us",
175 },
176 ]
177});
178
/// Common English function words, built lazily on first access.
///
/// Every entry is lower-case ASCII and set lookups are exact, so text must be
/// lower-cased before it is checked against this set.
pub static STOP_WORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        // Articles and conjunctions.
        "a",
        "an",
        "the",
        "and",
        "or",
        "but",
        // Prepositions.
        "in",
        "on",
        "at",
        "to",
        "for",
        "of",
        "with",
        "by",
        "from",
        "as",
        // Auxiliary and modal verbs.
        "is",
        "was",
        "are",
        "were",
        "been",
        "be",
        "have",
        "has",
        "had",
        "do",
        "does",
        "did",
        "will",
        "would",
        "could",
        "should",
        "may",
        "might",
        "must",
        "shall",
        "can",
        "need",
        // Personal and possessive pronouns.
        "i",
        "you",
        "he",
        "she",
        "it",
        "we",
        "they",
        "me",
        "him",
        "her",
        "us",
        "them",
        "my",
        "your",
        "his",
        "its",
        "our",
        "their",
        // Demonstratives and question words.
        "this",
        "that",
        "these",
        "those",
        "what",
        "which",
        "who",
        "whom",
        "how",
        "when",
        "where",
        "why",
        // Quantifiers.
        "all",
        "each",
        "every",
        "both",
        "few",
        "more",
        "most",
        "other",
        "some",
        "such",
        // Negations, intensifiers and adverbs.
        "no",
        "nor",
        "not",
        "only",
        "own",
        "same",
        "so",
        "than",
        "too",
        "very",
        "just",
        "about",
        "also",
        "now",
        "here",
        "there",
        "up",
        "down",
        "out",
        "if",
        "then",
        // Positional and temporal prepositions.
        "into",
        "through",
        "during",
        "before",
        "after",
        "above",
        "below",
        "between",
        "under",
        "again",
        "further",
        "once",
        // Indefinite words.
        "any",
        "something",
        "anything",
        "nothing",
    ])
});