Skip to main content

subcog/services/
query_parser.rs

1//! Filter query parser for memory search.
2//!
3//! Parses GitHub-style filter syntax like:
4//! - `ns:decisions` - Filter by namespace
5//! - `tag:rust` - Filter by tag (AND with other tags)
6//! - `tag:rust,python` - Filter by tags (OR logic)
7//! - `-tag:test` - Exclude memories with tag
8//! - `since:7d` - Filter by creation time: keep memories newer than the given age (`h`, `d`, `w` units; a bare number means days)
9//! - `source:src/*` - Filter by source pattern
10//! - `status:active` - Filter by status
11//! - `project:github.com/org/repo` - Filter by project identifier
12//! - `branch:main` - Filter by branch name
13//! - `path:src/main.rs` - Filter by file path
14//! - `entity:PostgreSQL` - Filter by entity name (memories mentioning this entity)
15//! - `entity:Rust,Python` - Filter by multiple entities (OR logic)
16
17use crate::models::{MemoryStatus, Namespace, SearchFilter};
18
19/// Parses a filter query string into a `SearchFilter`.
20///
21/// # Arguments
22///
23/// * `query` - The filter query string (e.g., "ns:decisions tag:rust")
24///
25/// # Returns
26///
27/// A `SearchFilter` populated with the parsed criteria.
28///
29/// # Examples
30///
31/// ```
32/// use subcog::services::parse_filter_query;
33///
34/// let filter = parse_filter_query("ns:decisions tag:rust -tag:test");
35/// assert_eq!(filter.namespaces.len(), 1);
36/// assert_eq!(filter.tags.len(), 1);
37/// assert_eq!(filter.excluded_tags.len(), 1);
38/// ```
39#[must_use]
40pub fn parse_filter_query(query: &str) -> SearchFilter {
41    let mut filter = SearchFilter::new();
42
43    // Split on whitespace to get individual tokens
44    for token in query.split_whitespace() {
45        parse_token(token, &mut filter);
46    }
47
48    filter
49}
50
51/// Parses a single filter token and updates the filter.
52fn parse_token(token: &str, filter: &mut SearchFilter) {
53    // Check for exclusion prefix
54    if let Some(rest) = token.strip_prefix('-') {
55        parse_excluded_token(rest, filter);
56        return;
57    }
58
59    // Parse key:value tokens
60    let Some((key, value)) = token.split_once(':') else {
61        return;
62    };
63
64    match key.to_lowercase().as_str() {
65        "ns" | "namespace" => {
66            if let Some(ns) = Namespace::parse(value) {
67                filter.namespaces.push(ns);
68            }
69        },
70        "tag" | "tags" => parse_tag_value(value, filter),
71        "since" => {
72            if let Some(timestamp) = parse_duration_to_timestamp(value) {
73                filter.created_after = Some(timestamp);
74            }
75        },
76        "source" | "src" => {
77            filter.source_pattern = Some(value.to_string());
78        },
79        "project" | "proj" | "repo" => {
80            filter.project_id = Some(value.to_string());
81        },
82        "branch" => {
83            filter.branch = Some(value.to_string());
84        },
85        "path" | "file" | "file_path" => {
86            filter.file_path = Some(value.to_string());
87        },
88        "status" => {
89            if let Some(status) = parse_status(value) {
90                filter.statuses.push(status);
91            }
92        },
93        "entity" | "ent" | "entities" => {
94            // Entity filter: entity:PostgreSQL or entity:Rust,Python (OR logic)
95            filter.entity_names.extend(
96                value
97                    .split(',')
98                    .map(str::trim)
99                    .filter(|s| !s.is_empty())
100                    .map(ToString::to_string),
101            );
102        },
103        _ => {
104            // Unknown key, ignore
105        },
106    }
107}
108
109/// Parses an excluded token (prefixed with -).
110fn parse_excluded_token(rest: &str, filter: &mut SearchFilter) {
111    let Some(tag_value) = rest.strip_prefix("tag:") else {
112        return;
113    };
114    // Excluded tags support comma-separated values
115    for tag in tag_value.split(',') {
116        let tag = tag.trim();
117        if !tag.is_empty() {
118            filter.excluded_tags.push(tag.to_string());
119        }
120    }
121}
122
123/// Parses tag values and adds them to the appropriate filter field.
124fn parse_tag_value(value: &str, filter: &mut SearchFilter) {
125    // Comma-separated values use OR logic (tags_any)
126    // Space-separated (multiple tag: tokens) use AND logic (tags)
127    let tags: Vec<&str> = value
128        .split(',')
129        .map(str::trim)
130        .filter(|s| !s.is_empty())
131        .collect();
132
133    if tags.len() > 1 {
134        // Multiple values in one token = OR logic
135        filter.tags_any.extend(tags.iter().map(|&t| t.to_string()));
136    } else if let Some(&tag) = tags.first() {
137        // Single value = AND logic
138        filter.tags.push(tag.to_string());
139    }
140}
141
/// Parses a duration string (e.g., "7d", "12h", "2w") into a Unix timestamp.
///
/// The input is trimmed and lowercased, then read as a non-negative integer
/// followed by at most one unit character:
/// - `h` - hours
/// - `d` - days
/// - `w` - weeks
/// - no unit - days
///
/// Exactly one unit character is removed, so malformed input such as `7dd`
/// is rejected rather than being read as `7d`.
///
/// # Returns
///
/// `Some(now - duration)` in seconds since the Unix epoch, clamped at `0` when
/// the duration reaches back before the epoch. Returns `None` when the number
/// is missing or not a valid `u64`, when converting it to seconds overflows,
/// or when the system clock reports a time before the Unix epoch.
fn parse_duration_to_timestamp(duration: &str) -> Option<u64> {
    const SECS_PER_HOUR: u64 = 3_600;
    const SECS_PER_DAY: u64 = 86_400;
    const SECS_PER_WEEK: u64 = 604_800;

    let duration = duration.trim().to_lowercase();

    // Split off a single trailing unit character, if any. `strip_suffix`
    // removes exactly one occurrence, unlike `trim_end_matches`.
    let (num_str, secs_per_unit) = if let Some(number) = duration.strip_suffix('d') {
        (number, SECS_PER_DAY)
    } else if let Some(number) = duration.strip_suffix('h') {
        (number, SECS_PER_HOUR)
    } else if let Some(number) = duration.strip_suffix('w') {
        (number, SECS_PER_WEEK)
    } else {
        // Default to days if no unit
        (duration.as_str(), SECS_PER_DAY)
    };

    let num: u64 = num_str.parse().ok()?;

    // Reject durations whose length in seconds does not fit in a u64.
    let seconds = num.checked_mul(secs_per_unit)?;

    // Get current time and subtract duration
    let now = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .ok()?
        .as_secs();

    Some(now.saturating_sub(seconds))
}
177
178/// Parses a status string into a `MemoryStatus`.
179fn parse_status(s: &str) -> Option<MemoryStatus> {
180    match s.to_lowercase().as_str() {
181        "active" => Some(MemoryStatus::Active),
182        "archived" => Some(MemoryStatus::Archived),
183        "superseded" => Some(MemoryStatus::Superseded),
184        "pending" => Some(MemoryStatus::Pending),
185        "deleted" => Some(MemoryStatus::Deleted),
186        _ => None,
187    }
188}
189
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns true when `items` holds an entry equal to `wanted`.
    fn has(items: &[String], wanted: &str) -> bool {
        items.contains(&wanted.to_string())
    }

    /// Current Unix time in whole seconds.
    fn unix_now() -> u64 {
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs()
    }

    // --- Empty input ---

    #[test]
    fn test_parse_empty_query() {
        assert!(parse_filter_query("").is_empty());
    }

    // --- Namespaces ---

    #[test]
    fn test_parse_namespace() {
        let parsed = parse_filter_query("ns:decisions");
        assert_eq!(parsed.namespaces.len(), 1);
        assert_eq!(parsed.namespaces[0], Namespace::Decisions);
    }

    #[test]
    fn test_parse_namespace_full_name() {
        let parsed = parse_filter_query("namespace:patterns");
        assert_eq!(parsed.namespaces.len(), 1);
        assert_eq!(parsed.namespaces[0], Namespace::Patterns);
    }

    // --- Tags ---

    #[test]
    fn test_parse_single_tag() {
        let parsed = parse_filter_query("tag:rust");
        assert_eq!(parsed.tags.len(), 1);
        assert_eq!(parsed.tags[0], "rust");
        assert!(parsed.tags_any.is_empty());
    }

    #[test]
    fn test_parse_multiple_tags_and_logic() {
        let parsed = parse_filter_query("tag:rust tag:error");
        assert_eq!(parsed.tags.len(), 2);
        for expected in ["rust", "error"] {
            assert!(has(&parsed.tags, expected));
        }
    }

    #[test]
    fn test_parse_tags_or_logic() {
        let parsed = parse_filter_query("tag:rust,python,go");
        assert!(parsed.tags.is_empty());
        assert_eq!(parsed.tags_any.len(), 3);
        for expected in ["rust", "python", "go"] {
            assert!(has(&parsed.tags_any, expected));
        }
    }

    #[test]
    fn test_parse_excluded_tags() {
        let parsed = parse_filter_query("-tag:test");
        assert_eq!(parsed.excluded_tags.len(), 1);
        assert_eq!(parsed.excluded_tags[0], "test");
    }

    #[test]
    fn test_parse_excluded_tags_multiple() {
        let parsed = parse_filter_query("-tag:test,deprecated");
        assert_eq!(parsed.excluded_tags.len(), 2);
        for expected in ["test", "deprecated"] {
            assert!(has(&parsed.excluded_tags, expected));
        }
    }

    // --- Scalar filters ---

    #[test]
    fn test_parse_status() {
        let parsed = parse_filter_query("status:active");
        assert_eq!(parsed.statuses.len(), 1);
        assert_eq!(parsed.statuses[0], MemoryStatus::Active);
    }

    #[test]
    fn test_parse_source_pattern() {
        let parsed = parse_filter_query("source:src/*");
        assert_eq!(parsed.source_pattern, Some("src/*".to_string()));
    }

    #[test]
    fn test_parse_project_branch_path() {
        let parsed =
            parse_filter_query("project:github.com/org/repo branch:main path:src/lib.rs");
        assert_eq!(parsed.project_id.as_deref(), Some("github.com/org/repo"));
        assert_eq!(parsed.branch.as_deref(), Some("main"));
        assert_eq!(parsed.file_path.as_deref(), Some("src/lib.rs"));
    }

    #[test]
    fn test_parse_since() {
        let parsed = parse_filter_query("since:7d");
        assert!(parsed.created_after.is_some());

        let seven_days_ago = unix_now() - (7 * 86400);

        // Allow 1 second tolerance
        let drift = parsed.created_after.unwrap().abs_diff(seven_days_ago);
        assert!(drift <= 1);
    }

    // --- Combined and edge-case queries ---

    #[test]
    fn test_parse_complex_query() {
        let parsed =
            parse_filter_query("ns:decisions tag:rust tag:database -tag:test since:30d");
        assert_eq!(parsed.namespaces.len(), 1);
        assert_eq!(parsed.namespaces[0], Namespace::Decisions);
        assert_eq!(parsed.tags.len(), 2);
        assert!(has(&parsed.tags, "rust"));
        assert!(has(&parsed.tags, "database"));
        assert_eq!(parsed.excluded_tags.len(), 1);
        assert_eq!(parsed.excluded_tags[0], "test");
        assert!(parsed.created_after.is_some());
    }

    #[test]
    fn test_parse_unknown_filter_ignored() {
        let parsed = parse_filter_query("unknown:value tag:rust");
        assert_eq!(parsed.tags.len(), 1);
        assert_eq!(parsed.tags[0], "rust");
    }

    #[test]
    fn test_parse_case_insensitive() {
        let parsed = parse_filter_query("NS:DECISIONS TAG:Rust STATUS:Active");
        assert_eq!(parsed.namespaces.len(), 1);
        assert_eq!(parsed.tags.len(), 1);
        assert_eq!(parsed.tags[0], "Rust"); // Tag value preserves case
        assert_eq!(parsed.statuses.len(), 1);
    }

    // --- Entities ---

    #[test]
    fn test_parse_entity_filter() {
        let parsed = parse_filter_query("entity:PostgreSQL");
        assert_eq!(parsed.entity_names.len(), 1);
        assert_eq!(parsed.entity_names[0], "PostgreSQL");
    }

    #[test]
    fn test_parse_entity_filter_multiple() {
        let parsed = parse_filter_query("entity:Rust,Python,Go");
        assert_eq!(parsed.entity_names.len(), 3);
        for expected in ["Rust", "Python", "Go"] {
            assert!(has(&parsed.entity_names, expected));
        }
    }

    #[test]
    fn test_parse_entity_filter_with_other_filters() {
        let parsed = parse_filter_query("ns:decisions entity:PostgreSQL tag:database");
        assert_eq!(parsed.namespaces.len(), 1);
        assert_eq!(parsed.entity_names.len(), 1);
        assert_eq!(parsed.entity_names[0], "PostgreSQL");
        assert_eq!(parsed.tags.len(), 1);
    }

    #[test]
    fn test_parse_entity_filter_aliases() {
        // Test "ent" alias
        let via_ent = parse_filter_query("ent:Redis");
        assert_eq!(via_ent.entity_names.len(), 1);
        assert_eq!(via_ent.entity_names[0], "Redis");

        // Test "entities" alias
        let via_entities = parse_filter_query("entities:Kafka,RabbitMQ");
        assert_eq!(via_entities.entity_names.len(), 2);
    }
}