Skip to main content

subcog/services/
topic_index.rs

1//! Topic index service for memory organization.
2//!
3//! Maintains an index of topics extracted from memories for quick lookup
4//! and topic-based resource access.
5
6use crate::models::{MemoryId, Namespace};
7use crate::services::RecallService;
8use crate::{Error, Result, SearchFilter};
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::sync::{Arc, RwLock};
12use std::time::{Duration, Instant};
13
// Tunables for building and refreshing the topic index.

/// Number of seconds a built index is considered fresh by default.
const DEFAULT_REFRESH_INTERVAL_SECS: u64 = 300;
/// Upper bound passed to the recall service when loading memories to index.
const MAX_INDEX_MEMORIES: usize = 10_000;
/// Shortest word (in bytes) that may become a content keyword.
const MIN_TOPIC_WORD_LENGTH: usize = 3;
/// Longest word (in bytes) that may become a content keyword.
const MAX_TOPIC_WORD_LENGTH: usize = 30;
23
24/// Information about a topic in the index.
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct TopicInfo {
27    /// Topic name (normalized, lowercase).
28    pub name: String,
29    /// Number of memories with this topic.
30    pub memory_count: usize,
31    /// Namespaces where this topic appears.
32    pub namespaces: Vec<Namespace>,
33}
34
35/// Service for maintaining topic → memory mappings.
36pub struct TopicIndexService {
37    /// Topic to memory ID mappings.
38    topics: Arc<RwLock<HashMap<String, Vec<MemoryId>>>>,
39    /// Topic to namespace mappings.
40    topic_namespaces: Arc<RwLock<HashMap<String, Vec<Namespace>>>>,
41    /// Last refresh timestamp.
42    last_refresh: Arc<RwLock<Option<Instant>>>,
43    /// Refresh interval.
44    refresh_interval: Duration,
45}
46
47impl TopicIndexService {
48    /// Creates a new topic index service.
49    #[must_use]
50    pub fn new() -> Self {
51        Self {
52            topics: Arc::new(RwLock::new(HashMap::new())),
53            topic_namespaces: Arc::new(RwLock::new(HashMap::new())),
54            last_refresh: Arc::new(RwLock::new(None)),
55            refresh_interval: Duration::from_secs(DEFAULT_REFRESH_INTERVAL_SECS),
56        }
57    }
58
59    /// Sets the refresh interval.
60    #[must_use]
61    pub const fn with_refresh_interval(mut self, interval: Duration) -> Self {
62        self.refresh_interval = interval;
63        self
64    }
65
66    /// Checks if the index needs refreshing.
67    #[must_use]
68    pub fn needs_refresh(&self) -> bool {
69        let last = self.last_refresh.read().ok().and_then(|guard| *guard);
70        match last {
71            Some(t) => t.elapsed() > self.refresh_interval,
72            None => true,
73        }
74    }
75
76    /// Builds the topic index from a recall service.
77    ///
78    /// Extracts topics from:
79    /// - Memory tags
80    /// - Memory namespace names
81    /// - Keywords in memory content
82    ///
83    /// # Errors
84    ///
85    /// Returns an error if memory retrieval fails.
86    pub fn build_index(&self, recall: &RecallService) -> Result<()> {
87        let filter = SearchFilter::new();
88        let result = recall.list_all(&filter, MAX_INDEX_MEMORIES)?;
89
90        let mut topics_map: HashMap<String, Vec<MemoryId>> = HashMap::new();
91        let mut namespace_map: HashMap<String, Vec<Namespace>> = HashMap::new();
92
93        for hit in &result.memories {
94            let memory = &hit.memory;
95
96            // Extract topics from tags
97            for tag in &memory.tags {
98                let topic = normalize_topic(tag);
99                add_topic_entry(
100                    &topic,
101                    &memory.id,
102                    memory.namespace,
103                    &mut topics_map,
104                    &mut namespace_map,
105                );
106            }
107
108            // Extract topics from namespace name
109            let ns_topic = normalize_topic(memory.namespace.as_str());
110            add_topic_entry(
111                &ns_topic,
112                &memory.id,
113                memory.namespace,
114                &mut topics_map,
115                &mut namespace_map,
116            );
117
118            // Extract keyword topics from content (top 5 keywords)
119            let keywords = extract_content_keywords(&memory.content);
120            for keyword in keywords.into_iter().take(5) {
121                let topic = normalize_topic(&keyword);
122                add_topic_with_min_length(
123                    &topic,
124                    3,
125                    &memory.id,
126                    memory.namespace,
127                    &mut topics_map,
128                    &mut namespace_map,
129                );
130            }
131        }
132
133        // Deduplicate memory IDs per topic
134        for ids in topics_map.values_mut() {
135            ids.sort_by(|a, b| a.as_str().cmp(b.as_str()));
136            ids.dedup_by(|a, b| a.as_str() == b.as_str());
137        }
138
139        // Update the index
140        {
141            let mut guard = self.topics.write().map_err(|_| Error::OperationFailed {
142                operation: "build_index".to_string(),
143                cause: "Lock poisoned".to_string(),
144            })?;
145            *guard = topics_map;
146        }
147
148        {
149            let mut guard = self
150                .topic_namespaces
151                .write()
152                .map_err(|_| Error::OperationFailed {
153                    operation: "build_index".to_string(),
154                    cause: "Lock poisoned".to_string(),
155                })?;
156            *guard = namespace_map;
157        }
158
159        // Update last refresh time
160        {
161            let mut guard = self
162                .last_refresh
163                .write()
164                .map_err(|_| Error::OperationFailed {
165                    operation: "build_index".to_string(),
166                    cause: "Lock poisoned".to_string(),
167                })?;
168            *guard = Some(Instant::now());
169        }
170
171        Ok(())
172    }
173
174    /// Lists all topics with their metadata.
175    ///
176    /// # Errors
177    ///
178    /// Returns an error if the lock is poisoned.
179    pub fn list_topics(&self) -> Result<Vec<TopicInfo>> {
180        let topics_guard = self.topics.read().map_err(|_| Error::OperationFailed {
181            operation: "list_topics".to_string(),
182            cause: "Lock poisoned".to_string(),
183        })?;
184
185        let ns_guard = self
186            .topic_namespaces
187            .read()
188            .map_err(|_| Error::OperationFailed {
189                operation: "list_topics".to_string(),
190                cause: "Lock poisoned".to_string(),
191            })?;
192
193        let mut topics: Vec<TopicInfo> = topics_guard
194            .iter()
195            .map(|(name, ids)| TopicInfo {
196                name: name.clone(),
197                memory_count: ids.len(),
198                namespaces: ns_guard.get(name).cloned().unwrap_or_default(),
199            })
200            .collect();
201
202        // Sort by memory count descending
203        topics.sort_by(|a, b| b.memory_count.cmp(&a.memory_count));
204
205        Ok(topics)
206    }
207
208    /// Gets memory IDs for a specific topic.
209    ///
210    /// # Errors
211    ///
212    /// Returns an error if the lock is poisoned.
213    pub fn get_topic_memories(&self, topic: &str) -> Result<Vec<MemoryId>> {
214        let normalized = normalize_topic(topic);
215        let guard = self.topics.read().map_err(|_| Error::OperationFailed {
216            operation: "get_topic_memories".to_string(),
217            cause: "Lock poisoned".to_string(),
218        })?;
219
220        Ok(guard.get(&normalized).cloned().unwrap_or_default())
221    }
222
223    /// Gets topic info for a specific topic.
224    ///
225    /// # Errors
226    ///
227    /// Returns an error if the lock is poisoned.
228    pub fn get_topic_info(&self, topic: &str) -> Result<Option<TopicInfo>> {
229        let normalized = normalize_topic(topic);
230
231        let topics_guard = self.topics.read().map_err(|_| Error::OperationFailed {
232            operation: "get_topic_info".to_string(),
233            cause: "Lock poisoned".to_string(),
234        })?;
235
236        let ns_guard = self
237            .topic_namespaces
238            .read()
239            .map_err(|_| Error::OperationFailed {
240                operation: "get_topic_info".to_string(),
241                cause: "Lock poisoned".to_string(),
242            })?;
243
244        match topics_guard.get(&normalized) {
245            Some(ids) => Ok(Some(TopicInfo {
246                name: normalized,
247                memory_count: ids.len(),
248                namespaces: ns_guard.get(topic).cloned().unwrap_or_default(),
249            })),
250            None => Ok(None),
251        }
252    }
253
254    /// Adds a memory to the topic index.
255    ///
256    /// Call this when a new memory is captured to keep the index up to date.
257    ///
258    /// # Errors
259    ///
260    /// Returns an error if the lock is poisoned.
261    pub fn add_memory(
262        &self,
263        memory_id: &MemoryId,
264        tags: &[String],
265        namespace: Namespace,
266    ) -> Result<()> {
267        let mut topics_guard = self.topics.write().map_err(|_| Error::OperationFailed {
268            operation: "add_memory".to_string(),
269            cause: "Lock poisoned".to_string(),
270        })?;
271
272        let mut ns_guard = self
273            .topic_namespaces
274            .write()
275            .map_err(|_| Error::OperationFailed {
276                operation: "add_memory".to_string(),
277                cause: "Lock poisoned".to_string(),
278            })?;
279
280        // Add from tags
281        for tag in tags {
282            let topic = normalize_topic(tag);
283            add_topic_entry_guarded(
284                &topic,
285                memory_id,
286                namespace,
287                &mut topics_guard,
288                &mut ns_guard,
289            );
290        }
291
292        // Add from namespace
293        let ns_topic = normalize_topic(namespace.as_str());
294        add_topic_entry_guarded(
295            &ns_topic,
296            memory_id,
297            namespace,
298            &mut topics_guard,
299            &mut ns_guard,
300        );
301
302        Ok(())
303    }
304
305    /// Returns the number of topics in the index.
306    #[must_use]
307    pub fn topic_count(&self) -> usize {
308        self.topics.read().map(|guard| guard.len()).unwrap_or(0)
309    }
310
311    /// Returns the total number of topic-memory associations.
312    #[must_use]
313    pub fn association_count(&self) -> usize {
314        self.topics
315            .read()
316            .map(|guard| guard.values().map(Vec::len).sum())
317            .unwrap_or(0)
318    }
319
320    /// Removes a memory from the topic index (PERF-M1: incremental updates).
321    ///
322    /// Call this when a memory is deleted to keep the index up to date
323    /// without requiring a full rebuild.
324    ///
325    /// # Errors
326    ///
327    /// Returns an error if the lock is poisoned.
328    pub fn remove_memory(&self, memory_id: &MemoryId) -> Result<()> {
329        let mut topics_guard = self.topics.write().map_err(|_| Error::OperationFailed {
330            operation: "remove_memory".to_string(),
331            cause: "Lock poisoned".to_string(),
332        })?;
333
334        // Remove the memory ID from all topic entries
335        for ids in topics_guard.values_mut() {
336            ids.retain(|id| id.as_str() != memory_id.as_str());
337        }
338
339        // Remove empty topics
340        topics_guard.retain(|_, ids| !ids.is_empty());
341
342        // Note: We don't remove from topic_namespaces since other memories
343        // with that namespace may still exist. Namespace cleanup happens
344        // during full rebuild or can be done separately if needed.
345
346        Ok(())
347    }
348
349    /// Updates a memory in the topic index (PERF-M1: incremental updates).
350    ///
351    /// This is a convenience method that removes the old entry and adds
352    /// the new one. Use this when tags or namespace change.
353    ///
354    /// # Errors
355    ///
356    /// Returns an error if the lock is poisoned.
357    pub fn update_memory(
358        &self,
359        memory_id: &MemoryId,
360        new_tags: &[String],
361        new_namespace: Namespace,
362    ) -> Result<()> {
363        self.remove_memory(memory_id)?;
364        self.add_memory(memory_id, new_tags, new_namespace)?;
365        Ok(())
366    }
367
368    /// Adds content-based keywords to the topic index (PERF-M1: incremental updates).
369    ///
370    /// Call this after `add_memory()` to also index keywords from the memory content.
371    /// This is separated to allow callers to control whether content keywords are indexed.
372    ///
373    /// # Errors
374    ///
375    /// Returns an error if the lock is poisoned.
376    pub fn add_content_keywords(
377        &self,
378        memory_id: &MemoryId,
379        content: &str,
380        namespace: Namespace,
381    ) -> Result<()> {
382        let mut topics_guard = self.topics.write().map_err(|_| Error::OperationFailed {
383            operation: "add_content_keywords".to_string(),
384            cause: "Lock poisoned".to_string(),
385        })?;
386
387        let mut ns_guard = self
388            .topic_namespaces
389            .write()
390            .map_err(|_| Error::OperationFailed {
391                operation: "add_content_keywords".to_string(),
392                cause: "Lock poisoned".to_string(),
393            })?;
394
395        let keywords = extract_content_keywords(content);
396        for keyword in keywords.into_iter().take(5) {
397            let topic = normalize_topic(&keyword);
398            if topic.len() >= 3 {
399                add_topic_entry_guarded(
400                    &topic,
401                    memory_id,
402                    namespace,
403                    &mut topics_guard,
404                    &mut ns_guard,
405                );
406            }
407        }
408
409        Ok(())
410    }
411
412    /// Clears the entire topic index.
413    ///
414    /// Use before a full rebuild or when resetting state.
415    ///
416    /// # Errors
417    ///
418    /// Returns an error if the lock is poisoned.
419    pub fn clear(&self) -> Result<()> {
420        {
421            let mut guard = self.topics.write().map_err(|_| Error::OperationFailed {
422                operation: "clear".to_string(),
423                cause: "Lock poisoned".to_string(),
424            })?;
425            guard.clear();
426        }
427
428        {
429            let mut guard = self
430                .topic_namespaces
431                .write()
432                .map_err(|_| Error::OperationFailed {
433                    operation: "clear".to_string(),
434                    cause: "Lock poisoned".to_string(),
435                })?;
436            guard.clear();
437        }
438
439        {
440            let mut guard = self
441                .last_refresh
442                .write()
443                .map_err(|_| Error::OperationFailed {
444                    operation: "clear".to_string(),
445                    cause: "Lock poisoned".to_string(),
446                })?;
447            *guard = None;
448        }
449
450        Ok(())
451    }
452}
453
454impl Default for TopicIndexService {
455    fn default() -> Self {
456        Self::new()
457    }
458}
459
460/// Adds a topic entry to the maps (helper to reduce nesting).
461fn add_topic_entry(
462    topic: &str,
463    memory_id: &MemoryId,
464    namespace: Namespace,
465    topics_map: &mut HashMap<String, Vec<MemoryId>>,
466    namespace_map: &mut HashMap<String, Vec<Namespace>>,
467) {
468    if topic.is_empty() {
469        return;
470    }
471    topics_map
472        .entry(topic.to_string())
473        .or_default()
474        .push(memory_id.clone());
475    insert_namespace_if_missing(namespace_map, topic, namespace);
476}
477
478/// Adds a topic entry only if it meets minimum length requirement.
479fn add_topic_with_min_length(
480    topic: &str,
481    min_len: usize,
482    memory_id: &MemoryId,
483    namespace: Namespace,
484    topics_map: &mut HashMap<String, Vec<MemoryId>>,
485    namespace_map: &mut HashMap<String, Vec<Namespace>>,
486) {
487    if topic.len() >= min_len {
488        add_topic_entry(topic, memory_id, namespace, topics_map, namespace_map);
489    }
490}
491
492/// Adds a topic entry to guarded maps (for use with lock guards).
493fn add_topic_entry_guarded(
494    topic: &str,
495    memory_id: &MemoryId,
496    namespace: Namespace,
497    topics_guard: &mut HashMap<String, Vec<MemoryId>>,
498    ns_guard: &mut HashMap<String, Vec<Namespace>>,
499) {
500    if topic.is_empty() {
501        return;
502    }
503    topics_guard
504        .entry(topic.to_string())
505        .or_default()
506        .push(memory_id.clone());
507    insert_namespace_if_missing(ns_guard, topic, namespace);
508}
509
510/// Inserts a namespace into the list if not already present.
511fn insert_namespace_if_missing(
512    map: &mut HashMap<String, Vec<Namespace>>,
513    topic: &str,
514    namespace: Namespace,
515) {
516    let ns_list = map.entry(topic.to_string()).or_default();
517    if !ns_list.contains(&namespace) {
518        ns_list.push(namespace);
519    }
520}
521
/// Produces the canonical index key for a topic name.
///
/// The input is trimmed and lowercased, then every character that is neither
/// alphanumeric nor `-` is removed (so `"test_case"` becomes `"testcase"`).
fn normalize_topic(topic: &str) -> String {
    let lowered = topic.trim().to_lowercase();
    lowered
        .chars()
        .filter(|ch| ch.is_alphanumeric() || *ch == '-')
        .collect()
}
529
530/// Extracts keyword topics from content.
531fn extract_content_keywords(content: &str) -> Vec<String> {
532    // Stop words to filter out
533    static STOP_WORDS: &[&str] = &[
534        "the", "a", "an", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had",
535        "do", "does", "did", "will", "would", "could", "should", "may", "might", "must", "shall",
536        "can", "need", "dare", "ought", "used", "to", "of", "in", "for", "on", "with", "at", "by",
537        "from", "as", "into", "through", "during", "before", "after", "above", "below", "between",
538        "under", "again", "further", "then", "once", "here", "there", "when", "where", "why",
539        "how", "all", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not",
540        "only", "own", "same", "so", "than", "too", "very", "just", "also", "now", "and", "but",
541        "or", "if", "because", "until", "while", "this", "that", "these", "those", "what", "which",
542        "who", "whom", "whose", "it", "its", "they", "them", "their", "we", "us", "our", "you",
543        "your", "i", "my", "me", "he", "him", "his", "she", "her",
544    ];
545
546    let words: Vec<String> = content
547        .split(|c: char| !c.is_alphanumeric())
548        .filter(|w| w.len() >= MIN_TOPIC_WORD_LENGTH && w.len() <= MAX_TOPIC_WORD_LENGTH)
549        .map(str::to_lowercase)
550        .filter(|w| !STOP_WORDS.contains(&w.as_str()))
551        .filter(|w| !w.chars().all(char::is_numeric))
552        .collect();
553
554    // Count word frequencies
555    let mut freq: HashMap<String, usize> = HashMap::new();
556    for word in words {
557        *freq.entry(word).or_insert(0) += 1;
558    }
559
560    // Sort by frequency and return top keywords
561    let mut sorted: Vec<_> = freq.into_iter().collect();
562    sorted.sort_by(|a, b| b.1.cmp(&a.1));
563    sorted.into_iter().map(|(w, _)| w).collect()
564}
565
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned tag list from string literals.
    fn tags(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| (*name).to_string()).collect()
    }

    /// Stamps the service as refreshed "now", as a successful rebuild would.
    fn mark_refreshed(service: &TopicIndexService) {
        *service.last_refresh.write().unwrap() = Some(Instant::now());
    }

    /// Number of memories currently recorded under `topic`.
    fn memory_count(service: &TopicIndexService, topic: &str) -> usize {
        service.get_topic_memories(topic).unwrap().len()
    }

    #[test]
    fn test_topic_index_creation() {
        let index = TopicIndexService::new();
        assert!(index.needs_refresh());
        assert_eq!(index.topic_count(), 0);
    }

    #[test]
    fn test_normalize_topic() {
        let cases = [
            ("Rust", "rust"),
            ("  Python  ", "python"),
            ("error-handling", "error-handling"),
            ("test_case", "testcase"),
        ];
        for (input, expected) in cases {
            assert_eq!(normalize_topic(input), expected);
        }
    }

    #[test]
    fn test_extract_content_keywords() {
        let found = extract_content_keywords(
            "The Rust programming language is great for systems programming",
        );

        assert!(found.iter().any(|k| k == "rust"));
        assert!(found.iter().any(|k| k == "programming"));
        // "the" is a stop word
        assert!(!found.iter().any(|k| k == "the"));
    }

    #[test]
    fn test_add_memory() {
        let index = TopicIndexService::new();
        let memory = MemoryId::new("test-123");

        index
            .add_memory(
                &memory,
                &tags(&["rust", "error-handling"]),
                Namespace::Patterns,
            )
            .unwrap();

        let under_rust = index.get_topic_memories("rust").unwrap();
        assert_eq!(under_rust.len(), 1);
        assert_eq!(under_rust[0].as_str(), "test-123");

        // The namespace name is indexed as a topic too.
        assert_eq!(memory_count(&index, "patterns"), 1);
    }

    #[test]
    fn test_list_topics() {
        let index = TopicIndexService::new();
        let first = MemoryId::new("test-1");
        let second = MemoryId::new("test-2");

        index
            .add_memory(&first, &tags(&["rust"]), Namespace::Decisions)
            .unwrap();
        index
            .add_memory(&second, &tags(&["rust", "async"]), Namespace::Patterns)
            .unwrap();

        let listed = index.list_topics().unwrap();
        let count_of = |name: &str| {
            listed
                .iter()
                .find(|t| t.name == name)
                .unwrap()
                .memory_count
        };

        // rust should have 2 memories, async should have 1
        assert_eq!(count_of("rust"), 2);
        assert_eq!(count_of("async"), 1);
    }

    #[test]
    fn test_get_topic_info() {
        let index = TopicIndexService::new();
        let memory = MemoryId::new("test-1");

        index
            .add_memory(&memory, &tags(&["authentication"]), Namespace::Decisions)
            .unwrap();

        let looked_up = index.get_topic_info("authentication").unwrap();
        assert!(looked_up.is_some());
        let info = looked_up.unwrap();
        assert_eq!(info.name, "authentication");
        assert_eq!(info.memory_count, 1);
        assert!(info.namespaces.contains(&Namespace::Decisions));
    }

    #[test]
    fn test_get_topic_info_not_found() {
        let index = TopicIndexService::new();
        assert!(index.get_topic_info("nonexistent").unwrap().is_none());
    }

    #[test]
    fn test_topic_info_serialization() {
        let info = TopicInfo {
            name: "rust".to_string(),
            memory_count: 5,
            namespaces: vec![Namespace::Decisions, Namespace::Patterns],
        };

        let json = serde_json::to_string(&info).unwrap();
        assert!(json.contains("rust"));
        assert!(json.contains('5'));
    }

    #[test]
    fn test_refresh_interval() {
        let index = TopicIndexService::new().with_refresh_interval(Duration::from_secs(60));
        assert!(index.needs_refresh());

        // Once a refresh is recorded, the 60s interval has not elapsed yet.
        mark_refreshed(&index);
        assert!(!index.needs_refresh());
    }

    // Tests for incremental update methods (PERF-M1)

    #[test]
    fn test_remove_memory() {
        let index = TopicIndexService::new();
        let first = MemoryId::new("test-1");
        let second = MemoryId::new("test-2");

        index
            .add_memory(&first, &tags(&["rust"]), Namespace::Decisions)
            .unwrap();
        index
            .add_memory(&second, &tags(&["rust"]), Namespace::Patterns)
            .unwrap();
        assert_eq!(memory_count(&index, "rust"), 2);

        // Remove one memory
        index.remove_memory(&first).unwrap();

        // Only the other memory is left
        let remaining = index.get_topic_memories("rust").unwrap();
        assert_eq!(remaining.len(), 1);
        assert_eq!(remaining[0].as_str(), "test-2");
    }

    #[test]
    fn test_remove_memory_cleans_empty_topics() {
        let index = TopicIndexService::new();
        let memory = MemoryId::new("test-1");

        index
            .add_memory(&memory, &tags(&["unique-topic"]), Namespace::Decisions)
            .unwrap();
        assert_eq!(memory_count(&index, "unique-topic"), 1);

        // Remove the only memory with this topic
        index.remove_memory(&memory).unwrap();

        // Topic should be empty and removed
        assert!(
            index
                .get_topic_memories("unique-topic")
                .unwrap()
                .is_empty()
        );

        // The listing should reflect the removal
        let listed = index.list_topics().unwrap();
        assert!(!listed.iter().any(|t| t.name == "unique-topic"));
    }

    #[test]
    fn test_remove_nonexistent_memory() {
        let index = TopicIndexService::new();
        let unknown = MemoryId::new("nonexistent");

        // Removing a memory that was never indexed must not fail
        assert!(index.remove_memory(&unknown).is_ok());
    }

    #[test]
    fn test_update_memory() {
        let index = TopicIndexService::new();
        let memory = MemoryId::new("test-1");

        // Add with initial tags
        index
            .add_memory(&memory, &tags(&["old-tag"]), Namespace::Decisions)
            .unwrap();
        assert_eq!(memory_count(&index, "old-tag"), 1);

        // Update with new tags
        index
            .update_memory(&memory, &tags(&["new-tag"]), Namespace::Patterns)
            .unwrap();

        // Old tag should be gone (topic removed since empty)
        assert!(index.get_topic_memories("old-tag").unwrap().is_empty());

        // New tag and new namespace should exist
        assert_eq!(memory_count(&index, "new-tag"), 1);
        assert_eq!(memory_count(&index, "patterns"), 1);
    }

    #[test]
    fn test_add_content_keywords() {
        let index = TopicIndexService::new();
        let memory = MemoryId::new("test-1");

        // Add memory first (for namespace)
        index
            .add_memory(&memory, &[], Namespace::Learnings)
            .unwrap();

        // "rust" is repeated so that it ranks among the top 5 keywords by
        // frequency regardless of how equally frequent words are ordered.
        index
            .add_content_keywords(
                &memory,
                "Rust rust RUST programming systems",
                Namespace::Learnings,
            )
            .unwrap();

        // "rust" appears 3 times, so it should be in top 5 keywords
        let under_rust = index.get_topic_memories("rust").unwrap();
        assert!(!under_rust.is_empty());
        assert!(under_rust.iter().any(|m| m.as_str() == "test-1"));
    }

    #[test]
    fn test_add_content_keywords_filters_short_words() {
        let index = TopicIndexService::new();
        let memory = MemoryId::new("test-1");

        index
            .add_content_keywords(&memory, "A is to be or not to be", Namespace::Learnings)
            .unwrap();

        // Short words should not be indexed
        for short in ["a", "is", "to"] {
            assert!(index.get_topic_memories(short).unwrap().is_empty());
        }
    }

    #[test]
    fn test_clear() {
        let index = TopicIndexService::new();
        let first = MemoryId::new("test-1");
        let second = MemoryId::new("test-2");

        index
            .add_memory(&first, &tags(&["rust"]), Namespace::Decisions)
            .unwrap();
        index
            .add_memory(&second, &tags(&["python"]), Namespace::Patterns)
            .unwrap();

        // Set last_refresh
        mark_refreshed(&index);

        assert_eq!(index.topic_count(), 4); // rust, python, decisions, patterns
        assert!(!index.needs_refresh());

        // Clear the index
        index.clear().unwrap();

        assert_eq!(index.topic_count(), 0);
        assert_eq!(index.association_count(), 0);
        assert!(index.needs_refresh()); // last_refresh cleared
    }

    #[test]
    fn test_incremental_vs_full_rebuild_equivalence() {
        // Verify that incremental add/remove produces same result as full rebuild would
        let index = TopicIndexService::new();
        let first = MemoryId::new("test-1");
        let second = MemoryId::new("test-2");
        let third = MemoryId::new("test-3");

        // Add memories incrementally
        index
            .add_memory(&first, &tags(&["rust", "async"]), Namespace::Decisions)
            .unwrap();
        index
            .add_memory(&second, &tags(&["rust"]), Namespace::Patterns)
            .unwrap();
        index
            .add_memory(&third, &tags(&["python"]), Namespace::Learnings)
            .unwrap();

        // Remove one
        index.remove_memory(&second).unwrap();

        // Verify final state
        let under_rust = index.get_topic_memories("rust").unwrap();
        assert_eq!(under_rust.len(), 1);
        assert_eq!(under_rust[0].as_str(), "test-1");

        assert_eq!(memory_count(&index, "async"), 1);
        assert_eq!(memory_count(&index, "python"), 1);
    }
}