1use crate::models::{MemoryId, Namespace};
7use crate::services::RecallService;
8use crate::{Error, Result, SearchFilter};
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::sync::{Arc, RwLock};
12use std::time::{Duration, Instant};
13
/// Refresh interval, in seconds, used by `TopicIndexService::new` when no
/// custom interval is configured (five minutes).
const DEFAULT_REFRESH_INTERVAL_SECS: u64 = 5 * 60;
/// Value passed to `RecallService::list_all` when the index is rebuilt;
/// presumably the maximum number of memories returned (confirm against the
/// recall service).
const MAX_INDEX_MEMORIES: usize = 10_000;
/// Shortest word, measured in bytes, accepted as a content keyword.
const MIN_TOPIC_WORD_LENGTH: usize = 3;
/// Longest word, measured in bytes, accepted as a content keyword.
const MAX_TOPIC_WORD_LENGTH: usize = 30;
23
24#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct TopicInfo {
27 pub name: String,
29 pub memory_count: usize,
31 pub namespaces: Vec<Namespace>,
33}
34
35pub struct TopicIndexService {
37 topics: Arc<RwLock<HashMap<String, Vec<MemoryId>>>>,
39 topic_namespaces: Arc<RwLock<HashMap<String, Vec<Namespace>>>>,
41 last_refresh: Arc<RwLock<Option<Instant>>>,
43 refresh_interval: Duration,
45}
46
47impl TopicIndexService {
48 #[must_use]
50 pub fn new() -> Self {
51 Self {
52 topics: Arc::new(RwLock::new(HashMap::new())),
53 topic_namespaces: Arc::new(RwLock::new(HashMap::new())),
54 last_refresh: Arc::new(RwLock::new(None)),
55 refresh_interval: Duration::from_secs(DEFAULT_REFRESH_INTERVAL_SECS),
56 }
57 }
58
59 #[must_use]
61 pub const fn with_refresh_interval(mut self, interval: Duration) -> Self {
62 self.refresh_interval = interval;
63 self
64 }
65
66 #[must_use]
68 pub fn needs_refresh(&self) -> bool {
69 let last = self.last_refresh.read().ok().and_then(|guard| *guard);
70 match last {
71 Some(t) => t.elapsed() > self.refresh_interval,
72 None => true,
73 }
74 }
75
76 pub fn build_index(&self, recall: &RecallService) -> Result<()> {
87 let filter = SearchFilter::new();
88 let result = recall.list_all(&filter, MAX_INDEX_MEMORIES)?;
89
90 let mut topics_map: HashMap<String, Vec<MemoryId>> = HashMap::new();
91 let mut namespace_map: HashMap<String, Vec<Namespace>> = HashMap::new();
92
93 for hit in &result.memories {
94 let memory = &hit.memory;
95
96 for tag in &memory.tags {
98 let topic = normalize_topic(tag);
99 add_topic_entry(
100 &topic,
101 &memory.id,
102 memory.namespace,
103 &mut topics_map,
104 &mut namespace_map,
105 );
106 }
107
108 let ns_topic = normalize_topic(memory.namespace.as_str());
110 add_topic_entry(
111 &ns_topic,
112 &memory.id,
113 memory.namespace,
114 &mut topics_map,
115 &mut namespace_map,
116 );
117
118 let keywords = extract_content_keywords(&memory.content);
120 for keyword in keywords.into_iter().take(5) {
121 let topic = normalize_topic(&keyword);
122 add_topic_with_min_length(
123 &topic,
124 3,
125 &memory.id,
126 memory.namespace,
127 &mut topics_map,
128 &mut namespace_map,
129 );
130 }
131 }
132
133 for ids in topics_map.values_mut() {
135 ids.sort_by(|a, b| a.as_str().cmp(b.as_str()));
136 ids.dedup_by(|a, b| a.as_str() == b.as_str());
137 }
138
139 {
141 let mut guard = self.topics.write().map_err(|_| Error::OperationFailed {
142 operation: "build_index".to_string(),
143 cause: "Lock poisoned".to_string(),
144 })?;
145 *guard = topics_map;
146 }
147
148 {
149 let mut guard = self
150 .topic_namespaces
151 .write()
152 .map_err(|_| Error::OperationFailed {
153 operation: "build_index".to_string(),
154 cause: "Lock poisoned".to_string(),
155 })?;
156 *guard = namespace_map;
157 }
158
159 {
161 let mut guard = self
162 .last_refresh
163 .write()
164 .map_err(|_| Error::OperationFailed {
165 operation: "build_index".to_string(),
166 cause: "Lock poisoned".to_string(),
167 })?;
168 *guard = Some(Instant::now());
169 }
170
171 Ok(())
172 }
173
174 pub fn list_topics(&self) -> Result<Vec<TopicInfo>> {
180 let topics_guard = self.topics.read().map_err(|_| Error::OperationFailed {
181 operation: "list_topics".to_string(),
182 cause: "Lock poisoned".to_string(),
183 })?;
184
185 let ns_guard = self
186 .topic_namespaces
187 .read()
188 .map_err(|_| Error::OperationFailed {
189 operation: "list_topics".to_string(),
190 cause: "Lock poisoned".to_string(),
191 })?;
192
193 let mut topics: Vec<TopicInfo> = topics_guard
194 .iter()
195 .map(|(name, ids)| TopicInfo {
196 name: name.clone(),
197 memory_count: ids.len(),
198 namespaces: ns_guard.get(name).cloned().unwrap_or_default(),
199 })
200 .collect();
201
202 topics.sort_by(|a, b| b.memory_count.cmp(&a.memory_count));
204
205 Ok(topics)
206 }
207
208 pub fn get_topic_memories(&self, topic: &str) -> Result<Vec<MemoryId>> {
214 let normalized = normalize_topic(topic);
215 let guard = self.topics.read().map_err(|_| Error::OperationFailed {
216 operation: "get_topic_memories".to_string(),
217 cause: "Lock poisoned".to_string(),
218 })?;
219
220 Ok(guard.get(&normalized).cloned().unwrap_or_default())
221 }
222
223 pub fn get_topic_info(&self, topic: &str) -> Result<Option<TopicInfo>> {
229 let normalized = normalize_topic(topic);
230
231 let topics_guard = self.topics.read().map_err(|_| Error::OperationFailed {
232 operation: "get_topic_info".to_string(),
233 cause: "Lock poisoned".to_string(),
234 })?;
235
236 let ns_guard = self
237 .topic_namespaces
238 .read()
239 .map_err(|_| Error::OperationFailed {
240 operation: "get_topic_info".to_string(),
241 cause: "Lock poisoned".to_string(),
242 })?;
243
244 match topics_guard.get(&normalized) {
245 Some(ids) => Ok(Some(TopicInfo {
246 name: normalized,
247 memory_count: ids.len(),
248 namespaces: ns_guard.get(topic).cloned().unwrap_or_default(),
249 })),
250 None => Ok(None),
251 }
252 }
253
254 pub fn add_memory(
262 &self,
263 memory_id: &MemoryId,
264 tags: &[String],
265 namespace: Namespace,
266 ) -> Result<()> {
267 let mut topics_guard = self.topics.write().map_err(|_| Error::OperationFailed {
268 operation: "add_memory".to_string(),
269 cause: "Lock poisoned".to_string(),
270 })?;
271
272 let mut ns_guard = self
273 .topic_namespaces
274 .write()
275 .map_err(|_| Error::OperationFailed {
276 operation: "add_memory".to_string(),
277 cause: "Lock poisoned".to_string(),
278 })?;
279
280 for tag in tags {
282 let topic = normalize_topic(tag);
283 add_topic_entry_guarded(
284 &topic,
285 memory_id,
286 namespace,
287 &mut topics_guard,
288 &mut ns_guard,
289 );
290 }
291
292 let ns_topic = normalize_topic(namespace.as_str());
294 add_topic_entry_guarded(
295 &ns_topic,
296 memory_id,
297 namespace,
298 &mut topics_guard,
299 &mut ns_guard,
300 );
301
302 Ok(())
303 }
304
305 #[must_use]
307 pub fn topic_count(&self) -> usize {
308 self.topics.read().map(|guard| guard.len()).unwrap_or(0)
309 }
310
311 #[must_use]
313 pub fn association_count(&self) -> usize {
314 self.topics
315 .read()
316 .map(|guard| guard.values().map(Vec::len).sum())
317 .unwrap_or(0)
318 }
319
320 pub fn remove_memory(&self, memory_id: &MemoryId) -> Result<()> {
329 let mut topics_guard = self.topics.write().map_err(|_| Error::OperationFailed {
330 operation: "remove_memory".to_string(),
331 cause: "Lock poisoned".to_string(),
332 })?;
333
334 for ids in topics_guard.values_mut() {
336 ids.retain(|id| id.as_str() != memory_id.as_str());
337 }
338
339 topics_guard.retain(|_, ids| !ids.is_empty());
341
342 Ok(())
347 }
348
349 pub fn update_memory(
358 &self,
359 memory_id: &MemoryId,
360 new_tags: &[String],
361 new_namespace: Namespace,
362 ) -> Result<()> {
363 self.remove_memory(memory_id)?;
364 self.add_memory(memory_id, new_tags, new_namespace)?;
365 Ok(())
366 }
367
368 pub fn add_content_keywords(
377 &self,
378 memory_id: &MemoryId,
379 content: &str,
380 namespace: Namespace,
381 ) -> Result<()> {
382 let mut topics_guard = self.topics.write().map_err(|_| Error::OperationFailed {
383 operation: "add_content_keywords".to_string(),
384 cause: "Lock poisoned".to_string(),
385 })?;
386
387 let mut ns_guard = self
388 .topic_namespaces
389 .write()
390 .map_err(|_| Error::OperationFailed {
391 operation: "add_content_keywords".to_string(),
392 cause: "Lock poisoned".to_string(),
393 })?;
394
395 let keywords = extract_content_keywords(content);
396 for keyword in keywords.into_iter().take(5) {
397 let topic = normalize_topic(&keyword);
398 if topic.len() >= 3 {
399 add_topic_entry_guarded(
400 &topic,
401 memory_id,
402 namespace,
403 &mut topics_guard,
404 &mut ns_guard,
405 );
406 }
407 }
408
409 Ok(())
410 }
411
412 pub fn clear(&self) -> Result<()> {
420 {
421 let mut guard = self.topics.write().map_err(|_| Error::OperationFailed {
422 operation: "clear".to_string(),
423 cause: "Lock poisoned".to_string(),
424 })?;
425 guard.clear();
426 }
427
428 {
429 let mut guard = self
430 .topic_namespaces
431 .write()
432 .map_err(|_| Error::OperationFailed {
433 operation: "clear".to_string(),
434 cause: "Lock poisoned".to_string(),
435 })?;
436 guard.clear();
437 }
438
439 {
440 let mut guard = self
441 .last_refresh
442 .write()
443 .map_err(|_| Error::OperationFailed {
444 operation: "clear".to_string(),
445 cause: "Lock poisoned".to_string(),
446 })?;
447 *guard = None;
448 }
449
450 Ok(())
451 }
452}
453
454impl Default for TopicIndexService {
455 fn default() -> Self {
456 Self::new()
457 }
458}
459
460fn add_topic_entry(
462 topic: &str,
463 memory_id: &MemoryId,
464 namespace: Namespace,
465 topics_map: &mut HashMap<String, Vec<MemoryId>>,
466 namespace_map: &mut HashMap<String, Vec<Namespace>>,
467) {
468 if topic.is_empty() {
469 return;
470 }
471 topics_map
472 .entry(topic.to_string())
473 .or_default()
474 .push(memory_id.clone());
475 insert_namespace_if_missing(namespace_map, topic, namespace);
476}
477
478fn add_topic_with_min_length(
480 topic: &str,
481 min_len: usize,
482 memory_id: &MemoryId,
483 namespace: Namespace,
484 topics_map: &mut HashMap<String, Vec<MemoryId>>,
485 namespace_map: &mut HashMap<String, Vec<Namespace>>,
486) {
487 if topic.len() >= min_len {
488 add_topic_entry(topic, memory_id, namespace, topics_map, namespace_map);
489 }
490}
491
492fn add_topic_entry_guarded(
494 topic: &str,
495 memory_id: &MemoryId,
496 namespace: Namespace,
497 topics_guard: &mut HashMap<String, Vec<MemoryId>>,
498 ns_guard: &mut HashMap<String, Vec<Namespace>>,
499) {
500 if topic.is_empty() {
501 return;
502 }
503 topics_guard
504 .entry(topic.to_string())
505 .or_default()
506 .push(memory_id.clone());
507 insert_namespace_if_missing(ns_guard, topic, namespace);
508}
509
510fn insert_namespace_if_missing(
512 map: &mut HashMap<String, Vec<Namespace>>,
513 topic: &str,
514 namespace: Namespace,
515) {
516 let ns_list = map.entry(topic.to_string()).or_default();
517 if !ns_list.contains(&namespace) {
518 ns_list.push(namespace);
519 }
520}
521
/// Produces the canonical index key for a topic name.
///
/// The input is trimmed and lowercased, then every character that is neither
/// alphanumeric nor `-` is removed (so `"test_case"` becomes `"testcase"`).
fn normalize_topic(topic: &str) -> String {
    let mut normalized = topic.trim().to_lowercase();
    normalized.retain(|c| c.is_alphanumeric() || c == '-');
    normalized
}
529
530fn extract_content_keywords(content: &str) -> Vec<String> {
532 static STOP_WORDS: &[&str] = &[
534 "the", "a", "an", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had",
535 "do", "does", "did", "will", "would", "could", "should", "may", "might", "must", "shall",
536 "can", "need", "dare", "ought", "used", "to", "of", "in", "for", "on", "with", "at", "by",
537 "from", "as", "into", "through", "during", "before", "after", "above", "below", "between",
538 "under", "again", "further", "then", "once", "here", "there", "when", "where", "why",
539 "how", "all", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not",
540 "only", "own", "same", "so", "than", "too", "very", "just", "also", "now", "and", "but",
541 "or", "if", "because", "until", "while", "this", "that", "these", "those", "what", "which",
542 "who", "whom", "whose", "it", "its", "they", "them", "their", "we", "us", "our", "you",
543 "your", "i", "my", "me", "he", "him", "his", "she", "her",
544 ];
545
546 let words: Vec<String> = content
547 .split(|c: char| !c.is_alphanumeric())
548 .filter(|w| w.len() >= MIN_TOPIC_WORD_LENGTH && w.len() <= MAX_TOPIC_WORD_LENGTH)
549 .map(str::to_lowercase)
550 .filter(|w| !STOP_WORDS.contains(&w.as_str()))
551 .filter(|w| !w.chars().all(char::is_numeric))
552 .collect();
553
554 let mut freq: HashMap<String, usize> = HashMap::new();
556 for word in words {
557 *freq.entry(word).or_insert(0) += 1;
558 }
559
560 let mut sorted: Vec<_> = freq.into_iter().collect();
562 sorted.sort_by(|a, b| b.1.cmp(&a.1));
563 sorted.into_iter().map(|(w, _)| w).collect()
564}
565
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts string literals into the owned tag list `add_memory` expects.
    fn tags(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| (*name).to_string()).collect()
    }

    /// Number of memories currently indexed under `topic`.
    fn count_for(service: &TopicIndexService, topic: &str) -> usize {
        service.get_topic_memories(topic).unwrap().len()
    }

    /// Marks the index as refreshed right now, as a full rebuild would.
    fn mark_refreshed(service: &TopicIndexService) {
        let mut guard = service.last_refresh.write().unwrap();
        *guard = Some(Instant::now());
    }

    #[test]
    fn test_topic_index_creation() {
        let service = TopicIndexService::new();
        assert!(service.needs_refresh());
        assert_eq!(service.topic_count(), 0);
    }

    #[test]
    fn test_normalize_topic() {
        let cases = [
            ("Rust", "rust"),
            ("  Python  ", "python"),
            ("error-handling", "error-handling"),
            ("test_case", "testcase"),
        ];
        for (input, expected) in cases {
            assert_eq!(normalize_topic(input), expected);
        }
    }

    #[test]
    fn test_extract_content_keywords() {
        let keywords =
            extract_content_keywords("The Rust programming language is great for systems programming");

        assert!(keywords.contains(&"rust".to_string()));
        assert!(keywords.contains(&"programming".to_string()));
        // Stop words never become keywords.
        assert!(!keywords.contains(&"the".to_string()));
    }

    #[test]
    fn test_add_memory() {
        let service = TopicIndexService::new();
        let id = MemoryId::new("test-123");

        service
            .add_memory(&id, &tags(&["rust", "error-handling"]), Namespace::Patterns)
            .unwrap();

        let rust_memories = service.get_topic_memories("rust").unwrap();
        assert_eq!(rust_memories.len(), 1);
        assert_eq!(rust_memories[0].as_str(), "test-123");

        // The namespace name is indexed as a topic as well.
        assert_eq!(count_for(&service, "patterns"), 1);
    }

    #[test]
    fn test_list_topics() {
        let service = TopicIndexService::new();
        let first = MemoryId::new("test-1");
        let second = MemoryId::new("test-2");

        service
            .add_memory(&first, &tags(&["rust"]), Namespace::Decisions)
            .unwrap();
        service
            .add_memory(&second, &tags(&["rust", "async"]), Namespace::Patterns)
            .unwrap();

        let topics = service.list_topics().unwrap();
        let memory_count = |name: &str| {
            topics
                .iter()
                .find(|t| t.name == name)
                .unwrap()
                .memory_count
        };

        assert_eq!(memory_count("rust"), 2);
        assert_eq!(memory_count("async"), 1);
    }

    #[test]
    fn test_get_topic_info() {
        let service = TopicIndexService::new();
        let id = MemoryId::new("test-1");

        service
            .add_memory(&id, &tags(&["authentication"]), Namespace::Decisions)
            .unwrap();

        let info = service.get_topic_info("authentication").unwrap();
        assert!(info.is_some());
        let info = info.unwrap();
        assert_eq!(info.name, "authentication");
        assert_eq!(info.memory_count, 1);
        assert!(info.namespaces.contains(&Namespace::Decisions));
    }

    #[test]
    fn test_get_topic_info_not_found() {
        let service = TopicIndexService::new();
        assert!(service.get_topic_info("nonexistent").unwrap().is_none());
    }

    #[test]
    fn test_topic_info_serialization() {
        let info = TopicInfo {
            name: "rust".to_string(),
            memory_count: 5,
            namespaces: vec![Namespace::Decisions, Namespace::Patterns],
        };

        let json = serde_json::to_string(&info).unwrap();
        assert!(json.contains("rust"));
        assert!(json.contains('5'));
    }

    #[test]
    fn test_refresh_interval() {
        let service = TopicIndexService::new().with_refresh_interval(Duration::from_secs(60));
        assert!(service.needs_refresh());

        mark_refreshed(&service);
        assert!(!service.needs_refresh());
    }

    #[test]
    fn test_remove_memory() {
        let service = TopicIndexService::new();
        let first = MemoryId::new("test-1");
        let second = MemoryId::new("test-2");

        service
            .add_memory(&first, &tags(&["rust"]), Namespace::Decisions)
            .unwrap();
        service
            .add_memory(&second, &tags(&["rust"]), Namespace::Patterns)
            .unwrap();
        assert_eq!(count_for(&service, "rust"), 2);

        service.remove_memory(&first).unwrap();

        let rust_memories = service.get_topic_memories("rust").unwrap();
        assert_eq!(rust_memories.len(), 1);
        assert_eq!(rust_memories[0].as_str(), "test-2");
    }

    #[test]
    fn test_remove_memory_cleans_empty_topics() {
        let service = TopicIndexService::new();
        let id = MemoryId::new("test-1");

        service
            .add_memory(&id, &tags(&["unique-topic"]), Namespace::Decisions)
            .unwrap();
        assert_eq!(count_for(&service, "unique-topic"), 1);

        service.remove_memory(&id).unwrap();

        assert!(
            service
                .get_topic_memories("unique-topic")
                .unwrap()
                .is_empty()
        );

        // The emptied topic must not be listed any more.
        let topics = service.list_topics().unwrap();
        assert!(!topics.iter().any(|t| t.name == "unique-topic"));
    }

    #[test]
    fn test_remove_nonexistent_memory() {
        let service = TopicIndexService::new();
        let id = MemoryId::new("nonexistent");

        // Removing an unknown id is not an error.
        assert!(service.remove_memory(&id).is_ok());
    }

    #[test]
    fn test_update_memory() {
        let service = TopicIndexService::new();
        let id = MemoryId::new("test-1");

        service
            .add_memory(&id, &tags(&["old-tag"]), Namespace::Decisions)
            .unwrap();
        assert_eq!(count_for(&service, "old-tag"), 1);

        service
            .update_memory(&id, &tags(&["new-tag"]), Namespace::Patterns)
            .unwrap();

        assert!(service.get_topic_memories("old-tag").unwrap().is_empty());
        assert_eq!(count_for(&service, "new-tag"), 1);
        assert_eq!(count_for(&service, "patterns"), 1);
    }

    #[test]
    fn test_add_content_keywords() {
        let service = TopicIndexService::new();
        let id = MemoryId::new("test-1");

        service.add_memory(&id, &[], Namespace::Learnings).unwrap();
        service
            .add_content_keywords(&id, "Rust rust RUST programming systems", Namespace::Learnings)
            .unwrap();

        let rust_memories = service.get_topic_memories("rust").unwrap();
        assert!(!rust_memories.is_empty());
        assert!(rust_memories.iter().any(|m| m.as_str() == "test-1"));
    }

    #[test]
    fn test_add_content_keywords_filters_short_words() {
        let service = TopicIndexService::new();
        let id = MemoryId::new("test-1");

        service
            .add_content_keywords(&id, "A is to be or not to be", Namespace::Learnings)
            .unwrap();

        for short_word in ["a", "is", "to"] {
            assert!(service.get_topic_memories(short_word).unwrap().is_empty());
        }
    }

    #[test]
    fn test_clear() {
        let service = TopicIndexService::new();
        let first = MemoryId::new("test-1");
        let second = MemoryId::new("test-2");

        service
            .add_memory(&first, &tags(&["rust"]), Namespace::Decisions)
            .unwrap();
        service
            .add_memory(&second, &tags(&["python"]), Namespace::Patterns)
            .unwrap();
        mark_refreshed(&service);

        // Two tags plus two namespace topics.
        assert_eq!(service.topic_count(), 4);
        assert!(!service.needs_refresh());

        service.clear().unwrap();

        assert_eq!(service.topic_count(), 0);
        assert_eq!(service.association_count(), 0);
        // Clearing also resets the refresh timestamp.
        assert!(service.needs_refresh());
    }

    #[test]
    fn test_incremental_vs_full_rebuild_equivalence() {
        let service = TopicIndexService::new();
        let first = MemoryId::new("test-1");
        let second = MemoryId::new("test-2");
        let third = MemoryId::new("test-3");

        service
            .add_memory(&first, &tags(&["rust", "async"]), Namespace::Decisions)
            .unwrap();
        service
            .add_memory(&second, &tags(&["rust"]), Namespace::Patterns)
            .unwrap();
        service
            .add_memory(&third, &tags(&["python"]), Namespace::Learnings)
            .unwrap();

        service.remove_memory(&second).unwrap();

        let rust_memories = service.get_topic_memories("rust").unwrap();
        assert_eq!(rust_memories.len(), 1);
        assert_eq!(rust_memories[0].as_str(), "test-1");

        assert_eq!(count_for(&service, "async"), 1);
        assert_eq!(count_for(&service, "python"), 1);
    }
}