subcog/services/deduplication/
exact_match.rs1use crate::Result;
7use crate::models::{MemoryId, Namespace, SearchFilter};
8use crate::services::recall::RecallService;
9use std::sync::Arc;
10use std::time::Instant;
11use tracing::instrument;
12
13use super::hasher::ContentHasher;
14
15pub struct ExactMatchChecker {
40 recall: Arc<RecallService>,
42}
43
44impl ExactMatchChecker {
45 #[must_use]
51 pub const fn new(recall: Arc<RecallService>) -> Self {
52 Self { recall }
53 }
54
55 #[instrument(
81 skip(self, content),
82 fields(
83 operation = "exact_match_check",
84 namespace = %namespace.as_str(),
85 content_length = content.len()
86 )
87 )]
88 #[allow(clippy::cast_precision_loss)] #[allow(clippy::option_if_let_else)] pub fn check(
91 &self,
92 content: &str,
93 namespace: Namespace,
94 domain: &str,
95 ) -> Result<Option<(MemoryId, String)>> {
96 let start = Instant::now();
97
98 let hash = ContentHasher::hash(content);
100 let hash_tag = ContentHasher::hash_to_tag(&hash);
101
102 tracing::debug!(hash_tag = %hash_tag, "Searching for exact match");
103
104 let filter = SearchFilter::new()
106 .with_namespace(namespace)
107 .with_tag(&hash_tag);
108
109 let result = self.recall.list_all(&filter, 1)?;
112
113 let duration_ms = start.elapsed().as_millis();
115 metrics::histogram!(
116 "deduplication_check_duration_ms",
117 "checker" => "exact_match",
118 "found" => if result.memories.is_empty() { "false" } else { "true" }
119 )
120 .record(duration_ms as f64);
121
122 if let Some(hit) = result.memories.first() {
123 let memory_id = hit.memory.id.clone();
124 let urn = format!("subcog://{}/{}/{}", domain, namespace.as_str(), memory_id);
125
126 tracing::debug!(
127 memory_id = %memory_id,
128 urn = %urn,
129 duration_ms = %duration_ms,
130 "Exact match found"
131 );
132
133 Ok(Some((memory_id, urn)))
134 } else {
135 tracing::debug!(duration_ms = %duration_ms, "No exact match found");
136 Ok(None)
137 }
138 }
139
140 #[must_use]
153 pub fn content_to_tag(content: &str) -> String {
154 let hash = ContentHasher::hash(content);
155 ContentHasher::hash_to_tag(&hash)
156 }
157}
158
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::{Domain, Memory, MemoryStatus};
    use crate::storage::index::SqliteBackend;
    use crate::storage::traits::IndexBackend;

    /// Builds an active `Memory` fixture with fixed timestamps and the given
    /// id, content, namespace and tags; every optional field is left unset.
    fn create_test_memory(
        id: &str,
        content: &str,
        namespace: Namespace,
        tags: Vec<String>,
    ) -> Memory {
        Memory {
            id: MemoryId::new(id),
            content: content.to_string(),
            namespace,
            domain: Domain::new(),
            project_id: None,
            branch: None,
            file_path: None,
            status: MemoryStatus::Active,
            created_at: 1_234_567_890,
            updated_at: 1_234_567_890,
            tombstoned_at: None,
            expires_at: None,
            embedding: None,
            tags,
            #[cfg(feature = "group-scope")]
            group_id: None,
            source: None,
            is_summary: false,
            source_memory_ids: None,
            consolidation_timestamp: None,
        }
    }

    /// Builds a `Namespace::Decisions` memory tagged with the hash tag of its
    /// own content, i.e. a memory that `check` should find for that content.
    fn tagged_decision(id: &str, content: &str) -> Memory {
        let hash_tag = ExactMatchChecker::content_to_tag(content);
        create_test_memory(id, content, Namespace::Decisions, vec![hash_tag])
    }

    /// Builds a checker backed by a fresh in-memory SQLite index that has been
    /// seeded with `memories`.
    fn checker_over(memories: &[Memory]) -> ExactMatchChecker {
        let index = SqliteBackend::in_memory().unwrap();
        for memory in memories {
            index.index(memory).unwrap();
        }
        ExactMatchChecker::new(Arc::new(RecallService::with_index(index)))
    }

    #[test]
    fn test_content_to_tag() {
        let content = "Use PostgreSQL for storage";
        let tag = ExactMatchChecker::content_to_tag(content);

        assert!(tag.starts_with("hash:sha256:"));
        assert_eq!(tag.len(), "hash:sha256:".len() + 16);
    }

    #[test]
    fn test_content_to_tag_normalization() {
        // Surrounding whitespace must not influence the tag.
        let tag1 = ExactMatchChecker::content_to_tag("Use PostgreSQL for storage");
        let tag2 = ExactMatchChecker::content_to_tag(" Use PostgreSQL for storage ");

        assert_eq!(tag1, tag2);
    }

    #[test]
    fn test_content_to_tag_case_insensitive() {
        // Letter case must not influence the tag.
        let tag1 = ExactMatchChecker::content_to_tag("Use PostgreSQL");
        let tag2 = ExactMatchChecker::content_to_tag("use postgresql");

        assert_eq!(tag1, tag2);
    }

    #[test]
    fn test_check_no_match() {
        let checker = checker_over(&[]);

        let result = checker
            .check("Non-existent content", Namespace::Decisions, "project")
            .unwrap();

        assert!(result.is_none());
    }

    #[test]
    fn test_check_with_match() {
        let content = "Use PostgreSQL for storage";
        let checker = checker_over(&[tagged_decision("test-memory-123", content)]);

        let result = checker
            .check(content, Namespace::Decisions, "project")
            .unwrap();

        let (id, urn) = result.expect("indexed content should be reported as a match");
        assert_eq!(id.as_str(), "test-memory-123");
        assert_eq!(urn, "subcog://project/decisions/test-memory-123");
    }

    #[test]
    fn test_check_different_namespace() {
        let content = "Use PostgreSQL for storage";
        let checker = checker_over(&[tagged_decision("test-memory-123", content)]);

        // Same content, but searched in a namespace the memory is not stored in.
        let result = checker
            .check(content, Namespace::Patterns, "project")
            .unwrap();

        assert!(result.is_none());
    }

    #[test]
    fn test_check_normalized_content_matches() {
        let checker = checker_over(&[tagged_decision("test-memory-456", "Use PostgreSQL")]);

        // Differs from the stored content only in case and surrounding whitespace.
        let result = checker
            .check(" USE postgresql ", Namespace::Decisions, "project")
            .unwrap();

        let (id, _) = result.expect("normalized content should be reported as a match");
        assert_eq!(id.as_str(), "test-memory-456");
    }
}