1use crate::llm::{LlmProvider, OperationMode, build_system_prompt};
21use crate::models::Domain;
22use crate::models::graph::{Entity, EntityType, Relationship, RelationshipType};
23use crate::{Error, Result};
24use serde::{Deserialize, Serialize};
25use std::collections::HashMap;
26use std::sync::Arc;
27
/// Technology names recognised by the pattern-based fallback extractor.
///
/// Each entry is compared case-sensitively against the input text by
/// `extract_fallback`, and a hit is reported as a `Technology` entity whose
/// name is the entry itself.
static TECH_PATTERNS: &[&str] = &[
    // Programming languages
    "Rust",
    "Python",
    "Java",
    "JavaScript",
    "TypeScript",
    "Go",
    "C++",
    "C#",
    "Ruby",
    "PHP",
    "Swift",
    "Kotlin",
    "Scala",
    "Elixir",
    "Haskell",
    "Clojure",
    "F#",
    "Zig",
    // Databases and data stores
    "PostgreSQL",
    "MySQL",
    "SQLite",
    "Redis",
    "MongoDB",
    "Cassandra",
    "DynamoDB",
    "CockroachDB",
    "ClickHouse",
    "Elasticsearch",
    "Neo4j",
    "Firestore",
    // Web and application frameworks
    "React",
    "Vue",
    "Angular",
    "Svelte",
    "Next.js",
    "Nuxt",
    "Express",
    "Django",
    "Rails",
    "Laravel",
    "Spring",
    "Flask",
    "FastAPI",
    "Actix",
    // Cloud and hosting providers
    "AWS",
    "Azure",
    "GCP",
    "Cloudflare",
    "Vercel",
    "Netlify",
    "Heroku",
    "DigitalOcean",
    "Linode",
    // Containers and orchestration
    "Docker",
    "Kubernetes",
    "k8s",
    "Podman",
    "Nomad",
    "ECS",
    "EKS",
    "GKE",
    // Infrastructure automation and observability
    "Terraform",
    "Ansible",
    "Prometheus",
    "Grafana",
    "Datadog",
    "Jaeger",
    // Messaging and streaming
    "Kafka",
    "RabbitMQ",
    "NATS",
    "Pulsar",
    "SQS",
    "Pub/Sub",
    // Build tools and package managers
    "Webpack",
    "Vite",
    "esbuild",
    "Rollup",
    "Cargo",
    "npm",
    "yarn",
    "pnpm",
    "Maven",
    "Gradle",
    // Runtimes
    "Node.js",
    "Deno",
    "Bun",
    "WASM",
    // Protocols and API styles
    "REST",
    "GraphQL",
    "gRPC",
    "WebSocket",
    "MQTT",
    "OpenAPI",
];
136
/// Outcome of an entity-extraction run.
///
/// The `Default` value is empty, with `used_fallback` set to `false`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ExtractionResult {
    /// Entities found in the text.
    pub entities: Vec<ExtractedEntity>,
    /// Relationships found between entities.
    pub relationships: Vec<ExtractedRelationship>,
    /// `true` when the result came from the pattern-based fallback rather than the LLM.
    pub used_fallback: bool,
    /// Human-readable notes about degraded or empty results.
    pub warnings: Vec<String>,
}
149
/// A single entity as reported by the extractor, before conversion to a graph `Entity`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedEntity {
    /// Name of the entity.
    pub name: String,
    /// Free-form type label; (de)serialized under the key `type`.
    #[serde(rename = "type")]
    pub entity_type: String,
    /// Confidence score; falls back to `default_confidence()` when absent from the input.
    #[serde(default = "default_confidence")]
    pub confidence: f32,
    /// Alternative names; empty when absent from the input.
    #[serde(default)]
    pub aliases: Vec<String>,
    /// Optional free-text description.
    #[serde(default)]
    pub description: Option<String>,
}
168
/// Serde default for `confidence` fields that are missing from a deserialized payload.
const fn default_confidence() -> f32 {
    const DEFAULT_CONFIDENCE: f32 = 0.8;
    DEFAULT_CONFIDENCE
}
172
/// A relationship between two named entities as reported by the extractor.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedRelationship {
    /// Name of the source entity.
    pub from: String,
    /// Name of the target entity.
    pub to: String,
    /// Free-form relationship label; (de)serialized under the key `type`.
    #[serde(rename = "type")]
    pub relationship_type: String,
    /// Confidence score; falls back to `default_confidence()` when absent from the input.
    #[serde(default = "default_confidence")]
    pub confidence: f32,
    /// Optional supporting text; stored as the `evidence` property on the graph relationship.
    #[serde(default)]
    pub evidence: Option<String>,
}
190
/// Outcome of a relationship-inference run over existing entities.
///
/// The `Default` value is empty, with `used_fallback` set to `false`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct InferenceResult {
    /// Relationships inferred between the supplied entities.
    pub relationships: Vec<InferredRelationship>,
    /// `true` when the result came from the heuristic fallback rather than the LLM.
    pub used_fallback: bool,
    /// Human-readable notes about degraded results.
    pub warnings: Vec<String>,
}
201
/// A relationship inferred between two named entities.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InferredRelationship {
    /// Name of the source entity.
    pub from: String,
    /// Name of the target entity.
    pub to: String,
    /// Free-form relationship label; (de)serialized under the key `type`.
    #[serde(rename = "type")]
    pub relationship_type: String,
    /// Confidence score; falls back to `default_confidence()` when absent from the input.
    #[serde(default = "default_confidence")]
    pub confidence: f32,
    /// Optional explanation; stored as the `reasoning` property on the graph relationship.
    #[serde(default)]
    pub reasoning: Option<String>,
}
219
/// JSON shape expected from the LLM for entity extraction.
///
/// Both lists default to empty when their key is missing.
#[derive(Debug, Clone, Deserialize)]
struct LlmExtractionResponse {
    #[serde(default)]
    entities: Vec<ExtractedEntity>,
    #[serde(default)]
    relationships: Vec<ExtractedRelationship>,
}
228
/// JSON shape expected from the LLM for relationship inference.
///
/// The list defaults to empty when the `relationships` key is missing.
#[derive(Debug, Clone, Deserialize)]
struct LlmInferenceResponse {
    #[serde(default)]
    relationships: Vec<InferredRelationship>,
}
235
/// Extracts entities and relationships from text, and infers relationships
/// between existing entities, using an LLM when one is configured and a
/// built-in fallback otherwise.
pub struct EntityExtractorService {
    /// Optional LLM provider; `None` means only the fallbacks are used.
    llm: Option<Arc<dyn LlmProvider>>,
    /// Domain assigned to graph entities created by `to_graph_entities`.
    domain: Domain,
    /// LLM-reported items with confidence below this value are discarded.
    min_confidence: f32,
}
248
249impl EntityExtractorService {
250 #[must_use]
252 pub fn new(llm: Box<dyn LlmProvider>, domain: Domain) -> Self {
253 Self {
254 llm: Some(Arc::from(llm)),
255 domain,
256 min_confidence: 0.5,
257 }
258 }
259
260 #[must_use]
262 pub const fn without_llm(domain: Domain) -> Self {
263 Self {
264 llm: None,
265 domain,
266 min_confidence: 0.5,
267 }
268 }
269
270 #[must_use]
272 pub const fn with_shared_llm(llm: Arc<dyn LlmProvider>, domain: Domain) -> Self {
273 Self {
274 llm: Some(llm),
275 domain,
276 min_confidence: 0.5,
277 }
278 }
279
/// Replaces the minimum confidence threshold and returns the updated service.
///
/// LLM-reported entities and relationships whose confidence is below
/// `threshold` are dropped. The value is stored as given; no range check is
/// performed.
#[must_use]
pub const fn with_min_confidence(mut self, threshold: f32) -> Self {
    self.min_confidence = threshold;
    self
}
286
287 pub fn extract(&self, text: &str) -> Result<ExtractionResult> {
301 if text.trim().is_empty() {
302 return Ok(ExtractionResult::default());
303 }
304
305 match &self.llm {
306 Some(llm) => self.extract_with_llm(llm, text),
307 None => Ok(self.extract_fallback(text)),
308 }
309 }
310
311 fn extract_with_llm(&self, llm: &Arc<dyn LlmProvider>, text: &str) -> Result<ExtractionResult> {
313 let system = build_system_prompt(OperationMode::EntityExtraction, None);
314 let user = format!("Extract entities and relationships from this text:\n\n{text}");
315
316 let response = match llm.complete_with_system(&system, &user) {
317 Ok(r) => r,
318 Err(e) => {
319 tracing::warn!(error = %e, "LLM extraction failed, using fallback");
320 return Ok(self.extract_fallback(text));
321 },
322 };
323
324 let parsed = self.parse_llm_response(&response)?;
326
327 let entities: Vec<_> = parsed
329 .entities
330 .into_iter()
331 .filter(|e| e.confidence >= self.min_confidence)
332 .collect();
333
334 let relationships: Vec<_> = parsed
335 .relationships
336 .into_iter()
337 .filter(|r| r.confidence >= self.min_confidence)
338 .collect();
339
340 Ok(ExtractionResult {
341 entities,
342 relationships,
343 used_fallback: false,
344 warnings: Vec::new(),
345 })
346 }
347
348 fn parse_llm_response(&self, response: &str) -> Result<LlmExtractionResponse> {
350 let json_str = self.extract_json(response);
352
353 serde_json::from_str(&json_str).map_err(|e| {
354 tracing::warn!(error = %e, response = %response, "Failed to parse LLM response");
355 Error::OperationFailed {
356 operation: "parse_entity_extraction".to_string(),
357 cause: format!("Invalid JSON response: {e}"),
358 }
359 })
360 }
361
362 fn extract_json(&self, response: &str) -> String {
364 let trimmed = response.trim();
365
366 if let Some(json) = self.extract_json_from_markdown(trimmed) {
368 return json;
369 }
370
371 if let Some(json) = self.extract_raw_json(trimmed) {
373 return json;
374 }
375
376 trimmed.to_string()
378 }
379
380 fn extract_json_from_markdown(&self, text: &str) -> Option<String> {
382 let start = text.find("```json")?;
383 let end_offset = text[start..]
384 .find("```\n")
385 .or_else(|| text[start..].rfind("```"))?;
386
387 let json_start = start + 7; let json_end = start + end_offset;
389
390 if json_start < json_end {
391 Some(text[json_start..json_end].trim().to_string())
392 } else {
393 None
394 }
395 }
396
397 fn extract_raw_json(&self, text: &str) -> Option<String> {
399 let start = text.find('{')?;
400 let end = text.rfind('}')?;
401
402 if start < end {
403 Some(text[start..=end].to_string())
404 } else {
405 None
406 }
407 }
408
409 fn extract_fallback(&self, text: &str) -> ExtractionResult {
413 let mut entities = Vec::new();
414 let mut warnings = vec!["LLM unavailable, using pattern-based fallback".to_string()];
415
416 for pattern in TECH_PATTERNS {
417 if text.contains(pattern) {
418 entities.push(ExtractedEntity {
419 name: (*pattern).to_string(),
420 entity_type: "Technology".to_string(),
421 confidence: 0.7,
422 aliases: Vec::new(),
423 description: None,
424 });
425 }
426 }
427
428 if entities.is_empty() {
429 warnings.push("No entities detected with fallback patterns".to_string());
430 }
431
432 ExtractionResult {
433 entities,
434 relationships: Vec::new(),
435 used_fallback: true,
436 warnings,
437 }
438 }
439
440 #[must_use]
450 pub fn to_graph_entities(&self, extracted: &ExtractionResult) -> Vec<Entity> {
451 extracted
452 .entities
453 .iter()
454 .map(|e| {
455 let entity_type = parse_entity_type(&e.entity_type);
456 let mut entity = Entity::new(entity_type, &e.name, self.domain.clone());
457 entity.confidence = e.confidence;
458 entity.aliases.clone_from(&e.aliases);
459 if let Some(desc) = &e.description {
460 entity
461 .properties
462 .insert("description".to_string(), desc.clone());
463 }
464 entity
465 })
466 .collect()
467 }
468
469 #[must_use]
482 pub fn to_graph_relationships(
483 &self,
484 extracted: &ExtractionResult,
485 entity_map: &std::collections::HashMap<String, Entity>,
486 ) -> Vec<Relationship> {
487 extracted
488 .relationships
489 .iter()
490 .filter_map(|r| {
491 let from_entity = entity_map.get(&r.from)?;
492 let to_entity = entity_map.get(&r.to)?;
493 let rel_type = parse_relationship_type(&r.relationship_type);
494
495 let mut rel =
496 Relationship::new(from_entity.id.clone(), to_entity.id.clone(), rel_type);
497 rel.confidence = r.confidence;
498 if let Some(evidence) = &r.evidence {
499 rel.properties
500 .insert("evidence".to_string(), evidence.clone());
501 }
502 Some(rel)
503 })
504 .collect()
505 }
506
507 pub fn infer_relationships(&self, entities: &[Entity]) -> Result<InferenceResult> {
524 if entities.is_empty() {
525 return Ok(InferenceResult::default());
526 }
527
528 match &self.llm {
529 Some(llm) => self.infer_with_llm(llm, entities),
530 None => Ok(self.infer_fallback(entities)),
531 }
532 }
533
534 fn infer_with_llm(
536 &self,
537 llm: &Arc<dyn LlmProvider>,
538 entities: &[Entity],
539 ) -> Result<InferenceResult> {
540 let system = build_system_prompt(OperationMode::RelationshipInference, None);
541 let user = self.format_entities_for_inference(entities);
542
543 let response = match llm.complete_with_system(&system, &user) {
544 Ok(r) => r,
545 Err(e) => {
546 tracing::warn!(error = %e, "LLM inference failed, using fallback");
547 return Ok(self.infer_fallback(entities));
548 },
549 };
550
551 let parsed = self.parse_inference_response(&response)?;
553
554 let relationships: Vec<_> = parsed
556 .relationships
557 .into_iter()
558 .filter(|r| r.confidence >= self.min_confidence)
559 .collect();
560
561 Ok(InferenceResult {
562 relationships,
563 used_fallback: false,
564 warnings: Vec::new(),
565 })
566 }
567
568 fn format_entities_for_inference(&self, entities: &[Entity]) -> String {
570 use std::fmt::Write;
571
572 let mut output = String::from("Analyze these entities for potential relationships:\n\n");
573
574 for entity in entities {
575 let _ = writeln!(
576 output,
577 "- {} (type: {:?}, id: {})",
578 entity.name, entity.entity_type, entity.id
579 );
580 if !entity.aliases.is_empty() {
581 let _ = writeln!(output, " Aliases: {}", entity.aliases.join(", "));
582 }
583 }
584
585 output
586 }
587
588 fn parse_inference_response(&self, response: &str) -> Result<LlmInferenceResponse> {
590 let json_str = self.extract_json(response);
591
592 serde_json::from_str(&json_str).map_err(|e| {
593 tracing::warn!(error = %e, response = %response, "Failed to parse inference response");
594 Error::OperationFailed {
595 operation: "parse_relationship_inference".to_string(),
596 cause: format!("Invalid JSON response: {e}"),
597 }
598 })
599 }
600
601 fn infer_fallback(&self, entities: &[Entity]) -> InferenceResult {
605 let mut relationships = Vec::new();
606 let warnings = vec!["LLM unavailable, using heuristic-based fallback".to_string()];
607
608 let entity_map: HashMap<&str, &Entity> =
610 entities.iter().map(|e| (e.name.as_str(), e)).collect();
611
612 let tech_deps: &[(&str, &str)] = &[
614 ("Rust", "cargo"),
615 ("Python", "pip"),
616 ("Node.js", "npm"),
617 ("Java", "Maven"),
618 ("Ruby", "bundler"),
619 ("Go", "go modules"),
620 ("PostgreSQL", "SQL"),
621 ("MySQL", "SQL"),
622 ("SQLite", "SQL"),
623 ("Docker", "containers"),
624 ("Kubernetes", "Docker"),
625 ];
626
627 for (from, to) in tech_deps {
628 if entity_map.contains_key(*from) && entity_map.contains_key(*to) {
629 relationships.push(InferredRelationship {
630 from: (*from).to_string(),
631 to: (*to).to_string(),
632 relationship_type: "Uses".to_string(),
633 confidence: 0.7,
634 reasoning: Some(format!("{from} commonly uses {to}")),
635 });
636 }
637 }
638
639 InferenceResult {
640 relationships,
641 used_fallback: true,
642 warnings,
643 }
644 }
645
646 #[must_use]
657 pub fn inferred_to_graph_relationships(
658 &self,
659 inferred: &InferenceResult,
660 entity_map: &HashMap<String, Entity>,
661 ) -> Vec<Relationship> {
662 inferred
663 .relationships
664 .iter()
665 .filter_map(|r| {
666 let from_entity = entity_map.get(&r.from)?;
667 let to_entity = entity_map.get(&r.to)?;
668 let rel_type = parse_relationship_type(&r.relationship_type);
669
670 let mut rel =
671 Relationship::new(from_entity.id.clone(), to_entity.id.clone(), rel_type);
672 rel.confidence = r.confidence;
673 if let Some(reasoning) = &r.reasoning {
674 rel.properties
675 .insert("reasoning".to_string(), reasoning.clone());
676 }
677 Some(rel)
678 })
679 .collect()
680 }
681}
682
683fn parse_entity_type(s: &str) -> EntityType {
685 match s.to_lowercase().as_str() {
686 "person" => EntityType::Person,
687 "organization" | "org" | "company" | "team" => EntityType::Organization,
688 "technology" | "tech" | "framework" | "tool" | "language" => EntityType::Technology,
689 "file" | "source" | "config" => EntityType::File,
690 _ => EntityType::Concept,
692 }
693}
694
695fn parse_relationship_type(s: &str) -> RelationshipType {
697 match s.to_lowercase().as_str() {
698 "worksat" | "works_at" | "employedby" => RelationshipType::WorksAt,
699 "created" | "authored" | "wrote" => RelationshipType::Created,
700 "uses" | "utilizes" | "employs" => RelationshipType::Uses,
701 "implements" | "realizes" => RelationshipType::Implements,
702 "partof" | "part_of" | "belongsto" => RelationshipType::PartOf,
703 "mentionedin" | "mentioned_in" => RelationshipType::MentionedIn,
704 "supersedes" | "replaces" => RelationshipType::Supersedes,
705 "conflictswith" | "conflicts_with" | "contradicts" => RelationshipType::ConflictsWith,
706 _ => RelationshipType::RelatesTo, }
708}
709
// Unit tests for the extractor. All but the final test run without an LLM
// and therefore exercise the fallback paths and the pure conversion helpers.
#[cfg(test)]
mod tests {
    use super::*;

    // A default result is empty and not marked as fallback.
    #[test]
    fn test_extraction_result_default() {
        let result = ExtractionResult::default();
        assert!(result.entities.is_empty());
        assert!(result.relationships.is_empty());
        assert!(!result.used_fallback);
    }

    // Type labels are matched case-insensitively; unknown labels become Concept.
    #[test]
    fn test_parse_entity_type() {
        assert_eq!(parse_entity_type("Person"), EntityType::Person);
        assert_eq!(parse_entity_type("PERSON"), EntityType::Person);
        assert_eq!(parse_entity_type("Organization"), EntityType::Organization);
        assert_eq!(parse_entity_type("company"), EntityType::Organization);
        assert_eq!(parse_entity_type("Technology"), EntityType::Technology);
        assert_eq!(parse_entity_type("framework"), EntityType::Technology);
        assert_eq!(parse_entity_type("Concept"), EntityType::Concept);
        assert_eq!(parse_entity_type("File"), EntityType::File);
        assert_eq!(parse_entity_type("unknown"), EntityType::Concept);
    }

    // Relationship labels are matched case-insensitively; unknown labels become RelatesTo.
    #[test]
    fn test_parse_relationship_type() {
        assert_eq!(
            parse_relationship_type("WorksAt"),
            RelationshipType::WorksAt
        );
        assert_eq!(
            parse_relationship_type("works_at"),
            RelationshipType::WorksAt
        );
        assert_eq!(
            parse_relationship_type("Created"),
            RelationshipType::Created
        );
        assert_eq!(parse_relationship_type("Uses"), RelationshipType::Uses);
        assert_eq!(
            parse_relationship_type("Implements"),
            RelationshipType::Implements
        );
        assert_eq!(parse_relationship_type("PartOf"), RelationshipType::PartOf);
        assert_eq!(
            parse_relationship_type("Supersedes"),
            RelationshipType::Supersedes
        );
        assert_eq!(
            parse_relationship_type("ConflictsWith"),
            RelationshipType::ConflictsWith
        );
        assert_eq!(
            parse_relationship_type("unknown"),
            RelationshipType::RelatesTo
        );
    }

    // Bare JSON passes through unchanged.
    #[test]
    fn test_extract_json_raw() {
        let service = EntityExtractorService::without_llm(Domain::for_user());
        let json = r#"{"entities": [], "relationships": []}"#;
        assert_eq!(service.extract_json(json), json);
    }

    // JSON inside a fenced markdown block is isolated from the surrounding prose.
    #[test]
    fn test_extract_json_from_markdown() {
        let service = EntityExtractorService::without_llm(Domain::for_user());
        let response = r#"Here's the extraction:

```json
{"entities": [{"name": "Alice", "type": "Person"}], "relationships": []}
```

Done!"#;
        let extracted = service.extract_json(response);
        assert!(extracted.contains("Alice"));
        assert!(extracted.starts_with('{'));
    }

    // Without an LLM, known technology names are picked up by the fallback.
    #[test]
    fn test_fallback_extraction() {
        let service = EntityExtractorService::without_llm(Domain::for_user());
        let result = service
            .extract("We use Rust and PostgreSQL for the backend")
            .unwrap();

        assert!(result.used_fallback);
        assert!(!result.entities.is_empty());

        let names: Vec<_> = result.entities.iter().map(|e| e.name.as_str()).collect();
        assert!(names.contains(&"Rust"));
        assert!(names.contains(&"PostgreSQL"));
    }

    // No pattern hit still reports fallback, with both warnings present.
    #[test]
    fn test_fallback_no_match() {
        let service = EntityExtractorService::without_llm(Domain::for_user());
        let result = service.extract("Hello world").unwrap();

        assert!(result.used_fallback);
        assert!(result.entities.is_empty());
        assert!(result.warnings.len() >= 2);
    }

    // Empty input short-circuits before any fallback runs.
    #[test]
    fn test_empty_input() {
        let service = EntityExtractorService::without_llm(Domain::for_user());
        let result = service.extract("").unwrap();

        assert!(result.entities.is_empty());
        assert!(!result.used_fallback);
    }

    // Extracted entities convert to graph entities with name, type and confidence preserved.
    #[test]
    fn test_to_graph_entities() {
        let service = EntityExtractorService::without_llm(Domain::for_user());
        let result = ExtractionResult {
            entities: vec![ExtractedEntity {
                name: "Alice".to_string(),
                entity_type: "Person".to_string(),
                confidence: 0.9,
                aliases: vec!["A".to_string()],
                description: Some("A person".to_string()),
            }],
            relationships: Vec::new(),
            used_fallback: false,
            warnings: Vec::new(),
        };

        let entities = service.to_graph_entities(&result);
        assert_eq!(entities.len(), 1);
        assert_eq!(entities[0].name, "Alice");
        assert_eq!(entities[0].entity_type, EntityType::Person);
        assert!((entities[0].confidence - 0.9).abs() < f32::EPSILON);
    }

    // The builder stores the supplied threshold.
    #[test]
    fn test_min_confidence_threshold() {
        let service =
            EntityExtractorService::without_llm(Domain::for_user()).with_min_confidence(0.8);
        assert!((service.min_confidence - 0.8).abs() < f32::EPSILON);
    }

    // A default inference result is empty and not marked as fallback.
    #[test]
    fn test_inference_result_default() {
        let result = InferenceResult::default();
        assert!(result.relationships.is_empty());
        assert!(!result.used_fallback);
        assert!(result.warnings.is_empty());
    }

    // An empty entity slice short-circuits before any fallback runs.
    #[test]
    fn test_infer_relationships_empty() {
        let service = EntityExtractorService::without_llm(Domain::for_user());
        let result = service.infer_relationships(&[]).unwrap();

        assert!(result.relationships.is_empty());
        assert!(!result.used_fallback);
    }

    // A known technology pair yields a single `Uses` relationship.
    #[test]
    fn test_infer_fallback_with_matching_entities() {
        let service = EntityExtractorService::without_llm(Domain::for_user());

        let entities = vec![
            Entity::new(EntityType::Technology, "Rust", Domain::for_user()),
            Entity::new(EntityType::Technology, "cargo", Domain::for_user()),
        ];

        let result = service.infer_relationships(&entities).unwrap();

        assert!(result.used_fallback);
        assert_eq!(result.relationships.len(), 1);
        assert_eq!(result.relationships[0].from, "Rust");
        assert_eq!(result.relationships[0].to, "cargo");
        assert_eq!(result.relationships[0].relationship_type, "Uses");
    }

    // Entities outside the heuristic table produce no relationships.
    #[test]
    fn test_infer_fallback_no_matching_pairs() {
        let service = EntityExtractorService::without_llm(Domain::for_user());

        let entities = vec![
            Entity::new(EntityType::Person, "Alice", Domain::for_user()),
            Entity::new(EntityType::Organization, "Acme", Domain::for_user()),
        ];

        let result = service.infer_relationships(&entities).unwrap();

        assert!(result.used_fallback);
        assert!(result.relationships.is_empty());
    }

    // The inference prompt mentions the entity's name, type and aliases.
    #[test]
    fn test_format_entities_for_inference() {
        let service = EntityExtractorService::without_llm(Domain::for_user());

        let mut entity = Entity::new(EntityType::Technology, "Rust", Domain::for_user());
        entity.aliases = vec!["rust-lang".to_string()];

        let formatted = service.format_entities_for_inference(&[entity]);

        assert!(formatted.contains("Rust"));
        assert!(formatted.contains("Technology"));
        assert!(formatted.contains("rust-lang"));
    }

    // Inferred relationships resolve endpoints by name and keep the reasoning.
    #[test]
    fn test_inferred_to_graph_relationships() {
        let service = EntityExtractorService::without_llm(Domain::for_user());

        let rust = Entity::new(EntityType::Technology, "Rust", Domain::for_user());
        let cargo = Entity::new(EntityType::Technology, "cargo", Domain::for_user());

        let mut entity_map = HashMap::new();
        entity_map.insert("Rust".to_string(), rust.clone());
        entity_map.insert("cargo".to_string(), cargo.clone());

        let inferred = InferenceResult {
            relationships: vec![InferredRelationship {
                from: "Rust".to_string(),
                to: "cargo".to_string(),
                relationship_type: "Uses".to_string(),
                confidence: 0.8,
                reasoning: Some("Rust uses cargo as package manager".to_string()),
            }],
            used_fallback: false,
            warnings: Vec::new(),
        };

        let relationships = service.inferred_to_graph_relationships(&inferred, &entity_map);

        assert_eq!(relationships.len(), 1);
        assert_eq!(relationships[0].from_entity, rust.id);
        assert_eq!(relationships[0].to_entity, cargo.id);
        assert_eq!(relationships[0].relationship_type, RelationshipType::Uses);
        assert!(relationships[0].properties.contains_key("reasoning"));
    }

    // A relationship whose target is missing from the map is skipped.
    #[test]
    fn test_inferred_to_graph_missing_entity() {
        let service = EntityExtractorService::without_llm(Domain::for_user());

        let rust = Entity::new(EntityType::Technology, "Rust", Domain::for_user());
        let mut entity_map = HashMap::new();
        entity_map.insert("Rust".to_string(), rust);
        // "cargo" is deliberately absent from the map.
        let inferred = InferenceResult {
            relationships: vec![InferredRelationship {
                from: "Rust".to_string(),
                to: "cargo".to_string(),
                relationship_type: "Uses".to_string(),
                confidence: 0.8,
                reasoning: None,
            }],
            used_fallback: false,
            warnings: Vec::new(),
        };

        let relationships = service.inferred_to_graph_relationships(&inferred, &entity_map);

        assert!(relationships.is_empty());
    }

    // Extracted relationships convert once their endpoints are in the name-keyed map.
    #[test]
    fn test_to_graph_relationships() {
        let service = EntityExtractorService::without_llm(Domain::for_user());

        let result = ExtractionResult {
            entities: vec![
                ExtractedEntity {
                    name: "Alice".to_string(),
                    entity_type: "Person".to_string(),
                    confidence: 0.9,
                    aliases: Vec::new(),
                    description: None,
                },
                ExtractedEntity {
                    name: "Acme".to_string(),
                    entity_type: "Organization".to_string(),
                    confidence: 0.85,
                    aliases: Vec::new(),
                    description: None,
                },
            ],
            relationships: vec![ExtractedRelationship {
                from: "Alice".to_string(),
                to: "Acme".to_string(),
                relationship_type: "WorksAt".to_string(),
                confidence: 0.8,
                evidence: None,
            }],
            used_fallback: false,
            warnings: Vec::new(),
        };

        let entities = service.to_graph_entities(&result);
        let entity_map: HashMap<String, Entity> =
            entities.into_iter().map(|e| (e.name.clone(), e)).collect();
        let relationships = service.to_graph_relationships(&result, &entity_map);

        assert_eq!(relationships.len(), 1);
        assert_eq!(
            relationships[0].relationship_type,
            RelationshipType::WorksAt
        );
    }

    // Fallback picks up several technologies from one sentence.
    #[test]
    fn test_extraction_with_various_technologies() {
        let service = EntityExtractorService::without_llm(Domain::for_user());
        let result = service
            .extract("We built this using React, TypeScript, and Docker containers")
            .unwrap();

        assert!(result.used_fallback);
        let names: Vec<_> = result.entities.iter().map(|e| e.name.as_str()).collect();
        assert!(names.contains(&"React"));
        assert!(names.contains(&"TypeScript"));
        assert!(names.contains(&"Docker"));
    }

    // Fallback picks up database names.
    #[test]
    fn test_extraction_with_databases() {
        let service = EntityExtractorService::without_llm(Domain::for_user());
        let result = service
            .extract("Our stack uses PostgreSQL for persistence and Redis for caching")
            .unwrap();

        assert!(result.used_fallback);
        let names: Vec<_> = result.entities.iter().map(|e| e.name.as_str()).collect();
        assert!(names.contains(&"PostgreSQL"));
        assert!(names.contains(&"Redis"));
    }

    // Plain construction of an entity with empty optional fields.
    #[test]
    fn test_extracted_entity_defaults() {
        let entity = ExtractedEntity {
            name: "Test".to_string(),
            entity_type: "Concept".to_string(),
            confidence: 0.5,
            aliases: Vec::new(),
            description: None,
        };

        assert_eq!(entity.name, "Test");
        assert!(entity.aliases.is_empty());
        assert!(entity.description.is_none());
    }

    // Plain construction of an inferred relationship that carries reasoning.
    #[test]
    fn test_inferred_relationship_with_reasoning() {
        let rel = InferredRelationship {
            from: "Rust".to_string(),
            to: "LLVM".to_string(),
            relationship_type: "Uses".to_string(),
            confidence: 0.9,
            reasoning: Some("Rust compiles through LLVM".to_string()),
        };

        assert_eq!(rel.from, "Rust");
        assert_eq!(rel.to, "LLVM");
        assert!(rel.reasoning.is_some());
    }

    // Graph entities inherit the service's domain.
    #[test]
    fn test_service_domain() {
        let user_domain = Domain::for_user();
        let service = EntityExtractorService::without_llm(user_domain);

        let result = service.extract("Using Python for scripting").unwrap();
        let entities = service.to_graph_entities(&result);

        // NOTE(review): the assertion is skipped when nothing is extracted,
        // so this test cannot fail on an empty result.
        if !entities.is_empty() {
            assert!(entities[0].domain.is_user());
        }
    }

    // End-to-end check against the real OpenAI API; ignored by default.
    #[test]
    #[ignore = "requires OPENAI_API_KEY and makes real API calls"]
    fn test_llm_extraction_integration() {
        use crate::llm::{LlmHttpConfig, LlmProvider, OpenAiClient};
        use std::sync::Arc;

        // `try_init` result is discarded so a subscriber that is already installed is not an error.
        let _ = tracing_subscriber::fmt()
            .with_max_level(tracing::Level::DEBUG)
            .with_test_writer()
            .try_init();

        let api_key = std::env::var("OPENAI_API_KEY").expect("OPENAI_API_KEY must be set");
        assert!(!api_key.is_empty(), "OPENAI_API_KEY cannot be empty");

        let http_config = LlmHttpConfig {
            timeout_ms: 60_000,
            connect_timeout_ms: 10_000,
        };
        let client = OpenAiClient::new()
            .with_api_key(&api_key)
            .with_model("gpt-5-nano-2025-08-07")
            .with_http_config(http_config);

        let llm: Arc<dyn LlmProvider> = Arc::new(client);

        let service = EntityExtractorService::with_shared_llm(llm, Domain::for_user());

        println!("\n=== Test 1: Simple content ===");
        let simple_content = "PostgreSQL database with Redis cache";
        let result = service.extract(simple_content);
        match &result {
            Ok(r) => {
                println!(
                    "Simple content result: used_fallback={}, entities={:?}",
                    r.used_fallback, r.entities
                );
                assert!(
                    !r.used_fallback,
                    "Simple content should use LLM, not fallback"
                );
            },
            Err(e) => {
                println!("Simple content error: {e:?}");
                unreachable!("Simple content extraction failed: {e}");
            },
        }

        println!("\n=== Test 2: Complex content ===");
        // Code-heavy prose; the outcome is only printed, not asserted.
        let complex_content = r#"The EntityExtractorService::extract_with_llm() method at src/services/entity_extraction.rs:312 calls llm.complete_with_system(&system, &user) to process text. If the LLM fails, it falls back to extract_fallback() which uses TECH_PATTERNS regex matching against patterns like r"\b(Rust|Python|Go|Java)\b" defined in the static LAZY_STATIC block."#;
        let result = service.extract(complex_content);
        match &result {
            Ok(r) => {
                println!(
                    "Complex content result: used_fallback={}, entities={:?}, warnings={:?}",
                    r.used_fallback, r.entities, r.warnings
                );
                if r.used_fallback {
                    println!("WARNING: Complex content fell back to pattern matching!");
                }
            },
            Err(e) => {
                println!("Complex content error: {e:?}");
            },
        }
    }
}