// subcog/context/detector.rs

//! Git context detection implementation.
2
3use git2::Repository;
4use std::path::Path;
5
/// Git repository context information.
///
/// Bundles a project identifier, the current branch name, and an optional
/// file path. Every field is an `Option` so that directories outside any
/// repository, and repositories without a current branch, are represented
/// without errors.
///
/// # Examples
///
/// ```rust,ignore
/// use subcog::context::GitContext;
///
/// let ctx = GitContext::from_cwd();
/// match (&ctx.project_id, &ctx.branch) {
///     (Some(project), Some(branch)) => {
///         println!("Working on {project} @ {branch}");
///     }
///     (Some(project), None) => {
///         println!("Working on {project} (detached or unborn HEAD)");
///     }
///     (None, _) => {
///         println!("Not in a git repository");
///     }
/// }
/// ```
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct GitContext {
    /// Project identifier derived from a git remote URL or, failing that,
    /// from the repository directory name.
    ///
    /// When derived from a remote URL the value has the form `host/path`
    /// with any `.git` suffix removed, e.g. `github.com/org/repo`. Without a
    /// usable remote it is just the directory name.
    /// Credentials embedded in the remote URL are stripped for security.
    pub project_id: Option<String>,

    /// Current branch name (short form, e.g. `main` or `feature/x`).
    ///
    /// `None` if in detached HEAD state or if HEAD is unborn (empty repository).
    pub branch: Option<String>,

    /// Optional file path context.
    ///
    /// Never populated by detection itself; callers set it to attach
    /// file-specific context to an operation.
    pub file_path: Option<String>,
}
48
49impl GitContext {
50    /// Detects git context from the current working directory.
51    ///
52    /// Uses `git2::Repository::discover()` to find the repository root,
53    /// traversing parent directories if necessary.
54    ///
55    /// # Returns
56    ///
57    /// A `GitContext` with detected values. If not in a git repository,
58    /// all fields will be `None`.
59    ///
60    /// # Examples
61    ///
62    /// ```rust,ignore
63    /// use subcog::context::GitContext;
64    ///
65    /// let ctx = GitContext::from_cwd();
66    /// if ctx.project_id.is_some() {
67    ///     println!("In a git repository");
68    /// }
69    /// ```
70    #[must_use]
71    pub fn from_cwd() -> Self {
72        std::env::current_dir().map_or_else(|_| Self::default(), |cwd| Self::from_path(&cwd))
73    }
74
75    /// Detects git context from a specific path.
76    ///
77    /// Uses `git2::Repository::discover()` to find the repository containing
78    /// the given path, traversing parent directories if necessary.
79    ///
80    /// # Arguments
81    ///
82    /// * `path` - The path to detect context from. Can be any path within a repository.
83    ///
84    /// # Returns
85    ///
86    /// A `GitContext` with detected values. If the path is not in a git repository,
87    /// all fields will be `None`.
88    ///
89    /// # Examples
90    ///
91    /// ```rust,ignore
92    /// use subcog::context::GitContext;
93    /// use std::path::Path;
94    ///
95    /// let ctx = GitContext::from_path(Path::new("/path/to/repo/subdir"));
96    /// println!("Project: {:?}", ctx.project_id);
97    /// ```
98    #[must_use]
99    pub fn from_path(path: &Path) -> Self {
100        let Ok(repo) = Repository::discover(path) else {
101            return Self::default();
102        };
103
104        Self {
105            project_id: detect_project_id(&repo),
106            branch: detect_branch(&repo),
107            file_path: None,
108        }
109    }
110
111    /// Creates a new `GitContext` with the specified file path.
112    ///
113    /// This is useful for adding file-specific context to an existing detection.
114    ///
115    /// # Arguments
116    ///
117    /// * `file_path` - The file path to associate with this context.
118    ///
119    /// # Examples
120    ///
121    /// ```rust,ignore
122    /// use subcog::context::GitContext;
123    ///
124    /// let ctx = GitContext::from_cwd()
125    ///     .with_file_path("src/main.rs");
126    /// ```
127    #[must_use]
128    pub fn with_file_path(mut self, file_path: impl Into<String>) -> Self {
129        self.file_path = Some(file_path.into());
130        self
131    }
132
133    /// Returns `true` if this context represents a git repository.
134    ///
135    /// A context is considered to be in a git repository if it has a project ID.
136    #[must_use]
137    pub const fn is_git_repo(&self) -> bool {
138        self.project_id.is_some()
139    }
140
141    /// Returns `true` if the repository is in detached HEAD state.
142    ///
143    /// Returns `false` if not in a git repository.
144    #[must_use]
145    pub const fn is_detached(&self) -> bool {
146        self.project_id.is_some() && self.branch.is_none()
147    }
148}
149
150/// Detects the project ID from a repository.
151///
152/// Priority:
153/// 1. Remote "origin" URL (sanitized)
154/// 2. First available remote URL (sanitized)
155/// 3. Repository directory name
156fn detect_project_id(repo: &Repository) -> Option<String> {
157    // Try to get origin remote first
158    if let Some(project_id) = repo
159        .find_remote("origin")
160        .ok()
161        .and_then(|origin| origin.url().and_then(sanitize_git_url))
162    {
163        return Some(project_id);
164    }
165
166    // Try any other remote using iterator chain
167    let from_remote = repo.remotes().ok().and_then(|remotes| {
168        remotes
169            .iter()
170            .flatten()
171            .filter_map(|name| repo.find_remote(name).ok())
172            .find_map(|remote| remote.url().and_then(sanitize_git_url))
173    });
174
175    if from_remote.is_some() {
176        return from_remote;
177    }
178
179    // Fall back to repository directory name
180    repo.workdir()
181        .or_else(|| repo.path().parent())
182        .and_then(|p| p.file_name())
183        .and_then(|n| n.to_str())
184        .map(String::from)
185}
186
187/// Detects the current branch name.
188///
189/// Returns `None` if:
190/// - HEAD is detached (pointing directly to a commit)
191/// - HEAD is unborn (empty repository with no commits)
192fn detect_branch(repo: &Repository) -> Option<String> {
193    let head = repo.head().ok()?;
194
195    // Check if HEAD is a branch (not detached)
196    if !head.is_branch() {
197        return None;
198    }
199
200    // Get the short name (e.g., "main" instead of "refs/heads/main")
201    head.shorthand().map(String::from)
202}
203
204/// Sanitizes a git remote URL by removing credentials and normalizing format.
205///
206/// # Security
207///
208/// This function strips any embedded credentials from URLs:
209/// - `https://user:password@host/path` -> `host/path`
210/// - `git@host:org/repo.git` -> `host/org/repo`
211///
212/// # Supported Formats
213///
214/// | Format | Example | Result |
215/// |--------|---------|--------|
216/// | HTTPS | `https://github.com/org/repo.git` | `github.com/org/repo` |
217/// | HTTPS with creds | `https://user:pass@github.com/org/repo` | `github.com/org/repo` |
218/// | SSH | `git@github.com:org/repo.git` | `github.com/org/repo` |
219/// | Git protocol | `git://github.com/org/repo.git` | `github.com/org/repo` |
220fn sanitize_git_url(url: &str) -> Option<String> {
221    let url = url.trim();
222
223    if url.is_empty() {
224        return None;
225    }
226
227    // Handle SSH format: git@host:org/repo.git
228    if let Some(ssh_part) = url.strip_prefix("git@") {
229        return sanitize_ssh_url(ssh_part);
230    }
231
232    // Handle URL format: https://host/path or git://host/path
233    sanitize_http_url(url)
234}
235
/// Sanitizes an SSH-format URL (after stripping "git@" prefix).
///
/// Input: `github.com:org/repo.git`
/// Output: `github.com/org/repo`
///
/// The text before the first `:` is the host and the remainder is the
/// repository path. The path is normalized by dropping leading and trailing
/// slashes and a `.git` suffix, so `host:/org/repo.git/` also becomes
/// `host/org/repo`.
///
/// Returns `None` if there is no `:` separator, the host is empty, or
/// nothing is left of the path after normalization.
fn sanitize_ssh_url(url: &str) -> Option<String> {
    // Split on the first ':' to separate host from path
    let (host, path) = url.split_once(':')?;
    if host.is_empty() {
        return None;
    }

    // Trim slashes before removing `.git` so that `repo.git/` is recognized,
    // then once more in case the suffix was preceded by a slash.
    let path = path.trim_matches('/');
    let path = path
        .strip_suffix(".git")
        .unwrap_or(path)
        .trim_end_matches('/');
    if path.is_empty() {
        return None;
    }

    Some(format!("{host}/{path}"))
}
253
/// Sanitizes an HTTP/HTTPS/Git protocol URL.
///
/// Handles:
/// - `https://github.com/org/repo.git`
/// - `https://user:pass@github.com/org/repo.git`
/// - `git://github.com/org/repo.git`
///
/// The result is `host/path` with credentials, trailing slashes and a
/// `.git` suffix removed.
///
/// Credentials are only looked for in the authority component (the text
/// between the scheme and the first `/`), and they end at the *last* `@`
/// there. A password that itself contains `@` is therefore removed in full,
/// and an `@` inside the path (e.g. `repo@v1`) is left alone.
///
/// Returns `None` if the scheme is not one of the above, the host is empty,
/// or nothing remains after normalization.
fn sanitize_http_url(url: &str) -> Option<String> {
    // Strip protocol prefix
    let without_protocol = url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"))
        .or_else(|| url.strip_prefix("git://"))?;

    // The authority (`[userinfo@]host[:port]`) ends at the first '/'.
    let authority_end = without_protocol
        .find('/')
        .unwrap_or(without_protocol.len());
    let authority = &without_protocol[..authority_end];

    // Strip credentials if present (user:pass@host -> host). '@' is ASCII,
    // so `at + 1` is always a valid char boundary.
    let host_start = authority.rfind('@').map_or(0, |at| at + 1);
    if host_start == authority_end {
        // Nothing between the credentials and the path: no host.
        return None;
    }
    let without_creds = &without_protocol[host_start..];

    // Trim slashes before removing `.git` so that `repo.git/` is recognized,
    // then once more in case the suffix was preceded by a slash.
    let trimmed = without_creds.trim_end_matches('/');
    let result = trimmed
        .strip_suffix(".git")
        .unwrap_or(trimmed)
        .trim_end_matches('/');

    if result.is_empty() {
        None
    } else {
        Some(result.to_string())
    }
}
288
#[cfg(test)]
mod tests {
    use super::*;
    use git2::Signature;
    use tempfile::TempDir;

    // ============================================================================
    // URL Sanitization Tests
    // ============================================================================

    /// Asserts that `url` sanitizes to exactly `expected`.
    #[track_caller]
    fn assert_sanitized(url: &str, expected: &str) {
        assert_eq!(sanitize_git_url(url).as_deref(), Some(expected));
    }

    #[test]
    fn test_sanitize_https_url() {
        assert_sanitized("https://github.com/org/repo.git", "github.com/org/repo");
    }

    #[test]
    fn test_sanitize_https_url_without_git_suffix() {
        assert_sanitized("https://github.com/org/repo", "github.com/org/repo");
    }

    #[test]
    fn test_sanitize_https_url_with_credentials() {
        assert_sanitized(
            "https://user:password@github.com/org/repo.git",
            "github.com/org/repo",
        );
    }

    #[test]
    fn test_sanitize_https_url_with_user_only() {
        assert_sanitized(
            "https://user@github.com/org/repo.git",
            "github.com/org/repo",
        );
    }

    #[test]
    fn test_sanitize_ssh_url() {
        assert_sanitized("git@github.com:org/repo.git", "github.com/org/repo");
    }

    #[test]
    fn test_sanitize_ssh_url_without_git_suffix() {
        assert_sanitized("git@github.com:org/repo", "github.com/org/repo");
    }

    #[test]
    fn test_sanitize_git_protocol_url() {
        assert_sanitized("git://github.com/org/repo.git", "github.com/org/repo");
    }

    #[test]
    fn test_sanitize_http_url() {
        assert_sanitized("http://github.com/org/repo.git", "github.com/org/repo");
    }

    #[test]
    fn test_sanitize_url_with_trailing_slash() {
        assert_sanitized("https://github.com/org/repo/", "github.com/org/repo");
    }

    #[test]
    fn test_sanitize_empty_url() {
        assert_eq!(sanitize_git_url(""), None);
    }

    #[test]
    fn test_sanitize_whitespace_url() {
        assert_eq!(sanitize_git_url("   "), None);
    }

    #[test]
    fn test_sanitize_invalid_url() {
        // No protocol, treated as invalid
        assert_eq!(sanitize_git_url("just-a-string"), None);
    }

    #[test]
    fn test_sanitize_url_with_complex_credentials() {
        // Password with special characters
        assert_sanitized(
            "https://user:p%40ssw0rd!@github.com/org/repo.git",
            "github.com/org/repo",
        );
    }

    #[test]
    fn test_sanitize_gitlab_url() {
        assert_sanitized(
            "https://gitlab.com/group/subgroup/repo.git",
            "gitlab.com/group/subgroup/repo",
        );
    }

    #[test]
    fn test_sanitize_bitbucket_url() {
        assert_sanitized("git@bitbucket.org:team/repo.git", "bitbucket.org/team/repo");
    }

    #[test]
    fn test_sanitize_self_hosted_url() {
        assert_sanitized(
            "https://git.company.com/team/project.git",
            "git.company.com/team/project",
        );
    }

    // ============================================================================
    // GitContext Construction Tests
    // ============================================================================

    #[test]
    fn test_git_context_default() {
        let ctx = GitContext::default();
        assert!(ctx.project_id.is_none());
        assert!(ctx.branch.is_none());
        assert!(ctx.file_path.is_none());
    }

    #[test]
    fn test_git_context_with_file_path() {
        let ctx = GitContext::default().with_file_path("src/main.rs");
        assert_eq!(ctx.file_path, Some("src/main.rs".to_string()));
    }

    #[test]
    fn test_git_context_is_git_repo() {
        let ctx = GitContext {
            project_id: Some("org/repo".to_string()),
            branch: Some("main".to_string()),
            file_path: None,
        };
        assert!(ctx.is_git_repo());

        let non_repo = GitContext::default();
        assert!(!non_repo.is_git_repo());
    }

    #[test]
    fn test_git_context_is_detached() {
        let detached = GitContext {
            project_id: Some("org/repo".to_string()),
            branch: None,
            file_path: None,
        };
        assert!(detached.is_detached());

        let attached = GitContext {
            project_id: Some("org/repo".to_string()),
            branch: Some("main".to_string()),
            file_path: None,
        };
        assert!(!attached.is_detached());

        let non_repo = GitContext::default();
        assert!(!non_repo.is_detached());
    }

    // ============================================================================
    // Repository Detection Tests
    // ============================================================================

    /// Creates a repository in a fresh temporary directory containing a
    /// single empty commit, so that HEAD points at a real branch.
    ///
    /// The returned `TempDir` must be kept alive for as long as the
    /// repository is used; dropping it deletes the directory.
    fn create_test_repo() -> (TempDir, Repository) {
        let dir = TempDir::new().unwrap();
        let repo = Repository::init(dir.path()).unwrap();

        // Scoped so the tree's borrow of `repo` ends before `repo` is moved out.
        {
            let sig = Signature::now("test", "test@test.com").unwrap();
            let tree_id = repo.index().unwrap().write_tree().unwrap();
            let tree = repo.find_tree(tree_id).unwrap();
            repo.commit(Some("HEAD"), &sig, &sig, "Initial commit", &tree, &[])
                .unwrap();
        }

        (dir, repo)
    }

    #[test]
    fn test_from_path_non_git_directory() {
        let dir = TempDir::new().unwrap();
        let ctx = GitContext::from_path(dir.path());

        assert!(ctx.project_id.is_none());
        assert!(ctx.branch.is_none());
        assert!(ctx.file_path.is_none());
    }

    #[test]
    fn test_from_path_git_repo_no_remote() {
        let (dir, _repo) = create_test_repo();
        let ctx = GitContext::from_path(dir.path());

        // Should fall back to directory name
        let dir_name = dir.path().file_name().and_then(|name| name.to_str());
        assert!(dir_name.is_some());
        assert_eq!(ctx.project_id.as_deref(), dir_name);
        assert!(ctx.branch.is_some());
    }

    #[test]
    fn test_from_path_git_repo_with_remote() {
        let (dir, repo) = create_test_repo();

        // Add a remote
        repo.remote("origin", "https://github.com/testorg/testrepo.git")
            .unwrap();

        let ctx = GitContext::from_path(dir.path());

        assert_eq!(
            ctx.project_id,
            Some("github.com/testorg/testrepo".to_string())
        );
        assert!(ctx.branch.is_some());
    }

    #[test]
    fn test_from_path_subdirectory() {
        let (dir, repo) = create_test_repo();

        repo.remote("origin", "https://github.com/org/repo.git")
            .unwrap();

        // Create a subdirectory
        let subdir = dir.path().join("src").join("lib");
        std::fs::create_dir_all(&subdir).unwrap();

        let ctx = GitContext::from_path(&subdir);

        // Should still detect the repository
        assert_eq!(ctx.project_id, Some("github.com/org/repo".to_string()));
    }

    #[test]
    fn test_from_path_detached_head() {
        let (dir, repo) = create_test_repo();

        // Detach HEAD at the initial commit
        let commit_oid = repo.head().unwrap().target().unwrap();
        repo.set_head_detached(commit_oid).unwrap();

        let ctx = GitContext::from_path(dir.path());

        assert!(ctx.project_id.is_some()); // Should still have project ID
        assert!(ctx.branch.is_none()); // Branch should be None
        assert!(ctx.is_detached());
    }

    #[test]
    fn test_from_path_empty_repo() {
        let dir = TempDir::new().unwrap();
        let _repo = Repository::init(dir.path()).unwrap();

        // Empty repo - no commits yet
        let ctx = GitContext::from_path(dir.path());

        // Should still detect as a repo via directory name
        assert!(ctx.project_id.is_some());
        // HEAD is unborn, so there is no branch to report
        assert!(ctx.branch.is_none());
    }

    #[test]
    fn test_from_path_remote_with_credentials() {
        let (dir, repo) = create_test_repo();

        // Add a remote with embedded credentials
        repo.remote("origin", "https://user:secretpass@github.com/org/repo.git")
            .unwrap();

        let ctx = GitContext::from_path(dir.path());

        // Credentials should be stripped
        assert_eq!(ctx.project_id, Some("github.com/org/repo".to_string()));
        // Verify no credentials in project_id
        assert!(!ctx.project_id.as_ref().unwrap().contains("user"));
        assert!(!ctx.project_id.as_ref().unwrap().contains("secret"));
    }

    #[test]
    fn test_from_path_ssh_remote() {
        let (dir, repo) = create_test_repo();

        repo.remote("origin", "git@github.com:org/repo.git")
            .unwrap();

        let ctx = GitContext::from_path(dir.path());

        assert_eq!(ctx.project_id, Some("github.com/org/repo".to_string()));
    }

    #[test]
    fn test_from_path_multiple_remotes() {
        let (dir, repo) = create_test_repo();

        // Add origin and another remote
        repo.remote("upstream", "https://github.com/upstream/repo.git")
            .unwrap();
        repo.remote("origin", "https://github.com/fork/repo.git")
            .unwrap();

        let ctx = GitContext::from_path(dir.path());

        // Should prefer origin
        assert_eq!(ctx.project_id, Some("github.com/fork/repo".to_string()));
    }

    #[test]
    fn test_from_path_non_origin_remote() {
        let (dir, repo) = create_test_repo();

        // Only add a non-origin remote
        repo.remote("upstream", "https://github.com/upstream/repo.git")
            .unwrap();

        let ctx = GitContext::from_path(dir.path());

        // Should fall back to any available remote
        assert_eq!(ctx.project_id, Some("github.com/upstream/repo".to_string()));
    }

    #[test]
    fn test_from_path_feature_branch() {
        let (dir, repo) = create_test_repo();

        // Create and checkout a feature branch
        let head = repo.head().unwrap().target().unwrap();
        let commit = repo.find_commit(head).unwrap();
        repo.branch("feature/my-feature", &commit, false).unwrap();
        repo.set_head("refs/heads/feature/my-feature").unwrap();

        let ctx = GitContext::from_path(dir.path());

        assert_eq!(ctx.branch, Some("feature/my-feature".to_string()));
    }

    #[test]
    fn test_from_path_worktree() {
        let (_dir, repo) = create_test_repo();

        // Create a branch for the worktree
        let head = repo.head().unwrap().target().unwrap();
        let commit = repo.find_commit(head).unwrap();
        let branch_ref = repo
            .branch("worktree-branch", &commit, false)
            .unwrap()
            .into_reference();

        // Place the worktree inside its own temporary directory. A fixed
        // name next to the repository would collide between concurrent or
        // previously failed runs; this one is unique and is removed on drop
        // even if an assertion below panics.
        let worktree_parent = TempDir::new().unwrap();
        let worktree_path = worktree_parent.path().join("test-worktree");

        let mut opts = git2::WorktreeAddOptions::new();
        opts.reference(Some(&branch_ref));
        repo.worktree("test-worktree", &worktree_path, Some(&opts))
            .unwrap();

        // Detect from worktree path
        let ctx = GitContext::from_path(&worktree_path);

        // Should detect the same project
        assert!(ctx.project_id.is_some());
        assert_eq!(ctx.branch, Some("worktree-branch".to_string()));
    }

    // ============================================================================
    // Edge Cases
    // ============================================================================

    #[test]
    fn test_from_path_bare_repo() {
        let dir = TempDir::new().unwrap();
        let repo = Repository::init_bare(dir.path()).unwrap();

        repo.remote("origin", "https://github.com/org/repo.git")
            .unwrap();

        let ctx = GitContext::from_path(dir.path());

        // Should still detect project ID from remote
        assert_eq!(ctx.project_id, Some("github.com/org/repo".to_string()));
    }

    #[test]
    fn test_sanitize_ssh_url_no_path() {
        // Malformed SSH URL with no path
        assert_eq!(sanitize_ssh_url("github.com:"), None);
    }

    #[test]
    fn test_sanitize_ssh_url_no_host() {
        // Malformed SSH URL with no host
        assert_eq!(sanitize_ssh_url(":org/repo"), None);
    }
}