# Source code for sw_metadata_bot.commit_lookup

"""Repository head commit lookup utilities."""

import re
import subprocess
from urllib.parse import quote, urlparse

import requests

from .token_resolver import resolve_token


def _resolve_optional_token(
    explicit_token: str | None, env_var_name: str
) -> str | None:
    """Look up an API token, tolerating its absence.

    Thin wrapper that forwards both arguments to ``resolve_token`` and always
    passes ``dry_run=True``.

    Args:
        explicit_token: Token supplied directly by the caller, if any.
        env_var_name: Name of the environment variable handed to
            ``resolve_token`` as the alternative token source.

    Returns:
        Whatever ``resolve_token`` returns; ``None`` is expected when no token
        is available.
    """
    # NOTE(review): dry_run=True is presumably what makes a missing token
    # non-fatal -- confirm against token_resolver.resolve_token.
    token = resolve_token(
        explicit_token=explicit_token,
        env_var_name=env_var_name,
        dry_run=True,
    )
    return token


[docs] def parse_github_repo(repo_url: str) -> tuple[str, str] | None: """Parse owner/repo from a GitHub repository URL.""" match = re.match(r"^https?://github\.com/([^/]+)/([^/]+)$", repo_url, re.IGNORECASE) if match is None: return None owner = match.group(1) repo = match.group(2).removesuffix(".git") return owner, repo
[docs] def resolve_gitlab_project_path(repo_url: str) -> tuple[str, str] | None: """Parse host and project path for GitLab repositories.""" parsed = urlparse(repo_url) host = parsed.netloc if not host or "gitlab" not in host.lower(): return None parts = [part for part in parsed.path.strip("/").split("/") if part] if len(parts) < 2: return None parts[-1] = parts[-1].removesuffix(".git") project_path = "/".join(parts) if not project_path: return None return host, project_path
def is_commit_hash(value: str) -> bool:
    """Tell whether ``value`` has the shape of a commit hash.

    Leading and trailing whitespace is ignored. What remains must consist
    solely of ASCII hexadecimal digits (either case) and be between 7 and 64
    characters long, inclusive.

    Args:
        value: Text to inspect.

    Returns:
        ``True`` when the trimmed text is 7-64 hexadecimal characters,
        ``False`` otherwise.
    """
    candidate = value.strip()
    if not 7 <= len(candidate) <= 64:
        return False
    return all(char in "0123456789abcdefABCDEF" for char in candidate)
def get_github_head_commit(repo_url: str, token: str | None = None) -> str | None:
    """Ask the GitHub REST API for the repository's most recent commit SHA.

    Requests a single entry from ``/repos/{owner}/{repo}/commits`` and returns
    the ``sha`` of the first commit in the response.

    Args:
        repo_url: GitHub repository URL; anything ``parse_github_repo``
            rejects yields ``None`` without a network call.
        token: Optional API token. When omitted, the ``GITHUB_API_TOKEN``
            name is passed to the token resolver as the fallback source.

    Returns:
        The commit SHA, or ``None`` when the URL is not a GitHub URL or the
        response does not contain a well-formed commit hash.

    Raises:
        requests.RequestException: On transport failures or a non-success
            HTTP status (via ``raise_for_status``).
    """
    owner_and_repo = parse_github_repo(repo_url)
    if owner_and_repo is None:
        return None
    owner, repo = owner_and_repo

    request_headers: dict[str, str] = {"Accept": "application/vnd.github.v3+json"}
    api_token = _resolve_optional_token(token, "GITHUB_API_TOKEN")
    if api_token:
        request_headers["Authorization"] = f"token {api_token}"

    response = requests.get(
        f"https://api.github.com/repos/{owner}/{repo}/commits",
        params={"per_page": 1},
        headers=request_headers,
        timeout=10,
    )
    response.raise_for_status()

    # Validate the payload shape step by step; anything unexpected is
    # reported as "no commit found" rather than raised.
    commits = response.json()
    if not (isinstance(commits, list) and commits):
        return None
    latest = commits[0]
    if not isinstance(latest, dict):
        return None
    sha = latest.get("sha")
    if isinstance(sha, str) and sha and is_commit_hash(sha):
        return sha
    return None
def get_gitlab_head_commit(repo_url: str, token: str | None = None) -> str | None:
    """Ask a GitLab instance's v4 API for the project's most recent commit id.

    Works for any host accepted by ``resolve_gitlab_project_path``. Requests a
    single entry from ``/api/v4/projects/{project}/repository/commits`` on
    that host and returns the ``id`` of the first commit in the response.

    Args:
        repo_url: GitLab repository URL; anything
            ``resolve_gitlab_project_path`` rejects yields ``None`` without a
            network call.
        token: Optional API token. When omitted, the ``GITLAB_API_TOKEN``
            name is passed to the token resolver as the fallback source.

    Returns:
        The commit id, or ``None`` when the URL is not a GitLab URL or the
        response does not contain a well-formed commit hash.

    Raises:
        requests.RequestException: On transport failures or a non-success
            HTTP status (via ``raise_for_status``).
    """
    host_and_project = resolve_gitlab_project_path(repo_url)
    if host_and_project is None:
        return None
    host, project_path = host_and_project

    # The project path goes into a single URL segment, so "/" must be
    # percent-encoded as well (hence safe="").
    project_segment = quote(project_path, safe="")

    request_headers: dict[str, str] = {}
    api_token = _resolve_optional_token(token, "GITLAB_API_TOKEN")
    if api_token:
        request_headers["PRIVATE-TOKEN"] = api_token

    response = requests.get(
        f"https://{host}/api/v4/projects/{project_segment}/repository/commits",
        params={"per_page": 1},
        headers=request_headers,
        timeout=10,
    )
    response.raise_for_status()

    # Validate the payload shape step by step; anything unexpected is
    # reported as "no commit found" rather than raised.
    commits = response.json()
    if not (isinstance(commits, list) and commits):
        return None
    latest = commits[0]
    if not isinstance(latest, dict):
        return None
    commit_id = latest.get("id")
    if isinstance(commit_id, str) and commit_id and is_commit_hash(commit_id):
        return commit_id
    return None
def get_generic_git_head_commit(repo_url: str) -> str | None:
    """Fetch current head commit via git ls-remote as generic fallback.

    Runs ``git ls-remote -- <repo_url> HEAD`` and returns the first
    whitespace-separated field of the first output line when it looks like a
    commit hash.

    Args:
        repo_url: Repository location passed to ``git ls-remote``.

    Returns:
        The commit hash reported for ``HEAD``, or ``None`` when git exits
        with a non-zero status, prints nothing, or prints something that is
        not a commit hash.

    Raises:
        subprocess.TimeoutExpired: If git does not finish within 20 seconds.
        OSError: If the ``git`` executable cannot be started.
    """
    completed = subprocess.run(
        # "--" ends option parsing so a repo_url beginning with "-" (for
        # example "--upload-pack=...") is never interpreted as a git option.
        ["git", "ls-remote", "--", repo_url, "HEAD"],
        check=False,
        capture_output=True,
        text=True,
        timeout=20,
    )
    if completed.returncode != 0:
        return None

    output_lines = completed.stdout.strip().splitlines()
    if not output_lines:
        return None

    fields = output_lines[0].split()
    if not fields:
        return None

    candidate = fields[0]
    return candidate if is_commit_hash(candidate) else None
def get_repo_head_commit(repo_url: str) -> str | None:
    """Resolve the repository's head commit, trying the cheapest source first.

    Lookup order: GitHub API, then GitLab API, then ``git ls-remote``. The
    first non-empty string result wins.

    Args:
        repo_url: Repository URL handed unchanged to every lookup.

    Returns:
        The head commit id, or ``None`` when no lookup produced one.
    """
    lookups = (
        get_github_head_commit,
        get_gitlab_head_commit,
        get_generic_git_head_commit,
    )
    for lookup in lookups:
        try:
            found = lookup(repo_url)
        except Exception:
            # Best-effort chain: a failing lookup (HTTP error, timeout,
            # missing git binary, ...) simply defers to the next one.
            continue
        if isinstance(found, str) and found:
            return found
    return None