Source code for sw_metadata_bot.check_parsing
"""Shared parsing helpers for RSMetacheck check identifiers.
RSMetacheck evaluates each repository against a catalog of checks for metadata
quality. Checks are identified by a code: P#### for Pitfalls (high-priority issues
that indicate missing or invalid metadata) or W#### for Warnings (informational
checks or best-practice recommendations). The #### segment is a 3-4 digit code
within each category.
Example check codes:
- P001: Repository lacks codemeta.json file
- W001: Incomplete metadata field descriptions
- P042: Missing license information
See constants.py for related definitions (CHECK_TYPE_*, CHECK_CODE_REGEX_PATTERN).
"""
import re
from . import constants
# Compiled once at import time from the shared pattern in constants.py and
# matched case-insensitively. get_short_check_code() reads capture group 1 of
# a match as the short code, so the pattern is expected to define that group.
CHECK_CODE_PATTERN = re.compile(constants.CHECK_CODE_REGEX_PATTERN, re.IGNORECASE)
[docs]
def get_check_catalog_id(check: dict) -> str:
    """Return full RSMetacheck catalog ID URL for a check when available.

    Preferred source is the new schema key ``assessesIndicator.@id``, used
    only when the value contains ``"catalog"`` and matches
    ``CHECK_CODE_PATTERN``. For backward compatibility, this falls back to
    the legacy ``pitfall`` key.

    Args:
        check: Mapping describing a single check.

    Returns:
        The catalog ID as a string, or ``""`` when neither key provides a
        usable value (including when the value is present but null).
    """
    indicator = check.get("assessesIndicator")
    # The key can be present but null or not an object; treat that the same
    # as a missing indicator instead of raising AttributeError on ``.get``.
    raw_id = indicator.get("@id") if isinstance(indicator, dict) else None
    indicator_id = "" if raw_id is None else str(raw_id)
    if (
        indicator_id
        and "catalog" in indicator_id
        and CHECK_CODE_PATTERN.search(indicator_id)
    ):
        return indicator_id

    legacy_id = check.get("pitfall")
    # A null legacy value must not be stringified into the literal "None",
    # which callers would mistake for a real (truthy) identifier.
    return "" if legacy_id is None else str(legacy_id)
[docs]
def get_short_check_code(check: dict) -> str:
    """Return the upper-cased short check code (e.g. P001 or W004).

    The code is taken from capture group 1 of ``CHECK_CODE_PATTERN`` applied
    to the ID returned by ``get_check_catalog_id``. An empty string is
    returned when there is no ID or the ID does not match the pattern.
    """
    catalog_id = get_check_catalog_id(check)
    # Skip the regex entirely when there is no identifier to inspect.
    found = CHECK_CODE_PATTERN.search(catalog_id) if catalog_id else None
    return found.group(1).upper() if found is not None else ""
[docs]
def is_check_reported(check: dict) -> bool:
    """Tell whether metacheck explicitly flagged this check as reported.

    The ``output`` value of the check is converted to a string and compared
    case-insensitively with ``"true"``; a missing key or any other value
    yields False.
    """
    normalized = str(check.get("output")).lower()
    if normalized == "true":
        return True
    return False