Source code for sw_metadata_bot.pitfalls
"""Pitfalls data loading and parsing."""
import json
from datetime import datetime
from pathlib import Path
from . import __version__
from .check_parsing import get_check_catalog_id, get_short_check_code, is_check_reported
[docs]
def load_pitfalls(file_path: Path) -> dict:
    """Read a pitfalls JSON-LD file and return its parsed content.

    Args:
        file_path: Location of the JSON-LD document; it is decoded as UTF-8.

    Returns:
        The value produced by parsing the whole file as JSON.
    """
    # Read everything first so the file is closed before parsing starts.
    with open(file_path, encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
[docs]
def get_repository_url(data: dict) -> str:
    """Extract the repository URL from pitfalls data.

    The URL is read from ``data["assessedSoftware"]["url"]``.

    Args:
        data: Parsed pitfalls document.

    Returns:
        The stored URL, or an empty string when the ``assessedSoftware``
        entry is missing, is not a mapping (for example JSON ``null``), or
        has no usable ``url`` value.
    """
    software = data.get("assessedSoftware")
    # A present-but-null (or otherwise non-mapping) entry has no ``.get``;
    # treat it the same as a missing entry instead of raising AttributeError.
    if not isinstance(software, dict):
        return ""
    url = software.get("url")
    # Keep the declared ``str`` return type even when the URL is JSON null.
    return "" if url is None else url
def _get_check_code(check: dict) -> str:
    """Return the full check catalog ID for a single check entry.

    The lookup itself is delegated to ``get_check_catalog_id``.
    """
    catalog_id = get_check_catalog_id(check)
    return catalog_id
def _get_short_check_code(check_full_id: str) -> str:
    """Return the part of a full check ID that follows its last ``#``.

    For example an ID ending in ``#P001`` yields ``P001``. An ID without any
    ``#`` is returned unchanged.
    """
    # rpartition leaves the whole string in the tail slot when "#" is absent.
    _, _, short_code = check_full_id.rpartition("#")
    return short_code
[docs]
def get_pitfalls_list(data: dict) -> list[dict]:
    """Collect the pitfall entries from the ``checks`` list of ``data``.

    An entry is kept when ``is_check_reported`` accepts it and its short
    check code starts with ``"P"``. A missing ``checks`` key yields an empty
    list.
    """

    def _is_reported_pitfall(entry: dict):
        # Test the reported flag first so skipped entries never reach the
        # short-code lookup.
        return is_check_reported(entry) and get_short_check_code(entry).startswith("P")

    return list(filter(_is_reported_pitfall, data.get("checks", [])))
[docs]
def get_warnings_list(data: dict) -> list[dict]:
    """Collect the warning entries from the ``checks`` list of ``data``.

    An entry is kept when ``is_check_reported`` accepts it and its short
    check code starts with ``"W"``. A missing ``checks`` key yields an empty
    list.
    """

    def _is_reported_warning(entry: dict):
        # Test the reported flag first so skipped entries never reach the
        # short-code lookup.
        return is_check_reported(entry) and get_short_check_code(entry).startswith("W")

    return list(filter(_is_reported_warning, data.get("checks", [])))
# Opening paragraph of the issue body. create_issue_body() falls back to this
# text whenever no custom message is supplied.
DEFAULT_GREETINGS = """\
Hi maintainers,
Your repository has been selected for our metadata quality improvement initiative. We've automatically analyzed your repository's metadata and discovered some issues that could be fixed.
"""
# Markdown skeleton of the issue body. It is rendered with str.format() in
# create_issue_body(), which fills two placeholders:
#   {greetings} - the opening message (custom text or DEFAULT_GREETINGS)
#   {report}    - the generated pitfalls/warnings report
# Because str.format() is used, any literal brace added to this text must be
# doubled ("{{" / "}}").
ISSUE_TEMPLATE = """\
{greetings}
This automated issue includes:
- Detected metadata pitfalls and warnings
- Suggestions for fixing each issue
## Context
This analysis is performed by the [CodeMetaSoft](https://w3id.org/codemetasoft) project to help improve research software metadata quality.
This is a first initiative aimed at identifying and reporting metadata quality issues across research software repositories.
At this stage, we only provide diagnostics and recommendations.
In future iterations, we plan to propose automated fixes for the detected issues to further simplify the improvement process and reduce manual effort.
Each pitfall and warning is identified by a unique code (e.g. P001 for pitfalls, W004 for warnings) that corresponds to specific metadata quality issues.
You can find more details about these checks and how to address them in the [RSMetacheck catalog](https://softwareunderstanding.github.io/RsMetaCheck/).
{report}
---
This report was generated automatically by [sw-metadata-bot](https://github.com/SoftwareUnderstanding/sw-metadata-bot) on your main default branch.
If you're not interested in participating, please comment "unsubscribe" and we will remove your repository from our list.
If you would like the pitfalls and warnings to be fixed automatically, please comment "auto-fix" and we will prioritize adding this feature in future iterations.
"""
[docs]
def create_issue_body(report: str, custom_message: str | None = None) -> str:
    """Render the full issue text around an already formatted report.

    Args:
        report: Report text substituted for the ``{report}`` placeholder.
        custom_message: Optional opening message. When it is ``None`` or
            empty, ``DEFAULT_GREETINGS`` is used instead.

    Returns:
        ``ISSUE_TEMPLATE`` with both placeholders filled in.
    """
    # Any falsy message (None or "") selects the default greeting.
    greetings = custom_message or DEFAULT_GREETINGS
    return ISSUE_TEMPLATE.format(greetings=greetings, report=report)