# Source code for scribe_data.check.check_missing_forms.pr_body

# SPDX-License-Identifier: GPL-3.0-or-later
"""
Generate a formatted PR body describing missing features for each language.
"""

import json
import sys

from scribe_data.utils import (
    data_type_metadata,
    language_metadata,
)



# [docs]
def _language_label(qid):
    """
    Find the display name of the language identified by a QID.

    Parameters
    ----------
    qid : str
        QID to look up in ``language_metadata``.

    Returns
    -------
    str or None
        The language's key in ``language_metadata`` when its own ``qid``
        matches, ``"<language> (<sub-language>)"`` when the match is found
        under that language's ``sub_languages``, or ``None`` when no entry
        carries the QID.
    """
    for language, details in language_metadata.items():
        # A language's own QID takes precedence over its sub-languages.
        if details.get("qid") == qid:
            return language

        if "sub_languages" not in details:
            continue

        for variant, variant_details in details["sub_languages"].items():
            if variant_details.get("qid") == qid:
                return f"{language} ({variant})"

    return None


def _feature_label(feature):
    """
    Translate a feature identifier into its data type name.

    Parameters
    ----------
    feature : str
        Identifier to look up among the values of ``data_type_metadata``.

    Returns
    -------
    str
        The first key of ``data_type_metadata`` whose value equals
        ``feature``, or ``feature`` itself when there is none.
    """
    for label, qid in data_type_metadata.items():
        if qid == feature:
            return label

    return feature


def pr_body(missing_features: dict) -> str:
    """
    Build (and print) the markdown body of the automated missing forms PR.

    Parameters
    ----------
    missing_features : dict
        Missing features keyed by language QID.
        Format: {language_qid: {feature_type: [features]}}.

    Returns
    -------
    str
        Markdown text made of an introductory header, a two-column table
        (language, missing form types) and a closing review request.

    Notes
    -----
    - Language QIDs and feature types are replaced by their names from the
      project metadata when a match exists; otherwise they are shown as-is.
    - Entries that resolve to the same language name share one table row.
    - Rows are sorted by language name and form types are sorted within a row.
    - The generated text is also written to standard output.
    """
    header = (
        "## Automated PR: Missing Lexeme Forms\n\n"
        "This is an automated PR created by the [Check and Update Missing Query Forms](https://github.com/scribe-org/Scribe-Data/blob/main/.github/workflows/check_and_update_missing_query_forms.yaml) workflow.\n\n"
        "### Missing Forms Summary\n"
        "| **Language** | **Forms Type** |\n"
        "|:-------------|:---------------|\n"
    )
    footer = "\nPlease review the changes and provide feedback.\n"

    # Language display name -> set of form type names missing for it.
    forms_by_language = {}

    for entity, features in missing_features.items():
        # Fall back to the raw QID when the metadata has no usable name.
        language = _language_label(entity) or entity
        labels = forms_by_language.setdefault(language, set())

        for feature in features.keys():
            labels.add(_feature_label(feature))

    # One table row per language, in alphabetical order.
    table_rows = [
        f"| **{language}** | {', '.join(sorted(labels))} |\n"
        for language, labels in sorted(forms_by_language.items())
    ]

    pr_body_content = "".join([header, *table_rows, footer])

    print(pr_body_content)

    return pr_body_content



if __name__ == "__main__":
    # Script entry point: load the missing features JSON file named by the
    # first command-line argument and pass its contents to pr_body.
    if len(sys.argv) < 2:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit(f"Usage: {sys.argv[0]} <missing_features.json>")

    # Pin the encoding so that parsing the JSON file does not depend on the
    # platform's locale default.
    with open(sys.argv[1], "r", encoding="utf-8") as f:
        missing_features = json.load(f)

    pr_body(missing_features)