Source code for scribe_data.cli.cli_utils

# SPDX-License-Identifier: GPL-3.0-or-later
"""
Utility functions for the Scribe-Data CLI.
"""

import contextlib
import difflib

from scribe_data.utils import (
    data_type_metadata,
    get_language_from_iso,
    language_to_qid,
)

# MARK: Correct Inputs


def correct_data_type(data_type: str) -> str | None:
    """
    Correct common versions of data type arguments to their standardized form.

    Parameters
    ----------
    data_type : str
        The data type to potentially correct.

    Returns
    -------
    str or None
        ``data_type`` itself if it is already a key of ``data_type_metadata``,
        ``data_type`` with an "s" appended if that form is a key, and None
        if neither form is known.
    """
    all_data_types = data_type_metadata.keys()

    if data_type in all_data_types:
        return data_type

    # Accept an argument that is only missing the trailing "s" of a known key.
    plural_form = f"{data_type}s"
    if plural_form in all_data_types:
        return plural_form

    # No correction is possible; make the "not found" result explicit.
    return None
# MARK: Print Formatted


# MARK: Validate


def validate_language_and_data_type(
    language: str | list[str] | bool | None,
    data_type: str | list[str] | bool | None,
) -> bool:
    """
    Validate the requested language(s) and data type(s).

    A value is accepted when it is one of the known names (compared
    case-insensitively and ignoring surrounding whitespace), when it has the
    QID form ("Q" followed by digits) or, for languages only, when it is a two
    or three letter code for which ``get_language_from_iso`` does not raise
    ValueError. None and boolean arguments are not validated.

    Parameters
    ----------
    language : str or list
        The language(s) to validate. Only the text before the first space of
        each language is checked.

    data_type : str or list
        The data type(s) to validate.

    Returns
    -------
    bool
        True if validation passes, otherwise raises ValueError.

    Raises
    ------
    ValueError
        If any of the languages or data types is invalid, with all errors
        reported together (one per line, languages first).
    """

    def validate_single_item(
        item: str, valid_options: set[str], item_type: str
    ) -> str | None:
        """
        Validate a single item against the valid options and build an error message with a suggestion.

        Parameters
        ----------
        item : str
            The item to validate. Non-string items are not validated.

        valid_options : set
            The lower-case options against which the item will be validated.

        item_type : str
            A description of the item type (e.g., "language", "data-type") used in error messages.

        Returns
        -------
        str or None
            An error message if the item is invalid, or None if the item is valid.
        """
        if not isinstance(item, str):
            return None

        item_lower = item.lower().strip()
        if item_lower in valid_options:
            return None

        # QID form: a capital "Q" followed only by digits.
        if item.startswith("Q") and len(item) > 1 and item[1:].isdigit():
            return None

        # ISO codes identify languages, so they must not make a data type valid.
        if (
            item_type == "language"
            and len(item_lower) in {2, 3}
            and item_lower.isalpha()
        ):
            # A ValueError from the lookup falls through to the error message below.
            with contextlib.suppress(ValueError):
                get_language_from_iso(item_lower)
                return None

        # Match on the normalized form: the options are lower case, so matching
        # the raw input would lose suggestions for upper-case typos.
        closest_match = difflib.get_close_matches(item_lower, valid_options, n=1)
        closest_match_str = (
            f" The closest matching {item_type} is '{closest_match[0].capitalize()}'."
            if closest_match
            else ""
        )

        return f"Invalid {item_type} '{item}'.{closest_match_str}"

    def collect_errors(
        values: str | list[str] | bool | None,
        valid_options: set[str],
        item_type: str,
        type_error: str,
        first_word_only: bool = False,
    ) -> list[str]:
        """
        Normalize one argument to a list and return the error messages for its items.
        """
        if values is None or isinstance(values, bool):
            return []

        if isinstance(values, str):
            values = [values]

        elif not isinstance(values, list):
            return [type_error]

        found = []
        for value in values:
            # Only strings can be split; other items are left for
            # validate_single_item, which does not validate non-strings.
            if first_word_only and isinstance(value, str):
                value = value.split(" ")[0]

            error = validate_single_item(
                item=value, valid_options=valid_options, item_type=item_type
            )
            if error:
                found.append(error)

        return found

    # Build each option set once instead of once per validated item.
    valid_languages = set(language_to_qid.keys())
    valid_data_types = set(data_type_metadata.keys()) | {"wiktionary_translations"}

    # Handle language validation, then data type validation.
    errors = collect_errors(
        language,
        valid_languages,
        "language",
        "Language must be a string or a list of strings.",
        first_word_only=True,
    ) + collect_errors(
        data_type,
        valid_data_types,
        "data-type",
        "Data type must be a string or a list of strings.",
    )

    # Raise ValueError with the combined error message.
    if errors:
        raise ValueError("\n".join(errors))

    return True