# SPDX-License-Identifier: GPL-3.0-or-later
"""
Utility functions for the Scribe-Data CLI.
"""
import contextlib
import difflib
from scribe_data.utils import (
data_type_metadata,
get_language_from_iso,
language_to_qid,
)
# MARK: Correct Inputs
[docs]
def correct_data_type(data_type: str) -> str | None:
    """
    Correct common versions of data type arguments to their standardized form.

    Parameters
    ----------
    data_type : str
        The data type to potentially correct.

    Returns
    -------
    str or None
        ``data_type`` unchanged if it is already a key of ``data_type_metadata``,
        ``data_type`` with an "s" appended if that form is a key, or None if
        neither form is a known data type.
    """
    if data_type in data_type_metadata:
        return data_type

    # The only correction attempted is appending an "s" (singular -> plural).
    plural_form = f"{data_type}s"
    if plural_form in data_type_metadata:
        return plural_form

    # Explicit so that the "no correction found" outcome is visible to readers.
    return None
# MARK: Print Formatted
# MARK: Validate
[docs]
def validate_language_and_data_type(
language: str | list[str] | bool | None,
data_type: str | list[str] | bool | None,
) -> bool:
"""
Validate that the language and data type QIDs are not None.
Parameters
----------
language : str or list
The language(s) to validate.
data_type : str or list
The data type(s) to validate.
Returns
-------
bool
True if validation passes, otherwise raises ValueError.
Raises
------
ValueError
If any of the languages or data types is invalid, with all errors reported together.
"""
def validate_single_item(
item: str, valid_options: set[str], item_type: str
) -> str | None:
"""
Validate a single item against a list of valid options, providing error messages and suggestions.
Parameters
----------
item : str
The item to validate.
valid_options : list
A list of valid options against which the item will be validated.
item_type : str
A description of the item type (e.g., "language", "data-type") used in error messages.
Returns
-------
str or None
An error message if the item is invalid, or None if the item is valid.
"""
if not isinstance(item, str):
return None
item_lower = item.lower().strip()
if item_lower in valid_options:
return None
if item.startswith("Q") and len(item) > 1 and item[1:].isdigit():
return None
if len(item_lower) in {2, 3} and item_lower.isalpha():
with contextlib.suppress(ValueError):
get_language_from_iso(item_lower)
return None
closest_match = difflib.get_close_matches(item, valid_options, n=1)
closest_match_str = (
f" The closest matching {item_type} is '{closest_match[0].capitalize()}'."
if closest_match
else ""
)
return f"Invalid {item_type} '{item}'.{closest_match_str}"
errors = []
# Handle language validation.
if language is None or isinstance(language, bool):
pass
elif isinstance(language, str):
language = [language]
elif not isinstance(language, list):
errors.append("Language must be a string or a list of strings.")
if language is not None and isinstance(language, list):
for lang in language:
lang = lang.split(" ")[0]
error = validate_single_item(
item=lang,
valid_options=set(language_to_qid.keys()),
item_type="language",
)
if error:
errors.append(error)
# Handle data type validation.
if data_type is None or isinstance(data_type, bool):
pass
elif isinstance(data_type, str):
data_type = [data_type]
elif not isinstance(data_type, list):
errors.append("Data type must be a string or a list of strings.")
if data_type is not None and isinstance(data_type, list):
valid_data_types = set(data_type_metadata.keys()) | {"wiktionary_translations"}
for dt in data_type:
error = validate_single_item(
item=dt, valid_options=valid_data_types, item_type="data-type"
)
if error:
errors.append(error)
# Raise ValueError with the combined error message.
if errors:
raise ValueError("\n".join(errors))
else:
return True