Source code for scribe_data.cli.interactive.prompt

# SPDX-License-Identifier: GPL-3.0-or-later
"""
Interactive mode prompting for the Scribe-Data CLI to allow users to select request arguments.
"""

from pathlib import Path

from prompt_toolkit import prompt
from prompt_toolkit.completion import WordCompleter
from rich import print as rprint

from scribe_data.cli.interactive.config import interactive_mode_config
from scribe_data.utils import DEFAULT_WIKTIONARY_DUMP_EXPORT_DIR, resolve_lang_iso

# MARK: Word Completion


def create_word_completer(
    options: list[str], include_all: bool = False
) -> WordCompleter:
    """
    Build a case-insensitive word completer for the given options.

    Parameters
    ----------
    options : list[str]
        The words that may complete the current input.

    include_all : bool
        When true, 'All' is offered as the first completion ahead of ``options``.

    Returns
    -------
    WordCompleter
        A completer over the resulting options that ignores case when matching.
    """
    # A new list is built so the caller's list is never modified.
    completions = (["All"] + options) if include_all else options

    return WordCompleter(completions, ignore_case=True)
# MARK: Language Selection
def prompt_for_languages() -> None:
    """
    Prompt the user to select the languages for the request.

    The prompt repeats until the configuration holds at least one selected
    language. Entering 'All' (in any letter case) as one of the comma-separated
    entries selects every configured language. Other entries are kept only if
    they exactly match a configured language. Whitespace-only input leaves the
    current selection unchanged.

    Returns
    -------
    None
        ``interactive_mode_config.selected_languages`` is updated in place.
    """
    language_completer = create_word_completer(
        interactive_mode_config.languages, include_all=True
    )

    # Loop instead of recursing so repeated invalid input cannot grow the call stack.
    while True:
        # The default is rebuilt on every attempt from the current selection,
        # so a rejected entry is not offered again.
        selected_languages = prompt(
            "Select languages (comma-separated or 'All'): ",
            default=", ".join(interactive_mode_config.selected_languages),
            completer=language_completer,
        )

        entries = [entry.strip() for entry in selected_languages.split(",")]

        # Compare whole entries case-insensitively: the completer ignores case,
        # and a substring test would also match names that merely contain "All".
        if any(entry.casefold() == "all" for entry in entries):
            interactive_mode_config.selected_languages = (
                interactive_mode_config.languages
            )

        elif selected_languages.strip():  # check if input is not just whitespace
            interactive_mode_config.selected_languages = [
                entry for entry in entries if entry in interactive_mode_config.languages
            ]

        if interactive_mode_config.selected_languages:
            return

        rprint("[yellow]No language selected. Please try again.[/yellow]")
def _wiktionary_dump_search_dirs(location: Path) -> list[Path]:
    """
    Collect the existing directories in which Wiktionary dumps are searched for.

    Candidates are considered in this order, each one being user-expanded and
    resolved before it is checked:

    1. The given ``location``.
    2. The default export directory
       (:data:`~scribe_data.utils.DEFAULT_WIKTIONARY_DUMP_EXPORT_DIR`).
    3. The default export directory joined onto every ancestor of the current
       working directory.
    4. The current working directory itself.

    Including the ancestors lets dumps be found when interactive mode is started
    from a nested folder (e.g., ``scribe_data_wiktionary_json_export/spanish``).

    Parameters
    ----------
    location : Path
        User-supplied dump path or search root from
        :func:`resolve_wiktionary_dump_path`.

    Returns
    -------
    list[Path]
        The resolved candidates that are existing directories, without
        duplicates and in the order listed above.
    """
    working_dir = Path.cwd()
    ancestor_export_dirs = [
        ancestor / DEFAULT_WIKTIONARY_DUMP_EXPORT_DIR
        for ancestor in working_dir.parents
    ]
    ordered_candidates = [
        location,
        DEFAULT_WIKTIONARY_DUMP_EXPORT_DIR,
        *ancestor_export_dirs,
        working_dir,
    ]

    # Dict keys keep first-insertion order, which gives an ordered de-duplication.
    search_dirs: dict[Path, None] = {}
    for candidate in ordered_candidates:
        resolved = candidate.expanduser().resolve()
        if resolved.is_dir():
            search_dirs.setdefault(resolved)

    return list(search_dirs)
[docs] def resolve_wiktionary_dump_path(language: str, location: str | Path) -> Path | None: """ Resolve a Wiktionary dump file for the given source language. Locates the newest Wiktionary XML dump for the specified language. If the ``location`` argument points directly to a file, that file is returned. Otherwise, it searches through a prioritized list of directories for dumps matching the ``{iso}wiktionary*pages-articles.xml*`` pattern. Parameters ---------- language : str Source language name (e.g. ``german``). location : str or Path Path to a specific dump file, or a base directory to begin searching from. Returns ------- Path or None The path to the newest matching dump file, the explicit file if ``location`` is a file, or ``None`` if no matching dump is found. """ path = Path(location).expanduser().resolve() if path.is_file(): return path if not (iso := resolve_lang_iso(language)): return None dumps = [ dump_path for search_dir in _wiktionary_dump_search_dirs(path) for dump_path in search_dir.glob(f"{iso}wiktionary*pages-articles.xml*") ] return ( max(dumps, key=lambda dump_path: dump_path.stat().st_mtime).resolve() if dumps else None )
# MARK: Data Type Selection
def prompt_for_data_types() -> None:
    """
    Prompt the user to select the data types for the request.

    The prompt repeats until the configuration holds at least one selected
    data type. Entering 'All' (in any letter case) as one of the comma-separated
    entries selects every configured data type. Other entries are kept only if
    they exactly match a configured data type. Whitespace-only input leaves the
    current selection unchanged.

    Returns
    -------
    None
        ``interactive_mode_config.selected_data_types`` is updated in place.
    """
    data_type_completer = create_word_completer(
        interactive_mode_config.data_types, include_all=True
    )
    initial_data_type_selection = ", ".join(interactive_mode_config.selected_data_types)

    while True:
        selected_data_types = prompt(
            "Select data types (comma-separated or 'All'): ",
            default=initial_data_type_selection,
            completer=data_type_completer,
        )

        entries = [entry.strip() for entry in selected_data_types.split(",")]

        # Compare whole entries case-insensitively. ``str.capitalize`` lowercases
        # everything after the first character, so it missed "nouns, All", and a
        # substring test would also match names that merely contain "All".
        if any(entry.casefold() == "all" for entry in entries):
            interactive_mode_config.selected_data_types = (
                interactive_mode_config.data_types
            )
            break

        elif selected_data_types.strip():  # check if input is not just whitespace
            interactive_mode_config.selected_data_types = [
                entry
                for entry in entries
                if entry in interactive_mode_config.data_types
            ]

        if interactive_mode_config.selected_data_types:
            break  # exit loop if valid data types are selected

        rprint("[yellow]No data type selected. Please try again.[/yellow]")