From eb4cc8e6e02e719b0da98bc98bb31656574fe56e Mon Sep 17 00:00:00 2001 From: Wolfang Torres Date: Fri, 12 Jun 2026 00:43:55 +0800 Subject: [PATCH] update format for anki, upgrade translation package search, fix small bugs --- .vscode/launch.json | 2 +- README.md | 4 ++- pyproject.toml | 1 + src/anki_hsk_creator/__about__.py | 3 ++- src/anki_hsk_creator/__main__.py | 13 +++++----- src/anki_hsk_creator/anki_generation.py | 33 +++++++++++++++++-------- src/anki_hsk_creator/api.py | 13 +++++++--- src/anki_hsk_creator/constants.py | 25 +++++++------------ src/anki_hsk_creator/proccessor.py | 2 +- src/anki_hsk_creator/utility.py | 19 +++++++++----- 10 files changed, 70 insertions(+), 45 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 39ade66..bdf185c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -9,7 +9,7 @@ "name": "Python Debugger: Module", "type": "debugpy", "request": "launch", - "module": "anki-hsk-creator" + "module": "anki_hsk_creator" } ] } \ No newline at end of file diff --git a/README.md b/README.md index 23ba7ec..8ff89cc 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,9 @@ creates anki hsk decks from a list of words ## Installation ```console -pip install anki-hsk-creator +git clone https://github.com/resemble-ai/chatterbox +git clone https://gitea.wolfang.info.ve/wolfang/anki-hsk-creator +git clone https://gitea.wolfang.info.ve/wolfang/anki-hsk-creator-data ``` ## License diff --git a/pyproject.toml b/pyproject.toml index b0ce95d..62c56be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ dependencies = [ "torchaudio", "torchcodec", "python-dotenv", + ] [project.optional-dependencies] diff --git a/src/anki_hsk_creator/__about__.py b/src/anki_hsk_creator/__about__.py index 33859a3..32158c0 100644 --- a/src/anki_hsk_creator/__about__.py +++ b/src/anki_hsk_creator/__about__.py @@ -1,5 +1,6 @@ """about.py""" + # SPDX-FileCopyrightText: 2026-present Wolfang Torres # # SPDX-License-Identifier: 
GPL-3.0-or-later -__version__ = "0.1.0" +__version__ = "0.1.1" diff --git a/src/anki_hsk_creator/__main__.py b/src/anki_hsk_creator/__main__.py index 49d03ee..e6d7d6b 100644 --- a/src/anki_hsk_creator/__main__.py +++ b/src/anki_hsk_creator/__main__.py @@ -4,7 +4,7 @@ from pathlib import Path # Local -from .api import list_input_files, process_a_file, select_file +from .api import is_file, list_input_files, process_a_file, select_file from .constants import LANGUAGES @@ -18,10 +18,10 @@ def cli_select_files(): for n, file in enumerate(files): print(f"{n+1} - {file}") s = None - while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)): + while not s or not s.isnumeric() or not 1 <= int(s) <= len(files): s = input(f"Please select the file [1-{len(files)}]: ") selected = files[int(s) - 1] - if selected.is_file(): + if is_file(selected): in_file = selected else: level = selected @@ -32,11 +32,11 @@ def cli_select_files(): def cli_select_language(): """Selects a language for the trasnlatatio""" print("Select a language:") - for language_id, language in LANGUAGES.language_names.items(): + for language_id, language in LANGUAGES.LanguageNames.items(): print(f"{language_id} - {language}") s = None - while not s or s not in LANGUAGES.available_languages: - s = input(f"Please select the language: ({ LANGUAGES.available_languages})") + while not s or s not in LANGUAGES.AvailableLanguages: + s = input(f"Please select the language {LANGUAGES.AvailableLanguages}: ") return s @@ -45,6 +45,7 @@ def main(): while True: input_file = cli_select_files() language_id = cli_select_language() + print(f"processing file {input_file.input_file} with language {language_id}") process_a_file(input_file, language_id) diff --git a/src/anki_hsk_creator/anki_generation.py b/src/anki_hsk_creator/anki_generation.py index 44926f8..3a292f7 100644 --- a/src/anki_hsk_creator/anki_generation.py +++ b/src/anki_hsk_creator/anki_generation.py @@ -48,17 +48,17 @@ PHRASE_MODEL = Model( templates=[ { 
"name": "Card 1", - "qfmt": "{{Translated}}
{{Audio}}", + "qfmt": "{{Translated}}
{{Audio}}
{{type:Phrase}}", "afmt": '{{FrontSide}}
{{Phrase}}', }, { "name": "Card 2", - "qfmt": "{{Phrase}}
{{Audio}}", + "qfmt": "{{Phrase}}
{{Audio}}
{{type:Translated}}", "afmt": '{{FrontSide}}
{{Translated}}', }, { "name": "Card 3", - "qfmt": "{{Audio}}", + "qfmt": "{{Audio}}
{{type:Phrase}}", "afmt": '{{FrontSide}}
{{Phrase}}', }, ], @@ -70,7 +70,7 @@ HSK_MODEL = Model( 1708536519, "HSK Model", fields=[ - {"name": "English"}, + {"name": "Translated"}, {"name": "Pinyin"}, {"name": "Simplified"}, {"name": "Traditional"}, @@ -79,7 +79,12 @@ HSK_MODEL = Model( templates=[ { "name": "Card 1", - "qfmt": "{{Pinyin}}
{{English}}
{{Audio}}", + "qfmt": ( + "{{Pinyin}}" + "
{{Translated}}" + "
{{Audio}}" + "
Pinyin: {{type:Simplified}}" + ), "afmt": ( "{{FrontSide}}
{{Simplified}}
" "
{{Traditional}}
" @@ -87,16 +92,24 @@ HSK_MODEL = Model( }, { "name": "Card 2", - "qfmt": "
{{Simplified}}

" - "{{Traditional}}
", + "qfmt": ( + "
{{Simplified}}
" + "
{{Traditional}}
" + "
Pinyin: {{type:Pinyin}}" + "
Translated: {{type:Translated}}" + ), "afmt": ( "{{FrontSide}}
{{Pinyin}}" - "
{{English}}
{{Audio}}" + "
{{Translated}}
{{Audio}}" ), }, { "name": "Card 3", - "qfmt": "{{Audio}}", + "qfmt": ( + "{{Audio}}" + "
Pinyin: {{type:Pinyin}}" + "
Simplified: {{type:Simplified}}" + ), "afmt": ( "{{FrontSide}}
{{Pinyin}}" "
{{Simplified}}
" @@ -139,7 +152,7 @@ def output_anki_phrase(process_file: ProcessFile, results: list[TranslationResul final_file = process_file.output_name.with_suffix(".apkg") deck_name = "::".join( - process_file.input_file.parts[:-1] + (process_file.input_fil.stem,) + process_file.input_file.parts[:-1] + (process_file.input_file.stem,) ) deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name) package = Package(deck) diff --git a/src/anki_hsk_creator/api.py b/src/anki_hsk_creator/api.py index cee2aa0..e19484c 100644 --- a/src/anki_hsk_creator/api.py +++ b/src/anki_hsk_creator/api.py @@ -27,6 +27,11 @@ def list_input_files(search_path: Path = Path()) -> list[Path]: return [path.relative_to(INPUT) for path in level.glob("*")] +def is_file(file_path: Path) -> bool: + """Check if a relative path is a file""" + return (INPUT / file_path).is_file() + + def select_file(file_path: Path) -> ProcessFile: """Given a relative path from `list_input_files`, return a ProcessFile""" if (INPUT / file_path).is_file(): @@ -61,12 +66,14 @@ def create_input_file( def process_a_file(process_file: ProcessFile, language_id: str): """From a input_file, a language and an output type, process a file""" process_file.language_id = language_id - if PHRASES_TYPE in process_file.input_file.suffix: + if PHRASES_TYPE in process_file.input_file.suffixes: TTS.create_tts() TRANS.create_translator(LANGUAGES.CN, language_id) - with process_file.absolute_input_file.open("r") as file: + with process_file.absolute_input_file.open("r", encoding="utf8") as file: text_lines = [line.strip() for line in file.readlines()] results = translator_process(text_lines, process_file, language_id) output_anki_phrase(process_file, results) - elif DICT_TYPE in process_file.input_file.suffix: + elif DICT_TYPE in process_file.input_file.suffixes: print("not implemented") + else: + print("no identified") diff --git a/src/anki_hsk_creator/constants.py b/src/anki_hsk_creator/constants.py index 16c8f65..bbeae01 100644 --- 
a/src/anki_hsk_creator/constants.py +++ b/src/anki_hsk_creator/constants.py @@ -7,7 +7,7 @@ import importlib.resources from . import DATA_FOLDER # Resources -CCCEDICT_PATH = importlib.resources.files("anki-hsk-creator").joinpath("cedict_ts.u8") +CCCEDICT_PATH = importlib.resources.files("anki_hsk_creator").joinpath("cedict_ts.u8") # Data folder structure INPUT = DATA_FOLDER / "input" @@ -33,19 +33,12 @@ class LANGUAGES: TR = "tr" TH = "th" - @property - def available_languages(self) -> tuple: - """Available laguages for translation""" - return (self.EN, self.ES, self.FR, self.RU, self.TR, self.TH) - - @property - def language_names(self) -> dict: - """Gets the name of a language code""" - return { - self.EN: "English", - self.ES: "Spanish", - self.FR: "French", - self.RU: "Russian", - self.TR: "Turkish", - self.TH: "Thai", + AvailableLanguages = (EN, ES, FR, RU, TR, TH) + LanguageNames = { + EN: "English", + ES: "Spanish", + FR: "French", + RU: "Russian", + TR: "Turkish", + TH: "Thai", } diff --git a/src/anki_hsk_creator/proccessor.py b/src/anki_hsk_creator/proccessor.py index 47b6e82..e83dbac 100644 --- a/src/anki_hsk_creator/proccessor.py +++ b/src/anki_hsk_creator/proccessor.py @@ -20,7 +20,7 @@ def translator_process( results = [] for n, line in enumerate(text_lines): line = line.strip() - audio_path = process_file.resources / f"N{n::03.0n}.wav" + audio_path = process_file.resources / f"N{n:03n}.wav" if not audio_path.exists(): audio = TTS.MODEL.generate(f"{line}。", language_id=LANGUAGES.CN) torchaudio.save(audio_path, audio, TTS.MODEL.sr) diff --git a/src/anki_hsk_creator/utility.py b/src/anki_hsk_creator/utility.py index 545251d..ccac88a 100644 --- a/src/anki_hsk_creator/utility.py +++ b/src/anki_hsk_creator/utility.py @@ -33,13 +33,20 @@ class TRANS: argostranslate.package.update_package_index() TRANS.PACKAGES = argostranslate.package.get_available_packages() TRANS.UPDATED = True - package_to_install = next( - filter( - lambda x: x.from_code == from_code and 
x.to_code == to_code, + packages = filter( + lambda x: x.from_code == from_code or x.to_code == to_code, TRANS.PACKAGES, ) - ) - argostranslate.package.install_from_path(package_to_install.download()) + packages_to_install = [] + for in_package in packages: + if in_package.from_code == from_code: + for out_package in packages: + if out_package.to_code == to_code: + if in_package.to_code == out_package.from_code: + packages_to_install.append(in_package) + packages_to_install.append(out_package) + for package in packages_to_install: + argostranslate.package.install_from_path(package.download()) class CCCEDICT: @@ -135,7 +142,7 @@ class ProcessFile: """Posible name for the output file, still missing the filetype""" if self.language_id is None: raise ValueError("Not a valid language selected") - return self.input_file.parent / f"{self.input_file.stem}.{self.language_id})." + return self.out_folder / f"{self.input_file.stem}.{self.language_id}." class TranslationResult: