update format for anki,

upgrade translation package search, fix small bugs
2026-06-12 00:43:55 +08:00
parent 21c6416cfd
commit eb4cc8e6e0
10 changed files with 70 additions and 45 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -9,7 +9,7 @@
            "name": "Python Debugger: Module",
            "type": "debugpy",
            "request": "launch",
-            "module": "anki-hsk-creator"
+            "module": "anki_hsk_creator"
        }
    ]
 }
--- a/README.md
+++ b/README.md
@@ -14,7 +14,9 @@ creates anki hsk decks from a list of words
 ## Installation

 ```console
-pip install anki-hsk-creator
+git clone https://github.com/resemble-ai/chatterbox
+git clone https://gitea.wolfang.info.ve/wolfang/anki-hsk-creator
+git clone https://gitea.wolfang.info.ve/wolfang/anki-hsk-creator-data
 ```

 ## License
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,6 +35,7 @@ dependencies = [
  "torchaudio",
  "torchcodec",
  "python-dotenv",
+
 ]

 [project.optional-dependencies]
--- a/src/anki_hsk_creator/about.py
+++ b/src/anki_hsk_creator/about.py
@@ -1,5 +1,6 @@
 """about.py"""
+
 # SPDX-FileCopyrightText: 2026-present Wolfang Torres <wolfang.torres@gmail.com>
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
-__version__ = "0.1.0"
+__version__ = "0.1.1"
--- a/src/anki_hsk_creator/main.py
+++ b/src/anki_hsk_creator/main.py
@@ -4,7 +4,7 @@
 from pathlib import Path

 # Local
-from .api import list_input_files, process_a_file, select_file
+from .api import is_file, list_input_files, process_a_file, select_file
 from .constants import LANGUAGES


@@ -18,10 +18,10 @@ def cli_select_files():
        for n, file in enumerate(files):
            print(f"{n+1} - {file}")
        s = None
-        while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)):
+        while not s or not s.isnumeric() or not 1 <= int(s) <= len(files):
            s = input(f"Please select the file [1-{len(files)}]: ")
        selected = files[int(s) - 1]
-        if selected.is_file():
+        if is_file(selected):
            in_file = selected
        else:
            level = selected
@@ -32,11 +32,11 @@ def cli_select_files():
 def cli_select_language():
    """Selects a language for the trasnlatatio"""
    print("Select a language:")
-    for language_id, language in LANGUAGES.language_names.items():
+    for language_id, language in LANGUAGES.LanguageNames.items():
        print(f"{language_id} - {language}")
    s = None
-    while not s or s not in LANGUAGES.available_languages:
-        s = input(f"Please select the language: ({ LANGUAGES.available_languages})")
+    while not s or s not in LANGUAGES.AvailableLanguages:
+        s = input(f"Please select the language {LANGUAGES.AvailableLanguages}: ")
    return s


@@ -45,6 +45,7 @@ def main():
    while True:
        input_file = cli_select_files()
        language_id = cli_select_language()
+        print(f"processing file {input_file.input_file} with language {language_id}")
        process_a_file(input_file, language_id)


--- a/src/anki_hsk_creator/anki_generation.py
+++ b/src/anki_hsk_creator/anki_generation.py
@@ -48,17 +48,17 @@ PHRASE_MODEL = Model(
    templates=[
        {
            "name": "Card 1",
-            "qfmt": "{{Translated}}<br>{{Audio}}",
+            "qfmt": "{{Translated}}<br>{{Audio}}<br>{{type:Phrase}}",
            "afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}',
        },
        {
            "name": "Card 2",
-            "qfmt": "{{Phrase}}<br>{{Audio}}",
+            "qfmt": "{{Phrase}}<br>{{Audio}}<br>{{type:Translated}}",
            "afmt": '{{FrontSide}}<hr id="answer">{{Translated}}',
        },
        {
            "name": "Card 3",
-            "qfmt": "{{Audio}}",
+            "qfmt": "{{Audio}}<br>{{type:Phrase}}",
            "afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}',
        },
    ],
@@ -70,7 +70,7 @@ HSK_MODEL = Model(
    1708536519,
    "HSK Model",
    fields=[
-        {"name": "English"},
+        {"name": "Translated"},
        {"name": "Pinyin"},
        {"name": "Simplified"},
        {"name": "Traditional"},
@@ -79,7 +79,12 @@ HSK_MODEL = Model(
    templates=[
        {
            "name": "Card 1",
-            "qfmt": "<strong>{{Pinyin}}</strong><br>{{English}}<br>{{Audio}}",
+            "qfmt": (
+                "<strong>{{Pinyin}}</strong>"
+                "<br>{{Translated}}"
+                "<br>{{Audio}}"
+                "<br>Pinyin: {{type:Simplified}}"
+            ),
            "afmt": (
                "{{FrontSide}}<hr id='answer''><div class='simple'>{{Simplified}}</div>"
                "<br><div class='trad'>{{Traditional}}</div>"
@@ -87,16 +92,24 @@ HSK_MODEL = Model(
        },
        {
            "name": "Card 2",
-            "qfmt": "<div class='simple'>{{Simplified}}</div><br><div class='trad'>"
-            "{{Traditional}}</div>",
+            "qfmt": (
+                "<div class='simple'>{{Simplified}}</div>"
+                "<br><div class='trad'>{{Traditional}}</div>"
+                "<br>Pinyin: {{type:Pinyin}}"
+                "<br>Translated: {{type:Translated}}"
+            ),
            "afmt": (
                "{{FrontSide}}<hr id='answer'><strong>{{Pinyin}}</strong>"
-                "<br>{{English}}<br>{{Audio}}"
+                "<br>{{Translated}}<br>{{Audio}}"
            ),
        },
        {
            "name": "Card 3",
-            "qfmt": "{{Audio}}",
+            "qfmt": (
+                "{{Audio}}"
+                "<br>Pinyin: {{type:Pinyin}}"
+                "<br>Simplified: {{type:Simplified}}"
+            ),
            "afmt": (
                "{{FrontSide}}<hr id='answer'><strong>{{Pinyin}}</strong>"
                "<br><div class='simple'>{{Simplified}}</div>"
@@ -139,7 +152,7 @@ def output_anki_phrase(process_file: ProcessFile, results: list[TranslationResul

    final_file = process_file.output_name.with_suffix(".apkg")
    deck_name = "::".join(
-        process_file.input_file.parts[:-1] + (process_file.input_fil.stem,)
+        process_file.input_file.parts[:-1] + (process_file.input_file.stem,)
    )
    deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name)
    package = Package(deck)
--- a/src/anki_hsk_creator/api.py
+++ b/src/anki_hsk_creator/api.py
@@ -27,6 +27,11 @@ def list_input_files(search_path: Path = Path()) -> list[Path]:
    return [path.relative_to(INPUT) for path in level.glob("*")]


+def is_file(file_path: Path) -> bool:
+    """Check if a relative path is a file"""
+    return (INPUT / file_path).is_file()
+
+
 def select_file(file_path: Path) -> ProcessFile:
    """Given a relative path from `list_input_files`, return a ProcessFile"""
    if (INPUT / file_path).is_file():
@@ -61,12 +66,14 @@ def create_input_file(
 def process_a_file(process_file: ProcessFile, language_id: str):
    """From a input_file, a language and an output type, process a file"""
    process_file.language_id = language_id
-    if PHRASES_TYPE in process_file.input_file.suffix:
+    if PHRASES_TYPE in process_file.input_file.suffixes:
        TTS.create_tts()
        TRANS.create_translator(LANGUAGES.CN, language_id)
-        with process_file.absolute_input_file.open("r") as file:
+        with process_file.absolute_input_file.open("r", encoding="utf8") as file:
            text_lines = [line.strip() for line in file.readlines()]
        results = translator_process(text_lines, process_file, language_id)
        output_anki_phrase(process_file, results)
-    elif DICT_TYPE in process_file.input_file.suffix:
+    elif DICT_TYPE in process_file.input_file.suffixes:
        print("not implemented")
+    else:
+        print("no identified")
--- a/src/anki_hsk_creator/constants.py
+++ b/src/anki_hsk_creator/constants.py
@@ -7,7 +7,7 @@ import importlib.resources
 from . import DATA_FOLDER

 # Resources
-CCCEDICT_PATH = importlib.resources.files("anki-hsk-creator").joinpath("cedict_ts.u8")
+CCCEDICT_PATH = importlib.resources.files("anki_hsk_creator").joinpath("cedict_ts.u8")

 # Data folder structure
 INPUT = DATA_FOLDER / "input"
@@ -33,19 +33,12 @@ class LANGUAGES:
    TR = "tr"
    TH = "th"

-    @property
-    def available_languages(self) -> tuple:
-        """Available laguages for translation"""
-        return (self.EN, self.ES, self.FR, self.RU, self.TR, self.TH)
-
-    @property
-    def language_names(self) -> dict:
-        """Gets the name of a language code"""
-        return {
-            self.EN: "English",
-            self.ES: "Spanish",
-            self.FR: "French",
-            self.RU: "Russian",
-            self.TR: "Turkish",
-            self.TH: "Thai",
+    AvailableLanguages = (EN, ES, FR, RU, TR, TH)
+    LanguageNames = {
+            EN: "English",
+            ES: "Spanish",
+            FR: "French",
+            RU: "Russian",
+            TR: "Turkish",
+            TH: "Thai",
        }
--- a/src/anki_hsk_creator/proccessor.py
+++ b/src/anki_hsk_creator/proccessor.py
@@ -20,7 +20,7 @@ def translator_process(
    results = []
    for n, line in enumerate(text_lines):
        line = line.strip()
-        audio_path = process_file.resources / f"N{n::03.0n}.wav"
+        audio_path = process_file.resources / f"N{n:03n}.wav" 
        if not audio_path.exists():
            audio = TTS.MODEL.generate(f"{line}。", language_id=LANGUAGES.CN)
            torchaudio.save(audio_path, audio, TTS.MODEL.sr)
--- a/src/anki_hsk_creator/utility.py
+++ b/src/anki_hsk_creator/utility.py
@@ -33,13 +33,20 @@ class TRANS:
            argostranslate.package.update_package_index()
            TRANS.PACKAGES = argostranslate.package.get_available_packages()
            TRANS.UPDATED = True
-        package_to_install = next(
-            filter(
-                lambda x: x.from_code == from_code and x.to_code == to_code,
+        packages = filter(
+                lambda x: x.from_code == from_code or x.to_code == to_code,
                TRANS.PACKAGES,
            )
-        )
-        argostranslate.package.install_from_path(package_to_install.download())
+        packages_to_install = []
+        for in_package in packages:
+            if in_package.from_code == from_code:
+                for out_package in packages:
+                    if out_package.to_code == to_code:
+                        if in_package.to_code == out_package.from_code:
+                            packages_to_install.append(in_package)
+                            packages_to_install.append(out_package)
+        for package in packages_to_install:
+            argostranslate.package.install_from_path(package.download())


 class CCCEDICT:
@@ -135,7 +142,7 @@ class ProcessFile:
        """Posible name for the output file, still missing the filetype"""
        if self.language_id is None:
            raise ValueError("Not a valid language selected")
-        return self.input_file.parent / f"{self.input_file.stem}.{self.language_id})."
+        return self.out_folder / f"{self.input_file.stem}.{self.language_id}."


 class TranslationResult: