update speed generation for slower speed

update voice generation
fix anki template
2026-06-22 21:50:59 +08:00 · 2026-06-22 21:37:45 +08:00 · 2026-06-22 19:02:38 +08:00 · 2026-06-22 18:42:45 +08:00 · 2026-06-21 20:50:05 +08:00 · 2026-06-21 14:29:44 +08:00
10 changed files with 601 additions and 167 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -9,7 +9,7 @@
            "name": "Python Debugger: Module",
            "type": "debugpy",
            "request": "launch",
-            "module": "anki-hsk-creator"
+            "module": "anki_hsk_creator"
        }
    ]
 }
--- a/README.md
+++ b/README.md
@@ -14,7 +14,9 @@ creates anki hsk decks from a list of words
 ## Installation

 ```console
-pip install anki-hsk-creator
+git clone https://github.com/resemble-ai/chatterbox
+git clone https://gitea.wolfang.info.ve/wolfang/anki-hsk-creator
+git clone https://gitea.wolfang.info.ve/wolfang/anki-hsk-creator-data
 ```

 ## License
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,6 +35,7 @@ dependencies = [
  "torchaudio",
  "torchcodec",
  "python-dotenv",
+
 ]

 [project.optional-dependencies]
--- a/src/anki_hsk_creator/about.py
+++ b/src/anki_hsk_creator/about.py
@@ -1,5 +1,6 @@
 """about.py"""
+
 # SPDX-FileCopyrightText: 2026-present Wolfang Torres <wolfang.torres@gmail.com>
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
-__version__ = "0.1.0"
+__version__ = "0.1.3"
--- a/src/anki_hsk_creator/main.py
+++ b/src/anki_hsk_creator/main.py
@@ -4,11 +4,19 @@
 from pathlib import Path

 # Local
-from .api import list_input_files, process_a_file, select_file
-from .constants import LANGUAGES
+from .api import (
+    is_file,
+    list_input_files,
+    pre_process_a_dictionary_file,
+    process_a_dictionary_file,
+    process_a_phrases_file,
+    select_file,
+)
+from .constants import DICT_TYPE, LANGUAGES, PHRASES_TYPE
+from .utility import ProcessFile


-def cli_select_files():
+def cli_select_files() -> ProcessFile:
    """Loops until it finds a valid input_file"""
    print("Select data file:")
    in_file = None
@@ -18,10 +26,10 @@ def cli_select_files():
        for n, file in enumerate(files):
            print(f"{n+1} - {file}")
        s = None
-        while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)):
+        while not s or not s.isnumeric() or not 1 <= int(s) <= len(files):
            s = input(f"Please select the file [1-{len(files)}]: ")
        selected = files[int(s) - 1]
-        if selected.is_file():
+        if is_file(selected):
            in_file = selected
        else:
            level = selected
@@ -29,14 +37,36 @@ def cli_select_files():
    return input_file


-def cli_select_language():
-    """Selects a language for the trasnlatatio"""
-    print("Select a language:")
-    for language_id, language in LANGUAGES.language_names.items():
-        print(f"{language_id} - {language}")
+def cli_select_dictionay_tsv() -> bool:
+    """If a dictionary file is selected, ask if the user wants to proccess it"""
    s = None
-    while not s or s not in LANGUAGES.available_languages:
-        s = input(f"Please select the language: ({ LANGUAGES.available_languages})")
+    while s not in ("y", "yes", "no", "n"):
+        s = input("Do you want to Pre-Process a dictionary (y/n): ")
+    r = s in ("y", "yes")
+    return r
+
+
+def cli_select_language(languages: list = None) -> str:
+    """Prompt until the user picks one of the offered language codes.
+
+    `languages` restricts the offer to those codes; None offers every language.
+    Raises ValueError when the restriction leaves no language to offer.
+    """
+    available_languages = {
+        lan_id: lan
+        for lan_id, lan in LANGUAGES.LanguageNames.items()
+        if languages is None or lan_id in languages
+    }
+    if not available_languages:
+        raise ValueError("""No languages are avaliable,
+        if this is a dictionay file, you must preprocess it first""")
+    print("Select a language:")
+    for language_id, language in available_languages.items():
+        print(f"{language_id} - {language}")
+    lan_codes = ", ".join(available_languages)
+    s = None
+    while s not in available_languages:
+        s = input(f"Please select the language {lan_codes}: ")
    return s


@@ -44,8 +74,22 @@ def main():
    """CLI interface for the module"""
    while True:
        input_file = cli_select_files()
-        language_id = cli_select_language()
-        process_a_file(input_file, language_id)
+        if DICT_TYPE in input_file.input_file.suffixes:
+            dict_selected = cli_select_dictionay_tsv()
+            if dict_selected:
+                language_id = cli_select_language()
+                pre_process_a_dictionary_file(input_file, language_id)
+            else:
+                language_id = cli_select_language(
+                    input_file.available_dictionary_languages
+                )
+                process_a_dictionary_file(input_file, language_id)
+        elif PHRASES_TYPE in input_file.input_file.suffixes:
+            language_id = cli_select_language()
+            print(
+                f"processing file {input_file.input_file} with language {language_id}"
+            )
+            process_a_phrases_file(input_file, language_id)


 if __name__ == "__main__":
--- a/src/anki_hsk_creator/anki_generation.py
+++ b/src/anki_hsk_creator/anki_generation.py
@@ -5,15 +5,14 @@ Produces anki output

 # Standard Library
 import random
+from pathlib import Path

 # Pip
 from genanki import Deck, Model, Note, Package
+from pinyin_tone_converter.pinyin_tone_converter import PinyinToneConverter

 # Local
-from .utility import ProcessFile, TranslationResult
-
-# from pinyin_tone_converter.pinyin_tone_converter import PinyinToneConverter
-
+from .utility import DictionaryResult, ProcessFile, TranslationResult

 # Constants

@@ -48,29 +47,46 @@ PHRASE_MODEL = Model(
    templates=[
        {
            "name": "Card 1",
-            "qfmt": "{{Translated}}<br>{{Audio}}",
+            "qfmt": "{{Translated}}<br>{{Audio}}<br>{{type:Phrase}}",
            "afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}',
        },
        {
            "name": "Card 2",
-            "qfmt": "{{Phrase}}<br>{{Audio}}",
+            "qfmt": "{{Phrase}}<br>{{Audio}}<br>{{type:Translated}}",
            "afmt": '{{FrontSide}}<hr id="answer">{{Translated}}',
        },
        {
            "name": "Card 3",
-            "qfmt": "{{Audio}}",
+            "qfmt": "{{Audio}}<br>{{type:Phrase}}",
            "afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}',
        },
    ],
    css=CSS,
 )

+# Note type for dictation decks. Its single card plays the audio and asks the
+# learner to type the phrase; the answer side then shows the phrase and its
+# translation. The name labels it as the dictation note type inside Anki
+# (fields stay in the order the notes fill them: Translated, Phrase, Audio).
+DICTATION_MODEL = Model(
+    3187277536,
+    "Dictation Model",
+    fields=[{"name": "Translated"}, {"name": "Phrase"}, {"name": "Audio"}],
+    templates=[
+        {
+            "name": "Card 1",
+            "qfmt": "{{Audio}}<br>{{type:Phrase}}",
+            "afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}<br>{{Translated}}',
+        },
+    ],
+    css=CSS,
+)

 HSK_MODEL = Model(
    1708536519,
    "HSK Model",
    fields=[
-        {"name": "English"},
+        {"name": "Translated"},
        {"name": "Pinyin"},
        {"name": "Simplified"},
        {"name": "Traditional"},
@@ -79,7 +95,12 @@ HSK_MODEL = Model(
    templates=[
        {
            "name": "Card 1",
-            "qfmt": "<strong>{{Pinyin}}</strong><br>{{English}}<br>{{Audio}}",
+            "qfmt": (
+                "<strong>{{Pinyin}}</strong>"
+                "<br>{{Translated}}"
+                "<br>{{Audio}}"
+                "<br>Simplified: {{type:Simplified}}"
+            ),
            "afmt": (
                "{{FrontSide}}<hr id='answer''><div class='simple'>{{Simplified}}</div>"
                "<br><div class='trad'>{{Traditional}}</div>"
@@ -87,16 +108,24 @@ HSK_MODEL = Model(
        },
        {
            "name": "Card 2",
-            "qfmt": "<div class='simple'>{{Simplified}}</div><br><div class='trad'>"
-            "{{Traditional}}</div>",
+            "qfmt": (
+                "<div class='simple'>{{Simplified}}</div>"
+                "<br><div class='trad'>{{Traditional}}</div>"
+                "<br>Pinyin: {{type:Pinyin}}"
+                # "<br>Translated: {{type:Translated}}"
+            ),
            "afmt": (
                "{{FrontSide}}<hr id='answer'><strong>{{Pinyin}}</strong>"
-                "<br>{{English}}<br>{{Audio}}"
+                "<br>{{Translated}}<br>{{Audio}}"
            ),
        },
        {
            "name": "Card 3",
-            "qfmt": "{{Audio}}",
+            "qfmt": (
+                "{{Audio}}"
+                # "<br>Pinyin: {{type:Pinyin}}"
+                "<br>Simplified: {{type:Simplified}}"
+            ),
            "afmt": (
                "{{FrontSide}}<hr id='answer'><strong>{{Pinyin}}</strong>"
                "<br><div class='simple'>{{Simplified}}</div>"
@@ -110,38 +139,87 @@ HSK_MODEL = Model(
 # Proccess


-# def output_anki_dictionary(out_file, results):
-#     """Creates an anki file from a dictionary results"""
-#     final_file = out_file.parent / f"{out_file.stem}.apkg"
-#     deck_name = "::".join(out_file.relative_to(OUTPUT).parts[:-1] + (out_file.stem,))
-#     deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name)
-#     package = Package(deck)
-#     audios = []
-#     for entry, audio in results:
-#         note = Note(
-#             model=HSK_MODEL,
-#             fields=[
-#                 "\n ".join(f"{n+1}. {m}" for n, m in enumerate(entry.meanings)),
-#                 PinyinToneConverter().convert_text(entry.pinyin),
-#                 entry.simplified,
-#                 entry.traditional,
-#                 f"[sound:{audio.name}]",
-#             ],
-#         )
-#         audios.append(audio)
-#         deck.add_note(note)
-#     package.media_files = audios
-#     package.write_to_file(final_file)
-
-
-def output_anki_phrase(process_file: ProcessFile, results: list[TranslationResult]):
-    """Creates an anki file from a phrases results"""
-
+def output_anki_dictation(
+    process_file: ProcessFile, results: list[DictionaryResult]
+) -> Path:
+    """Creates an anki file for dictation result"""
    final_file = process_file.output_name.with_suffix(".apkg")
    deck_name = "::".join(
-        process_file.input_file.parts[:-1] + (process_file.input_fil.stem,)
+        process_file.input_file.parts[:-1] + (process_file.output_name.stem,)
+    )
+    deck = Deck(
+        random.randrange(1 << 30, 1 << 31),
+        deck_name,
+        f"Deck for {final_file.name}, "
+        "created in https://www.wolfang.info.ve/hskankicreator/",
+    )
+    package = Package(deck)
+    audios = []
+    for result in results:
+        note = Note(
+            model=DICTATION_MODEL,
+            fields=[
+                result.translated,
+                result.line,
+                f"[sound:{result.audio_path.name}]",
+            ],
+        )
+        deck.add_note(note)
+        audios.append(result.audio_path)
+    package.media_files = audios
+    package.write_to_file(final_file)
+    return final_file
+
+
+def output_anki_dictionary(
+    process_file: ProcessFile, results: list[DictionaryResult]
+) -> Path:
+    """Creates an anki file from a dictionary results"""
+    final_file = process_file.output_name.with_suffix(".apkg")
+    deck_name = "::".join(
+        process_file.input_file.parts[:-1] + (process_file.output_name.stem,)
+    )
+    deck = Deck(
+        random.randrange(1 << 30, 1 << 31),
+        deck_name,
+        f"Deck for {final_file.name}, "
+        "created in https://www.wolfang.info.ve/hskankicreator/",
+    )
+    package = Package(deck)
+    audios = []
+    for result in results:
+        note = Note(
+            model=HSK_MODEL,
+            fields=[
+                # "\n ".join(f"{n+1}. {m}" for n, m in enumerate(result.meanings)),
+                result.meaning,
+                PinyinToneConverter().convert_text(result.pinyin),
+                result.simplified,
+                result.traditional,
+                f"[sound:{result.audio_path.name}]",
+            ],
+        )
+        audios.append(result.audio_path)
+        deck.add_note(note)
+    package.media_files = audios
+    package.write_to_file(final_file)
+    return final_file
+
+
+def output_anki_phrase(
+    process_file: ProcessFile, results: list[TranslationResult]
+) -> Path:
+    """Creates an anki file from a phrases results"""
+    final_file = process_file.output_name.with_suffix(".apkg")
+    deck_name = "::".join(
+        process_file.input_file.parts[:-1] + (process_file.output_name.stem,)
+    )
+    deck = Deck(
+        random.randrange(1 << 30, 1 << 31),
+        deck_name,
+        f"Deck for {final_file.name}, "
+        "created in https://www.wolfang.info.ve/hskankicreator/",
    )
-    deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name)
    package = Package(deck)
    audios = []
    for result in results:
@@ -157,3 +235,4 @@ def output_anki_phrase(process_file: ProcessFile, results: list[TranslationResul
        audios.append(result.audio_path)
    package.media_files = audios
    package.write_to_file(final_file)
+    return final_file
--- a/src/anki_hsk_creator/api.py
+++ b/src/anki_hsk_creator/api.py
@@ -8,10 +8,27 @@ from pathlib import Path

 # Local
 from . import DATA_FOLDER
-from .anki_generation import output_anki_phrase
-from .constants import DICT_TYPE, INPUT, LANGUAGES, PHRASES_TYPE
-from .proccessor import translator_process
-from .utility import TRANS, TTS, ProcessFile
+from .anki_generation import (
+    output_anki_dictation,
+    output_anki_dictionary,
+    output_anki_phrase,
+)
+from .constants import (
+    DICTATION_TYPE,
+    DICT_TYPE,
+    INPUT,
+    LANGUAGES,
+    OUTPUT,
+    PHRASES_TYPE,
+    RESOURCES,
+)
+from .proccessor import (
+    dictation_process,
+    dictionary_pre_process,
+    dictionary_process,
+    translator_process,
+)
+from .utility import CCCEDICT, TRANS, TTS, ProcessFile

 # interface

@@ -21,12 +38,40 @@ def get_data_folder() -> Path:
    return DATA_FOLDER


+def get_output_folder() -> Path:
+    """Utility function, return the OUTPUT folder"""
+    return OUTPUT
+
+
+def get_resources_folder() -> Path:
+    """Utility function, return the RESOURCES folder"""
+    return RESOURCES
+
+
 def list_input_files(search_path: Path = Path()) -> list[Path]:
    """Return a list of files relative to the INPUT path"""
    level = INPUT / search_path
    return [path.relative_to(INPUT) for path in level.glob("*")]


+def is_file(file_path: Path) -> bool:
+    """Check if a relative path is a file"""
+    return (INPUT / file_path).is_file()
+
+
+def read_input_file(file_path: Path) -> str:
+    """Reads an input file"""
+    return (INPUT / file_path).open(encoding="utf8", newline="\n").read()
+
+
+def read_dictionary_file(process_file: ProcessFile, language_id: str) -> str:
+    """Reads an dictionary resource file"""
+    process_file.language_id = language_id
+    return process_file.dictionary_resource_file.open(
+        encoding="utf8", newline="\n"
+    ).read()
+
+
 def select_file(file_path: Path) -> ProcessFile:
    """Given a relative path from `list_input_files`, return a ProcessFile"""
    if (INPUT / file_path).is_file():
@@ -35,6 +80,71 @@ def select_file(file_path: Path) -> ProcessFile:
        raise ValueError(f"{file_path} is not a file")


+def create_folder(file_path: Path) -> ProcessFile:
+    """Creates a folder in a file_path"""
+    input_folder = INPUT / file_path
+    if input_folder.exists():
+        raise ValueError(f"{file_path} already exists")
+    else:
+        input_folder.mkdir(exist_ok=True, parents=True)
+        return ProcessFile(input_folder)
+
+
+def delete_folder(file_path: Path):
+    """Delete the empty folder `file_path` (relative to INPUT).
+    Raises ValueError if it doesn't exist or still has entries inside.
+    """
+    input_folder = INPUT / file_path
+    if not input_folder.exists():
+        raise ValueError(f"{file_path} doesn't exists")
+    if any(input_folder.iterdir()):
+        raise ValueError(f"{file_path} has files inside")
+    input_folder.rmdir()
+
+
+def delete_file(file_path: Path):
+    """Delete the file `file_path` (relative to INPUT).
+    Raises ValueError when the path is missing or is not a regular file.
+    """
+    if not (input_file := INPUT / file_path).is_file():
+        raise ValueError(f"{file_path} doesn't exists")
+    input_file.unlink()
+
+
+def list_file_resources(file_path: ProcessFile) -> list[Path]:
+    """Return the entries directly under `file_path.resources` as a flat list"""
+    return list(file_path.resources.glob("*"))
+
+
+def analize_input_files(search_path: Path = Path()) -> dict[str, list[Path]]:
+    """Analaizes a path file, and returns input, resources and output files"""
+    data = {
+        "input": [],
+        "resources": [],
+        "output": [],
+    }
+    if search_path is None:
+        return data
+    input_path = INPUT / search_path
+    if input_path.is_file():
+        process_file = ProcessFile(search_path)
+        res_path = process_file.resources
+        outputs_path = process_file.out_folder
+        data["input"] = [search_path]
+        data["resources"] = [path.relative_to(RESOURCES) for path in res_path.glob("*")]
+        data["output"] = [
+            path.relative_to(OUTPUT)
+            for path in outputs_path.glob(f"{process_file.input_file.stem}*")
+        ]
+    elif input_path.exists():
+        res_path = RESOURCES / search_path
+        outputs_path = OUTPUT / search_path
+        data["input"] = [path.relative_to(INPUT) for path in input_path.glob("*")]
+        data["resources"] = [path.relative_to(RESOURCES) for path in res_path.glob("*")]
+        data["output"] = [path.relative_to(OUTPUT) for path in outputs_path.glob("*")]
+    return data
+
+
 def create_input_file(
    name: str, file_type: str, text: str, sub_folder: Path = Path()
 ) -> ProcessFile:
@@ -43,30 +153,87 @@ def create_input_file(
    it is created and the file placed inside.
    returns the relative path for future processing

-    valid file_types: ".phrases", ".dictionary"
+    valid file_types: ".phrases", ".dictionary" ".dictation"
    """
-    if file_type not in (PHRASES_TYPE, DICT_TYPE):
-        raise ValueError(f"file_type {file_type} not in {(PHRASES_TYPE, DICT_TYPE)}")
+    if file_type not in (PHRASES_TYPE, DICT_TYPE, DICTATION_TYPE):
+        raise ValueError(
+            f"file_type {file_type} not in {(PHRASES_TYPE, DICT_TYPE, DICTATION_TYPE)}"
+        )
    filename = f"{name}{file_type}.txt"
    relative = sub_folder / filename
    # write file
    file_path = INPUT / relative
    file_path.parent.mkdir(exist_ok=True, parents=True)
-    file_path.write_text(text, encoding="utf8")
+    file_path.write_text(text, encoding="utf8", newline="\n")
    # create process_file for future
    process_file = ProcessFile(relative)
    return process_file


-def process_a_file(process_file: ProcessFile, language_id: str):
-    """From a input_file, a language and an output type, process a file"""
+def write_input_file(process_file: ProcessFile, text: str):
+    """Write an input file"""
+    with process_file.absolute_input_file.open(
+        "w", encoding="utf8", newline="\n"
+    ) as file:
+        for line in text.split("\n"):
+            line = line.strip()
+            if line:
+                file.write(f"{line}\n")
+
+
+def write_resource_file(process_file: ProcessFile, language_id: str, text: str):
+    """Write a resource file"""
    process_file.language_id = language_id
-    if PHRASES_TYPE in process_file.input_file.suffix:
-        TTS.create_tts()
-        TRANS.create_translator(LANGUAGES.CN, language_id)
-        with process_file.absolute_input_file.open("r") as file:
-            text_lines = [line.strip() for line in file.readlines()]
-        results = translator_process(text_lines, process_file, language_id)
-        output_anki_phrase(process_file, results)
-    elif DICT_TYPE in process_file.input_file.suffix:
-        print("not implemented")
+    with process_file.dictionary_resource_file.open(
+        "w", encoding="utf8", newline="\n"
+    ) as file:
+        for line in text.split("\n"):
+            line = line.strip()
+            if line:
+                file.write(f"{line}\n")
+
+
+def pre_process_a_dictionary_file(process_file: ProcessFile, language_id: str):
+    """Pre-process an input word list into its dictionary resource file.
+    Blank input lines are skipped; `language_id` is stored on process_file.
+    """
+    process_file.language_id = language_id
+    TRANS.create_translator(LANGUAGES.EN, language_id)
+    CCCEDICT.create_cedict(language_id)
+    with process_file.absolute_input_file.open(encoding="utf8", newline="\n") as file:
+        words_list = [word for word in map(str.strip, file) if word]
+    dictionary_pre_process(words_list, process_file)
+
+
+def process_a_dictionary_file(process_file: ProcessFile, language_id: str) -> Path:
+    """Process a dictionary file"""
+    TTS.create_tts()
+    process_file.language_id = language_id
+    results = dictionary_process(process_file)
+    return output_anki_dictionary(process_file, results)
+
+
+def process_a_dictation_file(process_file: ProcessFile, language_id: str) -> Path:
+    """Build the Anki deck of a dictation file, one note per "。"-ended sentence.
+    Empty segments (such as the text after the final "。") produce no note.
+    """
+    TTS.create_tts()
+    TRANS.create_translator(LANGUAGES.CN, language_id)
+    process_file.language_id = language_id
+    with process_file.absolute_input_file.open(encoding="utf8", newline="\n") as file:
+        text_lines = [s for s in map(str.strip, file.read().split("。")) if s]
+    results = dictation_process(text_lines, process_file)
+    return output_anki_dictation(process_file, results)
+
+
+def process_a_phrases_file(process_file: ProcessFile, language_id: str) -> Path:
+    """Process a phrases file"""
+    process_file.language_id = language_id
+    TTS.create_tts()
+    TRANS.create_translator(LANGUAGES.CN, language_id)
+    with process_file.absolute_input_file.open(
+        "r", encoding="utf8", newline="\n"
+    ) as file:
+        text_lines = [line.strip() for line in file.readlines()]
+    results = translator_process(text_lines, process_file)
+    return output_anki_phrase(process_file, results)
--- a/src/anki_hsk_creator/constants.py
+++ b/src/anki_hsk_creator/constants.py
@@ -7,7 +7,7 @@ import importlib.resources
 from . import DATA_FOLDER

 # Resources
-CCCEDICT_PATH = importlib.resources.files("anki-hsk-creator").joinpath("cedict_ts.u8")
+CCCEDICT_PATH = importlib.resources.files("anki_hsk_creator").joinpath("cedict_ts.u8")

 # Data folder structure
 INPUT = DATA_FOLDER / "input"
@@ -20,6 +20,7 @@ RESOURCES.mkdir(exist_ok=True, parents=True)
 # File Types
 PHRASES_TYPE = ".phrases"
 DICT_TYPE = ".dictionary"
+DICTATION_TYPE = ".dictation"


 class LANGUAGES:
@@ -33,19 +34,12 @@ class LANGUAGES:
    TR = "tr"
    TH = "th"

-    @property
-    def available_languages(self) -> tuple:
-        """Available laguages for translation"""
-        return (self.EN, self.ES, self.FR, self.RU, self.TR, self.TH)
-
-    @property
-    def language_names(self) -> dict:
-        """Gets the name of a language code"""
-        return {
-            self.EN: "English",
-            self.ES: "Spanish",
-            self.FR: "French",
-            self.RU: "Russian",
-            self.TR: "Turkish",
-            self.TH: "Thai",
-        }
+    AvailableLanguages = (EN, ES, FR, RU, TR, TH)
+    LanguageNames = {
+        EN: "English",
+        ES: "Spanish",
+        FR: "French",
+        RU: "Russian",
+        TR: "Turkish",
+        TH: "Thai",
+    }
--- a/src/anki_hsk_creator/proccessor.py
+++ b/src/anki_hsk_creator/proccessor.py
@@ -1,84 +1,135 @@
 """processor.py"""

+# Standard Library
+import csv
+
 # Pip
 import argostranslate.translate
 import torchaudio

 # Local
 from .constants import LANGUAGES
-from .utility import TTS, ProcessFile, TranslationResult  # , CCCEDICT
+from .utility import CCCEDICT, TTS, DictionaryResult, ProcessFile, TranslationResult
+
+# Constants
+
+FIELDNAMES = ["simplified", "traditional", "pinyin", "meaning"]
+DIALECT = "excel-tab"

 # Results Classes


+def dictation_process(
+    text_lines: list[str], process_file: ProcessFile
+) -> list[TranslationResult]:
+    """Process for Dictation translation"""
+    results = []
+    for n, line in enumerate(text_lines):
+        line = line.strip()
+        line = "  ".join(line.split())
+        line = line.replace("，", "， 。。。 ")
+        audio_path = process_file.resources / f"N{n:03n}.wav"
+        if not audio_path.exists():
+            audio = TTS.MODEL.generate(
+                f"{line}。", language_id=LANGUAGES.CN, **TTS.DEFAULTS
+            )
+            torchaudio.save(audio_path, audio, TTS.MODEL.sr)
+        translated = argostranslate.translate.translate(
+            line, LANGUAGES.CN, process_file.language_id
+        )
+        results.append(
+            TranslationResult(process_file.language_id, translated, line, audio_path)
+        )
+    return results
+
+
 def translator_process(
-    text_lines: list[str],
-    process_file: ProcessFile,
-    language_id: str,
+    text_lines: list[str], process_file: ProcessFile
 ) -> list[TranslationResult]:
    """Process for phases or sentence translation"""
    results = []
    for n, line in enumerate(text_lines):
        line = line.strip()
-        audio_path = process_file.resources / f"N{n::03.0n}.wav"
+        line = "  ".join(line.split())
+        line = line.replace("，", "， 。。。 ")
+        audio_path = process_file.resources / f"N{n:03n}.wav"
        if not audio_path.exists():
-            audio = TTS.MODEL.generate(f"{line}。", language_id=LANGUAGES.CN)
+            audio = TTS.MODEL.generate(
+                f"{line}。", language_id=LANGUAGES.CN, **TTS.DEFAULTS
+            )
            torchaudio.save(audio_path, audio, TTS.MODEL.sr)
-        translated = argostranslate.translate.translate(line, LANGUAGES.CN, language_id)
-        results.append(TranslationResult(language_id, translated, line, audio_path))
+        translated = argostranslate.translate.translate(
+            line, LANGUAGES.CN, process_file.language_id
+        )
+        results.append(
+            TranslationResult(process_file.language_id, translated, line, audio_path)
+        )
    return results


-# def dictionary_process(dictionary, tts, in_file, resources):
-#     """Process dictionary files"""
-#     words_list = in_file.open(encoding="utf8").read().strip().split("\n")
-#     results = []
-#     try:
-#         with in_file.open("w", encoding="utf8") as input_file:
-#             for words in words_list:
-#                 word = words.split()[0]
-#                 pinyin = " ".join(words.split()[1:]) if len(words.split()) > 1 else None
-#                 if v := dictionary.get(word):
-#                     if len(v) > 1:
-#                         print(f"\nWARNING: {word} has multiple meanings:")
-#                         if pinyin and pinyin != "ERROR":
-#                             ml = list(filter(lambda x: x.pinyin == pinyin, v))
-#                         else:
-#                             ml = v
-#                         if len(ml) > 1:
-#                             for n, w in enumerate(ml):
-#                                 print(f"{n+1} - {w}")
-#                                 for m in w.meanings:
-#                                     print(f"\t{m}")
-#                             s = None
-#                             while (
-#                                 not s
-#                                 or not s.isnumeric()
-#                                 or not (1 <= int(s) <= len(v))
-#                             ):
-#                                 s = input(
-#                                     f"Please select the correct word [1-{len(v)}]: "
-#                                 )
-#                             v = v[int(s) - 1]
-#                         else:
-#                             v = ml[0]
-#                     else:
-#                         v = v[0]
-#                     audio_path = resources / f"{word}.wav"
-#                     if not audio_path.exists():
-#                         audio = tts.generate(f"{word}。", language_id="zh")
-#                         torchaudio.save(audio_path, audio, tts.sr)
-#                     input_file.write(f"{word}\t{v.pinyin}\n")
-#                     results.append((v, audio_path))
-#                 else:
-#                     print("============================================")
-#                     print(f"===================>ERROR: {word} not found")
-#                     print("============================================")
-#                     input_file.write(f"{word}\tERROR\n")
-#     except Exception:
-#         with in_file.open("w", encoding="utf8") as input_file:
-#             input_file.write("\n".join(words_list))
-#     return results
+def dictionary_pre_process(words_list: list[str], process_file: ProcessFile):
+    """Pre Process dictionary files into a intermediary resources file"""
+    dictionary = CCCEDICT.create_cedict(process_file.language_id)
+    with process_file.dictionary_resource_file.open(
+        "w", encoding="utf8", newline=""
+    ) as resource_file:
+        tsv_writer = csv.DictWriter(
+            resource_file, dialect=DIALECT, fieldnames=FIELDNAMES
+        )
+        tsv_writer.writeheader()
+        for words in words_list:
+            word = words.split()[0]
+            pinyin = " ".join(words.split()[1:]) if len(words.split()) > 1 else None
+            if entries := dictionary.get(word):
+                if pinyin is not None:
+                    entries = list(filter(lambda x: x.pinyin == pinyin, entries))
+                if len(entries) > 1:
+                    print(f"\nWARNING: {word} has multiple meanings:")
+                for entry in entries:
+                    for meaning in entry.meanings:
+                        tsv_writer.writerow(
+                            {
+                                "simplified": entry.simplified,
+                                "traditional": entry.traditional,
+                                "pinyin": entry.pinyin,
+                                "meaning": meaning,
+                            }
+                        )
+            else:
+                print("============================================")
+                print(f"===================>ERROR: {word} not found")
+                print("============================================")
+                tsv_writer.writerow(
+                    {
+                        "simplified": word,
+                        "traditional": None,
+                        "pinyin": None,
+                        "meaning": None,
+                    }
+                )
+
+
+def dictionary_process(process_file: ProcessFile) -> list[DictionaryResult]:
+    """Process a dictionary_resource_file into a final result"""
+    results = []
+    with process_file.dictionary_resource_file.open(
+        "r", encoding="utf8", newline=""
+    ) as resource_file:
+        reader = csv.DictReader(resource_file, dialect=DIALECT)
+        for line in reader:
+            audio_path = process_file.resources / f"{line['pinyin']}.wav"
+            if not audio_path.exists():
+                audio = TTS.MODEL.generate(
+                    f"{line['simplified']}。", language_id=LANGUAGES.CN, **TTS.DEFAULTS
+                )
+                torchaudio.save(audio_path, audio, TTS.MODEL.sr)
+            print(line)
+            result = DictionaryResult(
+                **line, audio_path=audio_path, language_id=process_file.language_id
+            )
+            results.append(result)
+    return results
+

 # def output_tsv(out_file, results):
 #     """writes the output as a tsv file"""
--- a/src/anki_hsk_creator/utility.py
+++ b/src/anki_hsk_creator/utility.py
@@ -11,7 +11,7 @@ from pathlib import Path
 import argostranslate.package
 import argostranslate.translate
 import torch
-from cedict_utils.cedict import CedictParser
+from cedict_utils.cedict import CedictEntry, CedictParser
 from chatterbox.mtl_tts import ChatterboxMultilingualTTS

 # Local
@@ -24,22 +24,81 @@ class TRANS:
    """Static Class for Argos translate"""

    UPDATED = False
-    PACKAGES = None
+    PACKAGES = []

    @staticmethod
    def create_translator(from_code, to_code):
        """Download and install Argos Translate package"""
+        print(f"Create translator from {from_code} to {to_code}")
+        # Nothing to install when source and target language are the same.
+        if from_code == to_code:
+            return
        if not TRANS.UPDATED:
            argostranslate.package.update_package_index()
            TRANS.PACKAGES = argostranslate.package.get_available_packages()
            TRANS.UPDATED = True
-        package_to_install = next(
+        # Keep every available package that starts at from_code or ends at
+        # to_code; the pairing loop below picks from this subset.
+        packages = tuple(
            filter(
-                lambda x: x.from_code == from_code and x.to_code == to_code,
+                lambda x: x.from_code == from_code or x.to_code == to_code,
                TRANS.PACKAGES,
            )
        )
-        argostranslate.package.install_from_path(package_to_install.download())
+        print(f"available packages {packages[:5]}")
+        packages_to_install = []
+        # Pair each package leaving from_code with each package arriving at
+        # to_code whose languages chain (in.to_code == out.from_code), i.e. a
+        # two-step route through an intermediate language.
+        # NOTE(review): a single package going straight from from_code to
+        # to_code never satisfies this pairing (it would need a matching
+        # to_code -> to_code partner), so it looks like direct packages are
+        # not installed here - confirm this is intended.
+        for in_package in packages:
+            if in_package.from_code == from_code:
+                for out_package in packages:
+                    if out_package.to_code == to_code:
+                        if in_package.to_code == out_package.from_code:
+                            print(
+                                f"Check in_package {in_package.from_code} {in_package.to_code}"
+                            )
+                            print(
+                                f"Check out_package {out_package.from_code} {out_package.to_code}"
+                            )
+                            packages_to_install.append(in_package)
+                            packages_to_install.append(out_package)
+        # Download and install every package collected above, in pair order.
+        for package in packages_to_install:
+            print(f"instaling package {package}")
+            argostranslate.package.install_from_path(package.download())
+
+
+class TranslatedEntry:
+    """Wrap a CEDICT entry and expose its meanings in ``language_id``.
+
+    ``simplified``, ``traditional`` and ``pinyin`` are read straight from the
+    wrapped entry. ``meanings`` is translated from English on first access and
+    cached, so repeated reads neither re-run the translation nor grow the
+    returned list.
+    """
+
+    def __init__(self, entry: CedictEntry, language_id: str):
+        self.entry = entry
+        self.language_id = language_id
+        # None means "not translated yet"; set by the first `meanings` read.
+        self._translated_meanings = None
+
+    @property
+    def simplified(self):
+        """Simplified form of the wrapped entry"""
+        return self.entry.simplified
+
+    @property
+    def traditional(self):
+        """Traditional form of the wrapped entry"""
+        return self.entry.traditional
+
+    @property
+    def pinyin(self):
+        """Pinyin of the wrapped entry"""
+        return self.entry.pinyin
+
+    @property
+    def meanings(self):
+        """Meanings of the entry in ``language_id`` (translated once, cached)"""
+        if self._translated_meanings is None:
+            translated = []
+            for meaning in self.entry.meanings:
+                if self.language_id != LANGUAGES.EN:
+                    print(f"translating from {LANGUAGES.EN} to {self.language_id}")
+                    print(f"-> {meaning}")
+                    trans_meaning = argostranslate.translate.translate(
+                        meaning, LANGUAGES.EN, self.language_id
+                    )
+                else:
+                    trans_meaning = meaning
+                translated.append(trans_meaning)
+            # Store only after the whole list is built so a failed translation
+            # does not leave a partial result cached.
+            self._translated_meanings = translated
+        return self._translated_meanings


 class CCCEDICT:
@@ -50,24 +109,23 @@ class CCCEDICT:
    DICTIONARY_LIST = {}

    @staticmethod
-    def create_cedict(language_id=LANGUAGES.EN):
+    def create_cedict(
+        language_id: str = LANGUAGES.EN,
+    ) -> dict[str, list[TranslatedEntry]]:
        """Creates a create_cedict dictionary object"""
        if not CCCEDICT.PARSER:
            CCCEDICT.PARSER = CedictParser()
            CCCEDICT.PARSER.read_file(CCCEDICT_PATH)
            CCCEDICT.ENTRIES = CCCEDICT.PARSER.parse()
        if language_id not in CCCEDICT.DICTIONARY_LIST:
+            TRANS.create_translator(LANGUAGES.EN, language_id)
            dictionary = {}
            for entry in CCCEDICT.ENTRIES:
-                if language_id != LANGUAGES.EN:
-                    TRANS.create_translator(LANGUAGES.EN, language_id)
-                    entry = argostranslate.translate.translate(
-                        entry, LANGUAGES.EN, language_id
-                    )
+                trans_entry = TranslatedEntry(entry, language_id)
                if entry.simplified not in dictionary:
-                    dictionary[entry.simplified] = [entry]
+                    dictionary[entry.simplified] = [trans_entry]
                else:
-                    dictionary[entry.simplified].append(entry)
+                    dictionary[entry.simplified].append(trans_entry)
            CCCEDICT.DICTIONARY_LIST[language_id] = dictionary
        else:
            dictionary = CCCEDICT.DICTIONARY_LIST[language_id]
@@ -79,6 +137,7 @@ class TTS:

    MODEL = None
    DEVICE = None
+    DEFAULTS = {"cfg_weight": 0.2, "exaggeration": 0.8}

    @staticmethod
    def create_tts():
@@ -135,7 +194,23 @@ class ProcessFile:
        """Posible name for the output file, still missing the filetype"""
        if self.language_id is None:
            raise ValueError("Not a valid language selected")
-        return self.input_file.parent / f"{self.input_file.stem}.{self.language_id})."
+        return self.out_folder / f"{self.input_file.stem}.{self.language_id}.temp"
+
+    @property
+    def dictionary_resource_file(self):
+        """Path of the dictionary TSV for ``language_id`` inside ``resources``.
+
+        The file is named ``dictionary.<language_id>.tsv``.
+        """
+        file_name = f"dictionary.{self.language_id}.tsv"
+        return self.resources / file_name
+
+    @property
+    def relative_dictionary_resource_file(self):
+        """The dictionary TSV path for ``language_id``, relative to ``RESOURCES``"""
+        file_name = f"dictionary.{self.language_id}.tsv"
+        return (self.resources / file_name).relative_to(RESOURCES)
+
+    @property
+    def available_dictionary_languages(self):
+        """Language ids that already have a dictionary TSV in ``resources``.
+
+        Each id is the first suffix of a ``dictionary.*.tsv`` file name, with
+        its leading dot removed.
+        """
+        languages = []
+        for tsv_path in self.resources.glob("dictionary.*.tsv"):
+            first_suffix = tsv_path.suffixes[0]
+            languages.append(first_suffix[1:])
+        return languages


 class TranslationResult:
@@ -152,3 +227,23 @@ class TranslationResult:
        self.translated = translated
        self.line = line
        self.audio_path = audio_path
+
+
+class DictionaryResult:
+    """One processed dictionary row together with its audio file path.
+
+    Plain attribute holder: every constructor argument is stored unchanged
+    under the attribute of the same name.
+    """
+
+    def __init__(
+        self,
+        language_id: str,
+        simplified: str,
+        traditional: str,
+        pinyin: str,
+        meaning: str,
+        audio_path: Path,
+    ):
+        # Paired assignments, kept in the original attribute order.
+        self.language_id, self.simplified = language_id, simplified
+        self.traditional, self.pinyin = traditional, pinyin
+        self.meaning, self.audio_path = meaning, audio_path
Author	SHA1	Message	Date
Wolfang Torres	bff05fca01	update speed generation for slower speed	2026-06-22 21:50:59 +08:00
Wolfang Torres	266bbbb370	update voice generation	2026-06-22 21:37:45 +08:00
Wolfang Torres	d0c7da966d	fix anki template	2026-06-22 19:02:38 +08:00
Wolfang Torres	59075c0468	change hsk model, doesnt use to typeboxes per card	2026-06-22 18:42:45 +08:00
Wolfang Torres	deaa4c649f	fix error on translator	2026-06-21 20:50:05 +08:00
Wolfang Torres	5fce71f44c	test	2026-06-21 14:29:44 +08:00
Wolfang Torres	f81f48a5d9	debug	2026-06-21 14:25:08 +08:00
Wolfang Torres	e945d53f85	fix trasnlator issues	2026-06-21 13:31:35 +08:00
Wolfang Torres	5b52c5ae96	test	2026-06-21 10:32:28 +08:00
Wolfang Torres	f0b1d7c29c	fix	2026-06-21 10:29:35 +08:00
Wolfang Torres	28eccd8d13	update for language creation	2026-06-21 10:26:12 +08:00
Wolfang Torres	43853678c1	updates	2026-06-21 10:09:06 +08:00
Wolfang Torres	ebc9aa77a7	add dictation model	2026-06-20 14:10:17 +08:00
Wolfang Torres	ebf2d58207	add create delete folder files	2026-06-20 12:29:41 +08:00
Wolfang Torres	d797e655c6	fix dot in name	2026-06-20 12:03:01 +08:00
Wolfang Torres	ab495b6d31	clean input and resources files	2026-06-20 11:51:49 +08:00
Wolfang Torres	bc1c4ff048	add debug	2026-06-20 11:40:26 +08:00
Wolfang Torres	a23e0dc34e	fix cli	2026-06-20 10:19:59 +08:00
Wolfang Torres	62588298a8	fix cli	2026-06-20 09:27:08 +08:00
Wolfang Torres	30dd8c8671	update api endpoints	2026-06-19 20:17:37 +08:00
Wolfang Torres	dde819f1e6	fix bug with dictionary only translated on demand	2026-06-12 20:58:53 +08:00
Wolfang Torres	f9fc887d05	add dictionary file process	2026-06-12 20:26:50 +08:00
Wolfang Torres	9b0d23b8ac	fix bug with name gneeration	2026-06-12 00:59:19 +08:00
Wolfang Torres	eb4cc8e6e0	update format for anki, upgrade trasnlation package search, fix small bugs	2026-06-12 00:43:55 +08:00