version 0.1

2026-06-11 21:23:34 +08:00
parent ea057668bc
commit 21c6416cfd
15 changed files with 645 additions and 367 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,6 +34,7 @@ dependencies = [
  "torch",
  "torchaudio",
  "torchcodec",
  "python-dotenv",
 ]
 [project.optional-dependencies]
@@ -41,7 +42,9 @@ dev = [
    "pytest",
    "black",
    "pylint",
-    "flakehell"
+    "flake8",
    "flake8-pyproject",
    # "flakeheaven",
 ]
 [project.urls]
@@ -50,17 +53,18 @@ Issues = "https://gitea.wolfang.info.ve/wolfang/anki-hsk-creator/issues"
 Source = "https://gitea.wolfang.info.ve/wolfang/anki-hsk-creator"
 [tool.hatch.version]
-path = "src/anki-hsk-creator/__about__.py"
+path = "src/anki_hsk_creator/__about__.py"
 [tool.hatch.build.targets.sdist]
 packages = ["src/anki_hsk_creator"]
 include = [
-    "src/anki-hsk-creator/cedict_ts.u8",
+    "src/anki_hsk_creator/cedict_ts.u8",
 ]
 [tool.hatch.build.targets.wheel]
-packages = ["src/anki-hsk-creator"]
+packages = ["src/anki_hsk_creator"]
 include = [
-    "src/anki-hsk-creator/cedict_ts.u8",
+    "src/anki_hsk_creator/cedict_ts.u8",
 ]
 [tool.hatch.envs.default]
@@ -69,7 +73,8 @@ extra-dependencies = [
 ]
 [tool.hatch.envs.default.scripts]
-format = "black --target-version=py314 anki-hsk-creator tests && isort anki-hsk-creator tests"
+format = "black --target-version=py314 src tests && isort src tests"
 lint = "flake8 src" 
 [tool.hatch.envs.types]
 extra-dependencies = [
@@ -80,16 +85,16 @@ extra-dependencies = [
 check = "mypy --install-types --non-interactive {args:src/anki-hsk-creator tests}"
 [tool.coverage.run]
-source_pkgs = ["anki-hsk-creator", "tests"]
+source_pkgs = ["anki_hsk_creator", "tests"]
 branch = true
 parallel = true
 omit = [
-  "src/anki-hsk-creator/__about__.py",
+  "src/anki_hsk_creator/__about__.py",
 ]
 [tool.coverage.paths]
-anki-hsk-creator = ["src/anki-hsk-creator", "*/anki-hsk-creator/src/anki-hsk-creator"]
+anki-hsk-creator = ["src/anki_hsk_creator", "*/anki-hsk-creator/src/anki_hsk_creator"]
-tests = ["tests", "*src/anki-hsk-creator/tests"]
+tests = ["tests", "*src/anki_hsk_creator/tests"]
 [tool.coverage.report]
 exclude_lines = [
@@ -111,6 +116,7 @@ exclude = '''
 )
 '''
 [tool.isort]
 src_paths = ["src", "test"]
 skip_glob = [".git", "__pycache__", ".vscode", "*venv", "build", "dist", "old", "*.egg-info"]
@@ -145,9 +151,8 @@ msg-template="{path}:{module}:{line}: [{msg_id}({symbol}), {obj}] {msg}"
 logging-format-style="new"
 logging-modules="logging"
-[tool.flakehell]
+[tool.flake8]
 max_line_length = 88
 format = "grouped"
 show_source = false
 exclude = [
    ".git",
@@ -160,8 +165,29 @@ exclude = [
    "*.egg-info",
 ]
-[tool.flakehell.plugins]
+[tool.flake8.plugins]
 mccabe = ["+C*"]
 pycodestyle = ["+E*", "+W*", "-E203", "-E501", "-W503"]
 pyflakes = ["+F*"]
 flake8-bugbear = ["+*", "+B950"]
 # [tool.flakeheaven]
 # max_line_length = 88
 # format = "grouped"
 # show_source = false
 # exclude = [
 #     ".git",
 #     "__pycache__",
 #     ".vscode",
 #     "*venv",
 #     "build",
 #     "dist",
 #     "old",
 #     "*.egg-info",
 # ]
 # [tool.flakeheaven.plugins]
 # mccabe = ["+C*"]
 # pycodestyle = ["+E*", "+W*", "-E203", "-E501", "-W503"]
 # pyflakes = ["+F*"]
 # flake8-bugbear = ["+*", "+B950"]
--- a/src/anki-hsk-creator/init.py
+++ b/src/anki-hsk-creator/init.py
@@ -1,9 +0,0 @@
 """anki-hsk-creator"""
 import os
 # Globals
 # SECURITY: an access token must never be committed to source control. The
 # token that used to be hard-coded on this line has to be revoked; the value
 # is now expected to be supplied through the HF_TOKEN environment variable.
 HF_TOKEN = os.environ.get("HF_TOKEN")
 ARGOS_UPDATED = False
 ARGOS_PACKAGES = None
--- a/src/anki-hsk-creator/main.py
+++ b/src/anki-hsk-creator/main.py
@@ -1,184 +0,0 @@
 ## Imports
 from pathlib import Path
 import random
 import csv
 ## PIP
 from pinyin_tone_converter.pinyin_tone_converter import PinyinToneConverter
 ## Main
 def process_files():
    print("Select data file:")
    in_file = None
    level = INPUT
    while not in_file:
        files = []
        for n, file in enumerate(level.glob("*")):
            files.append(file)
            print(f"{n+1} - {file.relative_to(INPUT)}")
        s = None
        while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)):
            s = input(f"Please select the file [1-{len(files)}]: ")
        selected = files[int(s) - 1]
        if selected.is_file():
            in_file = selected
        else:
            level = selected
    relative = in_file.relative_to(INPUT)
    out_file = OUTPUT / relative
    resources = RESOURCES / relative
    resources = resources.parent / resources.stem
    resources.mkdir(parents=True, exist_ok=True)
    out_file.parent.mkdir(parents=True, exist_ok=True)
    with in_file.open(encoding="utf8") as input_file:
        file_type = input_file.read().split()[0]
    return in_file, out_file, resources, file_type
 def _choose_entry(word, pinyin, candidates):
    """Return the single dictionary entry to use for *word*.

    When several entries exist, the pinyin stored by a previous run is used to
    narrow them down; if that still leaves more than one, the user is asked.
    """
    if len(candidates) == 1:
        return candidates[0]
    print(f"\nWARNING: {word} has multiple meanings:")
    matches = candidates
    if pinyin and pinyin != "ERROR":
        # Fall back to every candidate when the stored pinyin matches nothing,
        # so an empty filter result can never be indexed.
        matches = [c for c in candidates if c.pinyin == pinyin] or candidates
    if len(matches) == 1:
        return matches[0]
    for n, w in enumerate(matches):
        print(f"{n+1} - {w}")
        for m in w.meanings:
            print(f"\t{m}")
    s = None
    # Validate against, and index into, the list that was actually displayed.
    while not s or not s.isdecimal() or not (1 <= int(s) <= len(matches)):
        s = input(f"Please select the correct word [1-{len(matches)}]: ")
    return matches[int(s) - 1]
 def dictionary_process(dictionary, tts, in_file, resources):
    """Process a dictionary word list and rewrite it with resolved pinyin.

    Each non-blank line of *in_file* holds a word optionally followed by its
    pinyin. The file is rewritten as ``word<TAB>pinyin`` (or ``word<TAB>ERROR``
    when the word is not in *dictionary*), and a ``<word>.wav`` file is
    generated in *resources* for every word that does not have one yet.

    Args:
        dictionary: mapping of simplified word -> list of dictionary entries.
        tts: engine exposing ``generate(text, language_id=...)`` and ``sr``.
        in_file: path of the word list; it is rewritten in place.
        resources: folder that receives the generated audio files.

    Returns:
        A list of ``(entry, audio_path)`` tuples for the words that were found.
        If processing fails midway, the original file content is restored and
        the results gathered so far are returned.
    """
    # read_text closes the handle before the same file is reopened for writing
    words_list = in_file.read_text(encoding="utf8").strip().split("\n")
    original_text = "\n".join(words_list)
    results = []
    try:
        with in_file.open("w", encoding="utf8") as input_file:
            for words in words_list:
                fields = words.split()
                if not fields:
                    # blank line: nothing to look up
                    continue
                word = fields[0]
                pinyin = " ".join(fields[1:]) if len(fields) > 1 else None
                if candidates := dictionary.get(word):
                    entry = _choose_entry(word, pinyin, candidates)
                    audio_path = resources / f"{word}.wav"
                    if not audio_path.exists():
                        audio = tts.generate(f"{word}。", language_id="zh")
                        torchaudio.save(audio_path, audio, tts.sr)
                    input_file.write(f"{word}\t{entry.pinyin}\n")
                    results.append((entry, audio_path))
                else:
                    print("============================================")
                    print(f"===================>ERROR: {word} not found")
                    print("============================================")
                    input_file.write(f"{word}\tERROR\n")
    except Exception as err:
        # Best effort: keep the partial results, but never lose the user's
        # word list and never hide why processing stopped.
        in_file.write_text(original_text, encoding="utf8")
        print(f"ERROR: processing stopped, input file restored: {err!r}")
    except BaseException:
        # e.g. Ctrl-C at the selection prompt: restore the file, then propagate
        in_file.write_text(original_text, encoding="utf8")
        raise
    return results
 def translator_process(tts, resources, in_file):
    """Process for phases trasnlation"""
    text_list = in_file.open(encoding="utf8").read().strip().split()
    results = []
    for n, phrase in enumerate(text_list):
        phrase = phrase.strip()
        audio_path = resources / f"N{n}.wav"
        if not audio_path.exists():
            audio = tts.generate(f"{phrase}。", language_id="zh")
            torchaudio.save(audio_path, audio, tts.sr)
        translated = argostranslate.translate.translate(phrase, CN, EN)
        results.append([translated, phrase, audio_path])
    return results
 # def output_tsv(out_file, results):
 #     """writes the output as a tsv file"""
 #     final_file = out_file.parent / f"{out_file.stem}.tsv"
 #     with final_file.open("w", encoding="utf8", newline="") as csvfile:
 #         writer = csv.writer(csvfile, delimiter="\t", quotechar='"')
 #         for entry in results:
 #             writer.writerow(
 #                 [
 #                     "\n ".join(f"{n+1}. {m}" for n, m in enumerate(entry.meanings)),
 #                     PinyinToneConverter().convert_text(entry.pinyin),
 #                     entry.simplified,
 #                     entry.traditional,
 #                 ]
 #             )
 def output_anki_dictionary(out_file, results):
    final_file = out_file.parent / f"{out_file.stem}.apkg"
    deck_name = "::".join(out_file.relative_to(OUTPUT).parts[:-1] + (out_file.stem,))
    deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name)
    package = Package(deck)
    audios = []
    for entry, audio in results:
        note = Note(
            model=HSK_MODEL,
            fields=[
                "\n ".join(f"{n+1}. {m}" for n, m in enumerate(entry.meanings)),
                PinyinToneConverter().convert_text(entry.pinyin),
                entry.simplified,
                entry.traditional,
                f"[sound:{audio.name}]",
            ],
        )
        audios.append(audio)
        deck.add_note(note)
    package.media_files = audios
    package.write_to_file(final_file)
 def output_anki_phrase(out_file, results):
    final_file = out_file.parent / f"{out_file.stem}.apkg"
    deck_name = "::".join(out_file.relative_to(OUTPUT).parts[:-1] + (out_file.stem,))
    deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name)
    package = Package(deck)
    audios = []
    for translated, phrase, audio in results:
        note = Note(
            model=PHRASE_MODEL,
            fields=[
                translated,
                phrase,
                f"[sound:{audio.name}]",
            ],
        )
        deck.add_note(note)
        audios.append(audio)
    package.media_files = audios
    package.write_to_file(final_file)
 def main():
    tts = create_tts()
    dictionary = create_cedict()
    create_translator()
    while True:
        in_file, out_file, resources, file_type = process_files()
        if PHRASES_TYPE in in_file.suffixes:
            results = translator_process(tts, resources, in_file)
            output_anki_phrase(out_file, results)
        elif DICT_TYPE in in_file.suffixes:
            results = dictionary_process(dictionary, tts, in_file, resources)
            output_anki_dictionary(out_file, results)
        else:
            raise TypeError("Error, filetype not especified!")
 if __name__ == "__main__":
    main()
--- a/src/anki-hsk-creator/anki-models.py
+++ b/src/anki-hsk-creator/anki-models.py
@@ -1,88 +0,0 @@
 # anki-models.py
 from genanki import Deck, Note, Model, Package
 # Constants
 CSS = """
 .card {
 font-family: arial;
 font-size: 20px;
 text-align: center;
 color: black;
 background-color: white;
 }
 .simple {
 font-family: Arial;
 font-size: 100px;
 }
 .trad {
 font-family: Arial;
 font-size: 75px;
 }
 """
 # Models
 PHRASE_MODEL = Model(
    2076166425,
    "Phrase Model",
    fields=[
        {"name": "Translated"},
        {"name": "Phrase"},
        {"name": "Audio"},
    ],
    templates=[
        {
            "name": "Card 1",
            "qfmt": "{{Translated}}<br>{{Audio}}",
            "afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}',
        },
        {
            "name": "Card 2",
            "qfmt": "{{Phrase}}<br>{{Audio}}",
            "afmt": '{{FrontSide}}<hr id="answer">{{Translated}}',
        },
        {
            "name": "Card 3",
            "qfmt": "{{Audio}}",
            "afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}',
        },
    ],
    css=CSS,
 )
 HSK_MODEL = Model(
    1708536519,
    "HSK Model",
    fields=[
        {"name": "English"},
        {"name": "Pinyin"},
        {"name": "Simplified"},
        {"name": "Traditional"},
        {"name": "Audio"},
    ],
    templates=[
        {
            "name": "Card 1",
            "qfmt": "<strong>{{Pinyin}}</strong><br>{{English}}<br>{{Audio}}",
            "afmt": "{{FrontSide}}<hr id='answer''><div class='simple'>"
            "{{Simplified}}</div><br><div class='trad'>{{Traditional}}</div>",
        },
        {
            "name": "Card 2",
            "qfmt": "<div class='simple'>{{Simplified}}</div><br><div class='trad'>"
            "{{Traditional}}</div>",
            "afmt": '{{FrontSide}}<hr id="answer"><strong>{{Pinyin}}</strong><br>{{English}}<br>{{Audio}}',
        },
        {
            "name": "Card 3",
            "qfmt": "{{Audio}}",
            "afmt": "{{FrontSide}}<hr id='answer''><strong>{{Pinyin}}</strong><br><div class='simple'>"
            "{{Simplified}}</div><br><div class='trad'>{{Traditional}}</div>",
        },
    ],
    css=CSS,
 )
--- a/src/anki-hsk-creator/constants.py
+++ b/src/anki-hsk-creator/constants.py
@@ -1,19 +0,0 @@
 ## Imports
 from pathlib import Path
 import random
 import importlib.resources
 CCCEDICT = importlib.resources.files("anki-hsk-creator").joinpath("cedict_ts.u8")
 DATA = Path(__file__).parent.parent / "data"
 INPUT = DATA / "input"
 OUTPUT = DATA / "output"
 RESOURCES = DATA / "resources"
 # File Types
 PHRASES_TYPE = ".phrases"
 DICT_TYPE = ".dictionary"
 # Language codes
 CN = "zh"
 EN = "en"
--- a/src/anki-hsk-creator/untility.py
+++ b/src/anki-hsk-creator/untility.py
@@ -1,53 +0,0 @@
 from cedict_utils.cedict import CedictParser
 import argostranslate.package
 import argostranslate.translate
 from chatterbox.mtl_tts import ChatterboxMultilingualTTS
 import torch
 import torchaudio
 from . import ARGOS_UPDATED, ARGOS_PACKAGES
 from . import CCCEDICT
 ## Functions
 def create_cedict(language_id="en"):
    """Creates a create_cedict dictionary object"""
    parser = CedictParser()
    parser.read_file(CCCEDICT)
    entries = parser.parse()
    dictionary = {}
    for entry in entries:
        if entry.simplified not in dictionary:
            dictionary[entry.simplified] = [entry]
        else:
            dictionary[entry.simplified].append(entry)
    return dictionary
 def create_translator(from_code="zh", to_code="en"):
    """Download and install the Argos Translate package for a language pair.

    The package index is refreshed only on the first call; the list of
    available packages is cached in the module-level ``ARGOS_PACKAGES``.

    Args:
        from_code: source language code (defaults to Chinese, ``"zh"``).
        to_code: target language code (defaults to English, ``"en"``).

    Raises:
        ValueError: if no package exists for the requested language pair.
    """
    # Both names are rebound below; without ``global`` they would be locals and
    # the very first read would raise UnboundLocalError.
    global ARGOS_UPDATED, ARGOS_PACKAGES
    if not ARGOS_UPDATED:
        argostranslate.package.update_package_index()
        ARGOS_PACKAGES = argostranslate.package.get_available_packages()
        ARGOS_UPDATED = True
    package_to_install = next(
        (
            package
            for package in ARGOS_PACKAGES
            if package.from_code == from_code and package.to_code == to_code
        ),
        None,
    )
    if package_to_install is None:
        raise ValueError(f"No translation package for {from_code} -> {to_code}")
    argostranslate.package.install_from_path(package_to_install.download())
 def create_tts():
    # Automatically detect the best available device
    if torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"
    tts = ChatterboxMultilingualTTS.from_pretrained(device=device, t3_model="v3")
    return tts
--- a/src/anki_hsk_creator/about.py
+++ b/src/anki_hsk_creator/about.py
@@ -1,4 +1,5 @@
 """about.py"""
 # SPDX-FileCopyrightText: 2026-present Wolfang Torres <wolfang.torres@gmail.com>
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
-__version__ = "0.0.1"
+__version__ = "0.1.0"
--- a/src/anki_hsk_creator/init.py
+++ b/src/anki_hsk_creator/init.py
@@ -0,0 +1,20 @@
 """anki_hsk_creator"""
 # Standard Library
 import os
 from pathlib import Path
 # Pip
 from dotenv import load_dotenv
 load_dotenv()
 # Globals
 # Get an HF_TOKEN from huggingface for TTS generation
 HF_TOKEN = os.environ.get("HF_TOKEN")
 # Path
 default_path = Path.home() / "anki-hsk-creator-data"
 DATA_FOLDER = Path(os.environ.get("DATA_FOLDER", default_path))
 DATA_FOLDER.mkdir(exist_ok=True, parents=True)
--- a/src/anki_hsk_creator/main.py
+++ b/src/anki_hsk_creator/main.py
@@ -0,0 +1,52 @@
 """__main__.py"""
 # Standard Library
 from pathlib import Path
 # Local
 from .api import list_input_files, process_a_file, select_file
 from .constants import LANGUAGES
 def cli_select_files():
    """Interactively browse the input tree until the user picks a file.

    Entries are listed relative to the input folder. Choosing a folder
    descends into it; choosing a file ends the loop.

    Returns:
        The object returned by ``select_file`` for the chosen file.

    Raises:
        FileNotFoundError: if the top-level input folder has no entries.
    """
    print("Select data file:")
    level = Path()
    while True:
        files = list_input_files(level)
        if not files:
            # An empty listing would make the prompt below unanswerable ([1-0]).
            if level == Path():
                raise FileNotFoundError("No input files available")
            print(f"{level} is empty, going back to the top folder")
            level = Path()
            continue
        for n, file in enumerate(files):
            print(f"{n+1} - {file}")
        s = None
        while not s or not s.isdecimal() or not (1 <= int(s) <= len(files)):
            s = input(f"Please select the file [1-{len(files)}]: ")
        selected = files[int(s) - 1]
        # The listed paths are relative to the input folder, so they cannot be
        # tested with Path.is_file() here; select_file resolves them and raises
        # ValueError when the entry is not a file.
        try:
            return select_file(selected)
        except ValueError:
            level = selected
 def cli_select_language():
    """Ask the user for the target language of the translation.

    Returns:
        The selected language code, one of ``available_languages``.
    """
    # Read the tables through an instance: they are declared as properties on
    # LANGUAGES, and a property read on the class itself is not the value.
    languages = LANGUAGES()
    names = languages.language_names
    available = languages.available_languages
    print("Select a language:")
    for language_id, language in names.items():
        print(f"{language_id} - {language}")
    s = None
    while not s or s not in available:
        s = input(f"Please select the language: ({available})")
    return s
 def main():
    """Run the interactive CLI: pick a file, pick a language, process, repeat."""
    while True:
        # Arguments are evaluated left to right: file prompt first, then language.
        process_a_file(cli_select_files(), cli_select_language())
 if __name__ == "__main__":
    main()
--- a/src/anki_hsk_creator/anki_generation.py
+++ b/src/anki_hsk_creator/anki_generation.py
@@ -0,0 +1,159 @@
 """anki_generation.py
 Produces anki output
 """
 # Standard Library
 import random
 # Pip
 from genanki import Deck, Model, Note, Package
 # Local
 from .utility import ProcessFile, TranslationResult
 # from pinyin_tone_converter.pinyin_tone_converter import PinyinToneConverter
 # Constants
 CSS = """
 .card {
 font-family: arial;
 font-size: 20px;
 text-align: center;
 color: black;
 background-color: white;
 }
 .simple {
 font-family: Arial;
 font-size: 100px;
 }
 .trad {
 font-family: Arial;
 font-size: 75px;
 }
 """
 # Models
 # Note type for phrase decks: three fields and three card templates.
 PHRASE_MODEL = Model(
    # Fixed model id, so the note type keeps the same id across runs.
    2076166425,
    "Phrase Model",
    # Field order must match the order of values passed to Note(fields=[...]).
    fields=[
        {"name": "Translated"},
        {"name": "Phrase"},
        {"name": "Audio"},
    ],
    templates=[
        {
            # Translation (with audio) on the front, original phrase on the back.
            "name": "Card 1",
            "qfmt": "{{Translated}}<br>{{Audio}}",
            "afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}',
        },
        {
            # Original phrase (with audio) on the front, translation on the back.
            "name": "Card 2",
            "qfmt": "{{Phrase}}<br>{{Audio}}",
            "afmt": '{{FrontSide}}<hr id="answer">{{Translated}}',
        },
        {
            # Audio only on the front, original phrase on the back.
            "name": "Card 3",
            "qfmt": "{{Audio}}",
            "afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}',
        },
    ],
    css=CSS,
 )
 # Note type for dictionary (HSK vocabulary) decks: five fields, three cards.
 HSK_MODEL = Model(
    # Fixed model id, so the note type keeps the same id across runs.
    1708536519,
    "HSK Model",
    # Field order must match the order of values passed to Note(fields=[...]).
    fields=[
        {"name": "English"},
        {"name": "Pinyin"},
        {"name": "Simplified"},
        {"name": "Traditional"},
        {"name": "Audio"},
    ],
    templates=[
        {
            # Pinyin, meaning and audio on the front; characters on the back.
            "name": "Card 1",
            "qfmt": "<strong>{{Pinyin}}</strong><br>{{English}}<br>{{Audio}}",
            "afmt": (
                "{{FrontSide}}<hr id='answer'><div class='simple'>{{Simplified}}</div>"
                "<br><div class='trad'>{{Traditional}}</div>"
            ),
        },
        {
            # Characters on the front; pinyin, meaning and audio on the back.
            "name": "Card 2",
            "qfmt": "<div class='simple'>{{Simplified}}</div><br><div class='trad'>"
            "{{Traditional}}</div>",
            "afmt": (
                "{{FrontSide}}<hr id='answer'><strong>{{Pinyin}}</strong>"
                "<br>{{English}}<br>{{Audio}}"
            ),
        },
        {
            # Audio only on the front; pinyin and characters on the back.
            "name": "Card 3",
            "qfmt": "{{Audio}}",
            "afmt": (
                "{{FrontSide}}<hr id='answer'><strong>{{Pinyin}}</strong>"
                "<br><div class='simple'>{{Simplified}}</div>"
                "<br><div class='trad'>{{Traditional}}</div>"
            ),
        },
    ],
    css=CSS,
 )
 # Proccess
 # def output_anki_dictionary(out_file, results):
 #     """Creates an anki file from a dictionary results"""
 #     final_file = out_file.parent / f"{out_file.stem}.apkg"
 #     deck_name = "::".join(out_file.relative_to(OUTPUT).parts[:-1] + (out_file.stem,))
 #     deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name)
 #     package = Package(deck)
 #     audios = []
 #     for entry, audio in results:
 #         note = Note(
 #             model=HSK_MODEL,
 #             fields=[
 #                 "\n ".join(f"{n+1}. {m}" for n, m in enumerate(entry.meanings)),
 #                 PinyinToneConverter().convert_text(entry.pinyin),
 #                 entry.simplified,
 #                 entry.traditional,
 #                 f"[sound:{audio.name}]",
 #             ],
 #         )
 #         audios.append(audio)
 #         deck.add_note(note)
 #     package.media_files = audios
 #     package.write_to_file(final_file)
 def output_anki_phrase(process_file: ProcessFile, results: list[TranslationResult]):
    """Write an Anki package (.apkg) with one phrase note per result.

    Args:
        process_file: the processed input; supplies the deck name (built from
            the relative input path) and the output location.
        results: translated lines with their audio files.

    The package is written into ``process_file.out_folder`` and bundles every
    audio file referenced by the notes.
    """
    relative = process_file.input_file
    # Take only the file name from output_name and place it in the prepared
    # output folder; output_name alone is a relative path and would be written
    # relative to the current working directory. Trailing dots are stripped
    # before appending the extension so the result does not depend on how
    # pathlib treats a name ending in ".".
    final_file = (
        process_file.out_folder / f"{process_file.output_name.name.rstrip('.')}.apkg"
    )
    # "folder::sub::name" creates nested decks in Anki
    deck_name = "::".join(relative.parts[:-1] + (relative.stem,))
    deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name)
    package = Package(deck)
    audios = []
    for result in results:
        note = Note(
            model=PHRASE_MODEL,
            fields=[
                result.translated,
                result.line,
                f"[sound:{result.audio_path.name}]",
            ],
        )
        deck.add_note(note)
        audios.append(result.audio_path)
    package.media_files = audios
    package.write_to_file(final_file)
--- a/src/anki_hsk_creator/api.py
+++ b/src/anki_hsk_creator/api.py
@@ -0,0 +1,72 @@
 """api.py
 Interface for managuing and procesing files
 """
 # Standard Library
 from pathlib import Path
 # Local
 from . import DATA_FOLDER
 from .anki_generation import output_anki_phrase
 from .constants import DICT_TYPE, INPUT, LANGUAGES, PHRASES_TYPE
 from .proccessor import translator_process
 from .utility import TRANS, TTS, ProcessFile
 # interface
 def get_data_folder() -> Path:
    """Return the data folder used by the package (``DATA_FOLDER``)."""
    return DATA_FOLDER
 def list_input_files(search_path: Path = Path()) -> list[Path]:
    """List the entries of a folder inside INPUT, as paths relative to INPUT.

    Args:
        search_path: folder to list, relative to INPUT (default: INPUT itself).

    Returns:
        The direct children (files and folders) of the folder, sorted so that
        numbered menus built from the list are stable between calls.
    """
    level = INPUT / search_path
    # glob() yields entries in arbitrary, filesystem-dependent order
    return sorted(path.relative_to(INPUT) for path in level.glob("*"))
 def select_file(file_path: Path) -> ProcessFile:
    """Wrap a path relative to INPUT (as listed by `list_input_files`).

    Raises:
        ValueError: if the path does not point to an existing file in INPUT.
    """
    # Guard clause: reject folders and missing paths up front.
    if not (INPUT / file_path).is_file():
        raise ValueError(f"{file_path} is not a file")
    return ProcessFile(file_path)
 def create_input_file(
    name: str, file_type: str, text: str, sub_folder: Path = Path()
 ) -> ProcessFile:
    """Create an input file inside INPUT and return its ProcessFile.

    The file is named ``<name><file_type>.txt`` and receives *text* encoded as
    UTF-8. When *sub_folder* is given, it is created under INPUT if needed and
    the file is placed inside it.

    Valid file_types: ".phrases", ".dictionary"

    Raises:
        ValueError: if *file_type* is not one of the valid types.
    """
    valid_types = (PHRASES_TYPE, DICT_TYPE)
    if file_type not in valid_types:
        raise ValueError(f"file_type {file_type} not in {valid_types}")
    # path relative to INPUT; this is what ProcessFile works with
    relative = sub_folder / f"{name}{file_type}.txt"
    target = INPUT / relative
    target.parent.mkdir(exist_ok=True, parents=True)
    target.write_text(text, encoding="utf8")
    return ProcessFile(relative)
 def process_a_file(process_file: ProcessFile, language_id: str):
    """Process an input file into an Anki package for the given language.

    Args:
        process_file: the input file to process; its ``language_id`` is set.
        language_id: target language code for the translation.

    Phrase files get a TTS recording and a translation per line and are
    written out as an Anki package. Dictionary files are not implemented yet.
    """
    process_file.language_id = language_id
    # Input files are named "<name><type>.txt", so the type marker is one of
    # the inner suffixes; ``.suffix`` alone is always ".txt".
    suffixes = process_file.input_file.suffixes
    if PHRASES_TYPE in suffixes:
        TTS.create_tts()
        TRANS.create_translator(LANGUAGES.CN, language_id)
        # Input files are written as UTF-8; do not rely on the locale default.
        with process_file.absolute_input_file.open("r", encoding="utf8") as file:
            text_lines = [line.strip() for line in file]
        results = translator_process(text_lines, process_file, language_id)
        output_anki_phrase(process_file, results)
    elif DICT_TYPE in suffixes:
        print("not implemented")
--- a/src/anki_hsk_creator/cedict_ts.u8
+++ b/src/anki_hsk_creator/cedict_ts.u8
--- a/src/anki_hsk_creator/constants.py
+++ b/src/anki_hsk_creator/constants.py
@@ -0,0 +1,51 @@
 """constants.py"""
 # Standard Library
 import importlib.resources
 # Local
 from . import DATA_FOLDER
 # Resources
 # The anchor must be the importable package name (underscores, not hyphens).
 CCCEDICT_PATH = importlib.resources.files("anki_hsk_creator").joinpath("cedict_ts.u8")
 # Data folder structure
 INPUT = DATA_FOLDER / "input"
 INPUT.mkdir(exist_ok=True, parents=True)
 OUTPUT = DATA_FOLDER / "output"
 OUTPUT.mkdir(exist_ok=True, parents=True)
 RESOURCES = DATA_FOLDER / "resources"
 RESOURCES.mkdir(exist_ok=True, parents=True)
 # File Types
 PHRASES_TYPE = ".phrases"
 DICT_TYPE = ".dictionary"
 class LANGUAGES:
    """Language codes used by the translation pipeline.

    The class is used as a namespace (``LANGUAGES.CN``), so the lookup tables
    are plain class attributes: they can be read on the class as well as on
    an instance.
    """
    CN = "zh"
    EN = "en"
    ES = "es"
    FR = "fr"
    RU = "ru"
    TR = "tr"
    TH = "th"
    # Languages available as translation targets (CN is not listed).
    available_languages = (EN, ES, FR, RU, TR, TH)
    # Display name for each available language code.
    language_names = {
        EN: "English",
        ES: "Spanish",
        FR: "French",
        RU: "Russian",
        TR: "Turkish",
        TH: "Thai",
    }
--- a/src/anki_hsk_creator/proccessor.py
+++ b/src/anki_hsk_creator/proccessor.py
@@ -0,0 +1,96 @@
 """processor.py"""
 # Pip
 import argostranslate.translate
 import torchaudio
 # Local
 from .constants import LANGUAGES
 from .utility import TTS, ProcessFile, TranslationResult  # , CCCEDICT
 # Results Classes
 def translator_process(
    text_lines: list[str],
    process_file: ProcessFile,
    language_id: str,
 ) -> list[TranslationResult]:
    """Generate audio and a translation for every phrase.

    Args:
        text_lines: the phrases to process, one per item.
        process_file: supplies the ``resources`` folder for the audio files.
        language_id: target language code for the translation.

    Returns:
        One TranslationResult per non-blank line, in input order.

    Audio files are named ``N<line index, zero-padded to 3 digits>.wav`` and
    are only generated when they do not exist yet. ``TTS.MODEL`` must have
    been created before calling this function.
    """
    results = []
    for n, line in enumerate(text_lines):
        line = line.strip()
        if not line:
            # A blank line would only produce an empty card; the index still
            # advances, so audio names keep matching the input line numbers.
            continue
        audio_path = process_file.resources / f"N{n:03d}.wav"
        if not audio_path.exists():
            audio = TTS.MODEL.generate(f"{line}。", language_id=LANGUAGES.CN)
            torchaudio.save(audio_path, audio, TTS.MODEL.sr)
        translated = argostranslate.translate.translate(line, LANGUAGES.CN, language_id)
        results.append(TranslationResult(language_id, translated, line, audio_path))
    return results
 # def dictionary_process(dictionary, tts, in_file, resources):
 #     """Process dictionary files"""
 #     words_list = in_file.open(encoding="utf8").read().strip().split("\n")
 #     results = []
 #     try:
 #         with in_file.open("w", encoding="utf8") as input_file:
 #             for words in words_list:
 #                 word = words.split()[0]
 #                 pinyin = " ".join(words.split()[1:]) if len(words.split()) > 1 else None
 #                 if v := dictionary.get(word):
 #                     if len(v) > 1:
 #                         print(f"\nWARNING: {word} has multiple meanings:")
 #                         if pinyin and pinyin != "ERROR":
 #                             ml = list(filter(lambda x: x.pinyin == pinyin, v))
 #                         else:
 #                             ml = v
 #                         if len(ml) > 1:
 #                             for n, w in enumerate(ml):
 #                                 print(f"{n+1} - {w}")
 #                                 for m in w.meanings:
 #                                     print(f"\t{m}")
 #                             s = None
 #                             while (
 #                                 not s
 #                                 or not s.isnumeric()
 #                                 or not (1 <= int(s) <= len(v))
 #                             ):
 #                                 s = input(
 #                                     f"Please select the correct word [1-{len(v)}]: "
 #                                 )
 #                             v = v[int(s) - 1]
 #                         else:
 #                             v = ml[0]
 #                     else:
 #                         v = v[0]
 #                     audio_path = resources / f"{word}.wav"
 #                     if not audio_path.exists():
 #                         audio = tts.generate(f"{word}。", language_id="zh")
 #                         torchaudio.save(audio_path, audio, tts.sr)
 #                     input_file.write(f"{word}\t{v.pinyin}\n")
 #                     results.append((v, audio_path))
 #                 else:
 #                     print("============================================")
 #                     print(f"===================>ERROR: {word} not found")
 #                     print("============================================")
 #                     input_file.write(f"{word}\tERROR\n")
 #     except Exception:
 #         with in_file.open("w", encoding="utf8") as input_file:
 #             input_file.write("\n".join(words_list))
 #     return results
 # def output_tsv(out_file, results):
 #     """writes the output as a tsv file"""
 #     final_file = out_file.parent / f"{out_file.stem}.tsv"
 #     with final_file.open("w", encoding="utf8", newline="") as csvfile:
 #         writer = csv.writer(csvfile, delimiter="\t", quotechar='"')
 #         for entry in results:
 #             writer.writerow(
 #                 [
 #                     "\n ".join(f"{n+1}. {m}" for n, m in enumerate(entry.meanings)),
 #                     PinyinToneConverter().convert_text(entry.pinyin),
 #                     entry.simplified,
 #                     entry.traditional,
 #                 ]
 #             )
--- a/src/anki_hsk_creator/utility.py
+++ b/src/anki_hsk_creator/utility.py
@@ -0,0 +1,154 @@
 """utility.py
 Static clasess and functions for general use
 """
 # Standard Library
 from pathlib import Path
 # Pip
 import argostranslate.package
 import argostranslate.translate
 import torch
 from cedict_utils.cedict import CedictParser
 from chatterbox.mtl_tts import ChatterboxMultilingualTTS
 # Local
 from .constants import CCCEDICT_PATH, INPUT, LANGUAGES, OUTPUT, RESOURCES
 # Static Clases
 class TRANS:
    """Static class for Argos Translate package management."""
    # True once the package index has been refreshed in this process.
    UPDATED = False
    # Cached list of available packages, filled on first use.
    PACKAGES = None
    @staticmethod
    def create_translator(from_code, to_code):
        """Download and install the Argos Translate package for a language pair.

        The package index is refreshed only on the first call.

        Raises:
            ValueError: if no package exists for the requested pair.
        """
        if not TRANS.UPDATED:
            argostranslate.package.update_package_index()
            TRANS.PACKAGES = argostranslate.package.get_available_packages()
            TRANS.UPDATED = True
        # A default for next() avoids leaking a bare StopIteration to callers.
        package_to_install = next(
            (
                package
                for package in TRANS.PACKAGES
                if package.from_code == from_code and package.to_code == to_code
            ),
            None,
        )
        if package_to_install is None:
            raise ValueError(f"No translation package for {from_code} -> {to_code}")
        argostranslate.package.install_from_path(package_to_install.download())
 class CCCEDICT:
    """Static class for the CC-CEDICT dictionary."""
    # Parser instance, created on first use.
    PARSER = None
    # Entries parsed from the dictionary file.
    ENTRIES = []
    # Cache: language code -> {simplified word: [entries]}.
    DICTIONARY_LIST = {}
    @staticmethod
    def create_cedict(language_id=LANGUAGES.EN):
        """Return the dictionary for a language, building it on first request.

        Args:
            language_id: language of the meanings; English needs no translation.

        Returns:
            A dict mapping each simplified word to the list of its entries.
        """
        import copy

        if not CCCEDICT.PARSER:
            CCCEDICT.PARSER = CedictParser()
            CCCEDICT.PARSER.read_file(CCCEDICT_PATH)
            CCCEDICT.ENTRIES = CCCEDICT.PARSER.parse()
        if language_id in CCCEDICT.DICTIONARY_LIST:
            return CCCEDICT.DICTIONARY_LIST[language_id]
        needs_translation = language_id != LANGUAGES.EN
        if needs_translation:
            # Install the language package once, not once per entry.
            TRANS.create_translator(LANGUAGES.EN, language_id)
        dictionary = {}
        for entry in CCCEDICT.ENTRIES:
            if needs_translation:
                # Translate the meanings on a copy, so the shared entries (and
                # the English dictionary built from them) stay untouched.
                # NOTE(review): assumes entries expose a ``meanings`` list of
                # strings — confirm against cedict_utils.
                entry = copy.copy(entry)
                entry.meanings = [
                    argostranslate.translate.translate(
                        meaning, LANGUAGES.EN, language_id
                    )
                    for meaning in entry.meanings
                ]
            dictionary.setdefault(entry.simplified, []).append(entry)
        CCCEDICT.DICTIONARY_LIST[language_id] = dictionary
        return dictionary
 class TTS:
    """Static holder for the shared text-to-speech engine."""
    # Loaded model, or None until create_tts() has run.
    MODEL = None
    # Torch device name chosen for the model, or None until detected.
    DEVICE = None
    @staticmethod
    def create_tts():
        """Pick a device and load the TTS model, each only once."""
        if TTS.DEVICE is None:
            # Preference order: CUDA, then Apple MPS, then CPU.
            if torch.cuda.is_available():
                chosen = "cuda"
            elif torch.backends.mps.is_available():
                chosen = "mps"
            else:
                chosen = "cpu"
            TTS.DEVICE = chosen
        if TTS.MODEL is not None:
            return
        TTS.MODEL = ChatterboxMultilingualTTS.from_pretrained(
            device=TTS.DEVICE, t3_model="v3"
        )
 # Clases
 class ProcessFile:
    """Class that represents a file to processs
    diferent input files has direfent process_files depending on language
    """
    def __init__(self, input_file: Path, language_id: str = None):
        self.input_file = input_file
        self._language_id = language_id
        # process file type
        self.out_folder = OUTPUT / input_file.parent
        self.out_folder.mkdir(parents=True, exist_ok=True)
        resources = RESOURCES / input_file
        self.resources = resources.parent / resources.stem
        self.resources.mkdir(parents=True, exist_ok=True)
    @property
    def absolute_input_file(self):
        """Absolute input file"""
        return INPUT / self.input_file
    @property
    def language_id(self):
        """language for this trasnlation process"""
        return self._language_id
    @language_id.setter
    def language_id(self, value):
        self._language_id = value
    @property
    def output_name(self):
        """Posible name for the output file, still missing the filetype"""
        if self.language_id is None:
            raise ValueError("Not a valid language selected")
        return self.input_file.parent / f"{self.input_file.stem}.{self.language_id})."
 class TranslationResult:
    """One translated line together with its audio file.

    Attributes:
        language_id: language code the line was translated into.
        translated: the translated text.
        line: the original line.
        audio_path: path of the audio file generated for the line.
    """
    def __init__(
        self,
        language_id: str,
        translated: str,
        line: str,
        audio_path: Path,
    ):
        self.language_id, self.translated = language_id, translated
        self.line, self.audio_path = line, audio_path