update format for anki,

upgrade translation package search,
fix small bugs
This commit is contained in:
Wolfang Torres
2026-06-12 00:43:55 +08:00
parent 21c6416cfd
commit eb4cc8e6e0
10 changed files with 70 additions and 45 deletions

2
.vscode/launch.json vendored
View File

@@ -9,7 +9,7 @@
"name": "Python Debugger: Module", "name": "Python Debugger: Module",
"type": "debugpy", "type": "debugpy",
"request": "launch", "request": "launch",
"module": "anki-hsk-creator" "module": "anki_hsk_creator"
} }
] ]
} }

View File

@@ -14,7 +14,9 @@ creates anki hsk decks from a list of words
## Installation ## Installation
```console ```console
pip install anki-hsk-creator git clone https://github.com/resemble-ai/chatterbox
git clone https://gitea.wolfang.info.ve/wolfang/anki-hsk-creator
git clone https://gitea.wolfang.info.ve/wolfang/anki-hsk-creator-data
``` ```
## License ## License

View File

@@ -35,6 +35,7 @@ dependencies = [
"torchaudio", "torchaudio",
"torchcodec", "torchcodec",
"python-dotenv", "python-dotenv",
] ]
[project.optional-dependencies] [project.optional-dependencies]

View File

@@ -1,5 +1,6 @@
"""about.py""" """about.py"""
# SPDX-FileCopyrightText: 2026-present Wolfang Torres <wolfang.torres@gmail.com> # SPDX-FileCopyrightText: 2026-present Wolfang Torres <wolfang.torres@gmail.com>
# #
# SPDX-License-Identifier: GPL-3.0-or-later # SPDX-License-Identifier: GPL-3.0-or-later
__version__ = "0.1.0" __version__ = "0.1.1"

View File

@@ -4,7 +4,7 @@
from pathlib import Path from pathlib import Path
# Local # Local
from .api import list_input_files, process_a_file, select_file from .api import is_file, list_input_files, process_a_file, select_file
from .constants import LANGUAGES from .constants import LANGUAGES
@@ -18,10 +18,10 @@ def cli_select_files():
for n, file in enumerate(files): for n, file in enumerate(files):
print(f"{n+1} - {file}") print(f"{n+1} - {file}")
s = None s = None
while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)): while not s or not s.isnumeric() or not 1 <= int(s) <= len(files):
s = input(f"Please select the file [1-{len(files)}]: ") s = input(f"Please select the file [1-{len(files)}]: ")
selected = files[int(s) - 1] selected = files[int(s) - 1]
if selected.is_file(): if is_file(selected):
in_file = selected in_file = selected
else: else:
level = selected level = selected
@@ -32,11 +32,11 @@ def cli_select_files():
def cli_select_language(): def cli_select_language():
"""Selects a language for the trasnlatatio""" """Selects a language for the trasnlatatio"""
print("Select a language:") print("Select a language:")
for language_id, language in LANGUAGES.language_names.items(): for language_id, language in LANGUAGES.LanguageNames.items():
print(f"{language_id} - {language}") print(f"{language_id} - {language}")
s = None s = None
while not s or s not in LANGUAGES.available_languages: while not s or s not in LANGUAGES.AvailableLanguages:
s = input(f"Please select the language: ({ LANGUAGES.available_languages})") s = input(f"Please select the language {LANGUAGES.AvailableLanguages}: ")
return s return s
@@ -45,6 +45,7 @@ def main():
while True: while True:
input_file = cli_select_files() input_file = cli_select_files()
language_id = cli_select_language() language_id = cli_select_language()
print(f"processing file {input_file.input_file} with language {language_id}")
process_a_file(input_file, language_id) process_a_file(input_file, language_id)

View File

@@ -48,17 +48,17 @@ PHRASE_MODEL = Model(
templates=[ templates=[
{ {
"name": "Card 1", "name": "Card 1",
"qfmt": "{{Translated}}<br>{{Audio}}", "qfmt": "{{Translated}}<br>{{Audio}}<br>{{type:Phrase}}",
"afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}', "afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}',
}, },
{ {
"name": "Card 2", "name": "Card 2",
"qfmt": "{{Phrase}}<br>{{Audio}}", "qfmt": "{{Phrase}}<br>{{Audio}}<br>{{type:Translated}}",
"afmt": '{{FrontSide}}<hr id="answer">{{Translated}}', "afmt": '{{FrontSide}}<hr id="answer">{{Translated}}',
}, },
{ {
"name": "Card 3", "name": "Card 3",
"qfmt": "{{Audio}}", "qfmt": "{{Audio}}<br>{{type:Phrase}}",
"afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}', "afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}',
}, },
], ],
@@ -70,7 +70,7 @@ HSK_MODEL = Model(
1708536519, 1708536519,
"HSK Model", "HSK Model",
fields=[ fields=[
{"name": "English"}, {"name": "Translated"},
{"name": "Pinyin"}, {"name": "Pinyin"},
{"name": "Simplified"}, {"name": "Simplified"},
{"name": "Traditional"}, {"name": "Traditional"},
@@ -79,7 +79,12 @@ HSK_MODEL = Model(
templates=[ templates=[
{ {
"name": "Card 1", "name": "Card 1",
"qfmt": "<strong>{{Pinyin}}</strong><br>{{English}}<br>{{Audio}}", "qfmt": (
"<strong>{{Pinyin}}</strong>"
"<br>{{Translated}}"
"<br>{{Audio}}"
"<br>Pinyin: {{type:Simplified}}"
),
"afmt": ( "afmt": (
"{{FrontSide}}<hr id='answer''><div class='simple'>{{Simplified}}</div>" "{{FrontSide}}<hr id='answer''><div class='simple'>{{Simplified}}</div>"
"<br><div class='trad'>{{Traditional}}</div>" "<br><div class='trad'>{{Traditional}}</div>"
@@ -87,16 +92,24 @@ HSK_MODEL = Model(
}, },
{ {
"name": "Card 2", "name": "Card 2",
"qfmt": "<div class='simple'>{{Simplified}}</div><br><div class='trad'>" "qfmt": (
"{{Traditional}}</div>", "<div class='simple'>{{Simplified}}</div>"
"<br><div class='trad'>{{Traditional}}</div>"
"<br>Pinyin: {{type:Pinyin}}"
"<br>Translated: {{type:Translated}}"
),
"afmt": ( "afmt": (
"{{FrontSide}}<hr id='answer'><strong>{{Pinyin}}</strong>" "{{FrontSide}}<hr id='answer'><strong>{{Pinyin}}</strong>"
"<br>{{English}}<br>{{Audio}}" "<br>{{Translated}}<br>{{Audio}}"
), ),
}, },
{ {
"name": "Card 3", "name": "Card 3",
"qfmt": "{{Audio}}", "qfmt": (
"{{Audio}}"
"<br>Pinyin: {{type:Pinyin}}"
"<br>Simplified: {{type:Simplified}}"
),
"afmt": ( "afmt": (
"{{FrontSide}}<hr id='answer'><strong>{{Pinyin}}</strong>" "{{FrontSide}}<hr id='answer'><strong>{{Pinyin}}</strong>"
"<br><div class='simple'>{{Simplified}}</div>" "<br><div class='simple'>{{Simplified}}</div>"
@@ -139,7 +152,7 @@ def output_anki_phrase(process_file: ProcessFile, results: list[TranslationResul
final_file = process_file.output_name.with_suffix(".apkg") final_file = process_file.output_name.with_suffix(".apkg")
deck_name = "::".join( deck_name = "::".join(
process_file.input_file.parts[:-1] + (process_file.input_fil.stem,) process_file.input_file.parts[:-1] + (process_file.input_file.stem,)
) )
deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name) deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name)
package = Package(deck) package = Package(deck)

View File

@@ -27,6 +27,11 @@ def list_input_files(search_path: Path = Path()) -> list[Path]:
return [path.relative_to(INPUT) for path in level.glob("*")] return [path.relative_to(INPUT) for path in level.glob("*")]
def is_file(file_path: Path) -> bool:
"""Check if a relative path is a file"""
return (INPUT / file_path).is_file()
def select_file(file_path: Path) -> ProcessFile: def select_file(file_path: Path) -> ProcessFile:
"""Given a relative path from `list_input_files`, return a ProcessFile""" """Given a relative path from `list_input_files`, return a ProcessFile"""
if (INPUT / file_path).is_file(): if (INPUT / file_path).is_file():
@@ -61,12 +66,14 @@ def create_input_file(
def process_a_file(process_file: ProcessFile, language_id: str): def process_a_file(process_file: ProcessFile, language_id: str):
"""From a input_file, a language and an output type, process a file""" """From a input_file, a language and an output type, process a file"""
process_file.language_id = language_id process_file.language_id = language_id
if PHRASES_TYPE in process_file.input_file.suffix: if PHRASES_TYPE in process_file.input_file.suffixes:
TTS.create_tts() TTS.create_tts()
TRANS.create_translator(LANGUAGES.CN, language_id) TRANS.create_translator(LANGUAGES.CN, language_id)
with process_file.absolute_input_file.open("r") as file: with process_file.absolute_input_file.open("r", encoding="utf8") as file:
text_lines = [line.strip() for line in file.readlines()] text_lines = [line.strip() for line in file.readlines()]
results = translator_process(text_lines, process_file, language_id) results = translator_process(text_lines, process_file, language_id)
output_anki_phrase(process_file, results) output_anki_phrase(process_file, results)
elif DICT_TYPE in process_file.input_file.suffix: elif DICT_TYPE in process_file.input_file.suffixes:
print("not implemented") print("not implemented")
else:
print("no identified")

View File

@@ -7,7 +7,7 @@ import importlib.resources
from . import DATA_FOLDER from . import DATA_FOLDER
# Resources # Resources
CCCEDICT_PATH = importlib.resources.files("anki-hsk-creator").joinpath("cedict_ts.u8") CCCEDICT_PATH = importlib.resources.files("anki_hsk_creator").joinpath("cedict_ts.u8")
# Data folder structure # Data folder structure
INPUT = DATA_FOLDER / "input" INPUT = DATA_FOLDER / "input"
@@ -33,19 +33,12 @@ class LANGUAGES:
TR = "tr" TR = "tr"
TH = "th" TH = "th"
@property AvailableLanguages = (EN, ES, FR, RU, TR, TH)
def available_languages(self) -> tuple: LanguageNames = {
"""Available laguages for translation""" EN: "English",
return (self.EN, self.ES, self.FR, self.RU, self.TR, self.TH) ES: "Spanish",
FR: "French",
@property RU: "Russian",
def language_names(self) -> dict: TR: "Turkish",
"""Gets the name of a language code""" TH: "Thai",
return {
self.EN: "English",
self.ES: "Spanish",
self.FR: "French",
self.RU: "Russian",
self.TR: "Turkish",
self.TH: "Thai",
} }

View File

@@ -20,7 +20,7 @@ def translator_process(
results = [] results = []
for n, line in enumerate(text_lines): for n, line in enumerate(text_lines):
line = line.strip() line = line.strip()
audio_path = process_file.resources / f"N{n::03.0n}.wav" audio_path = process_file.resources / f"N{n:03n}.wav"
if not audio_path.exists(): if not audio_path.exists():
audio = TTS.MODEL.generate(f"{line}", language_id=LANGUAGES.CN) audio = TTS.MODEL.generate(f"{line}", language_id=LANGUAGES.CN)
torchaudio.save(audio_path, audio, TTS.MODEL.sr) torchaudio.save(audio_path, audio, TTS.MODEL.sr)

View File

@@ -33,13 +33,20 @@ class TRANS:
argostranslate.package.update_package_index() argostranslate.package.update_package_index()
TRANS.PACKAGES = argostranslate.package.get_available_packages() TRANS.PACKAGES = argostranslate.package.get_available_packages()
TRANS.UPDATED = True TRANS.UPDATED = True
package_to_install = next( packages = filter(
filter( lambda x: x.from_code == from_code or x.to_code == to_code,
lambda x: x.from_code == from_code and x.to_code == to_code,
TRANS.PACKAGES, TRANS.PACKAGES,
) )
) packages_to_install = []
argostranslate.package.install_from_path(package_to_install.download()) for in_package in packages:
if in_package.from_code == from_code:
for out_package in packages:
if out_package.to_code == to_code:
if in_package.to_code == out_package.from_code:
packages_to_install.append(in_package)
packages_to_install.append(out_package)
for package in packages_to_install:
argostranslate.package.install_from_path(package.download())
class CCCEDICT: class CCCEDICT:
@@ -135,7 +142,7 @@ class ProcessFile:
"""Posible name for the output file, still missing the filetype""" """Posible name for the output file, still missing the filetype"""
if self.language_id is None: if self.language_id is None:
raise ValueError("Not a valid language selected") raise ValueError("Not a valid language selected")
return self.input_file.parent / f"{self.input_file.stem}.{self.language_id})." return self.out_folder / f"{self.input_file.stem}.{self.language_id}."
class TranslationResult: class TranslationResult: