update format for anki,
upgrade translation package search, fix small bugs
This commit is contained in:
2
.vscode/launch.json
vendored
2
.vscode/launch.json
vendored
@@ -9,7 +9,7 @@
|
||||
"name": "Python Debugger: Module",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "anki-hsk-creator"
|
||||
"module": "anki_hsk_creator"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -14,7 +14,9 @@ creates anki hsk decks from a list of words
|
||||
## Installation
|
||||
|
||||
```console
|
||||
pip install anki-hsk-creator
|
||||
git clone https://github.com/resemble-ai/chatterbox
|
||||
git clone https://gitea.wolfang.info.ve/wolfang/anki-hsk-creator
|
||||
git clone https://gitea.wolfang.info.ve/wolfang/anki-hsk-creator-data
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
@@ -35,6 +35,7 @@ dependencies = [
|
||||
"torchaudio",
|
||||
"torchcodec",
|
||||
"python-dotenv",
|
||||
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
"""about.py"""
|
||||
|
||||
# SPDX-FileCopyrightText: 2026-present Wolfang Torres <wolfang.torres@gmail.com>
|
||||
#
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
__version__ = "0.1.0"
|
||||
__version__ = "0.1.1"
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
from pathlib import Path
|
||||
|
||||
# Local
|
||||
from .api import list_input_files, process_a_file, select_file
|
||||
from .api import is_file, list_input_files, process_a_file, select_file
|
||||
from .constants import LANGUAGES
|
||||
|
||||
|
||||
@@ -18,10 +18,10 @@ def cli_select_files():
|
||||
for n, file in enumerate(files):
|
||||
print(f"{n+1} - {file}")
|
||||
s = None
|
||||
while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)):
|
||||
while not s or not s.isnumeric() or not 1 <= int(s) <= len(files):
|
||||
s = input(f"Please select the file [1-{len(files)}]: ")
|
||||
selected = files[int(s) - 1]
|
||||
if selected.is_file():
|
||||
if is_file(selected):
|
||||
in_file = selected
|
||||
else:
|
||||
level = selected
|
||||
@@ -32,11 +32,11 @@ def cli_select_files():
|
||||
def cli_select_language():
|
||||
"""Selects a language for the translation"""
|
||||
print("Select a language:")
|
||||
for language_id, language in LANGUAGES.language_names.items():
|
||||
for language_id, language in LANGUAGES.LanguageNames.items():
|
||||
print(f"{language_id} - {language}")
|
||||
s = None
|
||||
while not s or s not in LANGUAGES.available_languages:
|
||||
s = input(f"Please select the language: ({ LANGUAGES.available_languages})")
|
||||
while not s or s not in LANGUAGES.AvailableLanguages:
|
||||
s = input(f"Please select the language {LANGUAGES.AvailableLanguages}: ")
|
||||
return s
|
||||
|
||||
|
||||
@@ -45,6 +45,7 @@ def main():
|
||||
while True:
|
||||
input_file = cli_select_files()
|
||||
language_id = cli_select_language()
|
||||
print(f"processing file {input_file.input_file} with language {language_id}")
|
||||
process_a_file(input_file, language_id)
|
||||
|
||||
|
||||
|
||||
@@ -48,17 +48,17 @@ PHRASE_MODEL = Model(
|
||||
templates=[
|
||||
{
|
||||
"name": "Card 1",
|
||||
"qfmt": "{{Translated}}<br>{{Audio}}",
|
||||
"qfmt": "{{Translated}}<br>{{Audio}}<br>{{type:Phrase}}",
|
||||
"afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}',
|
||||
},
|
||||
{
|
||||
"name": "Card 2",
|
||||
"qfmt": "{{Phrase}}<br>{{Audio}}",
|
||||
"qfmt": "{{Phrase}}<br>{{Audio}}<br>{{type:Translated}}",
|
||||
"afmt": '{{FrontSide}}<hr id="answer">{{Translated}}',
|
||||
},
|
||||
{
|
||||
"name": "Card 3",
|
||||
"qfmt": "{{Audio}}",
|
||||
"qfmt": "{{Audio}}<br>{{type:Phrase}}",
|
||||
"afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}',
|
||||
},
|
||||
],
|
||||
@@ -70,7 +70,7 @@ HSK_MODEL = Model(
|
||||
1708536519,
|
||||
"HSK Model",
|
||||
fields=[
|
||||
{"name": "English"},
|
||||
{"name": "Translated"},
|
||||
{"name": "Pinyin"},
|
||||
{"name": "Simplified"},
|
||||
{"name": "Traditional"},
|
||||
@@ -79,7 +79,12 @@ HSK_MODEL = Model(
|
||||
templates=[
|
||||
{
|
||||
"name": "Card 1",
|
||||
"qfmt": "<strong>{{Pinyin}}</strong><br>{{English}}<br>{{Audio}}",
|
||||
"qfmt": (
|
||||
"<strong>{{Pinyin}}</strong>"
|
||||
"<br>{{Translated}}"
|
||||
"<br>{{Audio}}"
|
||||
"<br>Pinyin: {{type:Simplified}}"
|
||||
),
|
||||
"afmt": (
|
||||
"{{FrontSide}}<hr id='answer''><div class='simple'>{{Simplified}}</div>"
|
||||
"<br><div class='trad'>{{Traditional}}</div>"
|
||||
@@ -87,16 +92,24 @@ HSK_MODEL = Model(
|
||||
},
|
||||
{
|
||||
"name": "Card 2",
|
||||
"qfmt": "<div class='simple'>{{Simplified}}</div><br><div class='trad'>"
|
||||
"{{Traditional}}</div>",
|
||||
"qfmt": (
|
||||
"<div class='simple'>{{Simplified}}</div>"
|
||||
"<br><div class='trad'>{{Traditional}}</div>"
|
||||
"<br>Pinyin: {{type:Pinyin}}"
|
||||
"<br>Translated: {{type:Translated}}"
|
||||
),
|
||||
"afmt": (
|
||||
"{{FrontSide}}<hr id='answer'><strong>{{Pinyin}}</strong>"
|
||||
"<br>{{English}}<br>{{Audio}}"
|
||||
"<br>{{Translated}}<br>{{Audio}}"
|
||||
),
|
||||
},
|
||||
{
|
||||
"name": "Card 3",
|
||||
"qfmt": "{{Audio}}",
|
||||
"qfmt": (
|
||||
"{{Audio}}"
|
||||
"<br>Pinyin: {{type:Pinyin}}"
|
||||
"<br>Simplified: {{type:Simplified}}"
|
||||
),
|
||||
"afmt": (
|
||||
"{{FrontSide}}<hr id='answer'><strong>{{Pinyin}}</strong>"
|
||||
"<br><div class='simple'>{{Simplified}}</div>"
|
||||
@@ -139,7 +152,7 @@ def output_anki_phrase(process_file: ProcessFile, results: list[TranslationResul
|
||||
|
||||
final_file = process_file.output_name.with_suffix(".apkg")
|
||||
deck_name = "::".join(
|
||||
process_file.input_file.parts[:-1] + (process_file.input_fil.stem,)
|
||||
process_file.input_file.parts[:-1] + (process_file.input_file.stem,)
|
||||
)
|
||||
deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name)
|
||||
package = Package(deck)
|
||||
|
||||
@@ -27,6 +27,11 @@ def list_input_files(search_path: Path = Path()) -> list[Path]:
|
||||
return [path.relative_to(INPUT) for path in level.glob("*")]
|
||||
|
||||
|
||||
def is_file(file_path: Path) -> bool:
    """Tell whether a path, taken relative to the INPUT folder, is a regular file.

    The path is joined onto ``INPUT`` before the filesystem check, so callers
    pass the relative path rather than an absolute one.
    """
    candidate = INPUT / file_path
    return candidate.is_file()
|
||||
|
||||
|
||||
def select_file(file_path: Path) -> ProcessFile:
|
||||
"""Given a relative path from `list_input_files`, return a ProcessFile"""
|
||||
if (INPUT / file_path).is_file():
|
||||
@@ -61,12 +66,14 @@ def create_input_file(
|
||||
def process_a_file(process_file: ProcessFile, language_id: str):
|
||||
"""From an input_file, a language and an output type, process a file"""
|
||||
process_file.language_id = language_id
|
||||
if PHRASES_TYPE in process_file.input_file.suffix:
|
||||
if PHRASES_TYPE in process_file.input_file.suffixes:
|
||||
TTS.create_tts()
|
||||
TRANS.create_translator(LANGUAGES.CN, language_id)
|
||||
with process_file.absolute_input_file.open("r") as file:
|
||||
with process_file.absolute_input_file.open("r", encoding="utf8") as file:
|
||||
text_lines = [line.strip() for line in file.readlines()]
|
||||
results = translator_process(text_lines, process_file, language_id)
|
||||
output_anki_phrase(process_file, results)
|
||||
elif DICT_TYPE in process_file.input_file.suffix:
|
||||
elif DICT_TYPE in process_file.input_file.suffixes:
|
||||
print("not implemented")
|
||||
else:
|
||||
print("no identified")
|
||||
|
||||
@@ -7,7 +7,7 @@ import importlib.resources
|
||||
from . import DATA_FOLDER
|
||||
|
||||
# Resources
|
||||
CCCEDICT_PATH = importlib.resources.files("anki-hsk-creator").joinpath("cedict_ts.u8")
|
||||
CCCEDICT_PATH = importlib.resources.files("anki_hsk_creator").joinpath("cedict_ts.u8")
|
||||
|
||||
# Data folder structure
|
||||
INPUT = DATA_FOLDER / "input"
|
||||
@@ -33,19 +33,12 @@ class LANGUAGES:
|
||||
TR = "tr"
|
||||
TH = "th"
|
||||
|
||||
@property
|
||||
def available_languages(self) -> tuple:
|
||||
"""Available laguages for translation"""
|
||||
return (self.EN, self.ES, self.FR, self.RU, self.TR, self.TH)
|
||||
|
||||
@property
|
||||
def language_names(self) -> dict:
|
||||
"""Gets the name of a language code"""
|
||||
return {
|
||||
self.EN: "English",
|
||||
self.ES: "Spanish",
|
||||
self.FR: "French",
|
||||
self.RU: "Russian",
|
||||
self.TR: "Turkish",
|
||||
self.TH: "Thai",
|
||||
AvailableLanguages = (EN, ES, FR, RU, TR, TH)
|
||||
LanguageNames = {
|
||||
EN: "English",
|
||||
ES: "Spanish",
|
||||
FR: "French",
|
||||
RU: "Russian",
|
||||
TR: "Turkish",
|
||||
TH: "Thai",
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ def translator_process(
|
||||
results = []
|
||||
for n, line in enumerate(text_lines):
|
||||
line = line.strip()
|
||||
audio_path = process_file.resources / f"N{n::03.0n}.wav"
|
||||
audio_path = process_file.resources / f"N{n:03n}.wav"
|
||||
if not audio_path.exists():
|
||||
audio = TTS.MODEL.generate(f"{line}。", language_id=LANGUAGES.CN)
|
||||
torchaudio.save(audio_path, audio, TTS.MODEL.sr)
|
||||
|
||||
@@ -33,13 +33,20 @@ class TRANS:
|
||||
argostranslate.package.update_package_index()
|
||||
TRANS.PACKAGES = argostranslate.package.get_available_packages()
|
||||
TRANS.UPDATED = True
|
||||
package_to_install = next(
|
||||
filter(
|
||||
lambda x: x.from_code == from_code and x.to_code == to_code,
|
||||
packages = filter(
|
||||
lambda x: x.from_code == from_code or x.to_code == to_code,
|
||||
TRANS.PACKAGES,
|
||||
)
|
||||
)
|
||||
argostranslate.package.install_from_path(package_to_install.download())
|
||||
packages_to_install = []
|
||||
for in_package in packages:
|
||||
if in_package.from_code == from_code:
|
||||
for out_package in packages:
|
||||
if out_package.to_code == to_code:
|
||||
if in_package.to_code == out_package.from_code:
|
||||
packages_to_install.append(in_package)
|
||||
packages_to_install.append(out_package)
|
||||
for package in packages_to_install:
|
||||
argostranslate.package.install_from_path(package.download())
|
||||
|
||||
|
||||
class CCCEDICT:
|
||||
@@ -135,7 +142,7 @@ class ProcessFile:
|
||||
"""Possible name for the output file, still missing the filetype"""
|
||||
if self.language_id is None:
|
||||
raise ValueError("Not a valid language selected")
|
||||
return self.input_file.parent / f"{self.input_file.stem}.{self.language_id})."
|
||||
return self.out_folder / f"{self.input_file.stem}.{self.language_id}."
|
||||
|
||||
|
||||
class TranslationResult:
|
||||
|
||||
Reference in New Issue
Block a user