Fix dictionary bug: translate entry meanings only on demand

This commit is contained in:
Wolfang Torres
2026-06-12 20:58:53 +08:00
parent f9fc887d05
commit dde819f1e6
5 changed files with 26 additions and 20 deletions

View File

@@ -3,4 +3,4 @@
# SPDX-FileCopyrightText: 2026-present Wolfang Torres <wolfang.torres@gmail.com>
#
# SPDX-License-Identifier: GPL-3.0-or-later
__version__ = "0.1.2"
__version__ = "0.1.3"

View File

@@ -81,7 +81,7 @@ HSK_MODEL = Model(
"<strong>{{Pinyin}}</strong>"
"<br>{{Translated}}"
"<br>{{Audio}}"
"<br>Pinyin: {{type:Simplified}}"
"<br>Simplified: {{type:Simplified}}"
),
"afmt": (
"{{FrontSide}}<hr id='answer''><div class='simple'>{{Simplified}}</div>"
@@ -134,7 +134,8 @@ def output_anki_dictionary(process_file: ProcessFile, results: list[DictionaryRe
note = Note(
model=HSK_MODEL,
fields=[
"\n ".join(f"{n+1}. {m}" for n, m in enumerate(result.meanings)),
# "\n ".join(f"{n+1}. {m}" for n, m in enumerate(result.meanings)),
result.meaning,
PinyinToneConverter().convert_text(result.pinyin),
result.simplified,
result.traditional,

View File

@@ -74,7 +74,7 @@ def process_a_file(process_file: ProcessFile, language_id: str):
results = translator_process(text_lines, process_file)
output_anki_phrase(process_file, results)
elif DICT_TYPE in process_file.input_file.suffixes:
if process_file.dictionary_resource_file.exists():
if not process_file.dictionary_resource_file.is_file():
CCCEDICT.create_cedict(language_id)
with process_file.absolute_input_file.open("r", encoding="utf8") as file:
words_list = [word.strip() for word in file.readlines()]

View File

@@ -14,6 +14,7 @@ from .utility import CCCEDICT, TTS, DictionaryResult, ProcessFile, TranslationRe
# Constants
FIELDNAMES = ["simplified", "traditional", "pinyin", "meaning"]
DIALECT = "excel-tab"
# Results Classes
@@ -44,8 +45,8 @@ def dictionary_pre_process(words_list: list[str], process_file: ProcessFile):
with process_file.dictionary_resource_file.open(
"w", encoding="utf8", newline=""
) as resource_file:
tsv_writer = csv.writer(
resource_file, dialect="excel-tab", fieldnames=FIELDNAMES
tsv_writer = csv.DictWriter(
resource_file, dialect=DIALECT, fieldnames=FIELDNAMES
)
tsv_writer.writeheader()
for words in words_list:
@@ -84,15 +85,19 @@ def dictionary_process(process_file: ProcessFile) -> list[DictionaryResult]:
"""Process a dictionary_resource_file into a final result"""
results = []
with process_file.dictionary_resource_file.open(
"w", encoding="utf8", newline=""
"r", encoding="utf8", newline=""
) as resource_file:
reader = csv.DictReader(resource_file)
reader = csv.DictReader(resource_file, dialect=DIALECT)
for line in reader:
audio_path = process_file.resources / f"{line['pinyin']}.wav"
if not audio_path.exists():
audio = TTS.MODEL.generate(f"{line['simplified']}", language_id="zh")
audio = TTS.MODEL.generate(
f"{line['simplified']}", language_id=LANGUAGES.CN
)
torchaudio.save(audio_path, audio, TTS.MODEL.sr)
result = DictionaryResult(**line, audio_path=audio_path)
result = DictionaryResult(
**line, audio_path=audio_path, language_id=process_file.language_id
)
results.append(result)
return results

View File

@@ -56,14 +56,6 @@ class TranslatedEntry:
self.entry = entry
self.language_id = language_id
self._translated_meanings = []
for meaning in entry.meanings:
if language_id != LANGUAGES.EN:
trans_meaning = argostranslate.translate.translate(
meaning, LANGUAGES.EN, language_id
)
else:
trans_meaning = meaning
self._translated_meanings.append(trans_meaning)
@property
def simplified(self):
@@ -83,6 +75,14 @@ class TranslatedEntry:
@property
def meanings(self):
    """Entry translated meaning list.

    The list is built lazily on first access and stored in
    ``self._translated_meanings``; later accesses return that same list
    without translating again. When ``self.language_id`` equals
    ``LANGUAGES.EN`` the entry's meanings are used unchanged, otherwise each
    one is passed through ``argostranslate.translate.translate`` from
    ``LANGUAGES.EN`` to ``self.language_id``.
    """
    # Only fill the cache while it is still empty. Without this guard every
    # read of the property appended all meanings again, so the returned list
    # accumulated duplicates and each meaning was re-translated per access.
    if not self._translated_meanings:
        for meaning in self.entry.meanings:
            if self.language_id != LANGUAGES.EN:
                trans_meaning = argostranslate.translate.translate(
                    meaning, LANGUAGES.EN, self.language_id
                )
            else:
                trans_meaning = meaning
            # Append in place so the cache list object keeps its identity.
            self._translated_meanings.append(trans_meaning)
    return self._translated_meanings
@@ -210,12 +210,12 @@ class DictionaryResult:
simplified: str,
traditional: str,
pinyin: str,
meanings: str,
meaning: str,
audio_path: Path,
):
self.language_id = language_id
self.simplified = simplified
self.traditional = traditional
self.pinyin = pinyin
self.meanings = meanings
self.meaning = meaning
self.audio_path = audio_path