Fix bug: dictionary meanings are now translated only on demand

This commit is contained in:
Wolfang Torres
2026-06-12 20:58:53 +08:00
parent f9fc887d05
commit dde819f1e6
5 changed files with 26 additions and 20 deletions

View File

@@ -3,4 +3,4 @@
# SPDX-FileCopyrightText: 2026-present Wolfang Torres <wolfang.torres@gmail.com> # SPDX-FileCopyrightText: 2026-present Wolfang Torres <wolfang.torres@gmail.com>
# #
# SPDX-License-Identifier: GPL-3.0-or-later # SPDX-License-Identifier: GPL-3.0-or-later
__version__ = "0.1.2" __version__ = "0.1.3"

View File

@@ -81,7 +81,7 @@ HSK_MODEL = Model(
"<strong>{{Pinyin}}</strong>" "<strong>{{Pinyin}}</strong>"
"<br>{{Translated}}" "<br>{{Translated}}"
"<br>{{Audio}}" "<br>{{Audio}}"
"<br>Pinyin: {{type:Simplified}}" "<br>Simplified: {{type:Simplified}}"
), ),
"afmt": ( "afmt": (
"{{FrontSide}}<hr id='answer''><div class='simple'>{{Simplified}}</div>" "{{FrontSide}}<hr id='answer''><div class='simple'>{{Simplified}}</div>"
@@ -134,7 +134,8 @@ def output_anki_dictionary(process_file: ProcessFile, results: list[DictionaryRe
note = Note( note = Note(
model=HSK_MODEL, model=HSK_MODEL,
fields=[ fields=[
"\n ".join(f"{n+1}. {m}" for n, m in enumerate(result.meanings)), # "\n ".join(f"{n+1}. {m}" for n, m in enumerate(result.meanings)),
result.meaning,
PinyinToneConverter().convert_text(result.pinyin), PinyinToneConverter().convert_text(result.pinyin),
result.simplified, result.simplified,
result.traditional, result.traditional,

View File

@@ -74,7 +74,7 @@ def process_a_file(process_file: ProcessFile, language_id: str):
results = translator_process(text_lines, process_file) results = translator_process(text_lines, process_file)
output_anki_phrase(process_file, results) output_anki_phrase(process_file, results)
elif DICT_TYPE in process_file.input_file.suffixes: elif DICT_TYPE in process_file.input_file.suffixes:
if process_file.dictionary_resource_file.exists(): if not process_file.dictionary_resource_file.is_file():
CCCEDICT.create_cedict(language_id) CCCEDICT.create_cedict(language_id)
with process_file.absolute_input_file.open("r", encoding="utf8") as file: with process_file.absolute_input_file.open("r", encoding="utf8") as file:
words_list = [word.strip() for word in file.readlines()] words_list = [word.strip() for word in file.readlines()]

View File

@@ -14,6 +14,7 @@ from .utility import CCCEDICT, TTS, DictionaryResult, ProcessFile, TranslationRe
# Constants # Constants
FIELDNAMES = ["simplified", "traditional", "pinyin", "meaning"] FIELDNAMES = ["simplified", "traditional", "pinyin", "meaning"]
DIALECT = "excel-tab"
# Results Classes # Results Classes
@@ -44,8 +45,8 @@ def dictionary_pre_process(words_list: list[str], process_file: ProcessFile):
with process_file.dictionary_resource_file.open( with process_file.dictionary_resource_file.open(
"w", encoding="utf8", newline="" "w", encoding="utf8", newline=""
) as resource_file: ) as resource_file:
tsv_writer = csv.writer( tsv_writer = csv.DictWriter(
resource_file, dialect="excel-tab", fieldnames=FIELDNAMES resource_file, dialect=DIALECT, fieldnames=FIELDNAMES
) )
tsv_writer.writeheader() tsv_writer.writeheader()
for words in words_list: for words in words_list:
@@ -84,15 +85,19 @@ def dictionary_process(process_file: ProcessFile) -> list[DictionaryResult]:
"""Process a dictionary_resource_file into a final result""" """Process a dictionary_resource_file into a final result"""
results = [] results = []
with process_file.dictionary_resource_file.open( with process_file.dictionary_resource_file.open(
"w", encoding="utf8", newline="" "r", encoding="utf8", newline=""
) as resource_file: ) as resource_file:
reader = csv.DictReader(resource_file) reader = csv.DictReader(resource_file, dialect=DIALECT)
for line in reader: for line in reader:
audio_path = process_file.resources / f"{line['pinyin']}.wav" audio_path = process_file.resources / f"{line['pinyin']}.wav"
if not audio_path.exists(): if not audio_path.exists():
audio = TTS.MODEL.generate(f"{line['simplified']}", language_id="zh") audio = TTS.MODEL.generate(
f"{line['simplified']}", language_id=LANGUAGES.CN
)
torchaudio.save(audio_path, audio, TTS.MODEL.sr) torchaudio.save(audio_path, audio, TTS.MODEL.sr)
result = DictionaryResult(**line, audio_path=audio_path) result = DictionaryResult(
**line, audio_path=audio_path, language_id=process_file.language_id
)
results.append(result) results.append(result)
return results return results

View File

@@ -56,14 +56,6 @@ class TranslatedEntry:
self.entry = entry self.entry = entry
self.language_id = language_id self.language_id = language_id
self._translated_meanings = [] self._translated_meanings = []
for meaning in entry.meanings:
if language_id != LANGUAGES.EN:
trans_meaning = argostranslate.translate.translate(
meaning, LANGUAGES.EN, language_id
)
else:
trans_meaning = meaning
self._translated_meanings.append(trans_meaning)
@property @property
def simplified(self): def simplified(self):
@@ -83,6 +75,14 @@ class TranslatedEntry:
@property @property
def meanings(self): def meanings(self):
"""Entry translated meaning list""" """Entry translated meaning list"""
for meaning in self.entry.meanings:
if self.language_id != LANGUAGES.EN:
trans_meaning = argostranslate.translate.translate(
meaning, LANGUAGES.EN, self.language_id
)
else:
trans_meaning = meaning
self._translated_meanings.append(trans_meaning)
return self._translated_meanings return self._translated_meanings
@@ -210,12 +210,12 @@ class DictionaryResult:
simplified: str, simplified: str,
traditional: str, traditional: str,
pinyin: str, pinyin: str,
meanings: str, meaning: str,
audio_path: Path, audio_path: Path,
): ):
self.language_id = language_id self.language_id = language_id
self.simplified = simplified self.simplified = simplified
self.traditional = traditional self.traditional = traditional
self.pinyin = pinyin self.pinyin = pinyin
self.meanings = meanings self.meaning = meaning
self.audio_path = audio_path self.audio_path = audio_path