From dde819f1e6742f55aecfebf5e2f9e3cb42aff784 Mon Sep 17 00:00:00 2001 From: Wolfang Torres Date: Fri, 12 Jun 2026 20:58:53 +0800 Subject: [PATCH] fix bug with dictionary only translated on demand --- src/anki_hsk_creator/__about__.py | 2 +- src/anki_hsk_creator/anki_generation.py | 5 +++-- src/anki_hsk_creator/api.py | 2 +- src/anki_hsk_creator/proccessor.py | 17 +++++++++++------ src/anki_hsk_creator/utility.py | 20 ++++++++++---------- 5 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/anki_hsk_creator/__about__.py b/src/anki_hsk_creator/__about__.py index f9e9255..a88217b 100644 --- a/src/anki_hsk_creator/__about__.py +++ b/src/anki_hsk_creator/__about__.py @@ -3,4 +3,4 @@ # SPDX-FileCopyrightText: 2026-present Wolfang Torres # # SPDX-License-Identifier: GPL-3.0-or-later -__version__ = "0.1.2" +__version__ = "0.1.3" diff --git a/src/anki_hsk_creator/anki_generation.py b/src/anki_hsk_creator/anki_generation.py index cedf13b..2151ed9 100644 --- a/src/anki_hsk_creator/anki_generation.py +++ b/src/anki_hsk_creator/anki_generation.py @@ -81,7 +81,7 @@ HSK_MODEL = Model( "{{Pinyin}}" "
{{Translated}}" "
{{Audio}}" - "
Pinyin: {{type:Simplified}}" + "
Simplified: {{type:Simplified}}" ), "afmt": ( "{{FrontSide}}
{{Simplified}}
" @@ -134,7 +134,8 @@ def output_anki_dictionary(process_file: ProcessFile, results: list[DictionaryRe note = Note( model=HSK_MODEL, fields=[ - "\n ".join(f"{n+1}. {m}" for n, m in enumerate(result.meanings)), + # "\n ".join(f"{n+1}. {m}" for n, m in enumerate(result.meanings)), + result.meaning, PinyinToneConverter().convert_text(result.pinyin), result.simplified, result.traditional, diff --git a/src/anki_hsk_creator/api.py b/src/anki_hsk_creator/api.py index 77c4c39..3500179 100644 --- a/src/anki_hsk_creator/api.py +++ b/src/anki_hsk_creator/api.py @@ -74,7 +74,7 @@ def process_a_file(process_file: ProcessFile, language_id: str): results = translator_process(text_lines, process_file) output_anki_phrase(process_file, results) elif DICT_TYPE in process_file.input_file.suffixes: - if process_file.dictionary_resource_file.exists(): + if not process_file.dictionary_resource_file.is_file(): CCCEDICT.create_cedict(language_id) with process_file.absolute_input_file.open("r", encoding="utf8") as file: words_list = [word.strip() for word in file.readlines()] diff --git a/src/anki_hsk_creator/proccessor.py b/src/anki_hsk_creator/proccessor.py index 57e4275..5cd968a 100644 --- a/src/anki_hsk_creator/proccessor.py +++ b/src/anki_hsk_creator/proccessor.py @@ -14,6 +14,7 @@ from .utility import CCCEDICT, TTS, DictionaryResult, ProcessFile, TranslationRe # Constants FIELDNAMES = ["simplified", "traditional", "pinyin", "meaning"] +DIALECT = "excel-tab" # Results Classes @@ -44,8 +45,8 @@ def dictionary_pre_process(words_list: list[str], process_file: ProcessFile): with process_file.dictionary_resource_file.open( "w", encoding="utf8", newline="" ) as resource_file: - tsv_writer = csv.writer( - resource_file, dialect="excel-tab", fieldnames=FIELDNAMES + tsv_writer = csv.DictWriter( + resource_file, dialect=DIALECT, fieldnames=FIELDNAMES ) tsv_writer.writeheader() for words in words_list: @@ -84,15 +85,19 @@ def dictionary_process(process_file: ProcessFile) -> list[DictionaryResult]: """Process a dictionary_resource_file into a final result""" results = [] with process_file.dictionary_resource_file.open( - "w", encoding="utf8", newline="" + "r", encoding="utf8", newline="" ) as resource_file: - reader = csv.DictReader(resource_file) + reader = csv.DictReader(resource_file, dialect=DIALECT) for line in reader: audio_path = process_file.resources / f"{line['pinyin']}.wav" if not audio_path.exists(): - audio = TTS.MODEL.generate(f"{line['simplified']}。", language_id="zh") + audio = TTS.MODEL.generate( + f"{line['simplified']}。", language_id=LANGUAGES.CN + ) torchaudio.save(audio_path, audio, TTS.MODEL.sr) - result = DictionaryResult(**line, audio_path=audio_path) + result = DictionaryResult( + **line, audio_path=audio_path, language_id=process_file.language_id + ) results.append(result) return results diff --git a/src/anki_hsk_creator/utility.py b/src/anki_hsk_creator/utility.py index e3f9973..e123aad 100644 --- a/src/anki_hsk_creator/utility.py +++ b/src/anki_hsk_creator/utility.py @@ -56,14 +56,6 @@ class TranslatedEntry: self.entry = entry self.language_id = language_id self._translated_meanings = [] - for meaning in entry.meanings: - if language_id != LANGUAGES.EN: - trans_meaning = argostranslate.translate.translate( - meaning, LANGUAGES.EN, language_id - ) - else: - trans_meaning = meaning - self._translated_meanings.append(trans_meaning) @property def simplified(self): @@ -83,6 +75,14 @@ class TranslatedEntry: @property def meanings(self): """Entry translated meaning list""" + for meaning in self.entry.meanings: + if self.language_id != LANGUAGES.EN: + trans_meaning = argostranslate.translate.translate( + meaning, LANGUAGES.EN, self.language_id + ) + else: + trans_meaning = meaning + self._translated_meanings.append(trans_meaning) return self._translated_meanings @@ -210,12 +210,12 @@ class DictionaryResult: simplified: str, traditional: str, pinyin: str, - meanings: str, + meaning: str, audio_path: Path, ): self.language_id = language_id self.simplified = simplified self.traditional = traditional self.pinyin = pinyin - self.meanings = meanings + self.meaning = meaning self.audio_path = audio_path