Fix dictionary processing bug; dictionary meanings are now translated only on demand
This commit is contained in:
@@ -3,4 +3,4 @@
|
|||||||
# SPDX-FileCopyrightText: 2026-present Wolfang Torres <wolfang.torres@gmail.com>
|
# SPDX-FileCopyrightText: 2026-present Wolfang Torres <wolfang.torres@gmail.com>
|
||||||
#
|
#
|
||||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
__version__ = "0.1.2"
|
__version__ = "0.1.3"
|
||||||
|
|||||||
@@ -81,7 +81,7 @@ HSK_MODEL = Model(
|
|||||||
"<strong>{{Pinyin}}</strong>"
|
"<strong>{{Pinyin}}</strong>"
|
||||||
"<br>{{Translated}}"
|
"<br>{{Translated}}"
|
||||||
"<br>{{Audio}}"
|
"<br>{{Audio}}"
|
||||||
"<br>Pinyin: {{type:Simplified}}"
|
"<br>Simplified: {{type:Simplified}}"
|
||||||
),
|
),
|
||||||
"afmt": (
|
"afmt": (
|
||||||
"{{FrontSide}}<hr id='answer''><div class='simple'>{{Simplified}}</div>"
|
"{{FrontSide}}<hr id='answer''><div class='simple'>{{Simplified}}</div>"
|
||||||
@@ -134,7 +134,8 @@ def output_anki_dictionary(process_file: ProcessFile, results: list[DictionaryRe
|
|||||||
note = Note(
|
note = Note(
|
||||||
model=HSK_MODEL,
|
model=HSK_MODEL,
|
||||||
fields=[
|
fields=[
|
||||||
"\n ".join(f"{n+1}. {m}" for n, m in enumerate(result.meanings)),
|
# "\n ".join(f"{n+1}. {m}" for n, m in enumerate(result.meanings)),
|
||||||
|
result.meaning,
|
||||||
PinyinToneConverter().convert_text(result.pinyin),
|
PinyinToneConverter().convert_text(result.pinyin),
|
||||||
result.simplified,
|
result.simplified,
|
||||||
result.traditional,
|
result.traditional,
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ def process_a_file(process_file: ProcessFile, language_id: str):
|
|||||||
results = translator_process(text_lines, process_file)
|
results = translator_process(text_lines, process_file)
|
||||||
output_anki_phrase(process_file, results)
|
output_anki_phrase(process_file, results)
|
||||||
elif DICT_TYPE in process_file.input_file.suffixes:
|
elif DICT_TYPE in process_file.input_file.suffixes:
|
||||||
if process_file.dictionary_resource_file.exists():
|
if not process_file.dictionary_resource_file.is_file():
|
||||||
CCCEDICT.create_cedict(language_id)
|
CCCEDICT.create_cedict(language_id)
|
||||||
with process_file.absolute_input_file.open("r", encoding="utf8") as file:
|
with process_file.absolute_input_file.open("r", encoding="utf8") as file:
|
||||||
words_list = [word.strip() for word in file.readlines()]
|
words_list = [word.strip() for word in file.readlines()]
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ from .utility import CCCEDICT, TTS, DictionaryResult, ProcessFile, TranslationRe
|
|||||||
# Constants
|
# Constants
|
||||||
|
|
||||||
FIELDNAMES = ["simplified", "traditional", "pinyin", "meaning"]
|
FIELDNAMES = ["simplified", "traditional", "pinyin", "meaning"]
|
||||||
|
DIALECT = "excel-tab"
|
||||||
|
|
||||||
# Results Classes
|
# Results Classes
|
||||||
|
|
||||||
@@ -44,8 +45,8 @@ def dictionary_pre_process(words_list: list[str], process_file: ProcessFile):
|
|||||||
with process_file.dictionary_resource_file.open(
|
with process_file.dictionary_resource_file.open(
|
||||||
"w", encoding="utf8", newline=""
|
"w", encoding="utf8", newline=""
|
||||||
) as resource_file:
|
) as resource_file:
|
||||||
tsv_writer = csv.writer(
|
tsv_writer = csv.DictWriter(
|
||||||
resource_file, dialect="excel-tab", fieldnames=FIELDNAMES
|
resource_file, dialect=DIALECT, fieldnames=FIELDNAMES
|
||||||
)
|
)
|
||||||
tsv_writer.writeheader()
|
tsv_writer.writeheader()
|
||||||
for words in words_list:
|
for words in words_list:
|
||||||
@@ -84,15 +85,19 @@ def dictionary_process(process_file: ProcessFile) -> list[DictionaryResult]:
|
|||||||
"""Process a dictionary_resource_file into a final result"""
|
"""Process a dictionary_resource_file into a final result"""
|
||||||
results = []
|
results = []
|
||||||
with process_file.dictionary_resource_file.open(
|
with process_file.dictionary_resource_file.open(
|
||||||
"w", encoding="utf8", newline=""
|
"r", encoding="utf8", newline=""
|
||||||
) as resource_file:
|
) as resource_file:
|
||||||
reader = csv.DictReader(resource_file)
|
reader = csv.DictReader(resource_file, dialect=DIALECT)
|
||||||
for line in reader:
|
for line in reader:
|
||||||
audio_path = process_file.resources / f"{line['pinyin']}.wav"
|
audio_path = process_file.resources / f"{line['pinyin']}.wav"
|
||||||
if not audio_path.exists():
|
if not audio_path.exists():
|
||||||
audio = TTS.MODEL.generate(f"{line['simplified']}。", language_id="zh")
|
audio = TTS.MODEL.generate(
|
||||||
|
f"{line['simplified']}。", language_id=LANGUAGES.CN
|
||||||
|
)
|
||||||
torchaudio.save(audio_path, audio, TTS.MODEL.sr)
|
torchaudio.save(audio_path, audio, TTS.MODEL.sr)
|
||||||
result = DictionaryResult(**line, audio_path=audio_path)
|
result = DictionaryResult(
|
||||||
|
**line, audio_path=audio_path, language_id=process_file.language_id
|
||||||
|
)
|
||||||
results.append(result)
|
results.append(result)
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|||||||
@@ -56,14 +56,6 @@ class TranslatedEntry:
|
|||||||
self.entry = entry
|
self.entry = entry
|
||||||
self.language_id = language_id
|
self.language_id = language_id
|
||||||
self._translated_meanings = []
|
self._translated_meanings = []
|
||||||
for meaning in entry.meanings:
|
|
||||||
if language_id != LANGUAGES.EN:
|
|
||||||
trans_meaning = argostranslate.translate.translate(
|
|
||||||
meaning, LANGUAGES.EN, language_id
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
trans_meaning = meaning
|
|
||||||
self._translated_meanings.append(trans_meaning)
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def simplified(self):
|
def simplified(self):
|
||||||
@@ -83,6 +75,14 @@ class TranslatedEntry:
|
|||||||
@property
|
@property
|
||||||
def meanings(self):
|
def meanings(self):
|
||||||
"""Entry translated meaning list"""
|
"""Entry translated meaning list"""
|
||||||
|
for meaning in self.entry.meanings:
|
||||||
|
if self.language_id != LANGUAGES.EN:
|
||||||
|
trans_meaning = argostranslate.translate.translate(
|
||||||
|
meaning, LANGUAGES.EN, self.language_id
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
trans_meaning = meaning
|
||||||
|
self._translated_meanings.append(trans_meaning)
|
||||||
return self._translated_meanings
|
return self._translated_meanings
|
||||||
|
|
||||||
|
|
||||||
@@ -210,12 +210,12 @@ class DictionaryResult:
|
|||||||
simplified: str,
|
simplified: str,
|
||||||
traditional: str,
|
traditional: str,
|
||||||
pinyin: str,
|
pinyin: str,
|
||||||
meanings: str,
|
meaning: str,
|
||||||
audio_path: Path,
|
audio_path: Path,
|
||||||
):
|
):
|
||||||
self.language_id = language_id
|
self.language_id = language_id
|
||||||
self.simplified = simplified
|
self.simplified = simplified
|
||||||
self.traditional = traditional
|
self.traditional = traditional
|
||||||
self.pinyin = pinyin
|
self.pinyin = pinyin
|
||||||
self.meanings = meanings
|
self.meaning = meaning
|
||||||
self.audio_path = audio_path
|
self.audio_path = audio_path
|
||||||
|
|||||||
Reference in New Issue
Block a user