Files
anki-hsk-creator/src/anki_hsk_creator/proccessor.py
Wolfang Torres eb4cc8e6e0 update format for anki,
upgrade translation package search,
fix small bugs
2026-06-12 00:43:55 +08:00

97 lines
4.0 KiB
Python

"""processor.py"""
# Pip
import argostranslate.translate
import torchaudio
# Local
from .constants import LANGUAGES
from .utility import TTS, ProcessFile, TranslationResult # , CCCEDICT
# Processing functions
def translator_process(
    text_lines: list[str],
    process_file: ProcessFile,
    language_id: str,
) -> list[TranslationResult]:
    """Translate phrases or sentences and pair each one with an audio clip.

    Every entry of ``text_lines`` is stripped of surrounding whitespace and
    then handled in order:

    * The clip path is ``process_file.resources / "N<index>.wav"``, where the
      index is the zero-padded position of the line in ``text_lines``. The
      clip is synthesized with ``TTS.MODEL`` (``language_id=LANGUAGES.CN``)
      and saved only when that file does not exist yet.
    * The stripped text is translated from ``LANGUAGES.CN`` into
      ``language_id`` with ``argostranslate``.

    Args:
        text_lines: Source lines to process, one phrase/sentence per entry.
        process_file: Object whose ``resources`` path receives the clips.
        language_id: Target language code passed to the translator.

    Returns:
        One ``TranslationResult`` per input line, in input order, built from
        ``(language_id, translated text, stripped source text, clip path)``.
    """
    translations = []
    for index, raw_text in enumerate(text_lines):
        phrase = raw_text.strip()
        clip_path = process_file.resources / f"N{index:03n}.wav"

        # An existing clip is reused as-is; synthesis only runs for new files.
        if not clip_path.exists():
            waveform = TTS.MODEL.generate(f"{phrase}", language_id=LANGUAGES.CN)
            torchaudio.save(clip_path, waveform, TTS.MODEL.sr)

        translated_text = argostranslate.translate.translate(
            phrase,
            LANGUAGES.CN,
            language_id,
        )
        entry = TranslationResult(language_id, translated_text, phrase, clip_path)
        translations.append(entry)
    return translations
# def dictionary_process(dictionary, tts, in_file, resources):
# """Process dictionary files"""
# words_list = in_file.open(encoding="utf8").read().strip().split("\n")
# results = []
# try:
# with in_file.open("w", encoding="utf8") as input_file:
# for words in words_list:
# word = words.split()[0]
# pinyin = " ".join(words.split()[1:]) if len(words.split()) > 1 else None
# if v := dictionary.get(word):
# if len(v) > 1:
# print(f"\nWARNING: {word} has multiple meanings:")
# if pinyin and pinyin != "ERROR":
# ml = list(filter(lambda x: x.pinyin == pinyin, v))
# else:
# ml = v
# if len(ml) > 1:
# for n, w in enumerate(ml):
# print(f"{n+1} - {w}")
# for m in w.meanings:
# print(f"\t{m}")
# s = None
# while (
# not s
# or not s.isnumeric()
# or not (1 <= int(s) <= len(v))
# ):
# s = input(
# f"Please select the correct word [1-{len(v)}]: "
# )
# v = v[int(s) - 1]
# else:
# v = ml[0]
# else:
# v = v[0]
# audio_path = resources / f"{word}.wav"
# if not audio_path.exists():
# audio = tts.generate(f"{word}。", language_id="zh")
# torchaudio.save(audio_path, audio, tts.sr)
# input_file.write(f"{word}\t{v.pinyin}\n")
# results.append((v, audio_path))
# else:
# print("============================================")
# print(f"===================>ERROR: {word} not found")
# print("============================================")
# input_file.write(f"{word}\tERROR\n")
# except Exception:
# with in_file.open("w", encoding="utf8") as input_file:
# input_file.write("\n".join(words_list))
# return results
# def output_tsv(out_file, results):
# """writes the output as a tsv file"""
# final_file = out_file.parent / f"{out_file.stem}.tsv"
# with final_file.open("w", encoding="utf8", newline="") as csvfile:
# writer = csv.writer(csvfile, delimiter="\t", quotechar='"')
# for entry in results:
# writer.writerow(
# [
# "\n ".join(f"{n+1}. {m}" for n, m in enumerate(entry.meanings)),
# PinyinToneConverter().convert_text(entry.pinyin),
# entry.simplified,
# entry.traditional,
# ]
# )