97 lines
4.0 KiB
Python
97 lines
4.0 KiB
Python
"""processor.py"""
|
|
|
|
# Pip
|
|
import argostranslate.translate
|
|
import torchaudio
|
|
|
|
# Local
|
|
from .constants import LANGUAGES
|
|
from .utility import TTS, ProcessFile, TranslationResult # , CCCEDICT
|
|
|
|
# Results Classes
|
|
|
|
|
|
def translator_process(
    text_lines: list[str],
    process_file: ProcessFile,
    language_id: str,
) -> list[TranslationResult]:
    """Translate phrases or sentences and make sure each one has speech audio.

    Each entry of ``text_lines`` is stripped of surrounding whitespace and
    then:

    1. An audio file ``N<index>.wav`` (index zero-padded to three digits) is
       looked up under ``process_file.resources``. When it does not exist
       yet, it is generated with ``TTS.MODEL`` using ``LANGUAGES.CN`` and
       written with ``torchaudio.save``.
    2. The stripped line is translated with argostranslate from
       ``LANGUAGES.CN`` to ``language_id``.

    Args:
        text_lines: Source lines to process, one phrase or sentence each.
        process_file: Object whose ``resources`` attribute is the directory
            path where the audio files are stored.
        language_id: Target language code handed to argostranslate and
            stored in every result.

    Returns:
        One ``TranslationResult`` per input line, in input order, built from
        ``(language_id, translated text, stripped source line, audio path)``.
    """
    results = []
    for index, raw_line in enumerate(text_lines):
        line = raw_line.strip()

        # Use the plain "d" presentation type: the locale-aware "n" type may
        # insert digit-grouping separators (e.g. "N1,000.wav"), which would
        # make the cache file names depend on the process locale.
        audio_path = process_file.resources / f"N{index:03d}.wav"

        # Audio already on disk is reused; only missing files are synthesized.
        if not audio_path.exists():
            # The ideographic full stop is appended to the text handed to the
            # TTS model only; the stored source line stays unchanged.
            audio = TTS.MODEL.generate(f"{line}。", language_id=LANGUAGES.CN)
            torchaudio.save(audio_path, audio, TTS.MODEL.sr)

        translated = argostranslate.translate.translate(
            line, LANGUAGES.CN, language_id
        )
        results.append(
            TranslationResult(language_id, translated, line, audio_path)
        )
    return results
|
|
|
|
|
|
# def dictionary_process(dictionary, tts, in_file, resources):
|
|
# """Process dictionary files"""
|
|
# words_list = in_file.open(encoding="utf8").read().strip().split("\n")
|
|
# results = []
|
|
# try:
|
|
# with in_file.open("w", encoding="utf8") as input_file:
|
|
# for words in words_list:
|
|
# word = words.split()[0]
|
|
# pinyin = " ".join(words.split()[1:]) if len(words.split()) > 1 else None
|
|
# if v := dictionary.get(word):
|
|
# if len(v) > 1:
|
|
# print(f"\nWARNING: {word} has multiple meanings:")
|
|
# if pinyin and pinyin != "ERROR":
|
|
# ml = list(filter(lambda x: x.pinyin == pinyin, v))
|
|
# else:
|
|
# ml = v
|
|
# if len(ml) > 1:
|
|
# for n, w in enumerate(ml):
|
|
# print(f"{n+1} - {w}")
|
|
# for m in w.meanings:
|
|
# print(f"\t{m}")
|
|
# s = None
|
|
# while (
|
|
# not s
|
|
# or not s.isnumeric()
|
|
# or not (1 <= int(s) <= len(v))
|
|
# ):
|
|
# s = input(
|
|
# f"Please select the correct word [1-{len(v)}]: "
|
|
# )
|
|
# v = v[int(s) - 1]
|
|
# else:
|
|
# v = ml[0]
|
|
# else:
|
|
# v = v[0]
|
|
# audio_path = resources / f"{word}.wav"
|
|
# if not audio_path.exists():
|
|
# audio = tts.generate(f"{word}。", language_id="zh")
|
|
# torchaudio.save(audio_path, audio, tts.sr)
|
|
# input_file.write(f"{word}\t{v.pinyin}\n")
|
|
# results.append((v, audio_path))
|
|
# else:
|
|
# print("============================================")
|
|
# print(f"===================>ERROR: {word} not found")
|
|
# print("============================================")
|
|
# input_file.write(f"{word}\tERROR\n")
|
|
# except Exception:
|
|
# with in_file.open("w", encoding="utf8") as input_file:
|
|
# input_file.write("\n".join(words_list))
|
|
# return results
|
|
|
|
# def output_tsv(out_file, results):
|
|
# """writes the output as a tsv file"""
|
|
# final_file = out_file.parent / f"{out_file.stem}.tsv"
|
|
# with final_file.open("w", encoding="utf8", newline="") as csvfile:
|
|
# writer = csv.writer(csvfile, delimiter="\t", quotechar='"')
|
|
# for entry in results:
|
|
# writer.writerow(
|
|
# [
|
|
# "\n ".join(f"{n+1}. {m}" for n, m in enumerate(entry.meanings)),
|
|
# PinyinToneConverter().convert_text(entry.pinyin),
|
|
# entry.simplified,
|
|
# entry.traditional,
|
|
# ]
|
|
# )
|