version 0.1
This commit is contained in:
96
src/anki_hsk_creator/proccessor.py
Normal file
96
src/anki_hsk_creator/proccessor.py
Normal file
@@ -0,0 +1,96 @@
|
||||
"""processor.py"""
|
||||
|
||||
# Pip
|
||||
import argostranslate.translate
|
||||
import torchaudio
|
||||
|
||||
# Local
|
||||
from .constants import LANGUAGES
|
||||
from .utility import TTS, ProcessFile, TranslationResult # , CCCEDICT
|
||||
|
||||
# Results Classes
|
||||
|
||||
|
||||
def translator_process(
    text_lines: list[str],
    process_file: ProcessFile,
    language_id: str,
) -> list[TranslationResult]:
    """Translate phrases or sentences and ensure each one has an audio clip.

    Each entry of ``text_lines`` is stripped of surrounding whitespace. A WAV
    file named ``N<index>.wav`` (index zero-padded to three digits) is then
    looked up under ``process_file.resources``; if it does not exist yet, it
    is synthesised with ``TTS.MODEL`` and saved with ``torchaudio``. Finally
    the line is translated from ``LANGUAGES.CN`` into ``language_id`` with
    argostranslate.

    Args:
        text_lines: Source lines, one phrase or sentence per entry.
        process_file: Object whose ``resources`` attribute is the directory
            that holds the audio files (presumably a ``pathlib.Path`` - it
            must support ``/`` and the result must offer ``exists()``).
        language_id: Target language code handed to argostranslate and stored
            in every result.

    Returns:
        One ``TranslationResult(language_id, translated, line, audio_path)``
        per input line, in input order.
    """
    results = []
    for n, line in enumerate(text_lines):
        line = line.strip()
        # ``03d`` gives a zero-padded, locale-independent index. The previous
        # spec (``:03.0n``) was not a valid integer format specifier and
        # raised ValueError before any audio could be produced.
        audio_path = process_file.resources / f"N{n:03d}.wav"
        if not audio_path.exists():
            # A full-width period is appended to the text sent to the TTS
            # model only; the stored/translated line stays unpunctuated.
            # NOTE(review): presumably this gives the model a sentence end -
            # confirm against the TTS model's documentation.
            audio = TTS.MODEL.generate(f"{line}。", language_id=LANGUAGES.CN)
            torchaudio.save(audio_path, audio, TTS.MODEL.sr)
        translated = argostranslate.translate.translate(
            line, LANGUAGES.CN, language_id
        )
        results.append(
            TranslationResult(language_id, translated, line, audio_path)
        )
    return results
|
||||
|
||||
|
||||
# def dictionary_process(dictionary, tts, in_file, resources):
|
||||
# """Process dictionary files"""
|
||||
# words_list = in_file.open(encoding="utf8").read().strip().split("\n")
|
||||
# results = []
|
||||
# try:
|
||||
# with in_file.open("w", encoding="utf8") as input_file:
|
||||
# for words in words_list:
|
||||
# word = words.split()[0]
|
||||
# pinyin = " ".join(words.split()[1:]) if len(words.split()) > 1 else None
|
||||
# if v := dictionary.get(word):
|
||||
# if len(v) > 1:
|
||||
# print(f"\nWARNING: {word} has multiple meanings:")
|
||||
# if pinyin and pinyin != "ERROR":
|
||||
# ml = list(filter(lambda x: x.pinyin == pinyin, v))
|
||||
# else:
|
||||
# ml = v
|
||||
# if len(ml) > 1:
|
||||
# for n, w in enumerate(ml):
|
||||
# print(f"{n+1} - {w}")
|
||||
# for m in w.meanings:
|
||||
# print(f"\t{m}")
|
||||
# s = None
|
||||
# while (
|
||||
# not s
|
||||
# or not s.isnumeric()
|
||||
# or not (1 <= int(s) <= len(v))
|
||||
# ):
|
||||
# s = input(
|
||||
# f"Please select the correct word [1-{len(v)}]: "
|
||||
# )
|
||||
# v = v[int(s) - 1]
|
||||
# else:
|
||||
# v = ml[0]
|
||||
# else:
|
||||
# v = v[0]
|
||||
# audio_path = resources / f"{word}.wav"
|
||||
# if not audio_path.exists():
|
||||
# audio = tts.generate(f"{word}。", language_id="zh")
|
||||
# torchaudio.save(audio_path, audio, tts.sr)
|
||||
# input_file.write(f"{word}\t{v.pinyin}\n")
|
||||
# results.append((v, audio_path))
|
||||
# else:
|
||||
# print("============================================")
|
||||
# print(f"===================>ERROR: {word} not found")
|
||||
# print("============================================")
|
||||
# input_file.write(f"{word}\tERROR\n")
|
||||
# except Exception:
|
||||
# with in_file.open("w", encoding="utf8") as input_file:
|
||||
# input_file.write("\n".join(words_list))
|
||||
# return results
|
||||
|
||||
# def output_tsv(out_file, results):
|
||||
# """writes the output as a tsv file"""
|
||||
# final_file = out_file.parent / f"{out_file.stem}.tsv"
|
||||
# with final_file.open("w", encoding="utf8", newline="") as csvfile:
|
||||
# writer = csv.writer(csvfile, delimiter="\t", quotechar='"')
|
||||
# for entry in results:
|
||||
# writer.writerow(
|
||||
# [
|
||||
# "\n ".join(f"{n+1}. {m}" for n, m in enumerate(entry.meanings)),
|
||||
# PinyinToneConverter().convert_text(entry.pinyin),
|
||||
# entry.simplified,
|
||||
# entry.traditional,
|
||||
# ]
|
||||
# )
|
||||
Reference in New Issue
Block a user