anki-hsk-creator/src/anki_hsk_creator/proccessor.py

"""processor.py"""

# Pip
import argostranslate.translate
import torchaudio

# Local
from .constants import LANGUAGES
from .utility import TTS, ProcessFile, TranslationResult  # , CCCEDICT

# Results Classes


def translator_process(
    text_lines: list[str],
    process_file: ProcessFile,
    language_id: str,
) -> list[TranslationResult]:
    """Translate phrases or sentences and make sure each has an audio clip.

    Every line is stripped of surrounding whitespace, voiced with the TTS
    model (unless its clip already exists on disk) and translated from
    ``LANGUAGES.CN`` into ``language_id``.

    Args:
        text_lines: Source lines, one phrase or sentence per entry. Lines are
            not filtered, so a blank entry is still voiced and translated.
        process_file: Supplies the ``resources`` directory in which the
            ``N###.wav`` clips are stored.
        language_id: Target language code passed to argostranslate; it is
            also stored in each result.

    Returns:
        One ``TranslationResult`` per input line, in input order, built from
        the target language, the translation, the stripped source line and
        the clip path.
    """
    results = []
    for index, raw_line in enumerate(text_lines):
        text = raw_line.strip()
        # Use the "d" presentation type, not the locale-aware "n": with "n" a
        # locale that groups digits would alter the file name for indices
        # >= 1000 and defeat the exists() cache check below.
        audio_path = process_file.resources / f"N{index:03d}.wav"
        if not audio_path.exists():
            # Synthesis is skipped when a clip with this index is already on
            # disk. The appended full stop presumably helps the model close
            # the utterance -- TODO confirm.
            audio = TTS.MODEL.generate(f"{text}。", language_id=LANGUAGES.CN)
            torchaudio.save(audio_path, audio, TTS.MODEL.sr)
        translated = argostranslate.translate.translate(
            text, LANGUAGES.CN, language_id
        )
        results.append(TranslationResult(language_id, translated, text, audio_path))
    return results


# def dictionary_process(dictionary, tts, in_file, resources):
#     """Process dictionary files"""
#     words_list = in_file.open(encoding="utf8").read().strip().split("\n")
#     results = []
#     try:
#         with in_file.open("w", encoding="utf8") as input_file:
#             for words in words_list:
#                 word = words.split()[0]
#                 pinyin = " ".join(words.split()[1:]) if len(words.split()) > 1 else None
#                 if v := dictionary.get(word):
#                     if len(v) > 1:
#                         print(f"\nWARNING: {word} has multiple meanings:")
#                         if pinyin and pinyin != "ERROR":
#                             ml = list(filter(lambda x: x.pinyin == pinyin, v))
#                         else:
#                             ml = v
#                         if len(ml) > 1:
#                             for n, w in enumerate(ml):
#                                 print(f"{n+1} - {w}")
#                                 for m in w.meanings:
#                                     print(f"\t{m}")
#                             s = None
#                             while (
#                                 not s
#                                 or not s.isnumeric()
#                                 or not (1 <= int(s) <= len(v))
#                             ):
#                                 s = input(
#                                     f"Please select the correct word [1-{len(v)}]: "
#                                 )
#                             v = v[int(s) - 1]
#                         else:
#                             v = ml[0]
#                     else:
#                         v = v[0]
#                     audio_path = resources / f"{word}.wav"
#                     if not audio_path.exists():
#                         audio = tts.generate(f"{word}。", language_id="zh")
#                         torchaudio.save(audio_path, audio, tts.sr)
#                     input_file.write(f"{word}\t{v.pinyin}\n")
#                     results.append((v, audio_path))
#                 else:
#                     print("============================================")
#                     print(f"===================>ERROR: {word} not found")
#                     print("============================================")
#                     input_file.write(f"{word}\tERROR\n")
#     except Exception:
#         with in_file.open("w", encoding="utf8") as input_file:
#             input_file.write("\n".join(words_list))
#     return results

# def output_tsv(out_file, results):
#     """writes the output as a tsv file"""
#     final_file = out_file.parent / f"{out_file.stem}.tsv"
#     with final_file.open("w", encoding="utf8", newline="") as csvfile:
#         writer = csv.writer(csvfile, delimiter="\t", quotechar='"')
#         for entry in results:
#             writer.writerow(
#                 [
#                     "\n ".join(f"{n+1}. {m}" for n, m in enumerate(entry.meanings)),
#                     PinyinToneConverter().convert_text(entry.pinyin),
#                     entry.simplified,
#                     entry.traditional,
#                 ]
#             )