version 0.1

2026-06-11 21:23:34 +08:00
parent ea057668bc
commit 21c6416cfd
15 changed files with 645 additions and 367 deletions
--- a/src/anki_hsk_creator/proccessor.py
+++ b/src/anki_hsk_creator/proccessor.py
@@ -0,0 +1,96 @@
+"""processor.py"""
+
+# Pip
+import argostranslate.translate
+import torchaudio
+
+# Local
+from .constants import LANGUAGES
+from .utility import TTS, ProcessFile, TranslationResult  # , CCCEDICT
+
+# Results Classes
+
+
+def translator_process(
+    text_lines: list[str],
+    process_file: ProcessFile,
+    language_id: str,
+) -> list[TranslationResult]:
+    """Translate phrases or sentences and make sure each has a spoken recording.
+
+    Every line is stripped, synthesised to ``N<index>.wav`` under
+    ``process_file.resources`` unless that file already exists, and translated
+    from ``LANGUAGES.CN`` into ``language_id``.
+
+    Args:
+        text_lines: Source lines, one phrase or sentence each.
+        process_file: Supplies ``resources``, the location of the audio files.
+        language_id: Target language code handed to argostranslate.
+
+    Returns:
+        One ``TranslationResult`` per input line, in input order.
+    """
+    results = []
+    for n, line in enumerate(text_lines):
+        line = line.strip()
+        # Zero-padded index (N000.wav, N001.wav, ...). The previous spec
+        # ":03.0n" is not a valid integer format and raised ValueError.
+        audio_path = process_file.resources / f"N{n:03d}.wav"
+        # NOTE(review): the cache key is the line position, not its text, so an
+        # edited or reordered input keeps the old recording -- confirm intended.
+        if not audio_path.exists():
+            # A full stop is appended to the text before it is synthesised.
+            audio = TTS.MODEL.generate(f"{line}。", language_id=LANGUAGES.CN)
+            torchaudio.save(audio_path, audio, TTS.MODEL.sr)
+        translated = argostranslate.translate.translate(line, LANGUAGES.CN, language_id)
+        results.append(TranslationResult(language_id, translated, line, audio_path))
+    return results
+
+
+# def dictionary_process(dictionary, tts, in_file, resources):
+#     """Process dictionary files"""
+#     words_list = in_file.open(encoding="utf8").read().strip().split("\n")
+#     results = []
+#     try:
+#         with in_file.open("w", encoding="utf8") as input_file:
+#             for words in words_list:
+#                 word = words.split()[0]
+#                 pinyin = " ".join(words.split()[1:]) if len(words.split()) > 1 else None
+#                 if v := dictionary.get(word):
+#                     if len(v) > 1:
+#                         print(f"\nWARNING: {word} has multiple meanings:")
+#                         if pinyin and pinyin != "ERROR":
+#                             ml = list(filter(lambda x: x.pinyin == pinyin, v))
+#                         else:
+#                             ml = v
+#                         if len(ml) > 1:
+#                             for n, w in enumerate(ml):
+#                                 print(f"{n+1} - {w}")
+#                                 for m in w.meanings:
+#                                     print(f"\t{m}")
+#                             s = None
+#                             while (
+#                                 not s
+#                                 or not s.isnumeric()
+#                                 or not (1 <= int(s) <= len(v))
+#                             ):
+#                                 s = input(
+#                                     f"Please select the correct word [1-{len(v)}]: "
+#                                 )
+#                             v = v[int(s) - 1]
+#                         else:
+#                             v = ml[0]
+#                     else:
+#                         v = v[0]
+#                     audio_path = resources / f"{word}.wav"
+#                     if not audio_path.exists():
+#                         audio = tts.generate(f"{word}。", language_id="zh")
+#                         torchaudio.save(audio_path, audio, tts.sr)
+#                     input_file.write(f"{word}\t{v.pinyin}\n")
+#                     results.append((v, audio_path))
+#                 else:
+#                     print("============================================")
+#                     print(f"===================>ERROR: {word} not found")
+#                     print("============================================")
+#                     input_file.write(f"{word}\tERROR\n")
+#     except Exception:
+#         with in_file.open("w", encoding="utf8") as input_file:
+#             input_file.write("\n".join(words_list))
+#     return results
+
+# def output_tsv(out_file, results):
+#     """writes the output as a tsv file"""
+#     final_file = out_file.parent / f"{out_file.stem}.tsv"
+#     with final_file.open("w", encoding="utf8", newline="") as csvfile:
+#         writer = csv.writer(csvfile, delimiter="\t", quotechar='"')
+#         for entry in results:
+#             writer.writerow(
+#                 [
+#                     "\n ".join(f"{n+1}. {m}" for n, m in enumerate(entry.meanings)),
+#                     PinyinToneConverter().convert_text(entry.pinyin),
+#                     entry.simplified,
+#                     entry.traditional,
+#                 ]
+#             )