add support for paragraph translations

2026-05-20 02:06:39 +08:00
parent 6382d03475
commit d0e3693574
13 changed files with 173 additions and 44 deletions
--- a/anki-hsk-creator/main.py
+++ b/anki-hsk-creator/main.py
@@ -1,14 +1,14 @@
 ## Imports
 from pathlib import Path
+import random
 import csv

+## PIP
 from cedict_utils.cedict import CedictParser
 from pinyin_tone_converter.pinyin_tone_converter import PinyinToneConverter
-
-# from genanki import Deck, Note
-
-# import argostranslate.package
-# import argostranslate.translate
+from genanki import Deck, Note, Model, Package
+import argostranslate.package
+import argostranslate.translate

 ## Constants

@@ -16,40 +16,76 @@ CCCEDICT = Path(__file__).parent / "cedict_ts.u8"
 DATA = Path(__file__).parent.parent / "data"
 INPUT = DATA / "input"
 OUTPUT = DATA / "output"
-CN = "cn"
+CN = "zh"
 EN = "en"
+# Marker token: main() takes the translation path when the first
+# whitespace-separated token of the selected input file equals this value.
+TEXT_TYPE = "TEXT_TYPE"
+# Stylesheet passed as css= to both note models. The .simple and .trad classes
+# size the Simplified/Traditional character blocks used by the HSK templates.
+CSS = """
+.card {
+ font-family: arial;
+ font-size: 20px;
+ text-align: center;
+ color: black;
+ background-color: white;
+}
+.simple {
+font-family: Arial;
+font-size: 100px;
+}
+.trad {
+font-family: Arial;
+font-size: 75px;
+}
+"""

 ## Classess

-## Main
+# Two-field note model (Question / Answer) with a single card template; it is
+# the model output_anki_text() uses for translated-sentence notes.
+# NOTE(review): the integer is the genanki model ID; presumably it must stay
+# fixed so re-exported notes keep matching the same note type -- confirm.
+SIMPLE_MODEL = Model(
+    2076166425,
+    "Simple Model",
+    fields=[
+        {"name": "Question"},
+        {"name": "Answer"},
+    ],
+    templates=[
+        {
+            "name": "Card 1",
+            "qfmt": "{{Question}}",
+            "afmt": '{{FrontSide}}<hr id="answer">{{Answer}}',
+        },
+    ],
+    css=CSS,
+)

-# Download and install Argos Translate package
-# argostranslate.package.update_package_index()
-# available_packages = argostranslate.package.get_available_packages()
-# package_to_install = next(
-#     filter(
-#         lambda x: x.from_code == CN and x.to_code == EN, available_packages
-#     )
-# )
-# argostranslate.package.install_from_path(package_to_install.download())
+# Four-field note model used by output_anki_dictionary() for vocabulary notes.
+# Card 1 prompts with pinyin + English and reveals the characters;
+# Card 2 prompts with the characters and reveals pinyin + English.
+HSK_MODEL = Model(
+    1708536519,
+    "HSK Model",
+    fields=[
+        {"name": "English"},
+        {"name": "Pinyin"},
+        {"name": "Simplified"},
+        {"name": "Traditional"},
+    ],
+    templates=[
+        {
+            "name": "Card 1",
+            "qfmt": "<strong>{{Pinyin}}</strong><br>{{English}}",
+            # The <hr> previously read id='answer'' (stray quote), which is
+            # malformed HTML; the id is what Anki uses to scroll to the answer.
+            "afmt": "{{FrontSide}}<hr id='answer'><div class='simple'>{{Simplified}}</div><br><div class='trad'>{{Traditional}}</div>",
+        },
+        {
+            "name": "Card 2",
+            "qfmt": "<div class='simple'>{{Simplified}}</div><br><div class='trad'>{{Traditional}}</div>",
+            "afmt": '{{FrontSide}}<hr id="answer"><strong>{{Pinyin}}</strong><br>{{English}}',
+        },
+    ],
+    css=CSS,
+)

-def process_files():
-    print("Select data file:")
-    files = []
-    for n, file in enumerate(INPUT.glob('**/*.txt')):
-        files.append(file)
-        print(f"{n+1} - {file.relative_to(INPUT)}")
-    s = None
-    while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)):
-        s = input(f"Please select the file [1-{len(files)}]: ")
-    in_file = files[int(s)-1]
-    relative = in_file.relative_to(INPUT)
-    out_file = OUTPUT / relative
-    out_file.parent.mkdir(parents=True, exist_ok=True)
-    return in_file, out_file

-def dictionary_process(in_file, out_file):
-    """Process dictionary files"""
+## Functions
+
+
+def create_cedict():
+    """Creates a create_cedict dictionary object"""
    parser = CedictParser()
    parser.read_file(CCCEDICT)
    entries = parser.parse()
@@ -61,16 +97,48 @@ def dictionary_process(in_file, out_file):
        else:
            dictionary[entry.simplified].append(entry)

-    out_file = DATA / f"{in_file.stem}.tsv"
-    words_list = in_file.open(encoding="utf8").read().split()
+    return dictionary

+
+def create_translator():
+    """Ensure the Argos Translate CN -> EN language package is installed.
+
+    When a matching package is already installed nothing is downloaded, so
+    repeat runs do not need to refresh the package index. Otherwise the index
+    is refreshed and the first matching package is downloaded and installed.
+
+    Raises:
+        RuntimeError: if the package index offers no CN -> EN package.
+    """
+
+    def is_wanted(package):
+        return package.from_code == CN and package.to_code == EN
+
+    if any(is_wanted(p) for p in argostranslate.package.get_installed_packages()):
+        return
+
+    argostranslate.package.update_package_index()
+    package_to_install = next(
+        filter(is_wanted, argostranslate.package.get_available_packages()), None
+    )
+    if package_to_install is None:
+        # A bare next() would surface here as an unexplained StopIteration.
+        raise RuntimeError(f"No Argos Translate package found for {CN} -> {EN}")
+    argostranslate.package.install_from_path(package_to_install.download())
+
+
+## Main
+
+
+def process_files():
+    """Prompt for an input file and derive its output path and type.
+
+    Lists every ``*.txt`` file under INPUT (recursively), asks the user to
+    pick one by number, and creates the matching directory under OUTPUT.
+
+    Returns:
+        tuple: ``(in_file, out_file, file_type)``. ``out_file`` is the input's
+        path relative to INPUT, re-rooted under OUTPUT. ``file_type`` is the
+        first whitespace-separated token of the file ("" for an empty file).
+
+    Raises:
+        FileNotFoundError: if INPUT contains no ``*.txt`` files.
+    """
+    # Sorted so the menu numbering is stable between runs.
+    files = sorted(INPUT.glob("**/*.txt"))
+    if not files:
+        # With nothing to choose from, the prompt loop below could never exit.
+        raise FileNotFoundError(f"No .txt files found under {INPUT}")
+    print("Select data file:")
+    for n, file in enumerate(files, start=1):
+        print(f"{n} - {file.relative_to(INPUT)}")
+    s = None
+    # isdecimal() rather than isnumeric(): the latter accepts characters such
+    # as superscript digits that int() rejects with ValueError.
+    while not s or not s.isdecimal() or not (1 <= int(s) <= len(files)):
+        s = input(f"Please select the file [1-{len(files)}]: ")
+    in_file = files[int(s) - 1]
+    out_file = OUTPUT / in_file.relative_to(INPUT)
+    out_file.parent.mkdir(parents=True, exist_ok=True)
+    with in_file.open(encoding="utf8") as input_file:
+        tokens = input_file.read().split(maxsplit=1)
+    # An empty (or whitespace-only) file has no marker token to report.
+    file_type = tokens[0] if tokens else ""
+    return in_file, out_file, file_type
+
+
+def dictionary_process(dictionary, in_file):
+    """Process dictionary files"""
+    words_list = in_file.open(encoding="utf8").read().split()
    results = []
    for word in words_list:
        if v := dictionary.get(word):
            if len(v) > 1:
-                print(
-                    f"\nWARNING: {word} has multiple meanings:"
-                )
+                print(f"\nWARNING: {word} has multiple meanings:")
                for n, w in enumerate(v):
                    print(f"{n+1} - {w}")
                    for m in w.meanings:
@@ -78,7 +146,7 @@ def dictionary_process(in_file, out_file):
                s = None
                while not s or not s.isnumeric() or not (1 <= int(s) <= len(v)):
                    s = input(f"Please select the correct word [1-{len(v)}]: ")
-                v = v[int(s)-1]
+                v = v[int(s) - 1]
            else:
                v = v[0]
            results.append(v)
@@ -86,21 +154,80 @@ def dictionary_process(in_file, out_file):
            print("============================================")
            print(f"===================>ERROR: {word} not found")
            print("============================================")
-    with out_file.open("w", encoding="utf8", newline="") as csvfile:
+    return results
+
+
+def trasnlator_process(in_file):
+    """Translate the sentences of a text file from CN to EN.
+
+    The file's first whitespace-separated token (the type marker) is skipped.
+    Each remaining token is split on the full stop "。" and every non-empty
+    piece is translated with Argos Translate.
+
+    Args:
+        in_file: path of the UTF-8 input file.
+
+    Returns:
+        list: ``[translated_text, original_text]`` pairs in file order.
+    """
+    # The context manager closes the handle; the bare open().read() did not.
+    with in_file.open(encoding="utf8") as handle:
+        chunks = handle.read().split()[1:]
+    results = []
+    for chunk in chunks:
+        # split() already removed surrounding whitespace from each chunk.
+        for sentence in chunk.split("。"):
+            if sentence:
+                translated = argostranslate.translate.translate(sentence, CN, EN)
+                results.append([translated, sentence])
+    return results
+
+
+def output_tsv(out_file, results):
+    """Write dictionary entries to a tab-separated file.
+
+    The file is created next to ``out_file`` as ``<stem>.tsv``. Each row holds
+    the numbered meanings, the tone-converted pinyin, the simplified form and
+    the traditional form of one entry.
+
+    Args:
+        out_file: path whose parent directory and stem name the output file.
+        results: entries exposing ``meanings``, ``pinyin``, ``simplified``
+            and ``traditional``.
+    """
+    final_file = out_file.parent / f"{out_file.stem}.tsv"
+    with final_file.open("w", encoding="utf8", newline="") as csvfile:
        writer = csv.writer(csvfile, delimiter="\t", quotechar='"')
        for entry in results:
            writer.writerow(
                [
-                    "\n ".join(f"{n+1}. {m}" for n,m in enumerate(entry.meanings)),
+                    "\n ".join(f"{n+1}. {m}" for n, m in enumerate(entry.meanings)),
                    PinyinToneConverter().convert_text(entry.pinyin),
                    entry.simplified,
                    entry.traditional,
                ]
            )

+
+def output_anki_dictionary(out_file, results):
+    """Write dictionary entries to an Anki package built on HSK_MODEL.
+
+    The package is created next to ``out_file`` as ``<stem>.apkg``. The deck
+    name is the output path relative to OUTPUT (without extension), with its
+    parts joined by "::".
+
+    Args:
+        out_file: path under OUTPUT that names the package and the deck.
+        results: entries exposing ``meanings``, ``pinyin``, ``simplified``
+            and ``traditional``.
+    """
+    final_file = out_file.parent / f"{out_file.stem}.apkg"
+    deck_name = "::".join(out_file.relative_to(OUTPUT).parts[:-1] + (out_file.stem,))
+    # genanki expects a deck ID that stays the same across exports. Seeding
+    # from the deck name yields the same ID every run, where an unseeded
+    # randrange() produced a different ID on each export.
+    deck_id = random.Random(deck_name).randrange(1 << 30, 1 << 31)
+    deck = Deck(deck_id, deck_name)
+    # One converter serves every entry; no need to rebuild it per note.
+    converter = PinyinToneConverter()
+    for entry in results:
+        note = Note(
+            model=HSK_MODEL,
+            fields=[
+                "\n ".join(f"{n+1}. {m}" for n, m in enumerate(entry.meanings)),
+                converter.convert_text(entry.pinyin),
+                entry.simplified,
+                entry.traditional,
+            ],
+        )
+        deck.add_note(note)
+    Package(deck).write_to_file(final_file)
+
+
+def output_anki_text(out_file, results):
+    """Write translated sentence pairs to an Anki package built on SIMPLE_MODEL.
+
+    The package is created next to ``out_file`` as ``<stem>.apkg``. The deck
+    name is the output path relative to OUTPUT (without extension), with its
+    parts joined by "::".
+
+    Args:
+        out_file: path under OUTPUT that names the package and the deck.
+        results: two-item field lists, used as-is as each note's fields
+            (Question, Answer).
+    """
+    final_file = out_file.parent / f"{out_file.stem}.apkg"
+    deck_name = "::".join(out_file.relative_to(OUTPUT).parts[:-1] + (out_file.stem,))
+    # genanki expects a deck ID that stays the same across exports. Seeding
+    # from the deck name yields the same ID every run, where an unseeded
+    # randrange() produced a different ID on each export.
+    deck_id = random.Random(deck_name).randrange(1 << 30, 1 << 31)
+    deck = Deck(deck_id, deck_name)
+    for entry in results:
+        deck.add_note(Note(model=SIMPLE_MODEL, fields=entry))
+    Package(deck).write_to_file(final_file)
+
+
 def main():
-    in_file, out_file = process_files()
-    dictionary_process(in_file, out_file)
+    """Ask for an input file and export it as an Anki package.
+
+    Files whose first token equals TEXT_TYPE are translated sentence by
+    sentence; every other file is treated as a word list and looked up in the
+    CC-CEDICT dictionary.
+    """
+    in_file, out_file, file_type = process_files()
+    if TEXT_TYPE == file_type:
+        # Paragraph mode: make sure the translation package exists first.
+        create_translator()
+        results = trasnlator_process(in_file)
+        output_anki_text(out_file, results)
+    else:
+        # Word-list mode: resolve each word against the parsed dictionary.
+        dictionary = create_cedict()
+        results = dictionary_process(dictionary, in_file)
+        output_anki_dictionary(out_file, results)
+

 if __name__ == "__main__":
    main()
--- a/data/HSK1-1.txt
+++ b/data/HSK1-1.txt
--- a/data/input/HSK1/HSK1-1.txt
+++ b/data/input/HSK1/HSK1-1.txt
--- a/data/input/HSK1/HSK1-2.txt
+++ b/data/input/HSK1/HSK1-2.txt
--- a/data/input/HSK1/HSK1-3.txt
+++ b/data/input/HSK1/HSK1-3.txt
--- a/data/input/HSK1/HSK1-4.txt
+++ b/data/input/HSK1/HSK1-4.txt
--- a/data/input/口语/口语-第9课.-text.txt
+++ b/data/input/口语/口语-第9课.-text.txt
@@ -0,0 +1,2 @@
+TEXT_TYPE
+周六那场篮球比赛，对手很厉害。前半场他们一直赢，后半场我们队才超过他们，领先得并不轻松。
--- a/data/output/HSK1/HSK1-1.apkg
+++ b/data/output/HSK1/HSK1-1.apkg
--- a/data/output/HSK1/HSK1-1.tsv
+++ b/data/output/HSK1/HSK1-1.tsv
--- a/data/output/HSK1/HSK1-2.tsv
+++ b/data/output/HSK1/HSK1-2.tsv
--- a/data/output/HSK1/HSK1-3.tsv
+++ b/data/output/HSK1/HSK1-3.tsv
--- a/data/output/HSK1/HSK1-4.tsv
+++ b/data/output/HSK1/HSK1-4.tsv
--- a/data/output/口语/口语-第9课.-text.apkg
+++ b/data/output/口语/口语-第9课.-text.apkg