add support for paragraph translations

2026-05-20 02:06:39 +08:00
parent 6382d03475
commit d0e3693574
13 changed files with 173 additions and 44 deletions
--- a/anki-hsk-creator/main.py
+++ b/anki-hsk-creator/main.py
@@ -1,14 +1,14 @@
 ## Imports
 from pathlib import Path
 import random
 import csv
 ## PIP
 from cedict_utils.cedict import CedictParser
 from pinyin_tone_converter.pinyin_tone_converter import PinyinToneConverter
-
+from genanki import Deck, Note, Model, Package
-# from genanki import Deck, Note
+import argostranslate.package
-
+import argostranslate.translate
 # import argostranslate.package
 # import argostranslate.translate
 ## Constants
@@ -16,40 +16,76 @@ CCCEDICT = Path(__file__).parent / "cedict_ts.u8"
 DATA = Path(__file__).parent.parent / "data"
 INPUT = DATA / "input"
 OUTPUT = DATA / "output"
-CN = "cn"
+CN = "zh"
 EN = "en"
 TEXT_TYPE = "TEXT_TYPE"
 CSS = """
 .card {
 font-family: arial;
 font-size: 20px;
 text-align: center;
 color: black;
 background-color: white;
 }
 .simple {
 font-family: Arial;
 font-size: 100px;
 }
 .trad {
 font-family: Arial;
 font-size: 75px;
 }
 """
 ## Classes
-## Main
+SIMPLE_MODEL = Model(
    2076166425,
    "Simple Model",
    fields=[
        {"name": "Question"},
        {"name": "Answer"},
    ],
    templates=[
        {
            "name": "Card 1",
            "qfmt": "{{Question}}",
            "afmt": '{{FrontSide}}<hr id="answer">{{Answer}}',
        },
    ],
    css=CSS,
 )
-# Download and install Argos Translate package
+HSK_MODEL = Model(
-# argostranslate.package.update_package_index()
+    1708536519,
-# available_packages = argostranslate.package.get_available_packages()
+    "HSK Model",
-# package_to_install = next(
+    fields=[
-#     filter(
+        {"name": "English"},
-#         lambda x: x.from_code == CN and x.to_code == EN, available_packages
+        {"name": "Pinyin"},
-#     )
+        {"name": "Simplified"},
-# )
+        {"name": "Traditional"},
-# argostranslate.package.install_from_path(package_to_install.download())
+    ],
    templates=[
        {
            "name": "Card 1",
            "qfmt": "<strong>{{Pinyin}}</strong><br>{{English}}",
            "afmt": "{{FrontSide}}<hr id='answer''><div class='simple'>{{Simplified}}</div><br><div class='trad'>{{Traditional}}</div>",
        },
        {
            "name": "Card 2",
            "qfmt": "<div class='simple'>{{Simplified}}</div><br><div class='trad'>{{Traditional}}</div>",
            "afmt": '{{FrontSide}}<hr id="answer"><strong>{{Pinyin}}</strong><br>{{English}}',
        },
    ],
    css=CSS,
 )
 def process_files():
    """Prompt the user to pick an input file and derive its output path.

    Every ``*.txt`` file found recursively under ``INPUT`` is listed with a
    1-based index, and the prompt repeats until a valid index is entered.

    Returns:
        A ``(in_file, out_file)`` tuple: the selected input path and the
        path mirroring it under ``OUTPUT``. The parent directory of
        ``out_file`` is created if it does not exist yet.

    Raises:
        FileNotFoundError: If no ``*.txt`` file exists under ``INPUT``.
    """
    print("Select data file:")
    files = []
    for n, file in enumerate(INPUT.glob('**/*.txt')):
        files.append(file)
        print(f"{n+1} - {file.relative_to(INPUT)}")
    if not files:
        # With an empty list no answer can ever satisfy the prompt below.
        raise FileNotFoundError(f"No .txt files found under {INPUT}")
    s = None
    # isdecimal() only accepts characters that int() can parse; isnumeric()
    # also accepts e.g. "²", which would make int(s) raise ValueError.
    while not s or not s.isdecimal() or not (1 <= int(s) <= len(files)):
        s = input(f"Please select the file [1-{len(files)}]: ")
    in_file = files[int(s)-1]
    relative = in_file.relative_to(INPUT)
    out_file = OUTPUT / relative
    out_file.parent.mkdir(parents=True, exist_ok=True)
    return in_file, out_file
-def dictionary_process(in_file, out_file):
+## Functions
-    """Process dictionary files"""
+
 def create_cedict():
    """Creates a create_cedict dictionary object"""
    parser = CedictParser()
    parser.read_file(CCCEDICT)
    entries = parser.parse()
@@ -61,16 +97,48 @@ def dictionary_process(in_file, out_file):
        else:
            dictionary[entry.simplified].append(entry)
-    out_file = DATA / f"{in_file.stem}.tsv"
+    return dictionary
    words_list = in_file.open(encoding="utf8").read().split()
 def create_translator():
    """Download and install the Argos Translate package for ``CN`` -> ``EN``.

    Refreshes the remote package index, picks the first available package
    whose source language code is ``CN`` and whose target code is ``EN``,
    then downloads and installs it.

    Raises:
        RuntimeError: If the package index offers no package for that pair.
    """
    argostranslate.package.update_package_index()
    available_packages = argostranslate.package.get_available_packages()
    package_to_install = next(
        (
            pkg
            for pkg in available_packages
            if pkg.from_code == CN and pkg.to_code == EN
        ),
        None,
    )
    if package_to_install is None:
        # A bare next() would surface as an uninformative StopIteration.
        raise RuntimeError(
            f"No Argos Translate package available for {CN} -> {EN}"
        )
    argostranslate.package.install_from_path(package_to_install.download())
 ## Main
 def process_files():
    """Prompt the user to pick an input file and describe how to process it.

    Every ``*.txt`` file found recursively under ``INPUT`` is listed with a
    1-based index, and the prompt repeats until a valid index is entered.

    Returns:
        A ``(in_file, out_file, file_type)`` tuple: the selected input path,
        the path mirroring it under ``OUTPUT`` (its parent directory is
        created if missing), and the first whitespace-delimited token of the
        input file. ``file_type`` is ``""`` when the file has no tokens.

    Raises:
        FileNotFoundError: If no ``*.txt`` file exists under ``INPUT``.
    """
    print("Select data file:")
    files = []
    for n, file in enumerate(INPUT.glob("**/*.txt")):
        files.append(file)
        print(f"{n+1} - {file.relative_to(INPUT)}")
    if not files:
        # With an empty list no answer can ever satisfy the prompt below.
        raise FileNotFoundError(f"No .txt files found under {INPUT}")
    s = None
    # isdecimal() only accepts characters that int() can parse; isnumeric()
    # also accepts e.g. "²", which would make int(s) raise ValueError.
    while not s or not s.isdecimal() or not (1 <= int(s) <= len(files)):
        s = input(f"Please select the file [1-{len(files)}]: ")
    in_file = files[int(s) - 1]
    relative = in_file.relative_to(INPUT)
    out_file = OUTPUT / relative
    out_file.parent.mkdir(parents=True, exist_ok=True)
    with in_file.open(encoding="utf8") as input_file:
        # Only the leading token matters; maxsplit avoids splitting the rest.
        tokens = input_file.read().split(maxsplit=1)
    # An empty (or whitespace-only) file has no marker instead of crashing
    # with IndexError.
    file_type = tokens[0] if tokens else ""
    return in_file, out_file, file_type
 def dictionary_process(dictionary, in_file):
    """Process dictionary files"""
    words_list = in_file.open(encoding="utf8").read().split()
    results = []
    for word in words_list:
        if v := dictionary.get(word):
            if len(v) > 1:
-                print(
+                print(f"\nWARNING: {word} has multiple meanings:")
                    f"\nWARNING: {word} has multiple meanings:"
                )
                for n, w in enumerate(v):
                    print(f"{n+1} - {w}")
                    for m in w.meanings:
@@ -86,7 +154,26 @@ def dictionary_process(in_file, out_file):
            print("============================================")
            print(f"===================>ERROR: {word} not found")
            print("============================================")
-    with out_file.open("w", encoding="utf8", newline="") as csvfile:
+    return results
 def trasnlator_process(in_file):
    """Translate the sentences of a text input file.

    The file is split on whitespace and the first token is skipped
    (presumably the file-type marker; verify against the caller). Each
    remaining chunk is split on the Chinese full stop "。" and every
    non-empty piece is translated from ``CN`` to ``EN``.

    Args:
        in_file: Path-like object of the UTF-8 text file to translate.

    Returns:
        A list of ``[translated_text, original_text]`` pairs, in file order.
        The original text does not include the "。" it was split on.
    """
    # read_text() closes the file itself; the previous open().read() chain
    # left the handle to be reclaimed by the garbage collector.
    text_list = in_file.read_text(encoding="utf8").split()[1:]
    results = []
    for text in text_list:
        # split() already removed surrounding whitespace, so no strip needed.
        for par in text.split("。"):
            if par:
                translated_text = argostranslate.translate.translate(par, CN, EN)
                results.append([translated_text, par])
    return results
 def output_tsv(out_file, results):
    """writes the output as a tsv file"""
    final_file = out_file.parent / f"{out_file.stem}.tsv"
    with final_file.open("w", encoding="utf8", newline="") as csvfile:
        writer = csv.writer(csvfile, delimiter="\t", quotechar='"')
        for entry in results:
            writer.writerow(
@@ -98,9 +185,49 @@ def dictionary_process(in_file, out_file):
                ]
            )
 def output_anki_dictionary(out_file, results):
    """Write dictionary entries to an ``.apkg`` file beside ``out_file``.

    The deck name is built from the directory parts of ``out_file`` relative
    to ``OUTPUT`` plus the file stem, joined with "::". The deck id is drawn
    at random on every call.

    Args:
        out_file: Output path under ``OUTPUT``; only its parent and stem
            are used, the package gets an ``.apkg`` extension.
        results: Iterable of entries exposing ``meanings``, ``pinyin``,
            ``simplified`` and ``traditional`` attributes.
    """
    target = out_file.parent / f"{out_file.stem}.apkg"
    name_parts = out_file.relative_to(OUTPUT).parts[:-1] + (out_file.stem,)
    deck_id = random.randrange(1 << 30, 1 << 31)
    deck = Deck(deck_id, "::".join(name_parts))
    for item in results:
        # Number the meanings starting from 1, one per line.
        numbered = [
            f"{idx}. {meaning}"
            for idx, meaning in enumerate(item.meanings, start=1)
        ]
        english = "\n ".join(numbered)
        pinyin = PinyinToneConverter().convert_text(item.pinyin)
        deck.add_note(
            Note(
                model=HSK_MODEL,
                fields=[english, pinyin, item.simplified, item.traditional],
            )
        )
    Package(deck).write_to_file(target)
 def output_anki_text(out_file, results):
    """Write translated text pairs to an ``.apkg`` file beside ``out_file``.

    The deck name is built from the directory parts of ``out_file`` relative
    to ``OUTPUT`` plus the file stem, joined with "::". The deck id is drawn
    at random on every call.

    Args:
        out_file: Output path under ``OUTPUT``; only its parent and stem
            are used, the package gets an ``.apkg`` extension.
        results: Iterable whose items are passed unchanged as the field
            list of a ``SIMPLE_MODEL`` note.
    """
    target = out_file.parent / f"{out_file.stem}.apkg"
    name_parts = out_file.relative_to(OUTPUT).parts[:-1] + (out_file.stem,)
    deck_id = random.randrange(1 << 30, 1 << 31)
    deck = Deck(deck_id, "::".join(name_parts))
    for fields in results:
        deck.add_note(Note(model=SIMPLE_MODEL, fields=fields))
    Package(deck).write_to_file(target)
 def main():
-    in_file, out_file = process_files()
+    in_file, out_file, file_type = process_files()
-    dictionary_process(in_file, out_file)
+    if TEXT_TYPE == file_type:
        create_translator()
        results = trasnlator_process(in_file)
        output_anki_text(out_file, results)
    else:
        dictionary = create_cedict()
        results = dictionary_process(dictionary, in_file)
        output_anki_dictionary(out_file, results)
 # Script entry point: call main() only when this file is run directly.
 if __name__ == "__main__":
    main()
--- a/data/HSK1-1.txt
+++ b/data/HSK1-1.txt
--- a/data/input/HSK1/HSK1-1.txt
+++ b/data/input/HSK1/HSK1-1.txt
--- a/data/input/HSK1/HSK1-2.txt
+++ b/data/input/HSK1/HSK1-2.txt
--- a/data/input/HSK1/HSK1-3.txt
+++ b/data/input/HSK1/HSK1-3.txt
--- a/data/input/HSK1/HSK1-4.txt
+++ b/data/input/HSK1/HSK1-4.txt
--- a/data/input/口语/口语-第9课.-text.txt
+++ b/data/input/口语/口语-第9课.-text.txt
@@ -0,0 +1,2 @@
 TEXT_TYPE
 周六那场篮球比在，对手很厉害。前半场他们一直赢，后半场我们对才超过他们，领先得并不轻松。
--- a/data/output/HSK1/HSK1-1.apkg
+++ b/data/output/HSK1/HSK1-1.apkg
--- a/data/output/HSK1/HSK1-1.tsv
+++ b/data/output/HSK1/HSK1-1.tsv
--- a/data/output/HSK1/HSK1-2.tsv
+++ b/data/output/HSK1/HSK1-2.tsv
--- a/data/output/HSK1/HSK1-3.tsv
+++ b/data/output/HSK1/HSK1-3.tsv
--- a/data/output/HSK1/HSK1-4.tsv
+++ b/data/output/HSK1/HSK1-4.tsv
--- a/data/output/口语/口语-第9课.-text.apkg
+++ b/data/output/口语/口语-第9课.-text.apkg
		`@@ -0,0 +1,2 @@`
							`TEXT_TYPE`
							`周六那场篮球比在，对手很厉害。前半场他们一直赢，后半场我们对才超过他们，领先得并不轻松。`