diff --git a/anki-hsk-creator/__main__.py b/anki-hsk-creator/__main__.py index e5ed330..627c489 100644 --- a/anki-hsk-creator/__main__.py +++ b/anki-hsk-creator/__main__.py @@ -1,55 +1,91 @@ ## Imports from pathlib import Path +import random import csv +## PIP from cedict_utils.cedict import CedictParser from pinyin_tone_converter.pinyin_tone_converter import PinyinToneConverter - -# from genanki import Deck, Note - -# import argostranslate.package -# import argostranslate.translate +from genanki import Deck, Note, Model, Package +import argostranslate.package +import argostranslate.translate ## Constants CCCEDICT = Path(__file__).parent / "cedict_ts.u8" -DATA = Path(__file__).parent.parent / "data" +DATA = Path(__file__).parent.parent / "data" INPUT = DATA / "input" OUTPUT = DATA / "output" -CN = "cn" +CN = "zh" EN = "en" +TEXT_TYPE = "TEXT_TYPE" +CSS = """ +.card { + font-family: arial; + font-size: 20px; + text-align: center; + color: black; + background-color: white; +} +.simple { +font-family: Arial; +font-size: 100px; +} +.trad { +font-family: Arial; +font-size: 75px; +} +""" ## Classess -## Main +SIMPLE_MODEL = Model( + 2076166425, + "Simple Model", + fields=[ + {"name": "Question"}, + {"name": "Answer"}, + ], + templates=[ + { + "name": "Card 1", + "qfmt": "{{Question}}", + "afmt": '{{FrontSide}}
{{Answer}}', + }, + ], + css=CSS, +) -# Download and install Argos Translate package -# argostranslate.package.update_package_index() -# available_packages = argostranslate.package.get_available_packages() -# package_to_install = next( -# filter( -# lambda x: x.from_code == CN and x.to_code == EN, available_packages -# ) -# ) -# argostranslate.package.install_from_path(package_to_install.download()) +HSK_MODEL = Model( + 1708536519, + "HSK Model", + fields=[ + {"name": "English"}, + {"name": "Pinyin"}, + {"name": "Simplified"}, + {"name": "Traditional"}, + ], + templates=[ + { + "name": "Card 1", + "qfmt": "{{Pinyin}}
{{English}}", + "afmt": "{{FrontSide}}
{{Simplified}}

{{Traditional}}
", + }, + { + "name": "Card 2", + "qfmt": "
{{Simplified}}

{{Traditional}}
", + "afmt": '{{FrontSide}}
{{Pinyin}}
{{English}}', + }, + ], + css=CSS, +) -def process_files(): - print("Select data file:") - files = [] - for n, file in enumerate(INPUT.glob('**/*.txt')): - files.append(file) - print(f"{n+1} - {file.relative_to(INPUT)}") - s = None - while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)): - s = input(f"Please select the file [1-{len(files)}]: ") - in_file = files[int(s)-1] - relative = in_file.relative_to(INPUT) - out_file = OUTPUT / relative - out_file.parent.mkdir(parents=True, exist_ok=True) - return in_file, out_file -def dictionary_process(in_file, out_file): - """Process dictionary files""" +## Functions + + +def create_cedict(): + """Creates a create_cedict dictionary object""" parser = CedictParser() parser.read_file(CCCEDICT) entries = parser.parse() @@ -60,17 +96,49 @@ def dictionary_process(in_file, out_file): dictionary[entry.simplified] = [entry] else: dictionary[entry.simplified].append(entry) - - out_file = DATA / f"{in_file.stem}.tsv" - words_list = in_file.open(encoding="utf8").read().split() + return dictionary + + +def create_translator(): + """Download and install Argos Translate package""" + argostranslate.package.update_package_index() + available_packages = argostranslate.package.get_available_packages() + package_to_install = next( + filter(lambda x: x.from_code == CN and x.to_code == EN, available_packages) + ) + argostranslate.package.install_from_path(package_to_install.download()) + + +## Main + + +def process_files(): + print("Select data file:") + files = [] + for n, file in enumerate(INPUT.glob("**/*.txt")): + files.append(file) + print(f"{n+1} - {file.relative_to(INPUT)}") + s = None + while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)): + s = input(f"Please select the file [1-{len(files)}]: ") + in_file = files[int(s) - 1] + relative = in_file.relative_to(INPUT) + out_file = OUTPUT / relative + out_file.parent.mkdir(parents=True, exist_ok=True) + with in_file.open(encoding="utf8") as input_file: + file_type = input_file.read().split()[0] + return in_file, out_file, file_type + + +def dictionary_process(dictionary, in_file): + """Process dictionary files""" + words_list = in_file.open(encoding="utf8").read().split() results = [] for word in words_list: if v := dictionary.get(word): if len(v) > 1: - print( - f"\nWARNING: {word} has multiple meanings:" - ) + print(f"\nWARNING: {word} has multiple meanings:") for n, w in enumerate(v): print(f"{n+1} - {w}") for m in w.meanings: @@ -78,7 +146,7 @@ def dictionary_process(in_file, out_file): s = None while not s or not s.isnumeric() or not (1 <= int(s) <= len(v)): s = input(f"Please select the correct word [1-{len(v)}]: ") - v = v[int(s)-1] + v = v[int(s) - 1] else: v = v[0] results.append(v) @@ -86,21 +154,80 @@ def dictionary_process(in_file, out_file): print("============================================") print(f"===================>ERROR: {word} not found") print("============================================") - with out_file.open("w", encoding="utf8", newline="") as csvfile: + return results + + +def trasnlator_process(in_file): + """Process text trasnlate files""" + text_list = in_file.open(encoding="utf8").read().split()[1:] + results = [] + for text in text_list: + text = text.strip() + for par in text.split("。"): + if par: + translatedText = argostranslate.translate.translate(par, CN, EN) + results.append([translatedText, par]) + return results + + +def output_tsv(out_file, results): + """writes the output as a tsv file""" + final_file = out_file.parent / f"{out_file.stem}.tsv" + with final_file.open("w", encoding="utf8", newline="") as csvfile: writer = csv.writer(csvfile, delimiter="\t", quotechar='"') for entry in results: writer.writerow( [ - "\n ".join(f"{n+1}. {m}" for n,m in enumerate(entry.meanings)), + "\n ".join(f"{n+1}. {m}" for n, m in enumerate(entry.meanings)), PinyinToneConverter().convert_text(entry.pinyin), entry.simplified, entry.traditional, ] ) + +def output_anki_dictionary(out_file, results): + final_file = out_file.parent / f"{out_file.stem}.apkg" + deck_name = "::".join(out_file.relative_to(OUTPUT).parts[:-1] + (out_file.stem,)) + deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name) + for entry in results: + note = Note( + model=HSK_MODEL, + fields=[ + "\n ".join(f"{n+1}. {m}" for n, m in enumerate(entry.meanings)), + PinyinToneConverter().convert_text(entry.pinyin), + entry.simplified, + entry.traditional, + ], + ) + deck.add_note(note) + Package(deck).write_to_file(final_file) + + +def output_anki_text(out_file, results): + final_file = out_file.parent / f"{out_file.stem}.apkg" + deck_name = "::".join(out_file.relative_to(OUTPUT).parts[:-1] + (out_file.stem,)) + deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name) + for entry in results: + note = Note( + model=SIMPLE_MODEL, + fields=entry, + ) + deck.add_note(note) + Package(deck).write_to_file(final_file) + + def main(): - in_file, out_file = process_files() - dictionary_process(in_file, out_file) + in_file, out_file, file_type = process_files() + if TEXT_TYPE == file_type: + create_translator() + results = trasnlator_process(in_file) + output_anki_text(out_file, results) + else: + dictionary = create_cedict() + results = dictionary_process(dictionary, in_file) + output_anki_dictionary(out_file, results) + if __name__ == "__main__": main() diff --git a/data/HSK1-1.txt b/data/HSK1-1.txt new file mode 100644 index 0000000..e69de29 diff --git a/data/input/HSK/HSK1-1.txt b/data/input/HSK1/HSK1-1.txt similarity index 100% rename from data/input/HSK/HSK1-1.txt rename to data/input/HSK1/HSK1-1.txt diff --git a/data/input/HSK/HSK1-2.txt b/data/input/HSK1/HSK1-2.txt similarity index 100% rename from data/input/HSK/HSK1-2.txt rename to data/input/HSK1/HSK1-2.txt diff --git a/data/input/HSK/HSK1-3.txt b/data/input/HSK1/HSK1-3.txt similarity index 100% rename from data/input/HSK/HSK1-3.txt rename to data/input/HSK1/HSK1-3.txt diff --git a/data/input/HSK/HSK1-4.txt b/data/input/HSK1/HSK1-4.txt similarity index 100% rename from data/input/HSK/HSK1-4.txt rename to data/input/HSK1/HSK1-4.txt diff --git a/data/input/口语/口语-第9课.-text.txt b/data/input/口语/口语-第9课.-text.txt new file mode 100644 index 0000000..7af8beb --- /dev/null +++ b/data/input/口语/口语-第9课.-text.txt @@ -0,0 +1,2 @@ +TEXT_TYPE +周六那场篮球比在,对手很厉害。前半场他们一直赢,后半场我们对才超过他们,领先得并不轻松。 \ No newline at end of file diff --git a/data/output/HSK1/HSK1-1.apkg b/data/output/HSK1/HSK1-1.apkg new file mode 100644 index 0000000..e112a59 Binary files /dev/null and b/data/output/HSK1/HSK1-1.apkg differ diff --git a/data/output/HSK/HSK1-1.tsv b/data/output/HSK1/HSK1-1.tsv similarity index 100% rename from data/output/HSK/HSK1-1.tsv rename to data/output/HSK1/HSK1-1.tsv diff --git a/data/output/HSK/HSK1-2.tsv b/data/output/HSK1/HSK1-2.tsv similarity index 100% rename from data/output/HSK/HSK1-2.tsv rename to data/output/HSK1/HSK1-2.tsv diff --git a/data/output/HSK/HSK1-3.tsv b/data/output/HSK1/HSK1-3.tsv similarity index 100% rename from data/output/HSK/HSK1-3.tsv rename to data/output/HSK1/HSK1-3.tsv diff --git a/data/output/HSK/HSK1-4.tsv b/data/output/HSK1/HSK1-4.tsv similarity index 100% rename from data/output/HSK/HSK1-4.tsv rename to data/output/HSK1/HSK1-4.tsv diff --git a/data/output/口语/口语-第9课.-text.apkg b/data/output/口语/口语-第9课.-text.apkg new file mode 100644 index 0000000..b22fce1 Binary files /dev/null and b/data/output/口语/口语-第9课.-text.apkg differ