diff --git a/anki-hsk-creator/__main__.py b/anki-hsk-creator/__main__.py
index e5ed330..627c489 100644
--- a/anki-hsk-creator/__main__.py
+++ b/anki-hsk-creator/__main__.py
@@ -1,55 +1,91 @@
## Imports
from pathlib import Path
+import random
import csv
+## PIP
from cedict_utils.cedict import CedictParser
from pinyin_tone_converter.pinyin_tone_converter import PinyinToneConverter
-
-# from genanki import Deck, Note
-
-# import argostranslate.package
-# import argostranslate.translate
+from genanki import Deck, Note, Model, Package
+import argostranslate.package
+import argostranslate.translate
## Constants
CCCEDICT = Path(__file__).parent / "cedict_ts.u8"
-DATA = Path(__file__).parent.parent / "data"
+DATA = Path(__file__).parent.parent / "data"
INPUT = DATA / "input"
OUTPUT = DATA / "output"
-CN = "cn"
+CN = "zh"
EN = "en"
+TEXT_TYPE = "TEXT_TYPE"
+CSS = """
+.card {
+ font-family: arial;
+ font-size: 20px;
+ text-align: center;
+ color: black;
+ background-color: white;
+}
+.simple {
+font-family: Arial;
+font-size: 100px;
+}
+.trad {
+font-family: Arial;
+font-size: 75px;
+}
+"""
## Classess
-## Main
+SIMPLE_MODEL = Model(
+ 2076166425,
+ "Simple Model",
+ fields=[
+ {"name": "Question"},
+ {"name": "Answer"},
+ ],
+ templates=[
+ {
+ "name": "Card 1",
+ "qfmt": "{{Question}}",
+ "afmt": '{{FrontSide}}
{{Answer}}',
+ },
+ ],
+ css=CSS,
+)
-# Download and install Argos Translate package
-# argostranslate.package.update_package_index()
-# available_packages = argostranslate.package.get_available_packages()
-# package_to_install = next(
-# filter(
-# lambda x: x.from_code == CN and x.to_code == EN, available_packages
-# )
-# )
-# argostranslate.package.install_from_path(package_to_install.download())
+HSK_MODEL = Model(
+ 1708536519,
+ "HSK Model",
+ fields=[
+ {"name": "English"},
+ {"name": "Pinyin"},
+ {"name": "Simplified"},
+ {"name": "Traditional"},
+ ],
+ templates=[
+ {
+ "name": "Card 1",
+ "qfmt": "{{Pinyin}}
{{English}}",
+ "afmt": "{{FrontSide}}
{{Simplified}}
{{Traditional}}
",
+ },
+ {
+ "name": "Card 2",
+ "qfmt": "{{Simplified}}
{{Traditional}}
",
+ "afmt": '{{FrontSide}}
{{Pinyin}}
{{English}}',
+ },
+ ],
+ css=CSS,
+)
-def process_files():
- print("Select data file:")
- files = []
- for n, file in enumerate(INPUT.glob('**/*.txt')):
- files.append(file)
- print(f"{n+1} - {file.relative_to(INPUT)}")
- s = None
- while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)):
- s = input(f"Please select the file [1-{len(files)}]: ")
- in_file = files[int(s)-1]
- relative = in_file.relative_to(INPUT)
- out_file = OUTPUT / relative
- out_file.parent.mkdir(parents=True, exist_ok=True)
- return in_file, out_file
-def dictionary_process(in_file, out_file):
- """Process dictionary files"""
+## Functions
+
+
+def create_cedict():
+ """Parse the CC-CEDICT file and index its entries by simplified form"""
parser = CedictParser()
parser.read_file(CCCEDICT)
entries = parser.parse()
@@ -60,17 +96,49 @@ def dictionary_process(in_file, out_file):
dictionary[entry.simplified] = [entry]
else:
dictionary[entry.simplified].append(entry)
-
- out_file = DATA / f"{in_file.stem}.tsv"
- words_list = in_file.open(encoding="utf8").read().split()
+ return dictionary
+
+
+def create_translator():
+    """Download and install the Argos Translate package for CN -> EN.
+
+    Refreshes the Argos package index, picks the first available package
+    whose source language is ``CN`` and target language is ``EN``, downloads
+    it and installs it from the downloaded path.
+
+    Raises:
+        LookupError: if the package index offers no CN -> EN package.
+    """
+    argostranslate.package.update_package_index()
+    available_packages = argostranslate.package.get_available_packages()
+    # Give next() a default so a missing package surfaces as a clear error
+    # instead of a bare StopIteration.
+    package_to_install = next(
+        (
+            package
+            for package in available_packages
+            if package.from_code == CN and package.to_code == EN
+        ),
+        None,
+    )
+    if package_to_install is None:
+        raise LookupError(f"No Argos Translate package available for {CN} -> {EN}")
+    argostranslate.package.install_from_path(package_to_install.download())
+
+
+## Main
+
+
+def process_files():
+    """Ask the user to pick an input file and work out where its output goes.
+
+    Lists every ``*.txt`` file found recursively under ``INPUT``, prompts
+    until a valid number is entered, and creates the matching parent
+    directory under ``OUTPUT``.
+
+    Returns:
+        A tuple ``(in_file, out_file, file_type)``. ``out_file`` mirrors the
+        input's path relative to ``INPUT`` under ``OUTPUT``; ``file_type`` is
+        the first whitespace-separated token of the input file, or ``""``
+        when the file contains no tokens.
+
+    Raises:
+        FileNotFoundError: if there is no ``*.txt`` file under ``INPUT``.
+    """
+    files = list(INPUT.glob("**/*.txt"))
+    if not files:
+        # Without this guard the prompt below could never be satisfied.
+        raise FileNotFoundError(f"No .txt files found under {INPUT}")
+    print("Select data file:")
+    for n, file in enumerate(files, start=1):
+        print(f"{n} - {file.relative_to(INPUT)}")
+    s = None
+    # isdecimal() only accepts characters int() can parse; isnumeric() also
+    # accepts e.g. "²" or "½", which would make int() raise ValueError.
+    while not s or not s.isdecimal() or not (1 <= int(s) <= len(files)):
+        s = input(f"Please select the file [1-{len(files)}]: ")
+    in_file = files[int(s) - 1]
+    out_file = OUTPUT / in_file.relative_to(INPUT)
+    out_file.parent.mkdir(parents=True, exist_ok=True)
+    with in_file.open(encoding="utf8") as input_file:
+        tokens = input_file.read().split()
+    # An empty (or whitespace-only) file has no first token to read.
+    file_type = tokens[0] if tokens else ""
+    return in_file, out_file, file_type
+
+
+def dictionary_process(dictionary, in_file):
+ """Process dictionary files"""
+ words_list = in_file.open(encoding="utf8").read().split()
results = []
for word in words_list:
if v := dictionary.get(word):
if len(v) > 1:
- print(
- f"\nWARNING: {word} has multiple meanings:"
- )
+ print(f"\nWARNING: {word} has multiple meanings:")
for n, w in enumerate(v):
print(f"{n+1} - {w}")
for m in w.meanings:
@@ -78,7 +146,7 @@ def dictionary_process(in_file, out_file):
s = None
while not s or not s.isnumeric() or not (1 <= int(s) <= len(v)):
s = input(f"Please select the correct word [1-{len(v)}]: ")
- v = v[int(s)-1]
+ v = v[int(s) - 1]
else:
v = v[0]
results.append(v)
@@ -86,21 +154,80 @@ def dictionary_process(in_file, out_file):
print("============================================")
print(f"===================>ERROR: {word} not found")
print("============================================")
- with out_file.open("w", encoding="utf8", newline="") as csvfile:
+ return results
+
+
+def trasnlator_process(in_file):
+    """Translate the sentences of a text file from CN to EN.
+
+    The file is split on whitespace and its first token is skipped
+    (``process_files`` reads that token as the file type). Every remaining
+    chunk is split on the full stop "。" and each non-empty piece is
+    translated with Argos Translate.
+
+    Args:
+        in_file: ``Path`` of the UTF-8 text file to translate.
+
+    Returns:
+        A list of ``[translated_text, original_text]`` pairs, in file order.
+    """
+    # read_text() opens and closes the file in one call, so no handle leaks.
+    text_list = in_file.read_text(encoding="utf8").split()[1:]
+    results = []
+    for text in text_list:
+        for par in text.strip().split("。"):
+            if par:
+                translated_text = argostranslate.translate.translate(par, CN, EN)
+                results.append([translated_text, par])
+    return results
+
+
+def output_tsv(out_file, results):
+ """Write the results as a tab-separated (.tsv) file.
+
+ The file is created in out_file's directory, named after out_file's stem
+ with a ".tsv" extension. Each entry becomes one row: its numbered
+ meanings, its pinyin passed through PinyinToneConverter, its simplified
+ form and its traditional form.
+ """
+ final_file = out_file.parent / f"{out_file.stem}.tsv"
+ with final_file.open("w", encoding="utf8", newline="") as csvfile:
writer = csv.writer(csvfile, delimiter="\t", quotechar='"')
for entry in results:
writer.writerow(
[
- "\n ".join(f"{n+1}. {m}" for n,m in enumerate(entry.meanings)),
+ "\n ".join(f"{n+1}. {m}" for n, m in enumerate(entry.meanings)),
PinyinToneConverter().convert_text(entry.pinyin),
entry.simplified,
entry.traditional,
]
)
+
+def output_anki_dictionary(out_file, results):
+    """Write dictionary entries to an Anki package of ``HSK_MODEL`` notes.
+
+    The package is written next to ``out_file``, named after its stem with
+    an ``.apkg`` extension. The deck name is ``out_file``'s path relative to
+    ``OUTPUT`` (without the file extension), its parts joined with ``::``.
+    The deck id is drawn at random on every call.
+
+    Args:
+        out_file: ``Path`` located under ``OUTPUT``.
+        results: iterable of entries exposing ``meanings``, ``pinyin``,
+            ``simplified`` and ``traditional``.
+    """
+    final_file = out_file.parent / f"{out_file.stem}.apkg"
+    deck_name = "::".join(out_file.relative_to(OUTPUT).parts[:-1] + (out_file.stem,))
+    deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name)
+    # One converter serves the whole deck; no need to rebuild it per entry.
+    converter = PinyinToneConverter()
+    for entry in results:
+        note = Note(
+            model=HSK_MODEL,
+            # Field order must match the field order declared on HSK_MODEL.
+            fields=[
+                "\n ".join(f"{n+1}. {m}" for n, m in enumerate(entry.meanings)),
+                converter.convert_text(entry.pinyin),
+                entry.simplified,
+                entry.traditional,
+            ],
+        )
+        deck.add_note(note)
+    Package(deck).write_to_file(final_file)
+
+
+def output_anki_text(out_file, results):
+    """Write translated text pairs to an Anki package of ``SIMPLE_MODEL`` notes.
+
+    Each item of ``results`` is used as-is as the field list of one note.
+    The package is written next to ``out_file``, named after its stem with
+    an ``.apkg`` extension; the deck name is ``out_file``'s path relative to
+    ``OUTPUT`` (without the file extension), its parts joined with ``::``.
+    """
+    relative_parts = out_file.relative_to(OUTPUT).parts
+    name_parts = [*relative_parts[:-1], out_file.stem]
+    text_deck = Deck(random.randrange(1 << 30, 1 << 31), "::".join(name_parts))
+    for fields in results:
+        text_deck.add_note(Note(model=SIMPLE_MODEL, fields=fields))
+    package_path = out_file.parent / f"{out_file.stem}.apkg"
+    Package(text_deck).write_to_file(package_path)
+
+
def main():
+ """Pick an input file, then build the matching Anki package.
+
+ Files whose type equals TEXT_TYPE are translated and written with
+ output_anki_text; all other files are looked up in the CEDICT
+ dictionary and written with output_anki_dictionary.
+ """
- in_file, out_file = process_files()
- dictionary_process(in_file, out_file)
+ in_file, out_file, file_type = process_files()
+ if TEXT_TYPE == file_type:
+ create_translator()
+ results = trasnlator_process(in_file)
+ output_anki_text(out_file, results)
+ else:
+ dictionary = create_cedict()
+ results = dictionary_process(dictionary, in_file)
+ output_anki_dictionary(out_file, results)
+
if __name__ == "__main__":
main()
diff --git a/data/HSK1-1.txt b/data/HSK1-1.txt
new file mode 100644
index 0000000..e69de29
diff --git a/data/input/HSK/HSK1-1.txt b/data/input/HSK1/HSK1-1.txt
similarity index 100%
rename from data/input/HSK/HSK1-1.txt
rename to data/input/HSK1/HSK1-1.txt
diff --git a/data/input/HSK/HSK1-2.txt b/data/input/HSK1/HSK1-2.txt
similarity index 100%
rename from data/input/HSK/HSK1-2.txt
rename to data/input/HSK1/HSK1-2.txt
diff --git a/data/input/HSK/HSK1-3.txt b/data/input/HSK1/HSK1-3.txt
similarity index 100%
rename from data/input/HSK/HSK1-3.txt
rename to data/input/HSK1/HSK1-3.txt
diff --git a/data/input/HSK/HSK1-4.txt b/data/input/HSK1/HSK1-4.txt
similarity index 100%
rename from data/input/HSK/HSK1-4.txt
rename to data/input/HSK1/HSK1-4.txt
diff --git a/data/input/口语/口语-第9课.-text.txt b/data/input/口语/口语-第9课.-text.txt
new file mode 100644
index 0000000..7af8beb
--- /dev/null
+++ b/data/input/口语/口语-第9课.-text.txt
@@ -0,0 +1,2 @@
+TEXT_TYPE
+周六那场篮球比在,对手很厉害。前半场他们一直赢,后半场我们对才超过他们,领先得并不轻松。
\ No newline at end of file
diff --git a/data/output/HSK1/HSK1-1.apkg b/data/output/HSK1/HSK1-1.apkg
new file mode 100644
index 0000000..e112a59
Binary files /dev/null and b/data/output/HSK1/HSK1-1.apkg differ
diff --git a/data/output/HSK/HSK1-1.tsv b/data/output/HSK1/HSK1-1.tsv
similarity index 100%
rename from data/output/HSK/HSK1-1.tsv
rename to data/output/HSK1/HSK1-1.tsv
diff --git a/data/output/HSK/HSK1-2.tsv b/data/output/HSK1/HSK1-2.tsv
similarity index 100%
rename from data/output/HSK/HSK1-2.tsv
rename to data/output/HSK1/HSK1-2.tsv
diff --git a/data/output/HSK/HSK1-3.tsv b/data/output/HSK1/HSK1-3.tsv
similarity index 100%
rename from data/output/HSK/HSK1-3.tsv
rename to data/output/HSK1/HSK1-3.tsv
diff --git a/data/output/HSK/HSK1-4.tsv b/data/output/HSK1/HSK1-4.tsv
similarity index 100%
rename from data/output/HSK/HSK1-4.tsv
rename to data/output/HSK1/HSK1-4.tsv
diff --git a/data/output/口语/口语-第9课.-text.apkg b/data/output/口语/口语-第9课.-text.apkg
new file mode 100644
index 0000000..b22fce1
Binary files /dev/null and b/data/output/口语/口语-第9课.-text.apkg differ