diff --git a/anki-hsk-creator/__main__.py b/anki-hsk-creator/__main__.py index 8fc3d4d..0e557e2 100644 --- a/anki-hsk-creator/__main__.py +++ b/anki-hsk-creator/__main__.py @@ -18,7 +18,8 @@ INPUT = DATA / "input" OUTPUT = DATA / "output" CN = "zh" EN = "en" -TEXT_TYPE = "TEXT_TYPE" +PHRASES_TYPE = ".phrases" +DICT_TYPE = ".dictionary" CSS = """ .card { font-family: arial; @@ -127,14 +128,21 @@ def create_translator(): def process_files(): print("Select data file:") - files = [] - for n, file in enumerate(INPUT.glob("**/*.txt")): - files.append(file) - print(f"{n+1} - {file.relative_to(INPUT)}") - s = None - while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)): - s = input(f"Please select the file [1-{len(files)}]: ") - in_file = files[int(s) - 1] + in_file = None + level = INPUT + while not in_file: + files = [] + for n, file in enumerate(level.glob("*")): + files.append(file) + print(f"{n+1} - {file.relative_to(INPUT)}") + s = None + while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)): + s = input(f"Please select the file [1-{len(files)}]: ") + selected = files[int(s) - 1] + if selected.is_file(): + in_file = selected + else: + level = selected relative = in_file.relative_to(INPUT) out_file = OUTPUT / relative out_file.parent.mkdir(parents=True, exist_ok=True) @@ -146,26 +154,35 @@ def process_files(): def dictionary_process(dictionary, in_file): """Process dictionary files""" words_list = in_file.open(encoding="utf8").read().split() - results = [] - for word in words_list: - if v := dictionary.get(word): - if len(v) > 1: - print(f"\nWARNING: {word} has multiple meanings:") - for n, w in enumerate(v): - print(f"{n+1} - {w}") - for m in w.meanings: - print(f"\t{m}") - s = None - while not s or not s.isnumeric() or not (1 <= int(s) <= len(v)): - s = input(f"Please select the correct word [1-{len(v)}]: ") - v = v[int(s) - 1] + results = [] + with in_file.open("w", encoding="utf8") as input_file: + for words in words_list: + word = words.split()[0] + pinyin = words.split()[1] if len(words.split()) > 1 else None + if v := dictionary.get(word): + if len(v) > 1: + print(f"\nWARNING: {word} has multiple meanings:") + if pinyin and pinyin != "ERROR": + ml = filter(lambda x: v.pinyin == pinyin, v) + else: + ml = v + for n, w in enumerate(ml): + print(f"{n+1} - {w}") + for m in w.meanings: + print(f"\t{m}") + s = None + while not s or not s.isnumeric() or not (1 <= int(s) <= len(v)): + s = input(f"Please select the correct word [1-{len(v)}]: ") + v = v[int(s) - 1] + else: + v = v[0] + input_file.write(f"{word}\t{v.pinyin}\n") + results.append(v) else: - v = v[0] - results.append(v) - else: - print("============================================") - print(f"===================>ERROR: {word} not found") - print("============================================") + print("============================================") + print(f"===================>ERROR: {word} not found") + print("============================================") + input_file.write(f"{word}\tERROR\n") return results @@ -231,14 +248,16 @@ def output_anki_text(out_file, results): def main(): in_file, out_file, file_type = process_files() - if TEXT_TYPE == file_type: + if PHRASES_TYPE in in_file.suffixes: create_translator() results = translator_process(in_file) output_anki_text(out_file, results) - else: + elif DICT_TYPE in in_file.suffixes: dictionary = create_cedict() results = dictionary_process(dictionary, in_file) output_anki_dictionary(out_file, results) + else: + raise TypeError("Error, filetype not especified!") if __name__ == "__main__": diff --git a/data/input/HSK1/HSK1-1.txt b/data/input/HSK1/HSK1-1.dictionary.txt similarity index 69% rename from data/input/HSK1/HSK1-1.txt rename to data/input/HSK1/HSK1-1.dictionary.txt index fa269a9..3ec388f 100644 --- a/data/input/HSK1/HSK1-1.txt +++ b/data/input/HSK1/HSK1-1.dictionary.txt @@ -1,9 +1,9 @@ 你 好 -你好 您 +你好 您好 -你们 -您们 +你们好 +您们好 对不起 没关系 \ No newline at end of file diff --git a/data/input/HSK1/HSK1-1.prhases.txt b/data/input/HSK1/HSK1-1.phrases.txt similarity index 82% rename from data/input/HSK1/HSK1-1.prhases.txt rename to data/input/HSK1/HSK1-1.phrases.txt index 2c32671..a88d458 100644 --- a/data/input/HSK1/HSK1-1.prhases.txt +++ b/data/input/HSK1/HSK1-1.phrases.txt @@ -1,4 +1,3 @@ -TEXT_TYPE 上课 下课 想在休息 diff --git a/data/input/HSK1/HSK1-2.txt b/data/input/HSK1/HSK1-2.dictionary.txt similarity index 100% rename from data/input/HSK1/HSK1-2.txt rename to data/input/HSK1/HSK1-2.dictionary.txt diff --git a/data/input/HSK1/HSK1-2.phrases.txt b/data/input/HSK1/HSK1-2.phrases.txt index 343de94..388f7f6 100644 --- a/data/input/HSK1/HSK1-2.phrases.txt +++ b/data/input/HSK1/HSK1-2.phrases.txt @@ -1,3 +1,2 @@ -TEXT_TYPE 不谢 谢谢你 \ No newline at end of file diff --git a/data/input/HSK1/HSK1-3.txt b/data/input/HSK1/HSK1-3.dictionary.txt similarity index 100% rename from data/input/HSK1/HSK1-3.txt rename to data/input/HSK1/HSK1-3.dictionary.txt diff --git a/data/input/HSK1/HSK1-4.txt b/data/input/HSK1/HSK1-4.dictionary.txt similarity index 100% rename from data/input/HSK1/HSK1-4.txt rename to data/input/HSK1/HSK1-4.dictionary.txt diff --git a/data/input/口语/口语-第10课.txt b/data/input/口语/口语-第10课.dictionary.txt similarity index 100% rename from data/input/口语/口语-第10课.txt rename to data/input/口语/口语-第10课.dictionary.txt diff --git a/data/input/口语/口语-第10课.phrases.txt b/data/input/口语/口语-第10课.phrases.txt index abefc37..706caa7 100644 --- a/data/input/口语/口语-第10课.phrases.txt +++ b/data/input/口语/口语-第10课.phrases.txt @@ -1,4 +1,3 @@ -TEXT_TYPE 衣村生活 新农村 环境优美 diff --git a/data/input/口语/口语-第1课.txt b/data/input/口语/口语-第1课.dictionary.txt similarity index 100% rename from data/input/口语/口语-第1课.txt rename to data/input/口语/口语-第1课.dictionary.txt diff --git a/data/input/口语/口语-第2课.txt b/data/input/口语/口语-第2课.dictionary.txt similarity index 100% rename from data/input/口语/口语-第2课.txt rename to data/input/口语/口语-第2课.dictionary.txt diff --git a/data/input/口语/口语-第3课.txt b/data/input/口语/口语-第3课.dictionary.txt similarity index 100% rename from data/input/口语/口语-第3课.txt rename to data/input/口语/口语-第3课.dictionary.txt diff --git a/data/input/口语/口语-第4课.txt b/data/input/口语/口语-第4课.dictionary.txt similarity index 100% rename from data/input/口语/口语-第4课.txt rename to data/input/口语/口语-第4课.dictionary.txt diff --git a/data/input/口语/口语-第5课.txt b/data/input/口语/口语-第5课.dictionary.txt similarity index 100% rename from data/input/口语/口语-第5课.txt rename to data/input/口语/口语-第5课.dictionary.txt diff --git a/data/input/口语/口语-第6课.txt b/data/input/口语/口语-第6课.dictionary.txt similarity index 100% rename from data/input/口语/口语-第6课.txt rename to data/input/口语/口语-第6课.dictionary.txt diff --git a/data/input/口语/口语-第7课.txt b/data/input/口语/口语-第7课.dictionary.txt similarity index 100% rename from data/input/口语/口语-第7课.txt rename to data/input/口语/口语-第7课.dictionary.txt diff --git a/data/input/口语/口语-第8课.txt b/data/input/口语/口语-第8课.dictionary.txt similarity index 100% rename from data/input/口语/口语-第8课.txt rename to data/input/口语/口语-第8课.dictionary.txt diff --git a/data/input/口语/口语-第9课.txt b/data/input/口语/口语-第9课.dictionary.txt similarity index 100% rename from data/input/口语/口语-第9课.txt rename to data/input/口语/口语-第9课.dictionary.txt diff --git a/data/input/口语/口语-第9课.text.txt b/data/input/口语/口语-第9课.phrases.txt similarity index 92% rename from data/input/口语/口语-第9课.text.txt rename to data/input/口语/口语-第9课.phrases.txt index 7af8beb..4779018 100644 --- a/data/input/口语/口语-第9课.text.txt +++ b/data/input/口语/口语-第9课.phrases.txt @@ -1,2 +1 @@ -TEXT_TYPE 周六那场篮球比在,对手很厉害。前半场他们一直赢,后半场我们对才超过他们,领先得并不轻松。 \ No newline at end of file diff --git a/data/input/听力/听力-第3-一三课.txt b/data/input/听力/听力-第3-一三课.dictionary.txt similarity index 100% rename from data/input/听力/听力-第3-一三课.txt rename to data/input/听力/听力-第3-一三课.dictionary.txt diff --git a/data/input/基础汉语40课/基础汉语40课 - 第03课.txt b/data/input/基础汉语40课/基础汉语40课 - 第03课.dictionary.txt similarity index 100% rename from data/input/基础汉语40课/基础汉语40课 - 第03课.txt rename to data/input/基础汉语40课/基础汉语40课 - 第03课.dictionary.txt diff --git a/data/input/基础汉语40课/基础汉语40课 - 第04课.txt b/data/input/基础汉语40课/基础汉语40课 - 第04课.dictionary.txt similarity index 100% rename from data/input/基础汉语40课/基础汉语40课 - 第04课.txt rename to data/input/基础汉语40课/基础汉语40课 - 第04课.dictionary.txt diff --git a/data/input/基础汉语40课/基础汉语40课 - 第05课.txt b/data/input/基础汉语40课/基础汉语40课 - 第05课.dictionary.txt similarity index 100% rename from data/input/基础汉语40课/基础汉语40课 - 第05课.txt rename to data/input/基础汉语40课/基础汉语40课 - 第05课.dictionary.txt diff --git a/data/input/基础汉语40课/基础汉语40课 - 第06课.txt b/data/input/基础汉语40课/基础汉语40课 - 第06课.dictionary.txt similarity index 100% rename from data/input/基础汉语40课/基础汉语40课 - 第06课.txt rename to data/input/基础汉语40课/基础汉语40课 - 第06课.dictionary.txt diff --git a/data/input/基础汉语40课/基础汉语40课 - 第07课.txt b/data/input/基础汉语40课/基础汉语40课 - 第07课.dictionary.txt similarity index 100% rename from data/input/基础汉语40课/基础汉语40课 - 第07课.txt rename to data/input/基础汉语40课/基础汉语40课 - 第07课.dictionary.txt diff --git a/data/input/基础汉语40课/基础汉语40课 - 第16课.txt b/data/input/基础汉语40课/基础汉语40课 - 第16课.dictionary.txt similarity index 100% rename from data/input/基础汉语40课/基础汉语40课 - 第16课.txt rename to data/input/基础汉语40课/基础汉语40课 - 第16课.dictionary.txt diff --git a/data/input/基础汉语40课/基础汉语40课 - 第17课.txt b/data/input/基础汉语40课/基础汉语40课 - 第17课.dictionary.txt similarity index 100% rename from data/input/基础汉语40课/基础汉语40课 - 第17课.txt rename to data/input/基础汉语40课/基础汉语40课 - 第17课.dictionary.txt diff --git a/data/input/基础汉语40课/基础汉语40课 - 第18课.txt b/data/input/基础汉语40课/基础汉语40课 - 第18课.dictionary.txt similarity index 100% rename from data/input/基础汉语40课/基础汉语40课 - 第18课.txt rename to data/input/基础汉语40课/基础汉语40课 - 第18课.dictionary.txt diff --git a/data/input/基础汉语40课/基础汉语40课 - 第19课.txt b/data/input/基础汉语40课/基础汉语40课 - 第19课.dictionary.txt similarity index 100% rename from data/input/基础汉语40课/基础汉语40课 - 第19课.txt rename to data/input/基础汉语40课/基础汉语40课 - 第19课.dictionary.txt diff --git a/data/input/基础汉语40课/基础汉语40课 - 第20课.txt b/data/input/基础汉语40课/基础汉语40课 - 第20课.dictionary.txt similarity index 100% rename from data/input/基础汉语40课/基础汉语40课 - 第20课.txt rename to data/input/基础汉语40课/基础汉语40课 - 第20课.dictionary.txt diff --git a/data/input/基础汉语40课/基础汉语40课 - 第21课.txt b/data/input/基础汉语40课/基础汉语40课 - 第21课.dictionary.txt similarity index 100% rename from data/input/基础汉语40课/基础汉语40课 - 第21课.txt rename to data/input/基础汉语40课/基础汉语40课 - 第21课.dictionary.txt diff --git a/data/input/基础汉语40课/基础汉语40课 - 第22课.txt b/data/input/基础汉语40课/基础汉语40课 - 第22课.dictionary.txt similarity index 100% rename from data/input/基础汉语40课/基础汉语40课 - 第22课.txt rename to data/input/基础汉语40课/基础汉语40课 - 第22课.dictionary.txt diff --git a/data/input/基础汉语40课/基础汉语40课 - 第25课 - 1.words.txt b/data/input/基础汉语40课/基础汉语40课 - 第25课 - 1.dictionary.txt similarity index 100% rename from data/input/基础汉语40课/基础汉语40课 - 第25课 - 1.words.txt rename to data/input/基础汉语40课/基础汉语40课 - 第25课 - 1.dictionary.txt diff --git a/data/input/基础汉语40课/基础汉语40课 - 第25课 - 2.words.txt b/data/input/基础汉语40课/基础汉语40课 - 第25课 - 2.dictionary.txt similarity index 100% rename from data/input/基础汉语40课/基础汉语40课 - 第25课 - 2.words.txt rename to data/input/基础汉语40课/基础汉语40课 - 第25课 - 2.dictionary.txt diff --git a/data/input/基础汉语40课/基础汉语40课 - 第29课.txt b/data/input/基础汉语40课/基础汉语40课 - 第29课.dictionary.txt similarity index 100% rename from data/input/基础汉语40课/基础汉语40课 - 第29课.txt rename to data/input/基础汉语40课/基础汉语40课 - 第29课.dictionary.txt diff --git a/data/input/阅渎/阅渎第一-六课.txt b/data/input/阅渎/阅渎第一-六课.dictionary.txt similarity index 100% rename from data/input/阅渎/阅渎第一-六课.txt rename to data/input/阅渎/阅渎第一-六课.dictionary.txt diff --git a/data/output/HSK1/HSK1-1.dictionary.apkg b/data/output/HSK1/HSK1-1.dictionary.apkg new file mode 100644 index 0000000..9910b0e Binary files /dev/null and b/data/output/HSK1/HSK1-1.dictionary.apkg differ diff --git a/data/output/HSK1/HSK1-1.prhases.apkg b/data/output/HSK1/HSK1-1.prhases.apkg index 7416369..2ebc29c 100644 Binary files a/data/output/HSK1/HSK1-1.prhases.apkg and b/data/output/HSK1/HSK1-1.prhases.apkg differ diff --git a/setup.py b/setup.py index 4b28206..196a4ac 100644 --- a/setup.py +++ b/setup.py @@ -9,5 +9,6 @@ setup( "pinyin-tone-converter", "genanki", "argostranslate", + "chattts", ], )