Files
anki-hsk-creator/anki-hsk-creator/__main__.py
Wolfang Torres 6382d03475 reorganize
2026-05-19 21:24:11 +08:00

107 lines
3.3 KiB
Python

## Imports
from pathlib import Path
import csv
from cedict_utils.cedict import CedictParser
from pinyin_tone_converter.pinyin_tone_converter import PinyinToneConverter
# from genanki import Deck, Note
# import argostranslate.package
# import argostranslate.translate
## Constants
# Dictionary file expected next to this module; parsed by dictionary_process().
CCCEDICT = Path(__file__).parent / "cedict_ts.u8"
# "data" directory located one level above this module's directory.
DATA = Path(__file__).parent.parent / "data"
# Directory scanned recursively for *.txt files by process_files().
INPUT = DATA / "input"
# Root under which process_files() mirrors the chosen file's relative path.
OUTPUT = DATA / "output"
# Language codes; referenced only by the commented-out Argos Translate setup.
CN = "cn"
EN = "en"
## Classes
## Main
# Download and install the Argos Translate package (currently disabled)
# argostranslate.package.update_package_index()
# available_packages = argostranslate.package.get_available_packages()
# package_to_install = next(
# filter(
# lambda x: x.from_code == CN and x.to_code == EN, available_packages
# )
# )
# argostranslate.package.install_from_path(package_to_install.download())
def process_files():
    """Interactively pick an input file and derive its output path.

    Every ``*.txt`` file found recursively under ``INPUT`` is listed with a
    1-based index, and the user is prompted until a valid index is entered.

    Returns:
        A ``(in_file, out_file)`` tuple: the chosen input file, and a path
        with the same location relative to ``INPUT`` re-rooted under
        ``OUTPUT``. The parent directory of ``out_file`` is created if needed.

    Raises:
        FileNotFoundError: If no ``*.txt`` file exists under ``INPUT``.
    """
    # Sort so the menu numbering is stable between runs; glob itself does
    # not promise any particular order.
    files = sorted(INPUT.glob("**/*.txt"))
    if not files:
        # With an empty list no answer can satisfy 1 <= n <= 0, so the
        # prompt below would loop forever; fail loudly instead.
        raise FileNotFoundError(f"No .txt files found under {INPUT}")

    print("Select data file:")
    for n, file in enumerate(files, start=1):
        print(f"{n} - {file.relative_to(INPUT)}")

    s = None
    # isdecimal() only accepts characters int() can parse, whereas
    # isnumeric() also accepts e.g. "²", which would make int() raise.
    while not s or not s.isdecimal() or not (1 <= int(s) <= len(files)):
        s = input(f"Please select the file [1-{len(files)}]: ").strip()

    in_file = files[int(s) - 1]
    # Mirror the input's sub-directory layout below OUTPUT.
    out_file = OUTPUT / in_file.relative_to(INPUT)
    out_file.parent.mkdir(parents=True, exist_ok=True)
    return in_file, out_file
def dictionary_process(in_file, out_file):
    """Look up every word of *in_file* in the dictionary and write a TSV.

    The dictionary file at ``CCCEDICT`` is parsed and its entries grouped by
    simplified form. *in_file* is read as UTF-8 and split on whitespace; each
    resulting word is looked up. Words with several entries trigger an
    interactive prompt, and words with no entry are reported on stdout and
    skipped.

    Args:
        in_file: Path of the whitespace-separated word list.
        out_file: Destination path; its suffix is replaced by ``.tsv``.

    The output is tab-separated with one row per resolved word, in input
    order, with columns: numbered meanings, pinyin passed through
    ``PinyinToneConverter.convert_text``, simplified form, traditional form.
    """
    parser = CedictParser()
    parser.read_file(CCCEDICT)
    # Several entries can share one simplified form, so group them in lists.
    dictionary = {}
    for entry in parser.parse():
        dictionary.setdefault(entry.simplified, []).append(entry)

    # Use the destination given by the caller instead of overwriting it
    # with a path directly under DATA; only the extension is changed.
    out_file = out_file.with_suffix(".tsv")
    out_file.parent.mkdir(parents=True, exist_ok=True)

    # read_text() closes the file, unlike a bare open().read().
    words_list = in_file.read_text(encoding="utf8").split()

    results = []
    for word in words_list:
        candidates = dictionary.get(word)
        if not candidates:
            print("============================================")
            print(f"===================>ERROR: {word} not found")
            print("============================================")
            continue
        results.append(_choose_entry(word, candidates))

    # One converter is reused for all rows rather than built per row
    # (assumes convert_text keeps no state between calls -- TODO confirm).
    converter = PinyinToneConverter()
    with out_file.open("w", encoding="utf8", newline="") as csvfile:
        writer = csv.writer(csvfile, delimiter="\t", quotechar='"')
        for entry in results:
            writer.writerow(
                [
                    "\n ".join(
                        f"{n}. {m}"
                        for n, m in enumerate(entry.meanings, start=1)
                    ),
                    converter.convert_text(entry.pinyin),
                    entry.simplified,
                    entry.traditional,
                ]
            )


def _choose_entry(word, candidates):
    """Return the entry to use for *word*, prompting when there are several."""
    if len(candidates) == 1:
        return candidates[0]

    print(f"\nWARNING: {word} has multiple meanings:")
    for n, candidate in enumerate(candidates, start=1):
        print(f"{n} - {candidate}")
        for meaning in candidate.meanings:
            print(f"\t{meaning}")

    s = None
    # isdecimal() only accepts characters int() can parse, whereas
    # isnumeric() also accepts e.g. "²", which would make int() raise.
    while not s or not s.isdecimal() or not (1 <= int(s) <= len(candidates)):
        s = input(
            f"Please select the correct word [1-{len(candidates)}]: "
        ).strip()
    return candidates[int(s) - 1]
def main():
    """Run the interactive workflow: pick an input file, then build its TSV."""
    selected_paths = process_files()
    dictionary_process(*selected_paths)


if __name__ == "__main__":
    main()