rename files and add chat tts

This commit is contained in:
Wolfang Torres
2026-05-26 21:27:10 +08:00
parent 46c92958a9
commit 1fc0f0568f
39 changed files with 53 additions and 37 deletions

View File

@@ -18,7 +18,8 @@ INPUT = DATA / "input"
OUTPUT = DATA / "output" OUTPUT = DATA / "output"
CN = "zh" CN = "zh"
EN = "en" EN = "en"
TEXT_TYPE = "TEXT_TYPE" PHRASES_TYPE = ".phrases"
DICT_TYPE = ".dictionary"
CSS = """ CSS = """
.card { .card {
font-family: arial; font-family: arial;
@@ -127,14 +128,21 @@ def create_translator():
def process_files(): def process_files():
print("Select data file:") print("Select data file:")
in_file = None
level = INPUT
while not in_file:
files = [] files = []
for n, file in enumerate(INPUT.glob("**/*.txt")): for n, file in enumerate(level.glob("*")):
files.append(file) files.append(file)
print(f"{n+1} - {file.relative_to(INPUT)}") print(f"{n+1} - {file.relative_to(INPUT)}")
s = None s = None
while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)): while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)):
s = input(f"Please select the file [1-{len(files)}]: ") s = input(f"Please select the file [1-{len(files)}]: ")
in_file = files[int(s) - 1] selected = files[int(s) - 1]
if selected.is_file():
in_file = selected
else:
level = selected
relative = in_file.relative_to(INPUT) relative = in_file.relative_to(INPUT)
out_file = OUTPUT / relative out_file = OUTPUT / relative
out_file.parent.mkdir(parents=True, exist_ok=True) out_file.parent.mkdir(parents=True, exist_ok=True)
@@ -147,11 +155,18 @@ def dictionary_process(dictionary, in_file):
"""Process dictionary files""" """Process dictionary files"""
words_list = in_file.open(encoding="utf8").read().split() words_list = in_file.open(encoding="utf8").read().split()
results = [] results = []
for word in words_list: with in_file.open("w", encoding="utf8") as input_file:
for words in words_list:
word = words.split()[0]
pinyin = words.split()[1] if len(words.split()) > 1 else None
if v := dictionary.get(word): if v := dictionary.get(word):
if len(v) > 1: if len(v) > 1:
print(f"\nWARNING: {word} has multiple meanings:") print(f"\nWARNING: {word} has multiple meanings:")
for n, w in enumerate(v): if pinyin and pinyin != "ERROR":
ml = filter(lambda x: v.pinyin == pinyin, v)
else:
ml = v
for n, w in enumerate(ml):
print(f"{n+1} - {w}") print(f"{n+1} - {w}")
for m in w.meanings: for m in w.meanings:
print(f"\t{m}") print(f"\t{m}")
@@ -161,11 +176,13 @@ def dictionary_process(dictionary, in_file):
v = v[int(s) - 1] v = v[int(s) - 1]
else: else:
v = v[0] v = v[0]
input_file.write(f"{word}\t{v.pinyin}\n")
results.append(v) results.append(v)
else: else:
print("============================================") print("============================================")
print(f"===================>ERROR: {word} not found") print(f"===================>ERROR: {word} not found")
print("============================================") print("============================================")
input_file.write(f"{word}\tERROR\n")
return results return results
@@ -231,14 +248,16 @@ def output_anki_text(out_file, results):
def main(): def main():
in_file, out_file, file_type = process_files() in_file, out_file, file_type = process_files()
if TEXT_TYPE == file_type: if PHRASES_TYPE in in_file.suffixes:
create_translator() create_translator()
results = translator_process(in_file) results = translator_process(in_file)
output_anki_text(out_file, results) output_anki_text(out_file, results)
else: elif DICT_TYPE in in_file.suffixes:
dictionary = create_cedict() dictionary = create_cedict()
results = dictionary_process(dictionary, in_file) results = dictionary_process(dictionary, in_file)
output_anki_dictionary(out_file, results) output_anki_dictionary(out_file, results)
else:
raise TypeError("Error, filetype not especified!")
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -1,9 +1,9 @@
你好
你好
您好 您好
你们 你们
您们 您们
对不起 对不起
没关系 没关系

View File

@@ -1,4 +1,3 @@
TEXT_TYPE
上课 上课
下课 下课
想在休息 想在休息

View File

@@ -1,3 +1,2 @@
TEXT_TYPE
不谢 不谢
谢谢你 谢谢你

View File

@@ -1,4 +1,3 @@
TEXT_TYPE
衣村生活 衣村生活
新农村 新农村
环境优美 环境优美

View File

@@ -1,2 +1 @@
TEXT_TYPE
周六那场篮球比在,对手很厉害。前半场他们一直赢,后半场我们对才超过他们,领先得并不轻松。 周六那场篮球比在,对手很厉害。前半场他们一直赢,后半场我们对才超过他们,领先得并不轻松。

Binary file not shown.

Binary file not shown.

View File

@@ -9,5 +9,6 @@ setup(
"pinyin-tone-converter", "pinyin-tone-converter",
"genanki", "genanki",
"argostranslate", "argostranslate",
"chattts",
], ],
) )