rename files and add chat tts

This commit is contained in:
Wolfang Torres
2026-05-26 21:27:10 +08:00
parent 46c92958a9
commit 1fc0f0568f
39 changed files with 53 additions and 37 deletions

View File

@@ -18,7 +18,8 @@ INPUT = DATA / "input"
OUTPUT = DATA / "output"
CN = "zh"
EN = "en"
TEXT_TYPE = "TEXT_TYPE"
PHRASES_TYPE = ".phrases"
DICT_TYPE = ".dictionary"
CSS = """
.card {
font-family: arial;
@@ -127,14 +128,21 @@ def create_translator():
def process_files():
print("Select data file:")
in_file = None
level = INPUT
while not in_file:
files = []
for n, file in enumerate(INPUT.glob("**/*.txt")):
for n, file in enumerate(level.glob("*")):
files.append(file)
print(f"{n+1} - {file.relative_to(INPUT)}")
s = None
while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)):
s = input(f"Please select the file [1-{len(files)}]: ")
in_file = files[int(s) - 1]
selected = files[int(s) - 1]
if selected.is_file():
in_file = selected
else:
level = selected
relative = in_file.relative_to(INPUT)
out_file = OUTPUT / relative
out_file.parent.mkdir(parents=True, exist_ok=True)
@@ -147,11 +155,18 @@ def dictionary_process(dictionary, in_file):
"""Process dictionary files"""
words_list = in_file.open(encoding="utf8").read().split()
results = []
for word in words_list:
with in_file.open("w", encoding="utf8") as input_file:
for words in words_list:
word = words.split()[0]
pinyin = words.split()[1] if len(words.split()) > 1 else None
if v := dictionary.get(word):
if len(v) > 1:
print(f"\nWARNING: {word} has multiple meanings:")
for n, w in enumerate(v):
if pinyin and pinyin != "ERROR":
ml = filter(lambda x: v.pinyin == pinyin, v)
else:
ml = v
for n, w in enumerate(ml):
print(f"{n+1} - {w}")
for m in w.meanings:
print(f"\t{m}")
@@ -161,11 +176,13 @@ def dictionary_process(dictionary, in_file):
v = v[int(s) - 1]
else:
v = v[0]
input_file.write(f"{word}\t{v.pinyin}\n")
results.append(v)
else:
print("============================================")
print(f"===================>ERROR: {word} not found")
print("============================================")
input_file.write(f"{word}\tERROR\n")
return results
@@ -231,14 +248,16 @@ def output_anki_text(out_file, results):
def main():
in_file, out_file, file_type = process_files()
if TEXT_TYPE == file_type:
if PHRASES_TYPE in in_file.suffixes:
create_translator()
results = translator_process(in_file)
output_anki_text(out_file, results)
else:
elif DICT_TYPE in in_file.suffixes:
dictionary = create_cedict()
results = dictionary_process(dictionary, in_file)
output_anki_dictionary(out_file, results)
else:
raise TypeError("Error, filetype not especified!")
if __name__ == "__main__":

View File

@@ -1,9 +1,9 @@
你好
你好
您好
你们
您们
你们
您们
对不起
没关系

View File

@@ -1,4 +1,3 @@
TEXT_TYPE
上课
下课
想在休息

View File

@@ -1,3 +1,2 @@
TEXT_TYPE
不谢
谢谢你

View File

@@ -1,4 +1,3 @@
TEXT_TYPE
衣村生活
新农村
环境优美

View File

@@ -1,2 +1 @@
TEXT_TYPE
周六那场篮球比在,对手很厉害。前半场他们一直赢,后半场我们对才超过他们,领先得并不轻松。

Binary file not shown.

Binary file not shown.

View File

@@ -9,5 +9,6 @@ setup(
"pinyin-tone-converter",
"genanki",
"argostranslate",
"chattts",
],
)