rename files and add chat tts
This commit is contained in:
@@ -18,7 +18,8 @@ INPUT = DATA / "input"
|
||||
OUTPUT = DATA / "output"
|
||||
CN = "zh"
|
||||
EN = "en"
|
||||
TEXT_TYPE = "TEXT_TYPE"
|
||||
PHRASES_TYPE = ".phrases"
|
||||
DICT_TYPE = ".dictionary"
|
||||
CSS = """
|
||||
.card {
|
||||
font-family: arial;
|
||||
@@ -127,14 +128,21 @@ def create_translator():
|
||||
|
||||
def process_files():
|
||||
print("Select data file:")
|
||||
in_file = None
|
||||
level = INPUT
|
||||
while not in_file:
|
||||
files = []
|
||||
for n, file in enumerate(INPUT.glob("**/*.txt")):
|
||||
for n, file in enumerate(level.glob("*")):
|
||||
files.append(file)
|
||||
print(f"{n+1} - {file.relative_to(INPUT)}")
|
||||
s = None
|
||||
while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)):
|
||||
s = input(f"Please select the file [1-{len(files)}]: ")
|
||||
in_file = files[int(s) - 1]
|
||||
selected = files[int(s) - 1]
|
||||
if selected.is_file():
|
||||
in_file = selected
|
||||
else:
|
||||
level = selected
|
||||
relative = in_file.relative_to(INPUT)
|
||||
out_file = OUTPUT / relative
|
||||
out_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
@@ -147,11 +155,18 @@ def dictionary_process(dictionary, in_file):
|
||||
"""Process dictionary files"""
|
||||
words_list = in_file.open(encoding="utf8").read().split()
|
||||
results = []
|
||||
for word in words_list:
|
||||
with in_file.open("w", encoding="utf8") as input_file:
|
||||
for words in words_list:
|
||||
word = words.split()[0]
|
||||
pinyin = words.split()[1] if len(words.split()) > 1 else None
|
||||
if v := dictionary.get(word):
|
||||
if len(v) > 1:
|
||||
print(f"\nWARNING: {word} has multiple meanings:")
|
||||
for n, w in enumerate(v):
|
||||
if pinyin and pinyin != "ERROR":
|
||||
ml = filter(lambda x: v.pinyin == pinyin, v)
|
||||
else:
|
||||
ml = v
|
||||
for n, w in enumerate(ml):
|
||||
print(f"{n+1} - {w}")
|
||||
for m in w.meanings:
|
||||
print(f"\t{m}")
|
||||
@@ -161,11 +176,13 @@ def dictionary_process(dictionary, in_file):
|
||||
v = v[int(s) - 1]
|
||||
else:
|
||||
v = v[0]
|
||||
input_file.write(f"{word}\t{v.pinyin}\n")
|
||||
results.append(v)
|
||||
else:
|
||||
print("============================================")
|
||||
print(f"===================>ERROR: {word} not found")
|
||||
print("============================================")
|
||||
input_file.write(f"{word}\tERROR\n")
|
||||
return results
|
||||
|
||||
|
||||
@@ -231,14 +248,16 @@ def output_anki_text(out_file, results):
|
||||
|
||||
def main():
|
||||
in_file, out_file, file_type = process_files()
|
||||
if TEXT_TYPE == file_type:
|
||||
if PHRASES_TYPE in in_file.suffixes:
|
||||
create_translator()
|
||||
results = translator_process(in_file)
|
||||
output_anki_text(out_file, results)
|
||||
else:
|
||||
elif DICT_TYPE in in_file.suffixes:
|
||||
dictionary = create_cedict()
|
||||
results = dictionary_process(dictionary, in_file)
|
||||
output_anki_dictionary(out_file, results)
|
||||
else:
|
||||
raise TypeError("Error, filetype not especified!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
你
|
||||
好
|
||||
你好
|
||||
您
|
||||
你好
|
||||
您好
|
||||
你们
|
||||
您们
|
||||
你们好
|
||||
您们好
|
||||
对不起
|
||||
没关系
|
||||
@@ -1,4 +1,3 @@
|
||||
TEXT_TYPE
|
||||
上课
|
||||
下课
|
||||
想在休息
|
||||
@@ -1,3 +1,2 @@
|
||||
TEXT_TYPE
|
||||
不谢
|
||||
谢谢你
|
||||
@@ -1,4 +1,3 @@
|
||||
TEXT_TYPE
|
||||
衣村生活
|
||||
新农村
|
||||
环境优美
|
||||
|
||||
@@ -1,2 +1 @@
|
||||
TEXT_TYPE
|
||||
周六那场篮球比在,对手很厉害。前半场他们一直赢,后半场我们对才超过他们,领先得并不轻松。
|
||||
BIN
data/output/HSK1/HSK1-1.dictionary.apkg
Normal file
BIN
data/output/HSK1/HSK1-1.dictionary.apkg
Normal file
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user