rename files and add chat tts
This commit is contained in:
@@ -18,7 +18,8 @@ INPUT = DATA / "input"
|
|||||||
OUTPUT = DATA / "output"
|
OUTPUT = DATA / "output"
|
||||||
CN = "zh"
|
CN = "zh"
|
||||||
EN = "en"
|
EN = "en"
|
||||||
TEXT_TYPE = "TEXT_TYPE"
|
PHRASES_TYPE = ".phrases"
|
||||||
|
DICT_TYPE = ".dictionary"
|
||||||
CSS = """
|
CSS = """
|
||||||
.card {
|
.card {
|
||||||
font-family: arial;
|
font-family: arial;
|
||||||
@@ -127,14 +128,21 @@ def create_translator():
|
|||||||
|
|
||||||
def process_files():
|
def process_files():
|
||||||
print("Select data file:")
|
print("Select data file:")
|
||||||
files = []
|
in_file = None
|
||||||
for n, file in enumerate(INPUT.glob("**/*.txt")):
|
level = INPUT
|
||||||
files.append(file)
|
while not in_file:
|
||||||
print(f"{n+1} - {file.relative_to(INPUT)}")
|
files = []
|
||||||
s = None
|
for n, file in enumerate(level.glob("*")):
|
||||||
while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)):
|
files.append(file)
|
||||||
s = input(f"Please select the file [1-{len(files)}]: ")
|
print(f"{n+1} - {file.relative_to(INPUT)}")
|
||||||
in_file = files[int(s) - 1]
|
s = None
|
||||||
|
while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)):
|
||||||
|
s = input(f"Please select the file [1-{len(files)}]: ")
|
||||||
|
selected = files[int(s) - 1]
|
||||||
|
if selected.is_file():
|
||||||
|
in_file = selected
|
||||||
|
else:
|
||||||
|
level = selected
|
||||||
relative = in_file.relative_to(INPUT)
|
relative = in_file.relative_to(INPUT)
|
||||||
out_file = OUTPUT / relative
|
out_file = OUTPUT / relative
|
||||||
out_file.parent.mkdir(parents=True, exist_ok=True)
|
out_file.parent.mkdir(parents=True, exist_ok=True)
|
||||||
@@ -147,25 +155,34 @@ def dictionary_process(dictionary, in_file):
|
|||||||
"""Process dictionary files"""
|
"""Process dictionary files"""
|
||||||
words_list = in_file.open(encoding="utf8").read().split()
|
words_list = in_file.open(encoding="utf8").read().split()
|
||||||
results = []
|
results = []
|
||||||
for word in words_list:
|
with in_file.open("w", encoding="utf8") as input_file:
|
||||||
if v := dictionary.get(word):
|
for words in words_list:
|
||||||
if len(v) > 1:
|
word = words.split()[0]
|
||||||
print(f"\nWARNING: {word} has multiple meanings:")
|
pinyin = words.split()[1] if len(words.split()) > 1 else None
|
||||||
for n, w in enumerate(v):
|
if v := dictionary.get(word):
|
||||||
print(f"{n+1} - {w}")
|
if len(v) > 1:
|
||||||
for m in w.meanings:
|
print(f"\nWARNING: {word} has multiple meanings:")
|
||||||
print(f"\t{m}")
|
if pinyin and pinyin != "ERROR":
|
||||||
s = None
|
ml = filter(lambda x: v.pinyin == pinyin, v)
|
||||||
while not s or not s.isnumeric() or not (1 <= int(s) <= len(v)):
|
else:
|
||||||
s = input(f"Please select the correct word [1-{len(v)}]: ")
|
ml = v
|
||||||
v = v[int(s) - 1]
|
for n, w in enumerate(ml):
|
||||||
|
print(f"{n+1} - {w}")
|
||||||
|
for m in w.meanings:
|
||||||
|
print(f"\t{m}")
|
||||||
|
s = None
|
||||||
|
while not s or not s.isnumeric() or not (1 <= int(s) <= len(v)):
|
||||||
|
s = input(f"Please select the correct word [1-{len(v)}]: ")
|
||||||
|
v = v[int(s) - 1]
|
||||||
|
else:
|
||||||
|
v = v[0]
|
||||||
|
input_file.write(f"{word}\t{v.pinyin}\n")
|
||||||
|
results.append(v)
|
||||||
else:
|
else:
|
||||||
v = v[0]
|
print("============================================")
|
||||||
results.append(v)
|
print(f"===================>ERROR: {word} not found")
|
||||||
else:
|
print("============================================")
|
||||||
print("============================================")
|
input_file.write(f"{word}\tERROR\n")
|
||||||
print(f"===================>ERROR: {word} not found")
|
|
||||||
print("============================================")
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
@@ -231,14 +248,16 @@ def output_anki_text(out_file, results):
|
|||||||
|
|
||||||
def main():
|
def main():
|
||||||
in_file, out_file, file_type = process_files()
|
in_file, out_file, file_type = process_files()
|
||||||
if TEXT_TYPE == file_type:
|
if PHRASES_TYPE in in_file.suffixes:
|
||||||
create_translator()
|
create_translator()
|
||||||
results = translator_process(in_file)
|
results = translator_process(in_file)
|
||||||
output_anki_text(out_file, results)
|
output_anki_text(out_file, results)
|
||||||
else:
|
elif DICT_TYPE in in_file.suffixes:
|
||||||
dictionary = create_cedict()
|
dictionary = create_cedict()
|
||||||
results = dictionary_process(dictionary, in_file)
|
results = dictionary_process(dictionary, in_file)
|
||||||
output_anki_dictionary(out_file, results)
|
output_anki_dictionary(out_file, results)
|
||||||
|
else:
|
||||||
|
raise TypeError("Error, filetype not especified!")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
你
|
你
|
||||||
好
|
好
|
||||||
你好
|
|
||||||
您
|
您
|
||||||
|
你好
|
||||||
您好
|
您好
|
||||||
你们
|
你们好
|
||||||
您们
|
您们好
|
||||||
对不起
|
对不起
|
||||||
没关系
|
没关系
|
||||||
@@ -1,4 +1,3 @@
|
|||||||
TEXT_TYPE
|
|
||||||
上课
|
上课
|
||||||
下课
|
下课
|
||||||
想在休息
|
想在休息
|
||||||
@@ -1,3 +1,2 @@
|
|||||||
TEXT_TYPE
|
|
||||||
不谢
|
不谢
|
||||||
谢谢你
|
谢谢你
|
||||||
@@ -1,4 +1,3 @@
|
|||||||
TEXT_TYPE
|
|
||||||
衣村生活
|
衣村生活
|
||||||
新农村
|
新农村
|
||||||
环境优美
|
环境优美
|
||||||
|
|||||||
@@ -1,2 +1 @@
|
|||||||
TEXT_TYPE
|
|
||||||
周六那场篮球比在,对手很厉害。前半场他们一直赢,后半场我们对才超过他们,领先得并不轻松。
|
周六那场篮球比在,对手很厉害。前半场他们一直赢,后半场我们对才超过他们,领先得并不轻松。
|
||||||
BIN
data/output/HSK1/HSK1-1.dictionary.apkg
Normal file
BIN
data/output/HSK1/HSK1-1.dictionary.apkg
Normal file
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user