add audio generation support

This commit is contained in:
Wolfang Torres
2026-05-27 14:42:39 +08:00
parent b1e0ed45b7
commit e35bcf6d74
13 changed files with 61 additions and 36 deletions

View File

@@ -1,2 +1,5 @@
"""anki-hsk-creator"""
import os

# Hugging Face access token used to download gated/private model weights.
# It is read from the HF_TOKEN environment variable rather than being embedded
# in the source: a token committed to version control must be considered
# leaked and revoked. Export HF_TOKEN in the shell (or a local, untracked
# .env file) before running; an empty string means "no token configured".
HF_TOKEN = os.environ.get("HF_TOKEN", "")

View File

@@ -10,6 +10,8 @@ from genanki import Deck, Note, Model, Package
import argostranslate.package
import argostranslate.translate
from chatterbox.mtl_tts import ChatterboxMultilingualTTS
import torch
import torchaudio
## Constants
@@ -68,7 +70,7 @@ HSK_FRONT_TEMPLATE = """
{{English}}
</tts>
<br>
{{MyMedia}}
{{Audio}}
"""
HSK_MODEL = Model(
@@ -92,7 +94,7 @@ HSK_MODEL = Model(
"name": "Card 2",
"qfmt": "<div class='simple'>{{Simplified}}</div><br><div class='trad'>"
"{{Traditional}}</div>",
"afmt": '{{FrontSide}}<hr id="answer"><strong>{{Pinyin}}</strong><br>{{English}}<br>{{MyMedia}}',
"afmt": '{{FrontSide}}<hr id="answer"><strong>{{Pinyin}}</strong><br>{{English}}<br>{{Audio}}',
},
],
css=CSS,
@@ -128,7 +130,14 @@ def create_translator():
argostranslate.package.install_from_path(package_to_install.download())
def create_tts():
    """Load the multilingual Chatterbox TTS model on the best available device.

    The device is chosen in order of preference: CUDA, then Apple-silicon MPS,
    then CPU. The model is loaded exactly once, on the selected device, so the
    function also works on machines without a CUDA-capable GPU.

    Returns:
        The object returned by ``ChatterboxMultilingualTTS.from_pretrained``.
    """
    # Pick the device first; loading unconditionally on "cuda" beforehand would
    # fail on CPU/MPS-only hosts and waste a full model load on CUDA hosts.
    if torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"
    return ChatterboxMultilingualTTS.from_pretrained(device=device)
## Main
@@ -166,40 +175,45 @@ def dictionary_process(dictionary, tts, in_file, resources):
"""Process dictionary files"""
words_list = in_file.open(encoding="utf8").read().strip().split("\n")
results = []
with in_file.open("w", encoding="utf8") as input_file:
for words in words_list:
word = words.split()[0]
pinyin = " ".join(words.split()[1:]) if len(words.split()) > 1 else None
if v := dictionary.get(word):
if len(v) > 1:
print(f"\nWARNING: {word} has multiple meanings:")
if pinyin and pinyin != "ERROR":
ml = list(filter(lambda x: x.pinyin == pinyin, v))
try:
with in_file.open("w", encoding="utf8") as input_file:
for words in words_list:
word = words.split()[0]
pinyin = " ".join(words.split()[1:]) if len(words.split()) > 1 else None
if v := dictionary.get(word):
if len(v) > 1:
print(f"\nWARNING: {word} has multiple meanings:")
if pinyin and pinyin != "ERROR":
ml = list(filter(lambda x: x.pinyin == pinyin, v))
else:
ml = v
if len(ml) > 1:
for n, w in enumerate(ml):
print(f"{n+1} - {w}")
for m in w.meanings:
print(f"\t{m}")
s = None
while not s or not s.isnumeric() or not (1 <= int(s) <= len(v)):
s = input(f"Please select the correct word [1-{len(v)}]: ")
v = v[int(s) - 1]
else:
v = ml[0]
else:
ml = v
if len(ml) > 1:
for n, w in enumerate(ml):
print(f"{n+1} - {w}")
for m in w.meanings:
print(f"\t{m}")
s = None
while not s or not s.isnumeric() or not (1 <= int(s) <= len(v)):
s = input(f"Please select the correct word [1-{len(v)}]: ")
v = v[int(s) - 1]
else:
v = ml[0]
v = v[0]
audio_path = resources / f"{word}.wav"
if not audio_path.exists():
audio = tts.generate(word, language_id="zh")
torchaudio.save(audio_path, audio, tts.sr)
input_file.write(f"{word}\t{v.pinyin}\n")
results.append((v, audio_path))
else:
v = v[0]
audio = tts.generate(word, language_id="zh")
audio_path = resources / f"{word}.wav"
ta.save(audio_path, audio, tts.sr)
input_file.write(f"{word}\t{v.pinyin}\n")
results.append((v, audio_path))
else:
print("============================================")
print(f"===================>ERROR: {word} not found")
print("============================================")
input_file.write(f"{word}\tERROR\n")
print("============================================")
print(f"===================>ERROR: {word} not found")
print("============================================")
input_file.write(f"{word}\tERROR\n")
except Exception:
with in_file.open("w", encoding="utf8") as input_file:
input_file.write("\n".join(words_list))
return results

View File

@@ -7,5 +7,8 @@ Requires-Dist: pinyin-tone-converter
Requires-Dist: genanki
Requires-Dist: argostranslate
Requires-Dist: chatterbox-tts
Requires-Dist: torch
Requires-Dist: torchaudio
Requires-Dist: torchcodec
Dynamic: license-file
Dynamic: requires-dist

View File

@@ -3,3 +3,6 @@ pinyin-tone-converter
genanki
argostranslate
chatterbox-tts
torch
torchaudio
torchcodec

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -10,6 +10,8 @@ setup(
"genanki",
"argostranslate",
"chatterbox-tts",
"torch",
"torchaudio",
"torchcodec"
],
)