add audio generation support

This commit is contained in:
Wolfang Torres
2026-05-27 14:42:39 +08:00
parent b1e0ed45b7
commit e35bcf6d74
13 changed files with 61 additions and 36 deletions

View File

@@ -1,2 +1,5 @@
"""anki-hsk-creator""" """anki-hsk-creator"""
HF_TOKEN = "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"
import os
os.environ["HF_TOKEN"] = "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"

View File

@@ -10,6 +10,8 @@ from genanki import Deck, Note, Model, Package
import argostranslate.package import argostranslate.package
import argostranslate.translate import argostranslate.translate
from chatterbox.mtl_tts import ChatterboxMultilingualTTS from chatterbox.mtl_tts import ChatterboxMultilingualTTS
import torch
import torchaudio
## Constants ## Constants
@@ -68,7 +70,7 @@ HSK_FRONT_TEMPLATE = """
{{English}} {{English}}
</tts> </tts>
<br> <br>
{{MyMedia}} {{Audio}}
""" """
HSK_MODEL = Model( HSK_MODEL = Model(
@@ -92,7 +94,7 @@ HSK_MODEL = Model(
"name": "Card 2", "name": "Card 2",
"qfmt": "<div class='simple'>{{Simplified}}</div><br><div class='trad'>" "qfmt": "<div class='simple'>{{Simplified}}</div><br><div class='trad'>"
"{{Traditional}}</div>", "{{Traditional}}</div>",
"afmt": '{{FrontSide}}<hr id="answer"><strong>{{Pinyin}}</strong><br>{{English}}<br>{{MyMedia}}', "afmt": '{{FrontSide}}<hr id="answer"><strong>{{Pinyin}}</strong><br>{{English}}<br>{{Audio}}',
}, },
], ],
css=CSS, css=CSS,
@@ -128,7 +130,14 @@ def create_translator():
argostranslate.package.install_from_path(package_to_install.download()) argostranslate.package.install_from_path(package_to_install.download())
def create_tts(): def create_tts():
tts = ChatterboxMultilingualTTS.from_pretrained(device="cuda") # Automatically detect the best available device
if torch.cuda.is_available():
device = "cuda"
elif torch.backends.mps.is_available():
device = "mps"
else:
device = "cpu"
tts = ChatterboxMultilingualTTS.from_pretrained(device=device)
return tts return tts
## Main ## Main
@@ -166,6 +175,7 @@ def dictionary_process(dictionary, tts, in_file, resources):
"""Process dictionary files""" """Process dictionary files"""
words_list = in_file.open(encoding="utf8").read().strip().split("\n") words_list = in_file.open(encoding="utf8").read().strip().split("\n")
results = [] results = []
try:
with in_file.open("w", encoding="utf8") as input_file: with in_file.open("w", encoding="utf8") as input_file:
for words in words_list: for words in words_list:
word = words.split()[0] word = words.split()[0]
@@ -190,9 +200,10 @@ def dictionary_process(dictionary, tts, in_file, resources):
v = ml[0] v = ml[0]
else: else:
v = v[0] v = v[0]
audio = tts.generate(word, language_id="zh")
audio_path = resources / f"{word}.wav" audio_path = resources / f"{word}.wav"
ta.save(audio_path, audio, tts.sr) if not audio_path.exists():
audio = tts.generate(word, language_id="zh")
torchaudio.save(audio_path, audio, tts.sr)
input_file.write(f"{word}\t{v.pinyin}\n") input_file.write(f"{word}\t{v.pinyin}\n")
results.append((v, audio_path)) results.append((v, audio_path))
else: else:
@@ -200,6 +211,9 @@ def dictionary_process(dictionary, tts, in_file, resources):
print(f"===================>ERROR: {word} not found") print(f"===================>ERROR: {word} not found")
print("============================================") print("============================================")
input_file.write(f"{word}\tERROR\n") input_file.write(f"{word}\tERROR\n")
except Exception:
with in_file.open("w", encoding="utf8") as input_file:
input_file.write("\n".join(words_list))
return results return results

View File

@@ -7,5 +7,8 @@ Requires-Dist: pinyin-tone-converter
Requires-Dist: genanki Requires-Dist: genanki
Requires-Dist: argostranslate Requires-Dist: argostranslate
Requires-Dist: chatterbox-tts Requires-Dist: chatterbox-tts
Requires-Dist: torch
Requires-Dist: torchaudio
Requires-Dist: torchcodec
Dynamic: license-file Dynamic: license-file
Dynamic: requires-dist Dynamic: requires-dist

View File

@@ -3,3 +3,6 @@ pinyin-tone-converter
genanki genanki
argostranslate argostranslate
chatterbox-tts chatterbox-tts
torch
torchaudio
torchcodec

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -10,6 +10,8 @@ setup(
"genanki", "genanki",
"argostranslate", "argostranslate",
"chatterbox-tts", "chatterbox-tts",
"torch",
"torchaudio", "torchaudio",
"torchcodec"
], ],
) )