add audio generation support
This commit is contained in:
@@ -1,2 +1,5 @@
|
|||||||
"""anki-hsk-creator"""
|
"""anki-hsk-creator"""
|
||||||
HF_TOKEN = "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"
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
os.environ["HF_TOKEN"] = "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"
|
||||||
|
|||||||
@@ -10,6 +10,8 @@ from genanki import Deck, Note, Model, Package
|
|||||||
import argostranslate.package
|
import argostranslate.package
|
||||||
import argostranslate.translate
|
import argostranslate.translate
|
||||||
from chatterbox.mtl_tts import ChatterboxMultilingualTTS
|
from chatterbox.mtl_tts import ChatterboxMultilingualTTS
|
||||||
|
import torch
|
||||||
|
import torchaudio
|
||||||
|
|
||||||
## Constants
|
## Constants
|
||||||
|
|
||||||
@@ -68,7 +70,7 @@ HSK_FRONT_TEMPLATE = """
|
|||||||
{{English}}
|
{{English}}
|
||||||
</tts>
|
</tts>
|
||||||
<br>
|
<br>
|
||||||
{{MyMedia}}
|
{{Audio}}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
HSK_MODEL = Model(
|
HSK_MODEL = Model(
|
||||||
@@ -92,7 +94,7 @@ HSK_MODEL = Model(
|
|||||||
"name": "Card 2",
|
"name": "Card 2",
|
||||||
"qfmt": "<div class='simple'>{{Simplified}}</div><br><div class='trad'>"
|
"qfmt": "<div class='simple'>{{Simplified}}</div><br><div class='trad'>"
|
||||||
"{{Traditional}}</div>",
|
"{{Traditional}}</div>",
|
||||||
"afmt": '{{FrontSide}}<hr id="answer"><strong>{{Pinyin}}</strong><br>{{English}}<br>{{MyMedia}}',
|
"afmt": '{{FrontSide}}<hr id="answer"><strong>{{Pinyin}}</strong><br>{{English}}<br>{{Audio}}',
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
css=CSS,
|
css=CSS,
|
||||||
@@ -128,7 +130,14 @@ def create_translator():
|
|||||||
argostranslate.package.install_from_path(package_to_install.download())
|
argostranslate.package.install_from_path(package_to_install.download())
|
||||||
|
|
||||||
def create_tts():
|
def create_tts():
|
||||||
tts = ChatterboxMultilingualTTS.from_pretrained(device="cuda")
|
# Automatically detect the best available device
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
device = "cuda"
|
||||||
|
elif torch.backends.mps.is_available():
|
||||||
|
device = "mps"
|
||||||
|
else:
|
||||||
|
device = "cpu"
|
||||||
|
tts = ChatterboxMultilingualTTS.from_pretrained(device=device)
|
||||||
return tts
|
return tts
|
||||||
|
|
||||||
## Main
|
## Main
|
||||||
@@ -166,6 +175,7 @@ def dictionary_process(dictionary, tts, in_file, resources):
|
|||||||
"""Process dictionary files"""
|
"""Process dictionary files"""
|
||||||
words_list = in_file.open(encoding="utf8").read().strip().split("\n")
|
words_list = in_file.open(encoding="utf8").read().strip().split("\n")
|
||||||
results = []
|
results = []
|
||||||
|
try:
|
||||||
with in_file.open("w", encoding="utf8") as input_file:
|
with in_file.open("w", encoding="utf8") as input_file:
|
||||||
for words in words_list:
|
for words in words_list:
|
||||||
word = words.split()[0]
|
word = words.split()[0]
|
||||||
@@ -190,9 +200,10 @@ def dictionary_process(dictionary, tts, in_file, resources):
|
|||||||
v = ml[0]
|
v = ml[0]
|
||||||
else:
|
else:
|
||||||
v = v[0]
|
v = v[0]
|
||||||
audio = tts.generate(word, language_id="zh")
|
|
||||||
audio_path = resources / f"{word}.wav"
|
audio_path = resources / f"{word}.wav"
|
||||||
ta.save(audio_path, audio, tts.sr)
|
if not audio_path.exists():
|
||||||
|
audio = tts.generate(word, language_id="zh")
|
||||||
|
torchaudio.save(audio_path, audio, tts.sr)
|
||||||
input_file.write(f"{word}\t{v.pinyin}\n")
|
input_file.write(f"{word}\t{v.pinyin}\n")
|
||||||
results.append((v, audio_path))
|
results.append((v, audio_path))
|
||||||
else:
|
else:
|
||||||
@@ -200,6 +211,9 @@ def dictionary_process(dictionary, tts, in_file, resources):
|
|||||||
print(f"===================>ERROR: {word} not found")
|
print(f"===================>ERROR: {word} not found")
|
||||||
print("============================================")
|
print("============================================")
|
||||||
input_file.write(f"{word}\tERROR\n")
|
input_file.write(f"{word}\tERROR\n")
|
||||||
|
except Exception:
|
||||||
|
with in_file.open("w", encoding="utf8") as input_file:
|
||||||
|
input_file.write("\n".join(words_list))
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -7,5 +7,8 @@ Requires-Dist: pinyin-tone-converter
|
|||||||
Requires-Dist: genanki
|
Requires-Dist: genanki
|
||||||
Requires-Dist: argostranslate
|
Requires-Dist: argostranslate
|
||||||
Requires-Dist: chatterbox-tts
|
Requires-Dist: chatterbox-tts
|
||||||
|
Requires-Dist: torch
|
||||||
|
Requires-Dist: torchaudio
|
||||||
|
Requires-Dist: torchcodec
|
||||||
Dynamic: license-file
|
Dynamic: license-file
|
||||||
Dynamic: requires-dist
|
Dynamic: requires-dist
|
||||||
|
|||||||
@@ -3,3 +3,6 @@ pinyin-tone-converter
|
|||||||
genanki
|
genanki
|
||||||
argostranslate
|
argostranslate
|
||||||
chatterbox-tts
|
chatterbox-tts
|
||||||
|
torch
|
||||||
|
torchaudio
|
||||||
|
torchcodec
|
||||||
|
|||||||
Binary file not shown.
BIN
data/resources/HSK1/HSK1-1.dictionary/你.wav
Normal file
BIN
data/resources/HSK1/HSK1-1.dictionary/你.wav
Normal file
Binary file not shown.
BIN
data/resources/HSK1/HSK1-1.dictionary/你好.wav
Normal file
BIN
data/resources/HSK1/HSK1-1.dictionary/你好.wav
Normal file
Binary file not shown.
BIN
data/resources/HSK1/HSK1-1.dictionary/好.wav
Normal file
BIN
data/resources/HSK1/HSK1-1.dictionary/好.wav
Normal file
Binary file not shown.
BIN
data/resources/HSK1/HSK1-1.dictionary/对不起.wav
Normal file
BIN
data/resources/HSK1/HSK1-1.dictionary/对不起.wav
Normal file
Binary file not shown.
BIN
data/resources/HSK1/HSK1-1.dictionary/您.wav
Normal file
BIN
data/resources/HSK1/HSK1-1.dictionary/您.wav
Normal file
Binary file not shown.
BIN
data/resources/HSK1/HSK1-1.dictionary/您好.wav
Normal file
BIN
data/resources/HSK1/HSK1-1.dictionary/您好.wav
Normal file
Binary file not shown.
BIN
data/resources/HSK1/HSK1-1.dictionary/没关系.wav
Normal file
BIN
data/resources/HSK1/HSK1-1.dictionary/没关系.wav
Normal file
Binary file not shown.
Reference in New Issue
Block a user