add audio generation support

2026-05-27 14:42:39 +08:00
parent b1e0ed45b7
commit e35bcf6d74
13 changed files with 61 additions and 36 deletions
--- a/anki-hsk-creator/init.py
+++ b/anki-hsk-creator/init.py
@@ -1,2 +1,5 @@
 """anki-hsk-creator"""
-HF_TOKEN = "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"
+
 import os
 os.environ["HF_TOKEN"] = "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"
--- a/anki-hsk-creator/main.py
+++ b/anki-hsk-creator/main.py
@@ -10,6 +10,8 @@ from genanki import Deck, Note, Model, Package
 import argostranslate.package
 import argostranslate.translate
 from chatterbox.mtl_tts import ChatterboxMultilingualTTS
 import torch
 import torchaudio
 ## Constants
@@ -68,7 +70,7 @@ HSK_FRONT_TEMPLATE = """
 {{English}}
 </tts>
 <br>
-{{MyMedia}}
+{{Audio}}
 """
 HSK_MODEL = Model(
@@ -92,7 +94,7 @@ HSK_MODEL = Model(
            "name": "Card 2",
            "qfmt": "<div class='simple'>{{Simplified}}</div><br><div class='trad'>"
            "{{Traditional}}</div>",
-            "afmt": '{{FrontSide}}<hr id="answer"><strong>{{Pinyin}}</strong><br>{{English}}<br>{{MyMedia}}',
+            "afmt": '{{FrontSide}}<hr id="answer"><strong>{{Pinyin}}</strong><br>{{English}}<br>{{Audio}}',
        },
    ],
    css=CSS,
@@ -128,7 +130,14 @@ def create_translator():
    argostranslate.package.install_from_path(package_to_install.download())
 def create_tts():
-    tts = ChatterboxMultilingualTTS.from_pretrained(device="cuda")
+    # Automatically detect the best available device
    if torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"
    tts = ChatterboxMultilingualTTS.from_pretrained(device=device)
    return tts
 ## Main
@@ -166,6 +175,7 @@ def dictionary_process(dictionary, tts, in_file, resources):
    """Process dictionary files"""
    words_list = in_file.open(encoding="utf8").read().strip().split("\n")
    results = [] 
    try:
        with in_file.open("w", encoding="utf8") as input_file:
            for words in words_list:
                word = words.split()[0]
@@ -190,9 +200,10 @@ def dictionary_process(dictionary, tts, in_file, resources):
                            v = ml[0]
                    else:
                        v = v[0]
                audio = tts.generate(word, language_id="zh")
                    audio_path = resources / f"{word}.wav"
-                ta.save(audio_path, audio, tts.sr)
+                    if not audio_path.exists():
                        audio = tts.generate(word, language_id="zh")
                        torchaudio.save(audio_path, audio, tts.sr)
                    input_file.write(f"{word}\t{v.pinyin}\n")
                    results.append((v, audio_path))
                else:
@@ -200,6 +211,9 @@ def dictionary_process(dictionary, tts, in_file, resources):
                    print(f"===================>ERROR: {word} not found")
                    print("============================================")
                    input_file.write(f"{word}\tERROR\n")
    except Exception:
        with in_file.open("w", encoding="utf8") as input_file:
            input_file.write("\n".join(words_list))
    return results
--- a/anki_hsk_creator.egg-info/PKG-INFO
+++ b/anki_hsk_creator.egg-info/PKG-INFO
@@ -7,5 +7,8 @@ Requires-Dist: pinyin-tone-converter
 Requires-Dist: genanki
 Requires-Dist: argostranslate
 Requires-Dist: chatterbox-tts
 Requires-Dist: torch
 Requires-Dist: torchaudio
 Requires-Dist: torchcodec
 Dynamic: license-file
 Dynamic: requires-dist
--- a/anki_hsk_creator.egg-info/requires.txt
+++ b/anki_hsk_creator.egg-info/requires.txt
@@ -3,3 +3,6 @@ pinyin-tone-converter
 genanki
 argostranslate
 chatterbox-tts
 torch
 torchaudio
 torchcodec
--- a/data/output/HSK1/HSK1-1.dictionary.apkg
+++ b/data/output/HSK1/HSK1-1.dictionary.apkg
--- a/data/resources/HSK1/HSK1-1.dictionary/你.wav
+++ b/data/resources/HSK1/HSK1-1.dictionary/你.wav
--- a/data/resources/HSK1/HSK1-1.dictionary/你好.wav
+++ b/data/resources/HSK1/HSK1-1.dictionary/你好.wav
--- a/data/resources/HSK1/HSK1-1.dictionary/好.wav
+++ b/data/resources/HSK1/HSK1-1.dictionary/好.wav
--- a/data/resources/HSK1/HSK1-1.dictionary/对不起.wav
+++ b/data/resources/HSK1/HSK1-1.dictionary/对不起.wav
--- a/data/resources/HSK1/HSK1-1.dictionary/您.wav
+++ b/data/resources/HSK1/HSK1-1.dictionary/您.wav
--- a/data/resources/HSK1/HSK1-1.dictionary/您好.wav
+++ b/data/resources/HSK1/HSK1-1.dictionary/您好.wav
--- a/data/resources/HSK1/HSK1-1.dictionary/没关系.wav
+++ b/data/resources/HSK1/HSK1-1.dictionary/没关系.wav
--- a/setup.py
+++ b/setup.py
@@ -10,6 +10,8 @@ setup(
        "genanki",
        "argostranslate",
        "chatterbox-tts",
        "torch",
        "torchaudio",
        "torchcodec"
    ],
 )