From 266bbbb370b3d01257ea700d338c660b84bd4262 Mon Sep 17 00:00:00 2001 From: Wolfang Torres Date: Mon, 22 Jun 2026 21:37:45 +0800 Subject: [PATCH] update voice generation --- src/anki_hsk_creator/anki_generation.py | 4 ++-- src/anki_hsk_creator/proccessor.py | 12 +++++++++--- src/anki_hsk_creator/utility.py | 9 +++++++-- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/anki_hsk_creator/anki_generation.py b/src/anki_hsk_creator/anki_generation.py index a922ef8..d735415 100644 --- a/src/anki_hsk_creator/anki_generation.py +++ b/src/anki_hsk_creator/anki_generation.py @@ -112,7 +112,7 @@ HSK_MODEL = Model( "
{{Simplified}}
" "
{{Traditional}}
" "
Pinyin: {{type:Pinyin}}" - #"
Translated: {{type:Translated}}" + # "
Translated: {{type:Translated}}" ), "afmt": ( "{{FrontSide}}
{{Pinyin}}" @@ -123,7 +123,7 @@ HSK_MODEL = Model( "name": "Card 3", "qfmt": ( "{{Audio}}" - #"
Pinyin: {{type:Pinyin}}" + # "
Pinyin: {{type:Pinyin}}" "
Simplified: {{type:Simplified}}" ), "afmt": ( diff --git a/src/anki_hsk_creator/proccessor.py b/src/anki_hsk_creator/proccessor.py index 822a5a1..c1e78ad 100644 --- a/src/anki_hsk_creator/proccessor.py +++ b/src/anki_hsk_creator/proccessor.py @@ -26,9 +26,13 @@ def dictation_process( results = [] for n, line in enumerate(text_lines): line = line.strip() + line = " ".join(line.split()) + line = line.replace(",", ", 。。。 ") audio_path = process_file.resources / f"N{n:03n}.wav" if not audio_path.exists(): - audio = TTS.MODEL.generate(f"{line}。", language_id=LANGUAGES.CN) + audio = TTS.MODEL.generate( + f"{line}。", language_id=LANGUAGES.CN, **TTS.DEFAULTS + ) torchaudio.save(audio_path, audio, TTS.MODEL.sr) translated = argostranslate.translate.translate( line, LANGUAGES.CN, process_file.language_id @@ -48,7 +52,9 @@ def translator_process( line = line.strip() audio_path = process_file.resources / f"N{n:03n}.wav" if not audio_path.exists(): - audio = TTS.MODEL.generate(f"{line}。", language_id=LANGUAGES.CN) + audio = TTS.MODEL.generate( + f"{line}。", language_id=LANGUAGES.CN, **TTS.DEFAULTS + ) torchaudio.save(audio_path, audio, TTS.MODEL.sr) translated = argostranslate.translate.translate( line, LANGUAGES.CN, process_file.language_id @@ -112,7 +118,7 @@ def dictionary_process(process_file: ProcessFile) -> list[DictionaryResult]: audio_path = process_file.resources / f"{line['pinyin']}.wav" if not audio_path.exists(): audio = TTS.MODEL.generate( - f"{line['simplified']}。", language_id=LANGUAGES.CN + f"{line['simplified']}。", language_id=LANGUAGES.CN, **TTS.DEFAULTS ) torchaudio.save(audio_path, audio, TTS.MODEL.sr) print(line) diff --git a/src/anki_hsk_creator/utility.py b/src/anki_hsk_creator/utility.py index 5fb6c33..535dec6 100644 --- a/src/anki_hsk_creator/utility.py +++ b/src/anki_hsk_creator/utility.py @@ -49,8 +49,12 @@ class TRANS: for out_package in packages: if out_package.to_code == to_code: if in_package.to_code == out_package.from_code: - print(f"Check in_package {in_package.from_code} {in_package.to_code}") - print(f"Check out_package {out_package.from_code} {out_package.to_code}") + print( + f"Check in_package {in_package.from_code} {in_package.to_code}" + ) + print( + f"Check out_package {out_package.from_code} {out_package.to_code}" + ) packages_to_install.append(in_package) packages_to_install.append(out_package) for package in packages_to_install: @@ -133,6 +137,7 @@ class TTS: MODEL = None DEVICE = None + DEFAULTS = {"cfg_weight": 0.2, "exaggeration": 0.8} @staticmethod def create_tts():