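Update voice generation: pass TTS.DEFAULTS (cfg_weight, exaggeration) to the TTS.MODEL.generate calls and normalize whitespace in dictation lines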

This commit is contained in:
Wolfang Torres
2026-06-22 21:37:45 +08:00
parent d0c7da966d
commit 266bbbb370
3 changed files with 18 additions and 7 deletions

View File

@@ -26,9 +26,13 @@ def dictation_process(
results = [] results = []
for n, line in enumerate(text_lines): for n, line in enumerate(text_lines):
line = line.strip() line = line.strip()
line = " ".join(line.split())
line = line.replace("", " 。。。 ")
audio_path = process_file.resources / f"N{n:03n}.wav" audio_path = process_file.resources / f"N{n:03n}.wav"
if not audio_path.exists(): if not audio_path.exists():
audio = TTS.MODEL.generate(f"{line}", language_id=LANGUAGES.CN) audio = TTS.MODEL.generate(
f"{line}", language_id=LANGUAGES.CN, **TTS.DEFAULTS
)
torchaudio.save(audio_path, audio, TTS.MODEL.sr) torchaudio.save(audio_path, audio, TTS.MODEL.sr)
translated = argostranslate.translate.translate( translated = argostranslate.translate.translate(
line, LANGUAGES.CN, process_file.language_id line, LANGUAGES.CN, process_file.language_id
@@ -48,7 +52,9 @@ def translator_process(
line = line.strip() line = line.strip()
audio_path = process_file.resources / f"N{n:03n}.wav" audio_path = process_file.resources / f"N{n:03n}.wav"
if not audio_path.exists(): if not audio_path.exists():
audio = TTS.MODEL.generate(f"{line}", language_id=LANGUAGES.CN) audio = TTS.MODEL.generate(
f"{line}", language_id=LANGUAGES.CN, **TTS.DEFAULTS
)
torchaudio.save(audio_path, audio, TTS.MODEL.sr) torchaudio.save(audio_path, audio, TTS.MODEL.sr)
translated = argostranslate.translate.translate( translated = argostranslate.translate.translate(
line, LANGUAGES.CN, process_file.language_id line, LANGUAGES.CN, process_file.language_id
@@ -112,7 +118,7 @@ def dictionary_process(process_file: ProcessFile) -> list[DictionaryResult]:
audio_path = process_file.resources / f"{line['pinyin']}.wav" audio_path = process_file.resources / f"{line['pinyin']}.wav"
if not audio_path.exists(): if not audio_path.exists():
audio = TTS.MODEL.generate( audio = TTS.MODEL.generate(
f"{line['simplified']}", language_id=LANGUAGES.CN f"{line['simplified']}", language_id=LANGUAGES.CN, **TTS.DEFAULTS
) )
torchaudio.save(audio_path, audio, TTS.MODEL.sr) torchaudio.save(audio_path, audio, TTS.MODEL.sr)
print(line) print(line)

View File

@@ -49,8 +49,12 @@ class TRANS:
for out_package in packages: for out_package in packages:
if out_package.to_code == to_code: if out_package.to_code == to_code:
if in_package.to_code == out_package.from_code: if in_package.to_code == out_package.from_code:
print(f"Check in_package {in_package.from_code} {in_package.to_code}") print(
print(f"Check out_package {out_package.from_code} {out_package.to_code}") f"Check in_package {in_package.from_code} {in_package.to_code}"
)
print(
f"Check out_package {out_package.from_code} {out_package.to_code}"
)
packages_to_install.append(in_package) packages_to_install.append(in_package)
packages_to_install.append(out_package) packages_to_install.append(out_package)
for package in packages_to_install: for package in packages_to_install:
@@ -133,6 +137,7 @@ class TTS:
MODEL = None MODEL = None
DEVICE = None DEVICE = None
DEFAULTS = {"cfg_weight": 0.2, "exaggeration": 0.8}
@staticmethod @staticmethod
def create_tts(): def create_tts():