From b1e0ed45b776b87147e3d9646abbfcf9cdf4ce1b Mon Sep 17 00:00:00 2001 From: Wolfang Torres Date: Wed, 27 May 2026 12:49:31 +0800 Subject: [PATCH] addd chaterboox audio generation --- anki-hsk-creator/__init__.py | 1 + anki-hsk-creator/__main__.py | 89 +++++++++++++++--------- anki_hsk_creator.egg-info/PKG-INFO | 1 + anki_hsk_creator.egg-info/requires.txt | 1 + data/input/HSK1/HSK1-1.dictionary.txt | 16 ++--- data/output/HSK1/HSK1-1.dictionary.apkg | Bin 53466 -> 53466 bytes setup.py | 3 +- 7 files changed, 68 insertions(+), 43 deletions(-) diff --git a/anki-hsk-creator/__init__.py b/anki-hsk-creator/__init__.py index e0f45dd..5ced5d1 100644 --- a/anki-hsk-creator/__init__.py +++ b/anki-hsk-creator/__init__.py @@ -1 +1,2 @@ """anki-hsk-creator""" +HF_TOKEN = "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" \ No newline at end of file diff --git a/anki-hsk-creator/__main__.py b/anki-hsk-creator/__main__.py index b5da595..a352c35 100644 --- a/anki-hsk-creator/__main__.py +++ b/anki-hsk-creator/__main__.py @@ -9,6 +9,7 @@ from pinyin_tone_converter.pinyin_tone_converter import PinyinToneConverter from genanki import Deck, Note, Model, Package import argostranslate.package import argostranslate.translate +from chatterbox.mtl_tts import ChatterboxMultilingualTTS ## Constants @@ -16,6 +17,7 @@ CCCEDICT = Path(__file__).parent / "cedict_ts.u8" DATA = Path(__file__).parent.parent / "data" INPUT = DATA / "input" OUTPUT = DATA / "output" +RESOURCES = DATA / "resources" CN = "zh" EN = "en" PHRASES_TYPE = ".phrases" @@ -65,6 +67,8 @@ HSK_FRONT_TEMPLATE = """ {{English}} +
+{{MyMedia}} """ HSK_MODEL = Model( @@ -75,6 +79,7 @@ HSK_MODEL = Model( {"name": "Pinyin"}, {"name": "Simplified"}, {"name": "Traditional"}, + {'name': 'Audio'}, ], templates=[ { @@ -87,7 +92,7 @@ HSK_MODEL = Model( "name": "Card 2", "qfmt": "
{{Simplified}}

" "{{Traditional}}
", - "afmt": '{{FrontSide}}
{{Pinyin}}
{{English}}', + "afmt": '{{FrontSide}}
{{Pinyin}}
{{English}}
{{MyMedia}}', }, ], css=CSS, @@ -122,6 +127,9 @@ def create_translator(): ) argostranslate.package.install_from_path(package_to_install.download()) +def create_tts(): + tts = ChatterboxMultilingualTTS.from_pretrained(device="cuda") + return tts ## Main @@ -145,15 +153,18 @@ def process_files(): level = selected relative = in_file.relative_to(INPUT) out_file = OUTPUT / relative + resources = RESOURCES / relative + resources = resources.parent / resources.stem + resources.mkdir(parents=True, exist_ok=True) out_file.parent.mkdir(parents=True, exist_ok=True) with in_file.open(encoding="utf8") as input_file: file_type = input_file.read().split()[0] - return in_file, out_file, file_type + return in_file, out_file, resources, file_type -def dictionary_process(dictionary, in_file): +def dictionary_process(dictionary, tts, in_file, resources): """Process dictionary files""" - words_list = in_file.open(encoding="utf8").read().split("\n") + words_list = in_file.open(encoding="utf8").read().strip().split("\n") results = [] with in_file.open("w", encoding="utf8") as input_file: for words in words_list: @@ -163,21 +174,27 @@ def dictionary_process(dictionary, in_file): if len(v) > 1: print(f"\nWARNING: {word} has multiple meanings:") if pinyin and pinyin != "ERROR": - ml = filter(lambda x: v.pinyin == pinyin, v) + ml = list(filter(lambda x: x.pinyin == pinyin, v)) else: ml = v - for n, w in enumerate(ml): - print(f"{n+1} - {w}") - for m in w.meanings: - print(f"\t{m}") - s = None - while not s or not s.isnumeric() or not (1 <= int(s) <= len(v)): - s = input(f"Please select the correct word [1-{len(v)}]: ") - v = v[int(s) - 1] + if len(ml) > 1: + for n, w in enumerate(ml): + print(f"{n+1} - {w}") + for m in w.meanings: + print(f"\t{m}") + s = None + while not s or not s.isnumeric() or not (1 <= int(s) <= len(v)): + s = input(f"Please select the correct word [1-{len(v)}]: ") + v = v[int(s) - 1] + else: + v = ml[0] else: v = v[0] + audio = tts.generate(word, language_id="zh") + audio_path = resources / f"{word}.wav" + ta.save(audio_path, audio, tts.sr) input_file.write(f"{word}\t{v.pinyin}\n") - results.append(v) + results.append((v, audio_path)) else: print("============================================") print(f"===================>ERROR: {word} not found") @@ -199,27 +216,29 @@ def translator_process(in_file): return results -def output_tsv(out_file, results): - """writes the output as a tsv file""" - final_file = out_file.parent / f"{out_file.stem}.tsv" - with final_file.open("w", encoding="utf8", newline="") as csvfile: - writer = csv.writer(csvfile, delimiter="\t", quotechar='"') - for entry in results: - writer.writerow( - [ - "\n ".join(f"{n+1}. {m}" for n, m in enumerate(entry.meanings)), - PinyinToneConverter().convert_text(entry.pinyin), - entry.simplified, - entry.traditional, - ] - ) +# def output_tsv(out_file, results): +# """writes the output as a tsv file""" +# final_file = out_file.parent / f"{out_file.stem}.tsv" +# with final_file.open("w", encoding="utf8", newline="") as csvfile: +# writer = csv.writer(csvfile, delimiter="\t", quotechar='"') +# for entry in results: +# writer.writerow( +# [ +# "\n ".join(f"{n+1}. {m}" for n, m in enumerate(entry.meanings)), +# PinyinToneConverter().convert_text(entry.pinyin), +# entry.simplified, +# entry.traditional, +# ] +# ) def output_anki_dictionary(out_file, results): final_file = out_file.parent / f"{out_file.stem}.apkg" deck_name = "::".join(out_file.relative_to(OUTPUT).parts[:-1] + (out_file.stem,)) deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name) - for entry in results: + package = Package(deck) + audios = [] + for entry, audio in results: note = Note( model=HSK_MODEL, fields=[ @@ -227,11 +246,14 @@ def output_anki_dictionary(out_file, results): PinyinToneConverter().convert_text(entry.pinyin), entry.simplified, entry.traditional, + f"[sound:{audio.name}]" ], ) + audios.append(audio) deck.add_note(note) - Package(deck).write_to_file(final_file) - + package.media_files = audios + package.write_to_file(final_file) + def output_anki_text(out_file, results): final_file = out_file.parent / f"{out_file.stem}.apkg" @@ -247,14 +269,15 @@ def output_anki_text(out_file, results): def main(): - in_file, out_file, file_type = process_files() + in_file, out_file, resources, file_type = process_files() if PHRASES_TYPE in in_file.suffixes: create_translator() results = translator_process(in_file) output_anki_text(out_file, results) elif DICT_TYPE in in_file.suffixes: + tts = create_tts() dictionary = create_cedict() - results = dictionary_process(dictionary, in_file) + results = dictionary_process(dictionary, tts, in_file, resources) output_anki_dictionary(out_file, results) else: raise TypeError("Error, filetype not especified!") diff --git a/anki_hsk_creator.egg-info/PKG-INFO b/anki_hsk_creator.egg-info/PKG-INFO index bef5ae1..faee22e 100644 --- a/anki_hsk_creator.egg-info/PKG-INFO +++ b/anki_hsk_creator.egg-info/PKG-INFO @@ -6,5 +6,6 @@ Requires-Dist: cedict-utils Requires-Dist: pinyin-tone-converter Requires-Dist: genanki Requires-Dist: argostranslate +Requires-Dist: chatterbox-tts Dynamic: license-file Dynamic: requires-dist diff --git a/anki_hsk_creator.egg-info/requires.txt b/anki_hsk_creator.egg-info/requires.txt index de742a5..4406c32 100644 --- a/anki_hsk_creator.egg-info/requires.txt +++ b/anki_hsk_creator.egg-info/requires.txt @@ -2,3 +2,4 @@ cedict-utils pinyin-tone-converter genanki argostranslate +chatterbox-tts diff --git a/data/input/HSK1/HSK1-1.dictionary.txt b/data/input/HSK1/HSK1-1.dictionary.txt index 3ec388f..828c31f 100644 --- a/data/input/HSK1/HSK1-1.dictionary.txt +++ b/data/input/HSK1/HSK1-1.dictionary.txt @@ -1,9 +1,7 @@ -你 -好 -您 -你好 -您好 -你们好 -您们好 -对不起 -没关系 \ No newline at end of file +你 ni3 +好 hao3 +您 nin2 +你好 ni3 hao3 +您好 nin2 hao3 +对不起 dui4 bu5 qi3 +没关系 mei2 guan1 xi5 diff --git a/data/output/HSK1/HSK1-1.dictionary.apkg b/data/output/HSK1/HSK1-1.dictionary.apkg index d5ae27839e387d0abdb872f189a92bbe0586d00d..e13640e7ae956b786f26daaca3e41be91ec27f50 100644 GIT binary patch delta 1359 zcmZvcO-~a+9EWLlH`!`RBdZ*I!xx0ocH2^Hq10#(dN3G8!iJz;44Os`MB+^kz@Z<& zOiVnX1e4mAZM&Vm(RvXN#>7KUCh`G1d-Di0({aZ9*J0hUVuK z_2dA3(st+3^5XuYGlUG!)J$?R9-EHGcFTsMBx6zR40}`=qc5i)OT*@wdp`BUn_?1e z2F|*7{b)OIf0TG)aNweZDg&k%sLX=5?c}*66+Fc5*>*ZiDii;iA912KSW-}A8~kd ze1vRlj~%b;Q#6fH-#fyaENugC3CC=_0lY4}%EoKJoxVRHU)E-YiYWBGEWE(ROTdf5 zb8I{hoSg6dm~VsLG#l^wG*#C%;T_;@;4R<{;C0|N;12VL@`V*pC<8A6F9Od4CkL#4 s9+@K-=u!0k)iniSK~+Jvpn{;>zyC-y+qsIkP%Pdw(Dss( z7nzHv+R|$k>mCI0q8_}a7r}mjegH2X>+EElkw5O~d4BVo*_j>Ky-i_nQ!wY}1koPL zp5peWdzW_azuhsBRp?C5%$#nw+U>1(mLRmJTj`El5P6Kg`470Ijo_LtZvA>$MXQrx zeq$f4PhKC#9%y5SMKNcrJy|;`%WCh%)ho-rZgcU(;~%Z>iw-VYwXfMk5B;o>iN9;z z%pEjaCw{x$If~s>=|bu=yw2r?iJP|;mdBTFuysDvpJ(%QbgEIs8%XCH7xG!4px4bP z3qCf_YFUt=SLa=;Kd3(J1iRT54vwVzIFC_s?tghi`vK^Ov=4Ki_C3(A&_2w|wC{kv zOZzalY2O5Wi}qn|(7q1(CEAC%M*9lrt69I;AFw=abU!yQ;tzR9$Vmse^+)|dz|rIA za5OpU9F=tH>5{(EA0#}BIYt~qjy}f<$1+Ekqs`IcXmBiHwCEu;9;@lp>hTzFtV)iT z=z=5>8hOk-AZY|VWbTo1A9#hiL&nR%-I3qHU)CfAn-xZGG1tks0ldUqA>$fwe7=nd zeI>hTGM?y?D9aM_7#zW)->;R`GX^62UU iMjj)Fk;zDBr2PAjL}!M_