diff --git a/anki-hsk-creator/__init__.py b/anki-hsk-creator/__init__.py index 5ced5d1..933fc6a 100644 --- a/anki-hsk-creator/__init__.py +++ b/anki-hsk-creator/__init__.py @@ -1,2 +1,5 @@ """anki-hsk-creator""" -HF_TOKEN = "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" \ No newline at end of file + +import os + +os.environ["HF_TOKEN"] = "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" diff --git a/anki-hsk-creator/__main__.py b/anki-hsk-creator/__main__.py index a352c35..8df4382 100644 --- a/anki-hsk-creator/__main__.py +++ b/anki-hsk-creator/__main__.py @@ -10,6 +10,8 @@ from genanki import Deck, Note, Model, Package import argostranslate.package import argostranslate.translate from chatterbox.mtl_tts import ChatterboxMultilingualTTS +import torch +import torchaudio ## Constants @@ -68,7 +70,7 @@ HSK_FRONT_TEMPLATE = """ {{English}}
-{{MyMedia}} +{{Audio}} """ HSK_MODEL = Model( @@ -92,7 +94,7 @@ HSK_MODEL = Model( "name": "Card 2", "qfmt": "
{{Simplified}}

" "{{Traditional}}
", - "afmt": '{{FrontSide}}
{{Pinyin}}
{{English}}
{{MyMedia}}', + "afmt": '{{FrontSide}}
{{Pinyin}}
{{English}}
{{Audio}}', }, ], css=CSS, @@ -128,7 +130,14 @@ def create_translator(): argostranslate.package.install_from_path(package_to_install.download()) def create_tts(): - tts = ChatterboxMultilingualTTS.from_pretrained(device="cuda") + # Automatically detect the best available device + if torch.cuda.is_available(): + device = "cuda" + elif torch.backends.mps.is_available(): + device = "mps" + else: + device = "cpu" + tts = ChatterboxMultilingualTTS.from_pretrained(device=device) return tts ## Main @@ -166,40 +175,45 @@ def dictionary_process(dictionary, tts, in_file, resources): """Process dictionary files""" words_list = in_file.open(encoding="utf8").read().strip().split("\n") results = [] - with in_file.open("w", encoding="utf8") as input_file: - for words in words_list: - word = words.split()[0] - pinyin = " ".join(words.split()[1:]) if len(words.split()) > 1 else None - if v := dictionary.get(word): - if len(v) > 1: - print(f"\nWARNING: {word} has multiple meanings:") - if pinyin and pinyin != "ERROR": - ml = list(filter(lambda x: x.pinyin == pinyin, v)) + try: + with in_file.open("w", encoding="utf8") as input_file: + for words in words_list: + word = words.split()[0] + pinyin = " ".join(words.split()[1:]) if len(words.split()) > 1 else None + if v := dictionary.get(word): + if len(v) > 1: + print(f"\nWARNING: {word} has multiple meanings:") + if pinyin and pinyin != "ERROR": + ml = list(filter(lambda x: x.pinyin == pinyin, v)) + else: + ml = v + if len(ml) > 1: + for n, w in enumerate(ml): + print(f"{n+1} - {w}") + for m in w.meanings: + print(f"\t{m}") + s = None + while not s or not s.isnumeric() or not (1 <= int(s) <= len(v)): + s = input(f"Please select the correct word [1-{len(v)}]: ") + v = v[int(s) - 1] + else: + v = ml[0] else: - ml = v - if len(ml) > 1: - for n, w in enumerate(ml): - print(f"{n+1} - {w}") - for m in w.meanings: - print(f"\t{m}") - s = None - while not s or not s.isnumeric() or not (1 <= int(s) <= len(v)): - s = input(f"Please select the correct word [1-{len(v)}]: ") - v = v[int(s) - 1] - else: - v = ml[0] + v = v[0] + audio_path = resources / f"{word}.wav" + if not audio_path.exists(): + audio = tts.generate(word, language_id="zh") + torchaudio.save(audio_path, audio, tts.sr) + input_file.write(f"{word}\t{v.pinyin}\n") + results.append((v, audio_path)) else: - v = v[0] - audio = tts.generate(word, language_id="zh") - audio_path = resources / f"{word}.wav" - ta.save(audio_path, audio, tts.sr) - input_file.write(f"{word}\t{v.pinyin}\n") - results.append((v, audio_path)) - else: - print("============================================") - print(f"===================>ERROR: {word} not found") - print("============================================") - input_file.write(f"{word}\tERROR\n") + print("============================================") + print(f"===================>ERROR: {word} not found") + print("============================================") + input_file.write(f"{word}\tERROR\n") + except Exception: + with in_file.open("w", encoding="utf8") as input_file: + input_file.write("\n".join(words_list)) return results diff --git a/anki_hsk_creator.egg-info/PKG-INFO b/anki_hsk_creator.egg-info/PKG-INFO index faee22e..4ecf825 100644 --- a/anki_hsk_creator.egg-info/PKG-INFO +++ b/anki_hsk_creator.egg-info/PKG-INFO @@ -7,5 +7,8 @@ Requires-Dist: pinyin-tone-converter Requires-Dist: genanki Requires-Dist: argostranslate Requires-Dist: chatterbox-tts +Requires-Dist: torch +Requires-Dist: torchaudio +Requires-Dist: torchcodec Dynamic: license-file Dynamic: requires-dist diff --git a/anki_hsk_creator.egg-info/requires.txt b/anki_hsk_creator.egg-info/requires.txt index 4406c32..4254d2f 100644 --- a/anki_hsk_creator.egg-info/requires.txt +++ b/anki_hsk_creator.egg-info/requires.txt @@ -3,3 +3,6 @@ pinyin-tone-converter genanki argostranslate chatterbox-tts +torch +torchaudio +torchcodec diff --git a/data/output/HSK1/HSK1-1.dictionary.apkg b/data/output/HSK1/HSK1-1.dictionary.apkg index e13640e..0b4b811 100644 Binary files a/data/output/HSK1/HSK1-1.dictionary.apkg and b/data/output/HSK1/HSK1-1.dictionary.apkg differ diff --git a/data/resources/HSK1/HSK1-1.dictionary/你.wav b/data/resources/HSK1/HSK1-1.dictionary/你.wav new file mode 100644 index 0000000..cbdb916 Binary files /dev/null and b/data/resources/HSK1/HSK1-1.dictionary/你.wav differ diff --git a/data/resources/HSK1/HSK1-1.dictionary/你好.wav b/data/resources/HSK1/HSK1-1.dictionary/你好.wav new file mode 100644 index 0000000..0d545ac Binary files /dev/null and b/data/resources/HSK1/HSK1-1.dictionary/你好.wav differ diff --git a/data/resources/HSK1/HSK1-1.dictionary/好.wav b/data/resources/HSK1/HSK1-1.dictionary/好.wav new file mode 100644 index 0000000..51c43a4 Binary files /dev/null and b/data/resources/HSK1/HSK1-1.dictionary/好.wav differ diff --git a/data/resources/HSK1/HSK1-1.dictionary/对不起.wav b/data/resources/HSK1/HSK1-1.dictionary/对不起.wav new file mode 100644 index 0000000..b6a56f2 Binary files /dev/null and b/data/resources/HSK1/HSK1-1.dictionary/对不起.wav differ diff --git a/data/resources/HSK1/HSK1-1.dictionary/您.wav b/data/resources/HSK1/HSK1-1.dictionary/您.wav new file mode 100644 index 0000000..27b100e Binary files /dev/null and b/data/resources/HSK1/HSK1-1.dictionary/您.wav differ diff --git a/data/resources/HSK1/HSK1-1.dictionary/您好.wav b/data/resources/HSK1/HSK1-1.dictionary/您好.wav new file mode 100644 index 0000000..69873d1 Binary files /dev/null and b/data/resources/HSK1/HSK1-1.dictionary/您好.wav differ diff --git a/data/resources/HSK1/HSK1-1.dictionary/没关系.wav b/data/resources/HSK1/HSK1-1.dictionary/没关系.wav new file mode 100644 index 0000000..fbfaef7 Binary files /dev/null and b/data/resources/HSK1/HSK1-1.dictionary/没关系.wav differ diff --git a/setup.py b/setup.py index 7776c9d..7b8f39d 100644 --- a/setup.py +++ b/setup.py @@ -10,6 +10,8 @@ setup( "genanki", "argostranslate", "chatterbox-tts", + "torch", "torchaudio", + "torchcodec" ], )