diff --git a/anki-hsk-creator/__init__.py b/anki-hsk-creator/__init__.py
index 5ced5d1..933fc6a 100644
--- a/anki-hsk-creator/__init__.py
+++ b/anki-hsk-creator/__init__.py
@@ -1,2 +1,8 @@
"""anki-hsk-creator"""
-HF_TOKEN = "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"
\ No newline at end of file
+
+import os
+
+# Credentials must never be committed: the token that used to be hard-coded
+# here is exposed in the repository history and has to be revoked.
+# Export HF_TOKEN in the shell (or run `huggingface-cli login`) instead.
+HF_TOKEN = os.environ.get("HF_TOKEN")
diff --git a/anki-hsk-creator/__main__.py b/anki-hsk-creator/__main__.py
index a352c35..8df4382 100644
--- a/anki-hsk-creator/__main__.py
+++ b/anki-hsk-creator/__main__.py
@@ -10,6 +10,8 @@ from genanki import Deck, Note, Model, Package
import argostranslate.package
import argostranslate.translate
from chatterbox.mtl_tts import ChatterboxMultilingualTTS
+import torch
+import torchaudio
## Constants
@@ -68,7 +70,7 @@ HSK_FRONT_TEMPLATE = """
{{English}}
-{{MyMedia}}
+{{Audio}}
"""
HSK_MODEL = Model(
@@ -92,7 +94,7 @@ HSK_MODEL = Model(
"name": "Card 2",
"qfmt": "
{{Simplified}}
"
"{{Traditional}}
",
- "afmt": '{{FrontSide}}
{{Pinyin}}
{{English}}
{{MyMedia}}',
+ "afmt": '{{FrontSide}}
{{Pinyin}}
{{English}}
{{Audio}}',
},
],
css=CSS,
@@ -128,7 +130,14 @@ def create_translator():
argostranslate.package.install_from_path(package_to_install.download())
def create_tts():
- tts = ChatterboxMultilingualTTS.from_pretrained(device="cuda")
+    """Load the multilingual Chatterbox TTS model on the best available device."""
+    # Start from the CPU fallback and upgrade to an accelerator when torch reports one.
+    device = "cpu"
+    if torch.cuda.is_available():
+        device = "cuda"
+    elif torch.backends.mps.is_available():
+        device = "mps"
+    tts = ChatterboxMultilingualTTS.from_pretrained(device=device)
return tts
## Main
@@ -166,40 +175,59 @@ def dictionary_process(dictionary, tts, in_file, resources):
"""Process dictionary files"""
words_list = in_file.open(encoding="utf8").read().strip().split("\n")
results = []
- with in_file.open("w", encoding="utf8") as input_file:
- for words in words_list:
- word = words.split()[0]
- pinyin = " ".join(words.split()[1:]) if len(words.split()) > 1 else None
- if v := dictionary.get(word):
- if len(v) > 1:
- print(f"\nWARNING: {word} has multiple meanings:")
- if pinyin and pinyin != "ERROR":
- ml = list(filter(lambda x: x.pinyin == pinyin, v))
+    # in_file is rewritten in place as "word<TAB>pinyin"; opening it with "w"
+    # truncates it immediately, so any failure must put the original list back.
+    try:
+        with in_file.open("w", encoding="utf8") as input_file:
+            for words in words_list:
+                # Blank lines carry no word; skip them rather than fail on [0].
+                if not words.strip():
+                    continue
+                word, *pinyin_parts = words.split()
+                pinyin = " ".join(pinyin_parts) or None
+                if v := dictionary.get(word):
+                    if len(v) > 1:
+                        print(f"\nWARNING: {word} has multiple meanings:")
+                        ml = v
+                        if pinyin and pinyin != "ERROR":
+                            # Keep every candidate when the stored pinyin matches none,
+                            # otherwise ml[0] below would raise IndexError.
+                            ml = [x for x in v if x.pinyin == pinyin] or v
+                        if len(ml) > 1:
+                            for n, w in enumerate(ml, start=1):
+                                print(f"{n} - {w}")
+                                for m in w.meanings:
+                                    print(f"\t{m}")
+                            s = None
+                            # The menu numbers the filtered list, so validate and index
+                            # against ml; isdecimal() guarantees int(s) cannot raise.
+                            while not s or not s.isdecimal() or not (1 <= int(s) <= len(ml)):
+                                s = input(f"Please select the correct word [1-{len(ml)}]: ")
+                            v = ml[int(s) - 1]
+                        else:
+                            v = ml[0]
else:
- ml = v
- if len(ml) > 1:
- for n, w in enumerate(ml):
- print(f"{n+1} - {w}")
- for m in w.meanings:
- print(f"\t{m}")
- s = None
- while not s or not s.isnumeric() or not (1 <= int(s) <= len(v)):
- s = input(f"Please select the correct word [1-{len(v)}]: ")
- v = v[int(s) - 1]
- else:
- v = ml[0]
+                        v = v[0]
+                    audio_path = resources / f"{word}.wav"
+                    # Reuse audio already written by an earlier run instead of synthesizing it again.
+                    if not audio_path.exists():
+                        audio = tts.generate(word, language_id="zh")
+                        torchaudio.save(audio_path, audio, tts.sr)
+                    input_file.write(f"{word}\t{v.pinyin}\n")
+                    results.append((v, audio_path))
else:
- v = v[0]
- audio = tts.generate(word, language_id="zh")
- audio_path = resources / f"{word}.wav"
- ta.save(audio_path, audio, tts.sr)
- input_file.write(f"{word}\t{v.pinyin}\n")
- results.append((v, audio_path))
- else:
- print("============================================")
- print(f"===================>ERROR: {word} not found")
- print("============================================")
- input_file.write(f"{word}\tERROR\n")
+                    print("============================================")
+                    print(f"===================>ERROR: {word} not found")
+                    print("============================================")
+                    input_file.write(f"{word}\tERROR\n")
+    except BaseException as exc:
+        # Restore the original word list, including on Ctrl+C at the prompt.
+        with in_file.open("w", encoding="utf8") as input_file:
+            input_file.write("\n".join(words_list))
+        if not isinstance(exc, Exception):
+            raise
+        # Keep the best-effort behaviour (return what was processed) but say why.
+        print(f"ERROR: processing of {in_file} stopped early and the file was restored: {exc!r}")
return results
diff --git a/anki_hsk_creator.egg-info/PKG-INFO b/anki_hsk_creator.egg-info/PKG-INFO
index faee22e..4ecf825 100644
--- a/anki_hsk_creator.egg-info/PKG-INFO
+++ b/anki_hsk_creator.egg-info/PKG-INFO
@@ -7,5 +7,8 @@ Requires-Dist: pinyin-tone-converter
Requires-Dist: genanki
Requires-Dist: argostranslate
Requires-Dist: chatterbox-tts
+Requires-Dist: torch
+Requires-Dist: torchaudio
+Requires-Dist: torchcodec
Dynamic: license-file
Dynamic: requires-dist
diff --git a/anki_hsk_creator.egg-info/requires.txt b/anki_hsk_creator.egg-info/requires.txt
index 4406c32..4254d2f 100644
--- a/anki_hsk_creator.egg-info/requires.txt
+++ b/anki_hsk_creator.egg-info/requires.txt
@@ -3,3 +3,6 @@ pinyin-tone-converter
genanki
argostranslate
chatterbox-tts
+torch
+torchaudio
+torchcodec
diff --git a/data/output/HSK1/HSK1-1.dictionary.apkg b/data/output/HSK1/HSK1-1.dictionary.apkg
index e13640e..0b4b811 100644
Binary files a/data/output/HSK1/HSK1-1.dictionary.apkg and b/data/output/HSK1/HSK1-1.dictionary.apkg differ
diff --git a/data/resources/HSK1/HSK1-1.dictionary/你.wav b/data/resources/HSK1/HSK1-1.dictionary/你.wav
new file mode 100644
index 0000000..cbdb916
Binary files /dev/null and b/data/resources/HSK1/HSK1-1.dictionary/你.wav differ
diff --git a/data/resources/HSK1/HSK1-1.dictionary/你好.wav b/data/resources/HSK1/HSK1-1.dictionary/你好.wav
new file mode 100644
index 0000000..0d545ac
Binary files /dev/null and b/data/resources/HSK1/HSK1-1.dictionary/你好.wav differ
diff --git a/data/resources/HSK1/HSK1-1.dictionary/好.wav b/data/resources/HSK1/HSK1-1.dictionary/好.wav
new file mode 100644
index 0000000..51c43a4
Binary files /dev/null and b/data/resources/HSK1/HSK1-1.dictionary/好.wav differ
diff --git a/data/resources/HSK1/HSK1-1.dictionary/对不起.wav b/data/resources/HSK1/HSK1-1.dictionary/对不起.wav
new file mode 100644
index 0000000..b6a56f2
Binary files /dev/null and b/data/resources/HSK1/HSK1-1.dictionary/对不起.wav differ
diff --git a/data/resources/HSK1/HSK1-1.dictionary/您.wav b/data/resources/HSK1/HSK1-1.dictionary/您.wav
new file mode 100644
index 0000000..27b100e
Binary files /dev/null and b/data/resources/HSK1/HSK1-1.dictionary/您.wav differ
diff --git a/data/resources/HSK1/HSK1-1.dictionary/您好.wav b/data/resources/HSK1/HSK1-1.dictionary/您好.wav
new file mode 100644
index 0000000..69873d1
Binary files /dev/null and b/data/resources/HSK1/HSK1-1.dictionary/您好.wav differ
diff --git a/data/resources/HSK1/HSK1-1.dictionary/没关系.wav b/data/resources/HSK1/HSK1-1.dictionary/没关系.wav
new file mode 100644
index 0000000..fbfaef7
Binary files /dev/null and b/data/resources/HSK1/HSK1-1.dictionary/没关系.wav differ
diff --git a/setup.py b/setup.py
index 7776c9d..7b8f39d 100644
--- a/setup.py
+++ b/setup.py
@@ -10,6 +10,8 @@ setup(
"genanki",
"argostranslate",
"chatterbox-tts",
+ "torch",
"torchaudio",
+ "torchcodec"
],
)