diff --git a/src/anki_hsk_creator/anki_generation.py b/src/anki_hsk_creator/anki_generation.py
index 8c3ea85..c005bdf 100644
--- a/src/anki_hsk_creator/anki_generation.py
+++ b/src/anki_hsk_creator/anki_generation.py
@@ -64,6 +64,23 @@ PHRASE_MODEL = Model(
css=CSS,
)
+# Note type for dictation cards. The front shows the Audio field followed by
+# Anki's type-in-the-answer box for Phrase; the back repeats the front and
+# then shows the Phrase and Translated fields.
+DICTATION_MODEL = Model(
+    3187277536,
+    # Named after this model's purpose rather than reusing "Phrase Model".
+    "Dictation Model",
+    fields=[
+        {"name": "Translated"},
+        {"name": "Phrase"},
+        {"name": "Audio"},
+    ],
+    templates=[
+        {
+            "name": "Card 1",
+            # NOTE(review): the separators between the template fields are
+            # assumed to be <br> / <hr id="answer"> -- confirm against the
+            # other models in this module.
+            "qfmt": "{{Audio}}<br>{{type:Phrase}}",
+            "afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}<br>{{Translated}}',
+        },
+    ],
+    css=CSS,
+)
HSK_MODEL = Model(
1708536519,
@@ -122,16 +139,52 @@ HSK_MODEL = Model(
# Proccess
+def output_anki_dictation(
+    process_file: ProcessFile, results: list[DictionaryResult]
+) -> Path:
+    """Write the dictation results to an ``.apkg`` file and return its path.
+
+    One ``DICTATION_MODEL`` note is added per entry of ``results``, filled
+    from the entry's ``translated``, ``line`` and ``audio_path`` attributes.
+    Every ``audio_path`` is attached to the package as a media file.
+
+    NOTE(review): ``results`` is annotated ``list[DictionaryResult]`` while
+    only ``translated`` / ``line`` / ``audio_path`` are read here -- confirm
+    the annotation matches what callers actually pass.
+    """
+    final_file = process_file.output_name.with_suffix(".apkg")
+    # Deck name: the input file's parent folders plus the output stem,
+    # joined with "::".
+    parent_parts = process_file.input_file.parts[:-1]
+    deck_name = "::".join((*parent_parts, process_file.output_name.stem))
+    deck_id = random.randrange(1 << 30, 1 << 31)
+    description = (
+        f"Deck for {final_file.name}, "
+        "created in https://www.wolfang.info.ve/hskankicreator/"
+    )
+    deck = Deck(deck_id, deck_name, description)
+    package = Package(deck)
+    media = []
+    for item in results:
+        sound_tag = f"[sound:{item.audio_path.name}]"
+        deck.add_note(
+            Note(
+                model=DICTATION_MODEL,
+                fields=[item.translated, item.line, sound_tag],
+            )
+        )
+        media.append(item.audio_path)
+    package.media_files = media
+    package.write_to_file(final_file)
+    return final_file
+
+
def output_anki_dictionary(
process_file: ProcessFile, results: list[DictionaryResult]
) -> Path:
"""Creates an anki file from a dictionary results"""
final_file = process_file.output_name.with_suffix(".apkg")
- print()
deck_name = "::".join(
process_file.input_file.parts[:-1] + (process_file.output_name.stem,)
)
- deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name)
+ deck = Deck(
+ random.randrange(1 << 30, 1 << 31),
+ deck_name,
+ f"Deck for {final_file.name}, "
+ "created in https://www.wolfang.info.ve/hskankicreator/",
+ )
package = Package(deck)
audios = []
for result in results:
@@ -161,7 +214,12 @@ def output_anki_phrase(
deck_name = "::".join(
process_file.input_file.parts[:-1] + (process_file.output_name.stem,)
)
- deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name)
+ deck = Deck(
+ random.randrange(1 << 30, 1 << 31),
+ deck_name,
+ f"Deck for {final_file.name}, "
+ "created in https://www.wolfang.info.ve/hskankicreator/",
+ )
package = Package(deck)
audios = []
for result in results:
diff --git a/src/anki_hsk_creator/api.py b/src/anki_hsk_creator/api.py
index cd19717..ead6b60 100644
--- a/src/anki_hsk_creator/api.py
+++ b/src/anki_hsk_creator/api.py
@@ -8,9 +8,26 @@ from pathlib import Path
# Local
from . import DATA_FOLDER
-from .anki_generation import output_anki_dictionary, output_anki_phrase
-from .constants import DICT_TYPE, INPUT, LANGUAGES, OUTPUT, PHRASES_TYPE, RESOURCES
-from .proccessor import dictionary_pre_process, dictionary_process, translator_process
+from .anki_generation import (
+ output_anki_dictation,
+ output_anki_dictionary,
+ output_anki_phrase,
+)
+from .constants import (
+ DICTATION_TYPE,
+ DICT_TYPE,
+ INPUT,
+ LANGUAGES,
+ OUTPUT,
+ PHRASES_TYPE,
+ RESOURCES,
+)
+from .proccessor import (
+ dictation_process,
+ dictionary_pre_process,
+ dictionary_process,
+ translator_process,
+)
from .utility import CCCEDICT, TRANS, TTS, ProcessFile
# interface
@@ -74,7 +91,7 @@ def create_folder(file_path: Path) -> ProcessFile:
def delete_folder(file_path: Path):
- """delete an empty folder in file_path"""
+ """Delete an empty folder in file_path"""
input_folder = INPUT / file_path
if input_folder.exists():
if any(Path("some/path/here").iterdir()):
@@ -136,10 +153,12 @@ def create_input_file(
it is created and the file placed inside.
returns the relative path for future processing
- valid file_types: ".phrases", ".dictionary"
+ valid file_types: ".phrases", ".dictionary", ".dictation"
"""
- if file_type not in (PHRASES_TYPE, DICT_TYPE):
- raise ValueError(f"file_type {file_type} not in {(PHRASES_TYPE, DICT_TYPE)}")
+ if file_type not in (PHRASES_TYPE, DICT_TYPE, DICTATION_TYPE):
+ raise ValueError(
+ f"file_type {file_type} not in {(PHRASES_TYPE, DICT_TYPE, DICTATION_TYPE)}"
+ )
filename = f"{name}{file_type}.txt"
relative = sub_folder / filename
# write file
@@ -152,6 +171,7 @@ def create_input_file(
def write_input_file(process_file: ProcessFile, text: str):
+ """Write an input file"""
with process_file.absolute_input_file.open(
"w", encoding="utf8", newline="\n"
) as file:
@@ -162,6 +182,7 @@ def write_input_file(process_file: ProcessFile, text: str):
def write_resource_file(process_file: ProcessFile, language_id: str, text: str):
+ """Write a resource file"""
process_file.language_id = language_id
with process_file.dictionary_resource_file.open(
"w", encoding="utf8", newline="\n"
@@ -184,13 +205,28 @@ def pre_process_a_dictionary_file(process_file: ProcessFile, language_id: str):
def process_a_dictionary_file(process_file: ProcessFile, language_id: str) -> Path:
+ """Process a dictionary file into an Anki package.
+
+ Calls TTS.create_tts(), stores language_id on process_file, runs
+ dictionary_process() on it and returns the path produced by
+ output_anki_dictionary().
+ """
TTS.create_tts()
process_file.language_id = language_id
results = dictionary_process(process_file)
return output_anki_dictionary(process_file, results)
+def process_a_dictation_file(process_file: ProcessFile, language_id: str) -> Path:
+    """Process a dictation file into an Anki package.
+
+    The input text is split on the full stop "。" and every non-empty piece
+    is handed to dictation_process() as one dictation line.
+
+    process_file: its absolute_input_file is read as UTF-8 text.
+    language_id: language the lines are translated into (source is
+        LANGUAGES.CN); also stored on process_file.
+
+    Returns the path produced by output_anki_dictation().
+    """
+    TTS.create_tts()
+    TRANS.create_translator(LANGUAGES.CN, language_id)
+    process_file.language_id = language_id
+    with process_file.absolute_input_file.open(
+        "r", encoding="utf8", newline="\n"
+    ) as file:
+        text = file.read()
+    # Splitting on "。" leaves an empty piece after a final full stop (and
+    # between consecutive ones); drop those so that no blank note or
+    # "。"-only audio clip is generated.
+    text_lines = [
+        sentence
+        for sentence in (piece.strip() for piece in text.split("。"))
+        if sentence
+    ]
+    results = dictation_process(text_lines, process_file)
+    return output_anki_dictation(process_file, results)
+
+
def process_a_phrases_file(process_file: ProcessFile, language_id: str) -> Path:
+ """Process a phrases file"""
process_file.language_id = language_id
TTS.create_tts()
TRANS.create_translator(LANGUAGES.CN, language_id)
diff --git a/src/anki_hsk_creator/constants.py b/src/anki_hsk_creator/constants.py
index 2d728be..69078b8 100644
--- a/src/anki_hsk_creator/constants.py
+++ b/src/anki_hsk_creator/constants.py
@@ -20,6 +20,7 @@ RESOURCES.mkdir(exist_ok=True, parents=True)
# File Types
PHRASES_TYPE = ".phrases"
DICT_TYPE = ".dictionary"
+DICTATION_TYPE = ".dictation"  # file_type marker for dictation input files
class LANGUAGES:
diff --git a/src/anki_hsk_creator/proccessor.py b/src/anki_hsk_creator/proccessor.py
index c6b62e5..822a5a1 100644
--- a/src/anki_hsk_creator/proccessor.py
+++ b/src/anki_hsk_creator/proccessor.py
@@ -19,6 +19,26 @@ DIALECT = "excel-tab"
# Results Classes
+def dictation_process(
+    text_lines: list[str], process_file: ProcessFile
+) -> list[TranslationResult]:
+    """Generate audio and a translation for every dictation line.
+
+    For each non-empty entry of ``text_lines`` an ``N<index>.wav`` file is
+    synthesised into ``process_file.resources`` (skipped when the file
+    already exists) and the text is translated from ``LANGUAGES.CN`` into
+    ``process_file.language_id``.
+
+    Returns one ``TranslationResult`` per non-empty line, in input order.
+    """
+    results = []
+    for n, raw_line in enumerate(text_lines):
+        text = raw_line.strip()
+        if not text:
+            # Nothing to speak or translate. The index is still consumed so
+            # the audio file names of the remaining lines do not shift.
+            continue
+        # "d" rather than the locale-aware "n" presentation type: the file
+        # name must not depend on the active locale.
+        audio_path = process_file.resources / f"N{n:03d}.wav"
+        if not audio_path.exists():
+            # The text is sent to the TTS model with a trailing "。" appended.
+            audio = TTS.MODEL.generate(f"{text}。", language_id=LANGUAGES.CN)
+            torchaudio.save(audio_path, audio, TTS.MODEL.sr)
+        translated = argostranslate.translate.translate(
+            text, LANGUAGES.CN, process_file.language_id
+        )
+        results.append(
+            TranslationResult(process_file.language_id, translated, text, audio_path)
+        )
+    return results
+
+
def translator_process(
text_lines: list[str], process_file: ProcessFile
) -> list[TranslationResult]: