add dictation model
This commit is contained in:
@@ -64,6 +64,23 @@ PHRASE_MODEL = Model(
|
|||||||
css=CSS,
|
css=CSS,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Anki note model used for dictation cards.
# Front: the audio plus a type-in answer box for the phrase ({{type:Phrase}}).
# Back: the front side again, followed by the phrase and its translation.
# NOTE(review): the display name was "Phrase Model", which looks copy-pasted
# from PHRASE_MODEL; renamed so the note type is distinguishable inside Anki.
# Confirm no existing collection relies on the old name.
DICTATION_MODEL = Model(
    3187277536,
    "Dictation Model",
    fields=[
        {"name": "Translated"},
        {"name": "Phrase"},
        {"name": "Audio"},
    ],
    templates=[
        {
            "name": "Card 1",
            "qfmt": "{{Audio}}<br>{{type:Phrase}}",
            "afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}<br>{{Translated}}',
        },
    ],
    css=CSS,
)
|
||||||
|
|
||||||
HSK_MODEL = Model(
|
HSK_MODEL = Model(
|
||||||
1708536519,
|
1708536519,
|
||||||
@@ -122,16 +139,52 @@ HSK_MODEL = Model(
|
|||||||
# Proccess
|
# Proccess
|
||||||
|
|
||||||
|
|
||||||
|
def output_anki_dictation(
    process_file: ProcessFile, results: list[DictionaryResult]
) -> Path:
    """Write the dictation results to an Anki ``.apkg`` package.

    One note per result is added to a fresh deck using ``DICTATION_MODEL``;
    the note fields are the translation, the original line and a
    ``[sound:...]`` tag pointing at the result's audio file. Every audio file
    is attached to the package as media.

    NOTE(review): each result is read through ``translated``, ``line`` and
    ``audio_path``; confirm the ``DictionaryResult`` annotation is the
    intended type for this function.

    Returns the path of the written package: ``process_file.output_name``
    with its suffix replaced by ``.apkg``.
    """
    final_file = process_file.output_name.with_suffix(".apkg")

    # Deck name: the input file's parent folders plus the output stem,
    # joined with "::".
    parent_parts = process_file.input_file.parts[:-1]
    deck_name = "::".join(parent_parts + (process_file.output_name.stem,))

    deck_id = random.randrange(1 << 30, 1 << 31)
    description = (
        f"Deck for {final_file.name}, "
        "created in https://www.wolfang.info.ve/hskankicreator/"
    )
    deck = Deck(deck_id, deck_name, description)
    package = Package(deck)

    media = []
    for entry in results:
        sound_tag = f"[sound:{entry.audio_path.name}]"
        card_fields = [entry.translated, entry.line, sound_tag]
        deck.add_note(Note(model=DICTATION_MODEL, fields=card_fields))
        media.append(entry.audio_path)

    package.media_files = media
    package.write_to_file(final_file)
    return final_file
|
||||||
|
|
||||||
|
|
||||||
def output_anki_dictionary(
|
def output_anki_dictionary(
|
||||||
process_file: ProcessFile, results: list[DictionaryResult]
|
process_file: ProcessFile, results: list[DictionaryResult]
|
||||||
) -> Path:
|
) -> Path:
|
||||||
"""Creates an anki file from a dictionary results"""
|
"""Creates an anki file from a dictionary results"""
|
||||||
final_file = process_file.output_name.with_suffix(".apkg")
|
final_file = process_file.output_name.with_suffix(".apkg")
|
||||||
print()
|
|
||||||
deck_name = "::".join(
|
deck_name = "::".join(
|
||||||
process_file.input_file.parts[:-1] + (process_file.output_name.stem,)
|
process_file.input_file.parts[:-1] + (process_file.output_name.stem,)
|
||||||
)
|
)
|
||||||
deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name)
|
deck = Deck(
|
||||||
|
random.randrange(1 << 30, 1 << 31),
|
||||||
|
deck_name,
|
||||||
|
f"Deck for {final_file.name}, "
|
||||||
|
"created in https://www.wolfang.info.ve/hskankicreator/",
|
||||||
|
)
|
||||||
package = Package(deck)
|
package = Package(deck)
|
||||||
audios = []
|
audios = []
|
||||||
for result in results:
|
for result in results:
|
||||||
@@ -161,7 +214,12 @@ def output_anki_phrase(
|
|||||||
deck_name = "::".join(
|
deck_name = "::".join(
|
||||||
process_file.input_file.parts[:-1] + (process_file.output_name.stem,)
|
process_file.input_file.parts[:-1] + (process_file.output_name.stem,)
|
||||||
)
|
)
|
||||||
deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name)
|
deck = Deck(
|
||||||
|
random.randrange(1 << 30, 1 << 31),
|
||||||
|
deck_name,
|
||||||
|
f"Deck for {final_file.name}, "
|
||||||
|
"created in https://www.wolfang.info.ve/hskankicreator/",
|
||||||
|
)
|
||||||
package = Package(deck)
|
package = Package(deck)
|
||||||
audios = []
|
audios = []
|
||||||
for result in results:
|
for result in results:
|
||||||
|
|||||||
@@ -8,9 +8,26 @@ from pathlib import Path
|
|||||||
|
|
||||||
# Local
|
# Local
|
||||||
from . import DATA_FOLDER
|
from . import DATA_FOLDER
|
||||||
from .anki_generation import output_anki_dictionary, output_anki_phrase
|
from .anki_generation import (
|
||||||
from .constants import DICT_TYPE, INPUT, LANGUAGES, OUTPUT, PHRASES_TYPE, RESOURCES
|
output_anki_dictation,
|
||||||
from .proccessor import dictionary_pre_process, dictionary_process, translator_process
|
output_anki_dictionary,
|
||||||
|
output_anki_phrase,
|
||||||
|
)
|
||||||
|
from .constants import (
|
||||||
|
DICTATION_TYPE,
|
||||||
|
DICT_TYPE,
|
||||||
|
INPUT,
|
||||||
|
LANGUAGES,
|
||||||
|
OUTPUT,
|
||||||
|
PHRASES_TYPE,
|
||||||
|
RESOURCES,
|
||||||
|
)
|
||||||
|
from .proccessor import (
|
||||||
|
dictation_process,
|
||||||
|
dictionary_pre_process,
|
||||||
|
dictionary_process,
|
||||||
|
translator_process,
|
||||||
|
)
|
||||||
from .utility import CCCEDICT, TRANS, TTS, ProcessFile
|
from .utility import CCCEDICT, TRANS, TTS, ProcessFile
|
||||||
|
|
||||||
# interface
|
# interface
|
||||||
@@ -74,7 +91,7 @@ def create_folder(file_path: Path) -> ProcessFile:
|
|||||||
|
|
||||||
|
|
||||||
def delete_folder(file_path: Path):
|
def delete_folder(file_path: Path):
|
||||||
"""delete an empty folder in file_path"""
|
"""Delete an empty folder in file_path"""
|
||||||
input_folder = INPUT / file_path
|
input_folder = INPUT / file_path
|
||||||
if input_folder.exists():
|
if input_folder.exists():
|
||||||
if any(Path("some/path/here").iterdir()):
|
if any(Path("some/path/here").iterdir()):
|
||||||
@@ -136,10 +153,12 @@ def create_input_file(
|
|||||||
it is created and the file placed inside.
|
it is created and the file placed inside.
|
||||||
returns the relative path for future processing
|
returns the relative path for future processing
|
||||||
|
|
||||||
valid file_types: ".phrases", ".dictionary"
|
valid file_types: ".phrases", ".dictionary", ".dictation"
|
||||||
"""
|
"""
|
||||||
if file_type not in (PHRASES_TYPE, DICT_TYPE):
|
if file_type not in (PHRASES_TYPE, DICT_TYPE, DICTATION_TYPE):
|
||||||
raise ValueError(f"file_type {file_type} not in {(PHRASES_TYPE, DICT_TYPE)}")
|
raise ValueError(
|
||||||
|
f"file_type {file_type} not in {(PHRASES_TYPE, DICT_TYPE, DICTATION_TYPE)}"
|
||||||
|
)
|
||||||
filename = f"{name}{file_type}.txt"
|
filename = f"{name}{file_type}.txt"
|
||||||
relative = sub_folder / filename
|
relative = sub_folder / filename
|
||||||
# write file
|
# write file
|
||||||
@@ -152,6 +171,7 @@ def create_input_file(
|
|||||||
|
|
||||||
|
|
||||||
def write_input_file(process_file: ProcessFile, text: str):
|
def write_input_file(process_file: ProcessFile, text: str):
|
||||||
|
"""Write an input file"""
|
||||||
with process_file.absolute_input_file.open(
|
with process_file.absolute_input_file.open(
|
||||||
"w", encoding="utf8", newline="\n"
|
"w", encoding="utf8", newline="\n"
|
||||||
) as file:
|
) as file:
|
||||||
@@ -162,6 +182,7 @@ def write_input_file(process_file: ProcessFile, text: str):
|
|||||||
|
|
||||||
|
|
||||||
def write_resource_file(process_file: ProcessFile, language_id: str, text: str):
|
def write_resource_file(process_file: ProcessFile, language_id: str, text: str):
|
||||||
|
"""Write a resource file"""
|
||||||
process_file.language_id = language_id
|
process_file.language_id = language_id
|
||||||
with process_file.dictionary_resource_file.open(
|
with process_file.dictionary_resource_file.open(
|
||||||
"w", encoding="utf8", newline="\n"
|
"w", encoding="utf8", newline="\n"
|
||||||
@@ -184,13 +205,28 @@ def pre_process_a_dictionary_file(process_file: ProcessFile, language_id: str):
|
|||||||
|
|
||||||
|
|
||||||
def process_a_dictionary_file(process_file: ProcessFile, language_id: str) -> Path:
|
def process_a_dictionary_file(process_file: ProcessFile, language_id: str) -> Path:
|
||||||
|
"""Process a dictionary file"""
|
||||||
TTS.create_tts()
|
TTS.create_tts()
|
||||||
process_file.language_id = language_id
|
process_file.language_id = language_id
|
||||||
results = dictionary_process(process_file)
|
results = dictionary_process(process_file)
|
||||||
return output_anki_dictionary(process_file, results)
|
return output_anki_dictionary(process_file, results)
|
||||||
|
|
||||||
|
|
||||||
|
def process_a_dictation_file(process_file: ProcessFile, language_id: str) -> Path:
    """Process a dictation file and return the path of the generated package.

    Sets up the TTS engine and a translator from ``LANGUAGES.CN`` to
    ``language_id``, reads the input file, splits its text into sentences on
    the Chinese full stop ("。") and hands the sentences to
    ``dictation_process``. The results are written out with
    ``output_anki_dictation``.

    Empty pieces are discarded: splitting text that ends with "。" (or that
    contains blank stretches between full stops) yields empty strings, which
    would otherwise turn into empty cards.
    """
    TTS.create_tts()
    TRANS.create_translator(LANGUAGES.CN, language_id)
    process_file.language_id = language_id
    with process_file.absolute_input_file.open(
        "r", encoding="utf8", newline="\n"
    ) as file:
        pieces = (piece.strip() for piece in file.read().split("。"))
        # Keep only sentences that still have content after stripping.
        text_lines = [piece for piece in pieces if piece]
    results = dictation_process(text_lines, process_file)
    return output_anki_dictation(process_file, results)
|
||||||
|
|
||||||
|
|
||||||
def process_a_phrases_file(process_file: ProcessFile, language_id: str) -> Path:
|
def process_a_phrases_file(process_file: ProcessFile, language_id: str) -> Path:
|
||||||
|
"""Process a phrases file"""
|
||||||
process_file.language_id = language_id
|
process_file.language_id = language_id
|
||||||
TTS.create_tts()
|
TTS.create_tts()
|
||||||
TRANS.create_translator(LANGUAGES.CN, language_id)
|
TRANS.create_translator(LANGUAGES.CN, language_id)
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ RESOURCES.mkdir(exist_ok=True, parents=True)
|
|||||||
# File Types
|
# File Types
|
||||||
PHRASES_TYPE = ".phrases"
|
PHRASES_TYPE = ".phrases"
|
||||||
DICT_TYPE = ".dictionary"
|
DICT_TYPE = ".dictionary"
|
||||||
|
DICTATION_TYPE = ".dictation"
|
||||||
|
|
||||||
|
|
||||||
class LANGUAGES:
|
class LANGUAGES:
|
||||||
|
|||||||
@@ -19,6 +19,26 @@ DIALECT = "excel-tab"
|
|||||||
# Results Classes
|
# Results Classes
|
||||||
|
|
||||||
|
|
||||||
|
def dictation_process(
    text_lines: list[str], process_file: ProcessFile
) -> list[TranslationResult]:
    """Generate audio and a translation for every dictation line.

    For each non-blank line an audio file ``N<index>.wav`` is looked up in
    ``process_file.resources`` and generated with the TTS model only when it
    does not exist yet. The line is then translated from ``LANGUAGES.CN`` to
    ``process_file.language_id``.

    Blank lines are skipped so that no audio, translation or result is
    produced for empty text. The index used in the file name is the line's
    position in ``text_lines``, so skipping a blank line does not renumber
    the remaining files.

    Returns one ``TranslationResult`` per non-blank line, in input order.
    """
    results = []
    for n, line in enumerate(text_lines):
        line = line.strip()
        if not line:
            # Nothing to speak or translate.
            continue
        # "d" instead of "n": the file name must not depend on the locale
        # (the "n" type may insert grouping separators for large indexes).
        audio_path = process_file.resources / f"N{n:03d}.wav"
        if not audio_path.exists():
            # The full stop is appended again for the TTS input only.
            audio = TTS.MODEL.generate(f"{line}。", language_id=LANGUAGES.CN)
            torchaudio.save(audio_path, audio, TTS.MODEL.sr)
        translated = argostranslate.translate.translate(
            line, LANGUAGES.CN, process_file.language_id
        )
        results.append(
            TranslationResult(process_file.language_id, translated, line, audio_path)
        )
    return results
|
||||||
|
|
||||||
|
|
||||||
def translator_process(
|
def translator_process(
|
||||||
text_lines: list[str], process_file: ProcessFile
|
text_lines: list[str], process_file: ProcessFile
|
||||||
) -> list[TranslationResult]:
|
) -> list[TranslationResult]:
|
||||||
|
|||||||
Reference in New Issue
Block a user