diff --git a/src/anki_hsk_creator/anki_generation.py b/src/anki_hsk_creator/anki_generation.py
index 2151ed9..7167a53 100644
--- a/src/anki_hsk_creator/anki_generation.py
+++ b/src/anki_hsk_creator/anki_generation.py
@@ -5,6 +5,7 @@ Produces anki output
 
 # Standard Library
 import random
+from pathlib import Path
 
 # Pip
 from genanki import Deck, Model, Note, Package
@@ -121,7 +122,9 @@ HSK_MODEL = Model(
 
 
 # Proccess
-def output_anki_dictionary(process_file: ProcessFile, results: list[DictionaryResult]):
+def output_anki_dictionary(
+    process_file: ProcessFile, results: list[DictionaryResult]
+) -> Path:
     """Creates an anki file from a dictionary results"""
     final_file = process_file.output_name.with_suffix(".apkg")
     deck_name = "::".join(
@@ -146,9 +149,12 @@ def output_anki_dictionary(process_file: ProcessFile, results: list[DictionaryRe
         deck.add_note(note)
     package.media_files = audios
     package.write_to_file(final_file)
+    return final_file
 
 
-def output_anki_phrase(process_file: ProcessFile, results: list[TranslationResult]):
+def output_anki_phrase(
+    process_file: ProcessFile, results: list[TranslationResult]
+) -> Path:
     """Creates an anki file from a phrases results"""
     final_file = process_file.output_name.with_suffix(".apkg")
     deck_name = "::".join(
@@ -170,3 +176,4 @@ def output_anki_phrase(process_file: ProcessFile, results: list[TranslationResul
         audios.append(result.audio_path)
     package.media_files = audios
     package.write_to_file(final_file)
+    return final_file
diff --git a/src/anki_hsk_creator/api.py b/src/anki_hsk_creator/api.py
index 3500179..357d8f0 100644
--- a/src/anki_hsk_creator/api.py
+++ b/src/anki_hsk_creator/api.py
@@ -9,7 +9,7 @@ from pathlib import Path
 # Local
 from . import DATA_FOLDER
 from .anki_generation import output_anki_dictionary, output_anki_phrase
-from .constants import DICT_TYPE, INPUT, LANGUAGES, PHRASES_TYPE
+from .constants import DICT_TYPE, INPUT, LANGUAGES, OUTPUT, PHRASES_TYPE, RESOURCES
 from .proccessor import dictionary_pre_process, dictionary_process, translator_process
 from .utility import CCCEDICT, TRANS, TTS, ProcessFile
 
@@ -21,6 +21,16 @@ def get_data_folder() -> Path:
     return DATA_FOLDER
 
 
+def get_output_folder() -> Path:
+    """Utility function, return the OUTPUT folder"""
+    return OUTPUT
+
+
+def get_resources_folder() -> Path:
+    """Utility function, return the RESOURCES folder"""
+    return RESOURCES
+
+
 def list_input_files(search_path: Path = Path()) -> list[Path]:
     """Return a list of files relative to the INPUT path"""
     level = INPUT / search_path
@@ -32,6 +42,21 @@ def is_file(file_path: Path) -> bool:
     return (INPUT / file_path).is_file()
 
 
+def read_input_file(file_path: Path) -> str:
+    """Return the full text of an input file given relative to INPUT"""
+    with (INPUT / file_path).open(encoding="utf8", newline="\n") as file:
+        return file.read()
+
+
+def read_dictionary_file(process_file: ProcessFile, language_id: str) -> str:
+    """Set language_id on process_file and return its dictionary resource text"""
+    process_file.language_id = language_id
+    with process_file.dictionary_resource_file.open(
+        encoding="utf8", newline="\n"
+    ) as file:
+        return file.read()
+
+
 def select_file(file_path: Path) -> ProcessFile:
     """Given a relative path from `list_input_files`, return a ProcessFile"""
     if (INPUT / file_path).is_file():
@@ -40,6 +65,40 @@ def select_file(file_path: Path) -> ProcessFile:
     raise ValueError(f"{file_path} is not a file")
 
 
+def list_file_resources(file_path: ProcessFile) -> list[Path]:
+    """Return the paths found directly inside the resources folder of file_path"""
+    return list(file_path.resources.glob("*"))
+
+
+def analize_input_files(search_path: Path = Path()) -> dict[str, list[Path]]:
+    """Analyzes a path and returns its input, resources and output files"""
+    data = {
+        "input": [],
+        "resources": [],
+        "output": [],
+    }
+    if search_path is None:
+        return data
+    input_path = INPUT / search_path
+    if input_path.is_file():
+        process_file = ProcessFile(search_path)
+        res_path = process_file.resources
+        outputs_path = process_file.out_folder
+        data["input"] = [search_path]
+        data["resources"] = [path.relative_to(RESOURCES) for path in res_path.glob("*")]
+        data["output"] = [
+            path.relative_to(OUTPUT)
+            for path in outputs_path.glob(f"{process_file.input_file.stem}*")
+        ]
+    elif input_path.exists():
+        res_path = RESOURCES / search_path
+        outputs_path = OUTPUT / search_path
+        data["input"] = [path.relative_to(INPUT) for path in input_path.glob("*")]
+        data["resources"] = [path.relative_to(RESOURCES) for path in res_path.glob("*")]
+        data["output"] = [path.relative_to(OUTPUT) for path in outputs_path.glob("*")]
+    return data
+
+
 def create_input_file(
     name: str, file_type: str, text: str, sub_folder: Path = Path()
 ) -> ProcessFile:
@@ -57,30 +116,56 @@ def create_input_file(
     # write file
     file_path = INPUT / relative
     file_path.parent.mkdir(exist_ok=True, parents=True)
-    file_path.write_text(text, encoding="utf8")
+    file_path.write_text(text, encoding="utf8", newline="\n")
     # create process_file for future
     process_file = ProcessFile(relative)
     return process_file
 
 
-def process_a_file(process_file: ProcessFile, language_id: str):
-    """From a input_file, a language and an output type, process a file"""
+def write_input_file(process_file: ProcessFile, text: str) -> None:
+    """Overwrite the input file of process_file with text"""
+    with process_file.absolute_input_file.open(
+        "w", encoding="utf8", newline="\n"
+    ) as file:
+        file.write(text)
+
+
+def write_resource_file(process_file: ProcessFile, language_id: str, text: str) -> None:
+    """Set language_id on process_file and overwrite its dictionary resource file"""
+    process_file.language_id = language_id
+    with process_file.dictionary_resource_file.open(
+        "w", encoding="utf8", newline="\n"
+    ) as file:
+        file.write(text)
+
+
+def pre_process_a_dictionary_file(process_file: ProcessFile, language_id: str):
+    """Create the cedict for language_id and pre-process the input file words"""
+    process_file.language_id = language_id
+    CCCEDICT.create_cedict(language_id)
+    with process_file.absolute_input_file.open(
+        "r", encoding="utf8", newline="\n"
+    ) as file:
+        words_list = [word for line in file if (word := line.strip())]
+    dictionary_pre_process(words_list, process_file)
+
+
+def process_a_dictionary_file(process_file: ProcessFile, language_id: str) -> Path:
+    """Run dictionary_process for language_id and return the written anki file"""
+    TTS.create_tts()
+    process_file.language_id = language_id
+    results = dictionary_process(process_file)
+    return output_anki_dictionary(process_file, results)
+
+
+def process_a_phrases_file(process_file: ProcessFile, language_id: str) -> Path:
+    """Translate the input file lines and return the written anki file"""
     process_file.language_id = language_id
     TTS.create_tts()
-    if PHRASES_TYPE in process_file.input_file.suffixes:
-        TRANS.create_translator(LANGUAGES.CN, language_id)
-        with process_file.absolute_input_file.open("r", encoding="utf8") as file:
-            text_lines = [line.strip() for line in file.readlines()]
-        results = translator_process(text_lines, process_file)
-        output_anki_phrase(process_file, results)
-    elif DICT_TYPE in process_file.input_file.suffixes:
-        if not process_file.dictionary_resource_file.is_file():
-            CCCEDICT.create_cedict(language_id)
-            with process_file.absolute_input_file.open("r", encoding="utf8") as file:
-                words_list = [word.strip() for word in file.readlines()]
-            dictionary_pre_process(words_list, process_file)
-        else:
-            results = dictionary_process(process_file)
-            output_anki_dictionary(process_file, results)
-    else:
-        print("filetype not identified")
+    TRANS.create_translator(LANGUAGES.CN, language_id)
+    with process_file.absolute_input_file.open(
+        "r", encoding="utf8", newline="\n"
+    ) as file:
+        text_lines = [line.strip() for line in file.readlines()]
+    results = translator_process(text_lines, process_file)
+    return output_anki_phrase(process_file, results)
diff --git a/src/anki_hsk_creator/utility.py b/src/anki_hsk_creator/utility.py
index e123aad..cab1e30 100644
--- a/src/anki_hsk_creator/utility.py
+++ b/src/anki_hsk_creator/utility.py
@@ -183,6 +183,16 @@ class ProcessFile:
     def dictionary_resource_file(self):
         """The path for the resource tsv for dictionary files"""
         return self.resources / f"dictionary.{self.language_id}.tsv"
+
+    @property
+    def relative_dictionary_resource_file(self):
+        """The dictionary resource tsv path, relative to RESOURCES"""
+        return self.dictionary_resource_file.relative_to(RESOURCES)
+
+    @property
+    def available_dictionary_languages(self):
+        """Language ids that already have a dictionary.<id>.tsv resource file"""
+        return [lan.suffixes[0][1:] for lan in self.resources.glob("dictionary.*.tsv")]
 
 
 class TranslationResult: