update api endpoints

This commit is contained in:
Wolfang Torres
2026-06-19 20:17:37 +08:00
parent dde819f1e6
commit 30dd8c8671
3 changed files with 120 additions and 23 deletions

View File

@@ -5,6 +5,7 @@ Produces anki output
# Standard Library
import random
from pathlib import Path
# Pip
from genanki import Deck, Model, Note, Package
@@ -121,7 +122,9 @@ HSK_MODEL = Model(
# Process
def output_anki_dictionary(process_file: ProcessFile, results: list[DictionaryResult]):
def output_anki_dictionary(
process_file: ProcessFile, results: list[DictionaryResult]
) -> Path:
"""Creates an anki file from a dictionary results"""
final_file = process_file.output_name.with_suffix(".apkg")
deck_name = "::".join(
@@ -146,9 +149,12 @@ def output_anki_dictionary(process_file: ProcessFile, results: list[DictionaryRe
deck.add_note(note)
package.media_files = audios
package.write_to_file(final_file)
return final_file
def output_anki_phrase(process_file: ProcessFile, results: list[TranslationResult]):
def output_anki_phrase(
process_file: ProcessFile, results: list[TranslationResult]
) -> Path:
"""Creates an anki file from a phrases results"""
final_file = process_file.output_name.with_suffix(".apkg")
deck_name = "::".join(
@@ -170,3 +176,4 @@ def output_anki_phrase(process_file: ProcessFile, results: list[TranslationResul
audios.append(result.audio_path)
package.media_files = audios
package.write_to_file(final_file)
return final_file

View File

@@ -9,7 +9,7 @@ from pathlib import Path
# Local
from . import DATA_FOLDER
from .anki_generation import output_anki_dictionary, output_anki_phrase
from .constants import DICT_TYPE, INPUT, LANGUAGES, PHRASES_TYPE
from .constants import DICT_TYPE, INPUT, LANGUAGES, OUTPUT, PHRASES_TYPE, RESOURCES
from .proccessor import dictionary_pre_process, dictionary_process, translator_process
from .utility import CCCEDICT, TRANS, TTS, ProcessFile
@@ -21,6 +21,16 @@ def get_data_folder() -> Path:
return DATA_FOLDER
def get_output_folder() -> Path:
    """Return the package-level ``OUTPUT`` folder path."""
    return OUTPUT
def get_resources_folder() -> Path:
    """Return the package-level ``RESOURCES`` folder path."""
    return RESOURCES
def list_input_files(search_path: Path = Path()) -> list[Path]:
"""Return a list of files relative to the INPUT path"""
level = INPUT / search_path
@@ -32,6 +42,19 @@ def is_file(file_path: Path) -> bool:
return (INPUT / file_path).is_file()
def read_input_file(file_path: Path) -> str:
    """Read the whole text of an input file.

    Args:
        file_path: Path of the file, relative to the ``INPUT`` folder.

    Returns:
        The file content decoded as UTF-8.
    """
    # The context manager closes the handle as soon as the text is read;
    # the previous chained ``open(...).read()`` left it to the garbage
    # collector.  ``newline="\n"`` keeps carriage returns untranslated.
    with (INPUT / file_path).open(encoding="utf8", newline="\n") as file:
        return file.read()
def read_dictionary_file(process_file: ProcessFile, language_id: str) -> str:
    """Read the dictionary resource file of ``process_file`` for a language.

    Note that this sets ``process_file.language_id`` to ``language_id``
    before resolving ``process_file.dictionary_resource_file``.

    Args:
        process_file: The file whose dictionary resource is read.
        language_id: Language id used to pick the resource file.

    Returns:
        The resource file content decoded as UTF-8.
    """
    process_file.language_id = language_id
    # Use a context manager so the handle is closed right after reading
    # instead of being left open until garbage collection.
    with process_file.dictionary_resource_file.open(
        encoding="utf8", newline="\n"
    ) as file:
        return file.read()
def select_file(file_path: Path) -> ProcessFile:
"""Given a relative path from `list_input_files`, return a ProcessFile"""
if (INPUT / file_path).is_file():
@@ -40,6 +63,40 @@ def select_file(file_path: Path) -> ProcessFile:
raise ValueError(f"{file_path} is not a file")
def list_file_resources(file_path: ProcessFile) -> list[Path]:
    """Return the entries found in the resources folder of ``file_path``.

    Args:
        file_path: The process file whose ``resources`` folder is listed.

    Returns:
        One path per entry directly inside ``file_path.resources``.
    """
    # ``glob`` returns a lazy generator; materialise it so callers get the
    # paths themselves rather than a one-element list holding the generator.
    return list(file_path.resources.glob("*"))
def analize_input_files(search_path: Path = Path()) -> dict[str, list[Path]]:
    """Analyze a path and return its input, resources and output files.

    When ``search_path`` (relative to ``INPUT``) is a file, the result holds
    that file, the entries of its resources folder, and the entries of its
    output folder whose names start with the input file stem.  When it is a
    folder, the result holds the direct entries of the matching folders
    under ``INPUT``, ``RESOURCES`` and ``OUTPUT``.  Every path is relative
    to its own root folder.  ``None`` or a missing path gives empty lists.
    """
    listing = {"input": [], "resources": [], "output": []}
    if search_path is None:
        return listing

    def relative_entries(folder, root, pattern="*"):
        # Paths matching ``pattern`` inside ``folder``, relative to ``root``.
        return [entry.relative_to(root) for entry in folder.glob(pattern)]

    target = INPUT / search_path
    if target.is_file():
        selected = ProcessFile(search_path)
        resources_dir = selected.resources
        outputs_dir = selected.out_folder
        listing["input"] = [search_path]
        listing["resources"] = relative_entries(resources_dir, RESOURCES)
        listing["output"] = relative_entries(
            outputs_dir, OUTPUT, f"{selected.input_file.stem}*"
        )
    elif target.exists():
        resources_dir = RESOURCES / search_path
        outputs_dir = OUTPUT / search_path
        listing["input"] = relative_entries(target, INPUT)
        listing["resources"] = relative_entries(resources_dir, RESOURCES)
        listing["output"] = relative_entries(outputs_dir, OUTPUT)
    return listing
def create_input_file(
name: str, file_type: str, text: str, sub_folder: Path = Path()
) -> ProcessFile:
@@ -57,30 +114,52 @@ def create_input_file(
# write file
file_path = INPUT / relative
file_path.parent.mkdir(exist_ok=True, parents=True)
file_path.write_text(text, encoding="utf8")
file_path.write_text(text, encoding="utf8", newline="\n")
# create process_file for future
process_file = ProcessFile(relative)
return process_file
def process_a_file(process_file: ProcessFile, language_id: str):
"""From a input_file, a language and an output type, process a file"""
def write_input_file(process_file: ProcessFile, text: str):
    """Overwrite the input file of ``process_file`` with ``text``.

    The file at ``process_file.absolute_input_file`` is opened for writing
    as UTF-8 with newline translation disabled.
    """
    target = process_file.absolute_input_file
    with target.open("w", encoding="utf8", newline="\n") as handle:
        handle.write(text)
def write_resource_file(process_file: ProcessFile, language_id: str, text: str):
    """Overwrite the dictionary resource file of ``process_file`` with ``text``.

    Sets ``process_file.language_id`` to ``language_id`` first, then writes
    ``process_file.dictionary_resource_file`` as UTF-8 with newline
    translation disabled.
    """
    process_file.language_id = language_id
    target = process_file.dictionary_resource_file
    with target.open("w", encoding="utf8", newline="\n") as handle:
        handle.write(text)
def pre_process_a_dictionary_file(process_file: ProcessFile, language_id: str):
    """Pre-process the word list of a dictionary input file for a language.

    Sets ``process_file.language_id``, calls ``CCCEDICT.create_cedict`` for
    that language, reads the input file one word per line and passes the
    words to ``dictionary_pre_process``.

    Args:
        process_file: The dictionary input file to pre-process.
        language_id: Language id stored on ``process_file`` and passed to
            ``CCCEDICT.create_cedict``.
    """
    process_file.language_id = language_id
    CCCEDICT.create_cedict(language_id)
    with process_file.absolute_input_file.open(
        "r", encoding="utf8", newline="\n"
    ) as file:
        # Strip before filtering: a blank line still contains its line
        # terminator, so testing the raw text never dropped it and empty
        # words reached the pre-processor.
        stripped_lines = (line.strip() for line in file)
        words_list = [word for word in stripped_lines if word]
    dictionary_pre_process(words_list, process_file)
def process_a_dictionary_file(process_file: ProcessFile, language_id: str) -> Path:
    """Process a dictionary file for a language and write its anki output.

    Calls ``TTS.create_tts``, stores ``language_id`` on ``process_file``,
    runs ``dictionary_process`` and passes the results to
    ``output_anki_dictionary``.

    Returns:
        The value returned by ``output_anki_dictionary``.
    """
    TTS.create_tts()
    process_file.language_id = language_id
    entries = dictionary_process(process_file)
    anki_file = output_anki_dictionary(process_file, entries)
    return anki_file
def process_a_phrases_file(process_file: ProcessFile, language_id: str) -> Path:
process_file.language_id = language_id
TTS.create_tts()
if PHRASES_TYPE in process_file.input_file.suffixes:
TRANS.create_translator(LANGUAGES.CN, language_id)
with process_file.absolute_input_file.open("r", encoding="utf8") as file:
text_lines = [line.strip() for line in file.readlines()]
results = translator_process(text_lines, process_file)
output_anki_phrase(process_file, results)
elif DICT_TYPE in process_file.input_file.suffixes:
if not process_file.dictionary_resource_file.is_file():
CCCEDICT.create_cedict(language_id)
with process_file.absolute_input_file.open("r", encoding="utf8") as file:
words_list = [word.strip() for word in file.readlines()]
dictionary_pre_process(words_list, process_file)
else:
results = dictionary_process(process_file)
output_anki_dictionary(process_file, results)
else:
print("filetype not identified")
TRANS.create_translator(LANGUAGES.CN, language_id)
with process_file.absolute_input_file.open(
"r", encoding="utf8", newline="\n"
) as file:
text_lines = [line.strip() for line in file.readlines()]
results = translator_process(text_lines, process_file)
return output_anki_phrase(process_file, results)

View File

@@ -183,6 +183,17 @@ class ProcessFile:
def dictionary_resource_file(self):
"""The path for the resource tsv for dictionary files"""
return self.resources / f"dictionary.{self.language_id}.tsv"
@property
def relative_dictionary_resource_file(self):
    """The dictionary resource tsv path, relative to ``RESOURCES``"""
    resources_dir = self.resources
    file_name = f"dictionary.{self.language_id}.tsv"
    absolute = resources_dir / file_name
    return absolute.relative_to(RESOURCES)
@property
def available_dictionary_languages(self):
    """For a dictionary file, list the languages that already have a resource

    Each ``dictionary.*.tsv`` file found in ``self.resources`` contributes
    the first suffix of its name, without the leading dot.
    """
    languages = []
    for resource in self.resources.glob("dictionary.*.tsv"):
        first_suffix = resource.suffixes[0]
        languages.append(first_suffix[1:])
    return languages
class TranslationResult: