update format for anki,
upgrade translation package search, fix small bugs
This commit is contained in:
2
.vscode/launch.json
vendored
2
.vscode/launch.json
vendored
@@ -9,7 +9,7 @@
|
|||||||
"name": "Python Debugger: Module",
|
"name": "Python Debugger: Module",
|
||||||
"type": "debugpy",
|
"type": "debugpy",
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
"module": "anki-hsk-creator"
|
"module": "anki_hsk_creator"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -14,7 +14,9 @@ creates anki hsk decks from a list of words
|
|||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
```console
|
```console
|
||||||
pip install anki-hsk-creator
|
git clone https://github.com/resemble-ai/chatterbox
|
||||||
|
git clone https://gitea.wolfang.info.ve/wolfang/anki-hsk-creator
|
||||||
|
git clone https://gitea.wolfang.info.ve/wolfang/anki-hsk-creator-data
|
||||||
```
|
```
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ dependencies = [
|
|||||||
"torchaudio",
|
"torchaudio",
|
||||||
"torchcodec",
|
"torchcodec",
|
||||||
"python-dotenv",
|
"python-dotenv",
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
"""about.py"""
|
"""about.py"""
|
||||||
|
|
||||||
# SPDX-FileCopyrightText: 2026-present Wolfang Torres <wolfang.torres@gmail.com>
|
# SPDX-FileCopyrightText: 2026-present Wolfang Torres <wolfang.torres@gmail.com>
|
||||||
#
|
#
|
||||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
__version__ = "0.1.0"
|
__version__ = "0.1.1"
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# Local
|
# Local
|
||||||
from .api import list_input_files, process_a_file, select_file
|
from .api import is_file, list_input_files, process_a_file, select_file
|
||||||
from .constants import LANGUAGES
|
from .constants import LANGUAGES
|
||||||
|
|
||||||
|
|
||||||
@@ -18,10 +18,10 @@ def cli_select_files():
|
|||||||
for n, file in enumerate(files):
|
for n, file in enumerate(files):
|
||||||
print(f"{n+1} - {file}")
|
print(f"{n+1} - {file}")
|
||||||
s = None
|
s = None
|
||||||
while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)):
|
while not s or not s.isnumeric() or not 1 <= int(s) <= len(files):
|
||||||
s = input(f"Please select the file [1-{len(files)}]: ")
|
s = input(f"Please select the file [1-{len(files)}]: ")
|
||||||
selected = files[int(s) - 1]
|
selected = files[int(s) - 1]
|
||||||
if selected.is_file():
|
if is_file(selected):
|
||||||
in_file = selected
|
in_file = selected
|
||||||
else:
|
else:
|
||||||
level = selected
|
level = selected
|
||||||
@@ -32,11 +32,11 @@ def cli_select_files():
|
|||||||
def cli_select_language():
|
def cli_select_language():
|
||||||
"""Selects a language for the trasnlatatio"""
|
"""Selects a language for the trasnlatatio"""
|
||||||
print("Select a language:")
|
print("Select a language:")
|
||||||
for language_id, language in LANGUAGES.language_names.items():
|
for language_id, language in LANGUAGES.LanguageNames.items():
|
||||||
print(f"{language_id} - {language}")
|
print(f"{language_id} - {language}")
|
||||||
s = None
|
s = None
|
||||||
while not s or s not in LANGUAGES.available_languages:
|
while not s or s not in LANGUAGES.AvailableLanguages:
|
||||||
s = input(f"Please select the language: ({ LANGUAGES.available_languages})")
|
s = input(f"Please select the language {LANGUAGES.AvailableLanguages}: ")
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
@@ -45,6 +45,7 @@ def main():
|
|||||||
while True:
|
while True:
|
||||||
input_file = cli_select_files()
|
input_file = cli_select_files()
|
||||||
language_id = cli_select_language()
|
language_id = cli_select_language()
|
||||||
|
print(f"processing file {input_file.input_file} with language {language_id}")
|
||||||
process_a_file(input_file, language_id)
|
process_a_file(input_file, language_id)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -48,17 +48,17 @@ PHRASE_MODEL = Model(
|
|||||||
templates=[
|
templates=[
|
||||||
{
|
{
|
||||||
"name": "Card 1",
|
"name": "Card 1",
|
||||||
"qfmt": "{{Translated}}<br>{{Audio}}",
|
"qfmt": "{{Translated}}<br>{{Audio}}<br>{{type:Phrase}}",
|
||||||
"afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}',
|
"afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Card 2",
|
"name": "Card 2",
|
||||||
"qfmt": "{{Phrase}}<br>{{Audio}}",
|
"qfmt": "{{Phrase}}<br>{{Audio}}<br>{{type:Translated}}",
|
||||||
"afmt": '{{FrontSide}}<hr id="answer">{{Translated}}',
|
"afmt": '{{FrontSide}}<hr id="answer">{{Translated}}',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Card 3",
|
"name": "Card 3",
|
||||||
"qfmt": "{{Audio}}",
|
"qfmt": "{{Audio}}<br>{{type:Phrase}}",
|
||||||
"afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}',
|
"afmt": '{{FrontSide}}<hr id="answer">{{Phrase}}',
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
@@ -70,7 +70,7 @@ HSK_MODEL = Model(
|
|||||||
1708536519,
|
1708536519,
|
||||||
"HSK Model",
|
"HSK Model",
|
||||||
fields=[
|
fields=[
|
||||||
{"name": "English"},
|
{"name": "Translated"},
|
||||||
{"name": "Pinyin"},
|
{"name": "Pinyin"},
|
||||||
{"name": "Simplified"},
|
{"name": "Simplified"},
|
||||||
{"name": "Traditional"},
|
{"name": "Traditional"},
|
||||||
@@ -79,7 +79,12 @@ HSK_MODEL = Model(
|
|||||||
templates=[
|
templates=[
|
||||||
{
|
{
|
||||||
"name": "Card 1",
|
"name": "Card 1",
|
||||||
"qfmt": "<strong>{{Pinyin}}</strong><br>{{English}}<br>{{Audio}}",
|
"qfmt": (
|
||||||
|
"<strong>{{Pinyin}}</strong>"
|
||||||
|
"<br>{{Translated}}"
|
||||||
|
"<br>{{Audio}}"
|
||||||
|
"<br>Pinyin: {{type:Simplified}}"
|
||||||
|
),
|
||||||
"afmt": (
|
"afmt": (
|
||||||
"{{FrontSide}}<hr id='answer''><div class='simple'>{{Simplified}}</div>"
|
"{{FrontSide}}<hr id='answer''><div class='simple'>{{Simplified}}</div>"
|
||||||
"<br><div class='trad'>{{Traditional}}</div>"
|
"<br><div class='trad'>{{Traditional}}</div>"
|
||||||
@@ -87,16 +92,24 @@ HSK_MODEL = Model(
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Card 2",
|
"name": "Card 2",
|
||||||
"qfmt": "<div class='simple'>{{Simplified}}</div><br><div class='trad'>"
|
"qfmt": (
|
||||||
"{{Traditional}}</div>",
|
"<div class='simple'>{{Simplified}}</div>"
|
||||||
|
"<br><div class='trad'>{{Traditional}}</div>"
|
||||||
|
"<br>Pinyin: {{type:Pinyin}}"
|
||||||
|
"<br>Translated: {{type:Translated}}"
|
||||||
|
),
|
||||||
"afmt": (
|
"afmt": (
|
||||||
"{{FrontSide}}<hr id='answer'><strong>{{Pinyin}}</strong>"
|
"{{FrontSide}}<hr id='answer'><strong>{{Pinyin}}</strong>"
|
||||||
"<br>{{English}}<br>{{Audio}}"
|
"<br>{{Translated}}<br>{{Audio}}"
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Card 3",
|
"name": "Card 3",
|
||||||
"qfmt": "{{Audio}}",
|
"qfmt": (
|
||||||
|
"{{Audio}}"
|
||||||
|
"<br>Pinyin: {{type:Pinyin}}"
|
||||||
|
"<br>Simplified: {{type:Simplified}}"
|
||||||
|
),
|
||||||
"afmt": (
|
"afmt": (
|
||||||
"{{FrontSide}}<hr id='answer'><strong>{{Pinyin}}</strong>"
|
"{{FrontSide}}<hr id='answer'><strong>{{Pinyin}}</strong>"
|
||||||
"<br><div class='simple'>{{Simplified}}</div>"
|
"<br><div class='simple'>{{Simplified}}</div>"
|
||||||
@@ -139,7 +152,7 @@ def output_anki_phrase(process_file: ProcessFile, results: list[TranslationResul
|
|||||||
|
|
||||||
final_file = process_file.output_name.with_suffix(".apkg")
|
final_file = process_file.output_name.with_suffix(".apkg")
|
||||||
deck_name = "::".join(
|
deck_name = "::".join(
|
||||||
process_file.input_file.parts[:-1] + (process_file.input_fil.stem,)
|
process_file.input_file.parts[:-1] + (process_file.input_file.stem,)
|
||||||
)
|
)
|
||||||
deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name)
|
deck = Deck(random.randrange(1 << 30, 1 << 31), deck_name)
|
||||||
package = Package(deck)
|
package = Package(deck)
|
||||||
|
|||||||
@@ -27,6 +27,11 @@ def list_input_files(search_path: Path = Path()) -> list[Path]:
|
|||||||
return [path.relative_to(INPUT) for path in level.glob("*")]
|
return [path.relative_to(INPUT) for path in level.glob("*")]
|
||||||
|
|
||||||
|
|
||||||
|
def is_file(file_path: Path) -> bool:
|
||||||
|
"""Check if a relative path is a file"""
|
||||||
|
return (INPUT / file_path).is_file()
|
||||||
|
|
||||||
|
|
||||||
def select_file(file_path: Path) -> ProcessFile:
|
def select_file(file_path: Path) -> ProcessFile:
|
||||||
"""Given a relative path from `list_input_files`, return a ProcessFile"""
|
"""Given a relative path from `list_input_files`, return a ProcessFile"""
|
||||||
if (INPUT / file_path).is_file():
|
if (INPUT / file_path).is_file():
|
||||||
@@ -61,12 +66,14 @@ def create_input_file(
|
|||||||
def process_a_file(process_file: ProcessFile, language_id: str):
|
def process_a_file(process_file: ProcessFile, language_id: str):
|
||||||
"""From a input_file, a language and an output type, process a file"""
|
"""From a input_file, a language and an output type, process a file"""
|
||||||
process_file.language_id = language_id
|
process_file.language_id = language_id
|
||||||
if PHRASES_TYPE in process_file.input_file.suffix:
|
if PHRASES_TYPE in process_file.input_file.suffixes:
|
||||||
TTS.create_tts()
|
TTS.create_tts()
|
||||||
TRANS.create_translator(LANGUAGES.CN, language_id)
|
TRANS.create_translator(LANGUAGES.CN, language_id)
|
||||||
with process_file.absolute_input_file.open("r") as file:
|
with process_file.absolute_input_file.open("r", encoding="utf8") as file:
|
||||||
text_lines = [line.strip() for line in file.readlines()]
|
text_lines = [line.strip() for line in file.readlines()]
|
||||||
results = translator_process(text_lines, process_file, language_id)
|
results = translator_process(text_lines, process_file, language_id)
|
||||||
output_anki_phrase(process_file, results)
|
output_anki_phrase(process_file, results)
|
||||||
elif DICT_TYPE in process_file.input_file.suffix:
|
elif DICT_TYPE in process_file.input_file.suffixes:
|
||||||
print("not implemented")
|
print("not implemented")
|
||||||
|
else:
|
||||||
|
print("no identified")
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import importlib.resources
|
|||||||
from . import DATA_FOLDER
|
from . import DATA_FOLDER
|
||||||
|
|
||||||
# Resources
|
# Resources
|
||||||
CCCEDICT_PATH = importlib.resources.files("anki-hsk-creator").joinpath("cedict_ts.u8")
|
CCCEDICT_PATH = importlib.resources.files("anki_hsk_creator").joinpath("cedict_ts.u8")
|
||||||
|
|
||||||
# Data folder structure
|
# Data folder structure
|
||||||
INPUT = DATA_FOLDER / "input"
|
INPUT = DATA_FOLDER / "input"
|
||||||
@@ -33,19 +33,12 @@ class LANGUAGES:
|
|||||||
TR = "tr"
|
TR = "tr"
|
||||||
TH = "th"
|
TH = "th"
|
||||||
|
|
||||||
@property
|
AvailableLanguages = (EN, ES, FR, RU, TR, TH)
|
||||||
def available_languages(self) -> tuple:
|
LanguageNames = {
|
||||||
"""Available laguages for translation"""
|
EN: "English",
|
||||||
return (self.EN, self.ES, self.FR, self.RU, self.TR, self.TH)
|
ES: "Spanish",
|
||||||
|
FR: "French",
|
||||||
@property
|
RU: "Russian",
|
||||||
def language_names(self) -> dict:
|
TR: "Turkish",
|
||||||
"""Gets the name of a language code"""
|
TH: "Thai",
|
||||||
return {
|
|
||||||
self.EN: "English",
|
|
||||||
self.ES: "Spanish",
|
|
||||||
self.FR: "French",
|
|
||||||
self.RU: "Russian",
|
|
||||||
self.TR: "Turkish",
|
|
||||||
self.TH: "Thai",
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ def translator_process(
|
|||||||
results = []
|
results = []
|
||||||
for n, line in enumerate(text_lines):
|
for n, line in enumerate(text_lines):
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
audio_path = process_file.resources / f"N{n::03.0n}.wav"
|
audio_path = process_file.resources / f"N{n:03n}.wav"
|
||||||
if not audio_path.exists():
|
if not audio_path.exists():
|
||||||
audio = TTS.MODEL.generate(f"{line}。", language_id=LANGUAGES.CN)
|
audio = TTS.MODEL.generate(f"{line}。", language_id=LANGUAGES.CN)
|
||||||
torchaudio.save(audio_path, audio, TTS.MODEL.sr)
|
torchaudio.save(audio_path, audio, TTS.MODEL.sr)
|
||||||
|
|||||||
@@ -33,13 +33,20 @@ class TRANS:
|
|||||||
argostranslate.package.update_package_index()
|
argostranslate.package.update_package_index()
|
||||||
TRANS.PACKAGES = argostranslate.package.get_available_packages()
|
TRANS.PACKAGES = argostranslate.package.get_available_packages()
|
||||||
TRANS.UPDATED = True
|
TRANS.UPDATED = True
|
||||||
package_to_install = next(
|
packages = filter(
|
||||||
filter(
|
lambda x: x.from_code == from_code or x.to_code == to_code,
|
||||||
lambda x: x.from_code == from_code and x.to_code == to_code,
|
|
||||||
TRANS.PACKAGES,
|
TRANS.PACKAGES,
|
||||||
)
|
)
|
||||||
)
|
packages_to_install = []
|
||||||
argostranslate.package.install_from_path(package_to_install.download())
|
for in_package in packages:
|
||||||
|
if in_package.from_code == from_code:
|
||||||
|
for out_package in packages:
|
||||||
|
if out_package.to_code == to_code:
|
||||||
|
if in_package.to_code == out_package.from_code:
|
||||||
|
packages_to_install.append(in_package)
|
||||||
|
packages_to_install.append(out_package)
|
||||||
|
for package in packages_to_install:
|
||||||
|
argostranslate.package.install_from_path(package.download())
|
||||||
|
|
||||||
|
|
||||||
class CCCEDICT:
|
class CCCEDICT:
|
||||||
@@ -135,7 +142,7 @@ class ProcessFile:
|
|||||||
"""Posible name for the output file, still missing the filetype"""
|
"""Posible name for the output file, still missing the filetype"""
|
||||||
if self.language_id is None:
|
if self.language_id is None:
|
||||||
raise ValueError("Not a valid language selected")
|
raise ValueError("Not a valid language selected")
|
||||||
return self.input_file.parent / f"{self.input_file.stem}.{self.language_id})."
|
return self.out_folder / f"{self.input_file.stem}.{self.language_id}."
|
||||||
|
|
||||||
|
|
||||||
class TranslationResult:
|
class TranslationResult:
|
||||||
|
|||||||
Reference in New Issue
Block a user