diff --git a/anki-hsk-creator/__main__.py b/anki-hsk-creator/__main__.py index e650047..7cb6ea5 100644 --- a/anki-hsk-creator/__main__.py +++ b/anki-hsk-creator/__main__.py @@ -10,9 +10,7 @@ from pinyin_tone_converter.pinyin_tone_converter import PinyinToneConverter ## Constants CCCEDICT = Path(__file__).parent / "cedict_ts.u8" -OUTPUT = Path(__file__).parent / "OUTPUT.tsv" -TEXT = "卡 陪 营业厅 附近 墙 贴 广告 排队 大概 营业员 对 一下 护照 让 选 充值 国内 国际 长途 费 下载 软件 发 短信 收 邮件 出来 办 又 习惯 自动取款机 取 能 网站 怕 麻烦 付 方法 电池 充电器 快递 应该 手表 寄 久 去年 客户 服务 帮 台 空调 最近 奇怪 修 地址 准备 办法 打开 按" - +DATA = Path(__file__).parent.parent / "data" ## Classess @@ -31,8 +29,20 @@ def main(): else: dictionary[entry.simplified].append(entry) + print("Select data file:") + files = [] + for n, file in enumerate(DATA.glob('*.txt')): + files.append(file) + print(f"{n+1} - {file}") + s = None + while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)): + s = input(f"Please select the file [1-{len(files)}]: ") + in_file = files[int(s)-1] + out_file = DATA / f"{in_file.stem}.tsv" + words_list = in_file.open(encoding="utf8").read().split() + results = [] - for word in TEXT.split(): + for word in words_list: if v := dictionary.get(word): if len(v) > 1: print( @@ -52,12 +62,12 @@ def main(): else: print(f"ERROR: {word} not found") - with OUTPUT.open("w", encoding="utf8", newline="") as csvfile: + with out_file.open("w", encoding="utf8", newline="") as csvfile: writer = csv.writer(csvfile, delimiter="\t", quotechar='"') for entry in results: writer.writerow( [ - "\n ".join(f"{n}. {m}" for i,m in enumerate(entry.meanings)), + "\n ".join(f"{n+1}. {m}" for n,m in enumerate(entry.meanings)), PinyinToneConverter().convert_text(entry.pinyin), entry.simplified, entry.traditional, diff --git a/data/基础汉语40课.tsv b/data/基础汉语40课.tsv new file mode 100644 index 0000000..16d010b --- /dev/null +++ b/data/基础汉语40课.tsv @@ -0,0 +1,219 @@ +"1. to stop + 2. to block + 3. (computing) (coll.) slow + 4. (loanword) card + 5. CL:張|张[zhang1],片[pian4] + 6. 
truck (from 'car') + 7. calorie (abbr. for 卡路里[ka3 lu4 li3]) + 8. cassette" kǎ 卡 卡 +"1. to accompany + 2. to keep sb company + 3. to assist + 4. old variant of 賠|赔[pei2]" péi 陪 陪 +"1. nearby; neighboring + 2. (in the) vicinity (of); neighborhood" fù jìn 附近 附近 +"1. wall (CL:面[mian4],堵[du3]) + 2. (slang) to block (a website) (usu. in the passive: 被牆|被墙[bei4 qiang2])" qiáng 墙 牆 +"1. to stick + 2. to paste + 3. to post (e.g. on a blog) + 4. to keep close to + 5. to fit snugly + 6. to subsidize + 7. allowance (e.g. money for food or housing) + 8. sticker + 9. classifier for sticking plaster: strip" tiē 贴 貼 +"1. to advertise + 2. a commercial + 3. advertisement + 4. CL:項|项[xiang4]" guǎng gào 广告 廣告 +1. to line up pái duì 排队 排隊 +"1. roughly + 2. probably + 3. rough + 4. approximate + 5. about + 6. general idea" dà gài 大概 大概 +"1. clerk + 2. shop assistant + 3. CL:個|个[ge4]" yíng yè yuán 营业员 營業員 +"1. right; correct + 2. towards; at; for + 3. concerning; regarding + 4. to treat (sb a certain way) + 5. to face + 6. (bound form) opposite; facing; matching + 7. to match together; to adjust + 8. to fit; to suit + 9. to answer; to reply + 10. to add; to pour in (a fluid) + 11. to check; to compare + 12. classifier: couple; pair" duì 对 對 +"1. (after a verb) a bit; a little (indicating brief duration, or softening the tone, or suggesting giving sth a try) + 2. all at once; suddenly" yī xià 一下 一下 +"1. passport + 2. CL:本[ben3],個|个[ge4]" hù zhào 护照 護照 +"1. to yield + 2. to permit + 3. to let sb do sth + 4. to have sb do sth + 5. to make sb (feel sad etc) + 6. by (indicates the agent in a passive clause, like 被[bei4])" ràng 让 讓 +"1. to choose + 2. to pick + 3. to select + 4. to elect" xuǎn 选 選 +1. to recharge (money onto a card) chōng zhí 充值 充值 +"1. domestic + 2. internal (to a country) + 3. civil" guó nèi 国内 國內 +1. international guó jì 国际 國際 +"1. long distance + 2. long-distance phone call (abbr. for 長途電話|长途电话[chang2 tu2 dian4 hua4]) + 3. long-distance coach (abbr. 
for 長途汽車|长途汽车[chang2 tu2 qi4 che1])" cháng tú 长途 長途 +"1. to cost + 2. to spend + 3. fee + 4. wasteful + 5. expenses" fèi 费 費 +"1. to download + 2. also pr. [xia4zai4]" xià zǎi 下载 下載 +1. (computer) software ruǎn jiàn 软件 軟件 +"1. to send out + 2. to show (one's feeling) + 3. to issue + 4. to develop + 5. to make a bundle of money + 6. classifier for gunshots (rounds)" fā 发 發 +"1. text message + 2. SMS" duǎn xìn 短信 短信 +"1. to receive + 2. to accept + 3. to collect + 4. to put away + 5. to restrain + 6. to stop + 7. in care of (used on address line after name)" shōu 收 收 +"1. mail + 2. post + 3. email" yóu jiàn 邮件 郵件 +"1. to come out + 2. to appear + 3. to arise" chū lái 出来 出來 +"1. to take care of (a matter); to deal with (a task, procedure etc); to organize (an event) + 2. to establish; to set up; to manage; to run (an enterprise) + 3. (law) to handle; to investigate; to prosecute (a case or suspect) + 4. (bound form) office (as in 招辦|招办[zhao1 ban4], admissions office) (abbr. for 辦公室|办公室[ban4 gong1 shi4])" bàn 办 辦 +"1. (once) again + 2. also + 3. both... and... + 4. and yet + 5. (used for emphasis) anyway" yòu 又 又 +"1. habit + 2. custom + 3. usual practice + 4. to be used to + 5. CL:個|个[ge4]" xí guàn 习惯 習慣 +1. automated teller machine (ATM) zì dòng qǔ kuǎn jī 自动取款机 自動取款機 +"1. to take + 2. to get + 3. to choose + 4. to fetch" qǔ 取 取 +"1. can + 2. to be able to + 3. might possibly + 4. ability + 5. (physics) energy" néng 能 能 +1. website wǎng zhàn 网站 網站 +"1. to be afraid + 2. to fear + 3. to dread + 4. to be unable to endure + 5. perhaps" pà 怕 怕 +"1. trouble; inconvenience + 2. inconvenient; troublesome; annoying + 3. to bother sb; to put sb to trouble" má fan 麻烦 麻煩 +"1. to pay + 2. to hand over to + 3. classifier for pairs or sets of things" fù 付 付 +"1. method; way; technique; procedure + 2. CL:個|个[ge4]" fāng fǎ 方法 方法 +"1. battery; electric cell + 2. CL:節|节[jie2],組|组[zu3]" diàn chí 电池 電池 +1. battery charger chōng diàn qì 充电器 充電器 +1. express delivery kuài dì 快递 快遞 +1. 
ought to; should; must yīng gāi 应该 應該 +"1. wristwatch + 2. CL:塊|块[kuai4],隻|只[zhi1],個|个[ge4]" shǒu biǎo 手表 手錶 +"1. to entrust; to place in sb's care + 2. (bound form) to depend on; to attach oneself to; to reside temporarily + 3. (bound form) foster (as in 寄女[ji4 nu:3] foster daughter) + 4. to send by post; to mail" jì 寄 寄 +1. (of a period of time) long jiǔ 久 久 +1. last year qù nián 去年 去年 +1. client; customer kè hù 客户 客戶 +"1. to serve + 2. service + 3. CL:項|项[xiang4]" fú wù 服务 服務 +"1. to help + 2. to assist + 3. to support + 4. for sb (i.e. as a help) + 5. hired (as worker) + 6. side (of pail, boat etc) + 7. outer layer + 8. upper (of a shoe) + 9. group + 10. gang + 11. clique + 12. party + 13. secret society" bāng 帮 幫 +"1. platform + 2. stage + 3. terrace + 4. stand + 5. support + 6. station + 7. broadcasting station + 8. classifier for vehicles or machines" tái 台 臺 +"1. air conditioning + 2. air conditioner (including units that have a heating mode) + 3. CL:臺|台[tai2]" kōng tiáo 空调 空調 +"1. recently + 2. soon + 3. nearest" zuì jìn 最近 最近 +"1. strange + 2. odd + 3. to marvel + 4. to be baffled" qí guài 奇怪 奇怪 +"1. to decorate + 2. to embellish + 3. to repair + 4. to build + 5. to write + 6. to cultivate + 7. to study + 8. to take (a class)" xiū 修 修 +"1. address + 2. CL:個|个[ge4]" dì zhǐ 地址 地址 +"1. preparation + 2. to prepare + 3. to intend + 4. to be about to + 5. reserve (fund)" zhǔn bèi 准备 準備 +"1. way of handling sth; means; measure; (practical) solution to a problem + 2. CL:條|条[tiao2],個|个[ge4]" bàn fǎ 办法 辦法 +"1. to open + 2. to show (a ticket) + 3. to turn on + 4. to switch on" dǎ kāi 打开 打開 +"1. to press + 2. to push + 3. to leave aside or shelve + 4. to control + 5. to restrain + 6. to keep one's hand on + 7. to check or refer to + 8. according to + 9. in the light of + 10. 
(of an editor or author) to make a comment" àn 按 按 diff --git a/data/基础汉语40课.txt b/data/基础汉语40课.txt new file mode 100644 index 0000000..483bcc2 --- /dev/null +++ b/data/基础汉语40课.txt @@ -0,0 +1 @@ +卡 陪 营业厅 附近 墙 贴 广告 排队 大概 营业员 对 一下 护照 让 选 充值 国内 国际 长途 费 下载 软件 发 短信 收 邮件 出来 办 又 习惯 自动取款机 取 能 网站 怕 麻烦 付 方法 电池 充电器 快递 应该 手表 寄 久 去年 客户 服务 帮 台 空调 最近 奇怪 修 地址 准备 办法 打开 按 \ No newline at end of file