update
This commit is contained in:
@@ -10,9 +10,7 @@ from pinyin_tone_converter.pinyin_tone_converter import PinyinToneConverter
|
|||||||
## Constants
|
## Constants
|
||||||
|
|
||||||
CCCEDICT = Path(__file__).parent / "cedict_ts.u8"
|
CCCEDICT = Path(__file__).parent / "cedict_ts.u8"
|
||||||
OUTPUT = Path(__file__).parent / "OUTPUT.tsv"
|
DATA = Path(__file__).parent.parent / "data"
|
||||||
TEXT = "卡 陪 营业厅 附近 墙 贴 广告 排队 大概 营业员 对 一下 护照 让 选 充值 国内 国际 长途 费 下载 软件 发 短信 收 邮件 出来 办 又 习惯 自动取款机 取 能 网站 怕 麻烦 付 方法 电池 充电器 快递 应该 手表 寄 久 去年 客户 服务 帮 台 空调 最近 奇怪 修 地址 准备 办法 打开 按"
|
|
||||||
|
|
||||||
|
|
||||||
## Classes
|
## Classes
|
||||||
|
|
||||||
@@ -31,8 +29,20 @@ def main():
|
|||||||
else:
|
else:
|
||||||
dictionary[entry.simplified].append(entry)
|
dictionary[entry.simplified].append(entry)
|
||||||
|
|
||||||
|
print("Select data file:")
|
||||||
|
files = []
|
||||||
|
for n, file in enumerate(DATA.glob('*.txt')):
|
||||||
|
files.append(file)
|
||||||
|
print(f"{n+1} - {file}")
|
||||||
|
s = None
|
||||||
|
while not s or not s.isnumeric() or not (1 <= int(s) <= len(files)):
|
||||||
|
s = input(f"Please select the file [1-{len(files)}]: ")
|
||||||
|
in_file = files[int(s)-1]
|
||||||
|
out_file = DATA / f"{in_file.stem}.tsv"
|
||||||
|
words_list = in_file.open(encoding="utf8").read().split()
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
for word in TEXT.split():
|
for word in words_list:
|
||||||
if v := dictionary.get(word):
|
if v := dictionary.get(word):
|
||||||
if len(v) > 1:
|
if len(v) > 1:
|
||||||
print(
|
print(
|
||||||
@@ -52,12 +62,12 @@ def main():
|
|||||||
else:
|
else:
|
||||||
print(f"ERROR: {word} not found")
|
print(f"ERROR: {word} not found")
|
||||||
|
|
||||||
with OUTPUT.open("w", encoding="utf8", newline="") as csvfile:
|
with out_file.open("w", encoding="utf8", newline="") as csvfile:
|
||||||
writer = csv.writer(csvfile, delimiter="\t", quotechar='"')
|
writer = csv.writer(csvfile, delimiter="\t", quotechar='"')
|
||||||
for entry in results:
|
for entry in results:
|
||||||
writer.writerow(
|
writer.writerow(
|
||||||
[
|
[
|
||||||
"\n ".join(f"{n}. {m}" for i,m in enumerate(entry.meanings)),
|
"\n ".join(f"{n+1}. {m}" for n,m in enumerate(entry.meanings)),
|
||||||
PinyinToneConverter().convert_text(entry.pinyin),
|
PinyinToneConverter().convert_text(entry.pinyin),
|
||||||
entry.simplified,
|
entry.simplified,
|
||||||
entry.traditional,
|
entry.traditional,
|
||||||
|
|||||||
219
data/基础汉语40课.tsv
Normal file
219
data/基础汉语40课.tsv
Normal file
@@ -0,0 +1,219 @@
|
|||||||
|
"1. to stop
|
||||||
|
2. to block
|
||||||
|
3. (computing) (coll.) slow
|
||||||
|
4. (loanword) card
|
||||||
|
5. CL:張|张[zhang1],片[pian4]
|
||||||
|
6. truck (from 'car')
|
||||||
|
7. calorie (abbr. for 卡路里[ka3 lu4 li3])
|
||||||
|
8. cassette" kǎ 卡 卡
|
||||||
|
"1. to accompany
|
||||||
|
2. to keep sb company
|
||||||
|
3. to assist
|
||||||
|
4. old variant of 賠|赔[pei2]" péi 陪 陪
|
||||||
|
"1. nearby; neighboring
|
||||||
|
2. (in the) vicinity (of); neighborhood" fù jìn 附近 附近
|
||||||
|
"1. wall (CL:面[mian4],堵[du3])
|
||||||
|
2. (slang) to block (a website) (usu. in the passive: 被牆|被墙[bei4 qiang2])" qiáng 墙 牆
|
||||||
|
"1. to stick
|
||||||
|
2. to paste
|
||||||
|
3. to post (e.g. on a blog)
|
||||||
|
4. to keep close to
|
||||||
|
5. to fit snugly
|
||||||
|
6. to subsidize
|
||||||
|
7. allowance (e.g. money for food or housing)
|
||||||
|
8. sticker
|
||||||
|
9. classifier for sticking plaster: strip" tiē 贴 貼
|
||||||
|
"1. to advertise
|
||||||
|
2. a commercial
|
||||||
|
3. advertisement
|
||||||
|
4. CL:項|项[xiang4]" guǎng gào 广告 廣告
|
||||||
|
1. to line up pái duì 排队 排隊
|
||||||
|
"1. roughly
|
||||||
|
2. probably
|
||||||
|
3. rough
|
||||||
|
4. approximate
|
||||||
|
5. about
|
||||||
|
6. general idea" dà gài 大概 大概
|
||||||
|
"1. clerk
|
||||||
|
2. shop assistant
|
||||||
|
3. CL:個|个[ge4]" yíng yè yuán 营业员 營業員
|
||||||
|
"1. right; correct
|
||||||
|
2. towards; at; for
|
||||||
|
3. concerning; regarding
|
||||||
|
4. to treat (sb a certain way)
|
||||||
|
5. to face
|
||||||
|
6. (bound form) opposite; facing; matching
|
||||||
|
7. to match together; to adjust
|
||||||
|
8. to fit; to suit
|
||||||
|
9. to answer; to reply
|
||||||
|
10. to add; to pour in (a fluid)
|
||||||
|
11. to check; to compare
|
||||||
|
12. classifier: couple; pair" duì 对 對
|
||||||
|
"1. (after a verb) a bit; a little (indicating brief duration, or softening the tone, or suggesting giving sth a try)
|
||||||
|
2. all at once; suddenly" yī xià 一下 一下
|
||||||
|
"1. passport
|
||||||
|
2. CL:本[ben3],個|个[ge4]" hù zhào 护照 護照
|
||||||
|
"1. to yield
|
||||||
|
2. to permit
|
||||||
|
3. to let sb do sth
|
||||||
|
4. to have sb do sth
|
||||||
|
5. to make sb (feel sad etc)
|
||||||
|
6. by (indicates the agent in a passive clause, like 被[bei4])" ràng 让 讓
|
||||||
|
"1. to choose
|
||||||
|
2. to pick
|
||||||
|
3. to select
|
||||||
|
4. to elect" xuǎn 选 選
|
||||||
|
1. to recharge (money onto a card) chōng zhí 充值 充值
|
||||||
|
"1. domestic
|
||||||
|
2. internal (to a country)
|
||||||
|
3. civil" guó nèi 国内 國內
|
||||||
|
1. international guó jì 国际 國際
|
||||||
|
"1. long distance
|
||||||
|
2. long-distance phone call (abbr. for 長途電話|长途电话[chang2 tu2 dian4 hua4])
|
||||||
|
3. long-distance coach (abbr. for 長途汽車|长途汽车[chang2 tu2 qi4 che1])" cháng tú 长途 長途
|
||||||
|
"1. to cost
|
||||||
|
2. to spend
|
||||||
|
3. fee
|
||||||
|
4. wasteful
|
||||||
|
5. expenses" fèi 费 費
|
||||||
|
"1. to download
|
||||||
|
2. also pr. [xia4zai4]" xià zǎi 下载 下載
|
||||||
|
1. (computer) software ruǎn jiàn 软件 軟件
|
||||||
|
"1. to send out
|
||||||
|
2. to show (one's feeling)
|
||||||
|
3. to issue
|
||||||
|
4. to develop
|
||||||
|
5. to make a bundle of money
|
||||||
|
6. classifier for gunshots (rounds)" fā 发 發
|
||||||
|
"1. text message
|
||||||
|
2. SMS" duǎn xìn 短信 短信
|
||||||
|
"1. to receive
|
||||||
|
2. to accept
|
||||||
|
3. to collect
|
||||||
|
4. to put away
|
||||||
|
5. to restrain
|
||||||
|
6. to stop
|
||||||
|
7. in care of (used on address line after name)" shōu 收 收
|
||||||
|
"1. mail
|
||||||
|
2. post
|
||||||
|
3. email" yóu jiàn 邮件 郵件
|
||||||
|
"1. to come out
|
||||||
|
2. to appear
|
||||||
|
3. to arise" chū lái 出来 出來
|
||||||
|
"1. to take care of (a matter); to deal with (a task, procedure etc); to organize (an event)
|
||||||
|
2. to establish; to set up; to manage; to run (an enterprise)
|
||||||
|
3. (law) to handle; to investigate; to prosecute (a case or suspect)
|
||||||
|
4. (bound form) office (as in 招辦|招办[zhao1 ban4], admissions office) (abbr. for 辦公室|办公室[ban4 gong1 shi4])" bàn 办 辦
|
||||||
|
"1. (once) again
|
||||||
|
2. also
|
||||||
|
3. both... and...
|
||||||
|
4. and yet
|
||||||
|
5. (used for emphasis) anyway" yòu 又 又
|
||||||
|
"1. habit
|
||||||
|
2. custom
|
||||||
|
3. usual practice
|
||||||
|
4. to be used to
|
||||||
|
5. CL:個|个[ge4]" xí guàn 习惯 習慣
|
||||||
|
1. automated teller machine (ATM) zì dòng qǔ kuǎn jī 自动取款机 自動取款機
|
||||||
|
"1. to take
|
||||||
|
2. to get
|
||||||
|
3. to choose
|
||||||
|
4. to fetch" qǔ 取 取
|
||||||
|
"1. can
|
||||||
|
2. to be able to
|
||||||
|
3. might possibly
|
||||||
|
4. ability
|
||||||
|
5. (physics) energy" néng 能 能
|
||||||
|
1. website wǎng zhàn 网站 網站
|
||||||
|
"1. to be afraid
|
||||||
|
2. to fear
|
||||||
|
3. to dread
|
||||||
|
4. to be unable to endure
|
||||||
|
5. perhaps" pà 怕 怕
|
||||||
|
"1. trouble; inconvenience
|
||||||
|
2. inconvenient; troublesome; annoying
|
||||||
|
3. to bother sb; to put sb to trouble" má fan 麻烦 麻煩
|
||||||
|
"1. to pay
|
||||||
|
2. to hand over to
|
||||||
|
3. classifier for pairs or sets of things" fù 付 付
|
||||||
|
"1. method; way; technique; procedure
|
||||||
|
2. CL:個|个[ge4]" fāng fǎ 方法 方法
|
||||||
|
"1. battery; electric cell
|
||||||
|
2. CL:節|节[jie2],組|组[zu3]" diàn chí 电池 電池
|
||||||
|
1. battery charger chōng diàn qì 充电器 充電器
|
||||||
|
1. express delivery kuài dì 快递 快遞
|
||||||
|
1. ought to; should; must yīng gāi 应该 應該
|
||||||
|
"1. wristwatch
|
||||||
|
2. CL:塊|块[kuai4],隻|只[zhi1],個|个[ge4]" shǒu biǎo 手表 手錶
|
||||||
|
"1. to entrust; to place in sb's care
|
||||||
|
2. (bound form) to depend on; to attach oneself to; to reside temporarily
|
||||||
|
3. (bound form) foster (as in 寄女[ji4 nu:3] foster daughter)
|
||||||
|
4. to send by post; to mail" jì 寄 寄
|
||||||
|
1. (of a period of time) long jiǔ 久 久
|
||||||
|
1. last year qù nián 去年 去年
|
||||||
|
1. client; customer kè hù 客户 客戶
|
||||||
|
"1. to serve
|
||||||
|
2. service
|
||||||
|
3. CL:項|项[xiang4]" fú wù 服务 服務
|
||||||
|
"1. to help
|
||||||
|
2. to assist
|
||||||
|
3. to support
|
||||||
|
4. for sb (i.e. as a help)
|
||||||
|
5. hired (as worker)
|
||||||
|
6. side (of pail, boat etc)
|
||||||
|
7. outer layer
|
||||||
|
8. upper (of a shoe)
|
||||||
|
9. group
|
||||||
|
10. gang
|
||||||
|
11. clique
|
||||||
|
12. party
|
||||||
|
13. secret society" bāng 帮 幫
|
||||||
|
"1. platform
|
||||||
|
2. stage
|
||||||
|
3. terrace
|
||||||
|
4. stand
|
||||||
|
5. support
|
||||||
|
6. station
|
||||||
|
7. broadcasting station
|
||||||
|
8. classifier for vehicles or machines" tái 台 臺
|
||||||
|
"1. air conditioning
|
||||||
|
2. air conditioner (including units that have a heating mode)
|
||||||
|
3. CL:臺|台[tai2]" kōng tiáo 空调 空調
|
||||||
|
"1. recently
|
||||||
|
2. soon
|
||||||
|
3. nearest" zuì jìn 最近 最近
|
||||||
|
"1. strange
|
||||||
|
2. odd
|
||||||
|
3. to marvel
|
||||||
|
4. to be baffled" qí guài 奇怪 奇怪
|
||||||
|
"1. to decorate
|
||||||
|
2. to embellish
|
||||||
|
3. to repair
|
||||||
|
4. to build
|
||||||
|
5. to write
|
||||||
|
6. to cultivate
|
||||||
|
7. to study
|
||||||
|
8. to take (a class)" xiū 修 修
|
||||||
|
"1. address
|
||||||
|
2. CL:個|个[ge4]" dì zhǐ 地址 地址
|
||||||
|
"1. preparation
|
||||||
|
2. to prepare
|
||||||
|
3. to intend
|
||||||
|
4. to be about to
|
||||||
|
5. reserve (fund)" zhǔn bèi 准备 準備
|
||||||
|
"1. way of handling sth; means; measure; (practical) solution to a problem
|
||||||
|
2. CL:條|条[tiao2],個|个[ge4]" bàn fǎ 办法 辦法
|
||||||
|
"1. to open
|
||||||
|
2. to show (a ticket)
|
||||||
|
3. to turn on
|
||||||
|
4. to switch on" dǎ kāi 打开 打開
|
||||||
|
"1. to press
|
||||||
|
2. to push
|
||||||
|
3. to leave aside or shelve
|
||||||
|
4. to control
|
||||||
|
5. to restrain
|
||||||
|
6. to keep one's hand on
|
||||||
|
7. to check or refer to
|
||||||
|
8. according to
|
||||||
|
9. in the light of
|
||||||
|
10. (of an editor or author) to make a comment" àn 按 按
|
||||||
|
1
data/基础汉语40课.txt
Normal file
1
data/基础汉语40课.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
卡 陪 营业厅 附近 墙 贴 广告 排队 大概 营业员 对 一下 护照 让 选 充值 国内 国际 长途 费 下载 软件 发 短信 收 邮件 出来 办 又 习惯 自动取款机 取 能 网站 怕 麻烦 付 方法 电池 充电器 快递 应该 手表 寄 久 去年 客户 服务 帮 台 空调 最近 奇怪 修 地址 准备 办法 打开 按
|
||||||
Reference in New Issue
Block a user