继续解析

This commit is contained in:
shenjack 2023-12-15 09:58:39 +08:00
parent be9bbcb884
commit a14628d674
Signed by: shenjack
GPG Key ID: 7B1134A979775551
2 changed files with 20 additions and 10 deletions

View File

@ -23,18 +23,18 @@ def get_all_iter(
if not func(node):
continue
yield i, node
return -1, {}
yield -1, {}
def get_text(ast: ast_type) -> str:
"""
Return the first string found.
"""
# NOTE(review): this block is a rendered diff hunk with +/- markers and
# indentation stripped, so pre- and post-commit lines appear together;
# confirm the final body against the repository before relying on it.
# NOTE(review): presumably a pre-commit line from when the argument was a list of nodes - confirm.
ast = ast[0]
if ast['type'] == 'text':
# A 'text' node carries its string under the 'raw' key.
return ast['raw']
# NOTE(review): the two elif branches below look like the old and new
# versions of one branch (the second adds 'list_item' and 'list' and
# recurses into the first child instead of the whole children list) - confirm.
elif ast['type'] in ('link', 'block_text'):
return get_text(ast['children'])
elif ast['type'] in ('link', 'block_text', 'list_item', 'list'):
return get_text(ast['children'][0])
# Reports a node whose type matched none of the branches above; nothing is
# returned after this line, so the result is presumably None here despite
# the `-> str` annotation - TODO confirm once indentation is restored.
print('unkown type', ast['type'], ast)
class TagParser(Options):
@ -120,21 +120,30 @@ class TagParser(Options):
if item.get("type") == "list_item"
]
# 顺手过滤一下
# pprint(tag_ast)
# self.logger.info(tag_ast)
for tag in tag_ast:
# self.logger.debug([(item['type'], item['children']) for item in tag])
this_tag = get_text(tag)
this_tag = get_text(tag[0])
if len(tag) == 1:
# 只有一个 tag, 无别名
self.tag_map[this_tag] = [this_tag]
self.logger.info(f'添加 tag {this_tag}')
self.logger.debug(f'添加 tag {this_tag}')
# len > 1
elif len(tag) >= 1:
# 寻找 "别名"
self.logger.debug(tag)
pprint(tag)
tag_names = tag[1]['children']
tag_list_start = get_all_iter(tag_names, 'list_item', lambda ast: get_text(ast) == '别名')
tag_list_start, node = next(tag_list_start)
if tag_list_start == -1:
continue
sub_names = []
for sub_name in node['children'][1]['children']:
sub_names.append(get_text(sub_name))
self.logger.debug(f"添加 tag {this_tag} 和别名 {sub_names}")
self.tag_map[this_tag] = sub_names
# 获取 tag 列表
self.logger.info(f'可用 tag: {self.tag_map}')
with open('./tags/tag.toml', 'w', encoding='utf-8') as file:
file.write(dumps(self.tag_map))
if __name__ == "__main__":

View File

@ -33,3 +33,4 @@
- 只是用来测试的
- 别名
- 测试1
- 测试2