2023-12-14 10:09:40 +08:00
|
|
|
from pathlib import Path
from pprint import pprint
from typing import Callable, Iterator, Optional, Union

import mistune
from lib_not_dr import loggers
from lib_not_dr.types.options import Options
from qtoml.decoder import loads
from qtoml.encoder import dumps
|
2023-12-14 10:09:40 +08:00
|
|
|
|
|
|
|
|
2023-12-15 09:03:08 +08:00
|
|
|
# Markdown parser configured with the "ast" renderer; the rest of this module
# treats its result as a list of node dicts (indexing, len(), node["type"]).
ast_markdown = mistune.create_markdown(renderer="ast")

# Alias used in annotations for a parsed document: a list of node dicts.
ast_type = list[dict[str, Union[str, dict]]]
|
2023-12-14 10:09:40 +08:00
|
|
|
|
|
|
|
|
2023-12-15 09:03:08 +08:00
|
|
|
def get_all_iter(
    ast: list[dict],
    type: str,
    func: Optional[Callable[[dict], bool]] = None,
) -> Iterator[tuple[int, dict]]:
    """Yield ``(index, node)`` for every node in ``ast`` of the given type.

    Args:
        ast: Sequence of node dicts; each node must have a ``"type"`` key.
        type: Node type to match against ``node["type"]``.
        func: Optional extra predicate. When given, a node of the right type
            is only yielded if ``func(node)`` is truthy.

    Yields:
        ``(index, node)`` for each match, in order, followed by one final
        sentinel ``(-1, {})``. The sentinel means ``next(get_all_iter(...))``
        never raises ``StopIteration``: callers test the index for ``-1``
        to detect "no match". Code that consumes the whole iterator must
        ignore that last element.
    """
    for index, node in enumerate(ast):
        if node["type"] != type:
            continue
        if func is not None and not func(node):
            continue
        yield index, node
    # Sentinel marking "no (more) matches"; see the docstring.
    yield -1, {}
|
2023-12-15 09:03:08 +08:00
|
|
|
|
|
|
|
|
|
|
|
def get_text(ast: dict) -> Union[str, None]:
    """Return the first text string found under a single AST node.

    A ``text`` node yields its ``raw`` value. For ``link``, ``block_text``,
    ``list_item`` and ``list`` nodes the search descends into the first child
    only (later siblings are never inspected).

    Args:
        ast: One node dict with at least a ``"type"`` key.

    Returns:
        The text found, or ``None`` when the node is a container without
        children or has a type this function does not handle (the latter is
        also reported on stdout).
    """
    node_type = ast['type']
    if node_type == 'text':
        return ast['raw']
    if node_type in ('link', 'block_text', 'list_item', 'list'):
        children = ast.get('children')
        if not children:
            # Empty container (e.g. a bare list item): there is no text to
            # return, and indexing [0] would raise.
            return None
        return get_text(children[0])
    print('unkown type', ast['type'], ast)
    return None
|
2023-12-15 09:03:08 +08:00
|
|
|
|
|
|
|
|
2023-12-14 10:13:51 +08:00
|
|
|
class TagParser(Options):
    """Collect tags from module readmes and build the tag/alias table.

    ``load_module`` indexes the tags declared by every ``readme.md`` below
    ``module_root``; ``load_tags`` reads the list of known tags (and their
    aliases) from ``./tags/readme.md``.
    """

    # Directory that is searched recursively for ``readme.md`` files.
    module_root: Path
    # tag -> paths (as strings) of the readme files that declare it.
    # NOTE(review): this and ``tag_map`` are class-level mutable defaults, so
    # they are presumably shared between instances unless the ``Options`` base
    # copies them — confirm.
    tags: dict[str, list[str]] = {}
    # tag -> list of names, filled by ``load_tags``.
    tag_map: dict[str, list[str]] = {}
    logger: loggers.logger.Logger = None  # noqa

    def load_module(self, **kwargs):
        """Index every readme under ``module_root`` and write the result.

        Each ``readme.md`` found is passed to ``get_module_data``; the
        accumulated ``tags`` mapping is then written as TOML to
        ``<module_root>/../build/tags.toml`` (parent directories are created).

        ``**kwargs`` is accepted but not used.
        """
        for readme in self.module_root.rglob("readme.md"):
            self.logger.debug(readme.absolute(), tag="load file")
            self.get_module_data(readme)

        tag_toml = dumps(self.tags)
        tag_path = self.module_root / ".." / "build" / "tags.toml"
        tag_path.parent.mkdir(parents=True, exist_ok=True)
        # Opening in "w" mode creates the file, so no separate touch() is needed.
        with open(tag_path, "w", encoding="utf-8") as file:
            file.write(tag_toml)

    def get_module_data(self, module_path: Path):
        """Record the tags declared at the top of one readme file.

        The file is only considered when its first node is a thematic break.
        The ``raw`` content of the node that follows is parsed as TOML, and
        every entry of its ``tags`` value is mapped to ``module_path`` in
        ``self.tags``.

        Args:
            module_path: Path of the readme to read (UTF-8).
        """
        with open(module_path, "r", encoding="utf-8") as f:
            file = f.read()

        ast = ast_markdown(file)
        # Skip files that do not start with the config block. Two nodes are
        # needed: the thematic break and the block that carries the config.
        if len(ast) < 2 or ast[0] != {"type": "thematic_break"}:
            return

        self.logger.info(f"开始解析 {ast[1]}")
        config_dict = loads(ast[1].get("raw", ""))
        self.logger.trace(config_dict)

        if not (tag_list := config_dict.get("tags")):
            self.logger.warn("未找到 tags", tag=str(module_path))
            return

        for tag in tag_list:
            self.tags.setdefault(tag, []).append(str(module_path))

    def init(self, **kwargs) -> bool:
        """Set up the logger and load the tag table.

        NOTE(review): presumably called by the ``Options`` base class during
        construction, which would also define the meaning of the return
        value — confirm against ``lib_not_dr``.

        Returns:
            Always ``False``; the result of ``load_tags`` is not propagated.
        """
        self.logger = loggers.get_logger()
        self.logger.global_level = 0
        self.load_tags()
        return False

    def load_tags(self) -> bool:
        """Parse ``./tags/readme.md`` and write ``./tags/tag.toml``.

        The first list that follows the first level-2 heading is taken as the
        tag list. For each list item:

        * an item with a single child is a tag without aliases and maps to
          ``[tag]``;
        * an item with more children is searched (in its second child) for a
          sub-item whose text is ``别名``; the entries of that sub-item's
          nested list become the value for the tag. Items without such a
          sub-item are not added.

        Empty items, items whose name cannot be extracted and alias entries
        without a nested list are skipped.

        Returns:
            ``False`` if the readme, the level-2 heading or the list is
            missing; ``True`` once ``./tags/tag.toml`` has been written.
        """
        tag_list_path = Path("./tags/readme.md")
        if not tag_list_path.exists():
            self.logger.error("未找到 tags/readme.md")
            return False

        with open(tag_list_path, "r", encoding="utf-8") as f:
            file = f.read()
        document: ast_type = ast_markdown(file)

        # Find the first level-2 heading.
        heading_index, _ = next(
            get_all_iter(
                document, "heading", lambda node: node["attrs"]["level"] == 2
            )
        )
        if heading_index == -1:
            self.logger.error("未找到二级标题")
            return False

        # The tag list is the first list after that heading.
        list_index, list_node = next(
            get_all_iter(document[heading_index + 1:], "list")
        )
        if list_index == -1:
            self.logger.error("未找到 tag 列表")
            return False

        # Keep only real list items; each entry is that item's child nodes.
        tag_items = [
            item["children"]
            for item in list_node["children"]
            if item.get("type") == "list_item"
        ]

        for tag in tag_items:
            if not tag:
                # Empty bullet: there is no node to take a name from.
                continue

            this_tag = get_text(tag[0])
            if this_tag is None:
                # A None key would not be a usable tag name.
                self.logger.warn("无法获取 tag 名称, 已跳过")
                continue

            if len(tag) == 1:
                # Only the tag itself, no aliases.
                self.tag_map[this_tag] = [this_tag]
                self.logger.debug(f'添加 tag {this_tag}')
                continue

            # More than one child: look for the "别名" (alias) entry.
            alias_index, alias_item = next(
                get_all_iter(
                    tag[1].get("children", []),
                    "list_item",
                    lambda item: get_text(item) == "别名",
                )
            )
            if alias_index == -1:
                continue

            alias_children = alias_item["children"]
            if len(alias_children) < 2:
                # The alias entry has no nested list to read names from.
                self.logger.warn(f"tag {this_tag} 的别名项下没有子列表")
                continue

            sub_names = [
                get_text(sub_name)
                for sub_name in alias_children[1].get("children", [])
            ]
            self.logger.debug(f"添加 tag {this_tag} 和别名 {sub_names}")
            self.tag_map[this_tag] = sub_names

        # Report and persist the collected tag table.
        self.logger.info(f'可用 tag: {self.tag_map}')
        with open('./tags/tag.toml', 'w', encoding='utf-8') as file:
            file.write(dumps(self.tag_map))
        return True
|
2023-12-14 10:13:51 +08:00
|
|
|
|
|
|
|
|
2023-12-15 09:03:08 +08:00
|
|
|
if __name__ == "__main__":
    # Script entry point: index the readmes below ./modules and log the
    # resulting tag -> paths mapping.
    tag_parser = TagParser(module_root=Path("modules"))
    tag_parser.load_module()
    tag_parser.logger.info(tag_parser.tags)
|