from pathlib import Path
from typing import Callable, Iterator, Optional, Union
from pprint import pprint

import mistune

from qtoml.encoder import dumps
from qtoml.decoder import loads

from lib_not_dr import loggers
from lib_not_dr.types.options import Options

# Markdown parser configured to return an AST (list of node dicts) instead of HTML.
ast_markdown = mistune.create_markdown(renderer="ast")
ast_type = list[dict[str, Union[str, dict]]]


def get_all_iter(
    ast: ast_type, type: str, func: Optional[Callable[[dict], bool]] = None
) -> Iterator[tuple[int, dict]]:
    """Yield ``(index, node)`` for every top-level node of the given type.

    :param ast: list of AST nodes to scan (only this level, no recursion).
    :param type: value the node's ``"type"`` key must equal. The name shadows
        the builtin but is kept so keyword callers keep working.
    :param func: optional extra predicate; nodes for which it returns a falsy
        value are skipped.
    :return: a generator. After all matches it yields the sentinel
        ``(-1, {})`` once, so ``next()`` on it never raises ``StopIteration``
        for the first call and callers can test the index against ``-1``.
    """
    for i, node in enumerate(ast):
        if node["type"] != type:
            continue
        if func is not None and not func(node):
            continue
        yield i, node
    # Sentinel meaning "no (more) matches".
    yield -1, {}


def get_text(ast: dict) -> Optional[str]:
    """Return the first text found under a node.

    ``text`` nodes return their ``"raw"`` value; ``link``, ``block_text``,
    ``list_item`` and ``list`` nodes are followed through their first child.
    Any other node type is reported with ``print`` and ``None`` is returned.
    """
    if ast["type"] == "text":
        return ast["raw"]
    if ast["type"] in ("link", "block_text", "list_item", "list"):
        return get_text(ast["children"][0])
    print("unkown type", ast["type"], ast)
    return None


class TagParser(Options):
    """Collect tags from module readme files and from ``tags/readme.md``."""

    # Directory searched recursively for ``readme.md`` files.
    module_root: Path
    # tag -> list of readme paths (as strings) declaring that tag.
    # NOTE(review): mutable class-level default; whether instances share it
    # depends on how ``Options`` handles defaults - confirm.
    tags: dict[str, list[str]] = {}
    # tag -> list of names written to ``tags/tag.toml`` (see ``load_tags``).
    tag_map: dict[str, list[str]] = {}
    logger: loggers.logger.Logger = None  # noqa

    def load_module(self, **kwargs):
        """Parse every ``readme.md`` under ``module_root`` and dump the tags.

        The collected ``self.tags`` mapping is written as TOML to
        ``<module_root>/../build/tags.toml``; the directory is created if
        missing.
        """
        for readme in self.module_root.rglob("readme.md"):
            self.logger.debug(readme.absolute(), tag="load file")
            self.get_module_data(readme)
        tag_toml = dumps(self.tags)
        tag_path = self.module_root / ".." / "build" / "tags.toml"
        tag_path.parent.mkdir(parents=True, exist_ok=True)
        # Opening for writing creates the file, so no separate touch() is needed.
        tag_path.write_text(tag_toml, encoding="utf-8")

    def get_module_data(self, module_path: Path):
        """Read one readme and register its tags in ``self.tags``.

        The file is only considered when its first AST node is a thematic
        break; the ``"raw"`` value of the second node is then parsed as TOML
        and its ``tags`` entry is used.

        :param module_path: path of the readme file to parse.
        """
        ast = ast_markdown(module_path.read_text(encoding="utf-8"))
        # Skip files that do not start with a config block. Two nodes are
        # required because the config itself is read from ast[1]; a file
        # holding only the thematic break used to raise IndexError here.
        if len(ast) < 2 or ast[0] != {"type": "thematic_break"}:
            return
        self.logger.info(f"开始解析 {ast[1]}")
        config_code = ast[1].get("raw", "")
        config_dict = loads(config_code)
        self.logger.trace(config_dict)
        if not (tag_list := config_dict.get("tags")):
            self.logger.warn("未找到 tags", tag=str(module_path))
            return
        path_str = str(module_path)
        for tag in tag_list:
            self.tags.setdefault(tag, []).append(path_str)

    def init(self, **kwargs) -> bool:
        """Set up the logger and load the tag list.

        Always returns ``False``.
        NOTE(review): the meaning of the return value is defined by
        ``Options`` - confirm against lib_not_dr.
        """
        self.logger = loggers.get_logger()
        self.logger.global_level = 0
        self.load_tags()
        return False

    def load_tags(self) -> Optional[bool]:
        """Build ``self.tag_map`` from ``./tags/readme.md`` and save it.

        The tag list is the first ``list`` node found after the first
        level-2 heading. Each list item is one tag; an item with a nested
        list may carry a ``别名`` (alias) entry whose own nested list holds
        the names stored for that tag. The result is written as TOML to
        ``./tags/tag.toml``.

        :return: ``False`` when the readme, the level-2 heading or the tag
            list is missing; ``None`` on success.
        """
        tag_list_path = Path("./tags/readme.md")
        if not tag_list_path.exists():
            self.logger.error("未找到 tags/readme.md")
            return False
        tag_ast: ast_type = ast_markdown(tag_list_path.read_text(encoding="utf-8"))

        # Locate the first level-2 heading.
        heading_index, _ = next(
            get_all_iter(
                tag_ast, "heading", lambda node: node["attrs"]["level"] == 2
            )
        )
        if heading_index == -1:
            self.logger.error("未找到二级标题")
            return False

        # The tag list is the first list after that heading.
        tag_ast = tag_ast[heading_index + 1:]
        list_index, _ = next(get_all_iter(tag_ast, "list"))
        if list_index == -1:
            self.logger.error("未找到 tag 列表")
            return False

        # Keep only real list items; each entry is that item's child nodes.
        tag_items = [
            item["children"]
            for item in tag_ast[list_index]["children"]
            if item.get("type") == "list_item"
        ]
        for tag in tag_items:
            this_tag = get_text(tag[0])
            if len(tag) == 1:
                # Only the tag itself, no aliases.
                self.tag_map[this_tag] = [this_tag]
                self.logger.debug(f'添加 tag {this_tag}')
                continue

            # More than one child: look for the "别名" (alias) entry in the
            # nested list.
            tag_names = tag[1]['children']
            alias_index, alias_node = next(
                get_all_iter(
                    tag_names, 'list_item', lambda ast: get_text(ast) == '别名'
                )
            )
            if alias_index == -1:
                # NOTE(review): a tag with a nested list but no alias entry
                # is not added to tag_map at all - confirm this is intended.
                continue
            sub_names = [
                get_text(sub_name)
                for sub_name in alias_node['children'][1]['children']
            ]
            self.logger.debug(f"添加 tag {this_tag} 和别名 {sub_names}")
            self.tag_map[this_tag] = sub_names

        # Report and persist the resulting tag map.
        self.logger.info(f'可用 tag: {self.tag_map}')
        with open('./tags/tag.toml', 'w', encoding='utf-8') as file:
            file.write(dumps(self.tag_map))


if __name__ == "__main__":
    parser = TagParser(module_root=Path("modules"))
    parser.load_module()
    parser.logger.info(parser.tags)