解析中

This commit is contained in:
shenjack 2023-12-15 09:03:08 +08:00
parent d724b9b06f
commit be9bbcb884
Signed by: shenjack
GPG Key ID: 7B1134A979775551
2 changed files with 72 additions and 37 deletions

View File

@ -1,5 +1,5 @@
from pathlib import Path from pathlib import Path
from typing import Union from typing import Union, Callable
from pprint import pprint from pprint import pprint
import mistune import mistune
@ -10,55 +10,78 @@ from lib_not_dr import loggers
from lib_not_dr.types.options import Options from lib_not_dr.types.options import Options
ast_markdown = mistune.create_markdown(renderer='ast') ast_markdown = mistune.create_markdown(renderer="ast")
ast_type = list[dict[str, Union[str, dict]]] ast_type = list[dict[str, Union[str, dict]]]
def get_all_iter(
    ast: "ast_type",
    type: str,
    func: Union[Callable[[dict], bool], None] = None,
):
    """Yield ``(index, node)`` for every top-level node of the given type.

    This is a generator. A plain ``return -1, {}`` inside a generator is
    never seen by ``next()`` (it only ends the iteration), so the
    "not found" sentinel is yielded instead. That lets callers write
    ``index, node = next(get_all_iter(...))`` and test ``index == -1``.

    Args:
        ast: List of markdown AST node dicts; only this level is scanned,
            children are not descended into.
        type: Value a node's ``"type"`` key must equal to be selected.
            (The name shadows the builtin but is kept because it is part
            of the signature.)
        func: Optional predicate called with each node of the requested
            type; nodes for which it returns a falsy value are skipped.

    Yields:
        ``(index, node)`` for each matching node, in order. If no node
        matches at all, a single ``(-1, {})`` sentinel is yielded.
    """
    matched = False
    for index, node in enumerate(ast):
        if node["type"] != type:
            continue
        if func is not None and not func(node):
            continue
        matched = True
        yield index, node
    if not matched:
        # Sentinel for "nothing found": keeps next() from raising
        # StopIteration on callers that compare the index with -1.
        yield -1, {}
def get_text(ast: "ast_type") -> str:
    """Return the first text string found in a list of AST nodes.

    Nodes are visited in order, depth first. A node whose ``"type"`` is
    ``"text"`` contributes its ``"raw"`` value; any other node that has a
    list under ``"children"`` (for example ``link`` or ``block_text``)
    is searched recursively.

    Args:
        ast: List of markdown AST node dicts. May be empty.

    Returns:
        The raw text of the first text node encountered, or ``""`` when
        the list is empty or contains no text node.
    """
    for node in ast:
        if node.get("type") == "text":
            return node.get("raw", "")
        children = node.get("children")
        if isinstance(children, list):
            found = get_text(children)
            if found:
                return found
    # Nothing textual anywhere in this subtree.
    return ""
class TagParser(Options): class TagParser(Options):
module_root: Path module_root: Path
tags: dict[str, list[str]] = {} tags: dict[str, list[str]] = {}
tag_map: dict[str, list[str]] = {}
logger: loggers.logger.Logger = None # noqa logger: loggers.logger.Logger = None # noqa
# load_module: collect tags from every readme.md under module_root and
# write the aggregated mapping to <module_root>/../build/tags.toml.
# NOTE(review): this is a side-by-side diff rendering, so each row below
# shows the statement twice (old side, then new side).
def load_module(self, **kwargs): def load_module(self, **kwargs):
# rglob walks module_root recursively; each readme is handed to
# get_module_data, which fills self.tags.
for readme in self.module_root.rglob('readme.md'): for readme in self.module_root.rglob("readme.md"):
self.logger.debug(readme.absolute(), tag="load file") self.logger.debug(readme.absolute(), tag="load file")
self.get_module_data(readme) self.get_module_data(readme)
# dumps is imported outside the visible rows; presumably a TOML
# serializer given the target file name - TODO confirm.
tag_toml = dumps(self.tags) tag_toml = dumps(self.tags)
tag_path = self.module_root / ".." / "build" / "tags.toml" tag_path = self.module_root / ".." / "build" / "tags.toml"
# Make sure the build directory and the output file exist before writing.
tag_path.parent.mkdir(parents=True, exist_ok=True) tag_path.parent.mkdir(parents=True, exist_ok=True)
tag_path.touch(exist_ok=True) tag_path.touch(exist_ok=True)
with open(tag_path, 'w', encoding='utf-8') as file: with open(tag_path, "w", encoding="utf-8") as file:
file.write(tag_toml) file.write(tag_toml)
# get_module_data: parse one readme, read the "tags" entry from its leading
# config block and record the readme path under each tag in self.tags.
# module_path is the path of the readme file to read (opened as UTF-8).
# NOTE(review): this is a side-by-side diff rendering, so each row below
# shows the statement twice (old side, then new side).
def get_module_data(self, module_path: Path): def get_module_data(self, module_path: Path):
with open(module_path, 'r', encoding='utf-8') as f: with open(module_path, "r", encoding="utf-8") as f:
file = f.read() file = f.read()
# ast_markdown is the mistune parser created with the "ast" renderer.
ast = ast_markdown(file) ast = ast_markdown(file)
if len(ast) == 0: if len(ast) == 0:
return return
if ast[0] != {'type': 'thematic_break'}: if ast[0] != {"type": "thematic_break"}:
# Skip files that do not start with a comment block
return return
# NOTE(review): ast[1] is indexed without checking that a second node
# exists; a file holding only a thematic break would raise IndexError.
self.logger.info(f"开始解析 {ast[1]}") self.logger.info(f"开始解析 {ast[1]}")
config_code = ast[1].get("raw", "") config_code = ast[1].get("raw", "")
# loads is imported outside the visible rows; presumably the parser
# matching dumps - TODO confirm.
config_dict = loads(config_code) config_dict = loads(config_code)
self.logger.trace(config_dict) self.logger.trace(config_dict)
# A missing or empty "tags" entry is logged as a warning and the file is skipped.
if not (tag_list := config_dict.get('tags')): if not (tag_list := config_dict.get("tags")):
self.logger.warn("未找到 tags", tag=str(module_path)) self.logger.warn("未找到 tags", tag=str(module_path))
return return
# Group readme paths (as strings) by tag name.
for tag in tag_list: for tag in tag_list:
if tag not in self.tags: if tag not in self.tags:
self.tags[tag] = [module_path.__str__()] self.tags[tag] = [module_path.__str__()]
else: else:
self.tags[tag].append(module_path.__str__()) self.tags[tag].append(module_path.__str__())
def init(self, **kwargs) -> bool: def init(self, **kwargs) -> bool:
self.logger = loggers.get_logger() self.logger = loggers.get_logger()
self.logger.global_level = 0 self.logger.global_level = 0
@ -71,42 +94,50 @@ class TagParser(Options):
self.logger.error("未找到 tags/readme.md") self.logger.error("未找到 tags/readme.md")
return False return False
with open(tag_list_path, 'r', encoding='utf-8') as f: with open(tag_list_path, "r", encoding="utf-8") as f:
file = f.read() file = f.read()
tag_ast: ast_type = ast_markdown(file) tag_ast: ast_type = ast_markdown(file)
start_tag = -1 start_tag = -1
for i, node in enumerate(tag_ast): # 找到二级标题
if node['type'] != 'heading': start_tag = get_all_iter(
continue tag_ast, "heading", lambda node: node["attrs"]["level"] == 2
if node['attrs']['level'] != 2: )
continue start_tag, node = next(start_tag)
# 说明是二级标题
start_tag = i
break
if start_tag == -1: if start_tag == -1:
self.logger.error("未找到二级标题") self.logger.error("未找到二级标题")
return False return False
# 获取 tag # 获取 tag
tag_ast: ast_type = tag_ast[start_tag + 1:] tag_ast: ast_type = tag_ast[start_tag + 1 :]
start_tag = get_all_iter(tag_ast, "list")
start_tag = -1 start_tag, node = next(start_tag)
for i, node in enumerate(tag_ast):
if node['type'] != 'list':
continue
start_tag = i
break
if start_tag == -1: if start_tag == -1:
self.logger.error("未找到 tag 列表") self.logger.error("未找到 tag 列表")
return False return False
tag_ast: ast_type = [item['children'] for item in tag_ast[start_tag]['children'] if item.get('type') == 'list_item'] tag_ast: ast_type = [
item["children"]
for item in tag_ast[start_tag]["children"]
if item.get("type") == "list_item"
]
# 顺手过滤一下 # 顺手过滤一下
pprint(tag_ast) # pprint(tag_ast)
self.logger.info(tag_ast) # self.logger.info(tag_ast)
for tag in tag_ast:
# self.logger.debug([(item['type'], item['children']) for item in tag])
this_tag = get_text(tag)
if len(tag) == 1:
# 只有一个 tag, 无别名
self.tag_map[this_tag] = [this_tag]
self.logger.info(f'添加 tag {this_tag}')
# len > 1
elif len(tag) >= 1:
# 寻找 "别名"
self.logger.debug(tag)
pprint(tag)
# 获取 tag 列表 # 获取 tag 列表
# Script entry point: build a TagParser rooted at ./modules, run the module
# scan, then log the collected tag mapping.
# NOTE(review): this is a side-by-side diff rendering, so each row below
# shows the statement twice (old side, then new side).
if __name__ == '__main__': if __name__ == "__main__":
parser = TagParser(module_root = Path("modules")) parser = TagParser(module_root=Path("modules"))
parser.load_module() parser.load_module()
parser.logger.info(parser.tags) parser.logger.info(parser.tags)

View File

@ -29,3 +29,7 @@
- [静音](./tags/silent.md) : 不会发出响声的模块。 - [静音](./tags/silent.md) : 不会发出响声的模块。
- [单片](./tags/single_chip.md) : 1高或1宽的模块。 - [单片](./tags/single_chip.md) : 1高或1宽的模块。
- [传统红石](./tags/traditional_redstone.md) : 仅使用红石方块特性,不使用墙电,水电,树电,脚电等其他特性的模块。 - [传统红石](./tags/traditional_redstone.md) : 仅使用红石方块特性,不使用墙电,水电,树电,脚电等其他特性的模块。
- 测试 tag
- 只是用来测试的
- 别名
- 测试1