📚 docs(codestable): 添加 CodeStable 项目知识库
添加 codestable/ 目录,包含项目架构文档、需求文档、 功能设计、问题追踪、参考文档、路线图及工具配置 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
314
codestable/tools/search-yaml.py
Normal file
314
codestable/tools/search-yaml.py
Normal file
@@ -0,0 +1,314 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
search-yaml.py — Generic YAML-frontmatter search tool for markdown document directories.
|
||||
|
||||
Works on any directory of .md files that use YAML frontmatter (--- ... ---).
|
||||
Designed for AI agent use: fast, structured output, no required external dependencies.
|
||||
|
||||
Filter syntax (--filter flag, repeatable, AND logic):
|
||||
key=value Exact match on a scalar field (case-insensitive)
|
||||
key~=value Substring match on a string field, or element-in for list fields
|
||||
|
||||
Usage examples:
|
||||
# Search codestable/compound (learning / trick / decision / explore docs share this dir)
|
||||
python codestable/tools/search-yaml.py --dir codestable/compound --filter doc_type=learning --filter track=pitfall
|
||||
python codestable/tools/search-yaml.py --dir codestable/compound --filter doc_type=trick --filter tags~=prisma
|
||||
python codestable/tools/search-yaml.py --dir codestable/compound --filter doc_type=decision --filter status=active --full
|
||||
|
||||
# Full-text search in body + frontmatter values
|
||||
python codestable/tools/search-yaml.py --dir codestable/compound --query "shadow database"
|
||||
|
||||
# JSON output for AI agent consumption
|
||||
python codestable/tools/search-yaml.py --dir codestable/compound --filter doc_type=learning --filter track=knowledge --json
|
||||
|
||||
# Sort by a frontmatter date field (works on any ISO-8601 date string, YAML date, or sortable value)
|
||||
python codestable/tools/search-yaml.py --dir codestable/library-docs --sort-by last_reviewed --order asc # oldest first (stalest)
|
||||
python codestable/tools/search-yaml.py --dir codestable/compound --sort-by date --order desc # newest first
|
||||
|
||||
# Works on any yaml-frontmatter markdown directory
|
||||
python codestable/tools/search-yaml.py --dir docs/decisions --filter status=accepted
|
||||
python codestable/tools/search-yaml.py --dir content/posts --filter tags~=python --query "asyncio"
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Frontmatter parsing (PyYAML used when available, builtin fallback otherwise)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _parse_yaml_scalar(val: str):
|
||||
val = val.strip()
|
||||
if val.startswith("[") and val.endswith("]"):
|
||||
inner = val[1:-1]
|
||||
return [item.strip().strip("'\"") for item in inner.split(",") if item.strip()]
|
||||
lower = val.lower()
|
||||
if lower in ("true", "yes"):
|
||||
return True
|
||||
if lower in ("false", "no"):
|
||||
return False
|
||||
if lower in ("null", "~", ""):
|
||||
return None
|
||||
return val
|
||||
|
||||
|
||||
def parse_frontmatter(text: str) -> tuple[dict, str]:
    """
    Split a markdown document into (frontmatter_dict, body_text).

    Returns ({}, full_text) when the text does not start with ``---`` or no
    closing ``---`` line is found. The frontmatter is parsed with PyYAML when
    it can be imported; otherwise (or when PyYAML fails on the block) a
    minimal builtin parser handles ``key: value`` lines with scalar values
    and inline lists. Frontmatter that is valid YAML but not a mapping
    (a bare list or scalar) yields an empty dict, so callers can always
    treat the first element as a dict.
    """
    if not text.startswith("---"):
        return {}, text

    end = text.find("\n---", 3)
    if end == -1:
        return {}, text

    fm_text = text[3:end].strip()
    body = text[end + 4:].strip()

    try:
        import yaml  # type: ignore
    except ImportError:
        yaml = None  # optional dependency: use the builtin parser below

    if yaml is not None:
        try:
            loaded = yaml.safe_load(fm_text)
        except Exception:
            # Deliberate best-effort: PyYAML rejects some hand-written
            # frontmatter that the lenient fallback below still reads.
            pass
        else:
            # Callers use .get() / .values(), so only a mapping is usable.
            return (loaded if isinstance(loaded, dict) else {}), body

    # Minimal fallback: handles scalar values and inline lists
    meta: dict = {}
    for line in fm_text.splitlines():
        stripped = line.strip()
        # Skip blanks, comments (indented ones too) and lines without a key.
        if not stripped or stripped.startswith("#") or ":" not in stripped:
            continue
        key, _, raw = stripped.partition(":")
        meta[key.strip()] = _parse_yaml_scalar(raw)

    return meta, body
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Document loading
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def load_documents(directory: Path) -> list[dict]:
    """
    Load every ``*.md`` file under *directory* (recursively, in sorted order).

    Each returned dict has:
        "file": path relative to *directory* (string)
        "path": the path as produced by ``rglob`` (string)
        "meta": frontmatter dict from ``parse_frontmatter``
        "body": document text after the frontmatter

    Files that cannot be read or are not valid UTF-8 are reported on stderr
    and skipped, so one bad file does not abort the whole search.
    """
    docs = []
    for md_file in sorted(directory.rglob("*.md")):
        try:
            # utf-8-sig drops a leading BOM, which would otherwise hide the
            # opening "---" from the frontmatter check.
            text = md_file.read_text(encoding="utf-8-sig")
        except (OSError, UnicodeDecodeError) as exc:
            # UnicodeDecodeError is a ValueError, not an OSError, so it must
            # be listed explicitly.
            print(f"[warn] Cannot read {md_file.name}: {exc}", file=sys.stderr)
            continue
        meta, body = parse_frontmatter(text)
        docs.append({
            "file": str(md_file.relative_to(directory)),
            "path": str(md_file),
            "meta": meta,
            "body": body,
        })
    return docs
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Filter parsing and evaluation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class Filter:
    """
    Parsed representation of a single --filter expression.

    Attributes:
        key: Frontmatter field name the filter applies to.
        value: Text to compare against (comparison is case-insensitive).
        operator: "exact" for ``key=value``, "contains" for ``key~=value``.
    """

    def __init__(self, raw: str):
        """
        Parse ``key=value`` or ``key~=value``.

        Raises:
            argparse.ArgumentTypeError: if there is no ``=`` or the key is empty.
        """
        # Split on the FIRST "=" so the value may itself contain "=" or "~="
        # (e.g. "title=a~=b" is an exact match on "title", not a contains
        # match on the key "title=a").
        left, sep, right = raw.partition("=")
        if left.endswith("~"):
            operator = "contains"
            left = left[:-1]
        else:
            operator = "exact"

        key = left.strip()
        if not sep or not key:
            raise argparse.ArgumentTypeError(
                f"Invalid filter expression {raw!r}. "
                "Use 'key=value' for exact match or 'key~=value' for substring/list-contains match."
            )

        self.key = key
        self.value = right.strip()
        self.operator = operator

    def matches(self, meta: dict) -> bool:
        """Return True when *meta* has the field and it satisfies this filter."""
        field_val = meta.get(self.key)
        if field_val is None:
            return False

        wanted = self.value.lower()
        if self.operator == "exact":
            return str(field_val).lower() == wanted

        # contains: substring for strings, element-in for lists
        if isinstance(field_val, list):
            return any(wanted == str(item).lower() for item in field_val)
        return wanted in str(field_val).lower()

    def __repr__(self):
        op = "~=" if self.operator == "contains" else "="
        return f"Filter({self.key}{op}{self.value})"
|
||||
|
||||
|
||||
def parse_filter(raw: str) -> Filter:
    """Turn one raw ``--filter`` argument into a Filter (argparse ``type=`` hook)."""
    parsed = Filter(raw)
    return parsed
|
||||
|
||||
|
||||
_MISSING = object()
|
||||
|
||||
|
||||
def _sort_key(doc: dict, field: str):
|
||||
"""
|
||||
Sort key for --sort-by. Docs missing the field sort to the end regardless
|
||||
of --order. Dates (datetime.date / datetime.datetime) and strings are both
|
||||
normalized to their string form — ISO 8601 date strings sort the same
|
||||
lexicographically as YAML-parsed date objects' isoformat().
|
||||
"""
|
||||
val = doc["meta"].get(field, _MISSING)
|
||||
if val is _MISSING or val is None:
|
||||
return (1, "")
|
||||
try:
|
||||
return (0, val.isoformat()) # datetime.date / datetime.datetime
|
||||
except AttributeError:
|
||||
return (0, str(val))
|
||||
|
||||
|
||||
def doc_matches(doc: dict, filters: list[Filter], query: str | None) -> bool:
    """
    Decide whether *doc* satisfies every filter and, if given, the text query.

    Filters are combined with AND. The query is a case-insensitive substring
    search over the document body followed by the space-joined string forms
    of all frontmatter values. An empty or None query always passes.
    """
    meta = doc["meta"]

    if any(not flt.matches(meta) for flt in filters):
        return False

    if not query:
        return True

    meta_text = " ".join(str(value) for value in meta.values())
    searchable = doc["body"].lower() + " " + meta_text.lower()
    return query.lower() in searchable
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Output formatting
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _meta_summary(meta: dict) -> str:
|
||||
"""One-line summary of frontmatter fields, skipping slug/date for brevity."""
|
||||
skip = {"slug"}
|
||||
parts = []
|
||||
for k, v in meta.items():
|
||||
if k in skip:
|
||||
continue
|
||||
if isinstance(v, list):
|
||||
parts.append(f"{k}=[{', '.join(str(i) for i in v)}]")
|
||||
else:
|
||||
parts.append(f"{k}={v}")
|
||||
return " ".join(parts)
|
||||
|
||||
|
||||
def format_summary(doc: dict) -> str:
    """Return a ``### <file>`` heading line followed by the one-line frontmatter summary."""
    heading = f"### {doc['file']}"
    return "\n".join((heading, _meta_summary(doc["meta"])))
|
||||
|
||||
|
||||
def format_full(doc: dict) -> str:
    """Return the summary block, a blank line, then the document body."""
    summary = format_summary(doc)
    return "".join((summary, "\n\n", doc["body"]))
|
||||
|
||||
|
||||
def print_text(results: list[dict], full: bool) -> None:
    """
    Print results as human-readable text on stdout.

    Emits a "Found N document(s)." header, then one entry per document
    (summary only, or summary plus body when *full* is true), with entries
    separated by a 60-character horizontal rule.
    """
    print(f"Found {len(results)} document(s).\n")
    divider = "\n" + "─" * 60 + "\n"
    print(divider.join(
        format_full(doc) if full else format_summary(doc)
        for doc in results
    ))
|
||||
|
||||
|
||||
def print_json(results: list[dict], full: bool) -> None:
    """
    Print results on stdout as a JSON array of {"file", "meta", "body"} objects.

    Unless *full* is true, bodies longer than 400 characters are cut to 400
    and suffixed with an ellipsis. Frontmatter values that JSON cannot
    represent natively (notably the date/datetime objects PyYAML produces for
    ``date: 2024-01-02``) are written as strings instead of raising TypeError.
    """

    def _jsonable(value):
        # Dates/datetimes become ISO-8601 text; any other non-JSON type
        # falls back to its plain string form.
        to_iso = getattr(value, "isoformat", None)
        if callable(to_iso):
            return to_iso()
        return str(value)

    output = []
    for doc in results:
        body = doc["body"]
        if not full and len(body) > 400:
            body = body[:400] + "…"
        output.append({"file": doc["file"], "meta": doc["meta"], "body": body})
    print(json.dumps(output, ensure_ascii=False, indent=2, default=_jsonable))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _build_parser() -> argparse.ArgumentParser:
    """
    Construct the command-line parser.

    The module docstring is reused verbatim as the help epilog, hence the
    raw formatter. Options are declared as a table and registered in order.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Generic YAML-frontmatter search across a directory of markdown files.",
        epilog=__doc__,
    )

    option_specs = [
        (("--dir",), dict(
            metavar="DIR",
            required=True,
            help="Directory of .md files to search.",
        )),
        (("--filter", "-f"), dict(
            metavar="EXPR",
            dest="filters",
            type=parse_filter,
            action="append",
            default=[],
            help="Frontmatter filter expression. Repeatable (AND logic). "
                 "key=value for exact match; key~=value for substring (strings) or element-in (lists).",
        )),
        (("--query", "-q"), dict(
            metavar="TEXT",
            help="Full-text search in document body and frontmatter values.",
        )),
        (("--full",), dict(
            action="store_true",
            help="Print full document body instead of just the frontmatter summary.",
        )),
        (("--json",), dict(
            dest="as_json",
            action="store_true",
            help="Output results as a JSON array.",
        )),
        (("--sort-by",), dict(
            metavar="FIELD",
            dest="sort_by",
            help="Sort results by a frontmatter field (e.g. last_reviewed, date, updated_at). "
                 "ISO-8601 date strings and YAML-parsed dates both sort correctly. "
                 "Docs missing the field are pushed to the end.",
        )),
        (("--order",), dict(
            choices=("asc", "desc"),
            default="desc",
            help="Sort order when --sort-by is set. Default: desc (newest first).",
        )),
    ]
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)

    return cli
|
||||
|
||||
|
||||
def _resolve_directory(dir_arg: str) -> Path:
|
||||
directory = Path(dir_arg)
|
||||
if not directory.exists():
|
||||
print(f"[error] Directory not found: {directory}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
if not directory.is_dir():
|
||||
print(f"[error] Not a directory: {directory}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
return directory
|
||||
|
||||
|
||||
def _sort_results(results: list[dict], sort_by: str, order: str) -> list[dict]:
|
||||
def has_field(d: dict) -> bool:
|
||||
return sort_by in d["meta"] and d["meta"][sort_by] is not None
|
||||
|
||||
present = [d for d in results if has_field(d)]
|
||||
missing = [d for d in results if not has_field(d)]
|
||||
present.sort(key=lambda d: _sort_key(d, sort_by), reverse=(order == "desc"))
|
||||
return present + missing
|
||||
|
||||
|
||||
def main() -> None:
    """
    CLI entry point: parse arguments, load documents, filter, sort, print.

    When nothing is found (no .md files, or no document matches), text mode
    prints a one-line notice on stdout. In --json mode the notice goes to
    stderr and stdout still receives a valid (empty) JSON array, so
    machine consumers never have to parse a plain-text message.
    """
    args = _build_parser().parse_args()
    directory = _resolve_directory(args.dir)

    docs = load_documents(directory)
    results = [d for d in docs if doc_matches(d, args.filters, args.query)]

    if not results:
        if docs:
            notice = "No matching documents found."
        else:
            notice = f"No .md files found in {directory}"
        if args.as_json:
            # Keep stdout machine-parseable; the notice is diagnostics only.
            print(notice, file=sys.stderr)
            print_json([], full=args.full)
        else:
            print(notice)
        return

    if args.sort_by:
        results = _sort_results(results, args.sort_by, args.order)

    if args.as_json:
        print_json(results, full=args.full)
    else:
        print_text(results, full=args.full)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user