mcaxl/src/mcp_cucm_axl/docs_loader.py
Ryan Malloy 8b3da9d729 Initial mcp-cucm-axl
Read-only MCP server for Cisco Unified CM 15 AXL — built for LLM-driven
cluster auditing, with a particular focus on the Route Plan Report:
partitions, calling search spaces, route patterns, translation patterns,
called/calling party transformations, and digit-discard instructions.

Pairs intentionally with the sibling mcp-cisco-docs server (live
cluster state + vendor docs in one LLM context).

Architecture:
  - zeep SOAP client to CUCM AXL
  - WSDL bootstrap from Cisco's axlsqltoolkit.zip (auto-extract on
    first launch; zip is gitignored, vendor-licensed)
  - SQLite response cache at ~/.cache/mcp-cucm-axl/responses/
  - Schema-grounded prompts that pull chunks from the sibling
    cisco-docs index (docs_loader.py)

Read-only by structural guarantee — never registers AXL write methods
(no executeSQLUpdate, no add*/update*/remove*/apply*/reset*/restart*
tools). SQL queries also client-side validated (sql_validator.py) to
begin with SELECT or WITH.

Tools exposed:
  Foundational: axl_version, axl_sql, axl_list_tables,
                axl_describe_table, cache_stats, cache_clear
  Route plan:   route_partitions, route_calling_search_spaces,
                route_patterns, route_inspect_pattern,
                route_lists_and_groups, route_translation_chain,
                route_digit_discard_instructions

Prompts (schema-grounded):
  route_plan_overview, investigate_pattern, audit_routing,
  cucm_sql_help

Tests cover cache, docs_loader, normalize, sql_validator, wildcard.
2026-04-25 20:29:18 -06:00

153 lines
5.4 KiB
Python

"""Read the sibling cisco-docs index and surface chunks for prompt enrichment.
We deliberately do NOT load sentence-transformers here (would add ~500MB to
the dep tree). Prompt parameters are well-bounded (topic strings, audit-type
enums, table names), so substring-and-keyword matching against chunk text
and heading_path gets us most of the value.
For free-text semantic queries, the prompt instructs the LLM to invoke the
sibling cisco-docs MCP server's `search_docs` tool — composition over
duplication.
Doc-name weighting: the cisco-docs index for CUCM is dominated by CLI
reference chunks (~475 of 511) where most chunks are command syntax with
no conceptual content. We bias toward conceptual docs (system-config,
feature-config, admin) and penalize cli-reference for topical questions.
The bias only matters for ranking — every doc still gets matched.
"""
from __future__ import annotations
import json
import os
import sys
from pathlib import Path
# Location of the sibling cisco-docs index inside this monorepo. When
# mcp-cucm-axl runs outside that layout, point the CISCO_DOCS_INDEX_PATH
# environment variable at the index directory instead.
_DEFAULT_INDEX_DIR = Path("/home/rpm/bingham/docs/src/assets/.cisco-docs-index")

# Per-document ranking multipliers, keyed by the `doc` field of an indexed
# chunk. Values above 1.0 favour a document for conceptual prompts, values
# below 1.0 push it down the ranking.
_DOC_WEIGHTS: dict[str, float] = {
    "system-config-guide": 3.0,
    "feature-config-guide": 2.5,
    "admin-guide": 2.0,
    "interop-sip-trunking-guide": 1.5,
    "security-guide": 1.2,
    "recording-use-cases": 1.0,
    "rtmt-guide": 0.8,
    # Mostly command syntax, so it carries little conceptual signal.
    "cli-reference": 0.3,
    "release-notes": 0.5,
    "hardware-compat": 0.2,
    "server-os-compat": 0.2,
}
class DocsIndex:
"""In-memory chunk store with keyword filtering. Light, fast, no torch."""
def __init__(self, chunks: list[dict], meta: dict):
self.chunks = chunks
self.meta = meta
@classmethod
def load(cls, index_dir: Path | None = None) -> "DocsIndex | None":
index_dir = index_dir or Path(
os.environ.get("CISCO_DOCS_INDEX_PATH", _DEFAULT_INDEX_DIR)
)
chunks_path = index_dir / "chunks.jsonl"
meta_path = index_dir / "index_meta.json"
if not chunks_path.exists() or not meta_path.exists():
print(
f"[mcp-cucm-axl] cisco-docs index not found at {index_dir}; "
f"prompts will run without schema enrichment.",
file=sys.stderr,
flush=True,
)
return None
meta = json.loads(meta_path.read_text())
chunks = [
json.loads(line)
for line in chunks_path.read_text(encoding="utf-8").splitlines()
if line.strip()
]
print(
f"[mcp-cucm-axl] loaded {len(chunks)} doc chunks from {index_dir}",
file=sys.stderr,
flush=True,
)
return cls(chunks, meta)
def cucm_chunks(self) -> list[dict]:
return [c for c in self.chunks if c.get("product") == "cucm"]
def find(
self,
keywords: list[str],
product: str = "cucm",
max_chunks: int = 6,
max_chars_per_chunk: int = 800,
) -> list[dict]:
"""Score chunks by keyword hits in heading_path + text. Lowercase-insensitive.
Heading hits weight 3x text hits — heading paths are a much better
topical signal than incidental text mentions.
"""
if not keywords:
return []
kws = [k.lower() for k in keywords if k]
scored: list[tuple[float, dict]] = []
for chunk in self.chunks:
if product and chunk.get("product") != product:
continue
heading = " ".join(chunk.get("heading_path") or []).lower()
text = (chunk.get("text") or "").lower()
doc = chunk.get("doc") or ""
doc_lower = doc.lower()
raw = 0
for k in kws:
raw += heading.count(k) * 3
raw += doc_lower.count(k) * 2
raw += text.count(k)
if raw > 0:
weight = _DOC_WEIGHTS.get(doc, 1.0)
scored.append((raw * weight, chunk))
scored.sort(key=lambda t: t[0], reverse=True)
out = []
for score, chunk in scored[:max_chunks]:
text = chunk.get("text", "")
if len(text) > max_chars_per_chunk:
text = text[:max_chars_per_chunk] + ""
out.append({
"score": round(score, 1),
"heading_path": chunk.get("heading_path"),
"doc": chunk.get("doc"),
"version": chunk.get("version"),
"source_path": chunk.get("source_path"),
"text": text,
"chunk_id": chunk.get("id"),
})
return out
def format_chunks_for_prompt(self, chunks: list[dict]) -> str:
"""Render chunks as a markdown reference block for embedding in prompt seeds."""
if not chunks:
return "_No matching schema documentation found in the local index._"
lines = []
for c in chunks:
heading = " > ".join(c.get("heading_path") or []) or "(no heading)"
doc = c.get("doc", "")
version = c.get("version", "")
lines.append(f"### {heading} \n_source: {doc} ({version}) — score {c['score']}_")
lines.append("")
lines.append(c["text"])
lines.append("")
return "\n".join(lines)