📉 Slim detect_structure response to ~224 tokens

The preview is capped at 10 sections, rendered as human-readable lines, and
detection_info is moved into the JSON file. The response shrank from ~22k
tokens (inline) to ~1.6k (v2.1.2) and is now ~224 tokens.
This commit is contained in:
Ryan Malloy 2026-03-04 17:15:32 -07:00
parent a23fd8467a
commit 81a3619144
2 changed files with 19 additions and 18 deletions

View File

@ -266,28 +266,29 @@ class StructureDetectionMixin(MCPMixin):
encoding="utf-8", encoding="utf-8",
) )
# Build compact summary: top-level sections with subsection counts # Build compact preview: "p1-30: Title (5 subs)" lines
summary_sections = [] max_preview = 10
for sec in sections: preview_lines = []
for sec in sections[:max_preview]:
sub_count = self._count_subsections(sec) sub_count = self._count_subsections(sec)
summary_sections.append({ sub_info = f" ({sub_count} sub)" if sub_count else ""
"title": sec["title"], preview_lines.append(
"level": sec["level"], f"p{sec['page_start']}-{sec['page_end']}: "
"pages": f"{sec['page_start']}-{sec['page_end']}", f"{sec['title'][:60]}{sub_info}"
"confidence": sec["confidence"], )
"method": sec["detection_method"], if len(sections) > max_preview:
"subsections": sub_count, preview_lines.append(
}) f"... and {len(sections) - max_preview} more sections"
)
return { return {
"success": True, "success": True,
"output_file": str(json_path), "output_file": str(json_path),
"summary": { "total_boundaries": len(flat_boundaries),
"total_boundaries": len(flat_boundaries), "top_level_sections": len(sections),
"top_level_sections": len(sections), "strategies_used": strategies_used,
"sections": summary_sections, "total_pages": total_pages,
}, "preview": preview_lines,
"detection_info": detection_info,
"detection_time": elapsed, "detection_time": elapsed,
} }

2
uv.lock generated
View File

@ -1032,7 +1032,7 @@ wheels = [
[[package]] [[package]]
name = "mcp-pdf" name = "mcp-pdf"
version = "2.1.1" version = "2.1.2"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "camelot-py", extra = ["cv"] }, { name = "camelot-py", extra = ["cv"] },