📉 Slim get_document_structure: cap bookmarks to 20 preview lines

The bookmark list was unbounded — a 346-bookmark parts manual produced
~12.5k tokens. get_document_structure now returns an indented bookmark
preview (up to 20 lines plus a count of the remaining bookmarks) and folds
page_analysis and document_organization into structure_summary. The same
document now produces ~406 tokens.
This commit is contained in:
Ryan Malloy 2026-03-06 21:26:30 -07:00
parent a1aa3f7363
commit 6af3104633
3 changed files with 17 additions and 15 deletions

View File

@ -1,6 +1,6 @@
[project]
name = "mcp-pdf"
version = "2.1.3"
version = "2.1.4"
description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more"
authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}]
readme = "README.md"

View File

@ -225,27 +225,29 @@ class DocumentAnalysisMixin(MCPMixin):
doc.close()
# Cap bookmark preview to avoid flooding MCP context
max_bookmark_preview = 20
bookmark_preview = [
b["indent"] for b in bookmarks[:max_bookmark_preview]
]
if len(bookmarks) > max_bookmark_preview:
bookmark_preview.append(
f"... and {len(bookmarks) - max_bookmark_preview} more bookmarks"
)
return {
"success": True,
"structure_summary": {
"total_pages": total_pages,
"has_bookmarks": has_bookmarks,
"bookmark_count": len(bookmarks),
"has_uniform_page_sizes": has_uniform_pages,
"unique_page_sizes": len(unique_page_sizes),
"has_forms": has_forms
},
"bookmarks": bookmarks,
"page_analysis": {
"total_pages": total_pages,
"unique_page_sizes": list(unique_page_sizes),
"pages": page_analysis[:10] # Limit to first 10 pages for context
},
"document_organization": {
"bookmark_hierarchy_depth": max([b["level"] for b in bookmarks]) if bookmarks else 0,
"bookmark_hierarchy_depth": max(b["level"] for b in bookmarks) if bookmarks else 0,
"estimated_sections": len([b for b in bookmarks if b["level"] <= 2]),
"page_size_consistency": has_uniform_pages
"has_uniform_page_sizes": has_uniform_pages,
"unique_page_sizes": list(unique_page_sizes),
"has_forms": has_forms,
},
"bookmark_preview": bookmark_preview,
"file_info": {
"path": str(path)
},

2
uv.lock generated
View File

@ -1032,7 +1032,7 @@ wheels = [
[[package]]
name = "mcp-pdf"
version = "2.1.2"
version = "2.1.3"
source = { editable = "." }
dependencies = [
{ name = "camelot-py", extra = ["cv"] },