📉 Slim get_document_structure: cap bookmarks to 20 preview lines

The bookmark list was unbounded — a 346-bookmark parts manual produced
~12.5k tokens. The tool now returns an indented bookmark preview (the first
20 lines plus a count of the remaining bookmarks) and folds page_analysis
and document_organization into structure_summary, which brings the same
document down to ~406 tokens.
This commit is contained in:
Ryan Malloy 2026-03-06 21:26:30 -07:00
parent a1aa3f7363
commit 6af3104633
3 changed files with 17 additions and 15 deletions

View File

@ -1,6 +1,6 @@
[project] [project]
name = "mcp-pdf" name = "mcp-pdf"
version = "2.1.3" version = "2.1.4"
description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more" description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more"
authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}] authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}]
readme = "README.md" readme = "README.md"

View File

@ -225,27 +225,29 @@ class DocumentAnalysisMixin(MCPMixin):
doc.close() doc.close()
# Cap bookmark preview to avoid flooding MCP context
max_bookmark_preview = 20
bookmark_preview = [
b["indent"] for b in bookmarks[:max_bookmark_preview]
]
if len(bookmarks) > max_bookmark_preview:
bookmark_preview.append(
f"... and {len(bookmarks) - max_bookmark_preview} more bookmarks"
)
return { return {
"success": True, "success": True,
"structure_summary": { "structure_summary": {
"total_pages": total_pages, "total_pages": total_pages,
"has_bookmarks": has_bookmarks, "has_bookmarks": has_bookmarks,
"bookmark_count": len(bookmarks), "bookmark_count": len(bookmarks),
"has_uniform_page_sizes": has_uniform_pages, "bookmark_hierarchy_depth": max(b["level"] for b in bookmarks) if bookmarks else 0,
"unique_page_sizes": len(unique_page_sizes),
"has_forms": has_forms
},
"bookmarks": bookmarks,
"page_analysis": {
"total_pages": total_pages,
"unique_page_sizes": list(unique_page_sizes),
"pages": page_analysis[:10] # Limit to first 10 pages for context
},
"document_organization": {
"bookmark_hierarchy_depth": max([b["level"] for b in bookmarks]) if bookmarks else 0,
"estimated_sections": len([b for b in bookmarks if b["level"] <= 2]), "estimated_sections": len([b for b in bookmarks if b["level"] <= 2]),
"page_size_consistency": has_uniform_pages "has_uniform_page_sizes": has_uniform_pages,
"unique_page_sizes": list(unique_page_sizes),
"has_forms": has_forms,
}, },
"bookmark_preview": bookmark_preview,
"file_info": { "file_info": {
"path": str(path) "path": str(path)
}, },

2
uv.lock generated
View File

@ -1032,7 +1032,7 @@ wheels = [
[[package]] [[package]]
name = "mcp-pdf" name = "mcp-pdf"
version = "2.1.2" version = "2.1.3"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "camelot-py", extra = ["cv"] }, { name = "camelot-py", extra = ["cv"] },