📉 Slim get_document_structure: cap bookmarks to 20 preview lines
The bookmark list was unbounded — a 346-bookmark parts manual produced ~12.5k tokens. get_document_structure now returns an indented bookmark preview (the first 20 lines plus a count of the remaining bookmarks) and folds page_analysis and document_organization into structure_summary, bringing the same document down to ~406 tokens.
This commit is contained in:
parent
a1aa3f7363
commit
6af3104633
@ -1,6 +1,6 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "mcp-pdf"
|
name = "mcp-pdf"
|
||||||
version = "2.1.3"
|
version = "2.1.4"
|
||||||
description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more"
|
description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more"
|
||||||
authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}]
|
authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}]
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
|||||||
@ -225,27 +225,29 @@ class DocumentAnalysisMixin(MCPMixin):
|
|||||||
|
|
||||||
doc.close()
|
doc.close()
|
||||||
|
|
||||||
|
# Cap bookmark preview to avoid flooding MCP context
|
||||||
|
max_bookmark_preview = 20
|
||||||
|
bookmark_preview = [
|
||||||
|
b["indent"] for b in bookmarks[:max_bookmark_preview]
|
||||||
|
]
|
||||||
|
if len(bookmarks) > max_bookmark_preview:
|
||||||
|
bookmark_preview.append(
|
||||||
|
f"... and {len(bookmarks) - max_bookmark_preview} more bookmarks"
|
||||||
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"success": True,
|
"success": True,
|
||||||
"structure_summary": {
|
"structure_summary": {
|
||||||
"total_pages": total_pages,
|
"total_pages": total_pages,
|
||||||
"has_bookmarks": has_bookmarks,
|
"has_bookmarks": has_bookmarks,
|
||||||
"bookmark_count": len(bookmarks),
|
"bookmark_count": len(bookmarks),
|
||||||
"has_uniform_page_sizes": has_uniform_pages,
|
"bookmark_hierarchy_depth": max(b["level"] for b in bookmarks) if bookmarks else 0,
|
||||||
"unique_page_sizes": len(unique_page_sizes),
|
|
||||||
"has_forms": has_forms
|
|
||||||
},
|
|
||||||
"bookmarks": bookmarks,
|
|
||||||
"page_analysis": {
|
|
||||||
"total_pages": total_pages,
|
|
||||||
"unique_page_sizes": list(unique_page_sizes),
|
|
||||||
"pages": page_analysis[:10] # Limit to first 10 pages for context
|
|
||||||
},
|
|
||||||
"document_organization": {
|
|
||||||
"bookmark_hierarchy_depth": max([b["level"] for b in bookmarks]) if bookmarks else 0,
|
|
||||||
"estimated_sections": len([b for b in bookmarks if b["level"] <= 2]),
|
"estimated_sections": len([b for b in bookmarks if b["level"] <= 2]),
|
||||||
"page_size_consistency": has_uniform_pages
|
"has_uniform_page_sizes": has_uniform_pages,
|
||||||
|
"unique_page_sizes": list(unique_page_sizes),
|
||||||
|
"has_forms": has_forms,
|
||||||
},
|
},
|
||||||
|
"bookmark_preview": bookmark_preview,
|
||||||
"file_info": {
|
"file_info": {
|
||||||
"path": str(path)
|
"path": str(path)
|
||||||
},
|
},
|
||||||
|
|||||||
2
uv.lock
generated
2
uv.lock
generated
@ -1032,7 +1032,7 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mcp-pdf"
|
name = "mcp-pdf"
|
||||||
version = "2.1.2"
|
version = "2.1.3"
|
||||||
source = { editable = "." }
|
source = { editable = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "camelot-py", extra = ["cv"] },
|
{ name = "camelot-py", extra = ["cv"] },
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user