From 6af31046332110d8e2ba8c130898c20d0f1fd3e6 Mon Sep 17 00:00:00 2001 From: Ryan Malloy Date: Fri, 6 Mar 2026 21:26:30 -0700 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=89=20Slim=20get=5Fdocument=5Fstructur?= =?UTF-8?q?e:=20cap=20bookmarks=20to=2020=20preview=20lines?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bookmark list was unbounded — a 346-bookmark parts manual produced ~12.5k tokens. Now returns indented bookmark preview (20 lines + count), folds page_analysis and document_organization into structure_summary. ~406 tokens for the same document. --- pyproject.toml | 2 +- .../mixins_official/document_analysis.py | 28 ++++++++++--------- uv.lock | 2 +- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1b79055..9447138 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "mcp-pdf" -version = "2.1.3" +version = "2.1.4" description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more" authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}] readme = "README.md" diff --git a/src/mcp_pdf/mixins_official/document_analysis.py b/src/mcp_pdf/mixins_official/document_analysis.py index e6f9868..b0dbb61 100644 --- a/src/mcp_pdf/mixins_official/document_analysis.py +++ b/src/mcp_pdf/mixins_official/document_analysis.py @@ -225,27 +225,29 @@ class DocumentAnalysisMixin(MCPMixin): doc.close() + # Cap bookmark preview to avoid flooding MCP context + max_bookmark_preview = 20 + bookmark_preview = [ + b["indent"] for b in bookmarks[:max_bookmark_preview] + ] + if len(bookmarks) > max_bookmark_preview: + bookmark_preview.append( + f"... and {len(bookmarks) - max_bookmark_preview} more bookmarks" + ) + return { "success": True, "structure_summary": { "total_pages": total_pages, "has_bookmarks": has_bookmarks, "bookmark_count": len(bookmarks), - "has_uniform_page_sizes": has_uniform_pages, - "unique_page_sizes": len(unique_page_sizes), - "has_forms": has_forms - }, - "bookmarks": bookmarks, - "page_analysis": { - "total_pages": total_pages, - "unique_page_sizes": list(unique_page_sizes), - "pages": page_analysis[:10] # Limit to first 10 pages for context - }, - "document_organization": { - "bookmark_hierarchy_depth": max([b["level"] for b in bookmarks]) if bookmarks else 0, + "bookmark_hierarchy_depth": max(b["level"] for b in bookmarks) if bookmarks else 0, "estimated_sections": len([b for b in bookmarks if b["level"] <= 2]), - "page_size_consistency": has_uniform_pages + "has_uniform_page_sizes": has_uniform_pages, + "unique_page_sizes": list(unique_page_sizes), + "has_forms": has_forms, }, + "bookmark_preview": bookmark_preview, "file_info": { "path": str(path) }, diff --git a/uv.lock b/uv.lock index 494c54d..4e53dcb 100644 --- a/uv.lock +++ b/uv.lock @@ -1032,7 +1032,7 @@ wheels = [ [[package]] name = "mcp-pdf" -version = "2.1.2" +version = "2.1.3" source = { editable = "." } dependencies = [ { name = "camelot-py", extra = ["cv"] },