From b53d8ab9985d8b5b9ed5a8b18b8c7f2d5b32e83d Mon Sep 17 00:00:00 2001 From: Ryan Malloy Date: Tue, 7 Apr 2026 04:19:20 -0600 Subject: [PATCH] Fix document-closed errors in 7 tools, fix stamp font name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Capture total_pages before doc.close() in content_analysis, security_analysis, annotations, and misc_tools mixins - Fix invalid PyMuPDF font name "helv-bold" → "helv" in add_stamps - Bump to v2.1.7 --- pyproject.toml | 2 +- src/mcp_pdf/mixins_official/annotations.py | 5 ++-- .../mixins_official/content_analysis.py | 29 ++++++++++--------- src/mcp_pdf/mixins_official/misc_tools.py | 18 +++++++----- .../mixins_official/security_analysis.py | 7 +++-- uv.lock | 2 +- 6 files changed, 35 insertions(+), 28 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e4cc232..637801c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "mcp-pdf" -version = "2.1.6" +version = "2.1.7" description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more" authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}] readme = "README.md" diff --git a/src/mcp_pdf/mixins_official/annotations.py b/src/mcp_pdf/mixins_official/annotations.py index 421e006..695b4d6 100644 --- a/src/mcp_pdf/mixins_official/annotations.py +++ b/src/mcp_pdf/mixins_official/annotations.py @@ -402,7 +402,7 @@ class AnnotationsMixin(MCPMixin): stamp_type.upper(), fontsize=12, color=(1, 1, 1), # White text - fontname="helv-bold" + fontname="helv" ) stamps_added += 1 @@ -470,6 +470,7 @@ class AnnotationsMixin(MCPMixin): # Validate path input_pdf_path = await validate_pdf_path(pdf_path) doc = fitz.open(str(input_pdf_path)) + total_pages = len(doc) all_annotations = [] annotation_stats = { @@ -563,7 +564,7 @@ class AnnotationsMixin(MCPMixin): "annotations": formatted_data, "file_info": { "path": str(input_pdf_path), - "total_pages": len(doc) if 'doc' in locals() else 0 + "total_pages": total_pages if 'total_pages' in locals() else 0 }, "extraction_time": round(time.time() - start_time, 2) } diff --git a/src/mcp_pdf/mixins_official/content_analysis.py b/src/mcp_pdf/mixins_official/content_analysis.py index 74cd894..659391f 100644 --- a/src/mcp_pdf/mixins_official/content_analysis.py +++ b/src/mcp_pdf/mixins_official/content_analysis.py @@ -51,9 +51,10 @@ class ContentAnalysisMixin(MCPMixin): try: path = await validate_pdf_path(pdf_path) doc = fitz.open(str(path)) + total_pages = len(doc) # Extract text from sample pages for analysis - sample_size = min(10, len(doc)) + sample_size = min(10, total_pages) full_text = "" total_words = 0 total_sentences = 0 @@ -132,8 +133,8 @@ class ContentAnalysisMixin(MCPMixin): total_links = sum(len(doc[i].get_links()) for i in range(sample_size)) # Estimate for full document - estimated_total_images = int(total_images * len(doc) / sample_size) if sample_size > 0 else 0 - estimated_total_links = int(total_links * len(doc) / sample_size) if sample_size > 0 else 0 + estimated_total_images = int(total_images * total_pages / sample_size) if sample_size > 0 else 0 + estimated_total_links = int(total_links * total_pages / sample_size) if sample_size > 0 else 0 doc.close() @@ -145,8 +146,8 @@ class ContentAnalysisMixin(MCPMixin): "secondary_types": sorted(content_scores.items(), key=lambda x: x[1], reverse=True)[1:4] }, "content_analysis": { - "total_pages": len(doc), - "estimated_word_count": int(total_words * len(doc) / sample_size), + "total_pages": total_pages, + "estimated_word_count": int(total_words * total_pages / sample_size), "avg_words_per_page": round(avg_words_per_page, 1), "vocabulary_diversity": round(vocabulary_diversity, 2), "reading_level": reading_level, @@ -211,15 +212,16 @@ class ContentAnalysisMixin(MCPMixin): try: path = await validate_pdf_path(pdf_path) doc = fitz.open(str(path)) + total_pages = len(doc) # Parse pages parameter parsed_pages = parse_pages_parameter(pages) - page_numbers = parsed_pages if parsed_pages else list(range(len(doc))) - page_numbers = [p for p in page_numbers if 0 <= p < len(doc)] + page_numbers = parsed_pages if parsed_pages else list(range(total_pages)) + page_numbers = [p for p in page_numbers if 0 <= p < total_pages] # If parsing failed but pages was specified, use all pages if pages and not page_numbers: - page_numbers = list(range(len(doc))) + page_numbers = list(range(total_pages)) # Extract text from specified pages full_text = "" @@ -313,7 +315,7 @@ class ContentAnalysisMixin(MCPMixin): }, "file_info": { "path": str(path), - "total_pages": len(doc), + "total_pages": total_pages, "pages_processed": pages or "all" }, "analysis_time": round(time.time() - start_time, 2) @@ -354,17 +356,18 @@ class ContentAnalysisMixin(MCPMixin): try: path = await validate_pdf_path(pdf_path) doc = fitz.open(str(path)) + total_pages = len(doc) # Parse pages parameter parsed_pages = parse_pages_parameter(pages) if parsed_pages: - page_numbers = [p for p in parsed_pages if 0 <= p < len(doc)] + page_numbers = [p for p in parsed_pages if 0 <= p < total_pages] else: - page_numbers = list(range(min(5, len(doc)))) # Limit to 5 pages for performance + page_numbers = list(range(min(5, total_pages))) # Limit to 5 pages for performance # If parsing failed but pages was specified, default to first 5 if pages and not page_numbers: - page_numbers = list(range(min(5, len(doc)))) + page_numbers = list(range(min(5, total_pages))) layout_analysis = [] @@ -513,7 +516,7 @@ class ContentAnalysisMixin(MCPMixin): }, "file_info": { "path": str(path), - "total_pages": len(doc) + "total_pages": total_pages }, "analysis_time": round(time.time() - start_time, 2) } diff --git a/src/mcp_pdf/mixins_official/misc_tools.py b/src/mcp_pdf/mixins_official/misc_tools.py index 9ca39b4..c477e66 100644 --- a/src/mcp_pdf/mixins_official/misc_tools.py +++ b/src/mcp_pdf/mixins_official/misc_tools.py @@ -62,15 +62,16 @@ class MiscToolsMixin(MCPMixin): try: path = await validate_pdf_path(pdf_path) doc = fitz.open(str(path)) + total_pages = len(doc) # Parse pages parameter parsed_pages = parse_pages_parameter(pages) - page_numbers = parsed_pages if parsed_pages else list(range(len(doc))) - page_numbers = [p for p in page_numbers if 0 <= p < len(doc)] + page_numbers = parsed_pages if parsed_pages else list(range(total_pages)) + page_numbers = [p for p in page_numbers if 0 <= p < total_pages] # If parsing failed but pages was specified, use all pages if pages and not page_numbers: - page_numbers = list(range(len(doc))) + page_numbers = list(range(total_pages)) all_links = [] link_types = {"internal": 0, "external": 0, "email": 0, "other": 0} @@ -169,7 +170,7 @@ class MiscToolsMixin(MCPMixin): }, "file_info": { "path": str(path), - "total_pages": len(doc), + "total_pages": total_pages, "pages_processed": pages or "all" }, "extraction_time": round(time.time() - start_time, 2) @@ -210,15 +211,16 @@ class MiscToolsMixin(MCPMixin): try: path = await validate_pdf_path(pdf_path) doc = fitz.open(str(path)) + total_pages = len(doc) # Parse pages parameter parsed_pages = parse_pages_parameter(pages) - page_numbers = parsed_pages if parsed_pages else list(range(len(doc))) - page_numbers = [p for p in page_numbers if 0 <= p < len(doc)] + page_numbers = parsed_pages if parsed_pages else list(range(total_pages)) + page_numbers = [p for p in page_numbers if 0 <= p < total_pages] # If parsing failed but pages was specified, use all pages if pages and not page_numbers: - page_numbers = list(range(len(doc))) + page_numbers = list(range(total_pages)) visual_elements = [] charts_found = 0 @@ -326,7 +328,7 @@ class MiscToolsMixin(MCPMixin): }, "file_info": { "path": str(path), - "total_pages": len(doc) + "total_pages": total_pages }, "analysis_time": round(time.time() - start_time, 2) } diff --git a/src/mcp_pdf/mixins_official/security_analysis.py b/src/mcp_pdf/mixins_official/security_analysis.py index c7d88c0..afc492e 100644 --- a/src/mcp_pdf/mixins_official/security_analysis.py +++ b/src/mcp_pdf/mixins_official/security_analysis.py @@ -225,6 +225,7 @@ class SecurityAnalysisMixin(MCPMixin): try: path = await validate_pdf_path(pdf_path) doc = fitz.open(str(path)) + total_pages = len(doc) watermark_analysis = [] total_watermarks = 0 @@ -310,7 +311,7 @@ class SecurityAnalysisMixin(MCPMixin): # Watermark assessment has_watermarks = total_watermarks > 0 - watermark_density = total_watermarks / len(doc) if len(doc) > 0 else 0 + watermark_density = total_watermarks / total_pages if total_pages > 0 else 0 # Determine watermark pattern if watermark_density > 0.8: @@ -334,7 +335,7 @@ class SecurityAnalysisMixin(MCPMixin): "page_analysis": watermark_analysis, "watermark_insights": { "pages_with_watermarks": len(watermark_analysis), - "pages_without_watermarks": len(doc) - len(watermark_analysis), + "pages_without_watermarks": total_pages - len(watermark_analysis), "most_common_type": max(watermark_types, key=watermark_types.get) if any(watermark_types.values()) else "none" }, "recommendations": [ @@ -344,7 +345,7 @@ class SecurityAnalysisMixin(MCPMixin): ] if has_watermarks else ["No watermarks detected"], "file_info": { "path": str(path), - "total_pages": len(doc) + "total_pages": total_pages }, "analysis_time": round(time.time() - start_time, 2) } diff --git a/uv.lock b/uv.lock index 1b184a9..6dd0a04 100644 --- a/uv.lock +++ b/uv.lock @@ -1032,7 +1032,7 @@ wheels = [ [[package]] name = "mcp-pdf" -version = "2.1.5" +version = "2.1.6" source = { editable = "." } dependencies = [ { name = "fastmcp" },