Fix document-closed errors in 7 tools, fix stamp font name

- Capture total_pages before doc.close() in the content_analysis, security_analysis, annotations, and misc_tools mixins
- Fix invalid PyMuPDF font name "helv-bold" → "helv" in add_stamps
- Bump to v2.1.7
2026-04-07 04:19:20 -06:00 · 2026-04-07 04:19:20 -06:00 · b53d8ab998
commit b53d8ab998
parent 057aa5be40
6 changed files with 35 additions and 28 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [project]
 name = "mcp-pdf"
-version = "2.1.6"
+version = "2.1.7"
 description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more"
 authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}]
 readme = "README.md"
--- a/src/mcp_pdf/mixins_official/annotations.py
+++ b/src/mcp_pdf/mixins_official/annotations.py
@ -402,7 +402,7 @@ class AnnotationsMixin(MCPMixin):
                        stamp_type.upper(),
                        fontsize=12,
                        color=(1, 1, 1),  # White text
-                        fontname="helv-bold"
+                        fontname="helv"
                    )

                    stamps_added += 1
@ -470,6 +470,7 @@ class AnnotationsMixin(MCPMixin):
            # Validate path
            input_pdf_path = await validate_pdf_path(pdf_path)
            doc = fitz.open(str(input_pdf_path))
+            total_pages = len(doc)

            all_annotations = []
            annotation_stats = {
@ -563,7 +564,7 @@ class AnnotationsMixin(MCPMixin):
                "annotations": formatted_data,
                "file_info": {
                    "path": str(input_pdf_path),
-                    "total_pages": len(doc) if 'doc' in locals() else 0
+                    "total_pages": total_pages if 'total_pages' in locals() else 0
                },
                "extraction_time": round(time.time() - start_time, 2)
            }
--- a/src/mcp_pdf/mixins_official/content_analysis.py
+++ b/src/mcp_pdf/mixins_official/content_analysis.py
@ -51,9 +51,10 @@ class ContentAnalysisMixin(MCPMixin):
        try:
            path = await validate_pdf_path(pdf_path)
            doc = fitz.open(str(path))
+            total_pages = len(doc)

            # Extract text from sample pages for analysis
-            sample_size = min(10, len(doc))
+            sample_size = min(10, total_pages)
            full_text = ""
            total_words = 0
            total_sentences = 0
@ -132,8 +133,8 @@ class ContentAnalysisMixin(MCPMixin):
            total_links = sum(len(doc[i].get_links()) for i in range(sample_size))

            # Estimate for full document
-            estimated_total_images = int(total_images * len(doc) / sample_size) if sample_size > 0 else 0
-            estimated_total_links = int(total_links * len(doc) / sample_size) if sample_size > 0 else 0
+            estimated_total_images = int(total_images * total_pages / sample_size) if sample_size > 0 else 0
+            estimated_total_links = int(total_links * total_pages / sample_size) if sample_size > 0 else 0

            doc.close()

@ -145,8 +146,8 @@ class ContentAnalysisMixin(MCPMixin):
                    "secondary_types": sorted(content_scores.items(), key=lambda x: x[1], reverse=True)[1:4]
                },
                "content_analysis": {
-                    "total_pages": len(doc),
-                    "estimated_word_count": int(total_words * len(doc) / sample_size),
+                    "total_pages": total_pages,
+                    "estimated_word_count": int(total_words * total_pages / sample_size),
                    "avg_words_per_page": round(avg_words_per_page, 1),
                    "vocabulary_diversity": round(vocabulary_diversity, 2),
                    "reading_level": reading_level,
@ -211,15 +212,16 @@ class ContentAnalysisMixin(MCPMixin):
        try:
            path = await validate_pdf_path(pdf_path)
            doc = fitz.open(str(path))
+            total_pages = len(doc)

            # Parse pages parameter
            parsed_pages = parse_pages_parameter(pages)
-            page_numbers = parsed_pages if parsed_pages else list(range(len(doc)))
-            page_numbers = [p for p in page_numbers if 0 <= p < len(doc)]
+            page_numbers = parsed_pages if parsed_pages else list(range(total_pages))
+            page_numbers = [p for p in page_numbers if 0 <= p < total_pages]

            # If parsing failed but pages was specified, use all pages
            if pages and not page_numbers:
-                page_numbers = list(range(len(doc)))
+                page_numbers = list(range(total_pages))

            # Extract text from specified pages
            full_text = ""
@ -313,7 +315,7 @@ class ContentAnalysisMixin(MCPMixin):
                },
                "file_info": {
                    "path": str(path),
-                    "total_pages": len(doc),
+                    "total_pages": total_pages,
                    "pages_processed": pages or "all"
                },
                "analysis_time": round(time.time() - start_time, 2)
@ -354,17 +356,18 @@ class ContentAnalysisMixin(MCPMixin):
        try:
            path = await validate_pdf_path(pdf_path)
            doc = fitz.open(str(path))
+            total_pages = len(doc)

            # Parse pages parameter
            parsed_pages = parse_pages_parameter(pages)
            if parsed_pages:
-                page_numbers = [p for p in parsed_pages if 0 <= p < len(doc)]
+                page_numbers = [p for p in parsed_pages if 0 <= p < total_pages]
            else:
-                page_numbers = list(range(min(5, len(doc))))  # Limit to 5 pages for performance
+                page_numbers = list(range(min(5, total_pages)))  # Limit to 5 pages for performance

            # If parsing failed but pages was specified, default to first 5
            if pages and not page_numbers:
-                page_numbers = list(range(min(5, len(doc))))
+                page_numbers = list(range(min(5, total_pages)))

            layout_analysis = []

@ -513,7 +516,7 @@ class ContentAnalysisMixin(MCPMixin):
                },
                "file_info": {
                    "path": str(path),
-                    "total_pages": len(doc)
+                    "total_pages": total_pages
                },
                "analysis_time": round(time.time() - start_time, 2)
            }
--- a/src/mcp_pdf/mixins_official/misc_tools.py
+++ b/src/mcp_pdf/mixins_official/misc_tools.py
@ -62,15 +62,16 @@ class MiscToolsMixin(MCPMixin):
        try:
            path = await validate_pdf_path(pdf_path)
            doc = fitz.open(str(path))
+            total_pages = len(doc)

            # Parse pages parameter
            parsed_pages = parse_pages_parameter(pages)
-            page_numbers = parsed_pages if parsed_pages else list(range(len(doc)))
-            page_numbers = [p for p in page_numbers if 0 <= p < len(doc)]
+            page_numbers = parsed_pages if parsed_pages else list(range(total_pages))
+            page_numbers = [p for p in page_numbers if 0 <= p < total_pages]

            # If parsing failed but pages was specified, use all pages
            if pages and not page_numbers:
-                page_numbers = list(range(len(doc)))
+                page_numbers = list(range(total_pages))

            all_links = []
            link_types = {"internal": 0, "external": 0, "email": 0, "other": 0}
@ -169,7 +170,7 @@ class MiscToolsMixin(MCPMixin):
                },
                "file_info": {
                    "path": str(path),
-                    "total_pages": len(doc),
+                    "total_pages": total_pages,
                    "pages_processed": pages or "all"
                },
                "extraction_time": round(time.time() - start_time, 2)
@ -210,15 +211,16 @@ class MiscToolsMixin(MCPMixin):
        try:
            path = await validate_pdf_path(pdf_path)
            doc = fitz.open(str(path))
+            total_pages = len(doc)

            # Parse pages parameter
            parsed_pages = parse_pages_parameter(pages)
-            page_numbers = parsed_pages if parsed_pages else list(range(len(doc)))
-            page_numbers = [p for p in page_numbers if 0 <= p < len(doc)]
+            page_numbers = parsed_pages if parsed_pages else list(range(total_pages))
+            page_numbers = [p for p in page_numbers if 0 <= p < total_pages]

            # If parsing failed but pages was specified, use all pages
            if pages and not page_numbers:
-                page_numbers = list(range(len(doc)))
+                page_numbers = list(range(total_pages))

            visual_elements = []
            charts_found = 0
@ -326,7 +328,7 @@ class MiscToolsMixin(MCPMixin):
                },
                "file_info": {
                    "path": str(path),
-                    "total_pages": len(doc)
+                    "total_pages": total_pages
                },
                "analysis_time": round(time.time() - start_time, 2)
            }
--- a/src/mcp_pdf/mixins_official/security_analysis.py
+++ b/src/mcp_pdf/mixins_official/security_analysis.py
@ -225,6 +225,7 @@ class SecurityAnalysisMixin(MCPMixin):
        try:
            path = await validate_pdf_path(pdf_path)
            doc = fitz.open(str(path))
+            total_pages = len(doc)

            watermark_analysis = []
            total_watermarks = 0
@ -310,7 +311,7 @@ class SecurityAnalysisMixin(MCPMixin):

            # Watermark assessment
            has_watermarks = total_watermarks > 0
-            watermark_density = total_watermarks / len(doc) if len(doc) > 0 else 0
+            watermark_density = total_watermarks / total_pages if total_pages > 0 else 0

            # Determine watermark pattern
            if watermark_density > 0.8:
@ -334,7 +335,7 @@ class SecurityAnalysisMixin(MCPMixin):
                "page_analysis": watermark_analysis,
                "watermark_insights": {
                    "pages_with_watermarks": len(watermark_analysis),
-                    "pages_without_watermarks": len(doc) - len(watermark_analysis),
+                    "pages_without_watermarks": total_pages - len(watermark_analysis),
                    "most_common_type": max(watermark_types, key=watermark_types.get) if any(watermark_types.values()) else "none"
                },
                "recommendations": [
@ -344,7 +345,7 @@ class SecurityAnalysisMixin(MCPMixin):
                ] if has_watermarks else ["No watermarks detected"],
                "file_info": {
                    "path": str(path),
-                    "total_pages": len(doc)
+                    "total_pages": total_pages
                },
                "analysis_time": round(time.time() - start_time, 2)
            }
--- a/uv.lock
+++ b/uv.lock
@ -1032,7 +1032,7 @@ wheels = [

 [[package]]
 name = "mcp-pdf"
-version = "2.1.5"
+version = "2.1.6"
 source = { editable = "." }
 dependencies = [
    { name = "fastmcp" },