From b53d8ab9985d8b5b9ed5a8b18b8c7f2d5b32e83d Mon Sep 17 00:00:00 2001
From: Ryan Malloy <ryan@malloys.us>
Date: Tue, 7 Apr 2026 04:19:20 -0600
Subject: [PATCH] Fix document-closed errors in 7 tools, fix stamp font name
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Capture total_pages right after fitz.open() in the content_analysis,
  security_analysis, annotations, and misc_tools mixins, so result
  dicts no longer call len(doc) after doc.close()
- Fix invalid PyMuPDF font name "helv-bold" → "helv" in add_stamps
- Bump version to 2.1.7 in pyproject.toml (uv.lock entry moves 2.1.5 → 2.1.6)
---
 pyproject.toml                                |  2 +-
 src/mcp_pdf/mixins_official/annotations.py    |  5 ++--
 .../mixins_official/content_analysis.py       | 29 ++++++++++---------
 src/mcp_pdf/mixins_official/misc_tools.py     | 18 +++++++-----
 .../mixins_official/security_analysis.py      |  7 +++--
 uv.lock                                       |  2 +-
 6 files changed, 35 insertions(+), 28 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index e4cc232..637801c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "mcp-pdf"
-version = "2.1.6"
+version = "2.1.7"
 description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more"
 authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}]
 readme = "README.md"
diff --git a/src/mcp_pdf/mixins_official/annotations.py b/src/mcp_pdf/mixins_official/annotations.py
index 421e006..695b4d6 100644
--- a/src/mcp_pdf/mixins_official/annotations.py
+++ b/src/mcp_pdf/mixins_official/annotations.py
@@ -402,7 +402,7 @@ class AnnotationsMixin(MCPMixin):
                         stamp_type.upper(),
                         fontsize=12,
                         color=(1, 1, 1),  # White text
-                        fontname="helv-bold"
+                        fontname="helv"
                     )
 
                     stamps_added += 1
@@ -470,6 +470,7 @@ class AnnotationsMixin(MCPMixin):
             # Validate path
             input_pdf_path = await validate_pdf_path(pdf_path)
             doc = fitz.open(str(input_pdf_path))
+            total_pages = len(doc)
 
             all_annotations = []
             annotation_stats = {
@@ -563,7 +564,7 @@ class AnnotationsMixin(MCPMixin):
                 "annotations": formatted_data,
                 "file_info": {
                     "path": str(input_pdf_path),
-                    "total_pages": len(doc) if 'doc' in locals() else 0
+                    "total_pages": total_pages if 'total_pages' in locals() else 0
                 },
                 "extraction_time": round(time.time() - start_time, 2)
             }
diff --git a/src/mcp_pdf/mixins_official/content_analysis.py b/src/mcp_pdf/mixins_official/content_analysis.py
index 74cd894..659391f 100644
--- a/src/mcp_pdf/mixins_official/content_analysis.py
+++ b/src/mcp_pdf/mixins_official/content_analysis.py
@@ -51,9 +51,10 @@ class ContentAnalysisMixin(MCPMixin):
         try:
             path = await validate_pdf_path(pdf_path)
             doc = fitz.open(str(path))
+            total_pages = len(doc)
 
             # Extract text from sample pages for analysis
-            sample_size = min(10, len(doc))
+            sample_size = min(10, total_pages)
             full_text = ""
             total_words = 0
             total_sentences = 0
@@ -132,8 +133,8 @@ class ContentAnalysisMixin(MCPMixin):
             total_links = sum(len(doc[i].get_links()) for i in range(sample_size))
 
             # Estimate for full document
-            estimated_total_images = int(total_images * len(doc) / sample_size) if sample_size > 0 else 0
-            estimated_total_links = int(total_links * len(doc) / sample_size) if sample_size > 0 else 0
+            estimated_total_images = int(total_images * total_pages / sample_size) if sample_size > 0 else 0
+            estimated_total_links = int(total_links * total_pages / sample_size) if sample_size > 0 else 0
 
             doc.close()
 
@@ -145,8 +146,8 @@ class ContentAnalysisMixin(MCPMixin):
                     "secondary_types": sorted(content_scores.items(), key=lambda x: x[1], reverse=True)[1:4]
                 },
                 "content_analysis": {
-                    "total_pages": len(doc),
-                    "estimated_word_count": int(total_words * len(doc) / sample_size),
+                    "total_pages": total_pages,
+                    "estimated_word_count": int(total_words * total_pages / sample_size),
                     "avg_words_per_page": round(avg_words_per_page, 1),
                     "vocabulary_diversity": round(vocabulary_diversity, 2),
                     "reading_level": reading_level,
@@ -211,15 +212,16 @@ class ContentAnalysisMixin(MCPMixin):
         try:
             path = await validate_pdf_path(pdf_path)
             doc = fitz.open(str(path))
+            total_pages = len(doc)
 
             # Parse pages parameter
             parsed_pages = parse_pages_parameter(pages)
-            page_numbers = parsed_pages if parsed_pages else list(range(len(doc)))
-            page_numbers = [p for p in page_numbers if 0 <= p < len(doc)]
+            page_numbers = parsed_pages if parsed_pages else list(range(total_pages))
+            page_numbers = [p for p in page_numbers if 0 <= p < total_pages]
 
             # If parsing failed but pages was specified, use all pages
             if pages and not page_numbers:
-                page_numbers = list(range(len(doc)))
+                page_numbers = list(range(total_pages))
 
             # Extract text from specified pages
             full_text = ""
@@ -313,7 +315,7 @@ class ContentAnalysisMixin(MCPMixin):
                 },
                 "file_info": {
                     "path": str(path),
-                    "total_pages": len(doc),
+                    "total_pages": total_pages,
                     "pages_processed": pages or "all"
                 },
                 "analysis_time": round(time.time() - start_time, 2)
@@ -354,17 +356,18 @@ class ContentAnalysisMixin(MCPMixin):
         try:
             path = await validate_pdf_path(pdf_path)
             doc = fitz.open(str(path))
+            total_pages = len(doc)
 
             # Parse pages parameter
             parsed_pages = parse_pages_parameter(pages)
             if parsed_pages:
-                page_numbers = [p for p in parsed_pages if 0 <= p < len(doc)]
+                page_numbers = [p for p in parsed_pages if 0 <= p < total_pages]
             else:
-                page_numbers = list(range(min(5, len(doc))))  # Limit to 5 pages for performance
+                page_numbers = list(range(min(5, total_pages)))  # Limit to 5 pages for performance
 
             # If parsing failed but pages was specified, default to first 5
             if pages and not page_numbers:
-                page_numbers = list(range(min(5, len(doc))))
+                page_numbers = list(range(min(5, total_pages)))
 
             layout_analysis = []
 
@@ -513,7 +516,7 @@ class ContentAnalysisMixin(MCPMixin):
                 },
                 "file_info": {
                     "path": str(path),
-                    "total_pages": len(doc)
+                    "total_pages": total_pages
                 },
                 "analysis_time": round(time.time() - start_time, 2)
             }
diff --git a/src/mcp_pdf/mixins_official/misc_tools.py b/src/mcp_pdf/mixins_official/misc_tools.py
index 9ca39b4..c477e66 100644
--- a/src/mcp_pdf/mixins_official/misc_tools.py
+++ b/src/mcp_pdf/mixins_official/misc_tools.py
@@ -62,15 +62,16 @@ class MiscToolsMixin(MCPMixin):
         try:
             path = await validate_pdf_path(pdf_path)
             doc = fitz.open(str(path))
+            total_pages = len(doc)
 
             # Parse pages parameter
             parsed_pages = parse_pages_parameter(pages)
-            page_numbers = parsed_pages if parsed_pages else list(range(len(doc)))
-            page_numbers = [p for p in page_numbers if 0 <= p < len(doc)]
+            page_numbers = parsed_pages if parsed_pages else list(range(total_pages))
+            page_numbers = [p for p in page_numbers if 0 <= p < total_pages]
 
             # If parsing failed but pages was specified, use all pages
             if pages and not page_numbers:
-                page_numbers = list(range(len(doc)))
+                page_numbers = list(range(total_pages))
 
             all_links = []
             link_types = {"internal": 0, "external": 0, "email": 0, "other": 0}
@@ -169,7 +170,7 @@ class MiscToolsMixin(MCPMixin):
                 },
                 "file_info": {
                     "path": str(path),
-                    "total_pages": len(doc),
+                    "total_pages": total_pages,
                     "pages_processed": pages or "all"
                 },
                 "extraction_time": round(time.time() - start_time, 2)
@@ -210,15 +211,16 @@ class MiscToolsMixin(MCPMixin):
         try:
             path = await validate_pdf_path(pdf_path)
             doc = fitz.open(str(path))
+            total_pages = len(doc)
 
             # Parse pages parameter
             parsed_pages = parse_pages_parameter(pages)
-            page_numbers = parsed_pages if parsed_pages else list(range(len(doc)))
-            page_numbers = [p for p in page_numbers if 0 <= p < len(doc)]
+            page_numbers = parsed_pages if parsed_pages else list(range(total_pages))
+            page_numbers = [p for p in page_numbers if 0 <= p < total_pages]
 
             # If parsing failed but pages was specified, use all pages
             if pages and not page_numbers:
-                page_numbers = list(range(len(doc)))
+                page_numbers = list(range(total_pages))
 
             visual_elements = []
             charts_found = 0
@@ -326,7 +328,7 @@ class MiscToolsMixin(MCPMixin):
                 },
                 "file_info": {
                     "path": str(path),
-                    "total_pages": len(doc)
+                    "total_pages": total_pages
                 },
                 "analysis_time": round(time.time() - start_time, 2)
             }
diff --git a/src/mcp_pdf/mixins_official/security_analysis.py b/src/mcp_pdf/mixins_official/security_analysis.py
index c7d88c0..afc492e 100644
--- a/src/mcp_pdf/mixins_official/security_analysis.py
+++ b/src/mcp_pdf/mixins_official/security_analysis.py
@@ -225,6 +225,7 @@ class SecurityAnalysisMixin(MCPMixin):
         try:
             path = await validate_pdf_path(pdf_path)
             doc = fitz.open(str(path))
+            total_pages = len(doc)
 
             watermark_analysis = []
             total_watermarks = 0
@@ -310,7 +311,7 @@ class SecurityAnalysisMixin(MCPMixin):
 
             # Watermark assessment
             has_watermarks = total_watermarks > 0
-            watermark_density = total_watermarks / len(doc) if len(doc) > 0 else 0
+            watermark_density = total_watermarks / total_pages if total_pages > 0 else 0
 
             # Determine watermark pattern
             if watermark_density > 0.8:
@@ -334,7 +335,7 @@ class SecurityAnalysisMixin(MCPMixin):
                 "page_analysis": watermark_analysis,
                 "watermark_insights": {
                     "pages_with_watermarks": len(watermark_analysis),
-                    "pages_without_watermarks": len(doc) - len(watermark_analysis),
+                    "pages_without_watermarks": total_pages - len(watermark_analysis),
                     "most_common_type": max(watermark_types, key=watermark_types.get) if any(watermark_types.values()) else "none"
                 },
                 "recommendations": [
@@ -344,7 +345,7 @@ class SecurityAnalysisMixin(MCPMixin):
                 ] if has_watermarks else ["No watermarks detected"],
                 "file_info": {
                     "path": str(path),
-                    "total_pages": len(doc)
+                    "total_pages": total_pages
                 },
                 "analysis_time": round(time.time() - start_time, 2)
             }
diff --git a/uv.lock b/uv.lock
index 1b184a9..6dd0a04 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1032,7 +1032,7 @@ wheels = [
 
 [[package]]
 name = "mcp-pdf"
-version = "2.1.5"
+version = "2.1.6"
 source = { editable = "." }
 dependencies = [
     { name = "fastmcp" },