Fix document-closed errors in 7 tools, fix stamp font name

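- Capture total_pages = len(doc) right after opening the document, so
  results built after doc.close() no longer call len() on a closed
  document (content_analysis, security_analysis, annotations, and
  misc_tools mixins)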
- Fix invalid PyMuPDF font name "helv-bold" → "helv" in add_stamps
- Bump to v2.1.7
This commit is contained in:
Ryan Malloy 2026-04-07 04:19:20 -06:00
parent 057aa5be40
commit b53d8ab998
6 changed files with 35 additions and 28 deletions

View File

@ -1,6 +1,6 @@
[project]
name = "mcp-pdf"
version = "2.1.6"
version = "2.1.7"
description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more"
authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}]
readme = "README.md"

View File

@ -402,7 +402,7 @@ class AnnotationsMixin(MCPMixin):
stamp_type.upper(),
fontsize=12,
color=(1, 1, 1), # White text
fontname="helv-bold"
fontname="helv"
)
stamps_added += 1
@ -470,6 +470,7 @@ class AnnotationsMixin(MCPMixin):
# Validate path
input_pdf_path = await validate_pdf_path(pdf_path)
doc = fitz.open(str(input_pdf_path))
total_pages = len(doc)
all_annotations = []
annotation_stats = {
@ -563,7 +564,7 @@ class AnnotationsMixin(MCPMixin):
"annotations": formatted_data,
"file_info": {
"path": str(input_pdf_path),
"total_pages": len(doc) if 'doc' in locals() else 0
"total_pages": total_pages if 'total_pages' in locals() else 0
},
"extraction_time": round(time.time() - start_time, 2)
}

View File

@ -51,9 +51,10 @@ class ContentAnalysisMixin(MCPMixin):
try:
path = await validate_pdf_path(pdf_path)
doc = fitz.open(str(path))
total_pages = len(doc)
# Extract text from sample pages for analysis
sample_size = min(10, len(doc))
sample_size = min(10, total_pages)
full_text = ""
total_words = 0
total_sentences = 0
@ -132,8 +133,8 @@ class ContentAnalysisMixin(MCPMixin):
total_links = sum(len(doc[i].get_links()) for i in range(sample_size))
# Estimate for full document
estimated_total_images = int(total_images * len(doc) / sample_size) if sample_size > 0 else 0
estimated_total_links = int(total_links * len(doc) / sample_size) if sample_size > 0 else 0
estimated_total_images = int(total_images * total_pages / sample_size) if sample_size > 0 else 0
estimated_total_links = int(total_links * total_pages / sample_size) if sample_size > 0 else 0
doc.close()
@ -145,8 +146,8 @@ class ContentAnalysisMixin(MCPMixin):
"secondary_types": sorted(content_scores.items(), key=lambda x: x[1], reverse=True)[1:4]
},
"content_analysis": {
"total_pages": len(doc),
"estimated_word_count": int(total_words * len(doc) / sample_size),
"total_pages": total_pages,
"estimated_word_count": int(total_words * total_pages / sample_size),
"avg_words_per_page": round(avg_words_per_page, 1),
"vocabulary_diversity": round(vocabulary_diversity, 2),
"reading_level": reading_level,
@ -211,15 +212,16 @@ class ContentAnalysisMixin(MCPMixin):
try:
path = await validate_pdf_path(pdf_path)
doc = fitz.open(str(path))
total_pages = len(doc)
# Parse pages parameter
parsed_pages = parse_pages_parameter(pages)
page_numbers = parsed_pages if parsed_pages else list(range(len(doc)))
page_numbers = [p for p in page_numbers if 0 <= p < len(doc)]
page_numbers = parsed_pages if parsed_pages else list(range(total_pages))
page_numbers = [p for p in page_numbers if 0 <= p < total_pages]
# If parsing failed but pages was specified, use all pages
if pages and not page_numbers:
page_numbers = list(range(len(doc)))
page_numbers = list(range(total_pages))
# Extract text from specified pages
full_text = ""
@ -313,7 +315,7 @@ class ContentAnalysisMixin(MCPMixin):
},
"file_info": {
"path": str(path),
"total_pages": len(doc),
"total_pages": total_pages,
"pages_processed": pages or "all"
},
"analysis_time": round(time.time() - start_time, 2)
@ -354,17 +356,18 @@ class ContentAnalysisMixin(MCPMixin):
try:
path = await validate_pdf_path(pdf_path)
doc = fitz.open(str(path))
total_pages = len(doc)
# Parse pages parameter
parsed_pages = parse_pages_parameter(pages)
if parsed_pages:
page_numbers = [p for p in parsed_pages if 0 <= p < len(doc)]
page_numbers = [p for p in parsed_pages if 0 <= p < total_pages]
else:
page_numbers = list(range(min(5, len(doc)))) # Limit to 5 pages for performance
page_numbers = list(range(min(5, total_pages))) # Limit to 5 pages for performance
# If parsing failed but pages was specified, default to first 5
if pages and not page_numbers:
page_numbers = list(range(min(5, len(doc))))
page_numbers = list(range(min(5, total_pages)))
layout_analysis = []
@ -513,7 +516,7 @@ class ContentAnalysisMixin(MCPMixin):
},
"file_info": {
"path": str(path),
"total_pages": len(doc)
"total_pages": total_pages
},
"analysis_time": round(time.time() - start_time, 2)
}

View File

@ -62,15 +62,16 @@ class MiscToolsMixin(MCPMixin):
try:
path = await validate_pdf_path(pdf_path)
doc = fitz.open(str(path))
total_pages = len(doc)
# Parse pages parameter
parsed_pages = parse_pages_parameter(pages)
page_numbers = parsed_pages if parsed_pages else list(range(len(doc)))
page_numbers = [p for p in page_numbers if 0 <= p < len(doc)]
page_numbers = parsed_pages if parsed_pages else list(range(total_pages))
page_numbers = [p for p in page_numbers if 0 <= p < total_pages]
# If parsing failed but pages was specified, use all pages
if pages and not page_numbers:
page_numbers = list(range(len(doc)))
page_numbers = list(range(total_pages))
all_links = []
link_types = {"internal": 0, "external": 0, "email": 0, "other": 0}
@ -169,7 +170,7 @@ class MiscToolsMixin(MCPMixin):
},
"file_info": {
"path": str(path),
"total_pages": len(doc),
"total_pages": total_pages,
"pages_processed": pages or "all"
},
"extraction_time": round(time.time() - start_time, 2)
@ -210,15 +211,16 @@ class MiscToolsMixin(MCPMixin):
try:
path = await validate_pdf_path(pdf_path)
doc = fitz.open(str(path))
total_pages = len(doc)
# Parse pages parameter
parsed_pages = parse_pages_parameter(pages)
page_numbers = parsed_pages if parsed_pages else list(range(len(doc)))
page_numbers = [p for p in page_numbers if 0 <= p < len(doc)]
page_numbers = parsed_pages if parsed_pages else list(range(total_pages))
page_numbers = [p for p in page_numbers if 0 <= p < total_pages]
# If parsing failed but pages was specified, use all pages
if pages and not page_numbers:
page_numbers = list(range(len(doc)))
page_numbers = list(range(total_pages))
visual_elements = []
charts_found = 0
@ -326,7 +328,7 @@ class MiscToolsMixin(MCPMixin):
},
"file_info": {
"path": str(path),
"total_pages": len(doc)
"total_pages": total_pages
},
"analysis_time": round(time.time() - start_time, 2)
}

View File

@ -225,6 +225,7 @@ class SecurityAnalysisMixin(MCPMixin):
try:
path = await validate_pdf_path(pdf_path)
doc = fitz.open(str(path))
total_pages = len(doc)
watermark_analysis = []
total_watermarks = 0
@ -310,7 +311,7 @@ class SecurityAnalysisMixin(MCPMixin):
# Watermark assessment
has_watermarks = total_watermarks > 0
watermark_density = total_watermarks / len(doc) if len(doc) > 0 else 0
watermark_density = total_watermarks / total_pages if total_pages > 0 else 0
# Determine watermark pattern
if watermark_density > 0.8:
@ -334,7 +335,7 @@ class SecurityAnalysisMixin(MCPMixin):
"page_analysis": watermark_analysis,
"watermark_insights": {
"pages_with_watermarks": len(watermark_analysis),
"pages_without_watermarks": len(doc) - len(watermark_analysis),
"pages_without_watermarks": total_pages - len(watermark_analysis),
"most_common_type": max(watermark_types, key=watermark_types.get) if any(watermark_types.values()) else "none"
},
"recommendations": [
@ -344,7 +345,7 @@ class SecurityAnalysisMixin(MCPMixin):
] if has_watermarks else ["No watermarks detected"],
"file_info": {
"path": str(path),
"total_pages": len(doc)
"total_pages": total_pages
},
"analysis_time": round(time.time() - start_time, 2)
}

2
uv.lock generated
View File

@ -1032,7 +1032,7 @@ wheels = [
[[package]]
name = "mcp-pdf"
version = "2.1.5"
version = "2.1.6"
source = { editable = "." }
dependencies = [
{ name = "fastmcp" },