Fix document-closed errors in 7 tools, fix stamp font name

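- Capture total_pages = len(doc) right after fitz.open() and use it in
  place of len(doc), so results built after doc.close() no longer touch
  the closed document (content_analysis, security_analysis, annotations,
  and misc_tools mixins)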
- Replace the invalid PyMuPDF font name "helv-bold" with "helv" in add_stamps
  (note: "helv" is regular Helvetica; the Base-14 bold name is "hebo")
- Bump to v2.1.7
This commit is contained in:
Ryan Malloy 2026-04-07 04:19:20 -06:00
parent 057aa5be40
commit b53d8ab998
6 changed files with 35 additions and 28 deletions

View File

@ -1,6 +1,6 @@
[project] [project]
name = "mcp-pdf" name = "mcp-pdf"
version = "2.1.6" version = "2.1.7"
description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more" description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more"
authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}] authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}]
readme = "README.md" readme = "README.md"

View File

@ -402,7 +402,7 @@ class AnnotationsMixin(MCPMixin):
stamp_type.upper(), stamp_type.upper(),
fontsize=12, fontsize=12,
color=(1, 1, 1), # White text color=(1, 1, 1), # White text
fontname="helv-bold" fontname="helv"
) )
stamps_added += 1 stamps_added += 1
@ -470,6 +470,7 @@ class AnnotationsMixin(MCPMixin):
# Validate path # Validate path
input_pdf_path = await validate_pdf_path(pdf_path) input_pdf_path = await validate_pdf_path(pdf_path)
doc = fitz.open(str(input_pdf_path)) doc = fitz.open(str(input_pdf_path))
total_pages = len(doc)
all_annotations = [] all_annotations = []
annotation_stats = { annotation_stats = {
@ -563,7 +564,7 @@ class AnnotationsMixin(MCPMixin):
"annotations": formatted_data, "annotations": formatted_data,
"file_info": { "file_info": {
"path": str(input_pdf_path), "path": str(input_pdf_path),
"total_pages": len(doc) if 'doc' in locals() else 0 "total_pages": total_pages if 'total_pages' in locals() else 0
}, },
"extraction_time": round(time.time() - start_time, 2) "extraction_time": round(time.time() - start_time, 2)
} }

View File

@ -51,9 +51,10 @@ class ContentAnalysisMixin(MCPMixin):
try: try:
path = await validate_pdf_path(pdf_path) path = await validate_pdf_path(pdf_path)
doc = fitz.open(str(path)) doc = fitz.open(str(path))
total_pages = len(doc)
# Extract text from sample pages for analysis # Extract text from sample pages for analysis
sample_size = min(10, len(doc)) sample_size = min(10, total_pages)
full_text = "" full_text = ""
total_words = 0 total_words = 0
total_sentences = 0 total_sentences = 0
@ -132,8 +133,8 @@ class ContentAnalysisMixin(MCPMixin):
total_links = sum(len(doc[i].get_links()) for i in range(sample_size)) total_links = sum(len(doc[i].get_links()) for i in range(sample_size))
# Estimate for full document # Estimate for full document
estimated_total_images = int(total_images * len(doc) / sample_size) if sample_size > 0 else 0 estimated_total_images = int(total_images * total_pages / sample_size) if sample_size > 0 else 0
estimated_total_links = int(total_links * len(doc) / sample_size) if sample_size > 0 else 0 estimated_total_links = int(total_links * total_pages / sample_size) if sample_size > 0 else 0
doc.close() doc.close()
@ -145,8 +146,8 @@ class ContentAnalysisMixin(MCPMixin):
"secondary_types": sorted(content_scores.items(), key=lambda x: x[1], reverse=True)[1:4] "secondary_types": sorted(content_scores.items(), key=lambda x: x[1], reverse=True)[1:4]
}, },
"content_analysis": { "content_analysis": {
"total_pages": len(doc), "total_pages": total_pages,
"estimated_word_count": int(total_words * len(doc) / sample_size), "estimated_word_count": int(total_words * total_pages / sample_size),
"avg_words_per_page": round(avg_words_per_page, 1), "avg_words_per_page": round(avg_words_per_page, 1),
"vocabulary_diversity": round(vocabulary_diversity, 2), "vocabulary_diversity": round(vocabulary_diversity, 2),
"reading_level": reading_level, "reading_level": reading_level,
@ -211,15 +212,16 @@ class ContentAnalysisMixin(MCPMixin):
try: try:
path = await validate_pdf_path(pdf_path) path = await validate_pdf_path(pdf_path)
doc = fitz.open(str(path)) doc = fitz.open(str(path))
total_pages = len(doc)
# Parse pages parameter # Parse pages parameter
parsed_pages = parse_pages_parameter(pages) parsed_pages = parse_pages_parameter(pages)
page_numbers = parsed_pages if parsed_pages else list(range(len(doc))) page_numbers = parsed_pages if parsed_pages else list(range(total_pages))
page_numbers = [p for p in page_numbers if 0 <= p < len(doc)] page_numbers = [p for p in page_numbers if 0 <= p < total_pages]
# If parsing failed but pages was specified, use all pages # If parsing failed but pages was specified, use all pages
if pages and not page_numbers: if pages and not page_numbers:
page_numbers = list(range(len(doc))) page_numbers = list(range(total_pages))
# Extract text from specified pages # Extract text from specified pages
full_text = "" full_text = ""
@ -313,7 +315,7 @@ class ContentAnalysisMixin(MCPMixin):
}, },
"file_info": { "file_info": {
"path": str(path), "path": str(path),
"total_pages": len(doc), "total_pages": total_pages,
"pages_processed": pages or "all" "pages_processed": pages or "all"
}, },
"analysis_time": round(time.time() - start_time, 2) "analysis_time": round(time.time() - start_time, 2)
@ -354,17 +356,18 @@ class ContentAnalysisMixin(MCPMixin):
try: try:
path = await validate_pdf_path(pdf_path) path = await validate_pdf_path(pdf_path)
doc = fitz.open(str(path)) doc = fitz.open(str(path))
total_pages = len(doc)
# Parse pages parameter # Parse pages parameter
parsed_pages = parse_pages_parameter(pages) parsed_pages = parse_pages_parameter(pages)
if parsed_pages: if parsed_pages:
page_numbers = [p for p in parsed_pages if 0 <= p < len(doc)] page_numbers = [p for p in parsed_pages if 0 <= p < total_pages]
else: else:
page_numbers = list(range(min(5, len(doc)))) # Limit to 5 pages for performance page_numbers = list(range(min(5, total_pages))) # Limit to 5 pages for performance
# If parsing failed but pages was specified, default to first 5 # If parsing failed but pages was specified, default to first 5
if pages and not page_numbers: if pages and not page_numbers:
page_numbers = list(range(min(5, len(doc)))) page_numbers = list(range(min(5, total_pages)))
layout_analysis = [] layout_analysis = []
@ -513,7 +516,7 @@ class ContentAnalysisMixin(MCPMixin):
}, },
"file_info": { "file_info": {
"path": str(path), "path": str(path),
"total_pages": len(doc) "total_pages": total_pages
}, },
"analysis_time": round(time.time() - start_time, 2) "analysis_time": round(time.time() - start_time, 2)
} }

View File

@ -62,15 +62,16 @@ class MiscToolsMixin(MCPMixin):
try: try:
path = await validate_pdf_path(pdf_path) path = await validate_pdf_path(pdf_path)
doc = fitz.open(str(path)) doc = fitz.open(str(path))
total_pages = len(doc)
# Parse pages parameter # Parse pages parameter
parsed_pages = parse_pages_parameter(pages) parsed_pages = parse_pages_parameter(pages)
page_numbers = parsed_pages if parsed_pages else list(range(len(doc))) page_numbers = parsed_pages if parsed_pages else list(range(total_pages))
page_numbers = [p for p in page_numbers if 0 <= p < len(doc)] page_numbers = [p for p in page_numbers if 0 <= p < total_pages]
# If parsing failed but pages was specified, use all pages # If parsing failed but pages was specified, use all pages
if pages and not page_numbers: if pages and not page_numbers:
page_numbers = list(range(len(doc))) page_numbers = list(range(total_pages))
all_links = [] all_links = []
link_types = {"internal": 0, "external": 0, "email": 0, "other": 0} link_types = {"internal": 0, "external": 0, "email": 0, "other": 0}
@ -169,7 +170,7 @@ class MiscToolsMixin(MCPMixin):
}, },
"file_info": { "file_info": {
"path": str(path), "path": str(path),
"total_pages": len(doc), "total_pages": total_pages,
"pages_processed": pages or "all" "pages_processed": pages or "all"
}, },
"extraction_time": round(time.time() - start_time, 2) "extraction_time": round(time.time() - start_time, 2)
@ -210,15 +211,16 @@ class MiscToolsMixin(MCPMixin):
try: try:
path = await validate_pdf_path(pdf_path) path = await validate_pdf_path(pdf_path)
doc = fitz.open(str(path)) doc = fitz.open(str(path))
total_pages = len(doc)
# Parse pages parameter # Parse pages parameter
parsed_pages = parse_pages_parameter(pages) parsed_pages = parse_pages_parameter(pages)
page_numbers = parsed_pages if parsed_pages else list(range(len(doc))) page_numbers = parsed_pages if parsed_pages else list(range(total_pages))
page_numbers = [p for p in page_numbers if 0 <= p < len(doc)] page_numbers = [p for p in page_numbers if 0 <= p < total_pages]
# If parsing failed but pages was specified, use all pages # If parsing failed but pages was specified, use all pages
if pages and not page_numbers: if pages and not page_numbers:
page_numbers = list(range(len(doc))) page_numbers = list(range(total_pages))
visual_elements = [] visual_elements = []
charts_found = 0 charts_found = 0
@ -326,7 +328,7 @@ class MiscToolsMixin(MCPMixin):
}, },
"file_info": { "file_info": {
"path": str(path), "path": str(path),
"total_pages": len(doc) "total_pages": total_pages
}, },
"analysis_time": round(time.time() - start_time, 2) "analysis_time": round(time.time() - start_time, 2)
} }

View File

@ -225,6 +225,7 @@ class SecurityAnalysisMixin(MCPMixin):
try: try:
path = await validate_pdf_path(pdf_path) path = await validate_pdf_path(pdf_path)
doc = fitz.open(str(path)) doc = fitz.open(str(path))
total_pages = len(doc)
watermark_analysis = [] watermark_analysis = []
total_watermarks = 0 total_watermarks = 0
@ -310,7 +311,7 @@ class SecurityAnalysisMixin(MCPMixin):
# Watermark assessment # Watermark assessment
has_watermarks = total_watermarks > 0 has_watermarks = total_watermarks > 0
watermark_density = total_watermarks / len(doc) if len(doc) > 0 else 0 watermark_density = total_watermarks / total_pages if total_pages > 0 else 0
# Determine watermark pattern # Determine watermark pattern
if watermark_density > 0.8: if watermark_density > 0.8:
@ -334,7 +335,7 @@ class SecurityAnalysisMixin(MCPMixin):
"page_analysis": watermark_analysis, "page_analysis": watermark_analysis,
"watermark_insights": { "watermark_insights": {
"pages_with_watermarks": len(watermark_analysis), "pages_with_watermarks": len(watermark_analysis),
"pages_without_watermarks": len(doc) - len(watermark_analysis), "pages_without_watermarks": total_pages - len(watermark_analysis),
"most_common_type": max(watermark_types, key=watermark_types.get) if any(watermark_types.values()) else "none" "most_common_type": max(watermark_types, key=watermark_types.get) if any(watermark_types.values()) else "none"
}, },
"recommendations": [ "recommendations": [
@ -344,7 +345,7 @@ class SecurityAnalysisMixin(MCPMixin):
] if has_watermarks else ["No watermarks detected"], ] if has_watermarks else ["No watermarks detected"],
"file_info": { "file_info": {
"path": str(path), "path": str(path),
"total_pages": len(doc) "total_pages": total_pages
}, },
"analysis_time": round(time.time() - start_time, 2) "analysis_time": round(time.time() - start_time, 2)
} }

2
uv.lock generated
View File

@ -1032,7 +1032,7 @@ wheels = [
[[package]] [[package]]
name = "mcp-pdf" name = "mcp-pdf"
version = "2.1.5" version = "2.1.6"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "fastmcp" }, { name = "fastmcp" },