Fix document-closed errors in 7 tools, fix stamp font name
- Capture total_pages before doc.close() in content_analysis, security_analysis, annotations, and misc_tools mixins
- Fix invalid PyMuPDF font name "helv-bold" → "helv" in add_stamps
- Bump to v2.1.7
This commit is contained in:
parent
057aa5be40
commit
b53d8ab998
@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "mcp-pdf"
|
||||
version = "2.1.6"
|
||||
version = "2.1.7"
|
||||
description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more"
|
||||
authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}]
|
||||
readme = "README.md"
|
||||
|
||||
@ -402,7 +402,7 @@ class AnnotationsMixin(MCPMixin):
|
||||
stamp_type.upper(),
|
||||
fontsize=12,
|
||||
color=(1, 1, 1), # White text
|
||||
fontname="helv-bold"
|
||||
fontname="helv"
|
||||
)
|
||||
|
||||
stamps_added += 1
|
||||
@ -470,6 +470,7 @@ class AnnotationsMixin(MCPMixin):
|
||||
# Validate path
|
||||
input_pdf_path = await validate_pdf_path(pdf_path)
|
||||
doc = fitz.open(str(input_pdf_path))
|
||||
total_pages = len(doc)
|
||||
|
||||
all_annotations = []
|
||||
annotation_stats = {
|
||||
@ -563,7 +564,7 @@ class AnnotationsMixin(MCPMixin):
|
||||
"annotations": formatted_data,
|
||||
"file_info": {
|
||||
"path": str(input_pdf_path),
|
||||
"total_pages": len(doc) if 'doc' in locals() else 0
|
||||
"total_pages": total_pages if 'total_pages' in locals() else 0
|
||||
},
|
||||
"extraction_time": round(time.time() - start_time, 2)
|
||||
}
|
||||
|
||||
@ -51,9 +51,10 @@ class ContentAnalysisMixin(MCPMixin):
|
||||
try:
|
||||
path = await validate_pdf_path(pdf_path)
|
||||
doc = fitz.open(str(path))
|
||||
total_pages = len(doc)
|
||||
|
||||
# Extract text from sample pages for analysis
|
||||
sample_size = min(10, len(doc))
|
||||
sample_size = min(10, total_pages)
|
||||
full_text = ""
|
||||
total_words = 0
|
||||
total_sentences = 0
|
||||
@ -132,8 +133,8 @@ class ContentAnalysisMixin(MCPMixin):
|
||||
total_links = sum(len(doc[i].get_links()) for i in range(sample_size))
|
||||
|
||||
# Estimate for full document
|
||||
estimated_total_images = int(total_images * len(doc) / sample_size) if sample_size > 0 else 0
|
||||
estimated_total_links = int(total_links * len(doc) / sample_size) if sample_size > 0 else 0
|
||||
estimated_total_images = int(total_images * total_pages / sample_size) if sample_size > 0 else 0
|
||||
estimated_total_links = int(total_links * total_pages / sample_size) if sample_size > 0 else 0
|
||||
|
||||
doc.close()
|
||||
|
||||
@ -145,8 +146,8 @@ class ContentAnalysisMixin(MCPMixin):
|
||||
"secondary_types": sorted(content_scores.items(), key=lambda x: x[1], reverse=True)[1:4]
|
||||
},
|
||||
"content_analysis": {
|
||||
"total_pages": len(doc),
|
||||
"estimated_word_count": int(total_words * len(doc) / sample_size),
|
||||
"total_pages": total_pages,
|
||||
"estimated_word_count": int(total_words * total_pages / sample_size),
|
||||
"avg_words_per_page": round(avg_words_per_page, 1),
|
||||
"vocabulary_diversity": round(vocabulary_diversity, 2),
|
||||
"reading_level": reading_level,
|
||||
@ -211,15 +212,16 @@ class ContentAnalysisMixin(MCPMixin):
|
||||
try:
|
||||
path = await validate_pdf_path(pdf_path)
|
||||
doc = fitz.open(str(path))
|
||||
total_pages = len(doc)
|
||||
|
||||
# Parse pages parameter
|
||||
parsed_pages = parse_pages_parameter(pages)
|
||||
page_numbers = parsed_pages if parsed_pages else list(range(len(doc)))
|
||||
page_numbers = [p for p in page_numbers if 0 <= p < len(doc)]
|
||||
page_numbers = parsed_pages if parsed_pages else list(range(total_pages))
|
||||
page_numbers = [p for p in page_numbers if 0 <= p < total_pages]
|
||||
|
||||
# If parsing failed but pages was specified, use all pages
|
||||
if pages and not page_numbers:
|
||||
page_numbers = list(range(len(doc)))
|
||||
page_numbers = list(range(total_pages))
|
||||
|
||||
# Extract text from specified pages
|
||||
full_text = ""
|
||||
@ -313,7 +315,7 @@ class ContentAnalysisMixin(MCPMixin):
|
||||
},
|
||||
"file_info": {
|
||||
"path": str(path),
|
||||
"total_pages": len(doc),
|
||||
"total_pages": total_pages,
|
||||
"pages_processed": pages or "all"
|
||||
},
|
||||
"analysis_time": round(time.time() - start_time, 2)
|
||||
@ -354,17 +356,18 @@ class ContentAnalysisMixin(MCPMixin):
|
||||
try:
|
||||
path = await validate_pdf_path(pdf_path)
|
||||
doc = fitz.open(str(path))
|
||||
total_pages = len(doc)
|
||||
|
||||
# Parse pages parameter
|
||||
parsed_pages = parse_pages_parameter(pages)
|
||||
if parsed_pages:
|
||||
page_numbers = [p for p in parsed_pages if 0 <= p < len(doc)]
|
||||
page_numbers = [p for p in parsed_pages if 0 <= p < total_pages]
|
||||
else:
|
||||
page_numbers = list(range(min(5, len(doc)))) # Limit to 5 pages for performance
|
||||
page_numbers = list(range(min(5, total_pages))) # Limit to 5 pages for performance
|
||||
|
||||
# If parsing failed but pages was specified, default to first 5
|
||||
if pages and not page_numbers:
|
||||
page_numbers = list(range(min(5, len(doc))))
|
||||
page_numbers = list(range(min(5, total_pages)))
|
||||
|
||||
layout_analysis = []
|
||||
|
||||
@ -513,7 +516,7 @@ class ContentAnalysisMixin(MCPMixin):
|
||||
},
|
||||
"file_info": {
|
||||
"path": str(path),
|
||||
"total_pages": len(doc)
|
||||
"total_pages": total_pages
|
||||
},
|
||||
"analysis_time": round(time.time() - start_time, 2)
|
||||
}
|
||||
|
||||
@ -62,15 +62,16 @@ class MiscToolsMixin(MCPMixin):
|
||||
try:
|
||||
path = await validate_pdf_path(pdf_path)
|
||||
doc = fitz.open(str(path))
|
||||
total_pages = len(doc)
|
||||
|
||||
# Parse pages parameter
|
||||
parsed_pages = parse_pages_parameter(pages)
|
||||
page_numbers = parsed_pages if parsed_pages else list(range(len(doc)))
|
||||
page_numbers = [p for p in page_numbers if 0 <= p < len(doc)]
|
||||
page_numbers = parsed_pages if parsed_pages else list(range(total_pages))
|
||||
page_numbers = [p for p in page_numbers if 0 <= p < total_pages]
|
||||
|
||||
# If parsing failed but pages was specified, use all pages
|
||||
if pages and not page_numbers:
|
||||
page_numbers = list(range(len(doc)))
|
||||
page_numbers = list(range(total_pages))
|
||||
|
||||
all_links = []
|
||||
link_types = {"internal": 0, "external": 0, "email": 0, "other": 0}
|
||||
@ -169,7 +170,7 @@ class MiscToolsMixin(MCPMixin):
|
||||
},
|
||||
"file_info": {
|
||||
"path": str(path),
|
||||
"total_pages": len(doc),
|
||||
"total_pages": total_pages,
|
||||
"pages_processed": pages or "all"
|
||||
},
|
||||
"extraction_time": round(time.time() - start_time, 2)
|
||||
@ -210,15 +211,16 @@ class MiscToolsMixin(MCPMixin):
|
||||
try:
|
||||
path = await validate_pdf_path(pdf_path)
|
||||
doc = fitz.open(str(path))
|
||||
total_pages = len(doc)
|
||||
|
||||
# Parse pages parameter
|
||||
parsed_pages = parse_pages_parameter(pages)
|
||||
page_numbers = parsed_pages if parsed_pages else list(range(len(doc)))
|
||||
page_numbers = [p for p in page_numbers if 0 <= p < len(doc)]
|
||||
page_numbers = parsed_pages if parsed_pages else list(range(total_pages))
|
||||
page_numbers = [p for p in page_numbers if 0 <= p < total_pages]
|
||||
|
||||
# If parsing failed but pages was specified, use all pages
|
||||
if pages and not page_numbers:
|
||||
page_numbers = list(range(len(doc)))
|
||||
page_numbers = list(range(total_pages))
|
||||
|
||||
visual_elements = []
|
||||
charts_found = 0
|
||||
@ -326,7 +328,7 @@ class MiscToolsMixin(MCPMixin):
|
||||
},
|
||||
"file_info": {
|
||||
"path": str(path),
|
||||
"total_pages": len(doc)
|
||||
"total_pages": total_pages
|
||||
},
|
||||
"analysis_time": round(time.time() - start_time, 2)
|
||||
}
|
||||
|
||||
@ -225,6 +225,7 @@ class SecurityAnalysisMixin(MCPMixin):
|
||||
try:
|
||||
path = await validate_pdf_path(pdf_path)
|
||||
doc = fitz.open(str(path))
|
||||
total_pages = len(doc)
|
||||
|
||||
watermark_analysis = []
|
||||
total_watermarks = 0
|
||||
@ -310,7 +311,7 @@ class SecurityAnalysisMixin(MCPMixin):
|
||||
|
||||
# Watermark assessment
|
||||
has_watermarks = total_watermarks > 0
|
||||
watermark_density = total_watermarks / len(doc) if len(doc) > 0 else 0
|
||||
watermark_density = total_watermarks / total_pages if total_pages > 0 else 0
|
||||
|
||||
# Determine watermark pattern
|
||||
if watermark_density > 0.8:
|
||||
@ -334,7 +335,7 @@ class SecurityAnalysisMixin(MCPMixin):
|
||||
"page_analysis": watermark_analysis,
|
||||
"watermark_insights": {
|
||||
"pages_with_watermarks": len(watermark_analysis),
|
||||
"pages_without_watermarks": len(doc) - len(watermark_analysis),
|
||||
"pages_without_watermarks": total_pages - len(watermark_analysis),
|
||||
"most_common_type": max(watermark_types, key=watermark_types.get) if any(watermark_types.values()) else "none"
|
||||
},
|
||||
"recommendations": [
|
||||
@ -344,7 +345,7 @@ class SecurityAnalysisMixin(MCPMixin):
|
||||
] if has_watermarks else ["No watermarks detected"],
|
||||
"file_info": {
|
||||
"path": str(path),
|
||||
"total_pages": len(doc)
|
||||
"total_pages": total_pages
|
||||
},
|
||||
"analysis_time": round(time.time() - start_time, 2)
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user