📉 File-first output for ocr_pdf, slim split_pdf_by_structure response

ocr_pdf: now writes the OCR text to a file by default and returns the
path plus a short preview instead of the full text dump (~17k tokens →
~500 tokens). Pass inline=True to get the old behavior.

split_pdf_by_structure: each entry in sections is now a one-line summary
string instead of a full object with paths. Removed the
detected_structure dump from the response.
This commit is contained in:
Ryan Malloy 2026-03-08 05:30:57 -06:00
parent d413438fea
commit 057aa5be40
4 changed files with 66 additions and 33 deletions

View File

@ -1,6 +1,6 @@
[project]
name = "mcp-pdf"
version = "2.1.5"
version = "2.1.6"
description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more"
authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}]
readme = "README.md"

View File

@ -826,9 +826,9 @@ class StructureDetectionMixin(MCPMixin):
"error": (
f"No boundaries found at level <= {split_level} with "
f"confidence >= {min_confidence}. Try lowering min_confidence "
f"or increasing split_level."
f"or increasing split_level. "
f"({len(flat_boundaries)} total boundaries detected)"
),
"detected_structure": structure_result["structure"],
"split_time": round(time.time() - start_time, 2),
}
@ -897,16 +897,10 @@ class StructureDetectionMixin(MCPMixin):
except OSError:
pass
sections_results.append({
"title": title,
"page_start": page_start,
"page_end": page_end,
"directory": str(section_dir),
"pdf_path": str(section_pdf_path) if section_pdf_path else None,
"markdown_path": str(md_path) if md_path else None,
"images_extracted": images_extracted,
"vectors_extracted": vectors_extracted,
})
sections_results.append(
f"p{page_start}-{page_end}: {title[:60]} "
f"({images_extracted} img, {vectors_extracted} vec)"
)
source_doc.close()
@ -915,7 +909,6 @@ class StructureDetectionMixin(MCPMixin):
"sections_created": len(sections_results),
"output_directory": str(output_dir),
"sections": sections_results,
"detected_structure": structure_result["structure"],
"split_time": round(time.time() - start_time, 2),
}

View File

@ -195,7 +195,11 @@ class TextExtractionMixin(MCPMixin):
@mcp_tool(
name="ocr_pdf",
description="Perform OCR on scanned PDFs with preprocessing options"
description=(
"Perform OCR on scanned PDFs. By default writes extracted text "
"to a .txt file and returns the path with a short preview. "
"Set inline=True to return full OCR text in the response."
)
)
async def ocr_pdf(
self,
@ -203,7 +207,9 @@ class TextExtractionMixin(MCPMixin):
pages: Optional[str] = None,
languages: List[str] = ["eng"],
dpi: int = 300,
preprocess: bool = True
preprocess: bool = True,
output_directory: Optional[str] = None,
inline: bool = False,
) -> Dict[str, Any]:
"""
Perform OCR on scanned PDF pages.
@ -214,9 +220,14 @@ class TextExtractionMixin(MCPMixin):
languages: List of language codes for OCR
dpi: DPI for image rendering
preprocess: Whether to preprocess images for better OCR
output_directory: Directory for the OCR text file.
Defaults to a temp directory.
inline: If True, return full OCR text in the response.
Default: False (write to file, return path + preview).
Returns:
Dictionary containing OCR results
Dictionary containing OCR file path and summary, or full text
if inline=True
"""
start_time = time.time()
@ -294,25 +305,54 @@ class TextExtractionMixin(MCPMixin):
# Calculate overall statistics
successful_pages = [r for r in ocr_results if "error" not in r]
avg_confidence = sum(r["confidence"] for r in successful_pages) / len(successful_pages) if successful_pages else 0
full_text = "\n\n".join(total_text)
word_count = len(full_text.split())
elapsed = round(time.time() - start_time, 2)
# ── Inline mode: return everything in the response ──
if inline:
return {
"success": True,
"text": full_text,
"pages_processed": len(pages_to_process),
"pages_successful": len(successful_pages),
"overall_confidence": round(avg_confidence, 2),
"page_results": ocr_results,
"ocr_time": elapsed,
}
# ── File-first mode (default): write text, return summary ──
if output_directory:
out_dir = Path(validate_output_path(output_directory))
else:
out_dir = Path(tempfile.mkdtemp(prefix="pdf_ocr_"))
out_dir.mkdir(parents=True, exist_ok=True)
output_filename = f"{path.stem}_ocr.txt"
output_path = out_dir / output_filename
output_path.write_text(full_text, encoding="utf-8")
# Build preview (first ~500 chars at sentence boundary)
preview = full_text[:500]
if len(full_text) > 500:
last_period = preview.rfind(".")
if last_period > 300:
preview = preview[:last_period + 1]
preview += " [...]"
return {
"success": True,
"text": "\n\n".join(total_text),
"output_file": str(output_path),
"text_preview": preview,
"ocr_summary": {
"word_count": word_count,
"character_count": len(full_text),
"pages_processed": len(pages_to_process),
"pages_successful": len(successful_pages),
"pages_failed": len(pages_to_process) - len(successful_pages),
"overall_confidence": round(avg_confidence, 2),
"page_results": ocr_results,
"ocr_settings": {
"languages": languages,
"dpi": dpi,
"preprocessing": preprocess
},
"file_info": {
"path": str(path),
"total_pages": total_pages
},
"ocr_time": round(time.time() - start_time, 2)
"ocr_time": elapsed,
}
except Exception as e:

2
uv.lock generated
View File

@ -1032,7 +1032,7 @@ wheels = [
[[package]]
name = "mcp-pdf"
version = "2.1.4"
version = "2.1.5"
source = { editable = "." }
dependencies = [
{ name = "fastmcp" },