diff --git a/pyproject.toml b/pyproject.toml index 6a063e3..e4cc232 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "mcp-pdf" -version = "2.1.5" +version = "2.1.6" description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more" authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}] readme = "README.md" diff --git a/src/mcp_pdf/mixins_official/structure_detection.py b/src/mcp_pdf/mixins_official/structure_detection.py index 0370d08..3d28eca 100644 --- a/src/mcp_pdf/mixins_official/structure_detection.py +++ b/src/mcp_pdf/mixins_official/structure_detection.py @@ -826,9 +826,9 @@ class StructureDetectionMixin(MCPMixin): "error": ( f"No boundaries found at level <= {split_level} with " f"confidence >= {min_confidence}. Try lowering min_confidence " - f"or increasing split_level." + f"or increasing split_level. " + f"({len(flat_boundaries)} total boundaries detected)" ), - "detected_structure": structure_result["structure"], "split_time": round(time.time() - start_time, 2), } @@ -897,16 +897,10 @@ class StructureDetectionMixin(MCPMixin): except OSError: pass - sections_results.append({ - "title": title, - "page_start": page_start, - "page_end": page_end, - "directory": str(section_dir), - "pdf_path": str(section_pdf_path) if section_pdf_path else None, - "markdown_path": str(md_path) if md_path else None, - "images_extracted": images_extracted, - "vectors_extracted": vectors_extracted, - }) + sections_results.append( + f"p{page_start}-{page_end}: {title[:60]} " + f"({images_extracted} img, {vectors_extracted} vec)" + ) source_doc.close() @@ -915,7 +909,6 @@ class StructureDetectionMixin(MCPMixin): "sections_created": len(sections_results), "output_directory": str(output_dir), "sections": sections_results, - "detected_structure": structure_result["structure"], "split_time": round(time.time() - start_time, 2), } diff --git a/src/mcp_pdf/mixins_official/text_extraction.py b/src/mcp_pdf/mixins_official/text_extraction.py index c78a357..15a72f9 100644 --- a/src/mcp_pdf/mixins_official/text_extraction.py +++ b/src/mcp_pdf/mixins_official/text_extraction.py @@ -195,7 +195,11 @@ class TextExtractionMixin(MCPMixin): @mcp_tool( name="ocr_pdf", - description="Perform OCR on scanned PDFs with preprocessing options" + description=( + "Perform OCR on scanned PDFs. By default writes extracted text " + "to a .txt file and returns the path with a short preview. " + "Set inline=True to return full OCR text in the response." + ) ) async def ocr_pdf( self, @@ -203,7 +207,9 @@ class TextExtractionMixin(MCPMixin): pages: Optional[str] = None, languages: List[str] = ["eng"], dpi: int = 300, - preprocess: bool = True + preprocess: bool = True, + output_directory: Optional[str] = None, + inline: bool = False, ) -> Dict[str, Any]: """ Perform OCR on scanned PDF pages. @@ -214,9 +220,14 @@ class TextExtractionMixin(MCPMixin): languages: List of language codes for OCR dpi: DPI for image rendering preprocess: Whether to preprocess images for better OCR + output_directory: Directory for the OCR text file. + Defaults to a temp directory. + inline: If True, return full OCR text in the response. + Default: False (write to file, return path + preview). Returns: - Dictionary containing OCR results + Dictionary containing OCR file path and summary, or full text + if inline=True """ start_time = time.time() @@ -294,25 +305,54 @@ class TextExtractionMixin(MCPMixin): # Calculate overall statistics successful_pages = [r for r in ocr_results if "error" not in r] avg_confidence = sum(r["confidence"] for r in successful_pages) / len(successful_pages) if successful_pages else 0 + full_text = "\n\n".join(total_text) + word_count = len(full_text.split()) + elapsed = round(time.time() - start_time, 2) + + # ── Inline mode: return everything in the response ── + if inline: + return { + "success": True, + "text": full_text, + "pages_processed": len(pages_to_process), + "pages_successful": len(successful_pages), + "overall_confidence": round(avg_confidence, 2), + "page_results": ocr_results, + "ocr_time": elapsed, + } + + # ── File-first mode (default): write text, return summary ── + if output_directory: + out_dir = Path(validate_output_path(output_directory)) + else: + out_dir = Path(tempfile.mkdtemp(prefix="pdf_ocr_")) + out_dir.mkdir(parents=True, exist_ok=True) + + output_filename = f"{path.stem}_ocr.txt" + output_path = out_dir / output_filename + output_path.write_text(full_text, encoding="utf-8") + + # Build preview (first ~500 chars at sentence boundary) + preview = full_text[:500] + if len(full_text) > 500: + last_period = preview.rfind(".") + if last_period > 300: + preview = preview[:last_period + 1] + preview += " [...]" return { "success": True, - "text": "\n\n".join(total_text), - "pages_processed": len(pages_to_process), - "pages_successful": len(successful_pages), - "pages_failed": len(pages_to_process) - len(successful_pages), - "overall_confidence": round(avg_confidence, 2), - "page_results": ocr_results, - "ocr_settings": { - "languages": languages, - "dpi": dpi, - "preprocessing": preprocess + "output_file": str(output_path), + "text_preview": preview, + "ocr_summary": { + "word_count": word_count, + "character_count": len(full_text), + "pages_processed": len(pages_to_process), + "pages_successful": len(successful_pages), + "pages_failed": len(pages_to_process) - len(successful_pages), + "overall_confidence": round(avg_confidence, 2), }, - "file_info": { - "path": str(path), - "total_pages": total_pages - }, - "ocr_time": round(time.time() - start_time, 2) + "ocr_time": elapsed, } except Exception as e: diff --git a/uv.lock b/uv.lock index 8d823d4..1b184a9 100644 --- a/uv.lock +++ b/uv.lock @@ -1032,7 +1032,7 @@ wheels = [ [[package]] name = "mcp-pdf" -version = "2.1.4" +version = "2.1.5" source = { editable = "." } dependencies = [ { name = "fastmcp" },