🔧 Add permit_forms with lazy reportlab imports
Coordinate-based PDF form filling for scanned/flat PDFs. reportlab is now optional - it is only loaded when the permit tools are used. Install with: pip install mcp-pdf[forms]
This commit is contained in:
parent
271e4c71d6
commit
febe6dae13
16
README.md
16
README.md
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
**A FastMCP server for PDF processing**
|
**A FastMCP server for PDF processing**
|
||||||
|
|
||||||
*41 tools for text extraction, OCR, tables, forms, annotations, and more*
|
*46 tools for text extraction, OCR, tables, forms, annotations, and more*
|
||||||
|
|
||||||
[](https://www.python.org/downloads/)
|
[](https://www.python.org/downloads/)
|
||||||
[](https://github.com/jlowin/fastmcp)
|
[](https://github.com/jlowin/fastmcp)
|
||||||
@ -98,6 +98,20 @@ uv run python examples/verify_installation.py
|
|||||||
| `create_form_pdf` | Create new forms with text fields, checkboxes, dropdowns |
|
| `create_form_pdf` | Create new forms with text fields, checkboxes, dropdowns |
|
||||||
| `add_form_fields` | Add fields to existing PDFs |
|
| `add_form_fields` | Add fields to existing PDFs |
|
||||||
|
|
||||||
|
### Permit Forms (Coordinate-Based)
|
||||||
|
|
||||||
|
For scanned PDFs or forms without interactive fields. Draws text at (x, y) coordinates.
|
||||||
|
|
||||||
|
| Tool | What it does |
|
||||||
|
|------|-------------|
|
||||||
|
| `fill_permit_form` | Fill any PDF by drawing at coordinates (works with scanned forms) |
|
||||||
|
| `get_field_schema` | Get field definitions for validation or UI generation |
|
||||||
|
| `validate_permit_form_data` | Check data against field schema before filling |
|
||||||
|
| `preview_field_positions` | Generate PDF showing field boundaries (debugging) |
|
||||||
|
| `insert_attachment_pages` | Insert image/text pages with "See page X" references |
|
||||||
|
|
||||||
|
**Requires:** `pip install mcp-pdf[forms]` (adds reportlab dependency)
|
||||||
|
|
||||||
### Document Assembly
|
### Document Assembly
|
||||||
|
|
||||||
| Tool | What it does |
|
| Tool | What it does |
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "mcp-pdf"
|
name = "mcp-pdf"
|
||||||
version = "2.0.9"
|
version = "2.0.10"
|
||||||
description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more"
|
description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more"
|
||||||
authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}]
|
authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}]
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
|||||||
@ -6,7 +6,10 @@ This mixin enables filling ANY PDF (scanned, flat, non-interactive) by drawing
|
|||||||
text and checkboxes at specified (x, y) coordinates, then merging the overlay
|
text and checkboxes at specified (x, y) coordinates, then merging the overlay
|
||||||
with the original template. This is ideal for government forms that don't have
|
with the original template. This is ideal for government forms that don't have
|
||||||
proper AcroForm fields.
|
proper AcroForm fields.
|
||||||
|
|
||||||
|
Requires: pip install mcp-pdf[forms]
|
||||||
"""
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import io
|
import io
|
||||||
@ -15,15 +18,11 @@ import time
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional, BinaryIO
|
from typing import Any, Dict, List, Optional, BinaryIO, TYPE_CHECKING
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
# PDF processing libraries
|
# PDF processing libraries (always available)
|
||||||
from pypdf import PdfReader, PdfWriter
|
from pypdf import PdfReader, PdfWriter
|
||||||
from reportlab.lib.pagesizes import letter
|
|
||||||
from reportlab.lib.units import inch
|
|
||||||
from reportlab.lib.utils import ImageReader
|
|
||||||
from reportlab.pdfgen import canvas
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
# Official FastMCP mixin
|
# Official FastMCP mixin
|
||||||
@ -33,19 +32,55 @@ from ..security import validate_pdf_path, validate_output_path, sanitize_error_m
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Lazy import for reportlab (optional dependency)
|
||||||
|
_reportlab_available = None
|
||||||
|
|
||||||
|
def _check_reportlab():
    """Verify that the optional reportlab dependency can be imported.

    The outcome is recorded in the module-level ``_reportlab_available``
    flag. A successful probe is cached, so later calls return immediately.
    A failed probe is retried on every call: this way the raised error
    always carries the underlying import failure as its ``__cause__``, and
    a later call can succeed once reportlab becomes importable.

    Raises:
        ImportError: If any of the reportlab names probed below cannot be
            imported. The message tells the user how to install the
            ``forms`` extra; the original failure is chained as the cause.
    """
    global _reportlab_available

    # Only a positive result is trusted from the cache (None/False re-probe).
    if _reportlab_available:
        return

    try:
        # The names are imported purely as an availability probe; they are
        # intentionally unused here.
        from reportlab.lib.pagesizes import letter  # noqa: F401
        from reportlab.lib.units import inch  # noqa: F401
        from reportlab.lib.utils import ImageReader  # noqa: F401
        from reportlab.pdfgen import canvas  # noqa: F401
    except ImportError as exc:
        _reportlab_available = False
        # Chain the real failure so a broken (rather than missing) install
        # is still diagnosable from the traceback.
        raise ImportError(
            "reportlab is required for permit form tools. "
            "Install with: pip install mcp-pdf[forms]"
        ) from exc

    _reportlab_available = True
|
||||||
|
|
||||||
|
def _get_reportlab():
    """Return the reportlab objects used by the permit form helpers.

    Calls ``_check_reportlab()`` first, so the helpful ImportError raised
    there propagates when reportlab is not installed.

    Returns:
        dict with the keys ``'letter'``, ``'inch'``, ``'ImageReader'`` and
        ``'canvas'``, mapped to the matching reportlab objects
        (``reportlab.lib.pagesizes.letter``, ``reportlab.lib.units.inch``,
        ``reportlab.lib.utils.ImageReader`` and the ``reportlab.pdfgen.canvas``
        module).
    """
    _check_reportlab()

    # Imported here rather than at module level so that reportlab stays an
    # optional dependency.
    from reportlab.lib import pagesizes, units, utils
    from reportlab.pdfgen import canvas as canvas_module

    return dict(
        letter=pagesizes.letter,
        inch=units.inch,
        ImageReader=utils.ImageReader,
        canvas=canvas_module,
    )
|
||||||
|
|
||||||
# Page dimensions: 612 x 792 points (letter size)
|
# Page dimensions: 612 x 792 points (letter size)
|
||||||
# Y coordinates in PDF are from bottom, so we convert from top-origin
|
# Y coordinates in PDF are from bottom, so we convert from top-origin
|
||||||
PAGE_HEIGHT = 792
|
PAGE_HEIGHT = 792
|
||||||
PAGE_WIDTH = 612
|
PAGE_WIDTH = 612
|
||||||
|
|
||||||
# Margins for attachment pages
|
# Margins for attachment pages (in points, 72 points = 1 inch)
|
||||||
MARGIN_TOP = 0.75 * inch
|
MARGIN_TOP = 54 # 0.75 inch
|
||||||
MARGIN_BOTTOM = 0.5 * inch
|
MARGIN_BOTTOM = 36 # 0.5 inch
|
||||||
MARGIN_LEFT = 0.5 * inch
|
MARGIN_LEFT = 36 # 0.5 inch
|
||||||
MARGIN_RIGHT = 0.5 * inch
|
MARGIN_RIGHT = 36 # 0.5 inch
|
||||||
|
|
||||||
# Header styling for attachment pages
|
# Header styling for attachment pages
|
||||||
HEADER_HEIGHT = 0.5 * inch
|
HEADER_HEIGHT = 36 # 0.5 inch
|
||||||
HEADER_FONT_SIZE = 14
|
HEADER_FONT_SIZE = 14
|
||||||
|
|
||||||
|
|
||||||
@ -295,8 +330,9 @@ def _create_attachment_page_with_image(
|
|||||||
Returns:
|
Returns:
|
||||||
BytesIO buffer containing the single-page PDF
|
BytesIO buffer containing the single-page PDF
|
||||||
"""
|
"""
|
||||||
|
rl = _get_reportlab()
|
||||||
buffer = io.BytesIO()
|
buffer = io.BytesIO()
|
||||||
c = canvas.Canvas(buffer, pagesize=letter)
|
c = rl['canvas'].Canvas(buffer, pagesize=rl['letter'])
|
||||||
|
|
||||||
# Calculate content area
|
# Calculate content area
|
||||||
content_top = PAGE_HEIGHT - MARGIN_TOP
|
content_top = PAGE_HEIGHT - MARGIN_TOP
|
||||||
@ -361,7 +397,7 @@ def _create_attachment_page_with_image(
|
|||||||
|
|
||||||
# Draw the image
|
# Draw the image
|
||||||
img_buffer.seek(0)
|
img_buffer.seek(0)
|
||||||
img_reader = ImageReader(img_buffer)
|
img_reader = rl['ImageReader'](img_buffer)
|
||||||
c.drawImage(
|
c.drawImage(
|
||||||
img_reader,
|
img_reader,
|
||||||
draw_x, draw_y,
|
draw_x, draw_y,
|
||||||
@ -395,8 +431,9 @@ def _create_attachment_page_with_text(
|
|||||||
Returns:
|
Returns:
|
||||||
BytesIO buffer containing the single-page PDF
|
BytesIO buffer containing the single-page PDF
|
||||||
"""
|
"""
|
||||||
|
rl = _get_reportlab()
|
||||||
buffer = io.BytesIO()
|
buffer = io.BytesIO()
|
||||||
c = canvas.Canvas(buffer, pagesize=letter)
|
c = rl['canvas'].Canvas(buffer, pagesize=rl['letter'])
|
||||||
|
|
||||||
content_top = PAGE_HEIGHT - MARGIN_TOP
|
content_top = PAGE_HEIGHT - MARGIN_TOP
|
||||||
content_bottom = MARGIN_BOTTOM
|
content_bottom = MARGIN_BOTTOM
|
||||||
@ -473,8 +510,9 @@ def _create_see_page_annotation(
|
|||||||
Returns:
|
Returns:
|
||||||
BytesIO buffer containing a single-page PDF with the annotation
|
BytesIO buffer containing a single-page PDF with the annotation
|
||||||
"""
|
"""
|
||||||
|
rl = _get_reportlab()
|
||||||
buffer = io.BytesIO()
|
buffer = io.BytesIO()
|
||||||
c = canvas.Canvas(buffer, pagesize=letter)
|
c = rl['canvas'].Canvas(buffer, pagesize=rl['letter'])
|
||||||
|
|
||||||
# Convert y from top-down to PDF bottom-up coordinates
|
# Convert y from top-down to PDF bottom-up coordinates
|
||||||
pdf_y = PAGE_HEIGHT - y - height
|
pdf_y = PAGE_HEIGHT - y - height
|
||||||
@ -514,8 +552,9 @@ def _create_page_overlay(
|
|||||||
page_num: int,
|
page_num: int,
|
||||||
) -> io.BytesIO:
|
) -> io.BytesIO:
|
||||||
"""Create overlay for a specific page with form data."""
|
"""Create overlay for a specific page with form data."""
|
||||||
|
rl = _get_reportlab()
|
||||||
buffer = io.BytesIO()
|
buffer = io.BytesIO()
|
||||||
c = canvas.Canvas(buffer, pagesize=letter)
|
c = rl['canvas'].Canvas(buffer, pagesize=rl['letter'])
|
||||||
c.setFont("Helvetica", 9)
|
c.setFont("Helvetica", 9)
|
||||||
|
|
||||||
# Get fields for this page
|
# Get fields for this page
|
||||||
@ -919,6 +958,9 @@ Returns:
|
|||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# Get reportlab (optional dependency)
|
||||||
|
rl = _get_reportlab()
|
||||||
|
|
||||||
# Validate template path
|
# Validate template path
|
||||||
template_file = await validate_pdf_path(template_path)
|
template_file = await validate_pdf_path(template_path)
|
||||||
|
|
||||||
@ -937,7 +979,7 @@ Returns:
|
|||||||
|
|
||||||
# Create overlay with field boxes
|
# Create overlay with field boxes
|
||||||
buffer = io.BytesIO()
|
buffer = io.BytesIO()
|
||||||
c = canvas.Canvas(buffer, pagesize=letter)
|
c = rl['canvas'].Canvas(buffer, pagesize=rl['letter'])
|
||||||
|
|
||||||
# Semi-transparent red for boxes
|
# Semi-transparent red for boxes
|
||||||
c.setStrokeColorRGB(1, 0, 0) # Red stroke
|
c.setStrokeColorRGB(1, 0, 0) # Red stroke
|
||||||
|
|||||||
@ -24,6 +24,7 @@ from .mixins_official.security_analysis import SecurityAnalysisMixin
|
|||||||
from .mixins_official.content_analysis import ContentAnalysisMixin
|
from .mixins_official.content_analysis import ContentAnalysisMixin
|
||||||
from .mixins_official.pdf_utilities import PDFUtilitiesMixin
|
from .mixins_official.pdf_utilities import PDFUtilitiesMixin
|
||||||
from .mixins_official.misc_tools import MiscToolsMixin
|
from .mixins_official.misc_tools import MiscToolsMixin
|
||||||
|
from .mixins_official.permit_forms import PermitFormMixin
|
||||||
|
|
||||||
# Configure logging
|
# Configure logging
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
@ -79,6 +80,7 @@ class PDFServerOfficial:
|
|||||||
ContentAnalysisMixin,
|
ContentAnalysisMixin,
|
||||||
PDFUtilitiesMixin,
|
PDFUtilitiesMixin,
|
||||||
MiscToolsMixin,
|
MiscToolsMixin,
|
||||||
|
PermitFormMixin,
|
||||||
]
|
]
|
||||||
|
|
||||||
for mixin_class in mixin_classes:
|
for mixin_class in mixin_classes:
|
||||||
@ -105,7 +107,7 @@ class PDFServerOfficial:
|
|||||||
"""Get detailed server information including mixins and configuration"""
|
"""Get detailed server information including mixins and configuration"""
|
||||||
return {
|
return {
|
||||||
"server_name": "MCP PDF Tools (Official FastMCP Pattern)",
|
"server_name": "MCP PDF Tools (Official FastMCP Pattern)",
|
||||||
"version": "2.0.7",
|
"version": "2.0.10",
|
||||||
"architecture": "Official FastMCP Mixin Pattern",
|
"architecture": "Official FastMCP Mixin Pattern",
|
||||||
"total_mixins": len(self.mixins),
|
"total_mixins": len(self.mixins),
|
||||||
"mixins": [
|
"mixins": [
|
||||||
@ -160,7 +162,7 @@ def main():
|
|||||||
from importlib.metadata import version
|
from importlib.metadata import version
|
||||||
package_version = version("mcp-pdf")
|
package_version = version("mcp-pdf")
|
||||||
except:
|
except:
|
||||||
package_version = "2.0.7"
|
package_version = "2.0.10"
|
||||||
|
|
||||||
logger.info(f"🎬 MCP PDF Tools Server v{package_version} (Official Pattern)")
|
logger.info(f"🎬 MCP PDF Tools Server v{package_version} (Official Pattern)")
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user