🔧 Add permit_forms with lazy reportlab imports

Coordinate-based PDF form filling for scanned/flat PDFs.
reportlab is now optional - it is only loaded when the permit tools are used.

Install with: pip install mcp-pdf[forms]
This commit is contained in:
Ryan Malloy 2026-02-08 13:59:02 -07:00
parent 271e4c71d6
commit febe6dae13
4 changed files with 80 additions and 22 deletions

View File

@ -6,7 +6,7 @@
**A FastMCP server for PDF processing**
*41 tools for text extraction, OCR, tables, forms, annotations, and more*
*46 tools for text extraction, OCR, tables, forms, annotations, and more*
[![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg?style=flat-square)](https://www.python.org/downloads/)
[![FastMCP](https://img.shields.io/badge/FastMCP-2.0+-green.svg?style=flat-square)](https://github.com/jlowin/fastmcp)
@ -98,6 +98,20 @@ uv run python examples/verify_installation.py
| `create_form_pdf` | Create new forms with text fields, checkboxes, dropdowns |
| `add_form_fields` | Add fields to existing PDFs |
### Permit Forms (Coordinate-Based)
For scanned PDFs or forms without interactive fields. Draws text at (x, y) coordinates.
| Tool | What it does |
|------|-------------|
| `fill_permit_form` | Fill any PDF by drawing at coordinates (works with scanned forms) |
| `get_field_schema` | Get field definitions for validation or UI generation |
| `validate_permit_form_data` | Check data against field schema before filling |
| `preview_field_positions` | Generate PDF showing field boundaries (debugging) |
| `insert_attachment_pages` | Insert image/text pages with "See page X" references |
**Requires:** `pip install mcp-pdf[forms]` (adds reportlab dependency)
### Document Assembly
| Tool | What it does |

View File

@ -1,6 +1,6 @@
[project]
name = "mcp-pdf"
version = "2.0.9"
version = "2.0.10"
description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more"
authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}]
readme = "README.md"

View File

@ -6,7 +6,10 @@ This mixin enables filling ANY PDF (scanned, flat, non-interactive) by drawing
text and checkboxes at specified (x, y) coordinates, then merging the overlay
with the original template. This is ideal for government forms that don't have
proper AcroForm fields.
Requires: pip install mcp-pdf[forms]
"""
from __future__ import annotations
import base64
import io
@ -15,15 +18,11 @@ import time
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, BinaryIO
from typing import Any, Dict, List, Optional, BinaryIO, TYPE_CHECKING
import logging
# PDF processing libraries
# PDF processing libraries (always available)
from pypdf import PdfReader, PdfWriter
from reportlab.lib.pagesizes import letter
from reportlab.lib.units import inch
from reportlab.lib.utils import ImageReader
from reportlab.pdfgen import canvas
from PIL import Image
# Official FastMCP mixin
@ -33,19 +32,55 @@ from ..security import validate_pdf_path, validate_output_path, sanitize_error_m
logger = logging.getLogger(__name__)
# Lazy import for reportlab (optional dependency).
# Tri-state cache: None = not probed yet, True/False = outcome of the probe.
_reportlab_available = None


def _check_reportlab():
    """Ensure the optional reportlab dependency can be imported.

    The first call probes four reportlab submodules and caches the outcome
    in the module-level ``_reportlab_available`` flag; later calls reuse the
    cached result instead of probing again.

    Raises:
        ImportError: If any probed reportlab module cannot be imported.
            The message includes the install command for the extra.
    """
    global _reportlab_available
    if _reportlab_available is None:
        # Function-scope import keeps the module's import block untouched.
        import importlib

        try:
            # import_module probes availability without binding names that
            # would otherwise go unused in this function.
            for module_name in (
                "reportlab.lib.pagesizes",
                "reportlab.lib.units",
                "reportlab.lib.utils",
                "reportlab.pdfgen.canvas",
            ):
                importlib.import_module(module_name)
        except ImportError:
            _reportlab_available = False
        else:
            _reportlab_available = True
    if not _reportlab_available:
        raise ImportError(
            "reportlab is required for permit form tools. "
            "Install with: pip install mcp-pdf[forms]"
        )
def _get_reportlab():
    """Return the reportlab objects used by the permit-form helpers.

    Calls ``_check_reportlab()`` first, so an ``ImportError`` carrying the
    install hint propagates when reportlab is missing.

    Returns:
        dict: Maps ``'letter'``, ``'inch'``, ``'ImageReader'`` and
        ``'canvas'`` to the corresponding reportlab objects.
    """
    _check_reportlab()

    # Imported here rather than at module level so that loading this module
    # does not require reportlab.
    import reportlab.pdfgen.canvas as canvas_module
    from reportlab.lib import pagesizes, units, utils

    return dict(
        letter=pagesizes.letter,
        inch=units.inch,
        ImageReader=utils.ImageReader,
        canvas=canvas_module,
    )
# Page dimensions: 612 x 792 points (letter size)
# Y coordinates in PDF are from bottom, so we convert from top-origin
PAGE_HEIGHT = 792
PAGE_WIDTH = 612
# Margins for attachment pages
MARGIN_TOP = 0.75 * inch
MARGIN_BOTTOM = 0.5 * inch
MARGIN_LEFT = 0.5 * inch
MARGIN_RIGHT = 0.5 * inch
# Margins for attachment pages (in points, 72 points = 1 inch)
MARGIN_TOP = 54 # 0.75 inch
MARGIN_BOTTOM = 36 # 0.5 inch
MARGIN_LEFT = 36 # 0.5 inch
MARGIN_RIGHT = 36 # 0.5 inch
# Header styling for attachment pages
HEADER_HEIGHT = 0.5 * inch
HEADER_HEIGHT = 36 # 0.5 inch
HEADER_FONT_SIZE = 14
@ -295,8 +330,9 @@ def _create_attachment_page_with_image(
Returns:
BytesIO buffer containing the single-page PDF
"""
rl = _get_reportlab()
buffer = io.BytesIO()
c = canvas.Canvas(buffer, pagesize=letter)
c = rl['canvas'].Canvas(buffer, pagesize=rl['letter'])
# Calculate content area
content_top = PAGE_HEIGHT - MARGIN_TOP
@ -361,7 +397,7 @@ def _create_attachment_page_with_image(
# Draw the image
img_buffer.seek(0)
img_reader = ImageReader(img_buffer)
img_reader = rl['ImageReader'](img_buffer)
c.drawImage(
img_reader,
draw_x, draw_y,
@ -395,8 +431,9 @@ def _create_attachment_page_with_text(
Returns:
BytesIO buffer containing the single-page PDF
"""
rl = _get_reportlab()
buffer = io.BytesIO()
c = canvas.Canvas(buffer, pagesize=letter)
c = rl['canvas'].Canvas(buffer, pagesize=rl['letter'])
content_top = PAGE_HEIGHT - MARGIN_TOP
content_bottom = MARGIN_BOTTOM
@ -473,8 +510,9 @@ def _create_see_page_annotation(
Returns:
BytesIO buffer containing a single-page PDF with the annotation
"""
rl = _get_reportlab()
buffer = io.BytesIO()
c = canvas.Canvas(buffer, pagesize=letter)
c = rl['canvas'].Canvas(buffer, pagesize=rl['letter'])
# Convert y from top-down to PDF bottom-up coordinates
pdf_y = PAGE_HEIGHT - y - height
@ -514,8 +552,9 @@ def _create_page_overlay(
page_num: int,
) -> io.BytesIO:
"""Create overlay for a specific page with form data."""
rl = _get_reportlab()
buffer = io.BytesIO()
c = canvas.Canvas(buffer, pagesize=letter)
c = rl['canvas'].Canvas(buffer, pagesize=rl['letter'])
c.setFont("Helvetica", 9)
# Get fields for this page
@ -919,6 +958,9 @@ Returns:
start_time = time.time()
try:
# Get reportlab (optional dependency)
rl = _get_reportlab()
# Validate template path
template_file = await validate_pdf_path(template_path)
@ -937,7 +979,7 @@ Returns:
# Create overlay with field boxes
buffer = io.BytesIO()
c = canvas.Canvas(buffer, pagesize=letter)
c = rl['canvas'].Canvas(buffer, pagesize=rl['letter'])
# Semi-transparent red for boxes
c.setStrokeColorRGB(1, 0, 0) # Red stroke

View File

@ -24,6 +24,7 @@ from .mixins_official.security_analysis import SecurityAnalysisMixin
from .mixins_official.content_analysis import ContentAnalysisMixin
from .mixins_official.pdf_utilities import PDFUtilitiesMixin
from .mixins_official.misc_tools import MiscToolsMixin
from .mixins_official.permit_forms import PermitFormMixin
# Configure logging
logging.basicConfig(level=logging.INFO)
@ -79,6 +80,7 @@ class PDFServerOfficial:
ContentAnalysisMixin,
PDFUtilitiesMixin,
MiscToolsMixin,
PermitFormMixin,
]
for mixin_class in mixin_classes:
@ -105,7 +107,7 @@ class PDFServerOfficial:
"""Get detailed server information including mixins and configuration"""
return {
"server_name": "MCP PDF Tools (Official FastMCP Pattern)",
"version": "2.0.7",
"version": "2.0.10",
"architecture": "Official FastMCP Mixin Pattern",
"total_mixins": len(self.mixins),
"mixins": [
@ -160,7 +162,7 @@ def main():
from importlib.metadata import version
package_version = version("mcp-pdf")
except:
package_version = "2.0.7"
package_version = "2.0.10"
logger.info(f"🎬 MCP PDF Tools Server v{package_version} (Official Pattern)")