Add file_content parameter for hosted HTTP transport

- Add MCP_ALLOW_LOCAL_FILES env var (default false for security)
- All tools now accept file_content (base64) for remote document upload
- Local file access blocked on hosted servers unless explicitly enabled
- Update docker-compose to set MCP_ALLOW_LOCAL_FILES=false
- Fix test assertions for updated function signatures
This commit is contained in:
Ryan Malloy 2026-01-20 18:47:19 -07:00
parent 483ed9121b
commit b0477103d5
7 changed files with 197 additions and 64 deletions

View File

@ -15,25 +15,24 @@ services:
- MCP_PORT=8000 - MCP_PORT=8000
- DEBUG=${DEBUG:-false} - DEBUG=${DEBUG:-false}
- OFFICE_TEMP_DIR=/tmp/mcwaddams - OFFICE_TEMP_DIR=/tmp/mcwaddams
# Security: Disable local file access for hosted server
# Clients must use file_content parameter to upload documents
- MCP_ALLOW_LOCAL_FILES=false
volumes: volumes:
# Temp directory for document processing # Temp directory for document processing
- mcwaddams-temp:/tmp/mcwaddams - mcwaddams-temp:/tmp/mcwaddams
networks: networks:
- caddy - caddy
labels: labels:
# Caddy-docker-proxy labels for /mcp endpoint # Caddy-docker-proxy labels - direct reverse proxy (no path stripping)
# MCP is served at /mcp on the backend
caddy: ${MCWADDAMS_HOST:-mcwaddams.l.supported.systems} caddy: ${MCWADDAMS_HOST:-mcwaddams.l.supported.systems}
caddy.@mcp.path: /mcp/* caddy.reverse_proxy: "{{upstreams 8000}}"
caddy.@mcp.path_strip: /mcp caddy.reverse_proxy.flush_interval: "-1"
caddy.handle: "@mcp" caddy.reverse_proxy.transport: "http"
caddy.handle.reverse_proxy: "{{upstreams 8000}}" caddy.reverse_proxy.transport.read_timeout: "0"
caddy.handle.reverse_proxy.flush_interval: "-1" caddy.reverse_proxy.transport.write_timeout: "0"
caddy.handle.reverse_proxy.transport: "http" caddy.reverse_proxy.stream_timeout: "24h"
caddy.handle.reverse_proxy.transport.read_timeout: "0"
caddy.handle.reverse_proxy.transport.write_timeout: "0"
caddy.handle.reverse_proxy.stream_timeout: "24h"
caddy.handle.reverse_proxy.header_up.Connection: "{http.request.header.Connection}"
caddy.handle.reverse_proxy.header_up.Upgrade: "{http.request.header.Upgrade}"
healthcheck: healthcheck:
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health')"] test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health')"]
interval: 30s interval: 30s

22
src/mcwaddams/mixins/excel.py Normal file → Executable file
View File

@ -17,6 +17,13 @@ from ..utils import (
) )
# Common field description for file_content parameter
FILE_CONTENT_DESC = (
"Base64-encoded file content (for hosted/HTTP transport). "
"When provided, file_path is used only for extension detection."
)
class ExcelMixin(MCPMixin): class ExcelMixin(MCPMixin):
"""Mixin containing Excel-specific tools for advanced spreadsheet processing.""" """Mixin containing Excel-specific tools for advanced spreadsheet processing."""
@ -34,6 +41,7 @@ class ExcelMixin(MCPMixin):
async def analyze_excel_data( async def analyze_excel_data(
self, self,
file_path: str = Field(description="Path to Excel document or URL"), file_path: str = Field(description="Path to Excel document or URL"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC),
sheet_names: List[str] = Field(default=[], description="Specific sheets to analyze (empty = all sheets)"), sheet_names: List[str] = Field(default=[], description="Specific sheets to analyze (empty = all sheets)"),
include_statistics: bool = Field(default=True, description="Include statistical analysis (mean, median, etc.)"), include_statistics: bool = Field(default=True, description="Include statistical analysis (mean, median, etc.)"),
detect_data_types: bool = Field(default=True, description="Analyze and detect optimal data types"), detect_data_types: bool = Field(default=True, description="Analyze and detect optimal data types"),
@ -42,8 +50,8 @@ class ExcelMixin(MCPMixin):
"""Analyze Excel data with comprehensive statistics and data quality assessment.""" """Analyze Excel data with comprehensive statistics and data quality assessment."""
start_time = time.time() start_time = time.time()
# Resolve and validate file # Resolve and validate file (download if URL, or decode if content provided)
resolved_path = await resolve_office_file_path(file_path) resolved_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
validation = await validate_office_file(resolved_path) validation = await validate_office_file(resolved_path)
if validation["category"] not in ["excel"]: if validation["category"] not in ["excel"]:
@ -178,6 +186,7 @@ class ExcelMixin(MCPMixin):
async def extract_excel_formulas( async def extract_excel_formulas(
self, self,
file_path: str = Field(description="Path to Excel document or URL"), file_path: str = Field(description="Path to Excel document or URL"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC),
sheet_names: List[str] = Field(default=[], description="Specific sheets to process (empty = all sheets)"), sheet_names: List[str] = Field(default=[], description="Specific sheets to process (empty = all sheets)"),
include_values: bool = Field(default=True, description="Include calculated values alongside formulas"), include_values: bool = Field(default=True, description="Include calculated values alongside formulas"),
analyze_dependencies: bool = Field(default=True, description="Analyze formula dependencies and references") analyze_dependencies: bool = Field(default=True, description="Analyze formula dependencies and references")
@ -186,8 +195,8 @@ class ExcelMixin(MCPMixin):
start_time = time.time() start_time = time.time()
import re import re
# Resolve and validate file # Resolve and validate file (download if URL, or decode if content provided)
resolved_path = await resolve_office_file_path(file_path) resolved_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
validation = await validate_office_file(resolved_path) validation = await validate_office_file(resolved_path)
if validation["category"] not in ["excel"] or validation["extension"] == ".csv": if validation["category"] not in ["excel"] or validation["extension"] == ".csv":
@ -288,6 +297,7 @@ class ExcelMixin(MCPMixin):
async def create_excel_chart_data( async def create_excel_chart_data(
self, self,
file_path: str = Field(description="Path to Excel document or URL"), file_path: str = Field(description="Path to Excel document or URL"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC),
sheet_name: str = Field(default="", description="Sheet to process (empty = first sheet)"), sheet_name: str = Field(default="", description="Sheet to process (empty = first sheet)"),
chart_type: str = Field(default="auto", description="Chart type: auto, bar, line, pie, scatter, histogram"), chart_type: str = Field(default="auto", description="Chart type: auto, bar, line, pie, scatter, histogram"),
x_column: str = Field(default="", description="Column for X-axis (empty = auto-detect)"), x_column: str = Field(default="", description="Column for X-axis (empty = auto-detect)"),
@ -297,8 +307,8 @@ class ExcelMixin(MCPMixin):
"""Generate chart-ready data and configurations from Excel spreadsheets.""" """Generate chart-ready data and configurations from Excel spreadsheets."""
start_time = time.time() start_time = time.time()
# Resolve and validate file # Resolve and validate file (download if URL, or decode if content provided)
resolved_path = await resolve_office_file_path(file_path) resolved_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
validation = await validate_office_file(resolved_path) validation = await validate_office_file(resolved_path)
if validation["category"] not in ["excel"]: if validation["category"] not in ["excel"]:

45
src/mcwaddams/mixins/universal.py Normal file → Executable file
View File

@ -1,7 +1,7 @@
"""Universal Office Tools Mixin - Format-agnostic tools that work across all Office document types.""" """Universal Office Tools Mixin - Format-agnostic tools that work across all Office document types."""
import time import time
from typing import Any from typing import Any, Optional
from fastmcp.contrib.mcp_mixin import MCPMixin, mcp_tool from fastmcp.contrib.mcp_mixin import MCPMixin, mcp_tool
from pydantic import Field from pydantic import Field
@ -17,6 +17,13 @@ from ..utils import (
from ..resources import resource_store, EmbeddedResource, ResourceStore from ..resources import resource_store, EmbeddedResource, ResourceStore
# Common field description for file_content parameter
FILE_CONTENT_DESC = (
"Base64-encoded file content (for hosted/HTTP transport). "
"When provided, file_path is used only for extension detection."
)
class UniversalMixin(MCPMixin): class UniversalMixin(MCPMixin):
"""Mixin containing format-agnostic tools that work across Word, Excel, PowerPoint, and CSV files.""" """Mixin containing format-agnostic tools that work across Word, Excel, PowerPoint, and CSV files."""
@ -27,6 +34,7 @@ class UniversalMixin(MCPMixin):
async def extract_text( async def extract_text(
self, self,
file_path: str = Field(description="Path to Office document or URL"), file_path: str = Field(description="Path to Office document or URL"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC),
preserve_formatting: bool = Field(default=False, description="Preserve text formatting and structure"), preserve_formatting: bool = Field(default=False, description="Preserve text formatting and structure"),
include_metadata: bool = Field(default=True, description="Include document metadata in output"), include_metadata: bool = Field(default=True, description="Include document metadata in output"),
method: str = Field(default="auto", description="Extraction method: auto, primary, fallback") method: str = Field(default="auto", description="Extraction method: auto, primary, fallback")
@ -34,8 +42,8 @@ class UniversalMixin(MCPMixin):
start_time = time.time() start_time = time.time()
try: try:
# Resolve file path (download if URL) # Resolve file path (download if URL, or decode if content provided)
local_path = await resolve_office_file_path(file_path) local_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
# Validate file # Validate file
validation = await validate_office_file(local_path) validation = await validate_office_file(local_path)
@ -85,6 +93,7 @@ class UniversalMixin(MCPMixin):
async def extract_images( async def extract_images(
self, self,
file_path: str = Field(description="Path to Office document or URL"), file_path: str = Field(description="Path to Office document or URL"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC),
min_width: int = Field(default=100, description="Minimum image width in pixels"), min_width: int = Field(default=100, description="Minimum image width in pixels"),
min_height: int = Field(default=100, description="Minimum image height in pixels"), min_height: int = Field(default=100, description="Minimum image height in pixels"),
output_format: str = Field(default="png", description="Output image format: png, jpg, jpeg"), output_format: str = Field(default="png", description="Output image format: png, jpg, jpeg"),
@ -93,8 +102,8 @@ class UniversalMixin(MCPMixin):
start_time = time.time() start_time = time.time()
try: try:
# Resolve file path # Resolve file path (download if URL, or decode if content provided)
local_path = await resolve_office_file_path(file_path) local_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
# Validate file # Validate file
validation = await validate_office_file(local_path) validation = await validate_office_file(local_path)
@ -135,13 +144,14 @@ class UniversalMixin(MCPMixin):
) )
async def extract_metadata( async def extract_metadata(
self, self,
file_path: str = Field(description="Path to Office document or URL") file_path: str = Field(description="Path to Office document or URL"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC)
) -> dict[str, Any]: ) -> dict[str, Any]:
start_time = time.time() start_time = time.time()
try: try:
# Resolve file path # Resolve file path (download if URL, or decode if content provided)
local_path = await resolve_office_file_path(file_path) local_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
# Validate file # Validate file
validation = await validate_office_file(local_path) validation = await validate_office_file(local_path)
@ -175,11 +185,12 @@ class UniversalMixin(MCPMixin):
) )
async def detect_office_format( async def detect_office_format(
self, self,
file_path: str = Field(description="Path to Office document or URL") file_path: str = Field(description="Path to Office document or URL"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC)
) -> dict[str, Any]: ) -> dict[str, Any]:
try: try:
# Resolve file path # Resolve file path (download if URL, or decode if content provided)
local_path = await resolve_office_file_path(file_path) local_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
# Get comprehensive format detection # Get comprehensive format detection
format_info = await detect_format(local_path) format_info = await detect_format(local_path)
@ -199,13 +210,14 @@ class UniversalMixin(MCPMixin):
) )
async def analyze_document_health( async def analyze_document_health(
self, self,
file_path: str = Field(description="Path to Office document or URL") file_path: str = Field(description="Path to Office document or URL"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC)
) -> dict[str, Any]: ) -> dict[str, Any]:
start_time = time.time() start_time = time.time()
try: try:
# Resolve file path # Resolve file path (download if URL, or decode if content provided)
local_path = await resolve_office_file_path(file_path) local_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
# Validate file thoroughly # Validate file thoroughly
validation = await validate_office_file(local_path) validation = await validate_office_file(local_path)
@ -350,6 +362,7 @@ class UniversalMixin(MCPMixin):
async def index_document( async def index_document(
self, self,
file_path: str = Field(description="Path to Office document or URL"), file_path: str = Field(description="Path to Office document or URL"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC),
include_images: bool = Field(default=True, description="Index embedded images"), include_images: bool = Field(default=True, description="Index embedded images"),
include_chapters: bool = Field(default=True, description="Index chapters/sections (Word docs)"), include_chapters: bool = Field(default=True, description="Index chapters/sections (Word docs)"),
include_sheets: bool = Field(default=True, description="Index sheets (Excel docs)"), include_sheets: bool = Field(default=True, description="Index sheets (Excel docs)"),
@ -362,8 +375,8 @@ class UniversalMixin(MCPMixin):
""" """
start_time = time.time() start_time = time.time()
# Resolve and validate # Resolve and validate (download if URL, or decode if content provided)
local_path = await resolve_office_file_path(file_path) local_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
validation = await validate_office_file(local_path) validation = await validate_office_file(local_path)
if not validation["is_valid"]: if not validation["is_valid"]:
raise OfficeFileError(f"Invalid file: {', '.join(validation['errors'])}") raise OfficeFileError(f"Invalid file: {', '.join(validation['errors'])}")

47
src/mcwaddams/mixins/word.py Normal file → Executable file
View File

@ -18,6 +18,13 @@ from ..utils import (
from ..pagination import paginate_document_conversion, PaginationParams from ..pagination import paginate_document_conversion, PaginationParams
# Common field description for file_content parameter
FILE_CONTENT_DESC = (
"Base64-encoded file content (for hosted/HTTP transport). "
"When provided, file_path is used only for extension detection."
)
class WordMixin(MCPMixin): class WordMixin(MCPMixin):
"""Mixin containing Word-specific tools for advanced document processing.""" """Mixin containing Word-specific tools for advanced document processing."""
@ -44,6 +51,7 @@ class WordMixin(MCPMixin):
async def convert_to_markdown( async def convert_to_markdown(
self, self,
file_path: str = Field(description="Path to Office document or URL"), file_path: str = Field(description="Path to Office document or URL"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC),
include_images: bool = Field(default=True, description="Include images in markdown output. When True, images are extracted to files and linked in the markdown."), include_images: bool = Field(default=True, description="Include images in markdown output. When True, images are extracted to files and linked in the markdown."),
image_mode: str = Field(default="files", description="Image handling mode: 'files' (default, saves to disk and links), 'base64' (embeds inline - WARNING: can create massive responses), or 'references' (metadata only, no content)"), image_mode: str = Field(default="files", description="Image handling mode: 'files' (default, saves to disk and links), 'base64' (embeds inline - WARNING: can create massive responses), or 'references' (metadata only, no content)"),
max_image_size: int = Field(default=1024*1024, description="Maximum image size in bytes for base64 encoding (only used when image_mode='base64')"), max_image_size: int = Field(default=1024*1024, description="Maximum image size in bytes for base64 encoding (only used when image_mode='base64')"),
@ -61,8 +69,8 @@ class WordMixin(MCPMixin):
) -> dict[str, Any]: ) -> dict[str, Any]:
start_time = time.time() start_time = time.time()
# Resolve file path # Resolve file path (download if URL, or decode if content provided)
local_path = await resolve_office_file_path(file_path) local_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
# Validate file # Validate file
validation = await validate_office_file(local_path) validation = await validate_office_file(local_path)
@ -275,6 +283,7 @@ class WordMixin(MCPMixin):
async def extract_word_tables( async def extract_word_tables(
self, self,
file_path: str = Field(description="Path to Word document or URL"), file_path: str = Field(description="Path to Word document or URL"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC),
include_styling: bool = Field(default=True, description="Include table styling information (borders, alignment, etc.)"), include_styling: bool = Field(default=True, description="Include table styling information (borders, alignment, etc.)"),
output_format: str = Field(default="structured", description="Output format: structured, csv, json, markdown"), output_format: str = Field(default="structured", description="Output format: structured, csv, json, markdown"),
preserve_merged_cells: bool = Field(default=True, description="Handle merged cells appropriately"), preserve_merged_cells: bool = Field(default=True, description="Handle merged cells appropriately"),
@ -286,8 +295,8 @@ class WordMixin(MCPMixin):
import json import json
import io import io
# Resolve and validate file # Resolve and validate file (download if URL, or decode if content provided)
resolved_path = await resolve_office_file_path(file_path) resolved_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
validation = await validate_office_file(resolved_path) validation = await validate_office_file(resolved_path)
if validation["category"] != "word": if validation["category"] != "word":
@ -451,6 +460,7 @@ class WordMixin(MCPMixin):
async def analyze_word_structure( async def analyze_word_structure(
self, self,
file_path: str = Field(description="Path to Word document or URL"), file_path: str = Field(description="Path to Word document or URL"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC),
include_page_info: bool = Field(default=True, description="Include page layout and section information"), include_page_info: bool = Field(default=True, description="Include page layout and section information"),
extract_outline: bool = Field(default=True, description="Extract document outline and heading hierarchy"), extract_outline: bool = Field(default=True, description="Extract document outline and heading hierarchy"),
analyze_styles: bool = Field(default=True, description="Analyze custom styles and formatting patterns") analyze_styles: bool = Field(default=True, description="Analyze custom styles and formatting patterns")
@ -458,8 +468,8 @@ class WordMixin(MCPMixin):
"""Analyze Word document structure and organization.""" """Analyze Word document structure and organization."""
start_time = time.time() start_time = time.time()
# Resolve and validate file # Resolve and validate file (download if URL, or decode if content provided)
resolved_path = await resolve_office_file_path(file_path) resolved_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
validation = await validate_office_file(resolved_path) validation = await validate_office_file(resolved_path)
if validation["category"] != "word": if validation["category"] != "word":
@ -646,6 +656,7 @@ class WordMixin(MCPMixin):
async def get_document_outline( async def get_document_outline(
self, self,
file_path: str = Field(description="Path to Word document or URL"), file_path: str = Field(description="Path to Word document or URL"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC),
include_word_counts: bool = Field(default=True, description="Include estimated word count per section"), include_word_counts: bool = Field(default=True, description="Include estimated word count per section"),
detect_chapters: bool = Field(default=True, description="Detect and flag chapter headings specifically") detect_chapters: bool = Field(default=True, description="Detect and flag chapter headings specifically")
) -> dict[str, Any]: ) -> dict[str, Any]:
@ -654,7 +665,7 @@ class WordMixin(MCPMixin):
from docx.oxml.ns import qn from docx.oxml.ns import qn
start_time = time.time() start_time = time.time()
local_path = await resolve_office_file_path(file_path) local_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
validation = await validate_office_file(local_path) validation = await validate_office_file(local_path)
if not validation["is_valid"]: if not validation["is_valid"]:
@ -765,13 +776,14 @@ class WordMixin(MCPMixin):
@handle_office_errors("Style consistency check") @handle_office_errors("Style consistency check")
async def check_style_consistency( async def check_style_consistency(
self, self,
file_path: str = Field(description="Path to Word document or URL") file_path: str = Field(description="Path to Word document or URL"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC)
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Check document for style and formatting consistency issues.""" """Check document for style and formatting consistency issues."""
from docx import Document from docx import Document
start_time = time.time() start_time = time.time()
local_path = await resolve_office_file_path(file_path) local_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
validation = await validate_office_file(local_path) validation = await validate_office_file(local_path)
if not validation["is_valid"]: if not validation["is_valid"]:
@ -924,6 +936,7 @@ class WordMixin(MCPMixin):
self, self,
file_path: str = Field(description="Path to Word document or URL"), file_path: str = Field(description="Path to Word document or URL"),
query: str = Field(description="Text to search for (case-insensitive)"), query: str = Field(description="Text to search for (case-insensitive)"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC),
context_chars: int = Field(default=100, description="Number of characters of context before and after match"), context_chars: int = Field(default=100, description="Number of characters of context before and after match"),
max_results: int = Field(default=20, description="Maximum number of results to return") max_results: int = Field(default=20, description="Maximum number of results to return")
) -> dict[str, Any]: ) -> dict[str, Any]:
@ -931,7 +944,7 @@ class WordMixin(MCPMixin):
from docx import Document from docx import Document
start_time = time.time() start_time = time.time()
local_path = await resolve_office_file_path(file_path) local_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
validation = await validate_office_file(local_path) validation = await validate_office_file(local_path)
if not validation["is_valid"]: if not validation["is_valid"]:
@ -1009,6 +1022,7 @@ class WordMixin(MCPMixin):
async def extract_entities( async def extract_entities(
self, self,
file_path: str = Field(description="Path to Word document or URL"), file_path: str = Field(description="Path to Word document or URL"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC),
entity_types: str = Field(default="all", description="Entity types to extract: 'all', 'people', 'places', 'organizations', or comma-separated combination"), entity_types: str = Field(default="all", description="Entity types to extract: 'all', 'people', 'places', 'organizations', or comma-separated combination"),
min_occurrences: int = Field(default=1, description="Minimum occurrences for an entity to be included"), min_occurrences: int = Field(default=1, description="Minimum occurrences for an entity to be included"),
include_context: bool = Field(default=True, description="Include sample context for each entity") include_context: bool = Field(default=True, description="Include sample context for each entity")
@ -1019,7 +1033,7 @@ class WordMixin(MCPMixin):
import re import re
start_time = time.time() start_time = time.time()
local_path = await resolve_office_file_path(file_path) local_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
validation = await validate_office_file(local_path) validation = await validate_office_file(local_path)
if not validation["is_valid"]: if not validation["is_valid"]:
@ -1219,6 +1233,7 @@ class WordMixin(MCPMixin):
async def get_chapter_summaries( async def get_chapter_summaries(
self, self,
file_path: str = Field(description="Path to Word document or URL"), file_path: str = Field(description="Path to Word document or URL"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC),
sentences_per_chapter: int = Field(default=3, description="Number of opening sentences to include per chapter"), sentences_per_chapter: int = Field(default=3, description="Number of opening sentences to include per chapter"),
include_word_counts: bool = Field(default=True, description="Include word count for each chapter") include_word_counts: bool = Field(default=True, description="Include word count for each chapter")
) -> dict[str, Any]: ) -> dict[str, Any]:
@ -1227,7 +1242,7 @@ class WordMixin(MCPMixin):
import re import re
start_time = time.time() start_time = time.time()
local_path = await resolve_office_file_path(file_path) local_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
validation = await validate_office_file(local_path) validation = await validate_office_file(local_path)
if not validation["is_valid"]: if not validation["is_valid"]:
@ -1318,6 +1333,7 @@ class WordMixin(MCPMixin):
async def save_reading_progress( async def save_reading_progress(
self, self,
file_path: str = Field(description="Path to Word document"), file_path: str = Field(description="Path to Word document"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC),
chapter_number: int = Field(default=1, description="Current chapter number"), chapter_number: int = Field(default=1, description="Current chapter number"),
paragraph_index: int = Field(default=0, description="Current paragraph index"), paragraph_index: int = Field(default=0, description="Current paragraph index"),
notes: str = Field(default="", description="Optional notes about where you left off") notes: str = Field(default="", description="Optional notes about where you left off")
@ -1326,7 +1342,7 @@ class WordMixin(MCPMixin):
import json import json
from datetime import datetime from datetime import datetime
local_path = await resolve_office_file_path(file_path) local_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
validation = await validate_office_file(local_path) validation = await validate_office_file(local_path)
if not validation["is_valid"]: if not validation["is_valid"]:
@ -1386,12 +1402,13 @@ class WordMixin(MCPMixin):
@handle_office_errors("Get reading progress") @handle_office_errors("Get reading progress")
async def get_reading_progress( async def get_reading_progress(
self, self,
file_path: str = Field(description="Path to Word document") file_path: str = Field(description="Path to Word document"),
file_content: Optional[str] = Field(default=None, description=FILE_CONTENT_DESC)
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Retrieve saved reading progress from bookmark file.""" """Retrieve saved reading progress from bookmark file."""
import json import json
local_path = await resolve_office_file_path(file_path) local_path = await resolve_office_file_path(file_path, file_content=file_content, filename=file_path)
validation = await validate_office_file(local_path) validation = await validate_office_file(local_path)
if not validation["is_valid"]: if not validation["is_valid"]:

View File

@ -4,6 +4,7 @@ import os
import time import time
import hashlib import hashlib
import tempfile import tempfile
import base64
from pathlib import Path from pathlib import Path
from typing import Optional, Dict, Any from typing import Optional, Dict, Any
import aiofiles import aiofiles
@ -12,6 +13,11 @@ from urllib.parse import urlparse
from .validation import OfficeFileError from .validation import OfficeFileError
# Environment variable to control local file access
# Default to False (secure) - set to "true" for local stdio transport
MCP_ALLOW_LOCAL_FILES = os.environ.get("MCP_ALLOW_LOCAL_FILES", "false").lower() == "true"
class OfficeFileCache: class OfficeFileCache:
"""Simple file cache for downloaded Office documents.""" """Simple file cache for downloaded Office documents."""
@ -212,20 +218,45 @@ def get_cache() -> OfficeFileCache:
return _global_cache return _global_cache
async def resolve_office_file_path(file_path: str, use_cache: bool = True) -> str: async def resolve_office_file_path(
"""Resolve file path, downloading from URL if necessary. file_path: str,
use_cache: bool = True,
file_content: Optional[str] = None,
filename: Optional[str] = None
) -> str:
"""Resolve file path, downloading from URL if necessary, or decode inline content.
Args: Args:
file_path: Local file path or URL file_path: Local file path or URL (ignored if file_content provided)
use_cache: Whether to use caching for URLs use_cache: Whether to use caching for URLs
file_content: Base64-encoded file content (for hosted/HTTP transport)
filename: Original filename for extension detection (used with file_content)
Returns: Returns:
Local file path (downloaded if was URL) Local file path (temp file if from content, downloaded if from URL)
Security:
When MCP_ALLOW_LOCAL_FILES=false (default for HTTP transport):
- Local file paths are rejected
- Only URLs and file_content are allowed
- This prevents hosted servers from accessing server-side files
""" """
# Priority 1: If file_content is provided, decode and write to temp file
if file_content:
return await _resolve_from_content(file_content, filename or file_path)
# Check if it's a URL # Check if it's a URL
parsed = urlparse(file_path) parsed = urlparse(file_path)
if not (parsed.scheme and parsed.netloc): is_url = bool(parsed.scheme and parsed.netloc)
# Local file path
if not is_url:
# Local file path - check if allowed
if not MCP_ALLOW_LOCAL_FILES:
raise OfficeFileError(
"Local file access is disabled for this server. "
"Please use file_content parameter to upload document data, "
"or provide a URL. Set MCP_ALLOW_LOCAL_FILES=true to enable local files."
)
return file_path return file_path
# Validate URL scheme # Validate URL scheme
@ -247,3 +278,60 @@ async def resolve_office_file_path(file_path: str, use_cache: bool = True) -> st
# Direct download without caching # Direct download without caching
from .validation import download_office_file from .validation import download_office_file
return await download_office_file(file_path) return await download_office_file(file_path)
async def _resolve_from_content(file_content: str, filename_hint: str) -> str:
    """Decode base64 content and write it to a temp file.

    Args:
        file_content: Base64-encoded file data (standard alphabet; embedded
            whitespace such as line wrapping is tolerated).
        filename_hint: Filename or path to extract the extension from.

    Returns:
        Path to the temporary file containing the decoded content.

    Raises:
        OfficeFileError: If file_content is not valid base64.
    """
    # Base64 is frequently line-wrapped in transit. Drop whitespace first so
    # the strict validation below only rejects genuinely foreign characters.
    compact = "".join(file_content.split())
    try:
        # validate=True: without it, characters outside the alphabet are
        # silently discarded and a corrupted document would be written.
        content_bytes = base64.b64decode(compact, validate=True)
    except ValueError as e:  # binascii.Error is a ValueError subclass
        raise OfficeFileError(f"Invalid base64 content: {str(e)}") from e

    # Extract extension from filename hint. A hint such as a URL with a
    # query string yields a suffix like ".docx?x=1"; treat anything that is
    # not purely alphanumeric as unusable and sniff the content instead.
    ext = Path(filename_hint).suffix.lower()
    if not ext or not ext[1:].isalnum():
        # Try to detect from content magic bytes
        ext = _detect_extension_from_bytes(content_bytes)

    # Uploads live in a dedicated directory under the system temp dir.
    temp_dir = Path(tempfile.gettempdir()) / "mcp_office_uploads"
    temp_dir.mkdir(parents=True, exist_ok=True)

    # Name the file after a content hash: identical uploads reuse one path.
    content_hash = hashlib.sha256(content_bytes).hexdigest()[:12]
    temp_path = temp_dir / f"upload_{content_hash}{ext}"

    # Write content to temp file
    async with aiofiles.open(temp_path, 'wb') as f:
        await f.write(content_bytes)

    return str(temp_path)
def _detect_extension_from_bytes(content: bytes) -> str:
"""Detect file extension from magic bytes."""
# ZIP-based formats (docx, xlsx, pptx)
if content[:4] == b'PK\x03\x04':
# Could be docx, xlsx, or pptx - default to .docx
# Full detection would require reading internal XML
return ".docx"
# OLE Compound Document (doc, xls, ppt)
if content[:8] == b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1':
return ".doc"
# CSV (text-based, starts with printable characters)
if content[:1].isalpha() or content[:1] in b'"\'':
return ".csv"
# Default
return ".bin"

View File

@ -324,7 +324,10 @@ class TestMockingStrategies:
assert result["document_metadata"] == mock_office_file["metadata"] assert result["document_metadata"] == mock_office_file["metadata"]
# Verify mocks were called correctly # Verify mocks were called correctly
mock_resolve.assert_called_once_with(mock_office_file["path"]) mock_resolve.assert_called_once()
# First positional arg should be the file path
call_args = mock_resolve.call_args
assert call_args[0][0] == mock_office_file["path"]
mock_validate.assert_called_once_with(mock_office_file["path"]) mock_validate.assert_called_once_with(mock_office_file["path"])
mock_detect.assert_called_once_with(mock_office_file["path"]) mock_detect.assert_called_once_with(mock_office_file["path"])

View File

@ -408,7 +408,10 @@ class TestMockingPatterns:
assert "structure" in result # Because preserve_formatting=True assert "structure" in result # Because preserve_formatting=True
# Verify all mocks were called appropriately # Verify all mocks were called appropriately
mock_resolve.assert_called_once_with("/test/document.docx") mock_resolve.assert_called_once()
# First positional arg should be the file path
call_args = mock_resolve.call_args
assert call_args[0][0] == "/test/document.docx"
mock_validate.assert_called_once_with("/realistic/path/document.docx") mock_validate.assert_called_once_with("/realistic/path/document.docx")
mock_detect.assert_called_once_with("/realistic/path/document.docx") mock_detect.assert_called_once_with("/realistic/path/document.docx")
mock_extract.assert_called_once() mock_extract.assert_called_once()