mcarchive-org/tests/test_client.py
Ryan Malloy 5265a6440b Initial mcarchive-org MCP server
FastMCP server wrapping archive.org's public read APIs:
- search_items / scrape_items: advanced search + bulk cursor pagination
- get_item_metadata / list_files: progressive disclosure with filtering
- get_file_url / download_file: canonical URLs and streaming downloads
  with HTTP Range resume + optional MD5 verification

Smoke-tested end-to-end via claude -p headless MCP and pytest against
live archive.org endpoints.
2026-04-21 09:41:20 -06:00

53 lines
1.5 KiB
Python

"""End-to-end smoke tests against live archive.org (network required).
Run with: uv run pytest -v
Skip with: uv run pytest -v -m 'not network'
"""
from __future__ import annotations
from pathlib import Path
import pytest
from mcarchive_org.client import ArchiveClient
# Applied to every test in this module: each one is a coroutine and each one
# is tagged ``network`` so it can be deselected with ``-m 'not network'``.
pytestmark = [
    pytest.mark.asyncio,
    pytest.mark.network,
]
async def test_search_nasa_item():
    """Search for ``identifier:nasa`` and expect the ``nasa`` item in the hits."""
    async with ArchiveClient() as client:
        response = await client.search(query="identifier:nasa", rows=5)
        # At least one hit must be reported before the docs are inspected.
        assert response["num_found"] >= 1
        # Membership on a generator stops at the first match, like any().
        returned_ids = (doc["identifier"] for doc in response["docs"])
        assert "nasa" in returned_ids
async def test_metadata_nasa():
    """Fetch metadata for ``nasa`` and check its identifier and file listing."""
    async with ArchiveClient() as client:
        item = await client.metadata("nasa")
        assert item["metadata"]["identifier"] == "nasa"
        # The file listing must be a list and must not be empty.
        file_entries = item["files"]
        assert isinstance(file_entries, list)
        assert file_entries
async def test_download_small_file(tmp_path: Path):
    """Download the smallest non-empty file of the ``nasa`` item and check it.

    Args:
        tmp_path: pytest-provided temporary directory that receives the file.

    The test asserts that the download result reports more than zero bytes
    and, when the file listing carries an ``md5`` value for the chosen file,
    that the result's ``md5_ok`` flag is ``True``.
    """
    async with ArchiveClient() as c:
        files = await c.files("nasa")
        # Keep only entries whose size is a plain, positive integer.  A
        # zero-byte entry must be excluded: it would win min() below and then
        # trip the ``bytes > 0`` assertion for reasons unrelated to the client.
        candidates = [
            f
            for f in files
            if str(f.get("size", "")).isdigit() and int(f["size"]) > 0
        ]
        # Fail with a readable message instead of min()'s bare ValueError.
        assert candidates, "item 'nasa' lists no files with a positive size"
        # pick the smallest file to keep the test fast
        small = min(candidates, key=lambda f: int(f["size"]))
        dest = tmp_path / small["name"]
        # File names may include sub-directories; make sure the target
        # directory exists rather than relying on the client to create it.
        dest.parent.mkdir(parents=True, exist_ok=True)
        result = await c.download_to_file(
            "nasa", small["name"], dest, verify_md5=small.get("md5")
        )
        assert result["bytes"] > 0
        # MD5 verification is only requested when the listing provides one.
        if small.get("md5"):
            assert result["md5_ok"] is True
async def test_scrape_requires_min_count():
    """Expect ``scrape`` to raise ``ValueError`` when called with ``count=10``.

    NOTE(review): presumably 10 is below a minimum page size enforced by the
    client -- confirm against ``ArchiveClient.scrape``.
    """
    async with ArchiveClient() as client:
        rejects_small_count = pytest.raises(ValueError)
        with rejects_small_count:
            await client.scrape(query="identifier:nasa", count=10)