"""End-to-end smoke tests against live archive.org (network required).

Run with:   uv run pytest -v
Skip with:  uv run pytest -v -m 'not network'
"""

from __future__ import annotations

from pathlib import Path

import pytest

from mcarchive_org.client import ArchiveClient

# Every test in this module is tagged ``network`` so the whole file can be
# deselected with ``-m 'not network'``.
# NOTE(review): the async tests carry no explicit asyncio marker — presumably
# the project configures an async pytest plugin in auto mode; confirm.
pytestmark = pytest.mark.network


async def test_search_nasa_item():
    """Searching for ``identifier:nasa`` returns at least the ``nasa`` item."""
    async with ArchiveClient() as c:
        result = await c.search(query="identifier:nasa", rows=5)
        assert result["num_found"] >= 1
        assert any(d["identifier"] == "nasa" for d in result["docs"])


async def test_metadata_nasa():
    """Metadata for ``nasa`` echoes the identifier and has a non-empty file list."""
    async with ArchiveClient() as c:
        data = await c.metadata("nasa")
        assert data["metadata"]["identifier"] == "nasa"
        assert isinstance(data["files"], list) and data["files"]


async def test_download_small_file(tmp_path: Path):
    """Download the smallest non-empty file of ``nasa`` and check its MD5.

    The file is written below pytest's ``tmp_path``. The MD5 result is only
    asserted when the file listing supplies an ``md5`` value to verify against.
    """
    async with ArchiveClient() as c:
        files = await c.files("nasa")

        # Keep only entries with a positive numeric size. A size of "0" is a
        # truthy string, so a plain truthiness check would let a zero-byte
        # file win the min() below and break the bytes_written assertion.
        candidates = [
            f
            for f in files
            if str(f.get("size", "")).isdigit() and int(f["size"]) > 0
        ]
        assert candidates, "item 'nasa' lists no file with a positive numeric size"

        # pick the smallest file to keep the test fast
        small = min(candidates, key=lambda f: int(f["size"]))

        dest = tmp_path / small["name"]
        # The listed name may contain a sub-path; make sure its directory
        # exists (harmless if the client creates it as well).
        dest.parent.mkdir(parents=True, exist_ok=True)

        result = await c.download_to_file(
            "nasa", small["name"], dest, verify_md5=small.get("md5")
        )
        assert result["bytes_written"] > 0
        if small.get("md5"):
            assert result["md5_ok"] is True


async def test_scrape_requires_min_count():
    """``scrape`` rejects ``count=10`` with ``ValueError``.

    NOTE(review): presumably 10 is below the minimum count the client
    enforces for the scrape endpoint — confirm the threshold in the client.
    """
    async with ArchiveClient() as c:
        with pytest.raises(ValueError):
            await c.scrape(query="identifier:nasa", count=10)