# mcaxl/tests/test_docs_loader.py

"""Tests for the docs index loader (chunk filtering for prompt enrichment)."""

import json
from pathlib import Path

import pytest

from mcaxl.docs_loader import DocsIndex


@pytest.fixture
def fake_index(tmp_path: Path) -> Path:
    """Write a tiny on-disk docs index into ``tmp_path`` and return that directory.

    Two files are created:

    * ``chunks.jsonl`` -- three JSON objects joined by newlines (two with
      product ``cucm``, one with product ``cer``).
    * ``index_meta.json`` -- a small metadata object listing both products.

    Every chunk's ``source_path`` points at ``tmp_path / "fake.md"``; that
    file itself is never created by this fixture.
    """
    markdown_source = str(tmp_path / "fake.md")

    def build_chunk(chunk_id, text, heading_path, product):
        # Key order is kept stable so the serialized JSON lines do not change.
        return {
            "id": chunk_id,
            "text": text,
            "heading_path": heading_path,
            "source_path": markdown_source,
            "product": product,
            "version": "v15",
            "doc": "admin",
        }

    records = [
        build_chunk(
            "cucm::v15::admin::Route-Plan-Overview::0",
            "The route plan defines how calls are routed through the cluster.",
            ["Route Plan Overview"],
            "cucm",
        ),
        build_chunk(
            "cucm::v15::admin::Translation-Patterns::0",
            "Translation patterns rewrite digits before routing.",
            ["Call Routing", "Translation Patterns"],
            "cucm",
        ),
        build_chunk(
            "cer::v15::admin::Caller-ID::0",
            "Caller ID handling for emergency calls.",
            ["Caller ID"],
            "cer",
        ),
    ]

    # One JSON document per line, no trailing newline.
    jsonl_body = "\n".join(map(json.dumps, records))
    (tmp_path / "chunks.jsonl").write_text(jsonl_body)

    meta = {"model_name": "test", "embedding_dim": 384, "products": ["cucm", "cer"]}
    (tmp_path / "index_meta.json").write_text(json.dumps(meta))

    return tmp_path


def test_load_index(fake_index: Path):
    """Loading the fixture directory yields an index holding all three chunks."""
    loaded = DocsIndex.load(fake_index)
    assert loaded is not None
    chunk_count = len(loaded.chunks)
    assert chunk_count == 3


def test_load_missing_returns_none(tmp_path: Path):
    """``DocsIndex.load`` returns ``None`` for a directory that does not exist."""
    missing_dir = tmp_path / "nope"
    result = DocsIndex.load(missing_dir)
    assert result is None


def test_find_filters_by_product(fake_index: Path):
    """``find`` with ``product=...`` must only return chunks of that product.

    The keyword "caller" appears only in the fixture's ``cer`` chunk, so it
    is used to probe both sides of the filter.
    """
    idx = DocsIndex.load(fake_index)
    assert idx is not None

    # A cucm-restricted search must never leak the cer chunk. Checking the
    # ``product`` field (rather than the always-present ``doc`` field) is
    # what actually verifies the filter; an empty result also satisfies it.
    cucm_only = idx.find(["caller"], product="cucm")
    assert all(c.get("product") == "cucm" for c in cucm_only)

    # A cer-restricted search must find the Caller ID chunk, and only cer chunks.
    cer_only = idx.find(["caller"], product="cer")
    assert cer_only
    assert all(c.get("product") == "cer" for c in cer_only)
    assert any("Caller" in (c["heading_path"] or [""])[0] for c in cer_only)


def test_find_scores_heading_higher_than_text(fake_index: Path):
    """The top hit for "translation" is the chunk whose heading names it."""
    index = DocsIndex.load(fake_index)
    assert index is not None

    hits = index.find(["translation"], product="cucm")
    assert hits

    # The chunk with "Translation Patterns" in its heading should rank above
    # any other chunk that merely mentions translation in passing.
    # NOTE(review): the fixture has only one chunk mentioning "translation",
    # so relative ranking is not really exercised here.
    top_heading = " ".join(hits[0]["heading_path"] or [])
    assert "Translation" in top_heading


def test_find_no_matches(fake_index: Path):
    """A keyword absent from every chunk produces an empty result list."""
    index = DocsIndex.load(fake_index)
    assert index is not None
    hits = index.find(["xyzzyplugh"])
    assert hits == []


def test_format_for_prompt_includes_heading_and_text(fake_index: Path):
    """Rendered prompt text carries both the chunk heading and its body text."""
    index = DocsIndex.load(fake_index)
    assert index is not None

    matched = index.find(["route plan"], product="cucm")
    prompt_text = index.format_chunks_for_prompt(matched)

    # Heading is checked verbatim; body text is compared case-insensitively.
    assert "Route Plan Overview" in prompt_text
    lowered = prompt_text.lower()
    assert "route plan defines" in lowered


def test_format_empty_chunks(fake_index: Path):
    """Formatting an empty chunk list yields text containing "No matching"."""
    index = DocsIndex.load(fake_index)
    assert index is not None
    no_chunks = []
    prompt_text = index.format_chunks_for_prompt(no_chunks)
    assert "No matching" in prompt_text