microsoft
diff --git a/‎.github/workflows/test.yml‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/test.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/tests/ContentProcessor/.coveragerc‎
Lines changed: 34 additions & 0 deletions b/‎src/tests/ContentProcessor/.coveragerc‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎src/tests/ContentProcessor/README.md‎
Lines changed: 20 additions & 0 deletions b/‎src/tests/ContentProcessor/README.md‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎src/tests/ContentProcessor/application/test_application_configuration.py‎
Lines changed: 26 additions & 0 deletions b/‎src/tests/ContentProcessor/application/test_application_configuration.py‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎src/tests/ContentProcessor/application/test_service_config.py‎
Lines changed: 84 additions & 0 deletions b/‎src/tests/ContentProcessor/application/test_service_config.py‎
Lines changed: 84 additions & 0 deletions
diff --git a/‎src/tests/ContentProcessor/azure_helper/test_content_understanding_model.py‎
Lines changed: 174 additions & 0 deletions b/‎src/tests/ContentProcessor/azure_helper/test_content_understanding_model.py‎
Lines changed: 174 additions & 0 deletions
@@ -6,6 +6,7 @@ on:
       - main
       - dev
       - demo
+      - psl-unit-test-cps-v2
     paths:
       - 'src/**/*.py'
       - 'tests/**/*.py'
 
@@ -0,0 +1,34 @@
+# Coverage configuration for ContentProcessor
+# Excludes integration components to focus on core business logic
+
+[run]
+source = ../../ContentProcessor/src
+omit =
+    # Exclude main entry points (tested via integration)
+    */main.py
+    # Exclude queue handler base (abstract class requiring concrete implementations)
+    */libs/pipeline/queue_handler_base.py
+    # Exclude agent framework (external dependency compatibility issues)
+    */libs/agent_framework/*
+    # Exclude test files
+    */tests/*
+    */test_*.py
+    */__pycache__/*
+
+[report]
+exclude_lines =
+    # Standard exclusions
+    pragma: no cover
+    def __repr__
+    raise AssertionError
+    raise NotImplementedError
+    if __name__ == .__main__.:
+    if TYPE_CHECKING:
+    @abstractmethod
+    @abc.abstractmethod
+    
+precision = 2
+show_missing = True
+
+[html]
+directory = htmlcov_core_logic
@@ -0,0 +1,20 @@
+<!-- Copyright (c) Microsoft Corporation. -->
+<!-- Licensed under the MIT License. -->
+
+# ContentProcessor tests
+
+This directory contains unit tests for the ContentProcessor component.
+
+## Structure
+
+- `azure_helper/`: Tests for Azure helper modules
+- `pipeline/`: Tests for pipeline entities and handlers
+- `utils/`: Tests for utility modules
+- `application/`: Tests for application configuration
+- `base/`: Tests for base models
+
+## Run tests
+
+```sh
+cd src/tests/ContentProcessor
+pytest --cov=../../ContentProcessor/src --cov-report=term-missing
+```
+
+Coverage target: >85%
@@ -0,0 +1,26 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""Tests for libs.application.application_configuration (settings and validators)."""
+
+from __future__ import annotations
+
+from libs.application.application_configuration import AppConfiguration
+
+# ── TestAppConfiguration ────────────────────────────────────────────────
+
+
+class TestAppConfiguration:
+    """Field validator for process step splitting."""
+
+    def test_split_processes_from_csv(self):
+        result = AppConfiguration.split_processes("extract,transform,save")
+        assert result == ["extract", "transform", "save"]
+
+    def test_split_processes_single(self):
+        result = AppConfiguration.split_processes("extract")
+        assert result == ["extract"]
+
+    def test_split_processes_passthrough_list(self):
+        result = AppConfiguration.split_processes(["a", "b"])
+        assert result == ["a", "b"]
@@ -0,0 +1,84 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""Tests for libs.application.service_config (LLM service configuration)."""
+
+from __future__ import annotations
+
+from libs.application.service_config import ServiceConfig
+
+# ── TestServiceConfig ───────────────────────────────────────────────────
+
+
+class TestServiceConfig:
+    """Construction, validation, and serialisation of ServiceConfig."""
+
+    def _make_env(self, **overrides):
+        base = {
+            "AZURE_OPENAI_API_VERSION": "2024-02-01",
+            "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME": "gpt-4",
+            "AZURE_OPENAI_ENDPOINT": "https://myoai.openai.azure.com",
+            "AZURE_OPENAI_API_KEY": "secret-key",
+        }
+        base.update(overrides)
+        return base
+
+    def test_construction_from_env_vars(self):
+        env = self._make_env()
+        cfg = ServiceConfig("default", "AZURE_OPENAI", env)
+        assert cfg.service_id == "default"
+        assert cfg.api_version == "2024-02-01"
+        assert cfg.chat_deployment_name == "gpt-4"
+        assert cfg.endpoint == "https://myoai.openai.azure.com"
+
+    def test_is_valid_with_entra_id(self):
+        env = self._make_env()
+        cfg = ServiceConfig("svc", "AZURE_OPENAI", env, use_entra_id=True)
+        assert cfg.is_valid() is True
+
+    def test_is_valid_without_entra_id_requires_api_key(self):
+        env = self._make_env()
+        cfg = ServiceConfig("svc", "AZURE_OPENAI", env, use_entra_id=False)
+        assert cfg.is_valid() is True
+
+    def test_is_invalid_missing_endpoint(self):
+        env = self._make_env()
+        del env["AZURE_OPENAI_ENDPOINT"]
+        cfg = ServiceConfig("svc", "AZURE_OPENAI", env, use_entra_id=True)
+        assert cfg.is_valid() is False
+
+    def test_is_invalid_missing_deployment(self):
+        env = self._make_env()
+        del env["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"]
+        cfg = ServiceConfig("svc", "AZURE_OPENAI", env, use_entra_id=True)
+        assert cfg.is_valid() is False
+
+    def test_is_invalid_no_entra_no_key(self):
+        env = self._make_env()
+        del env["AZURE_OPENAI_API_KEY"]
+        cfg = ServiceConfig("svc", "AZURE_OPENAI", env, use_entra_id=False)
+        assert cfg.is_valid() is False
+
+    def test_to_dict_keys(self):
+        env = self._make_env()
+        cfg = ServiceConfig("svc", "AZURE_OPENAI", env)
+        d = cfg.to_dict()
+        assert d["endpoint"] == "https://myoai.openai.azure.com"
+        assert d["chat_deployment_name"] == "gpt-4"
+        assert d["api_key"] == "secret-key"
+
+    def test_to_dict_empty_fields_become_none(self):
+        cfg = ServiceConfig("svc", "MISSING_PREFIX", {})
+        d = cfg.to_dict()
+        assert d["endpoint"] is None
+        assert d["chat_deployment_name"] is None
+
+    def test_custom_prefix(self):
+        env = {
+            "MY_LLM_ENDPOINT": "https://custom.api",
+            "MY_LLM_CHAT_DEPLOYMENT_NAME": "model-v2",
+        }
+        cfg = ServiceConfig("custom", "MY_LLM", env, use_entra_id=True)
+        assert cfg.endpoint == "https://custom.api"
+        assert cfg.chat_deployment_name == "model-v2"
+        assert cfg.is_valid() is True
@@ -0,0 +1,174 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""Tests for libs.azure_helper.model.content_understanding (API response models)."""
+
+from __future__ import annotations
+
+from libs.azure_helper.model.content_understanding import (
+    AnalyzedResult,
+    DocumentContent,
+    Line,
+    Page,
+    Paragraph,
+    ResultData,
+    Span,
+    Word,
+)
+
+# ── TestSpan ────────────────────────────────────────────────────────────
+
+
+class TestSpan:
+    """Basic offset/length span model."""
+
+    def test_construction(self):
+        span = Span(offset=0, length=10)
+        assert span.offset == 0
+        assert span.length == 10
+
+
+# ── TestWord ────────────────────────────────────────────────────────────
+
+
+class TestWord:
+    """Word model with polygon extraction from source field."""
+
+    def test_construction(self):
+        word = Word(
+            content="hello",
+            span=Span(offset=0, length=5),
+            confidence=0.99,
+            source="D(1, 1.0, 2.0, 3.0, 4.0)",
+        )
+        assert word.content == "hello"
+        assert word.confidence == 0.99
+
+    def test_polygon_parsed_from_source(self):
+        word = Word(
+            content="test",
+            span=Span(offset=0, length=4),
+            confidence=0.95,
+            source="D(1, 10.5, 20.3, 30.1, 40.2)",
+        )
+        assert word.polygon == [10.5, 20.3, 30.1, 40.2]
+
+    def test_polygon_empty_for_non_d_source(self):
+        word = Word(
+            content="test",
+            span=Span(offset=0, length=4),
+            confidence=0.95,
+            source="other-source",
+        )
+        assert word.polygon == []
+
+
+# ── TestLine ────────────────────────────────────────────────────────────
+
+
+class TestLine:
+    """Line model with polygon parsing."""
+
+    def test_construction_with_polygon(self):
+        line = Line(
+            content="Hello world",
+            source="D(1, 1.0, 2.0, 3.0, 4.0)",
+            span=Span(offset=0, length=11),
+        )
+        assert line.content == "Hello world"
+        assert line.polygon == [1.0, 2.0, 3.0, 4.0]
+
+
+# ── TestParagraph ───────────────────────────────────────────────────────
+
+
+class TestParagraph:
+    """Paragraph model with polygon parsing."""
+
+    def test_construction(self):
+        para = Paragraph(
+            content="A paragraph.",
+            source="D(1, 5.0, 10.0)",
+            span=Span(offset=0, length=12),
+        )
+        assert para.content == "A paragraph."
+        assert para.polygon == [5.0, 10.0]
+
+
+# ── TestPage ────────────────────────────────────────────────────────────
+
+
+class TestPage:
+    """Page container with words, lines, and paragraphs."""
+
+    def test_construction(self):
+        page = Page(
+            pageNumber=1,
+            angle=0.0,
+            width=8.5,
+            height=11.0,
+            spans=[Span(offset=0, length=100)],
+            words=[
+                Word(
+                    content="word",
+                    span=Span(offset=0, length=4),
+                    confidence=0.9,
+                    source="plain",
+                )
+            ],
+        )
+        assert page.pageNumber == 1
+        assert len(page.words) == 1
+        assert page.lines == []
+        assert page.paragraphs == []
+
+
+# ── TestDocumentContent ─────────────────────────────────────────────────
+
+
+class TestDocumentContent:
+    """Document content container with pages."""
+
+    def test_construction(self):
+        doc = DocumentContent(
+            markdown="# Title",
+            kind="document",
+            startPageNumber=1,
+            endPageNumber=1,
+            unit="inch",
+            pages=[
+                Page(
+                    pageNumber=1,
+                    angle=0.0,
+                    width=8.5,
+                    height=11.0,
+                    spans=[Span(offset=0, length=7)],
+                    words=[],
+                )
+            ],
+        )
+        assert doc.markdown == "# Title"
+        assert len(doc.pages) == 1
+
+
+# ── TestAnalyzedResult ──────────────────────────────────────────────────
+
+
+class TestAnalyzedResult:
+    """Top-level API response model."""
+
+    def test_construction(self):
+        result = AnalyzedResult(
+            id="r-1",
+            status="succeeded",
+            result=ResultData(
+                analyzerId="prebuilt",
+                apiVersion="2024-01-01",
+                createdAt="2024-01-01T00:00:00Z",
+                warnings=[],
+                contents=[],
+            ),
+        )
+        assert result.id == "r-1"
+        assert result.status == "succeeded"
+        assert result.result.contents == []