Skip to content

Commit e38e28d

Browse files
Added unit test
1 parent 62b319e commit e38e28d

92 files changed

Lines changed: 10295 additions & 0 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/test.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ on:
66
- main
77
- dev
88
- demo
9+
- psl-unit-test-cps-v2
910
paths:
1011
- 'src/**/*.py'
1112
- 'tests/**/*.py'
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Coverage configuration for ContentProcessor
2+
# Excludes integration components to focus on core business logic
3+
4+
[run]
5+
source = ../../ContentProcessor/src
6+
omit =
7+
# Exclude main entry points (tested via integration)
8+
*/main.py
9+
# Exclude queue handler base (abstract class requiring concrete implementations)
10+
*/libs/pipeline/queue_handler_base.py
11+
# Exclude agent framework (external dependency compatibility issues)
12+
*/libs/agent_framework/*
13+
# Exclude test files
14+
*/tests/*
15+
*/test_*.py
16+
*/__pycache__/*
17+
18+
[report]
19+
exclude_lines =
20+
# Standard exclusions
21+
pragma: no cover
22+
def __repr__
23+
raise AssertionError
24+
raise NotImplementedError
25+
if __name__ == .__main__.:
26+
if TYPE_CHECKING:
27+
@abstractmethod
28+
@abc.abstractmethod
29+
30+
precision = 2
31+
show_missing = True
32+
33+
[html]
34+
directory = htmlcov_core_logic
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Copyright (c) Microsoft Corporation.
2+
# Licensed under the MIT License.
3+
4+
"""README for ContentProcessor tests.
5+
6+
This directory contains unit tests for the ContentProcessor component.
7+
8+
Structure:
9+
- azure_helper/: Tests for Azure helper modules
10+
- pipeline/: Tests for pipeline entities and handlers
11+
- utils/: Tests for utility modules
12+
- application/: Tests for application configuration
13+
- base/: Tests for base models
14+
15+
Run tests:
16+
cd src/tests/ContentProcessor
17+
pytest --cov=../../ContentProcessor/src --cov-report=term-missing
18+
19+
Coverage target: >85%
20+
"""
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Copyright (c) Microsoft Corporation.
2+
# Licensed under the MIT License.
3+
4+
"""Tests for libs.application.application_configuration (settings and validators)."""
5+
6+
from __future__ import annotations
7+
8+
from libs.application.application_configuration import AppConfiguration
9+
10+
# ── TestAppConfiguration ────────────────────────────────────────────────
11+
12+
13+
class TestAppConfiguration:
    """Checks for the ``AppConfiguration.split_processes`` validator."""

    def test_split_processes_from_csv(self):
        # A comma-separated string is expected back as one entry per step.
        steps = AppConfiguration.split_processes("extract,transform,save")
        assert steps == ["extract", "transform", "save"]

    def test_split_processes_single(self):
        # A string without commas is expected back as a one-element list.
        steps = AppConfiguration.split_processes("extract")
        assert steps == ["extract"]

    def test_split_processes_passthrough_list(self):
        # A value that is already a list is expected back with equal contents.
        steps = AppConfiguration.split_processes(["a", "b"])
        assert steps == ["a", "b"]
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# Copyright (c) Microsoft Corporation.
2+
# Licensed under the MIT License.
3+
4+
"""Tests for libs.application.service_config (LLM service configuration)."""
5+
6+
from __future__ import annotations
7+
8+
from libs.application.service_config import ServiceConfig
9+
10+
# ── TestServiceConfig ───────────────────────────────────────────────────
11+
12+
13+
class TestServiceConfig:
    """ServiceConfig construction, ``is_valid`` checks, and ``to_dict`` output."""

    def _make_env(self, **overrides):
        """Return a complete AZURE_OPENAI_* environment mapping.

        Keyword arguments replace or extend the default entries.
        """
        defaults = {
            "AZURE_OPENAI_API_VERSION": "2024-02-01",
            "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME": "gpt-4",
            "AZURE_OPENAI_ENDPOINT": "https://myoai.openai.azure.com",
            "AZURE_OPENAI_API_KEY": "secret-key",
        }
        return {**defaults, **overrides}

    def test_construction_from_env_vars(self):
        config = ServiceConfig("default", "AZURE_OPENAI", self._make_env())
        assert config.service_id == "default"
        assert config.api_version == "2024-02-01"
        assert config.chat_deployment_name == "gpt-4"
        assert config.endpoint == "https://myoai.openai.azure.com"

    def test_is_valid_with_entra_id(self):
        config = ServiceConfig(
            "svc", "AZURE_OPENAI", self._make_env(), use_entra_id=True
        )
        assert config.is_valid() is True

    def test_is_valid_without_entra_id_requires_api_key(self):
        # The default environment carries an API key, so key-based auth is valid.
        config = ServiceConfig(
            "svc", "AZURE_OPENAI", self._make_env(), use_entra_id=False
        )
        assert config.is_valid() is True

    def test_is_invalid_missing_endpoint(self):
        environment = self._make_env()
        environment.pop("AZURE_OPENAI_ENDPOINT")
        config = ServiceConfig(
            "svc", "AZURE_OPENAI", environment, use_entra_id=True
        )
        assert config.is_valid() is False

    def test_is_invalid_missing_deployment(self):
        environment = self._make_env()
        environment.pop("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME")
        config = ServiceConfig(
            "svc", "AZURE_OPENAI", environment, use_entra_id=True
        )
        assert config.is_valid() is False

    def test_is_invalid_no_entra_no_key(self):
        # Neither Entra ID nor an API key is available in this setup.
        environment = self._make_env()
        environment.pop("AZURE_OPENAI_API_KEY")
        config = ServiceConfig(
            "svc", "AZURE_OPENAI", environment, use_entra_id=False
        )
        assert config.is_valid() is False

    def test_to_dict_keys(self):
        payload = ServiceConfig(
            "svc", "AZURE_OPENAI", self._make_env()
        ).to_dict()
        assert payload["endpoint"] == "https://myoai.openai.azure.com"
        assert payload["chat_deployment_name"] == "gpt-4"
        assert payload["api_key"] == "secret-key"

    def test_to_dict_empty_fields_become_none(self):
        # With an empty environment the serialised fields are expected as None.
        payload = ServiceConfig("svc", "MISSING_PREFIX", {}).to_dict()
        assert payload["endpoint"] is None
        assert payload["chat_deployment_name"] is None

    def test_custom_prefix(self):
        environment = {
            "MY_LLM_ENDPOINT": "https://custom.api",
            "MY_LLM_CHAT_DEPLOYMENT_NAME": "model-v2",
        }
        config = ServiceConfig(
            "custom", "MY_LLM", environment, use_entra_id=True
        )
        assert config.endpoint == "https://custom.api"
        assert config.chat_deployment_name == "model-v2"
        assert config.is_valid() is True
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
# Copyright (c) Microsoft Corporation.
2+
# Licensed under the MIT License.
3+
4+
"""Tests for libs.azure_helper.model.content_understanding (API response models)."""
5+
6+
from __future__ import annotations
7+
8+
from libs.azure_helper.model.content_understanding import (
9+
AnalyzedResult,
10+
DocumentContent,
11+
Line,
12+
Page,
13+
Paragraph,
14+
ResultData,
15+
Span,
16+
Word,
17+
)
18+
19+
# ── TestSpan ────────────────────────────────────────────────────────────
20+
21+
22+
class TestSpan:
    """Span keeps the offset and length it was built with."""

    def test_construction(self):
        fields = {"offset": 0, "length": 10}
        region = Span(**fields)
        assert region.offset == 0
        assert region.length == 10
29+
30+
31+
# ── TestWord ────────────────────────────────────────────────────────────
32+
33+
34+
class TestWord:
    """Word model: stored fields plus the polygon derived from ``source``."""

    def test_construction(self):
        fields = {
            "content": "hello",
            "span": Span(offset=0, length=5),
            "confidence": 0.99,
            "source": "D(1, 1.0, 2.0, 3.0, 4.0)",
        }
        token = Word(**fields)
        assert token.content == "hello"
        assert token.confidence == 0.99

    def test_polygon_parsed_from_source(self):
        # The expected polygon holds every number after the first one in
        # the "D(...)" source string.
        fields = {
            "content": "test",
            "span": Span(offset=0, length=4),
            "confidence": 0.95,
            "source": "D(1, 10.5, 20.3, 30.1, 40.2)",
        }
        token = Word(**fields)
        assert token.polygon == [10.5, 20.3, 30.1, 40.2]

    def test_polygon_empty_for_non_d_source(self):
        # A source that is not in "D(...)" form is expected to give no polygon.
        fields = {
            "content": "test",
            "span": Span(offset=0, length=4),
            "confidence": 0.95,
            "source": "other-source",
        }
        token = Word(**fields)
        assert token.polygon == []
64+
65+
66+
# ── TestLine ────────────────────────────────────────────────────────────
67+
68+
69+
class TestLine:
    """Line model: content plus the polygon derived from ``source``."""

    def test_construction_with_polygon(self):
        fields = {
            "content": "Hello world",
            "source": "D(1, 1.0, 2.0, 3.0, 4.0)",
            "span": Span(offset=0, length=11),
        }
        text_line = Line(**fields)
        assert text_line.content == "Hello world"
        assert text_line.polygon == [1.0, 2.0, 3.0, 4.0]
80+
81+
82+
# ── TestParagraph ───────────────────────────────────────────────────────
83+
84+
85+
class TestParagraph:
    """Paragraph model: content plus the polygon derived from ``source``."""

    def test_construction(self):
        fields = {
            "content": "A paragraph.",
            "source": "D(1, 5.0, 10.0)",
            "span": Span(offset=0, length=12),
        }
        paragraph = Paragraph(**fields)
        assert paragraph.content == "A paragraph."
        assert paragraph.polygon == [5.0, 10.0]
96+
97+
98+
# ── TestPage ────────────────────────────────────────────────────────────
99+
100+
101+
class TestPage:
    """Page container holding words, lines, and paragraphs."""

    def test_construction(self):
        page_spans = [Span(offset=0, length=100)]
        only_word = Word(
            content="word",
            span=Span(offset=0, length=4),
            confidence=0.9,
            source="plain",
        )
        sheet = Page(
            pageNumber=1,
            angle=0.0,
            width=8.5,
            height=11.0,
            spans=page_spans,
            words=[only_word],
        )
        assert sheet.pageNumber == 1
        assert len(sheet.words) == 1
        # lines and paragraphs were not supplied; both are expected empty.
        assert sheet.lines == []
        assert sheet.paragraphs == []
124+
125+
126+
# ── TestDocumentContent ─────────────────────────────────────────────────
127+
128+
129+
class TestDocumentContent:
    """DocumentContent container holding markdown and pages."""

    def test_construction(self):
        single_page = Page(
            pageNumber=1,
            angle=0.0,
            width=8.5,
            height=11.0,
            spans=[Span(offset=0, length=7)],
            words=[],
        )
        document = DocumentContent(
            markdown="# Title",
            kind="document",
            startPageNumber=1,
            endPageNumber=1,
            unit="inch",
            pages=[single_page],
        )
        assert document.markdown == "# Title"
        assert len(document.pages) == 1
152+
153+
154+
# ── TestAnalyzedResult ──────────────────────────────────────────────────
155+
156+
157+
class TestAnalyzedResult:
    """AnalyzedResult: the top-level API response model."""

    def test_construction(self):
        payload = ResultData(
            analyzerId="prebuilt",
            apiVersion="2024-01-01",
            createdAt="2024-01-01T00:00:00Z",
            warnings=[],
            contents=[],
        )
        response = AnalyzedResult(id="r-1", status="succeeded", result=payload)
        assert response.id == "r-1"
        assert response.status == "succeeded"
        assert response.result.contents == []

0 commit comments

Comments
 (0)