Skip to content

Commit 5e6ca2a

Browse files
committed
feat: подключить базовую трассировку OpenTelemetry и Jaeger
Что сделано:
- добавлены зависимости OpenTelemetry и обновлён lock-файл
- добавлен модуль telemetry с OTLP-экспортом, сэмплингом и авто-инструментацией FastAPI, SQLAlchemy, Redis
- подключены инициализация и корректное завершение telemetry в lifecycle приложения
- добавлены ручные span'ы на сервисном уровне auth и tasks для читаемого дерева вызовов
- добавлены сервис Jaeger и OTel-переменные окружения в docker-compose
1 parent 2cda0f9 commit 5e6ca2a

8 files changed

Lines changed: 565 additions & 28 deletions

File tree

app/config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ class Settings(BaseSettings):
1515
ARGON_HASH_LEN: int = 32
1616
ARGON_SALT_LEN: int = 16
1717
ARGON_MAX_PASSWORD_LEN: int = 1024 # basic DoS guard
18+
OTEL_ENABLED: bool = True
19+
OTEL_SERVICE_NAME: str = "task-manager-api"
20+
OTEL_EXPORTER_OTLP_ENDPOINT: str = "http://localhost:4317"
21+
OTEL_EXPORTER_OTLP_INSECURE: bool = True
22+
OTEL_SAMPLE_RATIO: float = 1.0
1823

1924
model_config = SettingsConfigDict(
2025
env_file=".env",

app/main.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from app.routes.metrics import router as metrics_router
1212
from app.routes.tags import router as tags_router
1313
from app.routes.tasks import router as tasks_router
14+
from app.telemetry import setup_telemetry, shutdown_telemetry
1415

1516

1617
@asynccontextmanager
@@ -19,6 +20,7 @@ async def lifespan(app: FastAPI):
1920
try:
2021
yield
2122
finally:
23+
shutdown_telemetry()
2224
await redis_client.aclose()
2325

2426

@@ -31,6 +33,8 @@ async def lifespan(app: FastAPI):
3133
lifespan=lifespan,
3234
)
3335

36+
setup_telemetry(app)
37+
3438
# middleware регистрируются в обратном порядке (луковица).
3539
# RequestID должен сработать первым, поэтому добавляем его последним.
3640
app.add_middleware(MetricsMiddleware)

app/services/auth.py

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@
44

55
from uuid import UUID
66

7+
from opentelemetry import trace
78
from sqlalchemy.ext.asyncio import AsyncSession
89

910
from app.models.user import User
1011
from app.repositories import user_repo
1112
from app.security.password import get_dummy_hash, hash_password, verify_password
1213

14+
tracer = trace.get_tracer(__name__)
15+
1316

1417
class UserAlreadyExists(Exception):
1518
"""Email уже занят другим пользователем."""
@@ -25,23 +28,29 @@ class UserInactive(Exception):
2528

2629
async def register_user(session: AsyncSession, email: str, password: str) -> User:
2730
"""Регистрирует нового пользователя. Поднимает UserAlreadyExists если email занят."""
28-
if await user_repo.get_user_by_email(session, email):
29-
raise UserAlreadyExists(email)
30-
pass_hash = await hash_password(password=password)
31-
return await user_repo.create_user(session, email=email, hashed_password=pass_hash)
31+
with tracer.start_as_current_span("auth.register_user"):
32+
if await user_repo.get_user_by_email(session, email):
33+
raise UserAlreadyExists(email)
34+
pass_hash = await hash_password(password=password)
35+
return await user_repo.create_user(
36+
session,
37+
email=email,
38+
hashed_password=pass_hash,
39+
)
3240

3341

3442
async def authenticate_user(
3543
session: AsyncSession, email: str, password: str
3644
) -> User | None:
3745
"""Проверяет email + пароль. Возвращает пользователя или None если данные неверны."""
38-
user = await user_repo.get_user_by_email(session, email)
39-
# Всегда прогоняем Argon2, даже если email не найден — защита от тайминг-атаки.
40-
# Без этого атакующий может по времени ответа определить, существует ли email.
41-
hashed = user.hashed_password if user else get_dummy_hash()
42-
if not await verify_password(password=password, hashed_password=hashed):
43-
return None
44-
return user
46+
with tracer.start_as_current_span("auth.authenticate_user"):
47+
user = await user_repo.get_user_by_email(session, email)
48+
# Всегда прогоняем Argon2, даже если email не найден — защита от тайминг-атаки.
49+
# Без этого атакующий может по времени ответа определить, существует ли email.
50+
hashed = user.hashed_password if user else get_dummy_hash()
51+
if not await verify_password(password=password, hashed_password=hashed):
52+
return None
53+
return user
4554

4655

4756
async def authenticate_active_user(
@@ -62,9 +71,10 @@ async def authenticate_active_user(
6271

6372
async def validate_refresh_subject(session: AsyncSession, user_id: UUID) -> User:
6473
"""Проверяет, что пользователь для refresh существует и активен."""
65-
user = await user_repo.get_user_by_id(session, user_id)
66-
if not user:
67-
raise UserNotFound
68-
if not user.is_active:
69-
raise UserInactive
70-
return user
74+
with tracer.start_as_current_span("auth.validate_refresh_subject"):
75+
user = await user_repo.get_user_by_id(session, user_id)
76+
if not user:
77+
raise UserNotFound
78+
if not user.is_active:
79+
raise UserInactive
80+
return user

app/services/task.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,39 +4,46 @@
44
from datetime import date
55
from uuid import UUID
66

7+
from opentelemetry import trace
78
from sqlalchemy.ext.asyncio import AsyncSession
89

910
from app.models.task import Task
1011
from app.models.user import User
1112
from app.repositories import task_repo
1213
from app.schemas.task import TaskCreate, TaskUpdate
1314

15+
tracer = trace.get_tracer(__name__)
16+
1417

1518
class InvalidDueDate(Exception):
1619
"""Дедлайн задачи не может быть в прошлом."""
1720

1821

1922
async def create_task(session: AsyncSession, user: User, payload: TaskCreate) -> Task:
2023
"""Создаёт задачу. Поднимает InvalidDueDate если дедлайн в прошлом."""
21-
if payload.due_date and payload.due_date < date.today():
22-
raise InvalidDueDate
23-
return await task_repo.create_task(session, user, **payload.model_dump())
24+
with tracer.start_as_current_span("tasks.create_task"):
25+
if payload.due_date and payload.due_date < date.today():
26+
raise InvalidDueDate
27+
return await task_repo.create_task(session, user, **payload.model_dump())
2428

2529

2630
async def get_user_tasks(session: AsyncSession, user: User) -> Sequence[Task]:
2731
"""Возвращает все задачи пользователя."""
28-
return await task_repo.get_tasks_by_user(session, user)
32+
with tracer.start_as_current_span("tasks.get_user_tasks"):
33+
return await task_repo.get_tasks_by_user(session, user)
2934

3035

3136
async def update_task(session: AsyncSession, task: Task, payload: TaskUpdate) -> Task:
3237
"""Обновляет задачу. Поднимает InvalidDueDate если дедлайн в прошлом."""
33-
updates = payload.model_dump(exclude_unset=True)
34-
due_date = updates.get("due_date")
35-
if due_date is not None and due_date < date.today():
36-
raise InvalidDueDate
37-
return await task_repo.update_task(session, task, **updates)
38+
with tracer.start_as_current_span("tasks.update_task"):
39+
updates = payload.model_dump(exclude_unset=True)
40+
due_date = updates.get("due_date")
41+
if due_date is not None and due_date < date.today():
42+
raise InvalidDueDate
43+
return await task_repo.update_task(session, task, **updates)
3844

3945

4046
async def delete_task(session: AsyncSession, task: Task) -> UUID:
4147
"""Удаляет задачу и возвращает её id."""
42-
return await task_repo.delete_task(session, task)
48+
with tracer.start_as_current_span("tasks.delete_task"):
49+
return await task_repo.delete_task(session, task)

app/telemetry.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
from fastapi import FastAPI
2+
from opentelemetry import trace
3+
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
4+
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
5+
from opentelemetry.instrumentation.redis import RedisInstrumentor
6+
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
7+
from opentelemetry.sdk.resources import Resource
8+
from opentelemetry.sdk.trace import TracerProvider
9+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
10+
from opentelemetry.sdk.trace.sampling import ParentBased, TraceIdRatioBased
11+
12+
from app.config import settings
13+
from app.database import engine
14+
15+
_initialized = False
16+
17+
18+
def setup_telemetry(app: FastAPI) -> None:
19+
"""Инициализирует OpenTelemetry и авто-инструментацию приложения."""
20+
global _initialized
21+
if _initialized or not settings.OTEL_ENABLED:
22+
return
23+
24+
provider = TracerProvider(
25+
resource=Resource.create({"service.name": settings.OTEL_SERVICE_NAME}),
26+
sampler=ParentBased(TraceIdRatioBased(settings.OTEL_SAMPLE_RATIO)),
27+
)
28+
exporter = OTLPSpanExporter(
29+
endpoint=settings.OTEL_EXPORTER_OTLP_ENDPOINT,
30+
insecure=settings.OTEL_EXPORTER_OTLP_INSECURE,
31+
)
32+
provider.add_span_processor(BatchSpanProcessor(exporter))
33+
trace.set_tracer_provider(provider)
34+
35+
FastAPIInstrumentor.instrument_app(app, excluded_urls="/metrics,/api/v1/health")
36+
SQLAlchemyInstrumentor().instrument(engine=engine.sync_engine)
37+
RedisInstrumentor().instrument()
38+
_initialized = True
39+
40+
41+
def shutdown_telemetry() -> None:
42+
"""Корректно завершает экспортёр при остановке приложения."""
43+
if not settings.OTEL_ENABLED:
44+
return
45+
provider = trace.get_tracer_provider()
46+
if isinstance(provider, TracerProvider):
47+
provider.shutdown()

docker-compose.yml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,30 @@ services:
4949
ACCESS_TOKEN_EXPIRE_MINUTES: 10
5050
REFRESH_TOKEN_EXPIRE_DAYS: 7
5151
WEB_CONCURRENCY: 4
52+
OTEL_ENABLED: "true"
53+
OTEL_SERVICE_NAME: task-manager-api
54+
OTEL_EXPORTER_OTLP_ENDPOINT: http://jaeger:4317
55+
OTEL_EXPORTER_OTLP_INSECURE: "true"
56+
OTEL_SAMPLE_RATIO: "1.0"
5257
ports:
5358
- "8000:8000"
5459
depends_on:
5560
postgres:
5661
condition: service_healthy
5762
redis:
5863
condition: service_healthy
64+
jaeger:
65+
condition: service_started
66+
restart: unless-stopped
67+
68+
jaeger:
69+
image: jaegertracing/all-in-one:1.64.0
70+
container_name: task-manager-jaeger
71+
ports:
72+
- "16686:16686"
73+
- "4317:4317"
74+
environment:
75+
COLLECTOR_OTLP_ENABLED: "true"
5976
restart: unless-stopped
6077

6178
prometheus:

pyproject.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ dependencies = [
1818
"uvicorn[standard]>=0.41.0",
1919
"structlog>=25.5.0",
2020
"prometheus-client>=0.24.1",
21+
"opentelemetry-api>=1.40.0",
22+
"opentelemetry-sdk>=1.40.0",
23+
"opentelemetry-exporter-otlp>=1.40.0",
24+
"opentelemetry-instrumentation-fastapi>=0.61b0",
25+
"opentelemetry-instrumentation-sqlalchemy>=0.61b0",
26+
"opentelemetry-instrumentation-redis>=0.61b0",
2127
]
2228

2329
[dependency-groups]

0 commit comments

Comments
 (0)