Skip to content
Merged
1 change: 0 additions & 1 deletion infra/main.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,6 @@ module applicationInsights 'br/public:avm/res/insights/component:0.7.0' = if (en
name: 'appi-${solutionSuffix}'
location: location
workspaceResourceId: logAnalyticsWorkspaceResourceId
diagnosticSettings: [{ workspaceResourceId: logAnalyticsWorkspaceResourceId }]
tags: allTags
enableTelemetry: enableTelemetry
retentionInDays: 365
Expand Down
61 changes: 40 additions & 21 deletions src/backend/api/api_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,15 @@ def record_exception_to_trace(e):
span.set_status(Status(StatusCode.ERROR, str(e)))


def set_span_attributes(**attributes):
"""Set key-value attributes on the current OpenTelemetry span."""
span = trace.get_current_span()
if span and span.is_recording():
for key, value in attributes.items():
if value:
span.set_attribute(key, str(value))


# start processing the batch
@router.post("/start-processing")
async def start_processing(request: Request):
Expand Down Expand Up @@ -87,6 +96,7 @@ async def start_processing(request: Request):
batch_id = payload.get("batch_id")
translate_from = payload.get("translate_from")
translate_to = payload.get("translate_to")
set_span_attributes(batch_id=batch_id)

track_event_if_configured(
"ProcessingStart",
Expand All @@ -103,6 +113,7 @@ async def start_processing(request: Request):
"message": "Files processed",
}
except Exception as e:
track_event_if_configured("ProcessingFailed", {"batch_id": batch_id, "error": str(e)})
record_exception_to_trace(e)
raise HTTPException(status_code=500, detail=str(e)) from e
Comment thread
Abdul-Microsoft marked this conversation as resolved.

Expand Down Expand Up @@ -146,6 +157,7 @@ async def download_files(batch_id: str):
# call batch_service get_batch_for_zip to get all files for batch_id
batch_service = BatchService()
await batch_service.initialize_database()
set_span_attributes(batch_id=batch_id)

file_data = await batch_service.get_batch_for_zip(batch_id)
if not file_data:
Expand Down Expand Up @@ -187,6 +199,7 @@ async def download_files(batch_id: str):
)
return Response(zip_data, media_type="application/zip", headers=headers)
except Exception as e:
track_event_if_configured("DownloadZipFailed", {"batch_id": batch_id, "error": str(e)})
record_exception_to_trace(e)
raise HTTPException(
status_code=404, detail=f"Error creating ZIP file: {str(e)}"
Expand Down Expand Up @@ -270,7 +283,7 @@ async def batch_status_updates(
await close_connection(batch_id)
except Exception as e:
record_exception_to_trace(e)
logger.error("Error in WebSocket connection", error=str(e))
logger.error("Error in WebSocket connection", error=str(e), batch_id=batch_id)
await close_connection(batch_id)


Expand Down Expand Up @@ -373,15 +386,16 @@ async def get_batch_status(request: Request, batch_id: str):
# Authenticate user
authenticated_user = get_authenticated_user(request)
user_id = authenticated_user.user_principal_id
set_span_attributes(batch_id=batch_id, user_id=user_id)
if not user_id:
track_event_if_configured(
"UserIdNotFound", {"status_code": 400, "detail": "no user"}
"UserIdNotFound", {"status_code": 400, "detail": "no user", "batch_id": batch_id}
)
raise HTTPException(status_code=401, detail="User not authenticated")

# Validate batch_id format
if not batch_service.is_valid_uuid(batch_id):
track_event_if_configured("InvalidBatchId", {"batch_id": batch_id})
track_event_if_configured("InvalidBatchId", {"batch_id": batch_id, "user_id": user_id})
raise HTTPException(status_code=400, detail="Invalid batch_id format")

# Fetch batch details
Expand All @@ -399,7 +413,7 @@ async def get_batch_status(request: Request, batch_id: str):
record_exception_to_trace(e)
raise e
except Exception as e:
logger.error("Error retrieving batch history", error=str(e))
logger.error("Error retrieving batch history", error=str(e), batch_id=batch_id)
record_exception_to_trace(e)
error_message = str(e)
if "403" in error_message:
Expand All @@ -417,9 +431,10 @@ async def get_batch_summary(request: Request, batch_id: str):
# Authenticate user
authenticated_user = get_authenticated_user(request)
user_id = authenticated_user.user_principal_id
set_span_attributes(batch_id=batch_id, user_id=user_id)
if not user_id:
track_event_if_configured(
"UserIdNotFound", {"status_code": 400, "detail": "no user"}
"UserIdNotFound", {"status_code": 400, "detail": "no user", "batch_id": batch_id}
)
raise HTTPException(status_code=401, detail="User not authenticated")

Expand All @@ -431,16 +446,16 @@ async def get_batch_summary(request: Request, batch_id: str):
)
raise HTTPException(status_code=404, detail="No batch summary found.")
track_event_if_configured(
"BatchSummaryRetrieved", {"batch_summary": batch_summary}
"BatchSummaryRetrieved", {"batch_id": batch_id, "user_id": user_id, "batch_summary": batch_summary}
)
return batch_summary

except HTTPException as e:
logger.error("Error fetching batch summary", error=str(e))
logger.error("Error fetching batch summary", error=str(e), batch_id=batch_id)
record_exception_to_trace(e)
raise e
except Exception as e:
logger.error("Error fetching batch summary", error=str(e))
logger.error("Error fetching batch summary", error=str(e), batch_id=batch_id)
record_exception_to_trace(e)
raise HTTPException(status_code=404, detail="Batch not found") from e

Expand Down Expand Up @@ -538,16 +553,17 @@ async def upload_file(
authenticated_user = get_authenticated_user(request)
user_id = authenticated_user.user_principal_id

set_span_attributes(batch_id=batch_id, user_id=user_id)
if not user_id:
track_event_if_configured(
"UserIdNotFound", {"status_code": 400, "detail": "no user"}
"UserIdNotFound", {"status_code": 400, "detail": "no user", "batch_id": batch_id}
)
raise HTTPException(status_code=401, detail="User not authenticated")

# Validate batch_id format
logger.info(f"batch_id: {batch_id}")
logger.info("Upload request received", batch_id=batch_id, filename=str(file.filename))
if not batch_service.is_valid_uuid(batch_id):
track_event_if_configured("InvalidBatchId", {"batch_id": batch_id})
track_event_if_configured("InvalidBatchId", {"batch_id": batch_id, "user_id": user_id})
raise HTTPException(status_code=400, detail="Invalid batch_id format")

# Upload file via BatchService
Expand Down Expand Up @@ -660,6 +676,7 @@ async def get_file_details(request: Request, file_id: str):
# Authenticate user
authenticated_user = get_authenticated_user(request)
user_id = authenticated_user.user_principal_id
set_span_attributes(file_id=file_id, user_id=user_id)
if not user_id:
track_event_if_configured(
"UserIdNotFound", {"endpoint": "get_file_details"}
Expand All @@ -668,22 +685,22 @@ async def get_file_details(request: Request, file_id: str):

# Validate file_id format
if not batch_service.is_valid_uuid(file_id):
track_event_if_configured("InvalidFileId", {"file_id": file_id})
track_event_if_configured("InvalidFileId", {"file_id": file_id, "user_id": user_id})
raise HTTPException(status_code=400, detail="Invalid file_id format")

# Fetch file details
file_data = await batch_service.get_file_report(file_id)
if not file_data:
track_event_if_configured("FileNotFound", {"file_id": file_id})
track_event_if_configured("FileNotFound", {"file_id": file_id, "user_id": user_id})
raise HTTPException(status_code=404, detail="File not found")
track_event_if_configured("FileDetailsRetrieved", {"file_data": file_data})
track_event_if_configured("FileDetailsRetrieved", {"file_id": file_id, "user_id": user_id, "file_data": file_data})
return file_data

except HTTPException as e:
record_exception_to_trace(e)
raise e
except Exception as e:
logger.error("Error retrieving file details", error=str(e))
logger.error("Error retrieving file details", error=str(e), file_id=file_id)
record_exception_to_trace(e)
raise HTTPException(status_code=500, detail="Internal server error") from e

Expand Down Expand Up @@ -721,15 +738,16 @@ async def delete_batch_details(request: Request, batch_id: str):
# Authenticate user
authenticated_user = get_authenticated_user(request)
user_id = authenticated_user.user_principal_id
set_span_attributes(batch_id=batch_id, user_id=user_id)
if not user_id:
track_event_if_configured(
"UserIdNotFound", {"endpoint": "delete_batch_details"}
"UserIdNotFound", {"batch_id": batch_id, "endpoint": "delete_batch_details"}
)
raise HTTPException(status_code=401, detail="User not authenticated")

# Validate file_id format
if not batch_service.is_valid_uuid(batch_id):
track_event_if_configured("InvalidBatchId", {"batch_id": batch_id})
track_event_if_configured("InvalidBatchId", {"batch_id": batch_id, "user_id": user_id})
raise HTTPException(
status_code=400, detail=f"Invalid batch_id format: {batch_id}"
)
Expand All @@ -745,7 +763,7 @@ async def delete_batch_details(request: Request, batch_id: str):
record_exception_to_trace(e)
raise e
except Exception as e:
logger.error("Failed to delete batch from database", error=str(e))
logger.error("Failed to delete batch from database", error=str(e), batch_id=batch_id)
record_exception_to_trace(e)
raise HTTPException(status_code=500, detail="Database connection error") from e

Expand Down Expand Up @@ -783,6 +801,7 @@ async def delete_file_details(request: Request, file_id: str):
# Authenticate user
authenticated_user = get_authenticated_user(request)
user_id = authenticated_user.user_principal_id
set_span_attributes(file_id=file_id, user_id=user_id)
if not user_id:
track_event_if_configured(
"UserIdNotFound", {"endpoint": "delete_file_details"}
Expand All @@ -791,15 +810,15 @@ async def delete_file_details(request: Request, file_id: str):

# Validate file_id format
if not batch_service.is_valid_uuid(file_id):
track_event_if_configured("InvalidFileId", {"file_id": file_id})
track_event_if_configured("InvalidFileId", {"file_id": file_id, "user_id": user_id})
raise HTTPException(
status_code=400, detail=f"Invalid file_id format: {file_id}"
)

# Delete file
file_delete = await batch_service.delete_file(file_id, user_id)
if file_delete is None:
track_event_if_configured("FileDeleteNotFound", {"file_id": file_id})
track_event_if_configured("FileDeleteNotFound", {"file_id": file_id, "user_id": user_id})
raise HTTPException(status_code=404, detail=f"File not found: {file_id}")

logger.info(f"File deleted successfully: {file_delete}")
Expand All @@ -810,7 +829,7 @@ async def delete_file_details(request: Request, file_id: str):
record_exception_to_trace(e)
raise e
except Exception as e:
logger.error("Failed to delete file from database", error=str(e))
logger.error("Failed to delete file from database", error=str(e), file_id=file_id)
record_exception_to_trace(e)
raise HTTPException(status_code=500, detail="Database connection error") from e

Expand Down
57 changes: 3 additions & 54 deletions src/backend/api/event_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,69 +3,18 @@
import os

# Third-party
from applicationinsights import TelemetryClient
from applicationinsights.channel import SynchronousQueue, SynchronousSender, TelemetryChannel
from azure.monitor.events.extension import track_event

from dotenv import load_dotenv

load_dotenv()

# Global telemetry client (initialized once)
_telemetry_client = None


def _get_telemetry_client():
"""Get or create the Application Insights telemetry client."""
global _telemetry_client

if _telemetry_client is None:
connection_string = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")
if connection_string:
try:
# Extract instrumentation key from connection string
# Format: InstrumentationKey=xxx;IngestionEndpoint=https://...
parts = dict(part.split('=', 1) for part in connection_string.split(';') if '=' in part)
instrumentation_key = parts.get('InstrumentationKey')

if instrumentation_key:
# Create a synchronous channel for immediate sending
sender = SynchronousSender()
queue = SynchronousQueue(sender)
channel = TelemetryChannel(None, queue)

_telemetry_client = TelemetryClient(instrumentation_key, channel)
logging.info("Application Insights TelemetryClient initialized successfully")
else:
logging.error("Could not extract InstrumentationKey from connection string")
except Exception as e:
logging.error(f"Failed to initialize TelemetryClient: {e}")

return _telemetry_client


def track_event_if_configured(event_name: str, event_data: dict):
"""Track a custom event to Application Insights customEvents table.

This uses the Application Insights SDK TelemetryClient which properly
sends custom events to the customEvents table in Application Insights.
"""
"""Track a custom event to Application Insights customEvents table."""
instrumentation_key = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")
if instrumentation_key:
try:
client = _get_telemetry_client()
if client:
# Convert all values to strings to ensure compatibility
properties = {k: str(v) for k, v in event_data.items()}

# Track the custom event
client.track_event(event_name, properties=properties)
client.flush() # Ensure immediate sending

logging.debug(f"Tracked custom event: {event_name} with data: {event_data}")
else:
logging.warning("TelemetryClient not available, custom event not tracked")
except Exception as e:
logging.error(f"Failed to track event {event_name}: {e}")
track_event(event_name, event_data)
Comment thread
Abdul-Microsoft marked this conversation as resolved.
else:
logging.warning(
f"Skipping track_event for {event_name} as Application Insights is not configured"
Expand Down
Loading
Loading