Skip to content

Commit 0bbd905

Browse files
Merge pull request #389 from microsoft/psl-logging-enhancement
feat: improve Application Insights logging and telemetry handling
2 parents 57c1324 + 0719c2e commit 0bbd905

18 files changed

Lines changed: 336 additions & 526 deletions

infra/main.bicep

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,6 @@ module applicationInsights 'br/public:avm/res/insights/component:0.7.0' = if (en
285285
name: 'appi-${solutionSuffix}'
286286
location: location
287287
workspaceResourceId: logAnalyticsWorkspaceResourceId
288-
diagnosticSettings: [{ workspaceResourceId: logAnalyticsWorkspaceResourceId }]
289288
tags: allTags
290289
enableTelemetry: enableTelemetry
291290
retentionInDays: 365

infra/main.json

Lines changed: 7 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"_generator": {
77
"name": "bicep",
88
"version": "0.41.2.15936",
9-
"templateHash": "18262017557117529046"
9+
"templateHash": "3093757051086668797"
1010
},
1111
"name": "Modernize Your Code Solution Accelerator",
1212
"description": "CSA CTO Gold Standard Solution Accelerator for Modernize Your Code. \r\n"
@@ -3919,13 +3919,6 @@
39193919
"value": "[parameters('location')]"
39203920
},
39213921
"workspaceResourceId": "[if(variables('useExistingLogAnalytics'), createObject('value', parameters('existingLogAnalyticsWorkspaceId')), createObject('value', reference('logAnalyticsWorkspace').outputs.resourceId.value))]",
3922-
"diagnosticSettings": {
3923-
"value": [
3924-
{
3925-
"workspaceResourceId": "[if(variables('useExistingLogAnalytics'), parameters('existingLogAnalyticsWorkspaceId'), reference('logAnalyticsWorkspace').outputs.resourceId.value)]"
3926-
}
3927-
]
3928-
},
39293922
"tags": {
39303923
"value": "[variables('allTags')]"
39313924
},
@@ -12902,11 +12895,11 @@
1290212895
},
1290312896
"dependsOn": [
1290412897
"applicationInsights",
12898+
"[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').storageBlob)]",
1290512899
"[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').ods)]",
1290612900
"[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').agentSvc)]",
12907-
"[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').storageBlob)]",
12908-
"[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').oms)]",
1290912901
"[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').monitor)]",
12902+
"[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').oms)]",
1291012903
"dataCollectionEndpoint",
1291112904
"logAnalyticsWorkspace",
1291212905
"virtualNetwork"
@@ -16624,15 +16617,9 @@
1662416617
"osType": {
1662516618
"value": "Windows"
1662616619
},
16627-
"vmSize": {
16628-
"value": "[if(not(empty(parameters('vmSize'))), parameters('vmSize'), 'Standard_D2s_v5')]"
16629-
},
16630-
"adminUsername": {
16631-
"value": "[if(not(empty(parameters('vmAdminUsername'))), parameters('vmAdminUsername'), 'JumpboxAdminUser')]"
16632-
},
16633-
"adminPassword": {
16634-
"value": "[if(not(empty(parameters('vmAdminPassword'))), parameters('vmAdminPassword'), 'JumpboxAdminP@ssw0rd1234!')]"
16635-
},
16620+
"vmSize": "[if(not(empty(parameters('vmSize'))), createObject('value', parameters('vmSize')), createObject('value', 'Standard_D2s_v5'))]",
16621+
"adminUsername": "[if(not(empty(parameters('vmAdminUsername'))), createObject('value', parameters('vmAdminUsername')), createObject('value', 'JumpboxAdminUser'))]",
16622+
"adminPassword": "[if(not(empty(parameters('vmAdminPassword'))), createObject('value', parameters('vmAdminPassword')), createObject('value', 'JumpboxAdminP@ssw0rd1234!'))]",
1663616623
"managedIdentities": {
1663716624
"value": {
1663816625
"systemAssigned": true
@@ -31930,8 +31917,8 @@
3193031917
"dependsOn": [
3193131918
"aiServices",
3193231919
"[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').openAI)]",
31933-
"[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').aiServices)]",
3193431920
"[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').cognitiveServices)]",
31921+
"[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').aiServices)]",
3193531922
"virtualNetwork"
3193631923
]
3193731924
},

infra/main_custom.bicep

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,6 @@ module applicationInsights 'br/public:avm/res/insights/component:0.7.0' = if (en
288288
name: 'appi-${solutionSuffix}'
289289
location: location
290290
workspaceResourceId: logAnalyticsWorkspaceResourceId
291-
diagnosticSettings: [{ workspaceResourceId: logAnalyticsWorkspaceResourceId }]
292291
tags: allTags
293292
enableTelemetry: enableTelemetry
294293
retentionInDays: 365

src/backend/api/api_routes.py

Lines changed: 44 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,15 @@ def record_exception_to_trace(e):
4444
span.set_status(Status(StatusCode.ERROR, str(e)))
4545

4646

47+
def set_span_attributes(**attributes):
48+
"""Set key-value attributes on the current OpenTelemetry span."""
49+
span = trace.get_current_span()
50+
if span and span.is_recording():
51+
for key, value in attributes.items():
52+
if value:
53+
span.set_attribute(key, str(value))
54+
55+
4756
# start processing the batch
4857
@router.post("/start-processing")
4958
async def start_processing(request: Request):
@@ -82,11 +91,13 @@ async def start_processing(request: Request):
8291
500:
8392
description: Internal server error
8493
"""
94+
batch_id = None
8595
try:
8696
payload = await request.json()
8797
batch_id = payload.get("batch_id")
8898
translate_from = payload.get("translate_from")
8999
translate_to = payload.get("translate_to")
100+
set_span_attributes(batch_id=batch_id)
90101

91102
track_event_if_configured(
92103
"ProcessingStart",
@@ -103,6 +114,10 @@ async def start_processing(request: Request):
103114
"message": "Files processed",
104115
}
105116
except Exception as e:
117+
event_data = {"error": str(e)}
118+
if batch_id is not None:
119+
event_data["batch_id"] = batch_id
120+
track_event_if_configured("ProcessingFailed", event_data)
106121
record_exception_to_trace(e)
107122
raise HTTPException(status_code=500, detail=str(e)) from e
108123

@@ -146,6 +161,7 @@ async def download_files(batch_id: str):
146161
# call batch_service get_batch_for_zip to get all files for batch_id
147162
batch_service = BatchService()
148163
await batch_service.initialize_database()
164+
set_span_attributes(batch_id=batch_id)
149165

150166
file_data = await batch_service.get_batch_for_zip(batch_id)
151167
if not file_data:
@@ -187,6 +203,7 @@ async def download_files(batch_id: str):
187203
)
188204
return Response(zip_data, media_type="application/zip", headers=headers)
189205
except Exception as e:
206+
track_event_if_configured("DownloadZipFailed", {"batch_id": batch_id, "error": str(e)})
190207
record_exception_to_trace(e)
191208
raise HTTPException(
192209
status_code=404, detail=f"Error creating ZIP file: {str(e)}"
@@ -270,7 +287,7 @@ async def batch_status_updates(
270287
await close_connection(batch_id)
271288
except Exception as e:
272289
record_exception_to_trace(e)
273-
logger.error("Error in WebSocket connection", error=str(e))
290+
logger.error("Error in WebSocket connection", error=str(e), batch_id=batch_id)
274291
await close_connection(batch_id)
275292

276293

@@ -373,15 +390,16 @@ async def get_batch_status(request: Request, batch_id: str):
373390
# Authenticate user
374391
authenticated_user = get_authenticated_user(request)
375392
user_id = authenticated_user.user_principal_id
393+
set_span_attributes(batch_id=batch_id, user_id=user_id)
376394
if not user_id:
377395
track_event_if_configured(
378-
"UserIdNotFound", {"status_code": 400, "detail": "no user"}
396+
"UserIdNotFound", {"status_code": 400, "detail": "no user", "batch_id": batch_id}
379397
)
380398
raise HTTPException(status_code=401, detail="User not authenticated")
381399

382400
# Validate batch_id format
383401
if not batch_service.is_valid_uuid(batch_id):
384-
track_event_if_configured("InvalidBatchId", {"batch_id": batch_id})
402+
track_event_if_configured("InvalidBatchId", {"batch_id": batch_id, "user_id": user_id})
385403
raise HTTPException(status_code=400, detail="Invalid batch_id format")
386404

387405
# Fetch batch details
@@ -399,7 +417,7 @@ async def get_batch_status(request: Request, batch_id: str):
399417
record_exception_to_trace(e)
400418
raise e
401419
except Exception as e:
402-
logger.error("Error retrieving batch history", error=str(e))
420+
logger.error("Error retrieving batch history", error=str(e), batch_id=batch_id)
403421
record_exception_to_trace(e)
404422
error_message = str(e)
405423
if "403" in error_message:
@@ -417,9 +435,10 @@ async def get_batch_summary(request: Request, batch_id: str):
417435
# Authenticate user
418436
authenticated_user = get_authenticated_user(request)
419437
user_id = authenticated_user.user_principal_id
438+
set_span_attributes(batch_id=batch_id, user_id=user_id)
420439
if not user_id:
421440
track_event_if_configured(
422-
"UserIdNotFound", {"status_code": 400, "detail": "no user"}
441+
"UserIdNotFound", {"status_code": 400, "detail": "no user", "batch_id": batch_id}
423442
)
424443
raise HTTPException(status_code=401, detail="User not authenticated")
425444

@@ -431,16 +450,16 @@ async def get_batch_summary(request: Request, batch_id: str):
431450
)
432451
raise HTTPException(status_code=404, detail="No batch summary found.")
433452
track_event_if_configured(
434-
"BatchSummaryRetrieved", {"batch_summary": batch_summary}
453+
"BatchSummaryRetrieved", {"batch_id": batch_id, "user_id": user_id, "batch_summary": batch_summary}
435454
)
436455
return batch_summary
437456

438457
except HTTPException as e:
439-
logger.error("Error fetching batch summary", error=str(e))
458+
logger.error("Error fetching batch summary", error=str(e), batch_id=batch_id)
440459
record_exception_to_trace(e)
441460
raise e
442461
except Exception as e:
443-
logger.error("Error fetching batch summary", error=str(e))
462+
logger.error("Error fetching batch summary", error=str(e), batch_id=batch_id)
444463
record_exception_to_trace(e)
445464
raise HTTPException(status_code=404, detail="Batch not found") from e
446465

@@ -538,16 +557,17 @@ async def upload_file(
538557
authenticated_user = get_authenticated_user(request)
539558
user_id = authenticated_user.user_principal_id
540559

560+
set_span_attributes(batch_id=batch_id, user_id=user_id)
541561
if not user_id:
542562
track_event_if_configured(
543-
"UserIdNotFound", {"status_code": 400, "detail": "no user"}
563+
"UserIdNotFound", {"status_code": 400, "detail": "no user", "batch_id": batch_id}
544564
)
545565
raise HTTPException(status_code=401, detail="User not authenticated")
546566

547567
# Validate batch_id format
548-
logger.info(f"batch_id: {batch_id}")
568+
logger.info("Upload request received", batch_id=batch_id, filename=str(file.filename))
549569
if not batch_service.is_valid_uuid(batch_id):
550-
track_event_if_configured("InvalidBatchId", {"batch_id": batch_id})
570+
track_event_if_configured("InvalidBatchId", {"batch_id": batch_id, "user_id": user_id})
551571
raise HTTPException(status_code=400, detail="Invalid batch_id format")
552572

553573
# Upload file via BatchService
@@ -660,6 +680,7 @@ async def get_file_details(request: Request, file_id: str):
660680
# Authenticate user
661681
authenticated_user = get_authenticated_user(request)
662682
user_id = authenticated_user.user_principal_id
683+
set_span_attributes(file_id=file_id, user_id=user_id)
663684
if not user_id:
664685
track_event_if_configured(
665686
"UserIdNotFound", {"endpoint": "get_file_details"}
@@ -668,22 +689,22 @@ async def get_file_details(request: Request, file_id: str):
668689

669690
# Validate file_id format
670691
if not batch_service.is_valid_uuid(file_id):
671-
track_event_if_configured("InvalidFileId", {"file_id": file_id})
692+
track_event_if_configured("InvalidFileId", {"file_id": file_id, "user_id": user_id})
672693
raise HTTPException(status_code=400, detail="Invalid file_id format")
673694

674695
# Fetch file details
675696
file_data = await batch_service.get_file_report(file_id)
676697
if not file_data:
677-
track_event_if_configured("FileNotFound", {"file_id": file_id})
698+
track_event_if_configured("FileNotFound", {"file_id": file_id, "user_id": user_id})
678699
raise HTTPException(status_code=404, detail="File not found")
679-
track_event_if_configured("FileDetailsRetrieved", {"file_data": file_data})
700+
track_event_if_configured("FileDetailsRetrieved", {"file_id": file_id, "user_id": user_id, "file_data": file_data})
680701
return file_data
681702

682703
except HTTPException as e:
683704
record_exception_to_trace(e)
684705
raise e
685706
except Exception as e:
686-
logger.error("Error retrieving file details", error=str(e))
707+
logger.error("Error retrieving file details", error=str(e), file_id=file_id)
687708
record_exception_to_trace(e)
688709
raise HTTPException(status_code=500, detail="Internal server error") from e
689710

@@ -721,15 +742,16 @@ async def delete_batch_details(request: Request, batch_id: str):
721742
# Authenticate user
722743
authenticated_user = get_authenticated_user(request)
723744
user_id = authenticated_user.user_principal_id
745+
set_span_attributes(batch_id=batch_id, user_id=user_id)
724746
if not user_id:
725747
track_event_if_configured(
726-
"UserIdNotFound", {"endpoint": "delete_batch_details"}
748+
"UserIdNotFound", {"batch_id": batch_id, "endpoint": "delete_batch_details"}
727749
)
728750
raise HTTPException(status_code=401, detail="User not authenticated")
729751

730752
# Validate file_id format
731753
if not batch_service.is_valid_uuid(batch_id):
732-
track_event_if_configured("InvalidBatchId", {"batch_id": batch_id})
754+
track_event_if_configured("InvalidBatchId", {"batch_id": batch_id, "user_id": user_id})
733755
raise HTTPException(
734756
status_code=400, detail=f"Invalid batch_id format: {batch_id}"
735757
)
@@ -745,7 +767,7 @@ async def delete_batch_details(request: Request, batch_id: str):
745767
record_exception_to_trace(e)
746768
raise e
747769
except Exception as e:
748-
logger.error("Failed to delete batch from database", error=str(e))
770+
logger.error("Failed to delete batch from database", error=str(e), batch_id=batch_id)
749771
record_exception_to_trace(e)
750772
raise HTTPException(status_code=500, detail="Database connection error") from e
751773

@@ -783,6 +805,7 @@ async def delete_file_details(request: Request, file_id: str):
783805
# Authenticate user
784806
authenticated_user = get_authenticated_user(request)
785807
user_id = authenticated_user.user_principal_id
808+
set_span_attributes(file_id=file_id, user_id=user_id)
786809
if not user_id:
787810
track_event_if_configured(
788811
"UserIdNotFound", {"endpoint": "delete_file_details"}
@@ -791,15 +814,15 @@ async def delete_file_details(request: Request, file_id: str):
791814

792815
# Validate file_id format
793816
if not batch_service.is_valid_uuid(file_id):
794-
track_event_if_configured("InvalidFileId", {"file_id": file_id})
817+
track_event_if_configured("InvalidFileId", {"file_id": file_id, "user_id": user_id})
795818
raise HTTPException(
796819
status_code=400, detail=f"Invalid file_id format: {file_id}"
797820
)
798821

799822
# Delete file
800823
file_delete = await batch_service.delete_file(file_id, user_id)
801824
if file_delete is None:
802-
track_event_if_configured("FileDeleteNotFound", {"file_id": file_id})
825+
track_event_if_configured("FileDeleteNotFound", {"file_id": file_id, "user_id": user_id})
803826
raise HTTPException(status_code=404, detail=f"File not found: {file_id}")
804827

805828
logger.info(f"File deleted successfully: {file_delete}")
@@ -810,7 +833,7 @@ async def delete_file_details(request: Request, file_id: str):
810833
record_exception_to_trace(e)
811834
raise e
812835
except Exception as e:
813-
logger.error("Failed to delete file from database", error=str(e))
836+
logger.error("Failed to delete file from database", error=str(e), file_id=file_id)
814837
record_exception_to_trace(e)
815838
raise HTTPException(status_code=500, detail="Database connection error") from e
816839

src/backend/api/event_utils.py

Lines changed: 3 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -3,69 +3,18 @@
33
import os
44

55
# Third-party
6-
from applicationinsights import TelemetryClient
7-
from applicationinsights.channel import SynchronousQueue, SynchronousSender, TelemetryChannel
6+
from azure.monitor.events.extension import track_event
87

98
from dotenv import load_dotenv
109

1110
load_dotenv()
1211

13-
# Global telemetry client (initialized once)
14-
_telemetry_client = None
15-
16-
17-
def _get_telemetry_client():
18-
"""Get or create the Application Insights telemetry client."""
19-
global _telemetry_client
20-
21-
if _telemetry_client is None:
22-
connection_string = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")
23-
if connection_string:
24-
try:
25-
# Extract instrumentation key from connection string
26-
# Format: InstrumentationKey=xxx;IngestionEndpoint=https://...
27-
parts = dict(part.split('=', 1) for part in connection_string.split(';') if '=' in part)
28-
instrumentation_key = parts.get('InstrumentationKey')
29-
30-
if instrumentation_key:
31-
# Create a synchronous channel for immediate sending
32-
sender = SynchronousSender()
33-
queue = SynchronousQueue(sender)
34-
channel = TelemetryChannel(None, queue)
35-
36-
_telemetry_client = TelemetryClient(instrumentation_key, channel)
37-
logging.info("Application Insights TelemetryClient initialized successfully")
38-
else:
39-
logging.error("Could not extract InstrumentationKey from connection string")
40-
except Exception as e:
41-
logging.error(f"Failed to initialize TelemetryClient: {e}")
42-
43-
return _telemetry_client
44-
4512

4613
def track_event_if_configured(event_name: str, event_data: dict):
47-
"""Track a custom event to Application Insights customEvents table.
48-
49-
This uses the Application Insights SDK TelemetryClient which properly
50-
sends custom events to the customEvents table in Application Insights.
51-
"""
14+
"""Track a custom event to Application Insights customEvents table."""
5215
instrumentation_key = os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING")
5316
if instrumentation_key:
54-
try:
55-
client = _get_telemetry_client()
56-
if client:
57-
# Convert all values to strings to ensure compatibility
58-
properties = {k: str(v) for k, v in event_data.items()}
59-
60-
# Track the custom event
61-
client.track_event(event_name, properties=properties)
62-
client.flush() # Ensure immediate sending
63-
64-
logging.debug(f"Tracked custom event: {event_name} with data: {event_data}")
65-
else:
66-
logging.warning("TelemetryClient not available, custom event not tracked")
67-
except Exception as e:
68-
logging.error(f"Failed to track event {event_name}: {e}")
17+
track_event(event_name, event_data)
6918
else:
7019
logging.warning(
7120
f"Skipping track_event for {event_name} as Application Insights is not configured"

0 commit comments

Comments
 (0)