diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 9adff38c..103f8222 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -1,4 +1,4 @@ -name: Validate Deployment +name: Deploy-Test-Cleanup Pipeline on: push: branches: @@ -8,52 +8,51 @@ on: schedule: - cron: "0 10,22 * * *" # Runs at 10:00 AM and 10:00 PM GMT +env: + GPT_CAPACITY: 250 + TEXT_EMBEDDING_CAPACITY: 200 + jobs: deploy: - runs-on: windows-latest # Use a Windows runner for PowerShell scripts + runs-on: ubuntu-latest + outputs: + RESOURCE_GROUP_NAME: ${{ steps.get_webapp_url.outputs.RESOURCE_GROUP_NAME }} + KUBERNETES_RESOURCE_GROUP_NAME: ${{ steps.get_webapp_url.outputs.KUBERNETES_RESOURCE_GROUP_NAME }} + WEBAPP_URL: ${{ steps.get_webapp_url.outputs.WEBAPP_URL }} + OPENAI_RESOURCE_NAME: ${{ steps.get_webapp_url.outputs.OPENAI_RESOURCE_NAME }} + DOCUMENT_INTELLIGENCE_RESOURCE_NAME: ${{ steps.get_webapp_url.outputs.DOCUMENT_INTELLIGENCE_RESOURCE_NAME }} + VALID_REGION: ${{ steps.get_webapp_url.outputs.VALID_REGION }} steps: - name: Checkout Code uses: actions/checkout@v4 # Checks out your repository - # Install Azure CLI + - name: Install Azure CLI - shell: pwsh + shell: bash run: | - Invoke-WebRequest -Uri https://aka.ms/installazurecliwindows -OutFile AzureCLI.msi - Start-Process msiexec.exe -ArgumentList '/I AzureCLI.msi /quiet' -Wait + curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash + az --version # Verify installation - # Install kubectl (Windows method) - - name: Install kubectl - shell: pwsh + - name: Install Kubernetes CLI (kubectl) + shell: bash run: | - Invoke-WebRequest -Uri https://dl.k8s.io/release/v1.28.0/bin/windows/amd64/kubectl.exe -OutFile kubectl.exe - Move-Item -Path ./kubectl.exe -Destination "C:\kubectl.exe" - [Environment]::SetEnvironmentVariable('PATH', $env:PATH + ';C:\', [System.EnvironmentVariableTarget]::Machine) + az aks install-cli + az extension add --name aks-preview - # Install Helm (Windows method) - name: Install Helm - shell: pwsh + shell: bash run: | - Invoke-WebRequest -Uri https://get.helm.sh/helm-v3.13.0-windows-amd64.zip -OutFile helm.zip - Expand-Archive helm.zip -DestinationPath helm - Move-Item -Path ./helm/windows-amd64/helm.exe -Destination "C:\helm.exe" - [Environment]::SetEnvironmentVariable('PATH', $env:PATH + ';C:\', [System.EnvironmentVariableTarget]::Machine) - - - name: Set Docker environment variables - run: echo "DOCKER_BUILDKIT=0" >> $GITHUB_ENV + curl https://baltocdn.com/helm/signing.asc | gpg --dearmor | sudo tee /usr/share/keyrings/helm.gpg > /dev/null + sudo apt-get install apt-transport-https --yes + echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/helm.gpg] https://baltocdn.com/helm/stable/debian/ all main" | sudo tee /etc/apt/sources.list.d/helm-stable-debian.list + sudo apt-get update + sudo apt-get install helm - # Set up Docker - name: Set up Docker uses: docker/setup-buildx-action@v3 with: driver: docker - - name: Setup PowerShell - shell: pwsh - run: | - $PSVersionTable.PSVersion - - # Run Quota Check Script - name: Run Quota Check id: quota-check shell: pwsh @@ -84,11 +83,10 @@ jobs: AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }} AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }} - GPT_MIN_CAPACITY: "10" - TEXT_EMBEDDING_MIN_CAPACITY: "10" + GPT_MIN_CAPACITY: ${{ env.GPT_CAPACITY }} + TEXT_EMBEDDING_MIN_CAPACITY: ${{ env.TEXT_EMBEDDING_CAPACITY }} AZURE_REGIONS: "${{ vars.AZURE_REGIONS }}" - # Send Notification on Quota Failure - name: Send Notification on Quota Failure if: env.QUOTA_FAILED == 'true' shell: pwsh @@ -98,7 +96,7 @@ jobs: # Construct the email body $EMAIL_BODY = @" { - "body": "

Dear Team,

The quota check has failed, and the pipeline cannot proceed.

Build URL: $RUN_URL

Please take necessary action.

Best regards,
Your Automation Team

" + "body": "

Dear Team,

The quota check has failed, and the pipeline cannot proceed.

Build URL: $RUN_URL

Please take necessary action.

Best regards,
Your Automation Team

" } "@ @@ -140,97 +138,234 @@ jobs: yes "@ $input | pwsh ./resourcedeployment.ps1 - echo "Resource Group Name is ${{ env.rg_name }}" - echo "Kubernetes resource group are ${{ env.krg_name }}" + Write-Host "Resource Group Name is ${{ env.rg_name }}" + Write-Host "Kubernetes resource group are ${{ env.krg_name }}" env: AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }} AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }} - - - name: Cleanup Resource Group - if: always() # Ensures this step runs even if the deployment fails - shell: pwsh + + - name: Extract Web App URL and Increase TPM + id: get_webapp_url + shell: bash run: | - az login --service-principal --username ${{ secrets.AZURE_CLIENT_ID }} --password ${{ secrets.AZURE_CLIENT_SECRET }} --tenant ${{ secrets.AZURE_TENANT_ID }} - az group delete --name ${{ env.rg_name }} --yes --no-wait - az group delete --name ${{ env.krg_name }} --yes --no-wait - env: - AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }} - AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} - AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }} + # Save the resource group name and Kubernetes resource group name to GITHUB_OUTPUT + echo "RESOURCE_GROUP_NAME=${{ env.rg_name }}" >> $GITHUB_OUTPUT + echo "KUBERNETES_RESOURCE_GROUP_NAME=${{ env.krg_name }}" >> $GITHUB_OUTPUT + echo "VALID_REGION=${{ env.VALID_REGION }}" >> $GITHUB_OUTPUT + + if az account show &> /dev/null; then + echo "Azure CLI is authenticated." + else + echo "Azure CLI is not authenticated. Logging in..." + az login --service-principal --username ${{ secrets.AZURE_CLIENT_ID }} --password ${{ secrets.AZURE_CLIENT_SECRET }} --tenant ${{ secrets.AZURE_TENANT_ID }} + fi + az account set --subscription ${{ secrets.AZURE_SUBSCRIPTION_ID }} + + # Get the Web App URL and save it to GITHUB_OUTPUT + echo "Retrieving Web App URL..." + public_ip_name=$(az network public-ip list --resource-group ${{ env.krg_name }} --query "[?contains(name, 'kubernetes-')].name" -o tsv) + fqdn=$(az network public-ip show --resource-group ${{ env.krg_name }} --name $public_ip_name --query "dnsSettings.fqdn" -o tsv) + if [ -n "$fqdn" ]; then + echo "WEBAPP_URL=https://$fqdn" >> $GITHUB_OUTPUT + echo "Web App URL is https://$fqdn" + else + echo "Failed to retrieve Web App URL." + exit 1 + fi - - name: Wait for Resource Deletion to Complete - shell: pwsh + # Get Azure OpenAI resource name + openai_resource_name=$(az cognitiveservices account list --resource-group ${{ env.rg_name }} --query "[?kind=='OpenAI'].name | [0]" -o tsv) + if [ -z "$openai_resource_name" ]; then + echo "No Azure OpenAI resource found in the resource group." + exit 1 + fi + echo "OpenAI resource name is $openai_resource_name" + echo "OPENAI_RESOURCE_NAME=$openai_resource_name" >> $GITHUB_OUTPUT + + # Get Azure Document Intelligence resource name + document_intelligence_resource_name=$(az cognitiveservices account list --resource-group ${{ env.rg_name }} --query "[?kind=='FormRecognizer'].name | [0]" -o tsv) + if [ -z "$document_intelligence_resource_name" ]; then + echo "No Azure Document Intelligence resource found in the resource group." + else + echo "Document Intelligence resource name is $document_intelligence_resource_name" + echo "DOCUMENT_INTELLIGENCE_RESOURCE_NAME=$document_intelligence_resource_name" >> $GITHUB_OUTPUT + fi + + # Increase the TPM for the Azure OpenAI models + echo "Increasing TPM for Azure OpenAI models..." + openai_gpt_deployment_url="/subscriptions/${{ secrets.AZURE_SUBSCRIPTION_ID }}/resourceGroups/${{ env.rg_name }}/providers/Microsoft.CognitiveServices/accounts/$openai_resource_name/deployments/gpt-4o-mini?api-version=2023-05-01" + az rest -m put -u "$openai_gpt_deployment_url" -b "{'sku':{'name':'Standard','capacity':${{ env.GPT_CAPACITY }}},'properties': {'model': {'format': 'OpenAI','name': 'gpt-4o-mini','version': '2024-07-18'}}}" + if [ $? -ne 0 ]; then + echo "Failed to increase TPM for GPT deployment." + exit 1 + else + echo "Successfully increased TPM for GPT deployment." + fi + openai_embedding_deployment_url="/subscriptions/${{ secrets.AZURE_SUBSCRIPTION_ID }}/resourceGroups/${{ env.rg_name }}/providers/Microsoft.CognitiveServices/accounts/$openai_resource_name/deployments/text-embedding-large?api-version=2023-05-01" + az rest -m put -u "$openai_embedding_deployment_url" -b "{'sku':{'name':'Standard','capacity': ${{ env.TEXT_EMBEDDING_CAPACITY }}},'properties': {'model': {'format': 'OpenAI','name': 'text-embedding-3-large','version': '1'}}}" + if [ $? -ne 0 ]; then + echo "Failed to increase TPM for Text Embedding deployment." + exit 1 + else + echo "Successfully increased TPM for Text Embedding deployment." + fi + + - name: Validate Deployment + shell: bash run: | - $retries = 0 - $maxRetries = 3 - $sleepIntervals = @(700, 200, 200) - - while ($retries -lt $maxRetries) { - $rgStatus = az group exists --name ${{ env.rg_name }} - $krgStatus = az group exists --name ${{ env.krg_name }} - - - # if (-not $rgStatus -and -not $krgStatus) { - # Write-Host "Both resource groups deleted successfully." - # break - # } - if ($rgStatus -eq "false" -and $krgStatus -eq "false") { - Write-Host "Both resource groups deleted successfully." + webapp_url="${{ steps.get_webapp_url.outputs.WEBAPP_URL }}" + echo "Validating web app at: $webapp_url" + + # Enhanced health check with retry logic + max_attempts=7 + attempt=1 + success=false + + while [ $attempt -le $max_attempts ] && [ "$success" = false ]; do + echo "Attempt $attempt/$max_attempts: Checking web app health..." + + # Check if web app responds + http_code=$(curl -s -o /dev/null -w "%{http_code}" "$webapp_url" || echo "000") + + if [ "$http_code" -eq 200 ]; then + echo "✅ Web app is healthy (HTTP $http_code)" + success=true + elif [ "$http_code" -eq 404 ]; then + echo "❌ Web app not found (HTTP 404)" break - } + elif [ "$http_code" -eq 503 ] || [ "$http_code" -eq 502 ]; then + echo "⚠️ Web app temporarily unavailable (HTTP $http_code), retrying..." + sleep 20 + else + echo "⚠️ Web app returned HTTP $http_code, retrying..." + sleep 20 + fi + + attempt=$((attempt + 1)) + done + + if [ "$success" = false ]; then + echo "❌ Web app validation failed after $max_attempts attempts" + exit 1 + fi - $retries++ - if ($retries -eq $maxRetries) { - Write-Host "Resource groups deletion not confirmed after $maxRetries attempts. Exiting." + - name: Run Post Deployment Script + shell: pwsh + run: | + Write-Host "Running post deployment script to upload files..." + cd Deployment + try { + .\uploadfiles.ps1 -EndpointUrl ${{ steps.get_webapp_url.outputs.WEBAPP_URL }} + Write-Host "ExitCode: $LASTEXITCODE" + if ($LASTEXITCODE -eq $null -or $LASTEXITCODE -eq 0) { + Write-Host "✅ Post deployment script completed successfully." + } else { + Write-Host "❌ Post deployment script failed with exit code: $LASTEXITCODE" exit 1 } - - Write-Host "Resource groups still exist. Retrying in $($sleepIntervals[$retries - 1]) seconds..." - Start-Sleep -Seconds $sleepIntervals[$retries - 1] + } + catch { + Write-Host "❌ Post deployment script failed with error: $($_.Exception.Message)" + exit 1 } - - name: Purging the Resources - if: success() - shell: pwsh + - name: Logout from Azure + if: always() + shell: bash run: | - # Set variables using GitHub Actions environment values - $solutionPrefix = "${{ env.SOLUTION_PREFIX }}" - $subscriptionId = "${{ secrets.AZURE_SUBSCRIPTION_ID }}" - $resourceGroupName = "${{ env.rg_name }}" - - $openai_name = "openaiservice-$solutionPrefix" - $cognitiveservice_name = "cognitiveservice-$solutionPrefix" - - # Debug: Print resource names - Write-Host "Purging OpenAI resource: $openai_name" - Write-Host "Purging CognitiveService Account: $cognitiveservice_name" - - # Construct resource IDs - $openaiResourceId = "/subscriptions/$subscriptionId/providers/Microsoft.CognitiveServices/locations/${{ env.VALID_REGION }}/resourceGroups/$resourceGroupName/deletedAccounts/$openai_name" - $cognitiveResourceId = "/subscriptions/$subscriptionId/providers/Microsoft.CognitiveServices/locations/${{ env.VALID_REGION }}/resourceGroups/$resourceGroupName/deletedAccounts/$cognitiveservice_name" + if az account show &> /dev/null; then + echo "Logging out from Azure..." + az logout + echo "Logged out from Azure successfully." + else + echo "Azure CLI is not authenticated. Skipping logout." + fi + + + e2e-test: + needs: deploy + uses: ./.github/workflows/test-automation.yml + with: + DKM_URL: ${{ needs.deploy.outputs.WEBAPP_URL }} + secrets: inherit + + + cleanup-deployment: + if: always() + needs: [deploy, e2e-test] + runs-on: ubuntu-latest + env: + RESOURCE_GROUP_NAME: ${{ needs.deploy.outputs.RESOURCE_GROUP_NAME }} + KUBERNETES_RESOURCE_GROUP_NAME: ${{ needs.deploy.outputs.KUBERNETES_RESOURCE_GROUP_NAME }} + OPENAI_RESOURCE_NAME: ${{ needs.deploy.outputs.OPENAI_RESOURCE_NAME }} + DOCUMENT_INTELLIGENCE_RESOURCE_NAME: ${{ needs.deploy.outputs.DOCUMENT_INTELLIGENCE_RESOURCE_NAME }} + VALID_REGION: ${{ needs.deploy.outputs.VALID_REGION }} - # Debug: Print constructed resource IDs - Write-Host "Command to purge OpenAI resource: az resource delete --ids `"$openaiResourceId`" --verbose" - Write-Host "Command to purge CognitiveService Account: az resource delete --ids `"$cognitiveResourceId`" --verbose" - # Purge OpenAI Resource - az resource delete --ids $openaiResourceId --verbose - if (-not $?) { - Write-Host "Failed to purge OpenAI resource: $openaiResourceId" - } + steps: + - name: Install Azure CLI + shell: bash + run: | + curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash + az --version # Verify installation + + - name: Login to Azure + shell: bash + run: | + az login --service-principal --username ${{ secrets.AZURE_CLIENT_ID }} --password ${{ secrets.AZURE_CLIENT_SECRET }} --tenant ${{ secrets.AZURE_TENANT_ID }} + az account set --subscription "${{ secrets.AZURE_SUBSCRIPTION_ID }}" - # Purge CognitiveService Account + - name: Delete Resource Groups + if: env.RESOURCE_GROUP_NAME != '' + shell: bash + run: | + az group delete --name ${{ env.RESOURCE_GROUP_NAME }} --yes --no-wait + az group delete --name ${{ env.KUBERNETES_RESOURCE_GROUP_NAME }} --yes --no-wait + - name: Wait for Resource Deletion to Complete + shell: bash + run: | + echo "Waiting for Azure OpenaAI and Document Intelligence resources to be deleted..." + sleep 60 + retries=0 + max_retries=3 + sleep_duration=60 + while [ $retries -lt $max_retries ]; do + aoai_exists=$(az resource list --resource-group ${{ env.RESOURCE_GROUP_NAME }} --name ${{ env.OPENAI_RESOURCE_NAME }} --query "[0].name" -o tsv) + di_exists=$(az resource list --resource-group ${{ env.RESOURCE_GROUP_NAME }} --name ${{ env.DOCUMENT_INTELLIGENCE_RESOURCE_NAME }} --query "[0].name" -o tsv) + + if [ -z "$aoai_exists" ] && [ -z "$di_exists" ]; then + echo "Resources deleted successfully." + break + else + echo "Resources still exist, retrying in $((sleep_duration * (retries + 1))) seconds..." + sleep $((sleep_duration * (retries + 1))) + retries=$((retries + 1)) + fi + done - az resource delete --ids $cognitiveResourceId --verbose - if (-not $?) { - Write-Host "Failed to purge CognitiveService Account." - } + - name: Purging the Resources + if: success() + shell: bash + run: | + echo "Purging the Azure OpenAI and Document Intelligence resources..." + if [ -z "${{ env.OPENAI_RESOURCE_NAME }}" ]; then + echo "No Azure OpenAI resource to purge." + else + echo "Purging Azure OpenAI resource..." + az cognitiveservices account purge --name ${{ env.OPENAI_RESOURCE_NAME }} --resource-group ${{ env.RESOURCE_GROUP_NAME }} --location ${{ env.VALID_REGION }} + fi + + if [ -z "${{ env.DOCUMENT_INTELLIGENCE_RESOURCE_NAME }}" ]; then + echo "No Azure Document Intelligence resource to purge." + else + echo "Purging Azure Document Intelligence resources..." + az cognitiveservices account purge --name ${{ env.DOCUMENT_INTELLIGENCE_RESOURCE_NAME }} --resource-group ${{ env.RESOURCE_GROUP_NAME }} --location ${{ env.VALID_REGION }} + fi - name: Send Notification on Failure - if: failure() + if: failure() || needs.deploy.result == 'failure' shell: pwsh run: | # Define the RUN_URL variable @@ -251,3 +386,15 @@ jobs: } catch { Write-Output "Failed to send notification." } + + - name: Logout from Azure + if: always() + shell: bash + run: | + if az account show &> /dev/null; then + echo "Logging out from Azure..." + az logout + echo "Logged out from Azure successfully." + else + echo "Azure CLI is not authenticated. Skipping logout." + fi \ No newline at end of file diff --git a/.github/workflows/test-automation.yml b/.github/workflows/test-automation.yml index 5383d57b..43e6d481 100644 --- a/.github/workflows/test-automation.yml +++ b/.github/workflows/test-automation.yml @@ -1,18 +1,19 @@ name: Test Automation DKM on: - push: - branches: - - main - - dev - paths: - - 'tests/e2e-test/**' - schedule: - - cron: '0 13 * * *' # Runs at 1 PM UTC - workflow_dispatch: + workflow_call: + inputs: + DKM_URL: + required: true + type: string + description: "Web URL for DKM" + secrets: + EMAILNOTIFICATION_LOGICAPP_URL_TA: + required: false + description: "Logic App URL for email notifications" env: - url: ${{ vars.DKM_URL }} + url: ${{ inputs.DKM_URL }} accelerator_name: "DKM" jobs: @@ -27,20 +28,6 @@ jobs: with: python-version: '3.13' - - name: Azure CLI Login - uses: azure/login@v2 - with: - creds: '{"clientId":"${{ secrets.AZURE_CLIENT_ID }}","clientSecret":"${{ secrets.AZURE_CLIENT_SECRET }}","subscriptionId":"${{ secrets.AZURE_SUBSCRIPTION_ID }}","tenantId":"${{ secrets.AZURE_TENANT_ID }}"}' - - - name: Start AKS - id: start-aks - uses: azure/cli@v2 - with: - azcliversion: 'latest' - inlineScript: | - az aks install-cli - if [ "$(az aks show --resource-group ${{ vars.DKM_RG }} --name ${{ vars.DKM_AKS_NAME }} --query "powerState.code" -o tsv)" = "Running" ]; then echo "AKS is running"; else az aks start --resource-group ${{ vars.DKM_RG }} --name ${{ vars.DKM_AKS_NAME }}; fi - - name: Install dependencies run: | python -m pip install --upgrade pip @@ -49,6 +36,15 @@ jobs: - name: Ensure browsers are installed run: python -m playwright install --with-deps chromium + - name: Open URL + run: | + echo "Opening URL: ${{ env.url }}" + python -m webbrowser "${{ env.url }}" + + - name: Sleep for 30 seconds + run: sleep 30s + shell: bash + - name: Run tests(1) id: test1 run: | @@ -117,14 +113,4 @@ jobs: # Send the notification curl -X POST "${{ secrets.EMAILNOTIFICATION_LOGICAPP_URL_TA }}" \ -H "Content-Type: application/json" \ - -d "$EMAIL_BODY" || echo "Failed to send notification" - - - name: Stop AKS - if: always() - uses: azure/cli@v2 - with: - azcliversion: 'latest' - inlineScript: | - az aks install-cli - if [ "$(az aks show --resource-group ${{ vars.DKM_RG }} --name ${{ vars.DKM_AKS_NAME }} --query "powerState.code" -o tsv)" = "Running" ]; then az aks stop --resource-group ${{ vars.DKM_RG }} --name ${{ vars.DKM_AKS_NAME }}; else echo "AKS is already stopped"; fi - az logout \ No newline at end of file + -d "$EMAIL_BODY" || echo "Failed to send notification" \ No newline at end of file diff --git a/Deployment/checkquota.ps1 b/Deployment/checkquota.ps1 index 2617ed2f..7cc9eecb 100644 --- a/Deployment/checkquota.ps1 +++ b/Deployment/checkquota.ps1 @@ -66,11 +66,12 @@ foreach ($REGION in $REGIONS) { foreach ($MODEL in $MIN_CAPACITY.Keys) { - $MODEL_INFO = $QUOTA_INFO | Where-Object { $_.Name -eq $MODEL } + $MODEL_INFO = $QUOTA_INFO | Where-Object { $_.Name.Value -eq $MODEL } if (-not $MODEL_INFO) { Write-Host "⚠️ WARNING: No quota information found for model: $MODEL in $REGION. Skipping." - continue + $INSUFFICIENT_QUOTA = $true + break } $CURRENT_VALUE = [int]$MODEL_INFO.CurrentValue