diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 5edab961f..27d2bae69 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -75,11 +75,11 @@ jobs: id: generate_rg_name run: | echo "Generating a unique resource group name..." - TIMESTAMP=$(date +%Y%m%d%H%M%S) - COMMON_PART="pslautomationRes" - UNIQUE_RG_NAME="${COMMON_PART}${TIMESTAMP}" + ACCL_NAME="docgen" # Account name as specified + SHORT_UUID=$(uuidgen | cut -d'-' -f1) + UNIQUE_RG_NAME="arg-${ACCL_NAME}-${SHORT_UUID}" echo "RESOURCE_GROUP_NAME=${UNIQUE_RG_NAME}" >> $GITHUB_ENV - echo "Generated Resource_GROUP_PREFIX: ${UNIQUE_RG_NAME}" + echo "Generated RESOURCE_GROUP_NAME: ${UNIQUE_RG_NAME}" - name: Check and Create Resource Group id: check_create_rg diff --git a/.gitignore b/.gitignore index ec2bcbd76..dd27693a3 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,6 @@ __pycache__/ venv myenv -scriptsenv/ \ No newline at end of file +scriptsenv/ + +scriptenv \ No newline at end of file diff --git a/README.md b/README.md index c53fd90e6..eb6076c60 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ Here are some example regions where the services are available: East US, East US ### Configurable Deployment Settings -When you start the deployment, most parameters will have **default values**, but you can update the following settings: +When you start the deployment, most parameters will have **default values**, but you can update the below settings by following the steps [here](./docs/CustomizingAzdParameters.md): | **Setting** | **Description** | **Default value** | |------------|----------------| ------------| @@ -138,6 +138,7 @@ If you're not using one of the above options for opening the project, then you'l * [Python 3.9+](https://www.python.org/downloads/) * [Docker Desktop](https://www.docker.com/products/docker-desktop/) * [Git](https://git-scm.com/downloads) + * 
[PowerShell](https://learn.microsoft.com/en-us/powershell/scripting/install/installing-powershell-on-windows?view=powershell-7.5) (Required for Windows users only. Follow the steps [here](./docs/PowershellSetup.md) to add it to the Windows PATH.) 2. Download the project code: @@ -192,7 +193,12 @@ To change the azd parameters from the default values, follow the steps [here](./ * This deployment will take *7-10 minutes* to provision the resources in your account and set up the solution with sample data. * If you get an error or timeout with deployment, changing the location can help, as there may be availability constraints for the resources. -5. Once the deployment has completed successfully, open the [Azure Portal](https://portal.azure.com/), go to the deployed resource group, find the App Service and get the app URL from `Default domain`. +5. Once the deployment has completed successfully, if you would like to use the sample data, run the bash command printed in the terminal. The command will look like the following: + ```shell + bash ./infra/scripts/process_sample_data.sh <storage-account-name> <storage-container-name> <key-vault-name> <cosmosdb-account-name> <resource-group-name> + ``` + +6. Open the [Azure Portal](https://portal.azure.com/), go to the deployed resource group, find the App Service and get the app URL from `Default domain`. 6. You can now delete the resources by running `azd down`, if you are done trying out the application. 
diff --git a/azure.yaml b/azure.yaml index 705bc041a..64d978545 100644 --- a/azure.yaml +++ b/azure.yaml @@ -31,6 +31,8 @@ hooks: run: | Write-Host "Web app URL: " Write-Host "$env:WEB_APP_URL" -ForegroundColor Cyan + Write-Host "`nIf you want to use the Sample Data, run the following command in the Bash terminal to process it:" + Write-Host "bash ./infra/scripts/process_sample_data.sh $env:STORAGE_ACCOUNT_NAME $env:STORAGE_CONTAINER_NAME $env:KEY_VAULT_NAME $env:COSMOSDB_ACCOUNT_NAME $env:RESOURCE_GROUP_NAME" -ForegroundColor Cyan shell: pwsh continueOnError: false interactive: true @@ -38,6 +40,9 @@ hooks: run: | echo "Web app URL: " echo $WEB_APP_URL + echo "" + echo "If you want to use the Sample Data, run the following command in the terminal to process it:" + echo "bash ./infra/scripts/process_sample_data.sh $STORAGE_ACCOUNT_NAME $STORAGE_CONTAINER_NAME $KEY_VAULT_NAME $COSMOSDB_ACCOUNT_NAME $RESOURCE_GROUP_NAME" shell: sh continueOnError: false interactive: true \ No newline at end of file diff --git a/docs/PowershellSetup.md b/docs/PowershellSetup.md new file mode 100644 index 000000000..76d3de4c1 --- /dev/null +++ b/docs/PowershellSetup.md @@ -0,0 +1,45 @@ +# Add PowerShell 7 to PATH in Windows + +This guide will help you add **PowerShell 7** (PowerShell Core) to your system’s PATH variable on Windows, so you can easily run it from any Command Prompt or Run dialog. + +## Prerequisites + +- You should have **PowerShell 7** installed on your machine. If you haven’t installed it yet, you can download it following the guide here: [Installing PowerShell on Windows | Microsoft Learn](https://learn.microsoft.com/en-us/powershell/scripting/install/installing-powershell-on-windows?view=powershell-7.5). +- **Administrative privileges are not required** unless you're modifying system-wide environment variables. You can modify your **user-specific PATH** without admin rights. + +## Steps to Add PowerShell 7 to PATH + +### 1. 
Open **System Properties** + - Press `Win + X` and choose **System**. + - Click on **Advanced system settings** on the left sidebar. This will open the **System Properties** window. + - In the **System Properties** window, click on the **Environment Variables** button at the bottom. + +### 2. Edit User Environment Variables + - In the **Environment Variables** window, under **User variables**, find the `Path` variable. + - Select the `Path` variable and click **Edit**. (If the `Path` variable doesn’t exist, click **New** and name it `Path`.) + +### 3. Check if PowerShell 7 Path is Already in PATH + - Before adding the path, make sure the following path is not already present in the list: + ``` + C:\Program Files\PowerShell\7\ + ``` + - If the path is already there, you don't need to add it again. +### 4. Add PowerShell 7 Path + - If the path is not already in the list, click **New** in the **Edit Environment Variable** window. + - Add the following path to the list: + ``` + C:\Program Files\PowerShell\7\ + ``` + > **Note:** If you installed PowerShell 7 in a custom location, replace the above path with the correct one. +### 5. Save Changes + - After adding the path, click **OK** to close the **Edit Environment Variable** window. + - Click **OK** again to close the **Environment Variables** window. + - Finally, click **OK** to exit the **System Properties** window. +### 6. Verify PowerShell 7 in PATH + - Open **Command Prompt** or **Run** (press `Win + R`). + - Type `pwsh` and press Enter. + - If PowerShell 7 opens, you've successfully added it to your PATH! +--- +## Troubleshooting +- **PowerShell 7 not opening:** Ensure the path to PowerShell 7 is entered correctly. If you're using a custom installation folder, check that the correct path is added to the `Path` variable. +- **Changes not taking effect:** Try restarting your computer or logging out and logging back in for the changes to apply. 
\ No newline at end of file diff --git a/docs/quota_check.md b/docs/quota_check.md index 3409d9a2b..6647943c2 100644 --- a/docs/quota_check.md +++ b/docs/quota_check.md @@ -1,6 +1,12 @@ ## Check Quota Availability Before Deployment -Before deploying the accelerator, **ensure sufficient quota availability** for the required model. \ +Before deploying the accelerator, **ensure sufficient quota availability** for the required model. + +### Login if you have not done so already +``` +azd auth login +``` + ### 📌 Default Models & Capacities: ``` diff --git a/infra/deploy_ai_foundry.bicep b/infra/deploy_ai_foundry.bicep index a5ec9e55e..81ca9ca03 100644 --- a/infra/deploy_ai_foundry.bicep +++ b/infra/deploy_ai_foundry.bicep @@ -92,7 +92,7 @@ resource applicationInsights 'Microsoft.Insights/components@2020-02-02' = { properties: { Application_Type: 'web' publicNetworkAccessForIngestion: 'Enabled' - publicNetworkAccessForQuery: 'Disabled' + publicNetworkAccessForQuery: 'Enabled' WorkspaceResourceId: logAnalytics.id } } diff --git a/infra/main.bicep b/infra/main.bicep index aa34e4a58..2870d37c9 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -424,6 +424,14 @@ module cosmosDBModule 'deploy_cosmos_db.bicep' = { } scope: resourceGroup(resourceGroup().name) } +// output copykbfiles string = './infra/scripts/copy_kb_files.sh ${storageAccount.outputs.storageName} ${storageAccount.outputs.storageContainer} ${managedIdentityModule.outputs.managedIdentityOutput.clientId}' +// output createindex string = './infra/scripts/run_create_index_scripts.sh ${kvault.outputs.keyvaultName} ${managedIdentityModule.outputs.managedIdentityOutput.clientId}' + +output STORAGE_ACCOUNT_NAME string = storageAccount.outputs.storageName +output STORAGE_CONTAINER_NAME string = storageAccount.outputs.storageContainer +output KEY_VAULT_NAME string = kvault.outputs.keyvaultName +output COSMOSDB_ACCOUNT_NAME string = cosmosDBModule.outputs.cosmosAccountName +output RESOURCE_GROUP_NAME string = 
resourceGroup().name // //========== Deployment script to upload sample data ========== // @@ -452,21 +460,21 @@ module cosmosDBModule 'deploy_cosmos_db.bicep' = { // dependsOn:[keyVault,uploadFiles] // } -//========== Deployment script to upload sample data ========== // -module uploadFiles 'deploy_post_deployment_scripts.bicep' = { - name : 'deploy_post_deployment_scripts' - params:{ - solutionName: solutionPrefix - solutionLocation: secondaryLocation - baseUrl: baseUrl - storageAccountName: storageAccount.outputs.storageName - containerName: storageAccount.outputs.storageContainer - managedIdentityObjectId:managedIdentityModule.outputs.managedIdentityOutput.id - managedIdentityClientId:managedIdentityModule.outputs.managedIdentityOutput.clientId - keyVaultName:aifoundry.outputs.keyvaultName - logAnalyticsWorkspaceResourceName: aifoundry.outputs.logAnalyticsWorkspaceResourceName - } -} +// //========== Deployment script to upload sample data ========== // +// module uploadFiles 'deploy_post_deployment_scripts.bicep' = { +// name : 'deploy_post_deployment_scripts' +// params:{ +// solutionName: solutionPrefix +// solutionLocation: secondaryLocation +// baseUrl: baseUrl +// storageAccountName: storageAccount.outputs.storageName +// containerName: storageAccount.outputs.storageContainer +// managedIdentityObjectId:managedIdentityModule.outputs.managedIdentityOutput.id +// managedIdentityClientId:managedIdentityModule.outputs.managedIdentityOutput.clientId +// keyVaultName:aifoundry.outputs.keyvaultName +// logAnalyticsWorkspaceResourceName: aifoundry.outputs.logAnalyticsWorkspaceResourceName +// } +// } // resource CosmosDB 'Microsoft.DocumentDB/databaseAccounts@2023-04-15' = { diff --git a/infra/main.json b/infra/main.json index 5e8eac8c6..5e38c91c6 100644 --- a/infra/main.json +++ b/infra/main.json @@ -5,7 +5,7 @@ "_generator": { "name": "bicep", "version": "0.34.44.8038", - "templateHash": "745056846550767942" + "templateHash": "1426128943473309418" } }, 
"parameters": { @@ -384,7 +384,7 @@ "_generator": { "name": "bicep", "version": "0.34.44.8038", - "templateHash": "15569997416548251984" + "templateHash": "5944622050604729741" } }, "parameters": { @@ -529,7 +529,7 @@ "properties": { "Application_Type": "web", "publicNetworkAccessForIngestion": "Enabled", - "publicNetworkAccessForQuery": "Disabled", + "publicNetworkAccessForQuery": "Enabled", "WorkspaceResourceId": "[resourceId('Microsoft.OperationalInsights/workspaces', variables('workspaceName'))]" }, "dependsOn": [ @@ -1874,198 +1874,32 @@ "dependsOn": [ "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_keyvault')]" ] - }, - { - "type": "Microsoft.Resources/deployments", - "apiVersion": "2022-09-01", - "name": "deploy_post_deployment_scripts", - "properties": { - "expressionEvaluationOptions": { - "scope": "inner" - }, - "mode": "Incremental", - "parameters": { - "solutionName": { - "value": "[variables('solutionPrefix')]" - }, - "solutionLocation": { - "value": "[parameters('secondaryLocation')]" - }, - "baseUrl": { - "value": "[variables('baseUrl')]" - }, - "storageAccountName": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_storage_account'), '2022-09-01').outputs.storageName.value]" - }, - "containerName": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_storage_account'), '2022-09-01').outputs.storageContainer.value]" - }, - "managedIdentityObjectId": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_managed_identity'), 
'2022-09-01').outputs.managedIdentityOutput.value.id]" - }, - "managedIdentityClientId": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_managed_identity'), '2022-09-01').outputs.managedIdentityOutput.value.clientId]" - }, - "keyVaultName": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_ai_foundry'), '2022-09-01').outputs.keyvaultName.value]" - }, - "logAnalyticsWorkspaceResourceName": { - "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_ai_foundry'), '2022-09-01').outputs.logAnalyticsWorkspaceResourceName.value]" - } - }, - "template": { - "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", - "contentVersion": "1.0.0.0", - "metadata": { - "_generator": { - "name": "bicep", - "version": "0.34.44.8038", - "templateHash": "17578272684671627358" - } - }, - "parameters": { - "solutionName": { - "type": "string", - "metadata": { - "description": "Solution Name" - } - }, - "solutionLocation": { - "type": "string", - "metadata": { - "description": "Specifies the location for resources." 
- } - }, - "baseUrl": { - "type": "string" - }, - "managedIdentityObjectId": { - "type": "string" - }, - "managedIdentityClientId": { - "type": "string" - }, - "storageAccountName": { - "type": "string" - }, - "containerName": { - "type": "string" - }, - "containerAppName": { - "type": "string", - "defaultValue": "[format('ca-{0}', parameters('solutionName'))]" - }, - "environmentName": { - "type": "string", - "defaultValue": "[format('cae-{0}', parameters('solutionName'))]" - }, - "imageName": { - "type": "string", - "defaultValue": "python:3.11-alpine" - }, - "setupCopyKbFiles": { - "type": "string", - "defaultValue": "[format('{0}infra/scripts/copy_kb_files.sh', parameters('baseUrl'))]" - }, - "setupCreateIndexScriptsUrl": { - "type": "string", - "defaultValue": "[format('{0}infra/scripts/run_create_index_scripts.sh', parameters('baseUrl'))]" - }, - "keyVaultName": { - "type": "string" - }, - "logAnalyticsWorkspaceResourceName": { - "type": "string" - } - }, - "resources": [ - { - "type": "Microsoft.App/managedEnvironments", - "apiVersion": "2022-03-01", - "name": "[parameters('environmentName')]", - "location": "[parameters('solutionLocation')]", - "properties": { - "zoneRedundant": false, - "appLogsConfiguration": { - "destination": "log-analytics", - "logAnalyticsConfiguration": { - "customerId": "[reference(resourceId('Microsoft.OperationalInsights/workspaces', parameters('logAnalyticsWorkspaceResourceName')), '2020-10-01').customerId]", - "sharedKey": "[listKeys(resourceId('Microsoft.OperationalInsights/workspaces', parameters('logAnalyticsWorkspaceResourceName')), '2020-10-01').primarySharedKey]" - } - } - } - }, - { - "type": "Microsoft.App/containerApps", - "apiVersion": "2022-03-01", - "name": "[parameters('containerAppName')]", - "location": "[parameters('solutionLocation')]", - "identity": { - "type": "UserAssigned", - "userAssignedIdentities": { - "[format('{0}', parameters('managedIdentityObjectId'))]": {} - } - }, - "properties": { - 
"managedEnvironmentId": "[resourceId('Microsoft.App/managedEnvironments', parameters('environmentName'))]", - "configuration": { - "ingress": null, - "activeRevisionsMode": "Single" - }, - "template": { - "scale": { - "minReplicas": 1, - "maxReplicas": 1 - }, - "containers": [ - { - "name": "[parameters('containerAppName')]", - "image": "[parameters('imageName')]", - "resources": { - "cpu": 2, - "memory": "4.0Gi" - }, - "command": [ - "/bin/sh", - "-c", - "[format('mkdir -p /scripts && apk add --no-cache curl bash jq py3-pip gcc musl-dev libffi-dev openssl-dev python3-dev && pip install --upgrade azure-cli && apk add --no-cache --virtual .build-deps build-base unixodbc-dev && curl -s -o msodbcsql18_18.4.1.1-1_amd64.apk https://download.microsoft.com/download/7/6/d/76de322a-d860-4894-9945-f0cc5d6a45f8/msodbcsql18_18.4.1.1-1_amd64.apk && curl -s -o mssql-tools18_18.4.1.1-1_amd64.apk https://download.microsoft.com/download/7/6/d/76de322a-d860-4894-9945-f0cc5d6a45f8/mssql-tools18_18.4.1.1-1_amd64.apk && apk add --allow-untrusted msodbcsql18_18.4.1.1-1_amd64.apk && apk add --allow-untrusted mssql-tools18_18.4.1.1-1_amd64.apk && curl -s -o /scripts/copy_kb_files.sh {0} && chmod +x /scripts/copy_kb_files.sh && sh -x /scripts/copy_kb_files.sh {1} {2} {3} {4} && curl -s -o /scripts/run_create_index_scripts.sh {5} && chmod +x /scripts/run_create_index_scripts.sh && sh -x /scripts/run_create_index_scripts.sh {6} {7} {8} && apk add --no-cache ca-certificates less ncurses-terminfo-base krb5-libs libgcc libintl libssl3 libstdc++ tzdata userspace-rcu zlib icu-libs curl && apk -X https://dl-cdn.alpinelinux.org/alpine/edge/main add --no-cache lttng-ust openssh-client && echo \"Container app setup completed successfully.\"', parameters('setupCopyKbFiles'), parameters('storageAccountName'), parameters('containerName'), parameters('baseUrl'), parameters('managedIdentityClientId'), parameters('setupCreateIndexScriptsUrl'), parameters('baseUrl'), parameters('keyVaultName'), 
parameters('managedIdentityClientId'))]" - ], - "env": [ - { - "name": "STORAGE_ACCOUNT_NAME", - "value": "[parameters('storageAccountName')]" - }, - { - "name": "CONTAINER_NAME", - "value": "[parameters('containerName')]" - }, - { - "name": "APPSETTING_WEBSITE_SITE_NAME", - "value": "DUMMY" - } - ] - } - ] - } - }, - "dependsOn": [ - "[resourceId('Microsoft.App/managedEnvironments', parameters('environmentName'))]" - ] - } - ] - } - }, - "dependsOn": [ - "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_ai_foundry')]", - "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_managed_identity')]", - "[extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_storage_account')]" - ] } ], "outputs": { "WEB_APP_URL": { "type": "string", "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_app_service'), '2022-09-01').outputs.webAppUrl.value]" + }, + "STORAGE_ACCOUNT_NAME": { + "type": "string", + "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_storage_account'), '2022-09-01').outputs.storageName.value]" + }, + "STORAGE_CONTAINER_NAME": { + "type": "string", + "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_storage_account'), '2022-09-01').outputs.storageContainer.value]" + }, + "KEY_VAULT_NAME": { + "type": "string", + "value": 
"[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_keyvault'), '2022-09-01').outputs.keyvaultName.value]" + }, + "COSMOSDB_ACCOUNT_NAME": { + "type": "string", + "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, resourceGroup().name), 'Microsoft.Resources/deployments', 'deploy_cosmos_db'), '2022-09-01').outputs.cosmosAccountName.value]" + }, + "RESOURCE_GROUP_NAME": { + "type": "string", + "value": "[resourceGroup().name]" } } } \ No newline at end of file diff --git a/infra/scripts/add_cosmosdb_access.sh b/infra/scripts/add_cosmosdb_access.sh new file mode 100644 index 000000000..24cef5c2e --- /dev/null +++ b/infra/scripts/add_cosmosdb_access.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# Variables +resource_group="$1" +account_name="$2" + +# Authenticate with Azure +if az account show &> /dev/null; then + echo "Already authenticated with Azure." +else + if [ -n "$managedIdentityClientId" ]; then + # Use managed identity if running in Azure + echo "Authenticating with Managed Identity..." + az login --identity --client-id ${managedIdentityClientId} + else + # Use Azure CLI login if running locally + echo "Authenticating with Azure CLI..." + az login + fi + echo "Not authenticated with Azure. Attempting to authenticate..." 
+fi + +echo "Getting signed in user id" +signed_user_id=$(az ad signed-in-user show --query id -o tsv) + +# Check if the user has the Cosmos DB Built-in Data Contributor role +echo "Checking if user has the Cosmos DB Built-in Data Contributor role" +roleExists=$(az cosmosdb sql role assignment list \ + --resource-group $resource_group \ + --account-name $account_name \ + --query "[?roleDefinitionId.ends_with(@, '00000000-0000-0000-0000-000000000002') && principalId == '$signed_user_id']" -o tsv) + +# Check if the role exists +if [ -n "$roleExists" ]; then + echo "User already has the Cosmos DB Built-in Data Contributor role." +else + echo "User does not have the Cosmos DB Built-in Data Contributor role. Assigning the role." + MSYS_NO_PATHCONV=1 az cosmosdb sql role assignment create \ + --resource-group $resource_group \ + --account-name $account_name \ + --role-definition-id 00000000-0000-0000-0000-000000000002 \ + --principal-id $signed_user_id \ + --scope "/" \ + --output none + if [ $? -eq 0 ]; then + echo "Cosmos DB Built-in Data Contributor role assigned successfully." + else + echo "Failed to assign Cosmos DB Built-in Data Contributor role." 
+ fi +fi \ No newline at end of file diff --git a/infra/scripts/copy_kb_files.sh b/infra/scripts/copy_kb_files.sh index ff3897ee9..43ad34cf2 100644 --- a/infra/scripts/copy_kb_files.sh +++ b/infra/scripts/copy_kb_files.sh @@ -3,8 +3,8 @@ # Variables storageAccount="$1" fileSystem="$2" -baseUrl="$3" -managedIdentityClientId="$4" +# baseUrl="$3" +managedIdentityClientId="$3" zipFileName1="pdfdata.zip" extractedFolder1="pdf" @@ -15,21 +15,58 @@ zipUrl1=${baseUrl}"infra/data/pdfdata.zip" # zipUrl2=${baseUrl}"infra/data/audio_data.zip" # Create folders if they do not exist -mkdir -p "/mnt/azscripts/azscriptinput/$extractedFolder1" +# mkdir -p "/mnt/azscripts/azscriptinput/$extractedFolder1" # mkdir -p "/mnt/azscripts/azscriptinput/$extractedFolder2" # Download the zip file -curl --output /mnt/azscripts/azscriptinput/"$zipFileName1" "$zipUrl1" +# curl --output /mnt/azscripts/azscriptinput/"$zipFileName1" "$zipUrl1" # curl --output /mnt/azscripts/azscriptinput/"$zipFileName2" "$zipUrl2" # Extract the zip file -unzip /mnt/azscripts/azscriptinput/"$zipFileName1" -d /mnt/azscripts/azscriptinput/"$extractedFolder1" +unzip infra/data/"$zipFileName1" -d infra/data/"$extractedFolder1" # unzip /mnt/azscripts/azscriptinput/"$zipFileName2" -d /mnt/azscripts/azscriptinput/"$extractedFolder2" echo "Script Started" -# Authenticate with Azure using managed identity -az login --identity --client-id ${managedIdentityClientId} +# Authenticate with Azure +if az account show &> /dev/null; then + echo "Already authenticated with Azure." +else + if [ -n "$managedIdentityClientId" ]; then + # Use managed identity if running in Azure + echo "Authenticating with Managed Identity..." + az login --identity --client-id ${managedIdentityClientId} + else + # Use Azure CLI login if running locally + echo "Authenticating with Azure CLI..." + az login + fi + echo "Not authenticated with Azure. Attempting to authenticate..." 
+fi + +echo "Getting signed in user id" +signed_user_id=$(az ad signed-in-user show --query id -o tsv) + +echo "Getting storage account resource id" +storage_account_resource_id=$(az storage account show --name $storageAccount --query id --output tsv) + +#check if user has the Storage Blob Data Contributor role, add it if not +echo "Checking if user has the Storage Blob Data Contributor role" +role_assignment=$(MSYS_NO_PATHCONV=1 az role assignment list --assignee $signed_user_id --role "Storage Blob Data Contributor" --scope $storage_account_resource_id --query "[].roleDefinitionId" -o tsv) +if [ -z "$role_assignment" ]; then + echo "User does not have the Storage Blob Data Contributor role. Assigning the role." + MSYS_NO_PATHCONV=1 az role assignment create --assignee $signed_user_id --role "Storage Blob Data Contributor" --scope $storage_account_resource_id --output none + if [ $? -eq 0 ]; then + echo "Role assignment completed successfully." + else + echo "Error: Role assignment failed." + exit 1 + fi +else + echo "User already has the Storage Blob Data Contributor role." +fi + # Using az storage blob upload-batch to upload files with managed identity authentication, as the az storage fs directory upload command is not working with managed identity authentication. 
-az storage blob upload-batch --account-name "$storageAccount" --destination data/"$extractedFolder1" --source /mnt/azscripts/azscriptinput/"$extractedFolder1" --auth-mode login --pattern '*' --overwrite +echo "Uploading files to Azure Storage" +az storage blob upload-batch --account-name "$storageAccount" --destination "$fileSystem"/"$extractedFolder1" --source infra/data/"$extractedFolder1" --auth-mode login --pattern '*' --overwrite --output none # az storage blob upload-batch --account-name "$storageAccount" --destination data/"$extractedFolder2" --source /mnt/azscripts/azscriptinput/"$extractedFolder2" --auth-mode login --pattern '*' --overwrite \ No newline at end of file diff --git a/infra/scripts/process_sample_data.sh b/infra/scripts/process_sample_data.sh new file mode 100644 index 000000000..30529053e --- /dev/null +++ b/infra/scripts/process_sample_data.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# Variables +storageAccount="$1" +fileSystem="$2" +keyvaultName="$3" +cosmosDbAccountName="$4" +resourceGroupName="$5" +managedIdentityClientId="$6" + +# Check if all required arguments are provided +if [ -z "$storageAccount" ] || [ -z "$fileSystem" ] || [ -z "$keyvaultName" ] || [ -z "$cosmosDbAccountName" ] || [ -z "$resourceGroupName" ]; then + echo "Usage: $0 <storageAccount> <fileSystem> <keyvaultName> <cosmosDbAccountName> <resourceGroupName> [managedIdentityClientId]" + exit 1 +fi + +# Call add_cosmosdb_access.sh +echo "Running add_cosmosdb_access.sh" +bash infra/scripts/add_cosmosdb_access.sh "$resourceGroupName" "$cosmosDbAccountName" "$managedIdentityClientId" +if [ $? -ne 0 ]; then + echo "Error: add_cosmosdb_access.sh failed." + exit 1 +fi +echo "add_cosmosdb_access.sh completed successfully." + +# Call copy_kb_files.sh +echo "Running copy_kb_files.sh" +bash infra/scripts/copy_kb_files.sh "$storageAccount" "$fileSystem" "$managedIdentityClientId" +if [ $? -ne 0 ]; then + echo "Error: copy_kb_files.sh failed." + exit 1 +fi +echo "copy_kb_files.sh completed successfully." 
+ +# Call run_create_index_scripts.sh +echo "Running run_create_index_scripts.sh" +bash infra/scripts/run_create_index_scripts.sh "$keyvaultName" "$managedIdentityClientId" +if [ $? -ne 0 ]; then + echo "Error: run_create_index_scripts.sh failed." + exit 1 +fi +echo "run_create_index_scripts.sh completed successfully." + +echo "All scripts executed successfully." \ No newline at end of file diff --git a/infra/scripts/run_create_index_scripts.sh b/infra/scripts/run_create_index_scripts.sh index 9c598bcf2..2987212ce 100644 --- a/infra/scripts/run_create_index_scripts.sh +++ b/infra/scripts/run_create_index_scripts.sh @@ -1,20 +1,56 @@ #!/bin/bash -echo "started the script" # Variables -baseUrl="$1" -keyvaultName="$2" -managedIdentityClientId="$3" -requirementFile="requirements.txt" -requirementFileUrl=${baseUrl}"infra/scripts/index_scripts/requirements.txt" +# baseUrl="$1" +keyvaultName="$1" +managedIdentityClientId="$2" +# requirementFile="infra/scripts/index_scripts/requirements.txt" +# requirementFileUrl=${baseUrl}"infra/scripts/index_scripts/requirements.txt" echo "Script Started" -# Download the create_index and create table python files -curl --output "01_create_search_index.py" ${baseUrl}"infra/scripts/index_scripts/01_create_search_index.py" -curl --output "02_process_data.py" ${baseUrl}"infra/scripts/index_scripts/02_process_data.py" +# Authenticate with Azure +if az account show &> /dev/null; then + echo "Already authenticated with Azure." +else + if [ -n "$managedIdentityClientId" ]; then + # Use managed identity if running in Azure + echo "Authenticating with Managed Identity..." + az login --identity --client-id ${managedIdentityClientId} + else + # Use Azure CLI login if running locally + echo "Authenticating with Azure CLI..." + az login + fi + echo "Not authenticated with Azure. Attempting to authenticate..." 
+fi +echo "Getting signed in user id" +signed_user_id=$(az ad signed-in-user show --query id -o tsv) +# # Download the create_index and create table python files +# curl --output "01_create_search_index.py" ${baseUrl}"infra/scripts/index_scripts/01_create_search_index.py" +# curl --output "02_process_data.py" ${baseUrl}"infra/scripts/index_scripts/02_process_data.py" + +# Look up the Key Vault resource ID; it is used below as the scope for the role assignment +echo "Getting key vault resource id" +key_vault_resource_id=$(az keyvault show --name $keyvaultName --query id --output tsv) + +# Check if the user has the Key Vault Administrator role +echo "Checking if user has the Key Vault Administrator role" +role_assignment=$(MSYS_NO_PATHCONV=1 az role assignment list --assignee $signed_user_id --role "Key Vault Administrator" --scope $key_vault_resource_id --query "[].roleDefinitionId" -o tsv) +if [ -z "$role_assignment" ]; then + echo "User does not have the Key Vault Administrator role. Assigning the role." + MSYS_NO_PATHCONV=1 az role assignment create --assignee $signed_user_id --role "Key Vault Administrator" --scope $key_vault_resource_id --output none + if [ $? -eq 0 ]; then + echo "Key Vault Administrator role assigned successfully." + else + echo "Failed to assign Key Vault Administrator role." + exit 1 + fi +else + echo "User already has the Key Vault Administrator role." 
+fi # RUN apt-get update # RUN apt-get install python3 python3-dev g++ unixodbc-dev unixodbc libpq-dev @@ -24,17 +60,52 @@ curl --output "02_process_data.py" ${baseUrl}"infra/scripts/index_scripts/02_pro # pip install pyodbc # Download the requirement file -curl --output "$requirementFile" "$requirementFileUrl" +# curl --output "$requirementFile" "$requirementFileUrl" -echo "Download completed" +# echo "Download completed" #Replace key vault name -sed -i "s/kv_to-be-replaced/${keyvaultName}/g" "01_create_search_index.py" -sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "01_create_search_index.py" -sed -i "s/kv_to-be-replaced/${keyvaultName}/g" "02_process_data.py" -sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "02_process_data.py" +sed -i "s/kv_to-be-replaced/${keyvaultName}/g" "infra/scripts/index_scripts/01_create_search_index.py" +sed -i "s/kv_to-be-replaced/${keyvaultName}/g" "infra/scripts/index_scripts/02_process_data.py" +if [ -n "$managedIdentityClientId" ]; then + sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "infra/scripts/index_scripts/01_create_search_index.py" + sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "infra/scripts/index_scripts/02_process_data.py" +fi + + +# create virtual environment +# Check if the virtual environment already exists +if [ -d "infra/scripts/scriptenv" ]; then + echo "Virtual environment already exists. Skipping creation." 
+else + echo "Creating virtual environment" + python3 -m venv infra/scripts/scriptenv +fi + +# handling virtual environment activation for different OS +activate_env_output=$(source infra/scripts/scriptenv/bin/activate 2>&1) +if [ -n "$activate_env_output" ]; then + source infra/scripts/scriptenv/Scripts/activate +fi + +# Install the requirements +echo "Installing requirements" +pip install --quiet -r infra/scripts/index_scripts/requirements.txt +echo "Requirements installed" -pip install -r requirements.txt +# Run the scripts +echo "Running the python scripts" +echo "Creating the search index" +python infra/scripts/index_scripts/01_create_search_index.py +if [ $? -ne 0 ]; then + echo "Error: 01_create_search_index.py failed." + exit 1 +fi -python 01_create_search_index.py -python 02_process_data.py +echo "Processing the data" +python infra/scripts/index_scripts/02_process_data.py +if [ $? -ne 0 ]; then + echo "Error: 02_process_data.py failed." + exit 1 +fi +echo "Scripts completed"