Skip to content

Commit 2f9bf70

Browse files
refactor: update scripts and add process_sample_data.sh
1 parent c57d877 commit 2f9bf70

6 files changed

Lines changed: 98 additions & 17 deletions

File tree

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,6 @@ __pycache__/
99
venv
1010
myenv
1111

12-
scriptsenv/
12+
scriptsenv/
13+
14+
scriptenv

azure.yaml

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,21 +31,25 @@ hooks:
3131
run: |
3232
Write-Host "Web app URL: "
3333
Write-Host "$env:WEB_APP_URL" -ForegroundColor Cyan
34-
Write-Host "Run the following command in the terminal to copy data files to storage account: "
35-
Write-Host "$env:copykbfiles" -ForegroundColor Cyan
36-
Write-Host "Run the following command in the terminal to run the scripts: "
37-
Write-Host "$env:createindex" -ForegroundColor Cyan
34+
# Write-Host "Run the following command in the terminal to copy data files to storage account: "
35+
# Write-Host "$env:copykbfiles" -ForegroundColor Cyan
36+
# Write-Host "Run the following command in the terminal to run the scripts: "
37+
# Write-Host "$env:createindex" -ForegroundColor Cyan
38+
Write-Host "If you want to use the Sample Data, run the following command in the Bash terminal to process it:"
39+
Write-Host "./infra/scripts/process_sample_data.sh $env:STORAGE_ACCOUNT_NAME $env:STORAGE_CONTAINER_NAME $env:KEY_VAULT_NAME" -ForegroundColor Cyan
3840
shell: pwsh
3941
continueOnError: false
4042
interactive: true
4143
posix:
4244
run: |
4345
echo "Web app URL: "
4446
echo $WEB_APP_URL
45-
echo "Run the following command in the terminal to copy data files to storage account: "
46-
echo $copykbfiles
47-
echo "Run the following command in the terminal to run the scripts: "
48-
echo $createindex
47+
# echo "Run the following command in the terminal to copy data files to storage account: "
48+
# echo $copykbfiles
49+
# echo "Run the following command in the terminal to run the scripts: "
50+
# echo $createindex
51+
# This hook runs under `shell: sh`, so use echo; Write-Host is a PowerShell cmdlet and is not available here.
echo "If you want to use the Sample Data, run the following command in the Bash terminal to process it:"
52+
# POSIX shells read environment values as $NAME (not $env:NAME). KEY_VAULT_NAME is passed as-is,
# matching the Windows hook: the deployment output already holds the complete Key Vault name.
echo "./infra/scripts/process_sample_data.sh $STORAGE_ACCOUNT_NAME $STORAGE_CONTAINER_NAME $KEY_VAULT_NAME"
4953
shell: sh
5054
continueOnError: false
5155
interactive: true

infra/main.bicep

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -424,8 +424,12 @@ module cosmosDBModule 'deploy_cosmos_db.bicep' = {
424424
}
425425
scope: resourceGroup(resourceGroup().name)
426426
}
427-
output copykbfiles string = './infra/scripts/copy_kb_files.sh ${storageAccount.outputs.storageName} ${storageAccount.outputs.storageContainer} ${managedIdentityModule.outputs.managedIdentityOutput.clientId}'
428-
output createindex string = './infra/scripts/run_create_index_scripts.sh ${kvault.outputs.keyvaultName} ${managedIdentityModule.outputs.managedIdentityOutput.clientId}'
427+
// output copykbfiles string = './infra/scripts/copy_kb_files.sh ${storageAccount.outputs.storageName} ${storageAccount.outputs.storageContainer} ${managedIdentityModule.outputs.managedIdentityOutput.clientId}'
428+
// output createindex string = './infra/scripts/run_create_index_scripts.sh ${kvault.outputs.keyvaultName} ${managedIdentityModule.outputs.managedIdentityOutput.clientId}'
429+
430+
output STORAGE_ACCOUNT_NAME string = storageAccount.outputs.storageName
431+
output STORAGE_CONTAINER_NAME string = storageAccount.outputs.storageContainer
432+
output KEY_VAULT_NAME string = kvault.outputs.keyvaultName
429433

430434

431435
// //========== Deployment script to upload sample data ========== //

infra/scripts/copy_kb_files.sh

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,24 @@ unzip infra/data/"$zipFileName1" -d infra/data/"$extractedFolder1"
2828

2929
echo "Script Started"
3030

31-
# Authenticate with Azure using managed identity
32-
az login --identity --client-id ${managedIdentityClientId}
31+
# Authenticate with Azure
32+
if az account show &> /dev/null; then
33+
echo "Already authenticated with Azure."
34+
else
35+
if [ -n "$managedIdentityClientId" ]; then
36+
# Use managed identity if running in Azure
37+
echo "Authenticating with Managed Identity..."
38+
az login --identity --client-id ${managedIdentityClientId}
39+
else
40+
# Use Azure CLI login if running locally
41+
echo "Authenticating with Azure CLI..."
42+
az login
43+
fi
44+
# The login attempt above has already run at this point; confirm it actually succeeded
# and stop early with a clear message instead of failing later during the upload.
az account show > /dev/null 2>&1 || { echo "Error: Azure authentication failed." >&2; exit 1; }
45+
fi
46+
47+
3348
# Using az storage blob upload-batch to upload files with managed identity authentication, as the az storage fs directory upload command is not working with managed identity authentication.
34-
az storage blob upload-batch --account-name "$storageAccount" --destination data/"$extractedFolder1" --source infra/data/"$extractedFolder1" --auth-mode login --pattern '*' --overwrite
49+
echo "Uploading files to Azure Storage..."
50+
az storage blob upload-batch --account-name "$storageAccount" --destination "$fileSystem"/"$extractedFolder1" --source infra/data/"$extractedFolder1" --auth-mode login --pattern '*' --overwrite
3551
# az storage blob upload-batch --account-name "$storageAccount" --destination data/"$extractedFolder2" --source /mnt/azscripts/azscriptinput/"$extractedFolder2" --auth-mode login --pattern '*' --overwrite

infra/scripts/process_sample_data.sh

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/bin/bash
2+
3+
# Variables
4+
storageAccount="$1"
5+
fileSystem="$2"
6+
keyvaultName="$3"
7+
managedIdentityClientId="$4"
8+
9+
# Check if all required arguments are provided
10+
if [ -z "$storageAccount" ] || [ -z "$fileSystem" ] || [ -z "$keyvaultName" ]; then
11+
echo "Usage: $0 <storageAccount> <fileSystem> <keyvaultName> [managedIdentityClientId]"
12+
exit 1
13+
fi
14+
15+
# Call copy_kb_files.sh
16+
echo "Running copy_kb_files.sh"
17+
bash infra/scripts/copy_kb_files.sh "$storageAccount" "$fileSystem" "$managedIdentityClientId"
18+
if [ $? -ne 0 ]; then
19+
echo "Error: copy_kb_files.sh failed."
20+
exit 1
21+
fi
22+
echo "copy_kb_files.sh completed successfully."
23+
24+
# Call run_create_index_scripts.sh
25+
echo "Running run_create_index_scripts.sh"
26+
bash infra/scripts/run_create_index_scripts.sh "$keyvaultName" "$managedIdentityClientId"
27+
if [ $? -ne 0 ]; then
28+
echo "Error: run_create_index_scripts.sh failed."
29+
exit 1
30+
fi
31+
echo "run_create_index_scripts.sh completed successfully."
32+
33+
echo "All scripts executed successfully."

infra/scripts/run_create_index_scripts.sh

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ key_vault_resource_id=$(az keyvault show --name $keyvaultName --query id --outpu
2323

2424
# Assign the Key Vault Administrator role to the user
2525
echo "Assigning the Key Vault Administrator role to the user."
26-
az role assignment create --assignee $signed_user_id --role "Key Vault Administrator" --scope $key_vault_resource_id
26+
# Pass the resource ID unchanged. MSYS_NO_PATHCONV=1 stops Git Bash on Windows from rewriting the
# leading "/" into a Windows path; other platforms ignore the variable, so the scope stays valid everywhere.
MSYS_NO_PATHCONV=1 az role assignment create --assignee "$signed_user_id" --role "Key Vault Administrator" --scope "$key_vault_resource_id"
2727

2828
# RUN apt-get update
2929
# RUN apt-get install python3 python3-dev g++ unixodbc-dev unixodbc libpq-dev
@@ -39,11 +39,33 @@ az role assignment create --assignee $signed_user_id --role "Key Vault Administr
3939

4040
#Replace key vault name
4141
sed -i "s/kv_to-be-replaced/${keyvaultName}/g" "infra/scripts/index_scripts/01_create_search_index.py"
42-
sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "infra/scripts/index_scripts/01_create_search_index.py"
4342
sed -i "s/kv_to-be-replaced/${keyvaultName}/g" "infra/scripts/index_scripts/02_process_data.py"
44-
sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "infra/scripts/index_scripts/02_process_data.py"
43+
if [ -n "$managedIdentityClientId" ]; then
44+
sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "infra/scripts/index_scripts/01_create_search_index.py"
45+
sed -i "s/mici_to-be-replaced/${managedIdentityClientId}/g" "infra/scripts/index_scripts/02_process_data.py"
46+
fi
4547

48+
49+
# create virtual environment
50+
echo "Creating virtual environment"
51+
# Check if the virtual environment already exists
52+
if [ -d "infra/scripts/scriptenv" ]; then
53+
echo "Virtual environment already exists. Skipping creation."
54+
else
55+
echo "Creating virtual environment"
56+
python3 -m venv infra/scripts/scriptenv
57+
fi
58+
# python -m venv puts the activation script under Scripts/ on Windows and under bin/ on Linux/macOS;
# activate whichever layout is present so the following pip/python calls use the virtual environment.
if [ -f "infra/scripts/scriptenv/Scripts/activate" ]; then
source infra/scripts/scriptenv/Scripts/activate
else
source infra/scripts/scriptenv/bin/activate
fi
59+
60+
# Install the requirements
61+
echo "Installing requirements"
4662
pip install -r infra/scripts/index_scripts/requirements.txt
63+
echo "Requirements installed"
4764

65+
# Run the scripts
66+
echo "Running the scripts"
67+
echo "Creating the search index"
4868
python infra/scripts/index_scripts/01_create_search_index.py
69+
echo "Processing the data"
4970
python infra/scripts/index_scripts/02_process_data.py
71+
echo "Scripts completed"

0 commit comments

Comments
 (0)