Skip to content

Commit 31f60ec

Browse files
refactor: Replace OpenAI API call with Foundry SDK
2 parents 2b1e24a + fecf3ce commit 31f60ec

File tree

8 files changed

+39
-25
lines changed

8 files changed

+39
-25
lines changed

infra/main.bicep

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,3 +520,4 @@ output AZURE_SQL_SERVER_USERNAME string = sqlServerEnabled ? servicesUsername :
520520
output AZURE_COSMOS_ACCOUNT_NAME string = cosmosDbEnabled ? cosmosDb.outputs.cosmosDBname : ''
521521
output SAMPLE_APP_URL string = deploySampleApp ? appService.outputs.uri : ''
522522
output AZURE_APP_SAMPLE_ENABLED bool = deploySampleApp
523+
output AZURE_AI_AGENT_ENDPOINT string = cognitiveServices.outputs.aiServicesEndpoint

infra/main.parameters.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@
6262
"authClientSecret": {
6363
"value": "${AZURE_AUTH_CLIENT_SECRET}"
6464
},
65+
"aiServicesEndpoint":{
66+
"value": "${AZURE_AI_AGENT_ENDPOINT}"
67+
},
6568
"aiEmbeddingModelDeployment": {
6669
"value": {
6770
"modelName": "text-embedding-3-small",

scripts/index_scripts/02_process_data.py

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
2-
from openai import AzureOpenAI
1+
from azure.identity import DefaultAzureCredential
2+
from azure.ai.inference import EmbeddingsClient
3+
from urllib.parse import urlparse
34
import re
45
import time
56
from pypdf import PdfReader
@@ -10,28 +11,37 @@
1011
import requests
1112

1213
search_endpoint = os.getenv("SEARCH_ENDPOINT")
13-
openai_endpoint = os.getenv("OPEN_AI_ENDPOINT_URL")
14+
ai_project_endpoint = os.getenv("AZURE_AI_AGENT_ENDPOINT") # AI Foundry Project endpoint
1415
embedding_model_name = os.getenv("EMBEDDING_MODEL_NAME")
1516
embedding_model_api_version = os.getenv("EMBEDDING_MODEL_API_VERSION")
1617
use_local_files = (os.getenv("USE_LOCAL_FILES") == "true")
1718
index_name = "ai_app_index"
1819

1920
print(f"Creating search index at {search_endpoint} with index name {index_name}")
20-
print(f"Using OpenAI endpoint: {openai_endpoint}")
21+
print(f"Using AI Foundry Project endpoint: {ai_project_endpoint}")
2122
print(f"Using embedding model: {embedding_model_name} with API version: {embedding_model_api_version}")
2223

23-
# Function: Get Embeddings
24-
def get_embeddings(text: str, openai_endpoint: str, embedding_model_api_version: str):
24+
# Function: Get Embeddings using Azure AI Inference SDK with Foundry endpoint
25+
def get_embeddings(text: str, ai_project_endpoint: str, embedding_model_api_version: str):
2526
credential = DefaultAzureCredential()
26-
token_provider = get_bearer_token_provider(credential,
27-
"https://cognitiveservices.azure.com/.default")
28-
client = AzureOpenAI(
29-
api_version=embedding_model_api_version,
30-
azure_endpoint=openai_endpoint,
31-
azure_ad_token_provider=token_provider
27+
28+
# Construct inference endpoint with /models path for Azure AI Foundry
29+
inference_endpoint = f"https://{urlparse(ai_project_endpoint).netloc}/models"
30+
31+
# Create embeddings client using Azure AI Inference SDK
32+
embeddings_client = EmbeddingsClient(
33+
endpoint=inference_endpoint,
34+
credential=credential,
35+
credential_scopes=["https://cognitiveservices.azure.com/.default"]
36+
)
37+
38+
# Create embeddings using the model name from environment
39+
response = embeddings_client.embed(
40+
model=embedding_model_name,
41+
input=[text]
3242
)
3343

34-
embedding = client.embeddings.create(input=text, model=embedding_model_name).data[0].embedding
44+
embedding = response.data[0].embedding
3545
return embedding
3646

3747
# Function: Clean Spaces with Regex -
@@ -92,12 +102,12 @@ def prepare_search_doc(content, document_id, filename):
92102
chunk_id = document_id + '_' + str(chunk_num).zfill(2)
93103

94104
try:
95-
v_contentVector = get_embeddings(str(chunk), openai_endpoint, "2023-05-15")
105+
v_contentVector = get_embeddings(str(chunk), ai_project_endpoint, embedding_model_api_version)
96106
except Exception as e:
97107
print(f"Error occurred: {e}. Retrying after 30 seconds...")
98108
time.sleep(30)
99109
try:
100-
v_contentVector = get_embeddings(str(chunk), openai_endpoint, "1")
110+
v_contentVector = get_embeddings(str(chunk), ai_project_endpoint, embedding_model_api_version)
101111
except Exception as e:
102112
print(f"Retry failed: {e}. Setting v_contentVector to an empty list.")
103113
v_contentVector = []

scripts/index_scripts/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
openai
1+
azure-ai-inference>=1.0.0b1
22
pypdf
33
# pyodbc
44
tiktoken

scripts/postprovision.ps1

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,12 @@ if (-not $virtualMachineId) {
2222
Write-Host "2. Navigate to the scripts directory: cd $PSScriptRoot"
2323
Write-Host "3. Run the below commands to process & ingest the sample data:"
2424
Write-Host "Set-ExecutionPolicy RemoteSigned -Scope CurrentUser"
25-
Write-Host ".\process_sample_data.ps1 -SearchEndpoint '$env:AZURE_SEARCH_ENDPOINT' -OpenAiEndpoint '$env:AZURE_OPENAI_ENDPOINT' -EmbeddingModelName '$env:EMBEDDING_MODEL_NAME' -EmbeddingModelApiVersion '2025-01-01-preview' -UseLocalFiles `$true"
25+
Write-Host ".\process_sample_data.ps1 -SearchEndpoint '$env:AZURE_SEARCH_ENDPOINT' -ProjectEndpoint '$env:AZURE_AI_AGENT_ENDPOINT' -EmbeddingModelName '$env:EMBEDDING_MODEL_NAME' -EmbeddingModelApiVersion '2025-01-01-preview' -UseLocalFiles `$true"
2626
} else {
2727
Write-Host "To ingest the sample data, follow these steps:"
2828
Write-Host "1. Login to the Virtual Machine using the username '$userName' and Password provided during deployment."
2929
Write-Host "2. Open the PowerShell terminal."
3030
Write-Host "3. Navigate to the scripts directory: cd C:\DataIngestionScripts"
3131
Write-Host "4. Run the following commands to process & ingest the sample data:"
32-
Write-Host "powershell -ExecutionPolicy Bypass -File process_sample_data.ps1 -SearchEndpoint '$env:AZURE_SEARCH_ENDPOINT' -OpenAiEndpoint '$env:AZURE_OPENAI_ENDPOINT' -EmbeddingModelName '$env:EMBEDDING_MODEL_NAME' -EmbeddingModelApiVersion '2025-01-01-preview'"
32+
Write-Host "powershell -ExecutionPolicy Bypass -File process_sample_data.ps1 -SearchEndpoint '$env:AZURE_SEARCH_ENDPOINT' -ProjectEndpoint '$env:AZURE_AI_AGENT_ENDPOINT' -EmbeddingModelName '$env:EMBEDDING_MODEL_NAME' -EmbeddingModelApiVersion '2025-01-01-preview'"
3333
}

scripts/postprovision.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@ if [ -z "$virtualMachineId" ]; then
1919
echo "1. Open the terminal."
2020
echo "2. Navigate to the scripts directory: cd $SCRIPT_DIR/scripts"
2121
echo "3. Run the following command to process the sample data:"
22-
echo "./process_sample_data.sh '$AZURE_SEARCH_ENDPOINT' '$AZURE_OPENAI_ENDPOINT' '$EMBEDDING_MODEL_NAME' '2025-01-01-preview'"
22+
echo "./process_sample_data.sh '$AZURE_SEARCH_ENDPOINT' '$AZURE_AI_AGENT_ENDPOINT' '$EMBEDDING_MODEL_NAME' '2025-01-01-preview'"
2323
else
2424
echo "To ingest the sample data, follow these steps:"
2525
echo "1. Login to the Virtual Machine using the username '$userName' and Password provided during deployment."
2626
echo "2. Open the PowerShell terminal."
2727
echo "3. Navigate to the scripts directory: cd C:\\DataIngestionScripts"
2828
echo "4. Run the following command to process the sample data:"
29-
echo "powershell -ExecutionPolicy Bypass -File process_sample_data.ps1 -SearchEndpoint '$AZURE_SEARCH_ENDPOINT' -OpenAiEndpoint '$AZURE_OPENAI_ENDPOINT' -EmbeddingModelName '$EMBEDDING_MODEL_NAME' -EmbeddingModelApiVersion '2025-01-01-preview'"
29+
echo "powershell -ExecutionPolicy Bypass -File process_sample_data.ps1 -SearchEndpoint '$AZURE_SEARCH_ENDPOINT' -ProjectEndpoint '$AZURE_AI_AGENT_ENDPOINT' -EmbeddingModelName '$EMBEDDING_MODEL_NAME' -EmbeddingModelApiVersion '2025-01-01-preview'"
3030
fi

scripts/process_sample_data.ps1

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
param (
22
[string]$SearchEndpoint,
3-
[string]$OpenAiEndpoint,
3+
[string]$ProjectEndpoint,
44
[string]$EmbeddingModelName,
55
[string]$EmbeddingModelApiVersion,
66
[bool]$UseLocalFiles = $false
@@ -59,7 +59,7 @@ Write-Host "Using Python command: $pythonExe"
5959

6060
# --- Set Environment Variables ---
6161
$env:SEARCH_ENDPOINT = $SearchEndpoint
62-
$env:OPEN_AI_ENDPOINT_URL = $OpenAiEndpoint
62+
$env:AZURE_AI_AGENT_ENDPOINT = $ProjectEndpoint
6363
$env:EMBEDDING_MODEL_NAME = $EmbeddingModelName
6464
$env:EMBEDDING_MODEL_API_VERSION = $EmbeddingModelApiVersion
6565
$env:USE_LOCAL_FILES = $UseLocalFiles.ToString().ToLower()

scripts/process_sample_data.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@ set -o pipefail
55

66
# --- Input Parameters ---
77
SearchEndpoint="$1"
8-
OpenAiEndpoint="$2"
8+
ProjectEndpoint="$2"
99
EmbeddingModelName="$3"
1010
EmbeddingModelApiVersion="$4"
1111

1212
if [ $# -ne 4 ]; then
13-
echo "Usage: $0 <SearchEndpoint> <OpenAiEndpoint> <EmbeddingModelName> <EmbeddingModelApiVersion>"
13+
echo "Usage: $0 <SearchEndpoint> <ProjectEndpoint> <EmbeddingModelName> <EmbeddingModelApiVersion>"
1414
exit 1
1515
fi
1616

@@ -43,7 +43,7 @@ echo "$PROCESS_DATA_SCRIPT" | tee -a "$LOG_FILE"
4343

4444
# --- Export environment variables ---
4545
export SEARCH_ENDPOINT="$SearchEndpoint"
46-
export OPEN_AI_ENDPOINT_URL="$OpenAiEndpoint"
46+
export AZURE_AI_AGENT_ENDPOINT="$ProjectEndpoint"
4747
export EMBEDDING_MODEL_NAME="$EmbeddingModelName"
4848
export EMBEDDING_MODEL_API_VERSION="$EmbeddingModelApiVersion"
4949
export USE_LOCAL_FILES="true"

0 commit comments

Comments
 (0)