-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path04_create_onelake_datasource.ps1
More file actions
269 lines (235 loc) · 12.4 KB
/
04_create_onelake_datasource.ps1
File metadata and controls
269 lines (235 loc) · 12.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
# Create OneLake data source for AI Search indexing
# This script creates the OneLake data source using the correct preview API
#
# Every parameter is optional on the command line. Values left empty are resolved
# later in the script from AZURE_OUTPUTS_JSON, environment variables, the azd
# environment store and temp *.env files; the script aborts if the AI Search or
# Fabric identifiers are still empty after that.
param(
# Azure AI Search service name; used as the host prefix of https://<name>.search.windows.net.
[string]$aiSearchName = "",
# Resource group of the search service. Must resolve to a non-empty value; it is not
# referenced by the request code in this file - presumably consumed by the dot-sourced
# search helpers (verify).
[string]$resourceGroup = "",
# Subscription of the search service. Same handling as $resourceGroup.
[string]$subscription = "",
# Fabric workspace ID; sent as "ResourceId=<workspaceId>" in the datasource connection string.
[string]$workspaceId = "",
# Fabric lakehouse ID; sent as the datasource container name.
[string]$lakehouseId = "",
# Datasource name. When left at this default and a workspace name is known, the script
# replaces it with a sanitized "<workspaceName>-onelake-datasource".
[string]$dataSourceName = "onelake-reports-datasource",
# Fabric workspace display name; only used to derive the datasource name.
[string]$workspaceName = "",
# Path inside the lakehouse. NOTE(review): this value is only logged; the payload
# built below sends container.query as null - confirm whether that is intended.
[string]$queryPath = "Files/documents/reports",
# Which identity block to put in the datasource payload. For the system-assigned
# value no identity block is built (the property is sent as null).
[ValidateSet("systemAssignedManagedIdentity", "userAssignedManagedIdentity", "none")]
[string]$identityType = "systemAssignedManagedIdentity",
# Resource ID of the user-assigned identity; required when $identityType is
# 'userAssignedManagedIdentity' (the script exits with code 1 otherwise).
[string]$userAssignedIdentityResourceId = ""
)
# Skip when Fabric is disabled for this environment.
# The mode is looked up in order: process environment variables, the azd
# environment store, then the deployment outputs JSON. The first non-empty
# value wins; lookup failures are ignored.
$fabricWorkspaceMode = $env:fabricWorkspaceMode
if (-not $fabricWorkspaceMode) {
    $fabricWorkspaceMode = $env:fabricWorkspaceModeOut
}

if (-not $fabricWorkspaceMode) {
    try {
        # stderr is discarded; any exception from invoking azd is swallowed below.
        $modeFromAzd = & azd env get-value fabricWorkspaceModeOut 2>$null
        if ($modeFromAzd) {
            $fabricWorkspaceMode = $modeFromAzd.ToString().Trim()
        }
    }
    catch { }
}

if ((-not $fabricWorkspaceMode) -and $env:AZURE_OUTPUTS_JSON) {
    try {
        $deploymentOutputs = $env:AZURE_OUTPUTS_JSON | ConvertFrom-Json -ErrorAction Stop
        # Prefer the '...Out' output name, then the plain one.
        foreach ($outputKey in 'fabricWorkspaceModeOut', 'fabricWorkspaceMode') {
            $outputEntry = $deploymentOutputs.$outputKey
            if ($outputEntry -and $outputEntry.value) {
                $fabricWorkspaceMode = $outputEntry.value
                break
            }
        }
    }
    catch { }
}

# Nothing to do when the environment explicitly opted out of Fabric.
$fabricDisabled = $false
if ($fabricWorkspaceMode) {
    $fabricDisabled = ($fabricWorkspaceMode.ToString().Trim().ToLowerInvariant() -eq 'none')
}
if ($fabricDisabled) {
    Write-Warning "[onelake-datasource] Fabric workspace mode is 'none'; skipping datasource creation."
    exit 0
}
# Parse the deployment outputs once for the lookups further down. A missing or
# malformed AZURE_OUTPUTS_JSON leaves $outputs as $null.
$outputs = $null
if ($env:AZURE_OUTPUTS_JSON) {
    try {
        $outputs = ConvertFrom-Json -InputObject $env:AZURE_OUTPUTS_JSON -ErrorAction Stop
    }
    catch {
        $outputs = $null
    }
}
# Import security module
. "$PSScriptRoot/../SecurityModule.ps1"
# Turns an arbitrary name into a lowercase identifier made of [a-z0-9-] only.
#   - every other character becomes '-', runs of '-' are collapsed
#   - leading/trailing '-' are removed
#   - the result is capped at 128 characters (and re-trimmed after the cut)
# Returns $null when the input is empty or nothing usable is left.
function Get-SafeName([string]$name) {
    if (-not $name) { return $null }
    # Lowercase with the invariant culture: the culture-sensitive ToLower() maps
    # 'I' to dotless 'ı' under e.g. tr-TR, which the filter below would then
    # replace with '-', giving a different name depending on the host locale.
    $safe = $name.ToLowerInvariant() -replace "[^a-z0-9-]", "-" -replace "-+", "-"
    $safe = $safe.Trim('-')
    if ([string]::IsNullOrEmpty($safe)) { return $null }
    if ($safe.Length -gt 128) { $safe = $safe.Substring(0, 128).Trim('-') }
    return $safe
}
# Resolve workspace name if not provided.
# Sources, in order: FABRIC_WORKSPACE_NAME environment variable, the
# fabric_workspace.env file in the temp directory, then the
# desiredFabricWorkspaceName deployment output.
if (-not $workspaceName) {
    $workspaceName = $env:FABRIC_WORKSPACE_NAME
}

$workspaceNameEnvFile = Join-Path ([IO.Path]::GetTempPath()) 'fabric_workspace.env'
if ((-not $workspaceName) -and (Test-Path $workspaceNameEnvFile)) {
    # If the key appears more than once, the last occurrence wins.
    foreach ($envLine in (Get-Content $workspaceNameEnvFile)) {
        if ($envLine -match '^FABRIC_WORKSPACE_NAME=(.+)$') {
            $workspaceName = $Matches[1].Trim()
        }
    }
}

if ((-not $workspaceName) -and $env:AZURE_OUTPUTS_JSON) {
    try {
        $outputsForName = $env:AZURE_OUTPUTS_JSON | ConvertFrom-Json
        $workspaceName = $outputsForName.desiredFabricWorkspaceName.value
    }
    catch {}
}

# If dataSourceName is still the generic default, derive it from the workspace name.
# The default is kept when sanitizing leaves nothing usable.
if ($workspaceName -and ($dataSourceName -eq 'onelake-reports-datasource')) {
    $derivedDataSourceName = Get-SafeName -name ($workspaceName + "-onelake-datasource")
    if ($derivedDataSourceName) {
        $dataSourceName = $derivedDataSourceName
    }
}
# Resolve parameters from environment.
# An explicit argument always wins. Otherwise the deployment output is tried
# first, followed by the environment variables in the order listed.
if (-not $aiSearchName) {
    if ($outputs -and $outputs.aiSearchName -and $outputs.aiSearchName.value) {
        $aiSearchName = $outputs.aiSearchName.value
    }
    foreach ($fallbackName in $env:aiSearchName, $env:AZURE_AI_SEARCH_NAME) {
        if (-not $aiSearchName) { $aiSearchName = $fallbackName }
    }
}

if (-not $resourceGroup) {
    if ($outputs -and $outputs.aiSearchResourceGroup -and $outputs.aiSearchResourceGroup.value) {
        $resourceGroup = $outputs.aiSearchResourceGroup.value
    }
    foreach ($fallbackGroup in $env:aiSearchResourceGroup, $env:AZURE_RESOURCE_GROUP_NAME, $env:AZURE_RESOURCE_GROUP) {
        if (-not $resourceGroup) { $resourceGroup = $fallbackGroup }
    }
}

if (-not $subscription) {
    if ($outputs -and $outputs.aiSearchSubscriptionId -and $outputs.aiSearchSubscriptionId.value) {
        $subscription = $outputs.aiSearchSubscriptionId.value
    }
    foreach ($fallbackSubscription in $env:aiSearchSubscriptionId, $env:AZURE_SUBSCRIPTION_ID) {
        if (-not $subscription) { $subscription = $fallbackSubscription }
    }
}
# Resolve Fabric workspace and lakehouse IDs.
# Lookup order (an earlier hit is never overwritten by a later source):
#   1. explicit parameter
#   2. FABRIC_WORKSPACE_ID / FABRIC_LAKEHOUSE_ID environment variables
#   3. deployment outputs ($outputs)
#   4. azd environment store
#   5. fabric_workspace.env in the temp directory
#   6. fabric_lakehouses.env in the temp directory (lakehouse ID only)
if (-not $workspaceId) { $workspaceId = $env:FABRIC_WORKSPACE_ID }
if (-not $lakehouseId) { $lakehouseId = $env:FABRIC_LAKEHOUSE_ID }

# Try azd outputs (Bicep emits fabricWorkspaceIdOut for BYO mode)
if (-not $workspaceId -and $outputs) {
    if ($outputs.fabricWorkspaceIdOut -and $outputs.fabricWorkspaceIdOut.value) { $workspaceId = $outputs.fabricWorkspaceIdOut.value }
    elseif ($outputs.fabricWorkspaceId -and $outputs.fabricWorkspaceId.value) { $workspaceId = $outputs.fabricWorkspaceId.value }
}
if (-not $lakehouseId -and $outputs) {
    if ($outputs.fabricLakehouseId -and $outputs.fabricLakehouseId.value) { $lakehouseId = $outputs.fabricLakehouseId.value }
}

# Try azd env store (persisted by create_lakehouses.ps1)
if (-not $workspaceId) {
    try {
        $azdValue = & azd env get-value FABRIC_WORKSPACE_ID 2>$null
        if ($azdValue) { $workspaceId = $azdValue.ToString().Trim() }
    } catch {}
}
if (-not $lakehouseId) {
    try {
        $azdValue = & azd env get-value FABRIC_LAKEHOUSE_ID 2>$null
        if ($azdValue) { $lakehouseId = $azdValue.ToString().Trim() }
    } catch {}
}

# Values taken from the temp *.env files are trimmed, like the azd values above:
# stray whitespace after KEY=value would otherwise end up inside the datasource
# connection string / container name.
$fabricTempDir = [IO.Path]::GetTempPath()

# Try temp fabric_workspace.env (from create_fabric_workspace.ps1)
$fabricWorkspaceEnvFile = Join-Path $fabricTempDir 'fabric_workspace.env'
if ((-not $workspaceId -or -not $lakehouseId) -and (Test-Path $fabricWorkspaceEnvFile)) {
    foreach ($envLine in (Get-Content $fabricWorkspaceEnvFile)) {
        if (-not $workspaceId -and $envLine -match '^FABRIC_WORKSPACE_ID=(.+)$') { $workspaceId = $Matches[1].Trim() }
        if (-not $lakehouseId -and $envLine -match '^FABRIC_LAKEHOUSE_ID=(.+)$') { $lakehouseId = $Matches[1].Trim() }
        # Also try the lakehouse-specific ID; only the bronze key is consulted.
        if (-not $lakehouseId -and $envLine -match '^FABRIC_LAKEHOUSE_bronze_ID=(.+)$') { $lakehouseId = $Matches[1].Trim() }
    }
}

# Try dedicated lakehouse file
$fabricLakehousesEnvFile = Join-Path $fabricTempDir 'fabric_lakehouses.env'
if ((-not $workspaceId -or -not $lakehouseId) -and (Test-Path $fabricLakehousesEnvFile)) {
    foreach ($envLine in (Get-Content $fabricLakehousesEnvFile)) {
        if (-not $lakehouseId -and $envLine -match '^FABRIC_LAKEHOUSE_ID=(.+)$') { $lakehouseId = $Matches[1].Trim() }
        if (-not $lakehouseId -and $envLine -match '^FABRIC_LAKEHOUSE_bronze_ID=(.+)$') { $lakehouseId = $Matches[1].Trim() }
    }
}
Write-Host "Creating OneLake data source for AI Search service: $aiSearchName"
Write-Host "================================================================="

# Abort when the search service coordinates could not be resolved.
# This path raises a terminating error rather than calling exit.
$haveSearchConfig = ($aiSearchName -and $resourceGroup -and $subscription)
if (-not $haveSearchConfig) {
    Write-Error "AI Search configuration not found (name='$aiSearchName', rg='$resourceGroup', subscription='$subscription'). Cannot create OneLake data source."
    throw
}

# Abort with exit code 1 when either Fabric identifier is missing.
$haveFabricIds = ($workspaceId -and $lakehouseId)
if (-not $haveFabricIds) {
    Write-Error "Fabric workspace or lakehouse identifiers missing (workspaceId='$workspaceId', lakehouseId='$lakehouseId'). Cannot create OneLake data source."
    exit 1
}
. "$PSScriptRoot/SearchHelpers.ps1"
# Main flow: (re)create the OneLake datasource on the search service.
#   1. Build the JSON payload (identity block depends on $identityType).
#   2. If a datasource with the same name exists, delete the indexers that
#      reference it and then the datasource itself.
#   3. POST the new datasource; on failure print as much detail as possible
#      and exit with code 1.
# Invoke-SearchRequest, Ensure-SearchPublicAccess and Restore-SearchPublicAccess
# are not defined in this file - presumably they come from SearchHelpers.ps1.
Write-Host "Workspace ID: $workspaceId"
Write-Host "Lakehouse ID: $lakehouseId"
Write-Host "Query Path: $queryPath"
Write-Host ""

# Remember the service's public-access state so the finally block can restore it
# on every exit path below.
$originalPublicAccess = Ensure-SearchPublicAccess
try {
    # Use preview API version required for OneLake
    $apiVersion = '2024-05-01-preview'

    # Create OneLake data source using the identity selected via -identityType
    Write-Host "Creating OneLake data source: $dataSourceName"

    # Create the data source using the exact working format from Azure portal
    Write-Host "Creating OneLake data source using proven working format..."

    # Build the datasource payload with the requested identity configuration so Search uses Entra ID at runtime.
    # For system-assigned managed identity no identity block is built (the property is sent as null) and the
    # service is expected to infer the identity from the connection string, so a block is only emitted for
    # the two special cases below.
    $identityBlock = $null
    switch ($identityType) {
        "userAssignedManagedIdentity" {
            if (-not $userAssignedIdentityResourceId) {
                Write-Error "userAssignedIdentityResourceId must be provided when identityType is 'userAssignedManagedIdentity'."
                exit 1
            }
            $identityBlock = @{
                "@odata.type"        = "#Microsoft.Azure.Search.DataUserAssignedIdentity"
                userAssignedIdentity = $userAssignedIdentityResourceId
            }
        }
        "none" {
            $identityBlock = @{ "@odata.type" = "#Microsoft.Azure.Search.DataNoneIdentity" }
        }
    }

    # NOTE(review): container.query is sent as null; $queryPath is only logged above.
    $dataSourceBody = @{
        name        = $dataSourceName
        description = "OneLake data source for document indexing"
        type        = "onelake"
        credentials = @{
            connectionString = "ResourceId=$workspaceId"
        }
        container   = @{
            name  = $lakehouseId
            query = $null
        }
        dataChangeDetectionPolicy   = $null
        dataDeletionDetectionPolicy = $null
        encryptionKey               = $null
        identity                    = $identityBlock
    } | ConvertTo-Json -Depth 10

    # First, check if datasource exists and delete it if it does
    $existingDataSourceUri = "https://$aiSearchName.search.windows.net/datasources/$dataSourceName" + "?api-version=$apiVersion"
    try {
        $existingDataSource = Invoke-SearchRequest -Method 'GET' -Uri $existingDataSourceUri
        if ($existingDataSource) {
            Write-Host "Found existing datasource. Checking for dependent indexers..."

            # Get all indexers to see if any reference this datasource
            $indexersUri = "https://$aiSearchName.search.windows.net/indexers?api-version=$apiVersion"
            $indexers = Invoke-SearchRequest -Method 'GET' -Uri $indexersUri
            $dependentIndexers = $indexers.value | Where-Object { $_.dataSourceName -eq $dataSourceName }

            if ($dependentIndexers) {
                Write-Host "Found dependent indexers. Deleting them first..."
                foreach ($indexer in $dependentIndexers) {
                    $deleteIndexerUri = "https://$aiSearchName.search.windows.net/indexers/$($indexer.name)?api-version=$apiVersion"
                    try {
                        Invoke-SearchRequest -Method 'DELETE' -Uri $deleteIndexerUri
                        Write-Host "Deleted indexer: $($indexer.name)"
                    } catch {
                        # Best effort: a leftover indexer must not stop the recreate attempt.
                        Write-Host "Warning: Could not delete indexer $($indexer.name): $($_.Exception.Message)"
                    }
                }
            }

            Write-Host "Deleting existing datasource to recreate with current values..."
            Invoke-SearchRequest -Method 'DELETE' -Uri $existingDataSourceUri
            Write-Host "Existing datasource deleted."
        }
    } catch {
        # Only a 404 really means "does not exist". Any other failure (auth, a
        # rejected DELETE, listing indexers, ...) is reported as a warning instead
        # of being mislabelled; creation is still attempted afterwards, as before.
        $lookupError = $_
        $lookupStatus = 0
        try { $lookupStatus = [int]$lookupError.Exception.Response.StatusCode } catch { $lookupStatus = 0 }
        if ($lookupStatus -and $lookupStatus -ne 404) {
            Write-Warning "Could not check or remove existing datasource '$dataSourceName' (HTTP $lookupStatus): $($lookupError.Exception.Message). Attempting creation anyway..."
        } else {
            # 404, or the status code is not available on the exception.
            Write-Host "No existing datasource found, creating new one..."
        }
    }

    # Create the datasource
    $createDataSourceUri = "https://$aiSearchName.search.windows.net/datasources" + "?api-version=$apiVersion"
    try {
        $response = Invoke-SearchRequest -Method 'POST' -Uri $createDataSourceUri -Body $dataSourceBody

        Write-Host ""
        Write-Host "OneLake data source created successfully!"
        Write-Host "Datasource Name: $($response.name)"
        Write-Host "Lakehouse ID: $($response.container.name)"
    } catch {
        Write-Error "Failed to create OneLake datasource: $($_.Exception.Message)"
        if ($_.ErrorDetails -and $_.ErrorDetails.Message) {
            Write-Host "Error details: $($_.ErrorDetails.Message)"
        }

        # Dump the raw HTTP response when the exception carries one.
        $response = $null
        try { $response = $_.Exception.Response } catch { $response = $null }
        if ($response -and $response -is [System.Net.Http.HttpResponseMessage]) {
            Write-Host "HTTP Status: $($response.StatusCode)"
            Write-Host "HTTP Reason: $($response.ReasonPhrase)"
            try {
                $bodyText = $response.Content.ReadAsStringAsync().Result
                if ($bodyText) {
                    Write-Host "HTTP Body: $bodyText"
                }
            } catch { }
        }

        # Try using curl with the bearer token to get a better error message when possible.
        # NOTE(review): $accessToken is not assigned anywhere in this script - presumably it is
        # set by the dot-sourced helpers; if it is not, this diagnostic is simply skipped.
        if ($accessToken) {
            # In Windows PowerShell 'curl' is an alias for Invoke-WebRequest, which does not
            # understand these switches; resolve the native executable explicitly.
            $curlExe = Get-Command -Name curl -CommandType Application -ErrorAction SilentlyContinue | Select-Object -First 1
            if ($curlExe) {
                Write-Host ""
                Write-Host "Attempting to get detailed error using curl..."
                $curlResult = & $curlExe -s -D - -X POST "$createDataSourceUri" -H "Authorization: Bearer $accessToken" -H "Content-Type: application/json" -d $dataSourceBody
                Write-Host "Curl result:"
                Write-Host $curlResult
            }
        }

        exit 1
    }

    # Name the identity that was actually configured so the guidance is not
    # misleading for user-assigned identities.
    $identityLabel = 'System-Assigned Managed Identity'
    if ($identityType -eq 'userAssignedManagedIdentity') {
        $identityLabel = 'User-Assigned Managed Identity'
    }

    Write-Host ""
    Write-Host "⚠️ IMPORTANT: Ensure the AI Search $identityLabel has:"
    Write-Host " 1. OneLake data access role in the Fabric workspace"
    Write-Host " 2. Storage Blob Data Reader role in Azure"
} finally {
    Restore-SearchPublicAccess -OriginalAccess $originalPublicAccess
}