diff --git a/docs/post_deployment_steps.md b/docs/post_deployment_steps.md
index aac6b8f..b14f030 100644
--- a/docs/post_deployment_steps.md
+++ b/docs/post_deployment_steps.md
@@ -149,6 +149,8 @@ If the connection fails, verify RBAC roles are assigned (see Troubleshooting sec
 
 If `purviewCollectionName` is left empty in [infra/main.bicepparam](../infra/main.bicepparam), the automation now uses `collection-`.
 
+> **Note:** If a tenant-level Fabric datasource already exists under a different collection, the scan script automatically reparents the deployment collection as a child of the datasource's collection. This ensures scans comply with Purview's requirement that scans are created within the datasource's collection hierarchy. In the Purview portal, your deployment collection may appear nested under the datasource's collection rather than at the root.
+
 If the identity running `azd` does not have **Purview Collection Admin** (or equivalent) on the target collection, the Purview scripts will warn and skip collection, datasource, and scan steps. Grant the role, then rerun the Purview scripts.
 
 If you need to rerun the Purview steps after provisioning:
@@ -288,10 +290,13 @@ pwsh ./scripts/automationScripts/OneLakeIndex/06_setup_ai_foundry_search_rbac.ps
 2. Check scan configuration:
    - Purview Portal → Data Map → Sources → Fabric source → Scans
 
-3. Re-run the registration script:
+3. **`Scan_CollectionOutOfBound` error:** Purview requires that scans are created under the datasource's collection or a child of it. If your deployment collection is not under the datasource's collection, the scan script will attempt to reparent it automatically. If this fails, manually move your deployment collection under the datasource's collection in Purview Portal → Data Map → Collections.
+
+4. Re-run the scan pipeline:
    ```bash
    eval $(azd env get-values)
    pwsh ./scripts/automationScripts/FabricWorkspace/CreateWorkspace/register_fabric_datasource.ps1
+   pwsh ./scripts/automationScripts/FabricPurviewAutomation/trigger_purview_scan_for_fabric_workspace.ps1
    ```
 
 ### Post-Provision Hooks Failed
diff --git a/scripts/automationScripts/FabricPurviewAutomation/trigger_purview_scan_for_fabric_workspace.ps1 b/scripts/automationScripts/FabricPurviewAutomation/trigger_purview_scan_for_fabric_workspace.ps1
index 6c61084..490e47e 100644
--- a/scripts/automationScripts/FabricPurviewAutomation/trigger_purview_scan_for_fabric_workspace.ps1
+++ b/scripts/automationScripts/FabricPurviewAutomation/trigger_purview_scan_for_fabric_workspace.ps1
@@ -207,10 +207,94 @@ if (Test-Path $collectionEnvPath) {
     if ($_ -match '^PURVIEW_COLLECTION_ID=(.*)$') { $collectionId = $Matches[1].Trim() }
   }
 }
+# Fallback: when the temp env file is missing, resolve the collection from the azd
+# environment. Keys are tried in order; the first one that yields a non-empty value wins.
+if (-not $collectionId) {
+  foreach ($azdKey in @('purviewCollectionName', 'desiredFabricDomainName')) {
+    try {
+      $azdValue = & azd env get-value $azdKey 2>$null
+      if ($LASTEXITCODE -eq 0 -and $azdValue) { $collectionId = $azdValue.Trim() }
+    } catch {
+      # Best-effort lookup: record why it failed and move on to the next key.
+      Log "Could not read '$azdKey' from azd env: $($_.Exception.Message)"
+    }
+    if ($collectionId) { break }
+  }
+}
 if (-not $collectionId) {
   Log "No Purview collection found. Scan will be created in root collection."
 }
 
+# Resolve the datasource's own collection to avoid Scan_CollectionOutOfBound errors.
+# Purview requires scans to be created under the datasource's collection or a child of it.
+$datasourceCollectionId = $null
+$datasourceEnvPathForColl = Join-Path $tempDir 'fabric_datasource.env'
+if (Test-Path $datasourceEnvPathForColl) {
+  Get-Content $datasourceEnvPathForColl | ForEach-Object {
+    if ($_ -match '^FABRIC_COLLECTION_ID=(.+)$') { $datasourceCollectionId = $Matches[1].Trim() }
+  }
+}
+if (-not $datasourceCollectionId) {
+  # Query the datasource directly to get its collection
+  try {
+    $dsInfo = Invoke-SecureRestMethod -Uri "$endpoint/scan/datasources/${datasourceName}?api-version=2022-07-01-preview" -Headers $purviewHeaders -Method Get -ErrorAction Stop
+    if ($dsInfo.properties.collection.referenceName) {
+      $datasourceCollectionId = $dsInfo.properties.collection.referenceName
+      Log "Datasource '$datasourceName' belongs to collection: $datasourceCollectionId"
+    }
+  } catch {
+    Log "Could not query datasource collection: $($_.Exception.Message)"
+  }
+}
+
+# If our deployment collection differs from the datasource collection, reparent it as a child.
+if ($collectionId -and $datasourceCollectionId -and $collectionId -ne $datasourceCollectionId) {
+  try {
+    $reparentUrl = "$endpoint/account/collections/${collectionId}?api-version=2019-11-01-preview"
+    $reparentHeaders = New-SecureHeaders -Token $purviewToken -AdditionalHeaders @{'Content-Type' = 'application/json'}
+
+    # Read the current definition first so the PUT below can echo back friendlyName and
+    # description instead of sending a payload that omits them.
+    # NOTE(review): assumes the service treats the PUT body as the full definition - confirm.
+    $existingColl = $null
+    try {
+      $existingColl = Invoke-SecureRestMethod -Uri $reparentUrl -Headers $reparentHeaders -Method Get -ErrorAction Stop
+    } catch {
+      # Not fatal: continue with the PUT using only the parent reference.
+      Log "Could not read collection '$collectionId' before reparenting: $($_.Exception.Message)"
+    }
+
+    if ($existingColl -and ($existingColl.parentCollection.referenceName -eq $datasourceCollectionId)) {
+      Log "Collection '$collectionId' is already a child of '$datasourceCollectionId'"
+    } else {
+      Log "Deployment collection '$collectionId' is not under datasource collection '$datasourceCollectionId'. Reparenting..."
+      $reparentPayload = @{
+        parentCollection = @{
+          referenceName = $datasourceCollectionId
+          type = 'CollectionReference'
+        }
+      }
+      if ($existingColl -and $existingColl.friendlyName) { $reparentPayload.friendlyName = $existingColl.friendlyName }
+      if ($existingColl -and $existingColl.description) { $reparentPayload.description = $existingColl.description }
+      $reparentBody = $reparentPayload | ConvertTo-Json -Depth 5
+      $reparentResp = Invoke-SecureWebRequest -Uri $reparentUrl -Headers $reparentHeaders -Method Put -Body $reparentBody -ErrorAction Stop
+      if ($reparentResp.StatusCode -ge 200 -and $reparentResp.StatusCode -lt 300) {
+        Log "Collection '$collectionId' reparented under '$datasourceCollectionId' successfully"
+      } else {
+        Warn "Reparent returned HTTP $($reparentResp.StatusCode). Falling back to datasource collection."
+        $collectionId = $datasourceCollectionId
+      }
+    }
+  } catch {
+    Warn "Failed to reparent collection: $($_.Exception.Message). Falling back to datasource collection."
+    $collectionId = $datasourceCollectionId
+  }
+} elseif (-not $collectionId -and $datasourceCollectionId) {
+  # No deployment collection — use the datasource's collection
+  $collectionId = $datasourceCollectionId
+  Log "Using datasource collection: $collectionId"
+}
+
 $scanName = "scan-workspace-$WorkspaceId"
 Log "Creating/Updating scan '$scanName' for datasource '$datasourceName' targeting workspace '$WorkspaceId'"
 