Skip to content

Commit daa373e

Browse files
roji and Copilot committed
Update SQL Server vector search to latest VECTOR_SEARCH() syntax
- Replace deprecated TOP_N parameter with SELECT TOP(N) WITH APPROXIMATE
- Enable iterative filtering (WHERE predicates during vector search)
- Support skip via subquery wrapping (TOP and OFFSET/FETCH can't coexist)
- Add Azure SQL runtime detection for DiskAnn (SERVERPROPERTY EngineEdition)
- Remove read-only table workaround (SqlServerDiskAnnVectorSearchTests)
- Update hybrid search CTE with new VECTOR_SEARCH syntax
- Gate DiskAnn conformance test on Azure SQL connection string
- Handle 100-row minimum requirement for DiskAnn vector index creation

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 20c91a3 commit daa373e

File tree

7 files changed

+267
-228
lines changed

7 files changed

+267
-228
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -502,3 +502,5 @@ swa-cli.config.json
502502

503503
# dapr extension files
504504
**/dapr.yaml
505+
506+
*.lscache

dotnet/src/VectorData/SqlServer/SqlServerCollection.cs

Lines changed: 70 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,12 @@ public class SqlServerCollection<TKey, TRecord>
4242
/// <summary>The database schema.</summary>
4343
private readonly string? _schema;
4444

45+
/// <summary>Whether the model contains any DiskAnn vector properties, requiring Azure SQL.</summary>
46+
private readonly bool _requiresAzureSql;
47+
48+
/// <summary>Cached result of the Azure SQL engine edition check (null = not yet checked).</summary>
49+
private bool? _isAzureSql;
50+
4551
/// <summary>
4652
/// Initializes a new instance of the <see cref="SqlServerCollection{TKey, TRecord}"/> class.
4753
/// </summary>
@@ -78,6 +84,16 @@ internal SqlServerCollection(string connectionString, string name, Func<SqlServe
7884

7985
this._mapper = new SqlServerMapper<TRecord>(this._model);
8086

87+
// Check if any vector property uses DiskAnn, which requires Azure SQL.
88+
foreach (var vp in this._model.VectorProperties)
89+
{
90+
if (vp.IndexKind is not (null or "" or IndexKind.Flat))
91+
{
92+
this._requiresAzureSql = true;
93+
break;
94+
}
95+
}
96+
8197
var connectionStringBuilder = new SqlConnectionStringBuilder(connectionString);
8298

8399
this._collectionMetadata = new()
@@ -116,6 +132,12 @@ public override Task EnsureCollectionExistsAsync(CancellationToken cancellationT
116132
private async Task CreateCollectionAsync(bool ifNotExists, CancellationToken cancellationToken)
117133
{
118134
using SqlConnection connection = new(this._connectionString);
135+
136+
if (this._requiresAzureSql)
137+
{
138+
await this.EnsureAzureSqlForDiskAnnAsync(connection, cancellationToken).ConfigureAwait(false);
139+
}
140+
119141
List<SqlCommand> commands = SqlServerCommandBuilder.CreateTable(
120142
connection,
121143
this._schema,
@@ -604,6 +626,12 @@ _ when vectorProperty.EmbeddingGenerationDispatcher is not null
604626
// Connection and command are going to be disposed by the ReadVectorSearchResultsAsync,
605627
// when the user is done with the results.
606628
SqlConnection connection = new(this._connectionString);
629+
630+
if (vectorProperty.IndexKind is not (null or "" or IndexKind.Flat))
631+
{
632+
await this.EnsureAzureSqlForDiskAnnAsync(connection, cancellationToken).ConfigureAwait(false);
633+
}
634+
607635
SqlCommand command = SqlServerCommandBuilder.SelectVector(
608636
connection,
609637
this._schema,
@@ -664,6 +692,12 @@ _ when vectorProperty.EmbeddingGenerationDispatcher is not null
664692
// Connection and command are going to be disposed by the ReadVectorSearchResultsAsync,
665693
// when the user is done with the results.
666694
SqlConnection connection = new(this._connectionString);
695+
696+
if (vectorProperty.IndexKind is not (null or "" or IndexKind.Flat))
697+
{
698+
await this.EnsureAzureSqlForDiskAnnAsync(connection, cancellationToken).ConfigureAwait(false);
699+
}
700+
667701
SqlCommand command = SqlServerCommandBuilder.SelectHybrid(
668702
connection,
669703
this._schema,
@@ -807,4 +841,40 @@ public override async IAsyncEnumerable<TRecord> GetAsync(Expression<Func<TRecord
807841
yield return this._mapper.MapFromStorageToDataModel(reader, options.IncludeVectors);
808842
}
809843
}
844+
845+
/// <summary>
846+
/// Validates that the connection is to Azure SQL Database or SQL database in Microsoft Fabric,
847+
/// which is required for DiskAnn vector indexes and the VECTOR_SEARCH function.
848+
/// </summary>
849+
private async Task EnsureAzureSqlForDiskAnnAsync(SqlConnection connection, CancellationToken cancellationToken)
850+
{
851+
if (this._isAzureSql is true)
852+
{
853+
return;
854+
}
855+
856+
if (connection.State != System.Data.ConnectionState.Open)
857+
{
858+
await connection.OpenAsync(cancellationToken).ConfigureAwait(false);
859+
}
860+
861+
using var command = connection.CreateCommand();
862+
command.CommandText = "SELECT SERVERPROPERTY('EngineEdition')";
863+
var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
864+
var engineEdition = Convert.ToInt32(result);
865+
866+
// 5 = Azure SQL Database, 11 = SQL database in Microsoft Fabric
867+
this._isAzureSql = engineEdition is 5 or 11;
868+
869+
if (!this._isAzureSql.Value)
870+
{
871+
// Dispose the connection before throwing; in SearchAsync/HybridSearchAsync the connection
872+
// is not in a using block (it's normally disposed by ReadVectorSearchResultsAsync).
873+
connection.Dispose();
874+
875+
throw new NotSupportedException(
876+
"DiskAnn vector indexes and the VECTOR_SEARCH function require Azure SQL Database or SQL database in Microsoft Fabric. " +
877+
"They are not supported on SQL Server. Use a Flat index kind with VECTOR_DISTANCE instead.");
878+
}
879+
}
810880
}

dotnet/src/VectorData/SqlServer/SqlServerCommandBuilder.cs

Lines changed: 61 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -535,41 +535,67 @@ private static SqlCommand SelectVectorWithVectorSearch<TRecord>(
535535
string distanceMetric,
536536
string sorting)
537537
{
538-
// VECTOR_SEARCH() currently only supports post-filtering (TOP_N candidates are returned first,
539-
// then predicates are applied). Pre-filtering is not supported.
540-
if (options.Filter is not null)
541-
{
542-
throw new NotSupportedException(
543-
"Filtering is not supported with approximate vector search (VECTOR_SEARCH). " +
544-
"Remove the filter or use IndexKind.Flat for exact search with VECTOR_DISTANCE.");
545-
}
546-
547538
SqlCommand command = connection.CreateCommand();
548539
command.Parameters.AddWithValue("@vector", vector);
549540

550541
StringBuilder sb = new(300);
551542

543+
// When skip > 0, we need a subquery since TOP and OFFSET/FETCH can't coexist in the same SELECT.
544+
bool needsSubquery = options.Skip > 0;
545+
546+
if (needsSubquery)
547+
{
548+
sb.Append("SELECT * FROM (");
549+
}
550+
552551
// VECTOR_SEARCH returns all columns from the table plus a 'distance' column.
553552
// We select the needed columns from the table alias and alias 'distance' as 'score'.
554-
sb.Append("SELECT ");
553+
// The latest version vector indexes require SELECT TOP(N) WITH APPROXIMATE instead of the deprecated TOP_N parameter.
554+
sb.Append("SELECT TOP(").Append(top + options.Skip).Append(") WITH APPROXIMATE ");
555555
sb.AppendIdentifiers(model.Properties, prefix: "t.", includeVectors: options.IncludeVectors);
556556
sb.AppendLine(",");
557557
sb.AppendLine("s.[distance] AS [score]");
558558
sb.Append("FROM VECTOR_SEARCH(TABLE = ");
559559
sb.AppendTableName(schema, tableName);
560560
sb.Append(" AS t, COLUMN = ").AppendIdentifier(vectorProperty.StorageName);
561-
sb.Append(", SIMILAR_TO = @vector, METRIC = '").Append(distanceMetric).Append('\'');
562-
sb.Append(", TOP_N = ").Append(top + options.Skip).AppendLine(") AS s");
561+
sb.Append(", SIMILAR_TO = @vector, METRIC = '").Append(distanceMetric).AppendLine("') AS s");
562+
563+
// With latest version vector indexes, WHERE predicates are applied during the vector search process
564+
// (iterative filtering), not after retrieval.
565+
if (options.Filter is not null)
566+
{
567+
int startParamIndex = command.Parameters.Count;
568+
569+
SqlServerFilterTranslator translator = new(model, options.Filter, sb, startParamIndex: startParamIndex, tableAlias: "t");
570+
translator.Translate(appendWhere: true);
571+
List<object> parameters = translator.ParameterValues;
572+
573+
foreach (object parameter in parameters)
574+
{
575+
command.AddParameter(vectorProperty, $"@_{startParamIndex++}", parameter);
576+
}
577+
578+
sb.AppendLine();
579+
}
563580

564581
if (options.ScoreThreshold is not null)
565582
{
566583
command.Parameters.AddWithValue("@scoreThreshold", options.ScoreThreshold!.Value);
567-
sb.AppendLine("WHERE s.[distance] <= @scoreThreshold");
584+
sb.Append(options.Filter is not null ? "AND " : "WHERE ");
585+
sb.AppendLine("s.[distance] <= @scoreThreshold");
568586
}
569587

570588
sb.AppendFormat("ORDER BY [score] {0}", sorting);
571-
sb.AppendLine();
572-
sb.AppendFormat("OFFSET {0} ROWS FETCH NEXT {1} ROWS ONLY;", options.Skip, top);
589+
590+
if (needsSubquery)
591+
{
592+
sb.AppendLine();
593+
sb.Append(") AS [inner]");
594+
sb.AppendLine();
595+
sb.AppendFormat("ORDER BY [score] {0}", sorting);
596+
sb.AppendLine();
597+
sb.AppendFormat("OFFSET {0} ROWS FETCH NEXT {1} ROWS ONLY;", options.Skip, top);
598+
}
573599

574600
command.CommandText = sb.ToString();
575601
return command;
@@ -587,15 +613,6 @@ internal static SqlCommand SelectHybrid<TRecord>(
587613
{
588614
bool useVectorSearch = UseVectorSearch(vectorProperty);
589615

590-
// VECTOR_SEARCH() currently only supports post-filtering (TOP_N candidates are returned first,
591-
// then predicates are applied). Pre-filtering is not supported.
592-
if (useVectorSearch && options.Filter is not null)
593-
{
594-
throw new NotSupportedException(
595-
"Filtering is not supported with approximate vector search (VECTOR_SEARCH). " +
596-
"Remove the filter or use IndexKind.Flat for exact search with VECTOR_DISTANCE.");
597-
}
598-
599616
string distanceFunction = vectorProperty.DistanceFunction ?? DistanceFunction.CosineDistance;
600617
(string distanceMetric, _) = MapDistanceFunction(distanceFunction);
601618

@@ -652,16 +669,32 @@ internal static SqlCommand SelectHybrid<TRecord>(
652669
// CTE 2: Semantic/vector search
653670
if (useVectorSearch)
654671
{
655-
// Use VECTOR_SEARCH() for approximate nearest neighbor search with a vector index
672+
// Use VECTOR_SEARCH() for approximate nearest neighbor search with a vector index.
673+
// The latest version vector indexes require SELECT TOP(N) WITH APPROXIMATE instead of the deprecated TOP_N parameter.
656674
sb.AppendLine("semantic_search AS (");
657-
sb.AppendLine(" SELECT TOP(@candidateCount)");
675+
sb.AppendLine(" SELECT TOP(@candidateCount) WITH APPROXIMATE");
658676
sb.Append(" t.").AppendIdentifier(model.KeyProperty.StorageName).AppendLine(",");
659677
sb.AppendLine(" RANK() OVER (ORDER BY s.[distance]) AS [rank]");
660678
sb.AppendLine(" FROM VECTOR_SEARCH(TABLE = ");
661679
sb.Append(" ").AppendTableName(schema, tableName);
662680
sb.Append(" AS t, COLUMN = ").AppendIdentifier(vectorProperty.StorageName);
663-
sb.Append(", SIMILAR_TO = @vector, METRIC = '").Append(distanceMetric).Append('\'');
664-
sb.Append(", TOP_N = @candidateCount").AppendLine(") AS s");
681+
sb.Append(", SIMILAR_TO = @vector, METRIC = '").Append(distanceMetric).AppendLine("') AS s");
682+
683+
// With latest version vector indexes, WHERE predicates are applied during the vector search process
684+
// (iterative filtering), not after retrieval.
685+
if (options.Filter is not null)
686+
{
687+
int filterParamStart = command.Parameters.Count;
688+
SqlServerFilterTranslator translator = new(model, options.Filter, sb, startParamIndex: filterParamStart, tableAlias: "t");
689+
translator.Translate(appendWhere: true);
690+
foreach (object parameter in translator.ParameterValues)
691+
{
692+
command.AddParameter(property: null, $"@_{filterParamStart++}", parameter);
693+
}
694+
sb.AppendLine();
695+
}
696+
697+
sb.AppendLine(" ORDER BY s.[distance]");
665698
sb.AppendLine("),");
666699
}
667700
else

dotnet/test/VectorData/SqlServer.ConformanceTests/SqlServerCommandBuilderTests.cs

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -262,11 +262,10 @@ public void SelectVector_WithDiskAnnIndex()
262262

263263
Assert.Equal(
264264
"""
265-
SELECT t.[id],t.[name],t.[embedding],
265+
SELECT TOP(5) WITH APPROXIMATE t.[id],t.[name],t.[embedding],
266266
s.[distance] AS [score]
267-
FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE', TOP_N = 5) AS s
267+
FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE') AS s
268268
ORDER BY [score] ASC
269-
OFFSET 0 ROWS FETCH NEXT 5 ROWS ONLY;
270269
""", command.CommandText, ignoreLineEndingDifferences: true);
271270
}
272271

@@ -295,16 +294,18 @@ public void SelectVector_WithDiskAnnIndex_WithSkip()
295294

296295
Assert.Equal(
297296
"""
298-
SELECT t.[id],t.[name],
297+
SELECT * FROM (SELECT TOP(8) WITH APPROXIMATE t.[id],t.[name],
299298
s.[distance] AS [score]
300-
FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE', TOP_N = 8) AS s
299+
FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE') AS s
300+
ORDER BY [score] ASC
301+
) AS [inner]
301302
ORDER BY [score] ASC
302303
OFFSET 3 ROWS FETCH NEXT 5 ROWS ONLY;
303304
""", command.CommandText, ignoreLineEndingDifferences: true);
304305
}
305306

306307
[Fact]
307-
public void SelectVector_WithDiskAnnIndex_WithFilter_Throws()
308+
public void SelectVector_WithDiskAnnIndex_WithFilter()
308309
{
309310
var model = BuildModel(
310311
[
@@ -324,12 +325,20 @@ public void SelectVector_WithDiskAnnIndex_WithFilter_Throws()
324325
Filter = d => (string)d["name"]! == "test"
325326
};
326327

327-
Assert.Throws<NotSupportedException>(() =>
328-
SqlServerCommandBuilder.SelectVector(
329-
connection, "schema", "table",
330-
model.VectorProperties[0], model,
331-
top: 5, options,
332-
new SqlVector<float>(new float[] { 1f, 2f, 3f })));
328+
using SqlCommand command = SqlServerCommandBuilder.SelectVector(
329+
connection, "schema", "table",
330+
model.VectorProperties[0], model,
331+
top: 5, options,
332+
new SqlVector<float>(new float[] { 1f, 2f, 3f }));
333+
334+
Assert.Equal(
335+
"""
336+
SELECT TOP(5) WITH APPROXIMATE t.[id],t.[name],
337+
s.[distance] AS [score]
338+
FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE') AS s
339+
WHERE (t.[name] = 'test')
340+
ORDER BY [score] ASC
341+
""", command.CommandText, ignoreLineEndingDifferences: true);
333342
}
334343

335344
[Fact]
@@ -361,12 +370,11 @@ public void SelectVector_WithDiskAnnIndex_WithScoreThreshold()
361370

362371
Assert.Equal(
363372
"""
364-
SELECT t.[id],t.[name],t.[embedding],
373+
SELECT TOP(5) WITH APPROXIMATE t.[id],t.[name],t.[embedding],
365374
s.[distance] AS [score]
366-
FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE', TOP_N = 5) AS s
375+
FROM VECTOR_SEARCH(TABLE = [schema].[table] AS t, COLUMN = [embedding], SIMILAR_TO = @vector, METRIC = 'COSINE') AS s
367376
WHERE s.[distance] <= @scoreThreshold
368377
ORDER BY [score] ASC
369-
OFFSET 0 ROWS FETCH NEXT 5 ROWS ONLY;
370378
""", command.CommandText, ignoreLineEndingDifferences: true);
371379
}
372380

0 commit comments

Comments
 (0)