Skip to content

Commit bc2c366

Browse files
committed
Adding Oracle Vector Store
1 parent de947e2 commit bc2c366

2 files changed

Lines changed: 301 additions & 0 deletions

File tree

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
package com.microsoft.semantickernel.data.jdbc.oracle;
2+
3+
import com.microsoft.semantickernel.data.jdbc.JDBCVectorStoreQueryProvider;
4+
import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordDefinition;
5+
import com.microsoft.semantickernel.data.vectorstorage.options.UpsertRecordOptions;
6+
import com.microsoft.semantickernel.exceptions.SKException;
7+
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
8+
9+
import javax.annotation.Nonnull;
10+
import javax.sql.DataSource;
11+
import java.sql.Connection;
12+
import java.sql.PreparedStatement;
13+
import java.sql.SQLException;
14+
import java.util.List;
15+
16+
public class OracleVectorStoreQueryProvider extends JDBCVectorStoreQueryProvider {
17+
18+
// This could be removed if super.collectionTable made protected
19+
private final String collectionsTable;
20+
21+
private OracleVectorStoreQueryProvider(@Nonnull DataSource dataSource, @Nonnull String collectionsTable, @Nonnull String prefixForCollectionTables) {
22+
super(dataSource, collectionsTable, prefixForCollectionTables);
23+
this.collectionsTable = collectionsTable;
24+
}
25+
26+
@Override
27+
public void prepareVectorStore() {
28+
String createCollectionsTable = formatQuery(
29+
"CREATE TABLE IF NOT EXISTS %s (collectionId VARCHAR(255) PRIMARY KEY)",
30+
validateSQLidentifier(collectionsTable));
31+
32+
try (Connection connection = dataSource.getConnection();
33+
PreparedStatement createTable = connection.prepareStatement(createCollectionsTable)) {
34+
createTable.execute();
35+
} catch (SQLException e) {
36+
throw new SKException("Failed to prepare vector store", e);
37+
}
38+
}
39+
40+
@Override
41+
public void upsertRecords(String collectionName, List<?> records, VectorStoreRecordDefinition recordDefinition, UpsertRecordOptions options) {
42+
43+
// Using hsqldb impl
44+
45+
46+
super.upsertRecords(collectionName, records, recordDefinition, options);
47+
}
48+
49+
public static Builder builder() {
50+
return new Builder();
51+
}
52+
53+
public static class Builder
54+
extends JDBCVectorStoreQueryProvider.Builder {
55+
56+
private DataSource dataSource;
57+
private String collectionsTable = DEFAULT_COLLECTIONS_TABLE;
58+
private String prefixForCollectionTables = DEFAULT_PREFIX_FOR_COLLECTION_TABLES;
59+
60+
@SuppressFBWarnings("EI_EXPOSE_REP2")
61+
public Builder withDataSource(DataSource dataSource) {
62+
this.dataSource = dataSource;
63+
return this;
64+
}
65+
66+
/**
67+
* Sets the collections table name.
68+
* @param collectionsTable the collections table name
69+
* @return the builder
70+
*/
71+
public Builder withCollectionsTable(String collectionsTable) {
72+
this.collectionsTable = validateSQLidentifier(collectionsTable);
73+
return this;
74+
}
75+
76+
/**
77+
* Sets the prefix for collection tables.
78+
* @param prefixForCollectionTables the prefix for collection tables
79+
* @return the builder
80+
*/
81+
public Builder withPrefixForCollectionTables(String prefixForCollectionTables) {
82+
this.prefixForCollectionTables = validateSQLidentifier(prefixForCollectionTables);
83+
return this;
84+
}
85+
86+
@Override
87+
public OracleVectorStoreQueryProvider build() {
88+
return new OracleVectorStoreQueryProvider(dataSource, collectionsTable, prefixForCollectionTables);
89+
}
90+
}
91+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
package com.microsoft.semantickernel.samples.syntaxexamples.memory;
3+
4+
import com.azure.ai.openai.OpenAIAsyncClient;
5+
import com.azure.ai.openai.OpenAIClientBuilder;
6+
import com.azure.core.credential.AzureKeyCredential;
7+
import com.azure.core.credential.KeyCredential;
8+
import com.microsoft.semantickernel.aiservices.openai.textembedding.OpenAITextEmbeddingGenerationService;
9+
import com.microsoft.semantickernel.data.jdbc.JDBCVectorStore;
10+
import com.microsoft.semantickernel.data.jdbc.JDBCVectorStoreOptions;
11+
import com.microsoft.semantickernel.data.jdbc.JDBCVectorStoreRecordCollectionOptions;
12+
import com.microsoft.semantickernel.data.jdbc.postgres.PostgreSQLVectorStoreQueryProvider;
13+
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResults;
14+
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection;
15+
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData;
16+
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey;
17+
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector;
18+
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
19+
20+
import java.nio.charset.StandardCharsets;
21+
import java.sql.SQLException;
22+
import java.util.Arrays;
23+
import java.util.Base64;
24+
import java.util.Collections;
25+
import java.util.List;
26+
import java.util.Map;
27+
import java.util.stream.Collectors;
28+
29+
import org.postgresql.ds.PGSimpleDataSource;
30+
import reactor.core.publisher.Flux;
31+
import reactor.core.publisher.Mono;
32+
33+
public class VectorStoreWithOracle {
34+
35+
static class GitHubFile {
36+
@VectorStoreRecordKey
37+
private final String id;
38+
@VectorStoreRecordData
39+
private final String description;
40+
@VectorStoreRecordData
41+
private final String link;
42+
@VectorStoreRecordVector(dimensions = EMBEDDING_DIMENSIONS, distanceFunction = DistanceFunction.COSINE_DISTANCE)
43+
private final List<Float> embedding;
44+
45+
public GitHubFile() {
46+
this(null, null, null, Collections.emptyList());
47+
}
48+
49+
public GitHubFile(
50+
String id,
51+
String description,
52+
String link,
53+
List<Float> embedding) {
54+
this.id = id;
55+
this.description = description;
56+
this.link = link;
57+
this.embedding = embedding;
58+
}
59+
60+
public String getId() {
61+
return id;
62+
}
63+
64+
public String getDescription() {
65+
return description;
66+
}
67+
68+
public String getLink() {
69+
return link;
70+
}
71+
72+
public List<Float> getEmbedding() {
73+
return embedding;
74+
}
75+
76+
static String encodeId(String realId) {
77+
byte[] bytes = Base64.getUrlEncoder().encode(realId.getBytes(StandardCharsets.UTF_8));
78+
return new String(bytes, StandardCharsets.UTF_8);
79+
}
80+
}
81+
82+
// Run a PostgreSQL server with:
83+
// docker run -d --name pgvector-container -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=root -e POSTGRES_DB=sk -p 5432:5432 pgvector/pgvector:pg17
84+
85+
public static void main(String[] args) throws SQLException {
86+
System.out.println("==============================================================");
87+
System.out.println("============== Oracle Vector Store Example ===================");
88+
System.out.println("==============================================================");
89+
90+
OpenAIAsyncClient client;
91+
92+
if (AZURE_CLIENT_KEY != null) {
93+
client = new OpenAIClientBuilder()
94+
.credential(new AzureKeyCredential(AZURE_CLIENT_KEY))
95+
.endpoint(CLIENT_ENDPOINT)
96+
.buildAsyncClient();
97+
98+
} else {
99+
client = new OpenAIClientBuilder()
100+
.credential(new KeyCredential(CLIENT_KEY))
101+
.buildAsyncClient();
102+
}
103+
104+
// Create an OpenAI text embedding generation service
105+
var embeddingGeneration = OpenAITextEmbeddingGenerationService.builder()
106+
.withOpenAIAsyncClient(client)
107+
.withModelId(MODEL_ID)
108+
.withDimensions(EMBEDDING_DIMENSIONS)
109+
.build();
110+
111+
storeAndSearch(embeddingGeneration);
112+
}
113+
114+
public static void storeAndSearch(OpenAITextEmbeddingGenerationService embeddingGeneration) {
115+
// Configure the data source
116+
PGSimpleDataSource dataSource = new PGSimpleDataSource();
117+
dataSource.setUrl("jdbc:postgresql://localhost:5432/sk");
118+
dataSource.setUser("postgres");
119+
dataSource.setPassword("root");
120+
121+
// Build a query provider
122+
// Other available query providers are PostgreSQLVectorStoreQueryProvider and SQLiteVectorStoreQueryProvider
123+
var queryProvider = PostgreSQLVectorStoreQueryProvider.builder()
124+
.withDataSource(dataSource)
125+
.build();
126+
127+
// Build a vector store
128+
var jdbcVectorStore = JDBCVectorStore.builder()
129+
.withDataSource(dataSource)
130+
.withOptions(JDBCVectorStoreOptions.builder()
131+
.withQueryProvider(queryProvider)
132+
.build())
133+
.build();
134+
135+
// Set up the record collection to use
136+
String collectionName = "skgithubfiles";
137+
var collection = jdbcVectorStore.getCollection(collectionName,
138+
JDBCVectorStoreRecordCollectionOptions.<GitHubFile>builder()
139+
.withRecordClass(GitHubFile.class)
140+
.build());
141+
142+
// Create collection if it does not exist and store data
143+
collection
144+
.createCollectionIfNotExistsAsync()
145+
.then(storeData(collection, embeddingGeneration, sampleData()))
146+
.block();
147+
148+
// Search for results
149+
var results = search("How to get started", collection, embeddingGeneration).block();
150+
151+
if (results == null || results.getTotalCount() == 0) {
152+
System.out.println("No search results found.");
153+
return;
154+
}
155+
var searchResult = results.getResults().get(0);
156+
System.out.printf("Search result with score: %f.%n Link: %s, Description: %s%n",
157+
searchResult.getScore(), searchResult.getRecord().link,
158+
searchResult.getRecord().description);
159+
}
160+
161+
private static Mono<VectorSearchResults<GitHubFile>> search(
162+
String searchText,
163+
VectorStoreRecordCollection<String, GitHubFile> recordCollection,
164+
OpenAITextEmbeddingGenerationService embeddingGeneration) {
165+
// Generate embeddings for the search text and search for the closest records
166+
return embeddingGeneration.generateEmbeddingAsync(searchText)
167+
.flatMap(r -> recordCollection.searchAsync(r.getVector(), null));
168+
}
169+
170+
private static Mono<List<String>> storeData(
171+
VectorStoreRecordCollection<String, GitHubFile> recordStore,
172+
OpenAITextEmbeddingGenerationService embeddingGeneration,
173+
Map<String, String> data) {
174+
175+
return Flux.fromIterable(data.entrySet())
176+
.flatMap(entry -> {
177+
System.out.println("Save '" + entry.getKey() + "' to memory.");
178+
179+
// Generate embeddings for the data and store it
180+
return embeddingGeneration
181+
.generateEmbeddingsAsync(Collections.singletonList(entry.getValue()))
182+
.flatMap(embeddings -> {
183+
GitHubFile gitHubFile = new GitHubFile(
184+
GitHubFile.encodeId(entry.getKey()),
185+
entry.getValue(),
186+
entry.getKey(),
187+
embeddings.get(0).getVector());
188+
return recordStore.upsertAsync(gitHubFile, null);
189+
});
190+
})
191+
.collectList();
192+
}
193+
194+
private static Map<String, String> sampleData() {
195+
return Arrays.stream(new String[][] {
196+
{ "https://github.com/microsoft/semantic-kernel/blob/main/README.md",
197+
"README: Installation, getting started with Semantic Kernel, and how to contribute" },
198+
{ "https://github.com/microsoft/semantic-kernel/blob/main/samples/notebooks/dotnet/02-running-prompts-from-file.ipynb",
199+
"Jupyter notebook describing how to pass prompts from a file to a semantic skill or function" },
200+
{ "https://github.com/microsoft/semantic-kernel/tree/main/samples/skills/ChatSkill/ChatGPT",
201+
"Sample demonstrating how to create a chat skill interfacing with ChatGPT" },
202+
{ "https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/SemanticKernel/Memory/VolatileMemoryStore.cs",
203+
"C# class that defines a volatile embedding store" },
204+
{ "https://github.com/microsoft/semantic-kernel/blob/main/samples/dotnet/KernelHttpServer/README.md",
205+
"README: How to set up a Semantic Kernel Service API using Azure Function Runtime v4" },
206+
{ "https://github.com/microsoft/semantic-kernel/blob/main/samples/apps/chat-summary-webapp-react/README.md",
207+
"README: README associated with a sample chat summary react-based webapp" },
208+
}).collect(Collectors.toMap(element -> element[0], element -> element[1]));
209+
}
210+
}

0 commit comments

Comments
 (0)