Skip to content

Commit

Permalink
Merge pull request #260 from milderhc/ms-learn
Browse files Browse the repository at this point in the history
Add MS Learn code
milderhc authored Nov 6, 2024
2 parents 7d1d85d + 0c2ce39 commit 9219810
Showing 16 changed files with 691 additions and 73 deletions.
Original file line number Diff line number Diff line change
@@ -41,7 +41,7 @@ public class Hotel {
@VectorStoreRecordVector(dimensions = 8, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
private final List<Float> indexedEuclidean;

@VectorStoreRecordData
@VectorStoreRecordData(isFilterable = true)
private final List<String> tags;

@VectorStoreRecordData
Original file line number Diff line number Diff line change
@@ -6,7 +6,6 @@
import com.microsoft.semantickernel.data.redis.RedisVectorStore;
import com.microsoft.semantickernel.data.redis.RedisVectorStoreOptions;
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollectionOptions;
import com.microsoft.semantickernel.tests.data.jdbc.Hotel;
import com.redis.testcontainers.RedisContainer;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;
Original file line number Diff line number Diff line change
@@ -6,11 +6,10 @@
import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.credential.KeyCredential;
import com.microsoft.semantickernel.aiservices.openai.textembedding.OpenAITextEmbeddingGenerationService;
import com.microsoft.semantickernel.data.textsearch.TextSearchResultValue;
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResults;
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection;
import com.microsoft.semantickernel.data.VolatileVectorStore;
import com.microsoft.semantickernel.data.VolatileVectorStoreRecordCollectionOptions;
import com.microsoft.semantickernel.data.VectorStoreTextSearch;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector;
@@ -43,7 +42,6 @@ static class GitHubFile {
@VectorStoreRecordData
private final String description;
@VectorStoreRecordData
@TextSearchResultValue
private final String link;
@VectorStoreRecordVector(dimensions = EMBEDDING_DIMENSIONS, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.COSINE_DISTANCE)
private final List<Float> embedding;
@@ -125,24 +123,27 @@ public static void inMemoryStoreAndSearch(
.then(storeData(collection, embeddingGeneration, sampleData()))
.block();

// Build a vectorized search
var vectorStoreTextSearch = VectorStoreTextSearch.<GitHubFile>builder()
.withVectorizedSearch(collection)
.withTextEmbeddingGenerationService(embeddingGeneration)
.build();

// Search for results
// The volatile store executes an exhaustive search; for approximate search, use Azure AI Search, Redis, or JDBC with PostgreSQL
String query = "How to get started?";
var results = vectorStoreTextSearch.searchAsync(query, null)
.block();
var results = search("How to get started", collection, embeddingGeneration).block();

if (results == null || results.getTotalCount() == 0) {
System.out.println("No search results found.");
return;
}
var searchResult = results.getResults().get(0);
System.out.printf("Search result with score: %f.%n Link: %s, Description: %s%n",
searchResult.getScore(), searchResult.getRecord().link,
searchResult.getRecord().description);
}

System.out.printf("Best result for '%s': %s%n", query, results.getResults().get(0));
/**
 * Runs a vector search over the collection for the given text.
 *
 * The text is first turned into an embedding by the embedding generation
 * service; the embedding's vector is then handed to the collection's
 * {@code searchAsync} with no search options ({@code null}).
 *
 * @param searchText          the text to search for
 * @param recordCollection    the collection to query
 * @param embeddingGeneration the service used to embed {@code searchText}
 * @return a {@code Mono} emitting the search results
 */
private static Mono<VectorSearchResults<GitHubFile>> search(
String searchText,
VectorStoreRecordCollection<String, GitHubFile> recordCollection,
OpenAITextEmbeddingGenerationService embeddingGeneration) {
// Step 1: embed the query text
var queryEmbedding = embeddingGeneration.generateEmbeddingAsync(searchText);
// Step 2: once the embedding is available, look up the closest records
return queryEmbedding.flatMap(embedding -> {
var queryVector = embedding.getVector();
return recordCollection.searchAsync(queryVector, null);
});
}

private static Mono<List<String>> storeData(
Original file line number Diff line number Diff line change
@@ -5,16 +5,17 @@
import com.azure.ai.openai.OpenAIClientBuilder;
import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.credential.KeyCredential;
import com.azure.core.util.ClientOptions;
import com.azure.core.util.MetricsOptions;
import com.azure.core.util.TracingOptions;
import com.azure.search.documents.indexes.SearchIndexAsyncClient;
import com.azure.search.documents.indexes.SearchIndexClientBuilder;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.microsoft.semantickernel.aiservices.openai.textembedding.OpenAITextEmbeddingGenerationService;
import com.microsoft.semantickernel.data.azureaisearch.AzureAISearchVectorStore;
import com.microsoft.semantickernel.data.azureaisearch.AzureAISearchVectorStoreOptions;
import com.microsoft.semantickernel.data.azureaisearch.AzureAISearchVectorStoreRecordCollectionOptions;
import com.microsoft.semantickernel.data.textsearch.TextSearchResultValue;
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResults;
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection;
import com.microsoft.semantickernel.data.VectorStoreTextSearch;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector;
@@ -49,12 +50,11 @@ public class VectorStoreWithAzureAISearch {
private static final int EMBEDDING_DIMENSIONS = 1536;

static class GitHubFile {
@VectorStoreRecordKey()
@VectorStoreRecordKey
private final String id;
@VectorStoreRecordData()
@VectorStoreRecordData
private final String description;
@VectorStoreRecordData
@TextSearchResultValue
private final String link;
@VectorStoreRecordVector(dimensions = EMBEDDING_DIMENSIONS, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.COSINE_SIMILARITY)
private final List<Float> embedding;
@@ -64,10 +64,10 @@ public GitHubFile() {
}

public GitHubFile(
@JsonProperty("fileId") String id,
@JsonProperty("description") String description,
@JsonProperty("link") String link,
@JsonProperty("embedding") List<Float> embedding) {
String id,
String description,
String link,
List<Float> embedding) {
this.id = id;
this.description = description;
this.link = link;
@@ -108,6 +108,7 @@ public static void main(String[] args) {
var searchClient = new SearchIndexClientBuilder()
.endpoint(AZURE_AI_SEARCH_ENDPOINT)
.credential(new AzureKeyCredential(AZURE_AISEARCH_KEY))
.clientOptions(clientOptions())
.buildAsyncClient();

storeAndSearch(searchClient, embeddingGeneration);
@@ -137,24 +138,27 @@ public static void storeAndSearch(
.then(storeData(collection, embeddingGeneration, sampleData()))
.block();

// Build a vectorized search
var vectorStoreTextSearch = VectorStoreTextSearch.<GitHubFile>builder()
.withVectorizedSearch(collection)
.withTextEmbeddingGenerationService(embeddingGeneration)
.build();

// Search for results
// Might need to wait for the data to be indexed
String query = "How to get started?";
var results = vectorStoreTextSearch.searchAsync(query, null)
.block();
var results = search("How to get started", collection, embeddingGeneration).block();

if (results == null || results.getTotalCount() == 0) {
System.out.println("No search results found.");
return;
}
var searchResult = results.getResults().get(0);
System.out.printf("Search result with score: %f.%n Link: %s, Description: %s%n",
searchResult.getScore(), searchResult.getRecord().link,
searchResult.getRecord().description);
}

System.out.printf("Best result for '%s': %s%n", query, results.getResults().get(0));
/**
 * Runs a vector search over the collection for the given text.
 *
 * The text is first turned into an embedding by the embedding generation
 * service; the embedding's vector is then handed to the collection's
 * {@code searchAsync} with no search options ({@code null}).
 *
 * @param searchText          the text to search for
 * @param recordCollection    the collection to query
 * @param embeddingGeneration the service used to embed {@code searchText}
 * @return a {@code Mono} emitting the search results
 */
private static Mono<VectorSearchResults<GitHubFile>> search(
String searchText,
VectorStoreRecordCollection<String, GitHubFile> recordCollection,
OpenAITextEmbeddingGenerationService embeddingGeneration) {
// Step 1: embed the query text
var queryEmbedding = embeddingGeneration.generateEmbeddingAsync(searchText);
// Step 2: once the embedding is available, look up the closest records
return queryEmbedding.flatMap(embedding -> {
var queryVector = embedding.getVector();
return recordCollection.searchAsync(queryVector, null);
});
}

private static Mono<List<String>> storeData(
@@ -197,4 +201,11 @@ private static Map<String, String> sampleData() {
"README: README associated with a sample chat summary react-based webapp" },
}).collect(Collectors.toMap(element -> element[0], element -> element[1]));
}

/**
 * Builds the client options passed to the Azure AI Search index client.
 *
 * Sets freshly constructed tracing and metrics options and the application
 * id {@code "Semantic-Kernel"}.
 *
 * @return the configured {@code ClientOptions}
 */
private static ClientOptions clientOptions() {
ClientOptions options = new ClientOptions();
options.setTracingOptions(new TracingOptions());
options.setMetricsOptions(new MetricsOptions());
options.setApplicationId("Semantic-Kernel");
return options;
}
}
Original file line number Diff line number Diff line change
@@ -9,15 +9,14 @@
import com.microsoft.semantickernel.data.jdbc.JDBCVectorStore;
import com.microsoft.semantickernel.data.jdbc.JDBCVectorStoreOptions;
import com.microsoft.semantickernel.data.jdbc.JDBCVectorStoreRecordCollectionOptions;
import com.microsoft.semantickernel.data.jdbc.mysql.MySQLVectorStoreQueryProvider;
import com.microsoft.semantickernel.data.textsearch.TextSearchResultValue;
import com.microsoft.semantickernel.data.jdbc.postgres.PostgreSQLVectorStoreQueryProvider;
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResults;
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection;
import com.microsoft.semantickernel.data.VectorStoreTextSearch;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector;
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
import com.mysql.cj.jdbc.MysqlDataSource;

import java.nio.charset.StandardCharsets;
import java.sql.SQLException;
import java.util.Arrays;
@@ -27,6 +26,7 @@
import java.util.Map;
import java.util.stream.Collectors;

import org.postgresql.ds.PGSimpleDataSource;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;

@@ -42,12 +42,11 @@ public class VectorStoreWithJDBC {
private static final int EMBEDDING_DIMENSIONS = 1536;

static class GitHubFile {
@VectorStoreRecordKey()
@VectorStoreRecordKey
private final String id;
@VectorStoreRecordData()
@VectorStoreRecordData
private final String description;
@VectorStoreRecordData
@TextSearchResultValue
private final String link;
@VectorStoreRecordVector(dimensions = EMBEDDING_DIMENSIONS, distanceFunction = DistanceFunction.COSINE_DISTANCE)
private final List<Float> embedding;
@@ -89,8 +88,8 @@ static String encodeId(String realId) {
}
}

// Run a MySQL server with:
// docker run -d --name mysql-container -e MYSQL_ROOT_PASSWORD=root -e MYSQL_DATABASE=sk -p 3306:3306 mysql:latest
// Run a PostgreSQL server with:
// docker run -d --name pgvector-container -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=root -e POSTGRES_DB=sk -p 5432:5432 pgvector/pgvector:pg17

public static void main(String[] args) throws SQLException {
System.out.println("==============================================================");
@@ -123,14 +122,14 @@ public static void main(String[] args) throws SQLException {

public static void storeAndSearch(OpenAITextEmbeddingGenerationService embeddingGeneration) {
// Configure the data source
var dataSource = new MysqlDataSource();
dataSource.setUrl("jdbc:mysql://localhost:3306/sk");
PGSimpleDataSource dataSource = new PGSimpleDataSource();
dataSource.setUrl("jdbc:postgresql://localhost:5432/sk");
dataSource.setUser("postgres");
dataSource.setPassword("root");
dataSource.setUser("root");

// Build a query provider
// Available query providers: PostgreSQLVectorStoreQueryProvider, MySQLVectorStoreQueryProvider and SQLiteVectorStoreQueryProvider
var queryProvider = MySQLVectorStoreQueryProvider.builder()
var queryProvider = PostgreSQLVectorStoreQueryProvider.builder()
.withDataSource(dataSource)
.build();

@@ -155,23 +154,26 @@ public static void storeAndSearch(OpenAITextEmbeddingGenerationService embedding
.then(storeData(collection, embeddingGeneration, sampleData()))
.block();

// Build a vectorized search
var vectorStoreTextSearch = VectorStoreTextSearch.<GitHubFile>builder()
.withVectorizedSearch(collection)
.withTextEmbeddingGenerationService(embeddingGeneration)
.build();

// Search for results
String query = "How to get started?";
var results = vectorStoreTextSearch.searchAsync(query, null)
.block();
var results = search("How to get started", collection, embeddingGeneration).block();

if (results == null || results.getTotalCount() == 0) {
System.out.println("No search results found.");
return;
}
var searchResult = results.getResults().get(0);
System.out.printf("Search result with score: %f.%n Link: %s, Description: %s%n",
searchResult.getScore(), searchResult.getRecord().link,
searchResult.getRecord().description);
}

System.out.printf("Best result for '%s': %s%n", query, results.getResults().get(0));
/**
 * Runs a vector search over the collection for the given text.
 *
 * The text is first turned into an embedding by the embedding generation
 * service; the embedding's vector is then handed to the collection's
 * {@code searchAsync} with no search options ({@code null}).
 *
 * @param searchText          the text to search for
 * @param recordCollection    the collection to query
 * @param embeddingGeneration the service used to embed {@code searchText}
 * @return a {@code Mono} emitting the search results
 */
private static Mono<VectorSearchResults<GitHubFile>> search(
String searchText,
VectorStoreRecordCollection<String, GitHubFile> recordCollection,
OpenAITextEmbeddingGenerationService embeddingGeneration) {
// Step 1: embed the query text
var queryEmbedding = embeddingGeneration.generateEmbeddingAsync(searchText);
// Step 2: once the embedding is available, look up the closest records
return queryEmbedding.flatMap(embedding -> {
var queryVector = embedding.getVector();
return recordCollection.searchAsync(queryVector, null);
});
}

private static Mono<List<String>> storeData(
Original file line number Diff line number Diff line change
@@ -10,9 +10,8 @@
import com.microsoft.semantickernel.data.redis.RedisStorageType;
import com.microsoft.semantickernel.data.redis.RedisVectorStore;
import com.microsoft.semantickernel.data.redis.RedisVectorStoreOptions;
import com.microsoft.semantickernel.data.textsearch.TextSearchResultValue;
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResults;
import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection;
import com.microsoft.semantickernel.data.VectorStoreTextSearch;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector;
@@ -42,12 +41,11 @@ public class VectorStoreWithRedis {
private static final int EMBEDDING_DIMENSIONS = 1536;

public static class GitHubFile {
@VectorStoreRecordKey()
@VectorStoreRecordKey
private final String id;
@VectorStoreRecordData()
@VectorStoreRecordData
private final String description;
@VectorStoreRecordData
@TextSearchResultValue
private final String link;
@VectorStoreRecordVector(dimensions = EMBEDDING_DIMENSIONS, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.COSINE_DISTANCE)
private final List<Float> embedding;
@@ -146,23 +144,27 @@ public static void storeAndSearch(
.then(storeData(collection, embeddingGeneration, sampleData()))
.block();

// Build a vectorized search
var vectorStoreTextSearch = VectorStoreTextSearch.<GitHubFile>builder()
.withVectorizedSearch(collection)
.withTextEmbeddingGenerationService(embeddingGeneration)
.build();

// Search for results
String query = "How to get started?";
var results = vectorStoreTextSearch.searchAsync(query, null)
.block();
// Might need to wait for the data to be indexed
var results = search("How to get started", collection, embeddingGeneration).block();

if (results == null || results.getTotalCount() == 0) {
System.out.println("No search results found.");
return;
}
var searchResult = results.getResults().get(0);
System.out.printf("Search result with score: %f.%n Link: %s, Description: %s%n",
searchResult.getScore(), searchResult.getRecord().link,
searchResult.getRecord().description);
}

System.out.printf("Best result for '%s': %s%n", query, results.getResults().get(0));
/**
 * Runs a vector search over the collection for the given text.
 *
 * The text is first turned into an embedding by the embedding generation
 * service; the embedding's vector is then handed to the collection's
 * {@code searchAsync} with no search options ({@code null}).
 *
 * @param searchText          the text to search for
 * @param recordCollection    the collection to query
 * @param embeddingGeneration the service used to embed {@code searchText}
 * @return a {@code Mono} emitting the search results
 */
private static Mono<VectorSearchResults<GitHubFile>> search(
String searchText,
VectorStoreRecordCollection<String, GitHubFile> recordCollection,
OpenAITextEmbeddingGenerationService embeddingGeneration) {
// Step 1: embed the query text
var queryEmbedding = embeddingGeneration.generateEmbeddingAsync(searchText);
// Step 2: once the embedding is available, look up the closest records
return queryEmbedding.flatMap(embedding -> {
var queryVector = embedding.getVector();
return recordCollection.searchAsync(queryVector, null);
});
}

private static Mono<List<String>> storeData(
13 changes: 13 additions & 0 deletions samples/semantickernel-learn-resources/pom.xml
Original file line number Diff line number Diff line change
@@ -29,6 +29,19 @@
<artifactId>semantickernel-api</artifactId>
</dependency>

<dependency>
<groupId>com.microsoft.semantic-kernel</groupId>
<artifactId>semantickernel-data-azureaisearch</artifactId>
</dependency>
<dependency>
<groupId>com.microsoft.semantic-kernel</groupId>
<artifactId>semantickernel-data-jdbc</artifactId>
</dependency>
<dependency>
<groupId>com.microsoft.semantic-kernel</groupId>
<artifactId>semantickernel-data-redis</artifactId>
</dependency>

<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
Loading

0 comments on commit 9219810

Please sign in to comment.