- `EmbeddingClient` implementation that computes sentence embeddings locally with SBERT transformers.
- Uses pre-trained transformer models, serialized into the Open Neural Network Exchange (ONNX) format.
- The Deep Java Library and the Microsoft ONNX Java Runtime are used to run the ONNX models and compute the embeddings efficiently.
- Add default tokenizer.json and model.onnx for sentence-transformers/all-MiniLM-L6-v2.
- Add a configurable resource caching service that allows caching remote (http/https) resources on the local file system.
- README.md provides information on how to serialize ONNX models.
- Add Git LFS configuration for large ONNX model files.

The `TransformersEmbeddingClient` is an `EmbeddingClient` implementation that locally computes [sentence embeddings](https://www.sbert.net/examples/applications/computing-embeddings/README.html#sentence-embeddings-with-transformers) using a selected [sentence transformer](https://www.sbert.net/).

It uses [pre-trained](https://www.sbert.net/docs/pretrained_models.html) transformer models, serialized into the [Open Neural Network Exchange (ONNX)](https://onnx.ai/) format.

The [Deep Java Library](https://djl.ai/) and the Microsoft [ONNX Java Runtime](https://onnxruntime.ai/docs/get-started/with-java.html) libraries are used to run the ONNX models and compute the embeddings in Java.
## Serialize the Tokenizer and the Transformer Model
To run things in Java, we need to serialize the tokenizer and the transformer model into the ONNX format.
### Serialize with optimum-cli
One quick way to achieve this is to use the [optimum-cli](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model#exporting-a-model-to-onnx-using-the-cli) command-line tool.

The following snippet creates a Python virtual environment, installs the required packages, and runs optimum-cli to serialize (i.e. export) the models (a sketch; exact package versions and flags may vary across `optimum` releases):
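```bash
# Sketch of the export flow; package versions and flags may differ.
python3 -m venv venv
source ./venv/bin/activate
pip install --upgrade pip
pip install optimum onnx onnxruntime
optimum-cli export onnx --model sentence-transformers/all-MiniLM-L6-v2 onnx-output-folder
```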
The `optimum-cli` command exports the [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) transformer into the `onnx-output-folder` folder. The latter includes the `tokenizer.json` and `model.onnx` files used by the embedding client.
## Apply the ONNX model
Use the `setTokenizerResource(tokenizerJsonUri)` and `setModelResource(modelOnnxUri)` methods to set the URI locations of the exported `tokenizer.json` and `model.onnx` files.
The `classpath:`, `file:` or `https:` URI schemes are supported.
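For example, a client might be pointed at the files exported earlier; a minimal sketch, assuming the exported artifacts live under the illustrative path `/tmp/onnx-output-folder` and that the setters accept URI strings, as the setter names above suggest:

```java
TransformersEmbeddingClient embeddingClient = new TransformersEmbeddingClient();

// Illustrative URIs; adjust to wherever the exported files actually live.
embeddingClient.setTokenizerResource("file:/tmp/onnx-output-folder/tokenizer.json");
embeddingClient.setModelResource("file:/tmp/onnx-output-folder/model.onnx");
```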
If no other model is explicitly set, the `TransformersEmbeddingClient` defaults to the [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) model:

| Metric | Value |
| -------- | ------- |
| Dimensions | 384 |
| Avg. performance | 58.80 |
| Speed | 14200 sentences/sec |
| Size | 80MB |
The following snippet illustrates how to use the `TransformersEmbeddingClient`:
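The example below is a minimal sketch; it assumes the client is created outside of a Spring application context, so its `InitializingBean` lifecycle method `afterPropertiesSet()` is invoked manually to load the model:

```java
// Create the client; with no tokenizer/model resources set explicitly it
// falls back to the bundled sentence-transformers/all-MiniLM-L6-v2 defaults.
TransformersEmbeddingClient embeddingClient = new TransformersEmbeddingClient();

// Outside a Spring context, trigger initialization (model loading) manually.
embeddingClient.afterPropertiesSet();

// Compute a 384-dimensional embedding for each input sentence.
List<List<Double>> embeddings = embeddingClient.embed(List.of("Hello world", "World is big"));
```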