diff --git a/README.md b/README.md
index 984eb2e66..7f5541e78 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
## Create and Deploy Durable, Data-Intensive Agentic Workflows
-**Indexify simplifies building and serving durable, multi-stage data-intensive workflows and exposes them as HTTP APIs or Python Remote APIs.**
+Indexify simplifies building and serving durable, multi-stage data-intensive workflows and exposes them as HTTP APIs or Python Remote APIs.
@@ -14,7 +14,7 @@
### 💡 Use Cases
-**Indexify** is a versatile data processing framework for all kinds of use cases, including:
+Indexify is a versatile data processing framework for all kinds of use cases, including:
* [Extracting and Indexing PDF Documents](examples/pdf_document_extraction/)
* [Scraping and Summarizing Websites](examples/website_audio_summary/)
@@ -24,7 +24,7 @@
### ⭐ Key Features
-* **Multi-Cloud/Datacenter/Region:** Leverage Compute from other clouds with very little hassle and configuration.
+* **Multi-Cloud/Datacenter/Region:** Leverage compute from other clouds, datacenters, or regions in your workflows with minimal hassle and configuration.
* **Dynamic Routing:** Route data to different specialized compute functions distributed on a cluster based on conditional branching logic.
* **Local Inference:** Execute LLMs directly within workflow functions using LLamaCPP, vLLM, or Hugging Face Transformers in Python functions.
@@ -53,7 +53,7 @@ from pydantic import BaseModel
from tensorlake import tensorlake_function, Graph, Image, TensorlakeCompute
from typing import List, Union
-
+# Define the inputs and outputs of the functions in your workflow
class Text(BaseModel):
text: str
@@ -67,7 +67,8 @@ class ChunkEmbedding(BaseModel):
text: str
embedding: List[float]
-
+# Define an image capable of running the functions. Each function
+# can run in its own image.
embedding_image = (
Image()
.name("text_embedding_image")
@@ -99,15 +100,17 @@ def chunk_text(input: dict) -> List[TextChunk]:
# Embed a single chunk.
-# Note: (Automatic Map) Indexify automatically parallelize functions when they consume an element
-# from functions that produces a List
+# Note: (Automatic Map) Indexify automatically parallelizes a function when it consumes an
+# element from a function that produces a List. In this case, each text chunk is processed
+# in parallel by an Embedder function.
class Embedder(TensorlakeCompute):
name = "embedder"
image = embedding_image
+    # A TensorlakeCompute class allows initializing resources in the constructor;
+    # they stay loaded until the compute object is destroyed.
def __init__(self):
from sentence_transformers import SentenceTransformer
-
self._model = SentenceTransformer("all-MiniLM-L6-v2")
def run(self, chunk: TextChunk) -> ChunkEmbedding:
@@ -182,6 +185,11 @@ Executor is the component which is responsible for running your functions. On a
indexify-cli executor --dev
```
+Set the environment variable below so the SDK knows where the Indexify Server is running -
+```bash
+export INDEXIFY_URL=http://localhost:8900
+```
+
Change the code in the workflow to the following -
```python
from tensorlake import RemoteGraph
@@ -194,7 +202,24 @@ At this point, you now have a Graph endpoint on Indexify Server ready to be call
You can invoke the Graph as a REST API if the first function is configured to accept JSON payload.
```curl
-curl http://localhost:8900/namespaces/default/compute_graphs/text_embedder -d '{"text": "hello world"}'
+curl -X 'POST' http://localhost:8900/namespaces/default/compute_graphs/text_embedder/invoke_object -H 'Content-Type: application/json' -d '{"input": {"text": "hello world"}}'
+```
+
+This returns an invocation id - `{"id":"55df51b4a84ffc69"}`. An invocation id can be used to check the status of the workflow as it processes the input, and to retrieve outputs from the graph.
+
+Get the outputs of the invocation -
+```bash
+curl -X GET http://localhost:8900/namespaces/default/compute_graphs/text_embedder/invocations/55df51b4a84ffc69/outputs
+```
+This returns the outputs of every function in the graph, with an id for retrieving each one -
+```json
+{"status":"finalized","outputs":[{"compute_fn":"chunk_text","id":"89de2063abadf5d3","created_at":1738110077424},{"compute_fn":"embedder","id":"4908f00d711c4cd1","created_at":1738110081015}],"cursor":null}
+```
+
+You can now retrieve one of the outputs -
+
+```bash
+curl -X GET http://localhost:8900/namespaces/default/compute_graphs/text_embedder/invocations/55df51b4a84ffc69/fn/embedder/output/4908f00d711c4cd1
```
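+
+For reference, here are the same three calls as a small Python sketch using `requests` - a sketch only, assuming the endpoints shown above and a server on localhost; the hardcoded payload and graph name come from this example:
+```python
+import requests
+
+BASE = "http://localhost:8900/namespaces/default/compute_graphs/text_embedder"
+
+# Invoke the graph with a JSON payload; the response carries the invocation id
+invocation_id = requests.post(
+    f"{BASE}/invoke_object", json={"input": {"text": "hello world"}}
+).json()["id"]
+
+# List all outputs produced by the invocation
+outputs = requests.get(f"{BASE}/invocations/{invocation_id}/outputs").json()
+
+# Fetch the payload of each output produced by the embedder function
+for output in outputs["outputs"]:
+    if output["compute_fn"] == "embedder":
+        payload = requests.get(
+            f"{BASE}/invocations/{invocation_id}/fn/embedder/output/{output['id']}"
+        ).content
+```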
You can invoke the Graph from Python too
@@ -203,22 +228,30 @@ from tensorlake import RemoteGraph
remote_graph = RemoteGraph.by_name("text_embedder")
```
+
## Deploying to Production Clusters
Deploying a workflow to production is a two-step process -
-1. Run the server
-2. Build and deploy containers to run the functions.
+### 1. Run the Server
+```bash
+docker run -it --net host tensorlake/indexify-server
+```
-### Building and Deploying Containers
+### 2. Build and Deploy Function Containers
* First, build and deploy container images that contain all the Python and system dependencies of your code. They can be built using standard Docker build systems. For this example, we use a single image that can run all the functions; for more complex projects you can split functions across images to keep each image small.
+```bash
+indexify-cli build-image workflow.py
+```
+This builds the image defined in the workflow code above - `text_embedding_image`
+
+* Next, deploy the containers
```bash
-docker run -d indexify-cli executor --function default:text_embedder:chunk_document
-docker run -d indexify-cli executor --function default:text_embedder:embed_chunk
-docker run -d indexify-cli executor --function default:text_embedder:write_to_db
+docker run -it --net host text_embedding_image indexify-cli executor --function default:text_embedder:chunk_document
+docker run -it --net host text_embedding_image indexify-cli executor --function default:text_embedder:embed_chunk
+docker run -it --net host text_embedding_image indexify-cli executor --function default:text_embedder:write_to_db
```
> Containers are treated as ephemeral; only one type of function is ever scheduled on a single container. Here we start one container per function, three in total.
diff --git a/examples/readme/map_reduce_example.py b/examples/readme/map_reduce_example.py
new file mode 100644
index 000000000..63d5cef26
--- /dev/null
+++ b/examples/readme/map_reduce_example.py
@@ -0,0 +1,38 @@
+from pydantic import BaseModel
+from tensorlake import tensorlake_function, Graph
+from typing import List
+
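+# Accumulator for the reduce step; the running total starts at 0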
+class Total(BaseModel):
+ val: int = 0
+
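+# Returning a List triggers an automatic map: each element is dispatched
+# to the next function in the graph in parallel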
+@tensorlake_function()
+def generate_numbers(a: int) -> List[int]:
+    return list(range(a))
+
+@tensorlake_function()
+def square(x: int) -> int:
+ return x ** 2
+
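+# accumulate=Total turns this into a reduce step: each squared value is
+# folded into a single running Total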
+@tensorlake_function(accumulate=Total)
+def add(total: Total, new: int) -> Total:
+ total.val += new
+ return total
+
+g = Graph(name="sequence_summer", start_node=generate_numbers, description="Simple Sequence Summer")
+g.add_edge(generate_numbers, square)
+g.add_edge(square, add)
+
+if __name__ == "__main__":
+    # Local-run variant, kept for reference:
+    # invocation_id = g.run(a=10)
+    # result = g.get_output(invocation_id, "add")
+    # print(result)
+
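+    # Deploy the graph to the Indexify Server and invoke it remotely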
+ from tensorlake import RemoteGraph
+ graph = RemoteGraph.deploy(g)
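+    # block_until_done waits for the workflow to finish; the sum of squares of 0..9 is 285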
+ invocation_id = graph.run(block_until_done=True, a=10)
+ result = graph.output(invocation_id, "add")
+ print(result)
+
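+    # Reconnect to the already-deployed graph by name and invoke it again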
+ graph = RemoteGraph.by_name("sequence_summer")
+ invocation_id = graph.run(block_until_done=True, a=5)
+ print(graph.output(invocation_id, "add"))
diff --git a/examples/readme/text_embedder.py b/examples/readme/text_embedder.py
index 1261283d8..bc6efd8f9 100644
--- a/examples/readme/text_embedder.py
+++ b/examples/readme/text_embedder.py
@@ -98,7 +98,9 @@ def run(self, embedding: ChunkEmbedding) -> None:
if __name__ == "__main__":
- invocation_id = graph.run(input={"text": "This is a test text"})
+ from tensorlake import RemoteGraph
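+    # Deploy the local graph definition to the Indexify Server before invoking it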
+ graph = RemoteGraph.deploy(graph)
+ invocation_id = graph.run(block_until_done=True, input={"text": "This is a test text"})
print(f"Invocation ID: {invocation_id}")
embedding = graph.output(invocation_id, "embedder")
print(embedding)
diff --git a/indexify/src/indexify/cli/cli.py b/indexify/src/indexify/cli/cli.py
index 82360f3c7..a05fecbb9 100644
--- a/indexify/src/indexify/cli/cli.py
+++ b/indexify/src/indexify/cli/cli.py
@@ -292,7 +292,7 @@ def _parse_function_uris(uri_strs: Optional[List[str]]) -> Optional[List[Functio
def _create_image(image: Image, python_sdk_path):
console.print(
- Text("Creating container for ", style="cyan"),
+ Text("Creating image for ", style="cyan"),
Text(f"`{image._image_name}`", style="cyan bold"),
)
_build_image(image=image, python_sdk_path=python_sdk_path)
diff --git a/server/Cargo.lock b/server/Cargo.lock
index 37e497d1c..b7897120d 100644
--- a/server/Cargo.lock
+++ b/server/Cargo.lock
@@ -580,9 +580,9 @@ dependencies = [
[[package]]
name = "clap"
-version = "4.5.26"
+version = "4.5.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a8eb5e908ef3a6efbe1ed62520fb7287959888c88485abe072543190ecc66783"
+checksum = "769b0145982b4b48713e01ec42d61614425f27b7058bda7180a3a41f30104796"
dependencies = [
"clap_builder",
"clap_derive",
@@ -590,9 +590,9 @@ dependencies = [
[[package]]
name = "clap_builder"
-version = "4.5.26"
+version = "4.5.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "96b01801b5fc6a0a232407abc821660c9c6d25a1cafc0d4f85f29fb8d9afc121"
+checksum = "1b26884eb4b57140e4d2d93652abfa49498b938b3c9179f9fc487b0acc3edad7"
dependencies = [
"anstream",
"anstyle",
@@ -1130,10 +1130,22 @@ dependencies = [
"cfg-if",
"js-sys",
"libc",
- "wasi",
+ "wasi 0.11.0+wasi-snapshot-preview1",
"wasm-bindgen",
]
+[[package]]
+name = "getrandom"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi 0.13.3+wasi-0.2.2",
+ "windows-targets",
+]
+
[[package]]
name = "gimli"
version = "0.31.1"
@@ -1259,9 +1271,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "hyper"
-version = "1.5.2"
+version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "256fb8d4bd6413123cc9d91832d78325c48ff41677595be797d90f42969beae0"
+checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80"
dependencies = [
"bytes",
"futures-channel",
@@ -1499,7 +1511,7 @@ dependencies = [
[[package]]
name = "indexify-server"
-version = "0.2.20"
+version = "0.2.21"
dependencies = [
"anyhow",
"async-stream",
@@ -1857,7 +1869,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
dependencies = [
"libc",
- "wasi",
+ "wasi 0.11.0+wasi-snapshot-preview1",
"windows-sys 0.52.0",
]
@@ -2346,7 +2358,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d"
dependencies = [
"bytes",
- "getrandom",
+ "getrandom 0.2.15",
"rand",
"ring",
"rustc-hash 2.1.0",
@@ -2419,7 +2431,7 @@ version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
- "getrandom",
+ "getrandom 0.2.15",
]
[[package]]
@@ -2559,7 +2571,7 @@ checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d"
dependencies = [
"cc",
"cfg-if",
- "getrandom",
+ "getrandom 0.2.15",
"libc",
"spin",
"untrusted",
@@ -2812,9 +2824,9 @@ dependencies = [
[[package]]
name = "serde_json"
-version = "1.0.135"
+version = "1.0.138"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9"
+checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949"
dependencies = [
"itoa",
"memchr",
@@ -3125,13 +3137,13 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
[[package]]
name = "tempfile"
-version = "3.15.0"
+version = "3.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9a8a559c81686f576e8cd0290cd2a24a2a9ad80c98b3478856500fcbd7acd704"
+checksum = "38c246215d7d24f48ae091a2902398798e05d978b24315d6efbc00ede9a8bb91"
dependencies = [
"cfg-if",
"fastrand",
- "getrandom",
+ "getrandom 0.3.1",
"once_cell",
"rustix",
"windows-sys 0.59.0",
@@ -3685,11 +3697,11 @@ dependencies = [
[[package]]
name = "uuid"
-version = "1.12.0"
+version = "1.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "744018581f9a3454a9e15beb8a33b017183f1e7c0cd170232a2d1453b23a51c4"
+checksum = "b3758f5e68192bb96cc8f9b7e2c2cfdabb435499a28499a42f8f984092adad4b"
dependencies = [
- "getrandom",
+ "getrandom 0.2.15",
"serde",
]
@@ -3764,6 +3776,15 @@ version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
+[[package]]
+name = "wasi"
+version = "0.13.3+wasi-0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2"
+dependencies = [
+ "wit-bindgen-rt",
+]
+
[[package]]
name = "wasm-bindgen"
version = "0.2.99"
@@ -4132,6 +4153,15 @@ dependencies = [
"memchr",
]
+[[package]]
+name = "wit-bindgen-rt"
+version = "0.33.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
+dependencies = [
+ "bitflags 2.7.0",
+]
+
[[package]]
name = "write16"
version = "1.0.0"
diff --git a/server/Cargo.toml b/server/Cargo.toml
index 2aa713c33..6fc620705 100644
--- a/server/Cargo.toml
+++ b/server/Cargo.toml
@@ -22,7 +22,7 @@ members = [
[workspace.dependencies]
serde = { version = "1.0.217", features = ["derive"] }
anyhow = "1.0.95"
-serde_json = "1.0.135"
+serde_json = "1.0.138"
# https://github.com/rust-rocksdb/rust-rocksdb/issues/881
rocksdb = { version = "0.23.0" }
data_model = { path = "data_model" }
@@ -30,7 +30,7 @@ blob_store = { path = "blob_store" }
indexify_utils = { path = "utils" }
indexify_ui = { path = "indexify_ui" }
metrics = { path = "metrics" }
-hyper = "1.5.2"
+hyper = "1.6.0"
state_store = { path = "state_store" }
strum = { version = "0.26.3", features = ["derive"] }
tracing = "0.1.41"
@@ -42,7 +42,7 @@ serde_yml = "0.0.12"
figment = { version = "0.10.19", features = ["yaml"] }
axum = { version = "0.8.1", features = ["multipart", "macros", "tokio"] }
axum-server = "0.7.1"
-tempfile = "3.15.0"
+tempfile = "3.16.0"
utoipa = { version = "5.3.1", features = ["axum_extras"] }
utoipa-swagger-ui = { version = "9.0.0", features = ["axum"] }
object_store = { version = "0.11.2", features = ["aws"] }
@@ -66,7 +66,7 @@ tower-http = { version = "0.6.2", default-features = false, features = [
] }
pin-project = "1.1.8"
ciborium = "0.2.2"
-uuid = { version = "1.12.0", features = ["v4"] }
+uuid = { version = "1.12.1", features = ["v4"] }
url = "2.5.4"
opentelemetry = { version = "0.27.1", features = ["metrics", "trace"] }
opentelemetry_sdk = { version = "0.27.1", features = [
@@ -95,7 +95,7 @@ serde = { workspace = true }
serde_json = { workspace = true }
anyhow = { workspace = true }
figment = { workspace = true }
-clap = { version = "4.5.26", features = ["derive"] }
+clap = { version = "4.5.27", features = ["derive"] }
tracing = { workspace = true }
axum = { workspace = true }
tokio = { workspace = true }