diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fcdbecb..a127cab 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -39,6 +39,9 @@ jobs:
- name: Setup Rust
uses: dtolnay/rust-toolchain@stable
+ # - name: Set up QEMU
+ # uses: docker/setup-qemu-action@v3
+
- name: Install Protoc
uses: arduino/setup-protoc@v2
with:
@@ -70,17 +73,13 @@ jobs:
with:
context: .
file: anvil/Dockerfile
- load: true
+# Triggers Error: buildx failed with: ERROR: failed to build: docker exporter does not currently support exporting manifest lists
+# https://github.com/docker/buildx/issues/59
+ load: true #When ARM support is re-enabled, this needs to be disabled
push: false
- tags: ${{ steps.img.outputs.tag }}
+ tags: anvil:test
+# # platforms: linux/amd64,linux/arm64
platforms: linux/amd64
- build-args: |
- BINARY_PATH=./target/release
-
- - name: Validate runtime binary in image
- run: |
- docker run --rm ${{ steps.img.outputs.tag }} ls -l /usr/local/bin
- docker run --rm ${{ steps.img.outputs.tag }} /usr/local/bin/anvil --help >/dev/null
- name: Wait for PostgreSQL to be ready
run: |
@@ -130,12 +129,18 @@ jobs:
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
+ # platforms: linux/amd64,linux/arm64
platforms: linux/amd64
- build-args: |
- BINARY_PATH=./target/release
cache-from: type=gha
cache-to: type=gha,mode=max
+ - name: Prepare Release Assets
+ if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+ run: |
+ mkdir -p release
+ docker cp $(docker create ${{ steps.meta.outputs.tags }}):/usr/local/bin/anvil-cli release/anvil
+ docker cp $(docker create ${{ steps.meta.outputs.tags }}):/usr/local/bin/admin release/anvil-admin
+
- name: Create GitHub Release
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
uses: softprops/action-gh-release@v1
@@ -150,3 +155,6 @@ jobs:
```sh
docker pull ghcr.io/${{ github.repository }}:${{ steps.tag.outputs.tag_name }}
```
+ files: |
+ release/anvil
+ release/anvil-admin
diff --git a/anvil/Dockerfile b/anvil/Dockerfile
index 83ed1f4..05bc598 100644
--- a/anvil/Dockerfile
+++ b/anvil/Dockerfile
@@ -11,6 +11,7 @@ COPY . .
# Build the anvil server and the admin CLI in release mode
RUN cargo build --release --bin anvil --bin admin
+RUN cargo build --release -p anvil-cli
# Stage 2: Create the final, minimal image
FROM rust:latest
@@ -24,6 +25,7 @@ RUN apt-get update && apt-get purge -y build-essential pkg-config libssl-dev pro
# Copy the compiled binaries from the builder stage
COPY --from=builder /usr/src/anvil/target/release/anvil /usr/local/bin/anvil
COPY --from=builder /usr/src/anvil/target/release/admin /usr/local/bin/admin
+COPY --from=builder /usr/src/anvil/target/release/anvil-cli /usr/local/bin/anvil-cli
# Expose the default gRPC/S3 port and a potential swarm port
EXPOSE 50051
diff --git a/docs/01-getting-started.md b/docs/01-getting-started.md
index c4ff613..39a0fa9 100644
--- a/docs/01-getting-started.md
+++ b/docs/01-getting-started.md
@@ -1,20 +1,20 @@
---
slug: /anvil/getting-started
title: 'Getting Started: Anvil in 10 Minutes'
-description: A hands-on guide to launching a single-node Anvil instance with Docker and interacting with it using an S3 client.
-tags: [getting-started, docker, s3]
+description: A hands-on guide to launching a single-node Anvil instance with Docker and interacting with it using the Anvil CLI.
+tags: [getting-started, docker, cli]
---
# Chapter 1: Anvil in 10 Minutes
-> **TL;DR:** Use our `docker-compose.yml` to launch a single-node Anvil instance and its Postgres database. Use the `anvil-cli` or any S3 client to create a bucket and upload your first file.
+> **TL;DR:** Use our `docker-compose.yml` to launch a single-node Anvil instance and its Postgres database. Use the `anvil` CLI to create a bucket and upload your first file.
This guide will walk you through the fastest way to get a fully functional, single-node Anvil instance running on your local machine. By the end, you will have created a bucket, uploaded a file, and downloaded it back.
### 1.1. Prerequisites
- **Docker and Docker Compose:** Anvil is packaged as a Docker container for easy deployment. Ensure you have both [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/install/) installed.
-- **An S3 Client:** You will need a client tool that can speak the S3 protocol. We recommend the [AWS Command Line Interface (CLI)](https://aws.amazon.com/cli/).
+- **`anvil`:** The Anvil command-line interface is the primary tool for interacting with your Anvil cluster. It should be provided as part of your Anvil distribution.
### 1.2. Launching Anvil with Docker Compose
@@ -61,7 +61,7 @@ services:
retries: 5
anvil1:
- image: ghcr.io/worka-ai/anvil:main
+ image: ghcr.io/worka-ai/anvil:v2025.11.14-001012
depends_on:
postgres-global:
condition: service_healthy
@@ -112,7 +112,7 @@ This command will download the necessary images and start the Anvil and Postgres
### 1.3. Creating Your First Tenant and API Key
-Anvil is a multi-tenant system. Before you can create buckets, you need a **Tenant** and an **App** with an API key. You can create these using the `admin` CLI, which we will run inside the running Docker container.
+Anvil is a multi-tenant system. Before you can create buckets, you need a **Tenant** and an **App** with an API key. You can create these using the `admin` tool, which we will run inside the running Docker container.
**Step 1: Create the Region and Tenant**
@@ -126,14 +126,14 @@ docker compose exec anvil1 admin tenants create my-first-tenant
**Step 2: Create an App**
-Next, create an App for this tenant. This will generate the credentials needed to interact with the S3 API.
+Next, create an App for this tenant. This will generate the credentials needed to interact with the API.
```bash
# Create an app and get its credentials (uses named flags)
-docker compose exec anvil1 admin apps create --tenant-name my-first-tenant --app-name my-s3-app
+docker compose exec anvil1 admin apps create --tenant-name my-first-tenant --app-name my-cli-app
```
-This command will output a **Client ID** and a **Client Secret**. **Save these securely!** They are your S3 access credentials.
+This command will output a **Client ID** and a **Client Secret**. **Save these securely!** They are your API credentials.
**Step 3: Grant Permissions**
@@ -143,31 +143,28 @@ By default, a new app has **no permissions**. You must explicitly grant it the r
```bash
# Grant the app full permissions on all resources
-docker compose exec anvil1 admin policies grant --app-name my-s3-app --action "*" --resource "*"
+docker compose exec anvil1 admin policies grant --app-name my-cli-app --action "*" --resource "*"
```
-### 1.4. Using an S3 Client to Create a Bucket
+### 1.4. Using the `anvil` CLI to Create a Bucket
-Now you can configure your S3 client to connect to Anvil. For the AWS CLI, you can set the credentials and endpoint URL using environment variables.
+Now you can configure the `anvil` CLI to connect to your new Anvil instance.
-Replace `YOUR_CLIENT_ID` and `YOUR_CLIENT_SECRET` with the values you saved in the previous step.
+**Step 1: Configure the CLI**
-```bash
-export AWS_ACCESS_KEY_ID=YOUR_CLIENT_ID
-export AWS_SECRET_ACCESS_KEY=YOUR_SECRET_ACCESS_KEY
-export AWS_DEFAULT_REGION=europe-west-1
+Run the `configure` command and provide the host and the credentials you saved.
-# The Anvil S3 endpoint (note the port)
-ANVIL_ENDPOINT="http://localhost:50051"
+```bash
+# Replace YOUR_CLIENT_ID and YOUR_CLIENT_SECRET with the values from the previous step
+anvil configure --host http://localhost:50051 --client-id YOUR_CLIENT_ID --client-secret YOUR_CLIENT_SECRET
```
-Now, create a bucket. Bucket names must be globally unique.
+**Step 2: Create a Bucket**
+
+Now, create a bucket.
```bash
-aws s3api create-bucket \
- --bucket my-first-anvil-bucket \
- --region europe-west-1 \
- --endpoint-url $ANVIL_ENDPOINT
+anvil bucket create --name my-first-anvil-bucket --region europe-west-1
```
### 1.5. Uploading and Downloading Your First Object
@@ -178,22 +175,22 @@ Create a sample file to upload:
echo "Hello, Anvil!" > hello.txt
```
-Upload it to your new bucket:
+Upload it to your new bucket using an S3-style path:
```bash
-aws s3 cp hello.txt s3://my-first-anvil-bucket/hello.txt --endpoint-url $ANVIL_ENDPOINT
+anvil object put --src hello.txt --dest s3://my-first-anvil-bucket/hello.txt
```
You can list the objects in your bucket to confirm the upload was successful:
```bash
-aws s3 ls s3://my-first-anvil-bucket/ --endpoint-url $ANVIL_ENDPOINT
+anvil object ls --path s3://my-first-anvil-bucket/
```
Finally, download the file back to verify its contents:
```bash
-aws s3 cp s3://my-first-anvil-bucket/hello.txt downloaded_hello.txt --endpoint-url $ANVIL_ENDPOINT
+anvil object get --src s3://my-first-anvil-bucket/hello.txt --dest downloaded_hello.txt
cat downloaded_hello.txt
# Expected output: Hello, Anvil!
diff --git a/docs/03-user-guide-authentication.md b/docs/03-user-guide-authentication.md
index c4103e5..742d16a 100644
--- a/docs/03-user-guide-authentication.md
+++ b/docs/03-user-guide-authentication.md
@@ -25,7 +25,7 @@ This model ensures that you can issue, rotate, and revoke credentials for differ
### 3.2. Creating an App and Getting Credentials
-You create an App using the `anvil admin` CLI (as shown in the Getting Started guide) or via the administrative API.
+You create an App using the `admin` tool (as shown in the Getting Started guide) or via the administrative API.
```bash
# This command is run by an administrator
@@ -49,7 +49,7 @@ Permissions in Anvil are defined by policies that connect an App to an **action*
* `write`: Permission to create, update, or delete resources.
* `grant`: Permission to manage the permissions of other apps (a highly privileged action).
-A policy is granted using the admin CLI:
+A policy is granted using the admin tool:
```bash
# Grant the app permission to read and write objects in 'my-data-bucket'
@@ -75,9 +75,9 @@ When a bucket is public:
- `GetObject` and `HeadObject` operations are allowed for anonymous users (without any authentication).
- All other operations (`PutObject`, `DeleteObject`, `ListObjects`) still require valid, authorized credentials.
-You can set a bucket's public status using the `anvil admin` CLI or the gRPC API.
+You can set a bucket's public status using the `admin` tool or the gRPC API.
```bash
# Make a bucket public (requires 'grant' permission on the bucket)
docker compose exec anvil1 admin buckets set-public-access --bucket my-public-assets --allow
-```
+```
\ No newline at end of file
diff --git a/docs/04-user-guide-s3-gateway.md b/docs/04-user-guide-s3-gateway.md
index 4f218a8..e24ad09 100644
--- a/docs/04-user-guide-s3-gateway.md
+++ b/docs/04-user-guide-s3-gateway.md
@@ -11,11 +11,13 @@ tags: [user-guide, s3, aws-cli, rclone, sdk]
One of Anvil's most powerful features is its S3-compatible API gateway. This allows you to leverage the vast ecosystem of existing S3 tools, libraries, and SDKs to interact with your Anvil cluster without needing to write any custom code.
+> **Note:** While this guide focuses on S3-compatible tools, the `anvil` CLI is the recommended primary interface for most operations. See the [Getting Started](./getting-started) guide for `anvil` CLI examples.
+
### 4.1. Configuring S3 Clients
To connect an S3 client to Anvil, you need to configure three things:
-1. **Endpoint URL:** The HTTP address of your Anvil node (e.g., `http://localhost:9000`).
+1. **Endpoint URL:** The HTTP address of your Anvil node (e.g., `http://localhost:50051`).
2. **Access Key ID:** Your Anvil App's **Client ID**.
3. **Secret Access Key:** Your Anvil App's **Client Secret**.
@@ -29,10 +31,10 @@ export AWS_ACCESS_KEY_ID="YOUR_CLIENT_ID"
export AWS_SECRET_ACCESS_KEY="YOUR_CLIENT_SECRET"
# The region your bucket is in
-export AWS_DEFAULT_REGION="DOCKER_TEST"
+export AWS_DEFAULT_REGION="europe-west-1"
# The Anvil S3 endpoint
-ANVIL_ENDPOINT="http://localhost:9000"
+ANVIL_ENDPOINT="http://localhost:50051"
```
Alternatively, you can create a dedicated profile in your `~/.aws/config` and `~/.aws/credentials` files.
@@ -56,10 +58,10 @@ import boto3
s3_client = boto3.client(
's3',
- endpoint_url='http://localhost:9000',
+ endpoint_url='http://localhost:50051',
aws_access_key_id='YOUR_CLIENT_ID',
aws_secret_access_key='YOUR_CLIENT_SECRET',
- region_name='DOCKER_TEST'
+ region_name='europe-west-1'
)
```
@@ -72,7 +74,7 @@ Once configured, you can use the standard S3 commands to manage your buckets and
```bash
aws s3api create-bucket \
--bucket my-s3-bucket \
- --region DOCKER_TEST \
+ --region europe-west-1 \
--endpoint-url $ANVIL_ENDPOINT
```
@@ -93,33 +95,3 @@ aws s3 ls s3://my-s3-bucket/ --endpoint-url $ANVIL_ENDPOINT
```bash
aws s3 cp s3://my-s3-bucket/remote-file.txt downloaded-file.txt --endpoint-url $ANVIL_ENDPOINT
```
-
-### 4.3. Generating Presigned URLs
-
-Anvil's S3 gateway supports generating presigned URLs, which provide temporary, credential-less access to your objects. This is the most secure way to grant a user temporary access to download or upload a specific file.
-
-**Generate a Presigned URL for Download (GET)**
-
-```bash
-aws s3 presign s3://my-s3-bucket/remote-file.txt --expires-in 300 --endpoint-url $ANVIL_ENDPOINT
-```
-
-This will return a long URL that can be used by anyone to download `remote-file.txt` for the next 5 minutes (300 seconds).
-
-```bash
-# Anyone can use this URL to download the file
-curl "THE_PRESIGNED_URL"
-```
-
-**Generate a Presigned URL for Upload (PUT)**
-
-```bash
-aws s3 presign s3://my-s3-bucket/new-object.txt --expires-in 600 --endpoint-url $ANVIL_ENDPOINT
-```
-
-This URL can be used to upload a file to the specified key.
-
-```bash
-curl -T "local-upload.txt" "THE_PRESIGNED_URL"
-```
-
diff --git a/docs/06-operational-guide-deployment.md b/docs/06-operational-guide-deployment.md
index 6933967..c64203d 100644
--- a/docs/06-operational-guide-deployment.md
+++ b/docs/06-operational-guide-deployment.md
@@ -15,7 +15,7 @@ This chapter covers the fundamentals of deploying Anvil. The architecture is fle
A single-node deployment is the simplest way to run Anvil and is perfect for development, testing, or small-scale use cases. It consists of one Anvil instance and two PostgreSQL databases (which can run on the same Postgres server).
-See the `docker-compose.yml` in the [Getting Started](/docs/anvil/getting-started) guide for a complete, working example.
+See the `docker-compose.yml` in the [Getting Started](../getting-started) guide for a complete, working example.
**Key Configuration Parameters:**
@@ -92,7 +92,7 @@ sudo firewall-cmd --reload
### 6.3. Configuration Reference
-Anvil is configured entirely through environment variables. The following is a reference for the most important variables, defined in `src/config.rs`.
+Anvil is configured entirely through environment variables. The following is a reference for the most important variables.
| Variable | Description |
| ------------------------------- | --------------------------------------------------------------------------- |
@@ -101,7 +101,7 @@ Anvil is configured entirely through environment variables. The following is a r
| `REGION` | **Required.** The name of the region this node belongs to. |
| `JWT_SECRET` | **Required.** Secret key for minting and verifying JWTs. |
| `ANVIL_SECRET_ENCRYPTION_KEY` | **Required.** A 64-character hex-encoded string for AES-256 encryption.
**CRITICAL:** This key is used to encrypt sensitive data at rest. It **MUST** be a cryptographically secure, 64-character hexadecimal string (representing 32 bytes). Loss of this key will result in permanent data loss.
Generate a secure key with:
`openssl rand -hex 32` |
-| `ANVIL_CLUSTER_SECRET` | A shared secret to authenticate and encrypt inter-node gossip messages. |
+| `CLUSTER_SECRET` | A shared secret to authenticate and encrypt inter-node gossip messages. |
| `API_LISTEN_ADDR` | The local IP and port for the unified S3 Gateway and gRPC service (e.g., `0.0.0.0:50051`). |
| `CLUSTER_LISTEN_ADDR` | The local multiaddress for the QUIC P2P listener. |
| `PUBLIC_CLUSTER_ADDRS` | Comma-separated list of public-facing multiaddresses for this node. |
@@ -116,4 +116,4 @@ The separation of databases is a key scaling feature.
- **Global Database:** This is the single source of truth for low-volume, globally relevant data. It contains tables for `tenants`, `buckets`, `apps`, `policies`, and `regions`. Because all nodes access this, it can become a bottleneck if not managed correctly, but the data it holds changes infrequently.
-- **Regional Database:** This database handles the high-volume traffic of object metadata. Each region has its own, containing the `objects` table. This allows object listing and searching to be handled locally within a region, preventing a single database from having to index billions of objects from around the world.
+- **Regional Database:** This database handles the high-volume traffic of object metadata. Each region has its own, containing the `objects` table. This allows object listing and searching to be handled locally within a region, preventing a single database from having to index billions of objects from around the world.
\ No newline at end of file
diff --git a/docs/07-operational-guide-admin-cli.md b/docs/07-operational-guide-admin-cli.md
index bb3c068..a67ea20 100644
--- a/docs/07-operational-guide-admin-cli.md
+++ b/docs/07-operational-guide-admin-cli.md
@@ -1,19 +1,19 @@
----
-slug: /anvil/operational-guide/admin-cli
-title: 'Operational Guide: The Anvil Admin CLI'
-description: A reference guide for using the `anvil admin` command-line interface to manage tenants, apps, policies, and regions.
-tags: [operational-guide, admin, cli, tenants, apps, policies]
+---
+slug: /anvil/operational-guide/admin-tool
+title: 'Operational Guide: The Admin Tool'
+description: A reference guide for using the `admin` tool to manage tenants, apps, policies, and regions.
+tags: [operational-guide, admin, tenants, apps, policies]
---
-# Chapter 7: The Anvil Admin CLI
+# Chapter 7: The Admin Tool
-> **TL;DR:** Use the `anvil admin` CLI for core administrative tasks. It connects directly to the global database to manage tenants, regions, apps, and policies.
+> **TL;DR:** Use the `admin` tool for core administrative tasks. It connects directly to the global database to manage tenants, regions, apps, and policies.
-Anvil includes a powerful command-line interface (CLI) for performing essential administrative tasks. This tool is the primary way to bootstrap the system and manage high-level resources. It works by connecting directly to the global PostgreSQL database.
+Anvil includes a powerful command-line tool for performing essential administrative tasks. This tool is the primary way to bootstrap the system and manage high-level resources. It works by connecting directly to the global PostgreSQL database.
-### Running the Admin CLI
+### Running the Admin Tool
-When running Anvil via Docker Compose, you can execute the admin CLI using `docker-compose exec`. Note that the command is `admin`, not `anvil admin`.
+When running Anvil via Docker Compose, you can execute the admin tool using `docker compose exec`. The command to run is `admin`.
```bash
docker compose exec anvil1 admin
diff --git a/docs/08-operational-guide-scaling.md b/docs/08-operational-guide-scaling.md
index 5169a9c..c125afd 100644
--- a/docs/08-operational-guide-scaling.md
+++ b/docs/08-operational-guide-scaling.md
@@ -50,7 +50,7 @@ version: "3.8"
services:
anvil2:
- image: ghcr.io/worka-ai/anvil:main
+ image: ghcr.io/worka-ai/anvil:v2025.11.14-001012
# We don't run databases here; we point to the ones on Host A
environment:
RUST_LOG: "info"
@@ -63,7 +63,7 @@ services:
# --- Use the SAME secrets as the rest of the cluster ---
JWT_SECRET: "must-be-a-long-and-random-secret-for-signing-jwts"
ANVIL_SECRET_ENCRYPTION_KEY: "must-be-a-64-character-hex-string-generate-with-openssl-rand-hex-32"
- ANVIL_CLUSTER_SECRET: "must-be-a-long-and-random-secret-for-cluster-gossip"
+ CLUSTER_SECRET: "must-be-a-long-and-random-secret-for-cluster-gossip"
# --- Networking for Host B ---
API_LISTEN_ADDR: "0.0.0.0:50051"
@@ -112,9 +112,9 @@ For large-scale, geographically distributed deployments, Anvil supports a multi-
1. **Set up a new PostgreSQL database** for the new region.
2. **Run the regional database migrations** against this new database.
-3. **Register the new region** in the global database using the admin CLI:
+3. **Register the new region** in the global database using the admin tool:
```bash
- anvil admin regions create --name
+ docker compose exec anvil1 admin regions create --name <region-name>
```
4. **Launch new Anvil peers** in the new geographical location, configuring them with:
* The shared `GLOBAL_DATABASE_URL`.
@@ -127,4 +127,4 @@ Anvil uses a **SWIM-like gossip protocol** to manage cluster membership. This is
- **Gossip:** Each peer periodically sends its state to a few other random peers. This information spreads virally through the cluster, ensuring that all nodes eventually converge on the same view of the cluster's membership.
- **Failure Detection:** If a peer fails to respond to pings from its neighbors, it is marked as "suspect." If it remains unresponsive, it is eventually removed from the cluster state by all peers.
-- **mDNS (Multicast DNS):** For local networks (like a single data center or a Docker network), Anvil can use mDNS to automatically discover peers without needing a bootstrap address. This is enabled by default (`ENABLE_MDNS=true`) and is useful for simplifying local development and testing.
+- **mDNS (Multicast DNS):** For local networks (like a single data center or a Docker network), Anvil can use mDNS to automatically discover peers without needing a bootstrap address. This is enabled by default (`ENABLE_MDNS=true`) and is useful for simplifying local development and testing.
\ No newline at end of file
diff --git a/docs/10-developer-guide-architecture.md b/docs/10-developer-guide-architecture.md
index db24033..f8949a6 100644
--- a/docs/10-developer-guide-architecture.md
+++ b/docs/10-developer-guide-architecture.md
@@ -9,7 +9,7 @@ tags: [developer-guide, architecture, principles, rust, quic, postgres]
> **TL;DR:** Anvil is a distributed system built on Rust, QUIC, and Postgres. It prioritizes operational simplicity and multi-tenancy, using erasure coding for durability and a gossip protocol for membership.
-This guide is for developers who want to contribute to Anvil or understand its internal workings. We begin with a high-level view of the system's architecture and the design decisions that shape it.
+This guide is for developers who want to understand Anvil's internal workings. We begin with a high-level view of the system's architecture and the design decisions that shape it.
### 10.1. Guiding Principles
@@ -23,8 +23,6 @@ Anvil's design is guided by a few core principles:
4. **Performance-First:** We prioritize performance by using modern, efficient technologies. This includes zero-copy I/O where possible, a fully asynchronous Rust codebase built on Tokio, and a high-performance QUIC-based network protocol.
-5. **Extensibility:** The system is designed to be a foundation for more than just storage. The concept of "Compute Capabilities" is a first-class citizen, paving the way for a unified data and compute fabric.
-
### 10.2. Core Technologies
The choice of technology is critical to achieving Anvil's design goals.
@@ -40,4 +38,4 @@ The choice of technology is critical to achieving Anvil's design goals.
* A **Global** database stores low-volume, high-importance data like tenants, buckets, and security policies.
* **Regional** databases store the high-volume object metadata for each region. This allows the most frequent queries (listing objects) to remain local to a region, enabling massive horizontal scaling.
-- **Tonic and Axum:** The API layer is built using the `tonic` framework for the gRPC API and the `axum` framework for the S3-compatible HTTP gateway. They are integrated into a single server process, allowing Anvil to serve both protocols from one application.
+- **Tonic and Axum:** The API layer is built using the `tonic` framework for the gRPC API and the `axum` framework for the S3-compatible HTTP gateway. They are integrated into a single server process, allowing Anvil to serve both protocols from one application.
\ No newline at end of file
diff --git a/docs/11-user-guide-hugging-face-ingestion.md b/docs/11-user-guide-hugging-face-ingestion.md
new file mode 100644
index 0000000..20822b2
--- /dev/null
+++ b/docs/11-user-guide-hugging-face-ingestion.md
@@ -0,0 +1,55 @@
+---
+slug: /anvil/user-guide/hugging-face-ingestion
+title: 'User Guide: Hugging Face Ingestion'
+description: Learn how to ingest models from the Hugging Face Hub directly into your Anvil cluster.
+tags: [user-guide, hugging-face, models, ingestion, cli]
+---
+
+# Chapter 11: Hugging Face Ingestion
+
+> **TL;DR:** Use the `anvil hf ingest` command to quickly and efficiently pull model repositories from the Hugging Face Hub and store them as objects in your Anvil cluster.
+
+Anvil provides a streamlined workflow for ingesting machine learning models directly from the Hugging Face Hub. This feature is designed to simplify the process of populating your object store with the models you need for your AI applications.
+
+### How It Works
+
+When you initiate an ingestion, Anvil performs the following steps:
+
+1. **API Interaction:** It communicates with the Hugging Face Hub API to get the list of all files associated with the specified model repository.
+2. **Concurrent Download:** It downloads the model files concurrently to maximize speed and efficiency.
+3. **Object Storage:** As each file is downloaded, it is streamed directly into your Anvil cluster as an object. The object key is automatically determined based on the file's path in the original repository.
+
+This process is significantly faster and more reliable than manually downloading files and then uploading them.
+
+### Using the `anvil` CLI
+
+The primary way to use this feature is through the `anvil` CLI.
+
+**Command**
+
+```bash
+anvil hf ingest --repo <repo-id> --bucket <bucket-name>
+```
+
+- `--repo`: The ID of the repository on the Hugging Face Hub (e.g., `gpt2` or `stabilityai/stable-diffusion-2-1`).
+- `--bucket`: The name of the Anvil bucket where the model files will be stored.
+
+**Example**
+
+Let's say you want to ingest the original GPT-2 model into a bucket named `llm-models`.
+
+```bash
+# First, ensure the destination bucket exists
+anvil bucket create --name llm-models --region europe-west-1
+
+# Now, ingest the model
+anvil hf ingest --repo gpt2 --bucket llm-models
+```
+
+After the process completes, you can list the objects in your bucket to see the model files:
+
+```bash
+anvil object ls --path s3://llm-models/gpt2/
+```
+
+You will see all the files from the `gpt2` repository, such as `config.json`, `model.safetensors`, and `tokenizer.json`, stored as objects in your Anvil cluster.
diff --git a/docs/15-developer-guide-deep-dive-compute.md b/docs/15-developer-guide-deep-dive-compute.md
deleted file mode 100644
index fa1f72d..0000000
--- a/docs/15-developer-guide-deep-dive-compute.md
+++ /dev/null
@@ -1,78 +0,0 @@
----
-slug: /anvil/developer-guide/deep-dive/compute
-title: 'Deep Dive: Compute Capabilities'
-description: An exploration of Anvil's vision for unifying storage and compute, allowing peers to execute jobs directly on the data fabric.
-tags: [developer-guide, architecture, compute, jobs, scheduling]
----
-
-# Chapter 15: Deep Dive: Compute Capabilities
-
-> **TL;DR:** Peers can register compute capabilities. A scheduler uses HRW hashing to dispatch jobs, turning the storage fabric into a compute fabric.
-
-Worka Anvil is designed to be more than just a distributed storage system; it is a foundation for a unified **storage and compute fabric**. The architecture anticipates a world where the system not only stores data but also performs computations directly on that data, minimizing data movement and maximizing efficiency.
-
-While this feature is still in its early stages, the architectural groundwork and API definitions are already in place.
-
-### The Vision: Data-Local Compute
-
-The core idea is to bring the computation to the data. Instead of a client downloading a massive dataset, running a computation, and then uploading the result, the client can submit a **Job** to Anvil. Anvil then schedules this job to run on a peer that is best suited for the task, ideally one that already holds some or all of the required data.
-
-This is particularly powerful for workloads like:
-
-- **Machine Learning Inference:** A peer with a GPU can register an `inference` capability. A client can then submit a job with an image and a model name, and the peer will run the inference and return the result.
-- **Data Transformation:** A job could be submitted to resize an image, transcode a video, or convert a CSV file to Parquet format.
-- **Complex Queries:** A job could perform a complex analysis over a large dataset stored in Anvil, returning only the final, aggregated result.
-
-### Architectural Components
-
-The implementation of this vision relies on a few key components defined in the database schema and gRPC API.
-
-#### 1. Capability Registry
-
-The `compute_capabilities` table in the global PostgreSQL database acts as a registry for the compute resources available in the cluster.
-
-```sql
-CREATE TABLE compute_capabilities (
- peer_id UUID NOT NULL,
- region TEXT NOT NULL,
- capability TEXT NOT NULL, -- e.g., "inference:llama3", "video:transcode:h264"
- resources JSONB, -- e.g., {"gpu": "true", "memory_gb": 16}
- max_concurrency INT,
- PRIMARY KEY(peer_id, capability)
-);
-```
-
-When a peer with compute resources comes online, it registers its capabilities in this table. This allows the scheduler to discover which nodes can perform which tasks.
-
-#### 2. The `ComputeService` gRPC API
-
-The `anvil.proto` file defines the `ComputeService`, which is the user-facing entry point for submitting and managing jobs.
-
-```proto
-service ComputeService {
- rpc RegisterCapability(RegisterCapabilityRequest) returns (RegisterCapabilityResponse);
- rpc SubmitJob(SubmitJobRequest) returns (SubmitJobResponse);
- rpc GetJobStatus(GetJobStatusRequest) returns (GetJobStatusResponse);
-}
-```
-
-- `RegisterCapability`: Called by a compute peer on startup to advertise its capabilities.
-- `SubmitJob`: Called by a client to request a computation.
-- `GetJobStatus`: Called by a client to check on the progress of a submitted job.
-
-#### 3. Job Scheduling
-
-When a client calls `SubmitJob`, the Anvil scheduler performs the following steps:
-
-1. **Filter Peers:** It queries the `compute_capabilities` table to find all peers that have registered the required capability (e.g., `video:transcode:h264`) and are in the desired region.
-2. **Select a Peer:** It uses **Rendezvous Hashing (HRW)**, similar to shard placement, to select a peer from the filtered list. This provides load balancing and deterministic scheduling. The hashing could be influenced by the job's input data key, promoting data locality.
-3. **Dispatch Job:** The scheduler sends the job to the selected peer.
-
-#### 4. Job Execution
-
-The peer that receives the job is responsible for executing it. The execution environment is designed to be pluggable, but the primary methods would be:
-
-- **Containers (Podman/Docker):** For complex, non-sandboxed workloads, the peer could use a container runtime to execute the job.
-- **WASM Runtimes:** For secure, sandboxed, and portable compute, WebAssembly is an ideal choice.
-
-If the job requires a model or other large assets, the peer can fetch them directly from Anvil's storage layer, benefiting from the distributed and data-local nature of the system.
diff --git a/docs/16-developer-guide-contributing.md b/docs/16-developer-guide-contributing.md
deleted file mode 100644
index a3f3678..0000000
--- a/docs/16-developer-guide-contributing.md
+++ /dev/null
@@ -1,93 +0,0 @@
----
-slug: /anvil/developer-guide/contributing
-title: 'Developer Guide: Contributing to Anvil'
-description: A guide for developers who want to contribute to the Anvil project, covering setup, testing, and contribution guidelines.
-tags: [developer-guide, contributing, development, testing, github]
----
-
-# Chapter 16: Contributing to Anvil
-
-> **TL;DR:** Set up your development environment, run the test suites, and follow our contribution guidelines.
-
-Anvil is an open-source project, and we welcome contributions from the community. This guide provides the information you need to get your development environment set up, run the tests, and prepare your first contribution.
-
-### 16.1. Building from Source
-
-**Prerequisites:**
-
-- **Rust:** Anvil is built with the Rust 2024 edition. We recommend installing Rust via [rustup](https://rustup.rs/).
-- **PostgreSQL:** You will need a running PostgreSQL server for the databases.
-- **Docker:** The integration tests use Docker to manage test environments.
-
-**Steps to Build:**
-
-1. **Clone the Repository:**
-
- ```bash
- git clone https://github.com/worka-ai/anvil.git
- cd anvil
- ```
-
-2. **Set up Databases:**
- For development, you can use the provided `docker-compose.yml` to spin up the necessary PostgreSQL instances.
-
- ```bash
- docker-compose up -d postgres-global postgres-regional
- ```
-
-3. **Configure Environment:**
- Copy the `.env.example` file to `.env` and fill in the database URLs and other required secrets.
-
-4. **Build the Project:**
-
- ```bash
- cargo build
- ```
-
-### 16.2. Running the Test Suite
-
-Anvil has a comprehensive test suite that covers unit tests, integration tests, and end-to-end cluster tests.
-
-**Unit Tests:**
-
-These are fast tests that check individual components in isolation.
-
-```bash
-cargo test --lib
-```
-
-**Integration Tests:**
-
-These tests spin up a test cluster, including isolated databases, to test the interaction between different components. They are located in the `tests/` directory.
-
-```bash
-cargo test --test '*'
-```
-
-> **Note:** The integration tests require Docker to be running, as they create isolated PostgreSQL instances for each test run to ensure a clean environment.
-
-**End-to-End Docker Cluster Test:**
-
-The `docker_cluster_test` is a full end-to-end test that uses Docker Compose to build and run a multi-node cluster, then interacts with it using both the gRPC and S3 APIs. It is the most comprehensive test of the system.
-
-```bash
-cargo test --test docker_cluster_test
-```
-
-### 16.3. Code Style and Contribution Guidelines
-
-1. **Code Formatting:** All code should be formatted with `rustfmt`. You can run this with `cargo fmt`.
-
-2. **Linting:** We use `clippy` for linting. Please run `cargo clippy --all-targets --all-features -- -D warnings` to check for any issues before submitting your code.
-
-3. **Commit Messages:** Please follow the [Conventional Commits](https://www.conventionalcommits.org/) specification for your commit messages. This helps us maintain a clear and readable commit history.
- * Example: `feat(storage): Add support for tiered storage`
- * Example: `fix(s3): Correctly handle URL encoding in object keys`
-
-4. **Pull Requests:**
- * Create your changes on a new branch.
- * Ensure all tests pass before submitting.
- * Provide a clear description of the changes in your pull request.
- * If your change is user-facing, please include updates to the relevant documentation.
-
-We look forward to your contributions!
diff --git a/docs/17-appendices-docker-compose.md b/docs/17-appendices-docker-compose.md
index 9b3d4dd..da2bc09 100644
--- a/docs/17-appendices-docker-compose.md
+++ b/docs/17-appendices-docker-compose.md
@@ -1,17 +1,17 @@
---
slug: /anvil/appendices/docker-compose
title: 'Appendix A: Docker Compose Reference'
-description: A reference copy of the `docker-compose.yml` file for deploying a multi-node Anvil cluster.
+description: A reference copy of the `docker-compose.yml` file for deploying a multi-node Anvil cluster for testing.
tags: [appendices, docker, configuration]
---
# Appendix A: Docker Compose Reference
-This appendix contains the full `docker-compose.yml` file used for setting up a multi-node Anvil cluster for development and testing. It demonstrates how to configure multiple Anvil peers, connect them to shared databases, and set up the necessary networking and environment variables.
+This appendix contains a full `docker-compose.yml` file used for setting up a multi-node Anvil cluster for development and testing. It demonstrates how to configure multiple Anvil peers, connect them to shared databases, and set up the necessary networking and environment variables. This is a more complex setup than the single-node example in the Getting Started guide.
### Single-Node Development
-For a simpler, single-node setup, you can refer to the version in the [Getting Started](/docs/anvil/getting-started) guide.
+For a simpler, single-node setup, you can refer to the version in the [Getting Started](../getting-started) guide.
### Multi-Node Cluster (`docker-compose.yml`)
@@ -56,7 +56,7 @@ services:
retries: 5
anvil1:
- build: .
+ image: ghcr.io/worka-ai/anvil:v2025.11.14-001012
depends_on:
postgres-global:
condition: service_healthy
@@ -70,7 +70,7 @@ services:
# --- CRITICAL: SET THESE TO SECURE, RANDOMLY GENERATED VALUES ---
JWT_SECRET: "must-be-a-long-and-random-secret-for-signing-jwts"
ANVIL_SECRET_ENCRYPTION_KEY: "must-be-a-64-character-hex-string-generate-with-openssl-rand-hex-32"
- ANVIL_CLUSTER_SECRET: "must-be-a-long-and-random-secret-for-cluster-gossip"
+ CLUSTER_SECRET: "must-be-a-long-and-random-secret-for-cluster-gossip"
# --- Networking Configuration ---
# These addresses MUST be reachable by other nodes and clients.
# In a real deployment, replace 203.0.113.1 with the node's public IP address.
@@ -91,7 +91,7 @@ services:
retries: 5
anvil2:
- build: .
+ image: ghcr.io/worka-ai/anvil:v2025.11.14-001012
depends_on:
anvil1:
condition: service_started
@@ -102,7 +102,7 @@ services:
REGION: "europe-west-1"
JWT_SECRET: "must-be-a-long-and-random-secret-for-signing-jwts"
ANVIL_SECRET_ENCRYPTION_KEY: "must-be-a-64-character-hex-string-generate-with-openssl-rand-hex-32"
- ANVIL_CLUSTER_SECRET: "must-be-a-long-and-random-secret-for-cluster-gossip"
+ CLUSTER_SECRET: "must-be-a-long-and-random-secret-for-cluster-gossip"
API_LISTEN_ADDR: "0.0.0.0:50051"
CLUSTER_LISTEN_ADDR: "/ip4/0.0.0.0/udp/7444/quic-v1"
PUBLIC_CLUSTER_ADDRS: "/ip4/203.0.113.2/udp/7444/quic-v1"
@@ -115,7 +115,7 @@ services:
networks: [anvilnet]
anvil3:
- build: .
+ image: ghcr.io/worka-ai/anvil:v2025.11.14-001012
depends_on:
anvil1:
condition: service_started
@@ -126,7 +126,7 @@ services:
REGION: "europe-west-1"
JWT_SECRET: "must-be-a-long-and-random-secret-for-signing-jwts"
ANVIL_SECRET_ENCRYPTION_KEY: "must-be-a-64-character-hex-string-generate-with-openssl-rand-hex-32"
- ANVIL_CLUSTER_SECRET: "must-be-a-long-and-random-secret-for-cluster-gossip"
+ CLUSTER_SECRET: "must-be-a-long-and-random-secret-for-cluster-gossip"
API_LISTEN_ADDR: "0.0.0.0:50051"
CLUSTER_LISTEN_ADDR: "/ip4/0.0.0.0/udp/7445/quic-v1"
PUBLIC_CLUSTER_ADDRS: "/ip4/203.0.113.3/udp/7445/quic-v1"
@@ -141,4 +141,4 @@ services:
volumes:
postgres_global_data:
postgres_regional_data:
-```
+```
\ No newline at end of file
diff --git a/docs/19-appendices-postgres-schema.md b/docs/19-appendices-postgres-schema.md
deleted file mode 100644
index 0ea6789..0000000
--- a/docs/19-appendices-postgres-schema.md
+++ /dev/null
@@ -1,198 +0,0 @@
----
-slug: /anvil/appendices/postgres-schema
-title: 'Appendix C: Postgres Schema DDL'
-description: A reference copy of the SQL Data Definition Language (DDL) for Anvil's global and regional databases.
-tags: [appendices, postgres, schema, sql, ddl]
----
-
-# Appendix C: Postgres Schema DDL
-
-This appendix provides the complete SQL schema for both the global and regional PostgreSQL databases used by Anvil. This is useful for understanding the underlying data model and for setting up databases manually.
-
-### Global Database Schema
-
-This schema defines the tables for globally relevant data, such as tenants, buckets, apps, and policies. All nodes in a deployment connect to this single database.
-
-```sql
--- From migrations_global/V1__initial_global_schema.sql
-
-CREATE TABLE regions (
- id BIGSERIAL PRIMARY KEY,
- name TEXT UNIQUE NOT NULL,
- created_at TIMESTAMPTZ NOT NULL DEFAULT now()
-);
-
-CREATE TABLE tenants (
- id BIGSERIAL PRIMARY KEY,
- name TEXT UNIQUE NOT NULL,
- api_key TEXT NOT NULL,
- created_at TIMESTAMPTZ NOT NULL DEFAULT now()
-);
-
-CREATE TABLE buckets (
- id BIGSERIAL PRIMARY KEY,
- tenant_id BIGINT NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
- name TEXT NOT NULL UNIQUE,
- region TEXT NOT NULL REFERENCES regions(name) ON DELETE CASCADE,
- is_public_read BOOLEAN NOT NULL DEFAULT false,
- created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
- deleted_at TIMESTAMPTZ
-);
-
-CREATE TABLE apps (
- id BIGSERIAL PRIMARY KEY,
- tenant_id BIGINT NOT NULL REFERENCES tenants(id) ON DELETE CASCADE,
- name TEXT NOT NULL,
- client_id TEXT UNIQUE NOT NULL,
- client_secret_encrypted BYTEA NOT NULL,
- created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
- UNIQUE(tenant_id, name)
-);
-
-CREATE TABLE policies (
- id BIGSERIAL PRIMARY KEY,
- app_id BIGINT NOT NULL REFERENCES apps(id) ON DELETE CASCADE,
- resource TEXT NOT NULL, -- e.g., "my-bucket/folder/*"
- action TEXT NOT NULL, -- e.g., "read", "write", "grant"
- created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
- UNIQUE(app_id, resource, action)
-);
-
-CREATE TYPE task_status AS ENUM ('pending', 'running', 'completed', 'failed');
-CREATE TYPE task_type AS ENUM ('DELETE_OBJECT', 'DELETE_BUCKET', 'REBALANCE_SHARD');
-
-CREATE TABLE tasks (
- id BIGSERIAL PRIMARY KEY,
- task_type task_type NOT NULL,
- payload JSONB NOT NULL,
-
- -- Scheduling and Execution
- priority INT NOT NULL DEFAULT 100, -- Lower is higher priority
- status task_status NOT NULL DEFAULT 'pending',
- scheduled_at TIMESTAMPTZ NOT NULL DEFAULT now(),
-
- -- Error Handling & Retries
- attempts INT NOT NULL DEFAULT 0,
- last_error TEXT,
-
- -- Timestamps
- created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
- updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
-);
-
--- Indexes for efficient polling
-CREATE INDEX idx_tasks_fetch_pending ON tasks (priority, scheduled_at)
- WHERE status = 'pending';
-```
-
-### Regional Database Schema
-
-This schema defines the tables for object metadata. Each region in a deployment has its own independent database with this schema.
-
-```sql
--- From migrations_regional/V1__initial_regional_schema.sql
-
--- For text search capabilities
-CREATE EXTENSION IF NOT EXISTS pg_trgm;
--- For hierarchical path queries
-CREATE EXTENSION IF NOT EXISTS ltree;
--- For UUID generation
-CREATE EXTENSION IF NOT EXISTS pgcrypto;
-
-CREATE TABLE objects (
- id BIGSERIAL PRIMARY KEY,
- -- This is a reference to a bucket in the global database.
- -- There is no foreign key constraint as it crosses databases.
- bucket_id BIGINT NOT NULL,
- tenant_id BIGINT NOT NULL,
- key TEXT NOT NULL,
- key_ltree LTREE,
-
- -- The BLAKE3 hash of the object's content, used for content-addressing
- content_hash TEXT NOT NULL,
-
- size BIGINT NOT NULL,
- etag TEXT NOT NULL,
- content_type TEXT,
-
- -- For versioning, though we won't implement versioning logic in Phase 1
- version_id UUID NOT NULL DEFAULT gen_random_uuid(),
-
- created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
-
- -- For future use
- storage_class SMALLINT,
- user_meta JSONB,
-
- -- In a distributed setup, this would map shards to peers
- shard_map JSONB,
-
- checksum BYTEA,
- deleted_at TIMESTAMPTZ,
- -- An object is uniquely identified by its bucket, key, and version
- UNIQUE(bucket_id, key, version_id)
-);
-
--- One-time helper to turn a TEXT key into a safe ltree
-CREATE OR REPLACE FUNCTION make_key_ltree(p_key text)
- RETURNS ltree
- LANGUAGE sql
- IMMUTABLE
-AS $$
-WITH cleaned AS (
- SELECT regexp_replace(trim(both '/' from coalesce(p_key, '')), '/+', '/', 'g') AS k
-),
- segs AS (
- SELECT unnest(string_to_array(k, '/')) AS seg
- FROM cleaned
- ),
- norm AS (
- SELECT
- -- keep only letters/digits/underscore, lowercased
- lower(regexp_replace(seg, '[^A-Za-z0-9_]', '_', 'g')) AS s
- FROM segs
- WHERE seg <> '' -- drop empties
- ),
- head_fixed AS (
- SELECT
- CASE
- WHEN s ~ '^[a-z]' THEN s -- starts with a letter already
- WHEN s = '' THEN 'x' -- degenerate -> x
- ELSE 'x' || s -- make it start with a letter
- END AS lbl
- FROM norm
- ),
- joined AS (
- SELECT array_to_string(array_agg(lbl), '.') AS dot
- FROM head_fixed
- )
-SELECT CASE
- WHEN dot IS NULL OR dot = '' THEN NULL
- ELSE text2ltree(dot)
- END
-FROM joined;
-$$;
-
--- Use the helper in your trigger
-CREATE OR REPLACE FUNCTION update_key_ltree()
- RETURNS TRIGGER AS $$
-BEGIN
- NEW.key_ltree := make_key_ltree(NEW.key);
- RETURN NEW;
-END;
-$$ LANGUAGE plpgsql;
-
-DROP TRIGGER IF EXISTS objects_update_key_ltree_trigger ON objects;
-CREATE TRIGGER objects_update_key_ltree_trigger
- BEFORE INSERT OR UPDATE ON objects
- FOR EACH ROW
-EXECUTE FUNCTION update_key_ltree();
-
--- Indexes for efficient querying
-CREATE INDEX idx_objects_bucket_key ON objects(bucket_id, key);
-CREATE INDEX idx_objects_ltree ON objects USING GIST(key_ltree);
-CREATE INDEX idx_objects_trgm ON objects USING GIN(key gin_trgm_ops);
-CREATE INDEX idx_objects_created_at ON objects USING BRIN(created_at);
-
-CREATE INDEX idx_objects_not_deleted ON objects (bucket_id, key) WHERE deleted_at IS NULL;
-```