Skip to content

Commit

Permalink
Merge pull request #18 from datascience/dev
Browse files Browse the repository at this point in the history
Improvements
  • Loading branch information
artourkin authored Jan 5, 2025
2 parents ee6edc8 + 877dbe6 commit 39016d8
Show file tree
Hide file tree
Showing 25 changed files with 974 additions and 305 deletions.
105 changes: 105 additions & 0 deletions .docker-compose/docker-compose.clickhouse.lb.dev.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Development stack with ClickHouse as the database and nginx in front of the
# `rest` service.
#
# This file lives in `.docker-compose/`, so paths that start with `../` point
# at the repository root. Note that `build.dockerfile` is resolved relative to
# `build.context`, NOT relative to this file.
version: '3'

services:

  fits:
    build:
      context: ../fits
      dockerfile: Dockerfile
    container_name: fits
    env_file: ../.env
    networks:
      - web
    restart: unless-stopped
    ports:
      - '8081:8080'

  # No `container_name` here, unlike the other services.
  # NOTE(review): presumably so the service can be scaled behind nginx - confirm.
  rest:
    build:
      context: ..
      # Was `../Dockerfile`, which resolves to a path outside the repository
      # because the dockerfile path is joined onto the build context.
      dockerfile: Dockerfile
    env_file: ../.env
    networks:
      - web
    restart: unless-stopped
    environment:
      - SPRING_PROFILES_ACTIVE=clickhouse
    deploy:
      replicas: 1
    ports:
      - '8092:8080'
    depends_on:
      - fits
      - db-docker

  web:
    build:
      context: ../web
      dockerfile: Dockerfile
    container_name: web
    env_file: ../.env
    networks:
      - web
    restart: unless-stopped
    ports:
      - '8080:3000'

  db-docker:
    image: yandex/clickhouse-server
    container_name: db-docker
    networks:
      - web
    ports:
      - '8123:8123'
      - '9000:9000'
      - '9004:9004'

  # One-shot helper: waits until ClickHouse answers a query, applies
  # initdb.sql, then idles (`tail -f /dev/null`) so the container keeps running.
  db-docker-init:
    image: yandex/clickhouse-server
    container_name: db-docker-init
    volumes:
      # NOTE(review): relative to this file this is
      # `.docker-compose/config/clickhouse`; the other paths here use `../`.
      # Confirm the directory exists there or switch to `../config/clickhouse`.
      - ./config/clickhouse:/var/clickhouse
    depends_on:
      - db-docker
    networks:
      - web
    entrypoint: ['/bin/sh', '-c']
    # The whole script is wrapped in double quotes so that it reaches
    # `sh -c` as a single argument.
    command: |
      "
      while ! clickhouse-client --host db-docker -q \"SHOW databases;\"; do
          echo waiting for clickhouse up
          sleep 1
      done
      clickhouse-client --host db-docker --queries-file /var/clickhouse/initdb.sql
      tail -f /dev/null
      "

  adminer:
    image: adminer
    container_name: adminer
    env_file: ../.env
    restart: unless-stopped
    networks:
      - web
    ports:
      - '8090:8080'

  nginx:
    image: nginx
    container_name: nginx
    env_file: ../.env
    volumes:
      # NOTE(review): same relative-path concern as the ClickHouse volume
      # above - confirm `.docker-compose/config/nginx/nginx.conf` exists.
      - ./config/nginx/nginx.conf:/etc/nginx/conf.d/default.conf
    ports:
      - '8082:80'
    networks:
      - web
    depends_on:
      - rest

networks:
  web: {}
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM maven:3.9.0 as builder
COPY . /app
WORKDIR /app
RUN --mount=type=cache,target=/root/.m2 mvn -pl -web clean install
RUN --mount=type=cache,target=/root/.m2 mvn -pl -web clean install -Dspring.profiles.active=h2

FROM openjdk:21-jdk-slim
WORKDIR /app
Expand Down
93 changes: 66 additions & 27 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,58 +1,97 @@
# FITSInn

Place where your FITS files feel good.

## Purpose

FITSInn is a tool to store and analyse technical metadata extracted by characterisation tools such as [FITS](https://projects.iq.harvard.edu/fits/).
![img.png](docs/img.png)
FITSInn is an easy-to-use tool for storing and analyzing technical metadata extracted by characterisation tools
like [FITS](https://projects.iq.harvard.edu/fits/).

![FITSInn Screenshot](docs/img.png)

The features include:
* Improved user experience through an intuitive UI.
* Running file characterisation using FITS on the uploaded files.
* The original files are not stored.
* Slice and dice:
* Filtering,
* Drill-down,
* Property value aggregations,
* Distribution visualisations,
* Sampling.
* Built-in metadata conflict resolution.
* REST API for workflow automation.
### Key Features:

- **Intuitive User Interface**: Enhanced user experience through a sleek and streamlined interface.
- **File Characterisation**: Analyze uploaded files using FITS without storing the original files.
- **Data Analysis Tools**:
- Advanced filtering,
- Drill-down capabilities,
- Property value aggregations,
- Distribution visualizations,
- Sampling options.
- **Conflict Management**: Resolve metadata conflicts effortlessly.
- **Automation Support**: Comes with a REST API to integrate into your workflows.

---

## Installation

### Deployment
### Deployment (Production)

The artifacts are released as Docker images. You can install FITSInn using Docker-compose:
To deploy FITSInn, use the Docker images provided and execute the following docker-compose command:

```
```bash
docker-compose -f docker-compose.yaml up --pull always
```

Installation of FITSInn to Docker Swarm or K8S is possible, but is not currently addressed.
> **Note**:
> - Deployment to Docker Swarm or Kubernetes (K8S) is possible but not covered in this guide.

### Upgrade (Production)

### Local build
To upgrade FITSInn, use the Docker images provided:

Building the Docker images from scratch and starting FITSInn is executed via:
```bash
docker-compose -f docker-compose.yaml down
docker-compose -f docker-compose.yaml up --pull always
```

> **Note**:
> - The first command will remove all previously created containers. This will cause deletion of all ingested collections.


### Local Development Build

To build the Docker images from scratch and start FITSInn locally:

```bash
docker-compose -f docker-compose.dev.yaml up --build
```

File uploading using bash:
```
---

### Uploading Files to FITSInn

#### Using Bash:

```bash
bash ./utils/fileupload.sh http://localhost:8082 ~/path/to/files collection_name
```

File uploading using Python (the pip package `requests` is necessary):
```
#### Using Python:

Ensure you have the `requests` library installed. Then run:

```bash
python ./utils/fileupload.py http://localhost:8082/multipleupload ~/path/to/files 100 3 collection_name
```

## Issues
- **URL**: `http://localhost:8082` is suitable for local deployments.
- **Path to Files**: Replace `~/path/to/files` with the actual directory path containing the files.
- **Collection Name**: Replace `collection_name` with a name for your collection.

---

## Reporting Issues

If you encounter any issues while using FITSInn, please report them on GitHub:

[Submit an Issue](https://github.com/datascience/fitsinn/issues)

If you have any issue regarding FITSInn, please use [https://github.com/datascience/fitsinn/issues](https://github.com/datascience/fitsinn/issues).
---

## License

FITSInn is released under MIT license. See the [LICENSE](LICENSE) for details.
FITSInn is released under the MIT license. For more details, see the [LICENSE](LICENSE) file.
40 changes: 38 additions & 2 deletions config/clickhouse/initdb.sql
Original file line number Diff line number Diff line change
@@ -1,10 +1,46 @@
CREATE TABLE characterisationresult
CREATE DATABASE IF NOT EXISTS current;

CREATE TABLE IF NOT EXISTS current.characterisationresult
(
file_path String,
property String,
source String,
property_value String,
value_type String
) ENGINE = ReplacingMergeTree
)
ENGINE = ReplacingMergeTree
PRIMARY KEY (source, property, file_path)
ORDER BY (source, property, file_path);

-- Aggregate table keyed by (property, file_path).
-- Each row stores partial aggregate-function states rather than final values,
-- so readers have to finalize or merge the states to get results.
CREATE TABLE IF NOT EXISTS current.agg_characterisationresult
(
property String,
file_path String,
-- partial state of uniq() over the property values seen for this key
unique_values AggregateFunction(uniq, String),
-- partial state of any() over the property values seen for this key
any_value AggregateFunction(any, String)
)
ENGINE = AggregatingMergeTree
ORDER BY (property, file_path);

-- Materialized view that writes into current.agg_characterisationresult.
-- It reads from current.characterisationresult, groups by (property, file_path)
-- and emits the uniq/any aggregate states of property_value for each group.
CREATE MATERIALIZED VIEW IF NOT EXISTS current.mv_characterisationresult
TO current.agg_characterisationresult
AS
SELECT
property,
file_path,
-- -State combinators produce intermediate states matching the target columns
uniqState(property_value) AS unique_values,
anyState(property_value) AS any_value
FROM current.characterisationresult
GROUP BY property, file_path;

-- One row per (property, file_path) with a single resolved property_value.
-- If exactly one distinct value was recorded for the key, that value is
-- returned; otherwise the literal 'CONFLICT' is returned.
--
-- The states are combined with the -Merge combinators under GROUP BY rather
-- than finalized row by row: an AggregatingMergeTree table may hold several
-- not-yet-merged rows for the same key, and finalizing each row separately
-- would yield duplicate keys and could miss conflicts spread across rows.
CREATE VIEW IF NOT EXISTS current.characterisationresultaggregated
AS
SELECT
    property,
    file_path,
    CASE
        WHEN uniqMerge(unique_values) = 1
            THEN anyMerge(any_value)
        ELSE 'CONFLICT'
    END AS property_value
FROM current.agg_characterisationresult
GROUP BY property, file_path;
8 changes: 8 additions & 0 deletions core/src/main/java/rocks/artur/api/RemoveDataset.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package rocks.artur.api;

/**
 * This interface enables removing a dataset by its name.
 */
public interface RemoveDataset {
/**
 * Removes the dataset with the given name.
 *
 * @param datasetName name of the dataset to remove
 * @return flag reported by the removal; NOTE(review): presumably
 *         {@code true} on success - confirm against the gateway implementation
 */
Boolean removeDataset(String datasetName);
}
18 changes: 18 additions & 0 deletions core/src/main/java/rocks/artur/api_impl/RemoveDatasetImpl.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package rocks.artur.api_impl;

import rocks.artur.api.RemoveDataset;
import rocks.artur.domain.CharacterisationResultGateway;

/**
 * {@link RemoveDataset} implementation that forwards the request to a
 * {@link CharacterisationResultGateway}.
 */
public class RemoveDatasetImpl implements RemoveDataset {

    /** Gateway that performs the removal; assigned once in the constructor. */
    private final CharacterisationResultGateway characterisationResultGateway;

    /**
     * Creates the use case on top of the given gateway.
     *
     * @param characterisationResultGateway gateway every call is delegated to
     */
    public RemoveDatasetImpl(CharacterisationResultGateway characterisationResultGateway) {
        this.characterisationResultGateway = characterisationResultGateway;
    }

    /**
     * Removes the dataset with the given name by delegating to the gateway.
     *
     * @param datasetName name of the dataset to remove
     * @return the value returned by the gateway, passed through unchanged
     */
    @Override
    public Boolean removeDataset(String datasetName) {
        return characterisationResultGateway.removeDataset(datasetName);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,6 @@ public interface CharacterisationResultGateway {
void resolveConflictsNative(String datasetName);

List<String> listDatasets();

Boolean removeDataset(String datasetName);
}
28 changes: 2 additions & 26 deletions docker-compose.clickhouse.dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ services:
- web
restart: unless-stopped
environment:
- DB_SELECTOR=clickhouse
- SPRING_PROFILES_ACTIVE=clickhouse
deploy:
replicas: 1
ports:
- 8092:8080
- 8082:8080
depends_on:
- fits
- db-docker
Expand Down Expand Up @@ -77,29 +77,5 @@ services:
tail -f /dev/null
"
adminer:
image: adminer
container_name: adminer
env_file: .env
restart: unless-stopped
networks:
- web
ports:
- 8090:8080

nginx:
image: nginx
container_name: nginx
env_file: .env
volumes:
- ./config/nginx/nginx.conf:/etc/nginx/conf.d/default.conf
ports:
- 8082:80
networks:
- web
depends_on:
- rest

networks:
web:
2 changes: 1 addition & 1 deletion docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ services:
- web
restart: unless-stopped
environment:
- DB_SELECTOR=clickhouse
- SPRING_PROFILES_ACTIVE=clickhouse
deploy:
replicas: 1
ports:
Expand Down
Loading

0 comments on commit 39016d8

Please sign in to comment.