Add workflows for Windows and MacOS #93

Draft: wants to merge 89 commits into base: master

Commits (89)
8703ace
Test install
zhong-al Oct 11, 2024
26ecd62
Start tracks extractor test
zhong-al Oct 11, 2024
309c15d
Add data for tracks extractor test
zhong-al Oct 11, 2024
1ae921a
Check test on any push
zhong-al Oct 11, 2024
6da7f18
Try to fix torch missing
zhong-al Oct 11, 2024
9ac0c76
Try to fix torch missing
zhong-al Oct 11, 2024
c38275f
Fix hatchling missing
zhong-al Oct 12, 2024
6f66453
Add miniscene2behavior test
zhong-al Oct 12, 2024
af28c4b
Update tests
zhong-al Oct 14, 2024
f9dd66b
Start test for cvat2ultralytics and detector2cvat
zhong-al Oct 14, 2024
98b2b76
Fix cvat2ultralytics, move detection test files
zhong-al Oct 14, 2024
3407f48
Add miniscene2behavior test
zhong-al Oct 14, 2024
4ee974d
Ignore tool outputs
zhong-al Oct 14, 2024
15d1316
Adjust path
zhong-al Oct 14, 2024
6aa96f2
Update path
zhong-al Oct 14, 2024
d313209
Add player test and fix player globals
zhong-al Oct 14, 2024
a3347c5
Add cvat2slowfast test
zhong-al Oct 14, 2024
7076bb3
Comment out tests
zhong-al Oct 18, 2024
912d547
Add mocked tests
zhong-al Oct 18, 2024
06d01ee
Patch detector2cvat
zhong-al Oct 18, 2024
331ecc7
Patch player & add sample data
zhong-al Oct 18, 2024
1fe22af
Rename run functions
zhong-al Oct 18, 2024
869b17b
Test tracks extractor parser
zhong-al Oct 18, 2024
a25f893
Mock getTrackbarPos
zhong-al Oct 18, 2024
8b4c2c7
Test cvat2slowfast parser
zhong-al Oct 19, 2024
8308c60
Test cvat2ultralytics parsing
zhong-al Oct 19, 2024
c011e0e
Test detector2cvat parsing
zhong-al Oct 19, 2024
0348bd6
Test miniscene2behavior parsing
zhong-al Oct 19, 2024
ac12359
Test player parsing
zhong-al Oct 19, 2024
e761672
Update cvat2slowfast
zhong-al Oct 25, 2024
5a1478e
Update cvat2ultralytics
zhong-al Oct 25, 2024
656c40d
Update detector2cvat
zhong-al Oct 25, 2024
b55f1dc
Fix test
zhong-al Oct 25, 2024
520e1cc
Update miniscene2behavior
zhong-al Oct 25, 2024
316ab59
Update player
zhong-al Oct 25, 2024
9cfcfc0
Update tracks_extractor
zhong-al Oct 25, 2024
7606e6d
Extract model download + unzip
zhong-al Oct 25, 2024
b191425
Fix download
zhong-al Oct 25, 2024
7584e81
Download model at setup
zhong-al Oct 28, 2024
9ee7499
Add label2index json
zhong-al Oct 30, 2024
fdba887
Merge branch 'master' into enhancement/testing
zhong-al Nov 13, 2024
50feea9
Delete tracks_extractor output
zhong-al Nov 13, 2024
6a91fc2
Add clean up + runs
zhong-al Nov 13, 2024
da97183
Merge branch 'master' into enhancement/testing
zhong-al Nov 13, 2024
37dd750
Update arg parsing checks
zhong-al Nov 13, 2024
f7eb368
Remove slowfast model
zhong-al Nov 15, 2024
d69776b
Remove slowfast imports
zhong-al Nov 15, 2024
d883e40
Update requirements
zhong-al Nov 19, 2024
2b87a14
Add slowfast code
zhong-al Nov 19, 2024
1eed904
Just test 1 file on this branch
zhong-al Nov 19, 2024
d02c0a6
Correct requirements
zhong-al Nov 19, 2024
bab8b0c
Update model download and creation
zhong-al Nov 19, 2024
abf9369
Update dependencies
zhong-al Nov 19, 2024
bb855f2
Add slowfast license
zhong-al Nov 19, 2024
ce923b5
Update dependencies
zhong-al Nov 19, 2024
17f6a79
Update README
zhong-al Nov 19, 2024
a9f2deb
Fix TypeError: _BatchNorm.__init__()
zhong-al Nov 19, 2024
e4dee39
Add hub checkpoint check
zhong-al Nov 19, 2024
75bc26d
Fix format string
zhong-al Nov 19, 2024
e9f70e8
Check all loading methods
zhong-al Nov 21, 2024
71b6a5e
Check handling of invalid args/params
zhong-al Nov 21, 2024
a62d74a
Update path resolution
zhong-al Nov 21, 2024
9bb1726
Fix comparison
zhong-al Nov 21, 2024
6873627
Check test output
zhong-al Nov 21, 2024
8b40266
Correct reference annotation
zhong-al Nov 21, 2024
6510324
Fix bug: missing model.eval()
zhong-al Nov 21, 2024
7e20266
Remove debug print
zhong-al Nov 21, 2024
61bc4ad
Print diffs
zhong-al Nov 21, 2024
c8a26b3
Update diff print
zhong-al Nov 21, 2024
0d9ef0b
Try comparing using pandas
zhong-al Nov 21, 2024
236d015
Fix sep
zhong-al Nov 21, 2024
7c1d6ff
Check repr
zhong-al Nov 22, 2024
d0a2f5b
Check problem frame tensor
zhong-al Nov 22, 2024
758c4be
Try to force determinism
zhong-al Nov 22, 2024
d153a71
Merge branch 'master' into enhancement/model
zhong-al Dec 10, 2024
6ead110
Delete videos
zhong-al Dec 10, 2024
70843fa
Re-add a space
zhong-al Dec 10, 2024
9aff8aa
Fix merge issue
zhong-al Dec 10, 2024
068cda9
Try macos
zhong-al Dec 10, 2024
0026dd4
Fix merge issue
zhong-al Dec 10, 2024
d80112e
Fix config + example
zhong-al Dec 11, 2024
7a313c8
Apply suggestions from code review
zhong-al Dec 14, 2024
1aee502
Apply suggestion
zhong-al Dec 14, 2024
090679a
Add windows test
zhong-al Dec 14, 2024
1cb3bd0
Rename + add ubuntu workflow
zhong-al Dec 14, 2024
c5d4974
Update cvat2slowfast paths
zhong-al Dec 23, 2024
e3c52e5
Fix tool import
zhong-al Dec 23, 2024
a8f113c
Fix join_paths arg
zhong-al Dec 23, 2024
7dcfa1e
Uncomment tests
zhong-al Dec 24, 2024
34 changes: 34 additions & 0 deletions .github/workflows/macos.yml
@@ -0,0 +1,34 @@
name: macos test

on:
  push:

jobs:
  build:
    runs-on: macos-latest
    strategy:
      matrix:
        python-version: ["3.10"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip setuptools wheel
          pip install -r requirements.txt
          python -m pip install hatchling
          python -m pip install --no-build-isolation .
      - name: Running unit tests
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN_TESTING }}
        run: |
          python -m unittest tests/test_cvat2slowfast.py
          python -m unittest tests/test_cvat2ultralytics.py
          python -m unittest tests/test_detector2cvat.py
          python -m unittest tests/test_miniscene2behavior.py
          python -m unittest tests/test_player.py
          python -m unittest tests/test_tracks_extractor.py
4 changes: 2 additions & 2 deletions .github/workflows/test.yml → .github/workflows/ubuntu.yml
@@ -1,4 +1,4 @@
name: Test
name: ubuntu test

on:
push:
@@ -31,4 +31,4 @@ jobs:
python -m unittest tests/test_detector2cvat.py
python -m unittest tests/test_miniscene2behavior.py
python -m unittest tests/test_player.py
python -m unittest tests/test_tracks_extractor.py
python -m unittest tests/test_tracks_extractor.py
34 changes: 34 additions & 0 deletions .github/workflows/windows.yml
@@ -0,0 +1,34 @@
name: windows test

on:
  push:

jobs:
  build:
    runs-on: windows-latest
    strategy:
      matrix:
        python-version: ["3.10"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip setuptools wheel
          pip install -r requirements.txt
          python -m pip install hatchling
          python -m pip install --no-build-isolation .
      - name: Running unit tests
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN_TESTING }}
        run: |
          python -m unittest tests/test_cvat2slowfast.py
          python -m unittest tests/test_cvat2ultralytics.py
          python -m unittest tests/test_detector2cvat.py
          python -m unittest tests/test_miniscene2behavior.py
          python -m unittest tests/test_player.py
          python -m unittest tests/test_tracks_extractor.py
24 changes: 15 additions & 9 deletions README.md
@@ -18,9 +18,6 @@ pip install git+https://github.com/Imageomics/kabr-tools

**Notes:**
- Refer to [pytorch.org](https://pytorch.org/get-started/locally/) to install specific versions of torch/CUDA
- [detectron2](https://detectron2.readthedocs.io/en/latest/tutorials/install.html#requirements) requires Linux or MacOS.
- If building detectron2's wheel fails, check gcc & g++ ≥ 5.4 (run `gcc --version` and `g++ --version`).
- SlowFast's setup.py is outdated; our workaround is `pip install git+https://github.com/Imageomics/SlowFast@797a6f3ae81c49019d006296f1e0f84f431dc356`, which is included when installing `kabr_tools`.

Each KABR tool can be run through the command line (as described below) or imported as a python module. They each have help information which can be accessed on the command line through `<tool-name> -h`.

@@ -95,17 +92,26 @@ tracks_extractor --video path_to_videos --annotation path_to_annotations [--trac
## Step 3: Label mini-scenes with behavior
You can use the [KABR model](https://huggingface.co/imageomics/x3d-kabr-kinetics) to label the mini-scenes with behavior. See the [ethogram](ethogram) folder for the list of behaviors used to label the zebra videos.


To use the [KABR model](https://huggingface.co/imageomics/x3d-kabr-kinetics), download `checkpoint_epoch_00075.pyth.zip`, unzip `checkpoint_epoch_00075.pyth`, and install [SlowFast](https://github.com/facebookresearch/SlowFast). Then run [miniscene2behavior.py](miniscene2behavior.py).

Label the mini-scenes:
```
miniscene2behavior [--config path_to_config] --checkpoint path_to_checkpoint [--gpu_num number_of_gpus] --miniscene path_to_miniscene [--output path_to_output_csv]
miniscene2behavior [--hub huggingface_hub] [--config path_to_config] --checkpoint path_to_checkpoint [--gpu_num number_of_gpus] --miniscene path_to_miniscene [--output path_to_output_csv]
```

**Examples:**
- Download the checkpoint from Hugging Face and extract the config from it:
```
miniscene2behavior --hub imageomics/x3d-kabr-kinetics --checkpoint checkpoint_epoch_00075.pyth.zip --miniscene path_to_miniscene
```
- Download both the checkpoint and the config from Hugging Face:
```
miniscene2behavior --hub imageomics/x3d-kabr-kinetics --config config.yml --checkpoint checkpoint_epoch_00075.pyth --miniscene path_to_miniscene
```
- Use a local checkpoint and config:
```
miniscene2behavior --config config.yml --checkpoint checkpoint_epoch_00075.pyth --miniscene path_to_miniscene
```

**Notes:**
- If the config hasn't been extracted yet, the script extracts it from the checkpoint and writes it to `config.yml` next to the checkpoint.
- `checkpoint` should be the path to `checkpoint_epoch_00075.pyth`.
- If `gpu_num` is 0, the model will use CPU. Using at least 1 GPU greatly increases inference speed. If you're using OSC, you can request a node with one GPU by running `sbatch -N 1 --gpus-per-node 1 -A [account] --time=[minutes] [bash script]`.
- Mini-scenes are clipped videos focused on individual animals; `video` is the raw video file from which the mini-scenes were extracted.
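
As noted above, each tool can also be imported and called from Python instead of the command line. A minimal sketch, assuming the function names and argument order from this PR's `miniscene2behavior.py` (every path below is a placeholder):
```
# Sketch only: names/argument order follow miniscene2behavior.py in this PR;
# all paths are placeholders.
from kabr_tools.miniscene2behavior import create_model, annotate_miniscene

cfg, model = create_model(
    config_path="config.yml",
    checkpoint_path="checkpoint_epoch_00075.pyth",
    gpu_num=0,  # 0 runs inference on CPU
)
annotate_miniscene(
    cfg,
    model,
    "path_to_miniscene",   # folder of mini-scene clips
    "video_name",          # raw video the mini-scenes were extracted from
    "behavior.csv",        # output CSV of per-frame behavior labels
)
```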

6 changes: 3 additions & 3 deletions pyproject.toml
@@ -31,13 +31,13 @@ dependencies = [
"lxml",
"tqdm",
"torch",
"fvcore",
"natsort",
"ruamel.yaml",
"ultralytics",
"pandas",
"detectron2 @ git+https://github.com/facebookresearch/detectron2.git@2a420edb307c9bdf640f036d3b196bed474b8593",
"pytorchvideo @ git+https://github.com/facebookresearch/pytorchvideo.git@1fadaef40dd393ca09680f55582399f4679fc9b7",
"slowfast @ git+https://github.com/Imageomics/SlowFast@797a6f3ae81c49019d006296f1e0f84f431dc356"
"huggingface_hub",
"pytorchvideo @ git+https://github.com/facebookresearch/pytorchvideo.git@1fadaef40dd393ca09680f55582399f4679fc9b7"
]
keywords = [
"annotation",
29 changes: 17 additions & 12 deletions src/kabr_tools/cvat2slowfast.py
@@ -8,6 +8,7 @@
import pandas as pd
from natsort import natsorted
import cv2
from kabr_tools.utils.path import join_paths


def cvat2slowfast(path_to_mini_scenes: str, path_to_new_dataset: str,
@@ -24,13 +25,16 @@ def cvat2slowfast(path_to_mini_scenes: str, path_to_new_dataset: str,
if not os.path.exists(path_to_new_dataset):
os.makedirs(path_to_new_dataset)

if not os.path.exists(f"{path_to_new_dataset}/annotation"):
os.makedirs(f"{path_to_new_dataset}/annotation")
annotation_path = join_paths(path_to_new_dataset, "annotation")
if not os.path.exists(annotation_path):
os.makedirs(annotation_path)

if not os.path.exists(f"{path_to_new_dataset}/dataset/image"):
os.makedirs(f"{path_to_new_dataset}/dataset/image")
image_path = join_paths(path_to_new_dataset, "dataset", "image")
if not os.path.exists(image_path):
os.makedirs(image_path)

with open(f"{path_to_new_dataset}/annotation/classes.json", "w") as file:
classes_path = join_paths(annotation_path, "classes.json")
with open(classes_path, "w", encoding="utf-8") as file:
json.dump(label2number, file)

headers = {"original_vido_id": [], "video_id": pd.Series(dtype="int"), "frame_id": pd.Series(dtype="int"),
@@ -41,11 +45,12 @@ def cvat2slowfast(path_to_mini_scenes: str, path_to_new_dataset: str,
flag = False

for i, folder in enumerate(natsorted(os.listdir(path_to_mini_scenes))):
if os.path.exists(f"{path_to_mini_scenes}/{folder}/actions"):
for j, file in enumerate(natsorted(os.listdir(f"{path_to_mini_scenes}/{folder}/actions"))):
actions_path = join_paths(path_to_mini_scenes, folder, "actions")
if os.path.exists(actions_path):
for j, file in enumerate(natsorted(os.listdir(actions_path))):
if os.path.splitext(file)[1] == ".xml":
annotation_file = f"{path_to_mini_scenes}/{folder}/actions/{file}"
video_file = f"{path_to_mini_scenes}/{folder}/{os.path.splitext(file)[0]}.mp4"
annotation_file = join_paths(actions_path, file)
video_file = join_paths(path_to_mini_scenes, folder, f"{os.path.splitext(file)[0]}.mp4")

if not os.path.exists(video_file):
print(f"{video_file} does not exist.")
@@ -91,7 +96,7 @@ def cvat2slowfast(path_to_mini_scenes: str, path_to_new_dataset: str,

folder_code = f"{label[0].capitalize()}{folder_name:04d}"
folder_name += 1
output_folder = f"{path_to_new_dataset}/dataset/image/{folder_code}"
output_folder = join_paths(image_path, folder_code)
progress = f"{i + 1}/{len(os.listdir(path_to_mini_scenes))}," \
f"{j + 1}/{len(os.listdir(f'{path_to_mini_scenes}/{folder}/actions'))}:" \
f"{folder}/actions/{file} -> {output_folder}"
@@ -146,10 +151,10 @@ def cvat2slowfast(path_to_mini_scenes: str, path_to_new_dataset: str,

if video_id % 10 == 0:
charades_df.to_csv(
f"{path_to_new_dataset}/annotation/data.csv", sep=" ", index=False)
join_paths(annotation_path, "data.csv"), sep=" ", index=False)

charades_df.to_csv(
f"{path_to_new_dataset}/annotation/data.csv", sep=" ", index=False)
join_paths(annotation_path, "data.csv"), sep=" ", index=False)


def parse_args() -> argparse.Namespace:
131 changes: 77 additions & 54 deletions src/kabr_tools/miniscene2behavior.py
@@ -1,53 +1,33 @@
import sys
import argparse
import random
from zipfile import ZipFile
import torch
from lxml import etree
import numpy as np
import pandas as pd
import cv2
from tqdm import tqdm
import slowfast.utils.checkpoint as cu
from slowfast.models import build
from slowfast.utils import parser
from slowfast.datasets.utils import get_sequence
from slowfast.visualization.utils import process_cv2_inputs
from slowfast.datasets.cv2_transform import scale
from fvcore.common.config import CfgNode
from torch import Tensor


def get_input_clip(cap: cv2.VideoCapture, cfg: CfgNode, keyframe_idx: int) -> list[Tensor]:
# https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/visualization/ava_demo_precomputed_boxes.py
seq_length = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
seq = get_sequence(
keyframe_idx,
seq_length // 2,
cfg.DATA.SAMPLING_RATE,
total_frames,
)
clip = []
for frame_idx in seq:
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
was_read, frame = cap.read()
if was_read:
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = scale(cfg.DATA.TEST_CROP_SIZE, frame)
clip.append(frame)
else:
print("Unable to read frame. Duplicating previous frame.")
clip.append(clip[-1])

clip = process_cv2_inputs(clip, cfg)
return clip
from huggingface_hub import hf_hub_download
from kabr_tools.utils.slowfast.utils import get_input_clip
from kabr_tools.utils.slowfast.cfg import load_config, CfgNode
from kabr_tools.utils.slowfast.x3d import build_model


def get_cached_datafile(repo_id: str, filename: str):
return hf_hub_download(repo_id=repo_id, filename=filename)


def parse_args() -> argparse.Namespace:
local_parser = argparse.ArgumentParser()
local_parser.add_argument(
"--hub",
type=str,
help="model hub name"
)
local_parser.add_argument(
"--config",
type=str,
help="model config.yml filepath",
default="config.yml"
help="model config.yml filepath"
)
local_parser.add_argument(
"--checkpoint",
@@ -84,24 +64,28 @@ def parse_args() -> argparse.Namespace:


def create_model(config_path: str, checkpoint_path: str, gpu_num: int) -> tuple[CfgNode, torch.nn.Module]:
# load model config
try:
cfg = parser.load_config(parser.parse_args(), config_path)
except FileNotFoundError:
checkpoint = torch.load(
checkpoint_path, map_location=torch.device("cpu"))
with open(config_path, "w") as file:
file.write(checkpoint["cfg"])
cfg = parser.load_config(parser.parse_args(), config_path)
cfg.NUM_GPUS = gpu_num
cfg.OUTPUT_DIR = ""
model = build.build_model(cfg)
# check params
assert config_path is not None
assert checkpoint_path is not None
assert gpu_num >= 0

# load model checkpoint
cu.load_checkpoint(checkpoint_path, model, data_parallel=False)
# load config
cfg = load_config(config_path)
cfg.NUM_GPUS = gpu_num

# set model to eval mode
# set random seed
random.seed(cfg.RNG_SEED)
np.random.seed(cfg.RNG_SEED)
torch.manual_seed(cfg.RNG_SEED)
torch.use_deterministic_algorithms(True)

# load model
model = build_model(cfg)
checkpoint = torch.load(checkpoint_path, weights_only=True,
map_location=torch.device("cpu"))
model.load_state_dict(checkpoint["model_state"])
model.eval()

return cfg, model


@@ -151,6 +135,8 @@ def annotate_miniscene(cfg: CfgNode, model: torch.nn.Module,
inputs = inputs.cuda(non_blocking=True)

preds = model(inputs)
if frame == 1:
print(preds)
preds = preds.detach()

if cfg.NUM_GPUS:
@@ -166,10 +152,47 @@ def annotate_miniscene(cfg: CfgNode, model: torch.nn.Module,
pd.DataFrame(label_data).to_csv(output_path, sep=" ", index=False)


def download_model(args) -> None:
# download checkpoint from huggingface
args.checkpoint = get_cached_datafile(args.hub, args.checkpoint)
checkpoint_folder = args.checkpoint.rsplit("/", 1)[0]

# extract checkpoint archive
if args.checkpoint.rsplit(".", 1)[-1] == "zip":
with ZipFile(args.checkpoint, "r") as zip_ref:
zip_ref.extractall(checkpoint_folder)
args.checkpoint = args.checkpoint.rsplit(".", 1)[0]

# download config from huggingface
if args.config:
args.config = get_cached_datafile(args.hub, args.config)


def extract_config(args) -> None:
# extract config from checkpoint
if len(args.checkpoint.rsplit("/", 1)) > 1:
checkpoint_folder = args.checkpoint.rsplit("/", 1)[0]
else:
checkpoint_folder = "."

checkpoint = torch.load(args.checkpoint,
map_location=torch.device("cpu"),
weights_only=True)
config_path = f"{checkpoint_folder}/config.yml"
with open(config_path, "w", encoding="utf-8") as file:
file.write(checkpoint["cfg"])
args.config = config_path


def main() -> None:
# clear arguments to avoid slowfast parsing issues
args = parse_args()
sys.argv = [sys.argv[0]]

if args.hub:
download_model(args)

if not args.config:
extract_config(args)

cfg, model = create_model(args.config, args.checkpoint, args.gpu_num)
annotate_miniscene(cfg, model, args.miniscene,
args.video, args.output)
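
For context on the loading changes above: both `extract_config` and `create_model` assume the `.pyth` checkpoint is a torch pickle holding a `cfg` entry (the model config as a YAML string) and a `model_state` entry (the state dict). A minimal inspection sketch, with the checkpoint path as a placeholder:
```
# Sketch only: the key names ("cfg", "model_state") come from the diff above.
import torch

ckpt = torch.load("checkpoint_epoch_00075.pyth",
                  map_location=torch.device("cpu"),
                  weights_only=True)

print(sorted(ckpt.keys()))       # expected to include "cfg" and "model_state"
print(ckpt["cfg"][:200])         # YAML string that extract_config writes to config.yml
print(len(ckpt["model_state"]))  # state dict loaded into the X3D model
```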
6 changes: 6 additions & 0 deletions src/kabr_tools/utils/path.py
@@ -0,0 +1,6 @@
from pathlib import Path


def join_paths(*parts):
    assert len(parts) > 0, "At least one path must be provided"
    return str(Path(*parts))
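
A quick usage sketch of the new helper: it routes path construction through `pathlib`, so the separator matches the host OS, which is what lets the same code paths work on Windows as well as Linux/macOS:
```
# Sketch: join_paths renders the parts with the platform's separator.
from kabr_tools.utils.path import join_paths

print(join_paths("dataset", "image", "A0001"))
# -> "dataset/image/A0001" on Linux/macOS, "dataset\image\A0001" on Windows

# Calling it with no arguments trips the assertion:
# join_paths()  ->  AssertionError: At least one path must be provided
```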