diff --git a/.drone.yml b/.drone.yml
new file mode 100644
index 0000000000..c1779914c7
--- /dev/null
+++ b/.drone.yml
@@ -0,0 +1,149 @@
+---
+#######################################################################################################################
+#######################################################################################################################
+#######################################################################################################################
+#######################      Jobs below are used to build only        #########################################
+#######################      master branches for all containers       #########################################
+#######################                                               #########################################
+#######################################################################################################################
+#######################################################################################################################
+#######################################################################################################################
+
+# This pipeline runs after each merge to master to build the latest mmpose docker images
+kind: pipeline
+type: docker
+name: mmpose LS_mmpose_latest
+
+platform:
+  arch: amd64
+  os: linux
+
+node:
+  docker: build_only
+
+trigger:
+  branch:
+    - master
+  event:
+    - push
+
+clone:
+  depth: 1
+
+steps:
+  - name: Build latest LS_mmpose docker image
+    image: plugins/docker:20.14
+    environment:
+      DOCKER_BUILDKIT: 1
+    settings:
+      dockerfile: docker/LabelStudio.Dockerfile
+      context: docker/
+      registry: quay.io
+      repo: quay.io/logivations/ml_all
+      privileged: true
+      build_args:
+        - BUILDKIT_INLINE_CACHE=1
+      tags:
+        - LS_mmpose_latest
+        - LS_mmpose_latest_${DRONE_COMMIT_SHA}
+      username:
+        from_secret: DOCKER_QUAY_USERNAME
+      password:
+        from_secret: DOCKER_QUAY_PASSWORD
+  - name: Build latest LS_mmdeploy docker image
+    image: plugins/docker:20.14
+    environment:
+      DOCKER_BUILDKIT: 1
+    settings:
+      dockerfile: docker/MMDeploy.Dockerfile
+      context: docker/
+      registry: quay.io
+      repo: quay.io/logivations/ml_all
+      privileged: true
+      build_args:
+        - BUILDKIT_INLINE_CACHE=1
+      cache_from: quay.io/logivations/ml_all:LS_mmdeploy_latest
+      tags:
+        - LS_mmdeploy_latest
+        - LS_mmdeploy_latest_${DRONE_COMMIT_SHA}
+      username:
+        from_secret: DOCKER_QUAY_USERNAME
+      password:
+        from_secret: DOCKER_QUAY_PASSWORD
+
+#######################################################################################################################
+#######################################################################################################################
+#######################################################################################################################
+#######################      Jobs below are used to run               #########################################
+#######################      pull request validation only             #########################################
+#######################                                               #########################################
+#######################################################################################################################
+#######################################################################################################################
+#######################################################################################################################
+
+# 2.
Build PR docker image for mmpose +--- +kind: pipeline +type: docker +name: PR validation build mmpose images + +platform: + arch: amd64 + os: linux + +node: + docker: build_only + +trigger: + event: + include: + - pull_request + +clone: + depth: 50 + +steps: + - name: Build LS_mmpose docker image for pull request + image: plugins/docker:20.14 + environment: + DOCKER_BUILDKIT: 1 + settings: + dockerfile: docker/LabelStudio.Dockerfile + context: docker/ + registry: quay.io + repo: quay.io/logivations/ml_all + privileged: true + build_args: + - BUILDKIT_INLINE_CACHE=1 + cache_from: + - quay.io/logivations/ml_all:LS_mmpose_latest + - quay.io/logivations/ml_all:LS_mmpose_pr${DRONE_PULL_REQUEST} + tags: + - LS_mmpose_pr${DRONE_PULL_REQUEST} + - LS_mmpose_pr${DRONE_PULL_REQUEST}_${DRONE_COMMIT_SHA} + username: + from_secret: DOCKER_QUAY_USERNAME + password: + from_secret: DOCKER_QUAY_PASSWORD + - name: Build LS_mmdeploy docker image for pull request + image: plugins/docker:20.14 + environment: + DOCKER_BUILDKIT: 1 + settings: + dockerfile: docker/MMDeploy.Dockerfile + context: docker/ + registry: quay.io + repo: quay.io/logivations/ml_all + privileged: true + build_args: + - BUILDKIT_INLINE_CACHE=1 + cache_from: + - quay.io/logivations/ml_all:LS_mmdeploy_pr${DRONE_PULL_REQUEST} + - quay.io/logivations/ml_all:LS_mmdeploy_latest + tags: + - LS_mmdeploy_pr${DRONE_PULL_REQUEST} + - LS_mmdeploy_pr${DRONE_PULL_REQUEST}_${DRONE_COMMIT_SHA} + username: + from_secret: DOCKER_QUAY_USERNAME + password: + from_secret: DOCKER_QUAY_PASSWORD diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 5e7118cf25..efd7276d2f 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,32 +1,15 @@ - + -## Motivation +### Basic Info - +| Info | Please fill out this column | +| --------------------- | -------------------------------- | +| Platform tested on | LabelStudio | +| Ticket | | -## Modification +### Description of contribution - +Reason for change: -## BC-breaking (Optional) - - -## Use cases (Optional) - - - -## Checklist - -**Before PR**: - -- [ ] I have read and followed the workflow indicated in the [CONTRIBUTING.md](https://github.com/open-mmlab/mmpose/blob/master/.github/CONTRIBUTING.md) to create this PR. -- [ ] Pre-commit or linting tools indicated in [CONTRIBUTING.md](https://github.com/open-mmlab/mmpose/blob/master/.github/CONTRIBUTING.md) are used to fix the potential lint issues. -- [ ] Bug fixes are covered by unit tests, the case that causes the bug should be added in the unit tests. -- [ ] New functionalities are covered by complete unit tests. If not, please add more unit tests to ensure correctness. -- [ ] The documentation has been modified accordingly, including docstring or example tutorials. - -**After PR**: - -- [ ] CLA has been signed and all committers have signed the CLA in this PR. 
+Changes in this PR: diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml deleted file mode 100644 index 785b928849..0000000000 --- a/.github/workflows/deploy.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: deploy - -on: push - -jobs: - build-n-publish: - runs-on: ubuntu-latest - if: startsWith(github.event.ref, 'refs/tags') - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.7 - uses: actions/setup-python@v2 - with: - python-version: 3.7 - - name: Build MMPose - run: | - pip install wheel - python setup.py sdist bdist_wheel - - name: Publish distribution to PyPI - run: | - pip install twine - twine upload dist/* -u __token__ -p ${{ secrets.pypi_password }} diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml deleted file mode 100644 index b2c1661a00..0000000000 --- a/.github/workflows/lint.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: lint - -on: [push, pull_request] - -concurrency: - group: github.workflow−{{ github.ref }} - cancel-in-progress: true - -jobs: - lint: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.7 - uses: actions/setup-python@v2 - with: - python-version: 3.7 - - name: Install pre-commit hook - run: | - pip install pre-commit - pre-commit install - - name: Linting - run: pre-commit run --all-files - - name: Check docstring coverage - run: | - pip install interrogate - interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-regex "__repr__" --fail-under 80 mmpose diff --git a/.github/workflows/merge_stage_test.yml b/.github/workflows/merge_stage_test.yml deleted file mode 100644 index cd6ef82565..0000000000 --- a/.github/workflows/merge_stage_test.yml +++ /dev/null @@ -1,225 +0,0 @@ -name: merge_stage_test - -on: - push: - paths-ignore: - - 'README.md' - - 'README_zh-CN.md' - - 'docs/**' - - 'demo/**' - - '.dev_scripts/**' - - '.circleci/**' - branches: - - dev-1.x - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - build_cpu_py: - runs-on: ubuntu-22.04 - strategy: - matrix: - python-version: [3.8, 3.9] - torch: [1.8.1] - include: - - torch: 1.8.1 - torchvision: 0.9.1 - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Upgrade pip - run: pip install pip --upgrade - - name: Install Numpy - run: pip install -U numpy - - name: Install PyTorch - run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html - - name: Install MMEngine - run: pip install git+https://github.com/open-mmlab/mmengine.git@main - - name: Install MMCV - run: | - pip install -U openmim - mim install 'mmcv >= 2.0.0' - - name: Install MMDet - run: | - python -m pip install --upgrade pip setuptools wheel - pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x - - name: Install other dependencies - run: | - pip install -r requirements/tests.txt - pip install -r requirements/runtime.txt - pip install -r requirements/albu.txt - pip install -r requirements/poseval.txt - - name: Build and install - run: rm -rf .eggs && pip install -e . 
- - name: Run unittests and generate coverage report - run: | - coverage run --branch --source mmpose -m pytest tests/ - coverage xml - coverage report -m - - build_cpu_pt: - runs-on: ubuntu-22.04 - strategy: - matrix: - python-version: [3.7] - torch: [1.8.0, 1.8.1, 1.9.1, 1.10.1, 1.11.0, 1.12.1, 1.13.0] - include: - - torch: 1.8.0 - torchvision: 0.9.0 - - torch: 1.8.1 - torchvision: 0.9.1 - - torch: 1.9.1 - torchvision: 0.10.1 - - torch: 1.10.1 - torchvision: 0.11.2 - - torch: 1.11.0 - torchvision: 0.12.0 - - torch: 1.12.1 - torchvision: 0.13.1 - - torch: 1.13.0 - torchvision: 0.14.0 - - torch: 2.0.0 - torchvision: 0.15.1 - python-version: 3.8 - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Upgrade pip - run: pip install pip --upgrade - - name: Install Numpy - run: pip install -U numpy - - name: Install PyTorch - run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html - - name: Install MMEngine - run: pip install git+https://github.com/open-mmlab/mmengine.git@main - - name: Install MMCV - run: | - pip install -U openmim - mim install 'mmcv >= 2.0.0' - - name: Install MMDet - run: | - python -m pip install --upgrade pip setuptools wheel - pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x - - name: Install other dependencies - run: | - pip install -r requirements/tests.txt - pip install -r requirements/runtime.txt - pip install -r requirements/albu.txt - pip install -r requirements/poseval.txt - - name: Build and install - run: rm -rf .eggs && pip install -e . - - name: Run unittests and generate coverage report - run: | - coverage run --branch --source mmpose -m pytest tests/ - coverage xml - coverage report -m - # Only upload coverage report for python3.7 && pytorch1.8.1 cpu - - name: Upload coverage to Codecov - if: ${{matrix.torch == '1.8.1' && matrix.python-version == '3.7'}} - uses: codecov/codecov-action@v1.0.14 - with: - file: ./coverage.xml - flags: unittests - env_vars: OS,PYTHON - name: codecov-umbrella - fail_ci_if_error: false - - build_cu102: - runs-on: ubuntu-22.04 - container: - image: pytorch/pytorch:1.8.1-cuda10.2-cudnn7-devel - strategy: - matrix: - python-version: [3.7] - include: - - torch: 1.8.1 - cuda: 10.2 - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Upgrade pip - run: pip install pip --upgrade - - name: Fetch GPG keys - run: | - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub - - name: Install Python-dev - run: apt-get update && apt-get install -y python${{matrix.python-version}}-dev - if: ${{matrix.python-version != 3.9}} - - name: Install system dependencies - run: | - apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 - - name: Install mmpose dependencies - run: | - pip install -U numpy - pip install git+https://github.com/open-mmlab/mmengine.git@main - pip install -U openmim - mim install 'mmcv >= 2.0.0' - pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x - pip install -r requirements/tests.txt - pip 
install -r requirements/runtime.txt - pip install -r requirements/albu.txt - pip install -r requirements/poseval.txt - - name: Build and install - run: rm -rf .eggs && pip install -e . - - name: Run unittests and generate coverage report - run: | - coverage run --branch --source mmpose -m pytest tests/ - coverage xml - coverage report -m - - build_windows: - runs-on: windows-2022 - strategy: - matrix: - os: [windows-2022] - python: [3.7] - platform: [cpu, cu111] - torch: [1.8.1] - torchvision: [0.9.1] - include: - - python-version: 3.8 - platform: cu117 - torch: 2.0.0 - torchvision: 0.15.1 - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Upgrade pip - run: python -m pip install pip --upgrade - - name: Install lmdb - run: python -m pip install lmdb - - name: Install PyTorch - run: python -m pip install torch==${{matrix.torch}}+${{matrix.platform}} torchvision==${{matrix.torchvision}}+${{matrix.platform}} -f https://download.pytorch.org/whl/${{matrix.platform}}/torch_stable.html - - name: Install mmpose dependencies - run: | - python -m pip install -U numpy - python -m pip install --upgrade pip setuptools wheel - python -m pip install git+https://github.com/open-mmlab/mmengine.git@main - python -m pip install -U openmim - mim install 'mmcv >= 2.0.0' - mim install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x - python -m pip install -r requirements/tests.txt - python -m pip install -r requirements/runtime.txt - python -m pip install -r requirements/albu.txt - python -m pip install -r requirements/poseval.txt - - name: Build and install - run: | - python -m pip install -e . -v - - name: Run unittests and generate coverage report - run: | - pytest tests/ diff --git a/.github/workflows/pr_stage_test.yml b/.github/workflows/pr_stage_test.yml deleted file mode 100644 index d1a3089d84..0000000000 --- a/.github/workflows/pr_stage_test.yml +++ /dev/null @@ -1,194 +0,0 @@ -name: pr_stage_test - -on: - pull_request: - paths-ignore: - - 'README.md' - - 'README_zh-CN.md' - - 'docs/**' - - 'demo/**' - - '.dev_scripts/**' - - '.circleci/**' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - build_cpu: - runs-on: ubuntu-22.04 - strategy: - matrix: - python-version: [3.7] - include: - - torch: 1.8.1 - torchvision: 0.9.1 - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Upgrade pip - run: pip install pip --upgrade - - name: Install Numpy - run: pip install -U numpy - - name: Install PyTorch - run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html - - name: Install mmpose dependencies - run: | - python -m pip install --upgrade pip setuptools wheel - pip install -U numpy - pip install git+https://github.com/open-mmlab/mmengine.git@main - pip install -U openmim - mim install 'mmcv >= 2.0.0' - pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x - pip install -r requirements/tests.txt - pip install -r requirements/runtime.txt - pip install -r requirements/albu.txt - pip install -r requirements/poseval.txt - - name: Build and install - run: rm -rf .eggs && pip install -e . 
- - name: Run unittests and generate coverage report - run: | - coverage run --branch --source mmpose -m pytest tests/ - coverage xml - coverage report -m - # Upload coverage report for python3.7 && pytorch1.8.1 cpu - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1.0.14 - with: - file: ./coverage.xml - flags: unittests - env_vars: OS,PYTHON - name: codecov-umbrella - fail_ci_if_error: false - - build_cu102: - runs-on: ubuntu-22.04 - container: - image: pytorch/pytorch:1.8.1-cuda10.2-cudnn7-devel - strategy: - matrix: - python-version: [3.7] - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Upgrade pip - run: pip install pip --upgrade - - name: Fetch GPG keys - run: | - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub - - name: Install Python-dev - run: apt-get update && apt-get install -y python${{matrix.python-version}}-dev - if: ${{matrix.python-version != 3.9}} - - name: Install system dependencies - run: | - apt-get update - apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libxrender-dev - - name: Install mmpose dependencies - run: | - pip install -U numpy - pip install git+https://github.com/open-mmlab/mmengine.git@main - pip install -U openmim - mim install 'mmcv >= 2.0.0' - pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x - pip install -r requirements/tests.txt - pip install -r requirements/runtime.txt - pip install -r requirements/albu.txt - pip install -r requirements/poseval.txt - - name: Build and install - run: rm -rf .eggs && pip install -e . - - name: Run unittests and generate coverage report - run: | - coverage run --branch --source mmpose -m pytest tests/ - coverage xml - coverage report -m - - build_cu117: - runs-on: ubuntu-22.04 - container: - image: pytorch/pytorch:2.0.0-cuda11.7-cudnn8-devel - strategy: - matrix: - python-version: [3.9] - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Upgrade pip - run: pip install pip --upgrade - - name: Fetch GPG keys - run: | - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub - - name: Install system dependencies - run: apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libxrender-dev - - name: Install mmpose dependencies - run: | - pip install -U numpy - pip install git+https://github.com/open-mmlab/mmengine.git@main - pip install -U openmim - mim install 'mmcv >= 2.0.0' - pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x - pip install -r requirements/tests.txt - pip install -r requirements/runtime.txt - pip install -r requirements/albu.txt - pip install -r requirements/poseval.txt - - name: Build and install - run: rm -rf .eggs && pip install -e . 
- - name: Run unittests and generate coverage report - run: | - coverage run --branch --source mmpose -m pytest tests/ - coverage xml - coverage report -m - - build_windows: - runs-on: windows-2022 - strategy: - matrix: - os: [windows-2022] - python: [3.7] - platform: [cpu, cu111] - torch: [1.8.1] - torchvision: [0.9.1] - include: - - python-version: 3.8 - platform: cu117 - torch: 2.0.0 - torchvision: 0.15.1 - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Upgrade pip - run: python -m pip install pip --upgrade - - name: Install lmdb - run: python -m pip install lmdb - - name: Install PyTorch - run: python -m pip install torch==${{matrix.torch}}+${{matrix.platform}} torchvision==${{matrix.torchvision}}+${{matrix.platform}} -f https://download.pytorch.org/whl/${{matrix.platform}}/torch_stable.html - - name: Install mmpose dependencies - run: | - python -m pip install -U numpy - python -m pip install --upgrade pip setuptools wheel - python -m pip install git+https://github.com/open-mmlab/mmengine.git@main - python -m pip install -U openmim - mim install 'mmcv >= 2.0.0' - mim install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x - python -m pip install -r requirements/tests.txt - python -m pip install -r requirements/albu.txt - python -m pip install -r requirements/poseval.txt - - name: Build and install - run: | - python -m pip install -e . -v - - name: Run unittests and generate coverage report - run: | - pytest tests/ diff --git a/.gitignore b/.gitignore index 2b337460f3..cc60fb71a0 100644 --- a/.gitignore +++ b/.gitignore @@ -132,3 +132,6 @@ docs/**/modelzoo.md *.pth *.DS_Store + +data/* +work_dirs/* diff --git a/auto_training/config_factories/mmpose_config_factory.py b/auto_training/config_factories/mmpose_config_factory.py new file mode 100644 index 0000000000..0127eaca41 --- /dev/null +++ b/auto_training/config_factories/mmpose_config_factory.py @@ -0,0 +1,222 @@ +from mmengine.config import Config + + +def make_mmpose_config( + data_root: str, + classes: list, + res: tuple = (192, 256), +): + cfg = Config() + + cfg.default_scope = 'mmpose' + cfg.gpu_ids = range(1) + + # hooks + cfg.default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=1, save_best='coco/AP', rule='greater'), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='PoseVisualizationHook', enable=True), + badcase=dict( + type='BadCaseAnalysisHook', + enable=False, + out_dir='badcase', + metric_type='loss', + badcase_thr=5)) + + # custom hooks + cfg.custom_hooks = [ + # Synchronize model buffers such as running_mean and running_var in BN + # at the end of each epoch + dict(type='SyncBuffersHook') + ] + + # multi-processing backend + cfg.env_cfg = dict( + cudnn_benchmark=False, + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + dist_cfg=dict(backend='nccl'), + ) + + # visualizer + cfg.vis_backends = [ + dict(type='LocalVisBackend'), + dict(type='TensorboardVisBackend'), + # dict(type='WandbVisBackend'), + ] + cfg.visualizer = dict( + type='PoseLocalVisualizer', vis_backends=cfg.vis_backends, name='visualizer') + + # logger + cfg.log_processor = dict( + type='LogProcessor', window_size=50, by_epoch=True, num_digits=6) + cfg.log_level = 'INFO' + cfg.load_from = None + 
cfg.resume = False + + # file I/O backend + cfg.backend_args = dict(backend='local') + + # training/validation/testing progress + cfg.train_cfg = dict(max_epochs=300, val_interval=10, by_epoch=True) + cfg.val_cfg = dict() + cfg.test_cfg = dict() + # runtime + + # optimizer + cfg.optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, + )) + # learning policy + cfg.param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=300, + milestones=[200, 250], + gamma=0.1, + by_epoch=True) + ] + + # automatically scaling LR based on the actual training batch size + cfg.auto_scale_lr = dict(base_batch_size=64) + + # codec settings + cfg.codec = dict( + type='MSRAHeatmap', input_size=res, heatmap_size=(48, 64), sigma=2) + + # model settings + cfg.model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=18, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'), + ), + head=dict( + type='HeatmapHead', + in_channels=512, + out_channels=len(classes), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=cfg.codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + + # base dataset settings + cfg.dataset_type = 'CocoDataset' + cfg.data_mode = 'topdown' + cfg.data_root = data_root + + # pipelines + cfg.train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(direction='horizontal', type='RandomFlip', prob=0.5), + dict(type='RandomBBoxTransform', scale_factor=[0.7, 1.3], shift_factor=0.05, rotate_factor=80), + dict(type='TopdownAffine', input_size=cfg.codec['input_size']), + dict( + type='Albumentation', + transforms=[ + dict( + type='OneOf', + transforms=[ + dict(type='MotionBlur', blur_limit=3, p=0.3), + dict(type='MedianBlur', blur_limit=3, p=0.2), + dict(type='Blur', blur_limit=3, p=0.2), + ], p=0.3), + + dict( + type='OneOf', + transforms=[ + dict(type='GaussNoise', var_limit=(10.0, 50.0), p=0.3), + dict(type='MultiplicativeNoise', multiplier=(0.9, 1.1), p=0.3), + ], p=0.4), + + # dict(type='HueSaturationValue', hue_shift_limit=10, sat_shift_limit=10, val_shift_limit=10, p=0.3), + ]), + dict(type='GenerateTarget', encoder=cfg.codec), + dict(type='PackPoseInputs'), + dict(type='TorchVisionWrapper', transforms=[ + dict(type='TrivialAugmentWide', num_magnitude_bins=31) + # TODO CHECK SAVING will save 100 images in /mmpose/test + ], save=True), + ] + val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=cfg.codec['input_size']), + dict(type='PackPoseInputs') + ] + + # data loaders + cfg.train_dataloader = dict( + batch_size=64, + num_workers=6, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=cfg.dataset_type, + labels=classes, + data_root=data_root, + data_mode=cfg.data_mode, + ann_file='coco_annotations.json', + data_prefix=dict(img='train/'), + pipeline=cfg.train_pipeline, + )) + cfg.val_dataloader = dict( + batch_size=32, + num_workers=6, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=cfg.dataset_type, + labels=classes, + data_root=data_root, + data_mode=cfg.data_mode, + ann_file='coco_annotations.json', + bbox_file='', 
+ data_prefix=dict(img='val/'), + test_mode=True, + pipeline=val_pipeline, + )) + cfg.test_dataloader = cfg.val_dataloader + + # evaluators + cfg.val_evaluator = [ + dict( + type='CocoMetric', + ann_file=data_root + '/coco_annotations.json' + ), + dict( + type='EPE', + ), + dict( + type='PCKAccuracy', + prefix="5pr_", + ), + dict( + type='PCKAccuracy', + thr=0.1, + prefix="10pr_", + ), + dict( + type='AUC', + ), + ] + cfg.test_evaluator = cfg.val_evaluator + return cfg \ No newline at end of file diff --git a/auto_training/inference_custom.py b/auto_training/inference_custom.py new file mode 100644 index 0000000000..a100efb9e5 --- /dev/null +++ b/auto_training/inference_custom.py @@ -0,0 +1,144 @@ +import argparse +import os +import json +import numpy as np +import cv2 +from mmengine.config import Config, DictAction +from mmpose.apis import init_model, inference_topdown +from xtcocotools.coco import COCO +from mmengine.utils import ProgressBar + + +def parse_args(): + parser = argparse.ArgumentParser(description="Run MMPose inference on images") + parser.add_argument('config', help='Path to model config file') + parser.add_argument('model', help='Path to checkpoint or ONNX file') + parser.add_argument('--img-dir', type=str, required=True, help='Directory with input images') + parser.add_argument('--bbox-json', type=str, required=True, help='Path to COCO format bounding box JSON') + parser.add_argument('--out-dir', type=str, help='Directory to save visualized results (optional)') + parser.add_argument('--predictions-dir', type=str, required=True, help='Directory to save individual prediction files') + parser.add_argument('--device', default='cuda:0', help='Device to run inference on (e.g., "cuda:0" or "cpu")') + parser.add_argument('--score-thr', type=float, default=0.1, help='Keypoint score threshold') + parser.add_argument( + '--cfg-options', nargs='+', action=DictAction, + help='Override some settings in the config file. 
The key-value pair in ' + 'xxx=yyy format will be merged into the config.') + return parser.parse_args() + + +def draw_bboxes(image, bboxes): + """Draw bounding boxes on the image using OpenCV.""" + for bbox in bboxes: + x, y, x2, y2 = map(int, bbox) + cv2.rectangle(image, (x, y), (x + x2, y + y2), (255, 0, 0), 2) # Draw rectangle + + +def draw_keypoints(image, keypoints, scores, score_thr): + """Draw keypoints on the image using OpenCV.""" + + + labels = ["front_right", "rear_right", "front_left", "rear_left"] + labels.sort() + for el1, el2 in zip(keypoints, scores): + + for kp, score, label in zip(el1, el2, labels): + if score > score_thr: + x, y = int(kp[0]), int(kp[1]) + color = (0, 255, 0) if "front" in label else (0, 0, 255) + cv2.circle(image, (x, y), 5, color, -1) # Draw keypoint + cv2.putText(image, f"{int(score*100)}% {label}", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) + + +def main(): + args = parse_args() + + coco = COCO(args.bbox_json) + img_ids = list(coco.imgs.keys()) + + cfg = Config.fromfile(args.config) + if args.cfg_options: + cfg.merge_from_dict(args.cfg_options) + model = init_model(cfg, args.model, device=args.device) + + if args.out_dir: + os.makedirs(args.out_dir, exist_ok=True) + os.makedirs(args.predictions_dir, exist_ok=True) + + progress_bar = ProgressBar(len(img_ids)) + + for img_id in img_ids: + img_info = coco.loadImgs([img_id])[0] + img_path = os.path.join(args.img_dir, img_info['file_name']) + + if not os.path.exists(img_path): + progress_bar.update() + continue + + image = cv2.imread(img_path) + if image is None: + print(f"Failed to read image: {img_path}") + progress_bar.update() + continue + + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + ann_ids = coco.getAnnIds(imgIds=[img_id]) + annotations = coco.loadAnns(ann_ids) + person_bboxes = np.array([ann['bbox'] for ann in annotations]) + person_bboxes = np.unique(person_bboxes, axis=0) + + #for a in annotations: + # kps = np.asarray(a["keypoints"]).reshape((-1, 3)) + # scores = np.ones_like(kps) + # draw_keypoints(image, [kps], scores, [0]) + + + + pose_results = inference_topdown( + model, + image_rgb, + person_bboxes, + #bbox_format='xywh' # COCO annotations typically use 'xywh' format + ) + + keypoints_results = [] + for pose in pose_results: + pred_instances = pose.pred_instances + if pred_instances is not None: + + + keypoints = pred_instances.keypoints + scores = pred_instances.keypoint_scores + bbox = pred_instances.bboxes[0] + + # Save the bbox along with the keypoints data + keypoints_results.append({ + 'keypoints': keypoints.tolist(), + 'scores': scores.tolist(), + 'bbox': bbox.tolist() # Add bbox to the result + }) + + if args.out_dir: + draw_keypoints(image, keypoints, scores, args.score_thr) + draw_bboxes(image, person_bboxes) + + # Save the visualized image if `out-dir` is provided + if args.out_dir: + out_file = os.path.join(args.out_dir, img_info['file_name']) + cv2.imwrite(out_file, image) + + # Save individual prediction file + prediction_file = os.path.join(args.predictions_dir, f"{os.path.splitext(img_info['file_name'])[0]}.json") + with open(prediction_file, 'w') as f: + json.dump({ + "result": keypoints_results, + "score": 0 # Placeholder for score; replace with actual logic if needed + }, f) + + progress_bar.update() + + print("Inference completed.") + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/auto_training/objective.py b/auto_training/objective.py new file mode 100644 index 0000000000..8932a33918 --- /dev/null +++ 
b/auto_training/objective.py @@ -0,0 +1,81 @@ +from auto_training.config_factories.mmpose_config_factory import make_mmpose_config +from mmengine.runner import Runner +import argparse +import os +import os.path as osp + + +def parse_args(): + parser = argparse.ArgumentParser(description='Train a pose model') + parser.add_argument('--res', type=int, help='resolution of the model') + parser.add_argument('--augmentation_index', type=int, help='augmentation index') + parser.add_argument('--batch_size', type=int, help='batch size') + parser.add_argument('--repeat_times', type=int, help='repeat times') + parser.add_argument('--resnet_depth', type=int, help='resnet_depth') + parser.add_argument('--backbone_type', type=str, help='backbone_type') + + return parser.parse_args() + + +def merge_args(cfg): + """Merge CLI arguments to config.""" + + cfg.launcher = 'none' + + # set preprocess configs to model + if 'preprocess_cfg' in cfg: + cfg.model.setdefault('data_preprocessor', + cfg.get('preprocess_cfg', {})) + + return cfg + + +def train(res, augmentation_index, batch_size, repeat_times, resnet_depth, backbone_type): + timestamp = f"det_res{res}_aug{augmentation_index}_b{batch_size}_rep{repeat_times}_d{resnet_depth}_{backbone_type}" + # dataset = 'general_dataset_12_12_24' + dataset = 'wurth_optimization_dataset' + data_path = f'/data/{dataset}/' + out_path = f'/data/wurth_optimization/{dataset}' + + # replace the ${key} with the value of cfg.key + cfg = make_mmpose_config( + data_path, + classes=['bottom_left', 'bottom_right', 'top_left', 'top_right'], + res=(res, res), + augmentation_index=augmentation_index, + batch_size=batch_size, + repeat_times=repeat_times, + resnet_depth=resnet_depth, + backbone_type=backbone_type + ) + + # init the logger before other steps + # timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) + cfg.work_dir = os.path.join(out_path, timestamp) + os.makedirs(cfg.work_dir, exist_ok=True) + + # dump config + cfg.dump(osp.join(cfg.work_dir, f"{timestamp}_config.py")) + + # merge CLI arguments to config + cfg = merge_args(cfg) + + # build the runner from config + runner = Runner.from_cfg(cfg) + + # start training + runner.train() + + return runner.val_metrics['PCK'] + + +if __name__ == '__main__': + args = parse_args() + train( + res=args.res, + augmentation_index=args.augmentation_index, + batch_size=args.batch_size, + repeat_times=args.repeat_times, + resnet_depth=args.resnet_depth, + backbone_type=args.backbone_type, + ) \ No newline at end of file diff --git a/auto_training/optimization.py b/auto_training/optimization.py new file mode 100644 index 0000000000..dfaad546ce --- /dev/null +++ b/auto_training/optimization.py @@ -0,0 +1,68 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import optuna
+import os
+import os.path as osp
+import argparse
+from objective import train
+from multiprocessing import Process
+import subprocess
+
+
+def objective(trial):
+    # Define the hyperparameter search space
+    res = trial.suggest_categorical("resolution", [384])  # square resolutions
+    augmentation_index = trial.suggest_categorical("augmentation_index", [0])  # Indices for different augmentations
+    batch_size = trial.suggest_categorical("batch_size", [64])  # Batch size
+    repeat_times = trial.suggest_categorical("repeat_times", [1])  # Dataset repeat times
+    resnet_depth = trial.suggest_categorical("resnet_depth", [18])  # backbone resnet depth
+    backbone_type = trial.suggest_categorical("backbone_type", ["resnet"])  # backbone type
+
+    try:
+        # Run the trial's training in a separate process and stream its output
+        process = subprocess.Popen(
+            [
+                "python", "objective.py",
+                "--res", str(res),
+                "--augmentation_index", str(augmentation_index),
+                "--batch_size", str(batch_size),
+                "--repeat_times", str(repeat_times),
+                "--resnet_depth", str(resnet_depth),
+                "--backbone_type", str(backbone_type),
+            ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
+        )
+
+        for line in iter(process.stdout.readline, ''):
+            print(line, end='')
+
+        process.stdout.close()
+        process.wait()
+
+    except Exception as e:
+        print(f"Error: {e}")
+        print(
+            f"Params with error: res - {res}, augmentation_index - {augmentation_index}, "
+            f"batch_size - {batch_size}, repeat_times - {repeat_times}"
+        )
+        return float("-inf")  # worst possible value for a maximized objective
+
+    # Return the metric to be maximized
+    return 0
+
+
+def main():
+    study = optuna.create_study(
+        direction="maximize",
+        study_name="mmpose_optimization",
+        storage="sqlite:///mmpose_optimization.db",
+        load_if_exists=True,
+    )
+
+    study.optimize(objective, n_trials=100)
+
+    print("Best params:", study.best_params)
+    print("Best value:", study.best_value)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/auto_training/train.py b/auto_training/train.py
new file mode 100644
index 0000000000..2855cd2d7e
--- /dev/null
+++ b/auto_training/train.py
@@ -0,0 +1,294 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import os
+import os.path as osp
+
+from mmengine.config import Config, DictAction
+from mmengine.runner import Runner
+
+from auto_training.config_factories.mmpose_config_factory import make_mmpose_config
+
+import cv2
+from mmpose.datasets import build_dataset
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Train a pose model')
+    parser.add_argument('config', help='train config file path')
+    parser.add_argument('--work-dir', help='the dir to save logs and models')
+    parser.add_argument(
+        '--resume',
+        nargs='?',
+        type=str,
+        const='auto',
+        help='If a checkpoint path is specified, resume from it; if not, '
+        'try to auto resume from the latest checkpoint '
+        'in the work directory.')
+    parser.add_argument(
+        '--data-root',
+        type=str,
+        help='Root directory for dataset. This will override data_root in the config file.'
+ ) + parser.add_argument( + '--amp', + action='store_true', + default=False, + help='enable automatic-mixed-precision training') + parser.add_argument( + '--no-validate', + action='store_true', + help='whether not to evaluate the checkpoint during training') + parser.add_argument( + '--auto-scale-lr', + action='store_true', + help='whether to auto scale the learning rate according to the ' + 'actual batch size and the original batch size.') + parser.add_argument( + '--show-dir', + help='directory where the visualization images will be saved.') + parser.add_argument( + '--show', + action='store_true', + help='whether to display the prediction results in a window.') + parser.add_argument( + '--interval', + type=int, + default=1, + help='visualize per interval samples.') + parser.add_argument( + '--wait-time', + type=float, + default=1, + help='display time of every window. (second)') + parser.add_argument( + '--classes', + nargs='+', + required=True, + help='list of classes for the training' + ) + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + parser.add_argument( + '--launcher', + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') + # When using PyTorch version >= 2.0.0, the `torch.distributed.launch` + # will pass the `--local-rank` parameter to `tools/train.py` instead + # of `--local_rank`. + parser.add_argument('--local_rank', '--local-rank', type=int, default=0) + parser.add_argument('--visualize', action='store_true', + help='Visualize augmented dataset samples instead of training') + parser.add_argument('--num-samples', type=int, default=20, help='Number of samples to visualize') + parser.add_argument( + '--viz-dir', + help='directory where the training augmentation visualization images will be saved.') + + args = parser.parse_args() + if 'LOCAL_RANK' not in os.environ: + os.environ['LOCAL_RANK'] = str(args.local_rank) + + return args + + +def merge_args(cfg, args): + """Merge CLI arguments to config.""" + + if args.no_validate: + cfg.val_cfg = None + cfg.val_dataloader = None + cfg.val_evaluator = None + + cfg.launcher = args.launcher + cfg.work_dir = args.work_dir + + # enable automatic-mixed-precision training + if args.amp is True: + from mmengine.optim import AmpOptimWrapper, OptimWrapper + optim_wrapper = cfg.optim_wrapper.get('type', OptimWrapper) + assert optim_wrapper in (OptimWrapper, AmpOptimWrapper, + 'OptimWrapper', 'AmpOptimWrapper'), \ + '`--amp` is not supported custom optimizer wrapper type ' \ + f'`{optim_wrapper}.' + cfg.optim_wrapper.type = 'AmpOptimWrapper' + cfg.optim_wrapper.setdefault('loss_scale', 'dynamic') + + # resume training + if args.resume == 'auto': + cfg.resume = True + cfg.load_from = None + elif args.resume is not None: + cfg.resume = True + cfg.load_from = args.resume + + # enable auto scale learning rate + if args.auto_scale_lr: + cfg.auto_scale_lr.enable = True + + # visualization + if args.show or (args.show_dir is not None): + assert 'visualization' in cfg.default_hooks, \ + 'PoseVisualizationHook is not set in the ' \ + '`default_hooks` field of config. 
Please set ' \ + '`visualization=dict(type="PoseVisualizationHook")`' + + cfg.default_hooks.visualization.enable = True + cfg.default_hooks.visualization.show = args.show + if args.show: + cfg.default_hooks.visualization.wait_time = args.wait_time + cfg.default_hooks.visualization.out_dir = args.show_dir + cfg.default_hooks.visualization.interval = args.interval + + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + # set preprocess configs to model + if 'preprocess_cfg' in cfg: + cfg.model.setdefault('data_preprocessor', + cfg.get('preprocess_cfg', {})) + + return cfg + + +def plot_keypoints_on_image_cv2(image, heatmap, labels=None): + import cv2 + import numpy as np + + image = cv2.resize(image, (image.shape[1] * 3, image.shape[0] * 3), interpolation=cv2.INTER_LINEAR) + num_keypoints, h_heat, w_heat = heatmap.shape + + scale_x = image.shape[1] / w_heat + scale_y = image.shape[0] / h_heat + + if len(image.shape) == 2 or image.shape[2] == 1: + image_bgr = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) + else: + image_bgr = image.copy() + + for i in range(num_keypoints): + _, _, _, max_loc = cv2.minMaxLoc(heatmap[i]) + x_heat, y_heat = max_loc + + x_img = int(x_heat * scale_x) + y_img = int(y_heat * scale_y) + + cv2.drawMarker( + image_bgr, (x_img, y_img), color=(0, 0, 255), + markerType=cv2.MARKER_CROSS, markerSize=20, thickness=2 + ) + + label_text = labels[i] if labels is not None and i < len(labels) else f'KP {i}' + + cv2.putText( + image_bgr, label_text, (x_img + 5, y_img - 5), + fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.5, + color=(0, 255, 255), thickness=2, lineType=cv2.LINE_AA + ) + + return image_bgr + + +def visualize_samples(cfg, classes, num_samples=5, dir=None): + """Visualize augmented dataset samples with keypoint annotations.""" + dataset_cfg = cfg.train_dataloader['dataset'] + + import torch + # for somereason need to reimport here + from mmpose.datasets.transforms.loading import LoadImage + from mmengine.registry import TRANSFORMS + from mmpose.datasets.transforms.bottomup_transforms import (BottomupGetHeatmapMask, BottomupRandomAffine, + BottomupRandomChoiceResize, + BottomupRandomCrop, BottomupResize) + from mmpose.datasets.transforms.common_transforms import (Albumentation, FilterAnnotations, + GenerateTarget, GetBBoxCenterScale, + PhotometricDistortion, RandomBBoxTransform, + RandomFlip, RandomHalfBody, YOLOXHSVRandomAug, + TorchVisionWrapper) + from mmpose.datasets.transforms.converting import KeypointConverter, SingleHandConverter + from mmpose.datasets.transforms.formatting import PackPoseInputs + from mmpose.datasets.transforms.hand_transforms import HandRandomFlip + from mmpose.datasets.transforms.mix_img_transforms import Mosaic, YOLOXMixUp + from mmpose.datasets.transforms.pose3d_transforms import RandomFlipAroundRoot + from mmpose.datasets.transforms.topdown_transforms import TopdownAffine + + TRANSFORMS.register_module(module=GetBBoxCenterScale) + TRANSFORMS.register_module(module=RandomBBoxTransform) + TRANSFORMS.register_module(module=RandomFlip, force=True) + TRANSFORMS.register_module(module=RandomHalfBody) + TRANSFORMS.register_module(module=TopdownAffine) + TRANSFORMS.register_module(module=Albumentation) + TRANSFORMS.register_module(module=PhotometricDistortion) + TRANSFORMS.register_module(module=PackPoseInputs) + TRANSFORMS.register_module(module=LoadImage) + TRANSFORMS.register_module(module=BottomupGetHeatmapMask) + TRANSFORMS.register_module(module=BottomupRandomAffine) + TRANSFORMS.register_module(module=BottomupResize) + 
TRANSFORMS.register_module(module=GenerateTarget) + TRANSFORMS.register_module(module=KeypointConverter) + TRANSFORMS.register_module(module=RandomFlipAroundRoot) + TRANSFORMS.register_module(module=FilterAnnotations) + TRANSFORMS.register_module(module=YOLOXHSVRandomAug) + TRANSFORMS.register_module(module=YOLOXMixUp) + TRANSFORMS.register_module(module=Mosaic) + TRANSFORMS.register_module(module=BottomupRandomCrop) + TRANSFORMS.register_module(module=BottomupRandomChoiceResize) + TRANSFORMS.register_module(module=HandRandomFlip) + TRANSFORMS.register_module(module=SingleHandConverter) + TRANSFORMS.register_module(module=TorchVisionWrapper) + + dataset = build_dataset(dataset_cfg) + + print(f"Visualizing {num_samples} samples from the dataset...") + + for i in range(num_samples): + data_info = dataset[i] + data_samples = data_info.get('data_samples', {}) + + img = data_info.get('inputs') + + if isinstance(img, torch.Tensor): + img = img.permute(1, 2, 0).numpy() + + vis_img = img.copy() + # Draw the keypoints and labels on the image. + vis_img = plot_keypoints_on_image_cv2(vis_img, data_samples.gt_fields.heatmaps.numpy(), classes) + + os.makedirs(dir, exist_ok=True) + save_path = osp.join(dir, f'sample_{i}.jpg') + cv2.imwrite(save_path, vis_img) + + +def main(): + args = parse_args() + + # load config + cfg = make_mmpose_config( + data_root=args.data_root, + classes=args.classes + ) + + # merge CLI arguments to config + cfg = merge_args(cfg, args) + + cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) + + if args.visualize: + visualize_samples(cfg, args.classes, num_samples=args.num_samples, dir=args.viz_dir) + + # build the runner from config + runner = Runner.from_cfg(cfg) + + # start training + runner.train() + + +if __name__ == '__main__': + main() diff --git a/configs/_base_/datasets/coco.py b/configs/_base_/datasets/coco.py index 865a95bc02..a8514fc52c 100644 --- a/configs/_base_/datasets/coco.py +++ b/configs/_base_/datasets/coco.py @@ -12,170 +12,74 @@ ), keypoint_info={ 0: - dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''), + dict( + name='rear_left', + id=0, + color=[51, 153, 255], + type='upper', + swap='rear_right'), 1: dict( - name='left_eye', + name='rear_right', id=1, color=[51, 153, 255], type='upper', - swap='right_eye'), + swap='rear_left'), 2: dict( - name='right_eye', + name='front_left', id=2, color=[51, 153, 255], type='upper', - swap='left_eye'), + swap='front_right'), 3: dict( - name='left_ear', + name='front_right', id=3, color=[51, 153, 255], type='upper', - swap='right_ear'), + swap='front_left'), 4: dict( - name='right_ear', + name='L_Fork', id=4, color=[51, 153, 255], type='upper', - swap='left_ear'), + swap='R_Fork'), 5: dict( - name='left_shoulder', + name='R_Fork', id=5, color=[0, 255, 0], type='upper', - swap='right_shoulder'), + swap='L_Fork'), 6: dict( - name='right_shoulder', + name='C_Fork', id=6, color=[255, 128, 0], type='upper', - swap='left_shoulder'), - 7: - dict( - name='left_elbow', - id=7, - color=[0, 255, 0], - type='upper', - swap='right_elbow'), - 8: - dict( - name='right_elbow', - id=8, - color=[255, 128, 0], - type='upper', - swap='left_elbow'), - 9: - dict( - name='left_wrist', - id=9, - color=[0, 255, 0], - type='upper', - swap='right_wrist'), - 10: - dict( - name='right_wrist', - id=10, - color=[255, 128, 0], - type='upper', - swap='left_wrist'), - 11: - dict( - name='left_hip', - id=11, - color=[0, 255, 0], - type='lower', - swap='right_hip'), - 12: - dict( - name='right_hip', - id=12, - color=[255, 128, 0], 
- type='lower', - swap='left_hip'), - 13: - dict( - name='left_knee', - id=13, - color=[0, 255, 0], - type='lower', - swap='right_knee'), - 14: - dict( - name='right_knee', - id=14, - color=[255, 128, 0], - type='lower', - swap='left_knee'), - 15: - dict( - name='left_ankle', - id=15, - color=[0, 255, 0], - type='lower', - swap='right_ankle'), - 16: - dict( - name='right_ankle', - id=16, - color=[255, 128, 0], - type='lower', - swap='left_ankle') + swap=''), }, skeleton_info={ 0: - dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]), + dict(link=('rear_left', 'rear_right'), id=0, color=[0, 255, 0]), 1: - dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]), + dict(link=('front_left', 'front_right'), id=1, color=[0, 255, 0]), 2: - dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]), + dict(link=('rear_left', 'front_left'), id=2, color=[0, 255, 0]), 3: - dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]), + dict(link=('rear_right', 'front_right'), id=3, color=[0, 255, 0]), 4: - dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]), + dict(link=('L_Fork', 'R_Fork'), id=4, color=[255, 128, 0]), 5: - dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]), + dict(link=('L_Fork', 'C_Fork'), id=5, color=[255, 128, 0]), 6: - dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]), - 7: - dict( - link=('left_shoulder', 'right_shoulder'), - id=7, - color=[51, 153, 255]), - 8: - dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]), - 9: - dict( - link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]), - 10: - dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]), - 11: - dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]), - 12: - dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]), - 13: - dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]), - 14: - dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]), - 15: - dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]), - 16: - dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]), - 17: - dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]), - 18: - dict( - link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255]) + dict(link=('C_Fork', 'R_Fork'), id=6, color=[255, 128, 0]), }, joint_weights=[ - 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5, - 1.5 + 1., 1., 1., 1., 1., 1., 1., ], sigmas=[ - 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062, - 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089 + 0.05, 0.05, 0.05, 0.05, 0.06, 0.06, 0.07, ]) diff --git a/configs/_base_/datasets/coco_4kp.py b/configs/_base_/datasets/coco_4kp.py new file mode 100644 index 0000000000..6cd487dd9b --- /dev/null +++ b/configs/_base_/datasets/coco_4kp.py @@ -0,0 +1,53 @@ +# Scaletronic 4kp +dataset_info = dict( + dataset_name='coco', + paper_info=dict( + author='', + title='Label: 4 keypoints dataset ', + container='', + year='2025', + homepage='', + ), + keypoint_info={ + 0: dict( + name='front_left', + id=0, + color=[51, 153, 255], + type='upper', + swap='front_right'), + 1: dict( + name='front_right', + id=1, + color=[51, 153, 255], + type='upper', + swap='front_left' + ), + 2:dict( + name='rear_left', + id=2, + color=[51, 153, 255], + type='upper', + swap='rear_right'), + 3: dict( + name='rear_right', + id=3, + color=[51, 153, 255], + type='upper', + swap='rear_left'), + }, + skeleton_info={ + 0: + 
dict(link=('front_left', 'front_right'), id=0, color=[0, 255, 0]), + 1: + dict(link=('rear_left', 'rear_right'), id=1, color=[0, 255, 0]), + 2: + dict(link=('front_left', 'rear_left'), id=2, color=[0, 255, 0]), + 3: + dict(link=('front_right', 'rear_right'), id=3, color=[0, 255, 0]) + }, + joint_weights=[ + 1., 1., 1., 1. + ], + sigmas=[ + 0.05, 0.05, 0.05, 0.05 + ]) \ No newline at end of file diff --git a/configs/_base_/datasets/coco_7kp.py b/configs/_base_/datasets/coco_7kp.py new file mode 100644 index 0000000000..bed1590695 --- /dev/null +++ b/configs/_base_/datasets/coco_7kp.py @@ -0,0 +1,82 @@ +dataset_info = dict( + dataset_name='coco', + paper_info=dict( + author='', + title='Forklift: 7 keypoints', + container='', + year='2025', + homepage='', + ), + keypoint_info = { + 0: dict( + name='C_Fork', + id=0, + color=[255, 128, 0], + type='upper', + swap='' + ), + 1: dict( + name='L_Fork', + id=1, + color=[51, 153, 255], + type='upper', + swap='R_Fork' + ), + 2: dict( + name='R_Fork', + id=2, + color=[0, 255, 0], + type='upper', + swap='L_Fork' + ), + 3: dict( + name='front_left', + id=3, + color=[51, 153, 255], + type='upper', + swap='front_right' + ), + 4: dict( + name='front_right', + id=4, + color=[51, 153, 255], + type='upper', + swap='front_left' + ), + 5: dict( + name='rear_left', + id=5, + color=[51, 153, 255], + type='upper', + swap='rear_right' + ), + 6: dict( + name='rear_right', + id=6, + color=[51, 153, 255], + type='upper', + swap='rear_left' + ), + }, + skeleton_info={ + 0: + dict(link=('rear_left', 'rear_right'), id=0, color=[0, 255, 0]), + 1: + dict(link=('front_left', 'front_right'), id=1, color=[0, 255, 0]), + 2: + dict(link=('rear_left', 'front_left'), id=2, color=[0, 255, 0]), + 3: + dict(link=('rear_right', 'front_right'), id=3, color=[0, 255, 0]), + 4: + dict(link=('L_Fork', 'R_Fork'), id=4, color=[255, 128, 0]), + 5: + dict(link=('L_Fork', 'C_Fork'), id=5, color=[255, 128, 0]), + 6: + dict(link=('C_Fork', 'R_Fork'), id=6, color=[255, 128, 0]), + }, + joint_weights=[ + 1., 1., 1., 1., 1., 1., 1., + ], + sigmas=[ + 0.05, 0.05, 0.05, 0.05, 0.06, 0.06, 0.07, + ]) diff --git a/configs/_base_/datasets/coco_9kp.py b/configs/_base_/datasets/coco_9kp.py new file mode 100644 index 0000000000..3c43b08313 --- /dev/null +++ b/configs/_base_/datasets/coco_9kp.py @@ -0,0 +1,96 @@ +dataset_info = dict( + dataset_name='coco', + paper_info=dict( + author='', + title='Forklift: 9 keypoints', + container='', + year='2025', + homepage='', + ), + keypoint_info = { + 0: dict( + name='C_Fork', + id=0, + color=[255, 128, 0], + type='upper', + swap='' + ), + 1: dict( + name='L_Fork', + id=1, + color=[51, 153, 255], + type='upper', + swap='R_Fork' + ), + 2: dict( + name='R_Fork', + id=2, + color=[0, 255, 0], + type='upper', + swap='L_Fork' + ), + 3: dict( + name='front_left', + id=3, + color=[51, 153, 255], + type='upper', + swap='front_right' + ), + 4: dict( + name='front_right', + id=4, + color=[51, 153, 255], + type='upper', + swap='front_left' + ), + 5: dict( + name='rear_left', + id=5, + color=[51, 153, 255], + type='upper', + swap='rear_right' + ), + 6: dict( + name='rear_right', + id=6, + color=[51, 153, 255], + type='upper', + swap='rear_left' + ), + 7: dict( + name='tip_left', + id=7, + color=[0, 153, 255], + type='upper', + swap='tip_right' + ), + 8: dict( + name='tip_right', + id=8, + color=[51, 153, 0], + type='upper', + swap='tip_left' + ), + }, + skeleton_info={ + 0: + dict(link=('rear_left', 'rear_right'), id=0, color=[0, 255, 0]), + 1: + dict(link=('front_left', 
'front_right'), id=1, color=[0, 255, 0]), + 2: + dict(link=('rear_left', 'front_left'), id=2, color=[0, 255, 0]), + 3: + dict(link=('rear_right', 'front_right'), id=3, color=[0, 255, 0]), + 4: + dict(link=('L_Fork', 'R_Fork'), id=4, color=[255, 128, 0]), + 5: + dict(link=('L_Fork', 'C_Fork'), id=5, color=[255, 128, 0]), + 6: + dict(link=('C_Fork', 'R_Fork'), id=6, color=[255, 128, 0]), + }, + joint_weights=[ + 1., 1., 1., 1., 1., 1., 1.,1.,1., + ], + sigmas=[ + 0.07, 0.06, 0.06, 0.05, 0.05, 0.05, 0.05,0.07,0.07 + ]) diff --git a/configs/_base_/default_runtime.py b/configs/_base_/default_runtime.py index 6f27c0345a..33d258c576 100644 --- a/configs/_base_/default_runtime.py +++ b/configs/_base_/default_runtime.py @@ -5,9 +5,9 @@ timer=dict(type='IterTimerHook'), logger=dict(type='LoggerHook', interval=50), param_scheduler=dict(type='ParamSchedulerHook'), - checkpoint=dict(type='CheckpointHook', interval=10), + checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=1, save_best="val/PCK@0.05"), sampler_seed=dict(type='DistSamplerSeedHook'), - visualization=dict(type='PoseVisualizationHook', enable=False), + visualization=dict(type='PoseVisualizationHook', enable=True, interval=1), badcase=dict( type='BadCaseAnalysisHook', enable=False, @@ -32,13 +32,21 @@ # visualizer vis_backends = [ dict(type='LocalVisBackend'), - # dict(type='TensorboardVisBackend'), + dict(type='TensorboardVisBackend') # dict(type='WandbVisBackend'), ] visualizer = dict( type='PoseLocalVisualizer', vis_backends=vis_backends, name='visualizer') # logger +log_config = dict( + interval=25, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ] +) + log_processor = dict( type='LogProcessor', window_size=50, by_epoch=True, num_digits=6) log_level = 'INFO' diff --git a/configs/body_2d_keypoint/topdown_heatmap/coco/label_studio_config.py b/configs/body_2d_keypoint/topdown_heatmap/coco/label_studio_config.py new file mode 100644 index 0000000000..c4a2cf5f9d --- /dev/null +++ b/configs/body_2d_keypoint/topdown_heatmap/coco/label_studio_config.py @@ -0,0 +1,173 @@ +_base_ = ['../../../_base_/default_runtime.py'] + + +num_keypoints =4 # CHECK IT PLZ + +# runtime +train_cfg = dict(max_epochs=300, val_interval=10) + +# optimizer +optim_wrapper = dict( + optimizer=dict( + type='Adam', + lr=5e-4, + ) +) +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=300, + milestones=[200, 250], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=64) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater'), +) + +# custom_hooks = [ + # dict(type='PCKAccuracyTrainHook', interval=10, thr=0.05), +# ] + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=18, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'), + ), + head=dict( + type='HeatmapHead', + in_channels=512, + out_channels=num_keypoints, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + 
shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = '/dataset' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + # dict(type='RandomHalfBody'), + # TODO: plot + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict( + type='Albumentation', + transforms=[ + dict(type='RandomBrightnessContrast', brightness_limit=[-0.2, 0.2], contrast_limit=[-0.2, 0.2], p=0.4), + + dict( + type='OneOf', + transforms=[ + dict(type='MotionBlur', blur_limit=3, p=0.3), + dict(type='MedianBlur', blur_limit=3, p=0.2), + dict(type='Blur', blur_limit=3, p=0.2), + ], p=0.3), + + dict( + type='OneOf', + transforms=[ + dict(type='GaussNoise', var_limit=(10.0, 50.0), p=0.3), + dict(type='MultiplicativeNoise', multiplier=(0.9, 1.1), p=0.3), + ], p=0.4), + + dict(type='HueSaturationValue', hue_shift_limit=10, sat_shift_limit=10, val_shift_limit=10, p=0.3), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + num_keypoints=num_keypoints, + data_root=data_root, + data_mode=data_mode, + ann_file='coco_annotations.json', + data_prefix=dict(img='train/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + num_keypoints=num_keypoints, + data_root=data_root, + data_mode=data_mode, + ann_file='coco_annotations.json', + bbox_file='', + data_prefix=dict(img='val/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict( + type='CocoMetric', + ann_file=data_root + '/coco_annotations.json' + ), + dict( + type='EPE', + ), + dict( + type='PCKAccuracy', + prefix="5pr_", + ), + dict( + type='PCKAccuracy', + thr=0.1, + prefix="10pr_", + ), + dict( + type='AUC', + ), +] +test_evaluator = val_evaluator diff --git a/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192.py b/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192.py index 7dbe1b43f7..61d3429ec4 100644 --- a/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192.py +++ b/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192.py @@ -1,14 +1,15 @@ _base_ = ['../../../_base_/default_runtime.py'] +num_keypoints = 4 # CHECK IT PLZ + # runtime -train_cfg = dict(max_epochs=210, val_interval=10) +train_cfg = dict(max_epochs=300, val_interval=10) # optimizer optim_wrapper = dict(optimizer=dict( type='Adam', lr=5e-4, )) - # learning policy param_scheduler = [ dict( @@ -17,21 +18,43 @@ dict( type='MultiStepLR', begin=0, - end=210, - milestones=[170, 200], + end=300, + milestones=[100, 200], gamma=0.1, by_epoch=True) ] # automatically scaling LR based on the actual training batch size -auto_scale_lr = dict(base_batch_size=512) +auto_scale_lr = dict(base_batch_size=64) # hooks -default_hooks = 
dict(checkpoint=dict(save_best='coco/AP', rule='greater')) +default_hooks = dict( + checkpoint=dict(save_best='val/PCK@0.05', rule='greater'), +) + +# custom_hooks = [ +# dict(type='PCKAccuracyTrainHook', interval=10, thr=0.05), +# ] # codec settings codec = dict( - type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = '/data/rewe_keypoints/' +labels = ['front_left', 'front_right', 'rear_left', 'rear_right'] + +symmetries = [{ + "front_left": "rear_right", + "front_right": "rear_left", + "rear_left": "front_right", + "rear_right": "front_left", +} +] + # model settings model = dict( @@ -43,34 +66,62 @@ bgr_to_rgb=True), backbone=dict( type='ResNet', - depth=50, - init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + depth=18, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'), ), head=dict( type='HeatmapHead', - in_channels=2048, - out_channels=17, - loss=dict(type='KeypointMSELoss', use_target_weight=True), - decoder=codec), + in_channels=512, + out_channels=num_keypoints, + #loss= dict(type='KeypointMSELoss', use_target_weight=True), + loss=dict( + type='OutputSymmetryLoss', + labels=labels, + symmetries=symmetries, + ), + decoder=codec, + labels=labels, + symmetries = symmetries, + ), test_cfg=dict( - flip_test=True, + flip_test=False, flip_mode='heatmap', shift_heatmap=True, )) -# base dataset settings -dataset_type = 'CocoDataset' -data_mode = 'topdown' -data_root = 'data/coco/' + # pipelines train_pipeline = [ dict(type='LoadImage'), dict(type='GetBBoxCenterScale'), dict(type='RandomFlip', direction='horizontal'), - dict(type='RandomHalfBody'), + # dict(type='RandomHalfBody'), + # TODO: plot dict(type='RandomBBoxTransform'), dict(type='TopdownAffine', input_size=codec['input_size']), + dict( + type='Albumentation', + transforms=[ + dict(type='RandomBrightnessContrast', brightness_limit=[-0.2, 0.2], contrast_limit=[-0.2, 0.2], p=0.4), + + dict( + type='OneOf', + transforms=[ + dict(type='MotionBlur', blur_limit=3, p=0.3), + dict(type='MedianBlur', blur_limit=3, p=0.2), + dict(type='Blur', blur_limit=3, p=0.2), + ], p=0.3), + + dict( + type='OneOf', + transforms=[ + dict(type='GaussNoise', var_limit=(10.0, 50.0), p=0.3), + dict(type='MultiplicativeNoise', multiplier=(0.9, 1.1), p=0.3), + ], p=0.4), + + dict(type='HueSaturationValue', hue_shift_limit=10, sat_shift_limit=10, val_shift_limit=10, p=0.3), + ]), dict(type='GenerateTarget', encoder=codec), dict(type='PackPoseInputs') ] @@ -84,38 +135,104 @@ # data loaders train_dataloader = dict( batch_size=64, - num_workers=2, + num_workers=4, persistent_workers=True, sampler=dict(type='DefaultSampler', shuffle=True), dataset=dict( type=dataset_type, + labels=labels, data_root=data_root, data_mode=data_mode, - ann_file='annotations/person_keypoints_train2017.json', - data_prefix=dict(img='train2017/'), + ann_file='coco/train.json', + data_prefix=dict(img='images'), pipeline=train_pipeline, )) val_dataloader = dict( - batch_size=32, - num_workers=2, + batch_size=64, + num_workers=4, persistent_workers=True, drop_last=False, sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), dataset=dict( type=dataset_type, + labels=labels, data_root=data_root, data_mode=data_mode, - ann_file='annotations/person_keypoints_val2017.json', - bbox_file='data/coco/person_detection_results/' - 
'COCO_val2017_detections_AP_H_56_person.json', - data_prefix=dict(img='val2017/'), + ann_file='coco/val.json', + bbox_file='', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) + +test_dataloader = dict( + batch_size=64, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + labels=labels, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/test.json', + bbox_file='', + data_prefix=dict(img='images/'), test_mode=True, pipeline=val_pipeline, )) -test_dataloader = val_dataloader # evaluators -val_evaluator = dict( - type='CocoMetric', - ann_file=data_root + 'annotations/person_keypoints_val2017.json') -test_evaluator = val_evaluator +val_evaluator = [ + #dict( + # type='CocoMetric', + # ann_file=data_root + "coco/val.json", + #), + dict( + type='PCKAccuracy', + prefix="val", + thr=0.01, + labels=labels, + symmetries=symmetries, + ), + dict( + type='PCKAccuracy', + prefix="val", + thr=0.05, + labels=labels, + symmetries=symmetries, + ), + dict( + type='PCKAccuracy', + thr=0.1, + prefix="val", + labels=labels, + symmetries=symmetries, + ), + ] + +test_evaluator = [ + dict( + type='PCKAccuracy', + prefix="test", + thr=0.01, + labels=labels, + symmetries=symmetries, + ), + dict( + type='PCKAccuracy', + prefix="test", + thr=0.05, + labels=labels, + symmetries=symmetries, + ), + dict( + type='PCKAccuracy', + thr=0.1, + prefix="test", + labels=labels, + symmetries=symmetries, + ), + +] diff --git a/configs/custom/pallet_keypoints.py b/configs/custom/pallet_keypoints.py new file mode 100644 index 0000000000..cb36280774 --- /dev/null +++ b/configs/custom/pallet_keypoints.py @@ -0,0 +1,204 @@ +_base_ = ['../_base_/default_runtime.py'] + +num_keypoints = 4 + +# runtime +train_cfg = dict(max_epochs=300, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=300, + milestones=[200, 250], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=64) + + +# hooks +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=1, save_best='coco/AP', rule='greater'), + sampler_seed=dict(type='DistSamplerSeedHook'), + cdualization=dict(type='PoseVisualizationHook', enable=True), + badcase=dict( + type='BadCaseAnalysisHook', + enable=False, + out_dir='badcase', + metric_type='loss', + badcase_thr=5 + ) +) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(224, 224), heatmap_size=(56, 56), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=18, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'), + ), + head=dict( + type='HeatmapHead', + in_channels=512, + out_channels=num_keypoints, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# 
base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/new_dataset_14_02/' +work_dir = 'pallet_kp_models/new_dataset_14_02/' +labels = ["top_left", "top_right", "bottom_left", "bottom_right"] + + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + # dict(type='RandomFlip', direction='horizontal'), # TODO: ASK DOES IT NEEDED + dict( + type='RandomBBoxTransform', + rotate_factor=10.0, + rotate_prob=0.6 + ), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type="RandomBottomHalf", threshold=0.4, p=0.5), + dict( + type='Albumentation', + transforms=[ + dict( + type='ColorJitter', + brightness=[0.8, 1.2], + contrast=[0.8, 1.2], + saturation=[0.8, 1.2], + hue=[-0.5, 0.5], + p=0.4 + ), + + dict( + type='OneOf', + transforms=[ + dict(type='MotionBlur', blur_limit=3, p=0.3), + dict(type='MedianBlur', blur_limit=3, p=0.2), + dict(type='Blur', blur_limit=3, p=0.2), + ], p=0.3), + + dict( + type='OneOf', + transforms=[ + dict(type='GaussNoise', var_limit=(10.0, 50.0), p=0.3), + dict(type='MultiplicativeNoise', multiplier=(0.9, 1.1), p=0.3), + ], p=0.4), + + # dict(type='HueSaturationValue', hue_shift_limit=10, sat_shift_limit=10, val_shift_limit=10, p=0.3), # USE WITHOUT TrivialAugmentation + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs'), + dict(type='TorchVisionWrapper', transforms=[ + dict(type='TrivialAugmentWide', num_magnitude_bins=31) + ], save=True), # TODO CHECK SAVING will save 100 images in /mmpose/test + +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='RepeatDataset', + times=2, + dataset=dict( + type=dataset_type, + labels=labels, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + ) + ) +) + +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + labels=labels, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/val.json', + bbox_file='', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) + +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict( + type='CocoMetric', + ann_file=data_root + 'coco/val.json' + ), + dict( + type='EPE', + ), + dict( + type='PCKAccuracy', + prefix="5pr_", + ), + dict( + type='PCKAccuracy', + thr=0.1, + prefix="10pr_", + ), + dict( + type='AUC', + ), +] +test_evaluator = val_evaluator diff --git a/configs/my_res50_coco_640x640.py b/configs/my_res50_coco_640x640.py new file mode 100644 index 0000000000..4aa6475d93 --- /dev/null +++ b/configs/my_res50_coco_640x640.py @@ -0,0 +1,161 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.001, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + 
step=[400, 460]) +total_epochs = 500 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=6, + dataset_channel=[ + [0, 1, 2, 3, 4, 5], + ], + inference_channel=[0, 1, 2, 3, 4, 5] +) + +data_cfg = dict( + image_size=640, + base_size=320, + base_sigma=2, + heatmap_size=[160], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, +) + +# model settings +model = dict( + type='BottomUp', + pretrained='torchvision://resnet50', + backbone=dict(type='ResNet', depth=50), + keypoint_head=dict( + type='BottomUpSimpleHead', + in_channels=2048, + num_joints=6, + tag_per_joint=True, + with_ae_loss=[True], + loss_keypoint=dict( + type='MultiLossFactory', + num_joints=6, + num_stages=1, + ae_loss_type='exp', + with_ae_loss=[True], + push_loss_factor=[0.001], + pull_loss_factor=[0.001], + with_heatmaps_loss=[True], + heatmaps_loss_factor=[1.0])), + train_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + img_size=data_cfg['image_size']), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_ae=[True], + project2image=True, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True)) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='BottomUpGenerateTarget', + sigma=2, + max_num_people=30, + ), + dict( + type='Collect', + keys=['img', 'joints', 'targets', 'masks'], + meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/forklift_coco' +data = dict( + samples_per_gpu=4, + workers_per_gpu=1, + train=dict( + type='BottomUpForkliftDataset', + ann_file=f'{data_root}/annotations/forklift_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpForkliftDataset', + ann_file=f'{data_root}/annotations/forklift_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpForkliftDataset', + ann_file=f'{data_root}/annotations/forklift_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/my_top_down_res18_coco_128x96_4KP.py b/configs/my_top_down_res18_coco_128x96_4KP.py new file mode 100644 index 0000000000..3e5da3e603 --- /dev/null +++ b/configs/my_top_down_res18_coco_128x96_4KP.py @@ -0,0 +1,142 @@ +log_level = 'INFO' +load_from = None +resume_from = 
None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=400) +evaluation = dict(interval=10, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=1e-4, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[360, 380]) +total_epochs = 400 +log_config = dict( + interval=25, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + num_output_channels=4, + dataset_joints=4, + dataset_channel=[ + [0, 1, 2, 3], + ], + inference_channel=[ + 0, 1, 2, 3]) + +# model settings +model = dict( + type='TopDown', + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + keypoint_head=dict( + type='TopDownSimpleHead', + in_channels=512, + out_channels=channel_cfg['num_output_channels'], + loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + post_process='default', + shift_heatmap=True, + modulate_kernel=11)) + +data_cfg = dict( + image_size=[96, 128], + heatmap_size=[24, 32], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + soft_nms=False, + nms_thr=1.0, + oks_thr=0.9, + vis_thr=0.2, + use_gt_bbox=True, + det_bbox_thr=0.0, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', resize_shape=(800, 600)), + dict(type='TopDownRandomFlip', flip_prob=0.5), + # dict( + # type='TopDownHalfBodyTransform', + # num_joints_half_body=8, + # prob_half_body=0.3), + dict( + type='TopDownGetRandomScaleRotation', rot_factor=180, scale_factor=0.5, rot_prob=0.9), + dict(type='TopDownAffine'), + + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict(type='TopDownGenerateTarget', sigma=3), + dict( + type='Collect', + keys=['img', 'target', 'target_weight'], + meta_keys=[ + 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', + 'rotation', 'bbox_score', 'flip_pairs' + ]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', resize_shape=(800, 600)), + dict(type='Resize', resize_shape=(1024, 768)), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'center', 'scale', 'rotation', 'bbox_score', + 'flip_pairs' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/annotation_tool' +data = dict( + samples_per_gpu=8, + workers_per_gpu=2, + train=dict( + type='TopDownForkliftDataset4KP', + ann_file=f'{data_root}/annotations/forklift_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='TopDownForkliftDataset4KP', + ann_file=f'{data_root}/annotations/forklift_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='TopDownForkliftDataset4KP', + ann_file=f'{data_root}/annotations/forklift_keypoints_test2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) 
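+
+# Usage sketch (assumed from the standard MMPose 0.x tooling; the tools/ entry
+# points and work_dirs layout are not part of this diff):
+#   python tools/train.py configs/my_top_down_res18_coco_128x96_4KP.py
+#   python tools/test.py configs/my_top_down_res18_coco_128x96_4KP.py \
+#       work_dirs/my_top_down_res18_coco_128x96_4KP/latest.pth --eval mAP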
diff --git a/configs/my_top_down_res18_coco_128x96_pallets.py b/configs/my_top_down_res18_coco_128x96_pallets.py new file mode 100644 index 0000000000..d78b0e52dc --- /dev/null +++ b/configs/my_top_down_res18_coco_128x96_pallets.py @@ -0,0 +1,139 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=200) +evaluation = dict(interval=10, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=1e-4, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[410, 430]) +total_epochs = 450 +log_config = dict( + interval=25, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + num_output_channels=4, + dataset_joints=4, + dataset_channel=[ + [0, 1, 2, 3], + ], + inference_channel=[ + 0, 1, 2, 3]) + +# model settings +model = dict( + type='TopDown', + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + keypoint_head=dict( + type='TopDownSimpleHead', + in_channels=512, + out_channels=channel_cfg['num_output_channels'], + loss_keypoint=dict(type='SymmetryLoss', symmetry_group = [[0, 1, 2, 3], [3, 2, 1, 0]])), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + post_process='default', + shift_heatmap=True, + modulate_kernel=11)) + +data_cfg = dict( + image_size=[96, 128], + heatmap_size=[24, 32], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + soft_nms=False, + nms_thr=1.0, + oks_thr=0.9, + vis_thr=0.2, + use_gt_bbox=True, + det_bbox_thr=0.0, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownRandomFlip', flip_prob=0.5), + # dict( + # type='TopDownHalfBodyTransform', + # num_joints_half_body=8, + # prob_half_body=0.3), + dict( + type='TopDownGetRandomScaleRotation', rot_factor=180, scale_factor=0.5, rot_prob=0.9), + dict(type='TopDownAffine'), + + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict(type='TopDownGenerateTarget', sigma=3), + dict( + type='Collect', + keys=['img', 'target', 'target_weight'], + meta_keys=[ + 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', + 'rotation', 'bbox_score', 'flip_pairs' + ]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'center', 'scale', 'rotation', 'bbox_score', + 'flip_pairs' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/pallet3' +data = dict( + samples_per_gpu=8, + workers_per_gpu=2, + train=dict( + type='TopDownForkliftDataset4KP', + ann_file=f'{data_root}/annotations/forklift_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='TopDownForkliftDataset4KP', + ann_file=f'{data_root}/annotations/forklift_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + 
type='TopDownForkliftDataset4KP', + ann_file=f'{data_root}/annotations/forklift_keypoints_test2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/my_top_down_res18_coco_256x192.py b/configs/my_top_down_res18_coco_256x192.py new file mode 100644 index 0000000000..5ad4c7182b --- /dev/null +++ b/configs/my_top_down_res18_coco_256x192.py @@ -0,0 +1,139 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=10, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=1e-4, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[360, 380]) +total_epochs = 400 +log_config = dict( + interval=25, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + num_output_channels=6, + dataset_joints=6, + dataset_channel=[ + [0, 1, 2, 3, 4, 5], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5]) + +# model settings +model = dict( + type='TopDown', + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + keypoint_head=dict( + type='TopDownSimpleHead', + in_channels=512, + out_channels=channel_cfg['num_output_channels'], + loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + post_process='default', + shift_heatmap=True, + modulate_kernel=11)) + +data_cfg = dict( + image_size=[192, 256], + heatmap_size=[48, 64], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + soft_nms=False, + nms_thr=1.0, + oks_thr=0.9, + vis_thr=0.2, + use_gt_bbox=True, + det_bbox_thr=0.0, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownRandomFlip', flip_prob=0.5), + # dict( + # type='TopDownHalfBodyTransform', + # num_joints_half_body=8, + # prob_half_body=0.3), + dict( + type='TopDownGetRandomScaleRotation', rot_factor=180, scale_factor=0.5, rot_prob=0.9), + dict(type='TopDownAffine'), + + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict(type='TopDownGenerateTarget', sigma=3), + dict( + type='Collect', + keys=['img', 'target', 'target_weight'], + meta_keys=[ + 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', + 'rotation', 'bbox_score', 'flip_pairs' + ]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'center', 'scale', 'rotation', 'bbox_score', + 'flip_pairs' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/forklift_coco' +data = dict( + samples_per_gpu=8, + workers_per_gpu=2, + train=dict( + type='TopDownForkliftDataset', + ann_file=f'{data_root}/annotations/forklift_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='TopDownForkliftDataset', + 
ann_file=f'{data_root}/annotations/forklift_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='TopDownForkliftDataset', + ann_file=f'{data_root}/annotations/forklift_keypoints_test2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/my_top_down_res18_coco_256x192_4KP.py b/configs/my_top_down_res18_coco_256x192_4KP.py new file mode 100644 index 0000000000..d8f84eb0a4 --- /dev/null +++ b/configs/my_top_down_res18_coco_256x192_4KP.py @@ -0,0 +1,139 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=10, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=1e-4, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[360, 380]) +total_epochs = 400 +log_config = dict( + interval=25, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + num_output_channels=4, + dataset_joints=4, + dataset_channel=[ + [0, 1, 2, 3], + ], + inference_channel=[ + 0, 1, 2, 3]) + +# model settings +model = dict( + type='TopDown', + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + keypoint_head=dict( + type='TopDownSimpleHead', + in_channels=512, + out_channels=channel_cfg['num_output_channels'], + loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + post_process='default', + shift_heatmap=True, + modulate_kernel=11)) + +data_cfg = dict( + image_size=[192, 256], + heatmap_size=[48, 64], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + soft_nms=False, + nms_thr=1.0, + oks_thr=0.9, + vis_thr=0.2, + use_gt_bbox=True, + det_bbox_thr=0.0, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownRandomFlip', flip_prob=0.5), + # dict( + # type='TopDownHalfBodyTransform', + # num_joints_half_body=8, + # prob_half_body=0.3), + dict( + type='TopDownGetRandomScaleRotation', rot_factor=180, scale_factor=0.5, rot_prob=0.9), + dict(type='TopDownAffine'), + + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict(type='TopDownGenerateTarget', sigma=3), + dict( + type='Collect', + keys=['img', 'target', 'target_weight'], + meta_keys=[ + 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', + 'rotation', 'bbox_score', 'flip_pairs' + ]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'center', 'scale', 'rotation', 'bbox_score', + 'flip_pairs' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/annotation_tool' +data = dict( + samples_per_gpu=8, + workers_per_gpu=2, + train=dict( + type='TopDownForkliftDataset4KP', + 
ann_file=f'{data_root}/annotations/forklift_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='TopDownForkliftDataset4KP', + ann_file=f'{data_root}/annotations/forklift_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='TopDownForkliftDataset4KP', + ann_file=f'{data_root}/annotations/forklift_keypoints_test2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/my_top_down_res18_coco_256x192_lifted_fork.py b/configs/my_top_down_res18_coco_256x192_lifted_fork.py new file mode 100644 index 0000000000..d77f1ccd50 --- /dev/null +++ b/configs/my_top_down_res18_coco_256x192_lifted_fork.py @@ -0,0 +1,139 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=10, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=1e-4, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[360, 380]) +total_epochs = 400 +log_config = dict( + interval=25, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + num_output_channels=3, + dataset_joints=3, + dataset_channel=[ + [0, 1, 2], + ], + inference_channel=[ + 0, 1, 2]) + +# model settings +model = dict( + type='TopDown', + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + keypoint_head=dict( + type='TopDownSimpleHead', + in_channels=512, + out_channels=channel_cfg['num_output_channels'], + loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + post_process='default', + shift_heatmap=True, + modulate_kernel=11)) + +data_cfg = dict( + image_size=[192, 256], + heatmap_size=[48, 64], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + soft_nms=False, + nms_thr=1.0, + oks_thr=0.9, + vis_thr=0.2, + use_gt_bbox=True, + det_bbox_thr=0.0, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownRandomFlip', flip_prob=0.5), + # dict( + # type='TopDownHalfBodyTransform', + # num_joints_half_body=8, + # prob_half_body=0.3), + dict( + type='TopDownGetRandomScaleRotation', rot_factor=180, scale_factor=0.5, rot_prob=0.9), + dict(type='TopDownAffine'), + + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict(type='TopDownGenerateTarget', sigma=3), + dict( + type='Collect', + keys=['img', 'target', 'target_weight'], + meta_keys=[ + 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', + 'rotation', 'bbox_score', 'flip_pairs' + ]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'center', 'scale', 'rotation', 'bbox_score', + 'flip_pairs' + ]), +] + 
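+# The evaluation pipeline is reused unchanged for testing: it only contains
+# deterministic transforms, and the full-frame demo
+# (demo/top_down_image_demo_full_frame_without_det.py, invoked from
+# inference.sh with this config) supplies a whole-image bounding box per
+# frame, so no detector-specific preprocessing is needed here.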
+test_pipeline = val_pipeline + +data_root = 'data/lifted_fork_3p' +data = dict( + samples_per_gpu=8, + workers_per_gpu=2, + train=dict( + type='LiftedForkDataset3KP', + ann_file=f'{data_root}/annotations/forklift_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='LiftedForkDataset3KP', + ann_file=f'{data_root}/annotations/forklift_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='LiftedForkDataset3KP', + ann_file=f'{data_root}/annotations/forklift_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/my_top_down_res18_coco_256x192_lifted_fork_combined.py b/configs/my_top_down_res18_coco_256x192_lifted_fork_combined.py new file mode 100644 index 0000000000..2ccb9a4cde --- /dev/null +++ b/configs/my_top_down_res18_coco_256x192_lifted_fork_combined.py @@ -0,0 +1,158 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE', 'mAP', 'NME'], key_indicator='AP') +n = 7 +optimizer = dict( + type='Adam', + lr=1e-4, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[300, 350]) +total_epochs = 400 +log_config = dict( + interval=25, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + num_output_channels=n, + dataset_joints=n, + dataset_channel=[ + list(range(n)), + ], + inference_channel=[ + list(range(n)) +]) + +# model settings +model = dict( + type='TopDown', + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + keypoint_head=dict( + type='TopDownSimpleHead', + in_channels=512, + out_channels=channel_cfg['num_output_channels'], + loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + post_process='default', + shift_heatmap=True, + modulate_kernel=11)) + +data_cfg = dict( + image_size=[192, 256], + heatmap_size=[48, 64], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + soft_nms=False, + nms_thr=1.0, + oks_thr=0.9, + vis_thr=0.2, + use_gt_bbox=True, + det_bbox_thr=0.0, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownEnlargeBbox', enlarge_factor=0.1), + dict(type='TopDownRandomFlip', flip_prob=0.5), + dict(type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.1), + dict(type='TopDownAffine'), + dict( + type='Albumentation', + transforms=[ + dict(type='RandomBrightnessContrast', brightness_limit=[-0.2, 0.2], contrast_limit=[-0.2, 0.2], p=0.4), + + dict( + type='OneOf', + transforms=[ + dict(type='MotionBlur', blur_limit=3, p=0.3), + dict(type='MedianBlur', blur_limit=3, p=0.2), + dict(type='Blur', blur_limit=3, p=0.2), + ], p=0.3), + + dict( + type='OneOf', + transforms=[ + dict(type='GaussNoise', var_limit=(10.0, 50.0), p=0.3), + dict(type='MultiplicativeNoise', multiplier=(0.9, 1.1), p=0.3), + ], p=0.4), + + 
dict(type='HueSaturationValue', hue_shift_limit=10, sat_shift_limit=10, val_shift_limit=10, p=0.4), + ]), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict(type='TopDownGenerateTarget', sigma=2), + dict( + type='Collect', + keys=['img', 'target', 'target_weight'], + meta_keys=[ + 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', + 'rotation', 'bbox_score', 'flip_pairs' + ]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownEnlargeBbox', enlarge_factor=0.1), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'center', 'scale', 'rotation', 'bbox_score', + 'flip_pairs' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/demodesk_lifted_fork_7p' +data = dict( + samples_per_gpu=16, + workers_per_gpu=2, + train=dict( + type='LiftedForkDataset7KP', + ann_file=f'{data_root}/annotations/forklift_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='LiftedForkDataset7KP', + ann_file=f'{data_root}/annotations/forklift_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='LiftedForkDataset7KP', + ann_file=f'{data_root}/annotations/forklift_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/my_top_down_res18_coco_256x192_pallets.py b/configs/my_top_down_res18_coco_256x192_pallets.py new file mode 100644 index 0000000000..c6e541500a --- /dev/null +++ b/configs/my_top_down_res18_coco_256x192_pallets.py @@ -0,0 +1,139 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=200) +evaluation = dict(interval=10, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=1e-4, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[560, 580]) +total_epochs = 600 +log_config = dict( + interval=25, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + num_output_channels=4, + dataset_joints=4, + dataset_channel=[ + [0, 1, 2, 3], + ], + inference_channel=[ + 0, 1, 2, 3]) + +# model settings +model = dict( + type='TopDown', + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + keypoint_head=dict( + type='TopDownSimpleHead', + in_channels=512, + out_channels=channel_cfg['num_output_channels'], + loss_keypoint=dict(type='SymmetryLoss', symmetry_group = [[0, 1, 2, 3], [3, 2, 1, 0]])), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + post_process='default', + shift_heatmap=True, + modulate_kernel=11)) + +data_cfg = dict( + image_size=[192, 256], + heatmap_size=[48, 64], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + soft_nms=False, + nms_thr=1.0, + oks_thr=0.9, + vis_thr=0.2, + use_gt_bbox=True, + det_bbox_thr=0.0, + bbox_file='data/coco/person_detection_results/' + 
'COCO_val2017_detections_AP_H_56_person.json', +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownRandomFlip', flip_prob=0.5), + # dict( + # type='TopDownHalfBodyTransform', + # num_joints_half_body=8, + # prob_half_body=0.3), + dict( + type='TopDownGetRandomScaleRotation', rot_factor=180, scale_factor=0.5, rot_prob=0.9), + dict(type='TopDownAffine'), + + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict(type='TopDownGenerateTarget', sigma=3), + dict( + type='Collect', + keys=['img', 'target', 'target_weight'], + meta_keys=[ + 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', + 'rotation', 'bbox_score', 'flip_pairs' + ]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'center', 'scale', 'rotation', 'bbox_score', + 'flip_pairs' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/gazebo' +data = dict( + samples_per_gpu=8, + workers_per_gpu=2, + train=dict( + type='TopDownForkliftDataset4KP', + ann_file=f'{data_root}/annotations/forklift_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='TopDownForkliftDataset4KP', + ann_file=f'{data_root}/annotations/forklift_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='TopDownForkliftDataset4KP', + ann_file=f'{data_root}/annotations/forklift_keypoints_test2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/my_top_down_res50_coco_384x288.py b/configs/my_top_down_res50_coco_384x288.py new file mode 100644 index 0000000000..59b6865248 --- /dev/null +++ b/configs/my_top_down_res50_coco_384x288.py @@ -0,0 +1,139 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=10, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=1e-4, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[360, 380]) +total_epochs = 400 +log_config = dict( + interval=25, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + num_output_channels=6, + dataset_joints=6, + dataset_channel=[ + [0, 1, 2, 3, 4, 5], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5]) + +# model settings +model = dict( + type='TopDown', + pretrained='torchvision://resnet50', + backbone=dict(type='ResNet', depth=50), + keypoint_head=dict( + type='TopDownSimpleHead', + in_channels=2048, + out_channels=channel_cfg['num_output_channels'], + loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + post_process='default', + shift_heatmap=True, + modulate_kernel=11)) + +data_cfg = dict( + image_size=[288, 384], + heatmap_size=[72, 96], + num_output_channels=channel_cfg['num_output_channels'], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + 
soft_nms=False, + nms_thr=1.0, + oks_thr=0.9, + vis_thr=0.2, + use_gt_bbox=True, + det_bbox_thr=0.0, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', +) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownRandomFlip', flip_prob=0.5), + # dict( + # type='TopDownHalfBodyTransform', + # num_joints_half_body=8, + # prob_half_body=0.3), + dict( + type='TopDownGetRandomScaleRotation', rot_factor=180, scale_factor=0.5, rot_prob=0.9), + dict(type='TopDownAffine'), + + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict(type='TopDownGenerateTarget', sigma=3), + dict( + type='Collect', + keys=['img', 'target', 'target_weight'], + meta_keys=[ + 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', + 'rotation', 'bbox_score', 'flip_pairs' + ]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='TopDownAffine'), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'center', 'scale', 'rotation', 'bbox_score', + 'flip_pairs' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/forklift_coco' +data = dict( + samples_per_gpu=8, + workers_per_gpu=2, + train=dict( + type='TopDownForkliftDataset', + ann_file=f'{data_root}/annotations/forklift_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='TopDownForkliftDataset', + ann_file=f'{data_root}/annotations/forklift_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='TopDownForkliftDataset', + ann_file=f'{data_root}/annotations/forklift_keypoints_test2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/demo/top_down_image_demo_full_frame_without_det.py b/demo/top_down_image_demo_full_frame_without_det.py new file mode 100644 index 0000000000..09146fbade --- /dev/null +++ b/demo/top_down_image_demo_full_frame_without_det.py @@ -0,0 +1,109 @@ +import os +from argparse import ArgumentParser + +import cv2 +import numpy as np + +from mmpose.apis import (inference_top_down_pose_model, init_pose_model, + vis_pose_result) + + +def main(): + """Visualize the demo images. + + Using mmdet to detect the human. + """ + parser = ArgumentParser() + parser.add_argument('pose_config', help='Config file for pose') + parser.add_argument('pose_checkpoint', help='Checkpoint file for pose') + parser.add_argument('--img-root', type=str, help='image root') + parser.add_argument( + '--show', + action='store_true', + default=False, + help='whether to show visualizations.') + parser.add_argument( + '--out-img-root', + default='', + help='Root of the output video file. 
'
+        'Default not saving the visualization video.')
+    parser.add_argument(
+        '--device', default='cuda:0', help='Device used for inference')
+    parser.add_argument(
+        '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold')
+
+    args = parser.parse_args()
+
+    assert (args.show or args.out_img_root != '')
+    # build the pose model from a config file and a checkpoint file
+    pose_model = init_pose_model(
+        args.pose_config, args.pose_checkpoint, device=args.device.lower())
+
+    dataset = pose_model.cfg.data['test']['type']
+
+    # cap = cv2.VideoCapture(args.video_path)
+    # assert cap.isOpened(), f'Failed to load video file {args.video_path}'
+
+    # fps = cap.get(cv2.CAP_PROP_FPS)
+    # size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
+    #         int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
+
+    # if args.out_video_root == '':
+    #     save_out_video = False
+    # else:
+    os.makedirs(args.out_img_root, exist_ok=True)
+    #     save_out_video = True
+
+    # if save_out_video:
+    #     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    #     videoWriter = cv2.VideoWriter(
+    #         os.path.join(args.out_video_root,
+    #                      f'vis_{os.path.basename(args.video_path)}'), fourcc,
+    #         fps, size)
+
+    # optional
+    return_heatmap = False
+
+    # e.g. use ('backbone', ) to return backbone feature
+    output_layer_names = None
+
+    # while (cap.isOpened()):
+    #     flag, img = cap.read()
+    #     if not flag:
+    #         break
+    assert os.path.exists(args.img_root)
+    for img_name in os.listdir(args.img_root):
+        img_path = os.path.join(args.img_root, img_name)
+        img = cv2.imread(img_path)
+        size = img.shape[1], img.shape[0]
+        # use a single full-frame bounding box, since no detector is used
+        person_results = [{'bbox': np.array([0, 0, size[0], size[1]])}]
+
+        # test a single image, with a list of bboxes.
+        pose_results, returned_outputs = inference_top_down_pose_model(
+            pose_model,
+            img,
+            person_results,
+            format='xyxy',
+            dataset=dataset,
+            return_heatmap=return_heatmap,
+            outputs=output_layer_names)
+
+        # show the results
+        vis_img = vis_pose_result(
+            pose_model,
+            img,
+            pose_results,
+            dataset=dataset,
+            kpt_score_thr=args.kpt_thr,
+            show=False)
+
+        if args.show:
+            cv2.imshow('Image', vis_img)
+
+        # always write the visualization to the output directory
+        write_path = os.path.join(args.out_img_root, img_name)
+        cv2.imwrite(write_path, vis_img)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 064b803979..903e7943a6 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -32,3 +32,6 @@ RUN git checkout main
 ENV FORCE_CUDA="1"
 RUN pip install -r requirements/build.txt
 RUN pip install --no-cache-dir -e .
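+# Notes on the extra dependencies (reasons are best-effort assumptions, not
+# taken from upstream requirements): mmdet is used by the detector-based
+# demos, tensorboard backs the TensorboardVisBackend enabled in
+# configs/_base_/default_runtime.py, albumentations provides the Albumentation
+# transforms used in the training pipelines, and setuptools is pinned to avoid
+# a presumed incompatibility with this tensorboard/PyTorch combination.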
+RUN mim install "mmdet>=3.1.0" +RUN pip install future tensorboard albumentations +RUN pip install setuptools==59.5.0 diff --git a/docker/Dockerfile_aws b/docker/Dockerfile_aws new file mode 100644 index 0000000000..a02dd76f66 --- /dev/null +++ b/docker/Dockerfile_aws @@ -0,0 +1,37 @@ +ARG PYTORCH="1.9.0" +ARG CUDA="11.1" +ARG CUDNN="8" + +FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel + +ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX 8.6" +ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" +ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" + +# To fix GPG key error when running apt-get update +RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub +RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub + +RUN apt-get update && apt-get install -y git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx\ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install xtcocotools +RUN pip install cython +RUN pip install xtcocotools + +# Install MMEngine and MMCV +RUN pip install openmim +RUN mim install mmengine "mmcv>=2.0.0" + +# Install MMPose +RUN conda clean --all +RUN git clone https://github.com/open-mmlab/mmpose.git /mmpose +WORKDIR /mmpose +RUN git checkout main +ENV FORCE_CUDA="1" +RUN pip install -r requirements/build.txt +RUN pip install --no-cache-dir -e . +RUN mim install "mmdet>=3.1.0" +RUN pip install future tensorboard albumentations +RUN pip install setuptools==59.5.0 diff --git a/docker/LabelStudio.Dockerfile b/docker/LabelStudio.Dockerfile new file mode 100644 index 0000000000..19b4a39d05 --- /dev/null +++ b/docker/LabelStudio.Dockerfile @@ -0,0 +1,47 @@ +# It is important that cuda supports the video card architectures that are important to us: +# NVIDIA GeForce RTX 3060 / RTX 3060 Ti - sm_86 +# NVIDIA GeForce RTX 2080 Ti - sm_75 +# NVIDIA A100-SXM4-40GB - sm_80 + +ARG PYTORCH="1.10.0" +ARG CUDA="11.3" +ARG CUDNN="8" + +FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel + +ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX 8.6" +ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" +ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" + +# To fix GPG key error when running apt-get update +RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub +RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub + +RUN apt-get update && apt-get install -y \ + git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install xtcocotools +RUN pip install cython +RUN pip install xtcocotools + +# Install MMEngine and MMCV +RUN pip install openmim +RUN mim install mmengine "mmcv>=2.0.0" + +# Install MMPose +RUN conda clean --all +RUN git clone https://github.com/logivations/mmpose.git /mmpose + +# Checkout to branch (TODO: Remove after merge) +WORKDIR /mmpose +RUN git pull + +# Install requirements +ENV FORCE_CUDA="1" +RUN pip install -r requirements/build.txt +RUN pip install --no-cache-dir -e . 
+RUN mim install "mmdet>=3.1.0" +RUN pip install future tensorboard albumentations +RUN pip install setuptools==59.5.0 diff --git a/docker/MMDeploy.Dockerfile b/docker/MMDeploy.Dockerfile new file mode 100644 index 0000000000..d0e8a3d2e3 --- /dev/null +++ b/docker/MMDeploy.Dockerfile @@ -0,0 +1,18 @@ +FROM openmmlab/mmdeploy:ubuntu20.04-cuda11.8-mmdeploy1.3.1 + +# previous method of local installation fails so fallback to the official method +RUN pip install -U openmim + +RUN mim install mmsegmentation +RUN python3 -m pip install ftfy regex + +RUN git clone https://github.com/logivations/mmsegmentation.git /mmsegmentation + +#MMPretrain +RUN mim install git+https://github.com/logivations/mmpretrain.git +RUN git clone https://github.com/logivations/mmpretrain.git /mmpretrain + +RUN mim install mmpose +RUN git clone https://github.com/logivations/mmpose.git /mmpose + +WORKDIR /root/workspace/mmdeploy diff --git a/inference.sh b/inference.sh new file mode 100644 index 0000000000..e74e80c569 --- /dev/null +++ b/inference.sh @@ -0,0 +1,4 @@ +python demo/top_down_image_demo_full_frame_without_det.py configs/my_top_down_res18_coco_256x192_lifted_fork.py \ + /data/mmpose/work_dirs/my_top_down_res18_coco_256x192_lifted_fork/epoch_400.pth \ + --img-root /data/stef/20230607_lifted_fork/crops_test \ + --out-img-root /data/stef/20230607_lifted_fork/crops_test_inference diff --git a/mmpose/configs/_base_/default_runtime.py b/mmpose/configs/_base_/default_runtime.py index 349ecf4b17..16e4fb92b3 100644 --- a/mmpose/configs/_base_/default_runtime.py +++ b/mmpose/configs/_base_/default_runtime.py @@ -14,7 +14,7 @@ timer=dict(type=IterTimerHook), logger=dict(type=LoggerHook, interval=50), param_scheduler=dict(type=ParamSchedulerHook), - checkpoint=dict(type=CheckpointHook, interval=10), + checkpoint=dict(type=CheckpointHook, interval=10, save_best="val_5pr"), sampler_seed=dict(type=DistSamplerSeedHook), visualization=dict(type=PoseVisualizationHook, enable=False), ) diff --git a/mmpose/datasets/datasets/base/base_coco_style_dataset.py b/mmpose/datasets/datasets/base/base_coco_style_dataset.py index 9a223984e0..34662a3300 100644 --- a/mmpose/datasets/datasets/base/base_coco_style_dataset.py +++ b/mmpose/datasets/datasets/base/base_coco_style_dataset.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import copy +import os.path import os.path as osp from copy import deepcopy from itertools import chain, filterfalse, groupby @@ -235,12 +236,14 @@ def _load_annotations(self) -> Tuple[List[dict], List[dict]]: if img_id % self.sample_interval != 0: continue img = self.coco.loadImgs(img_id)[0] - img.update({ - 'img_id': - img_id, - 'img_path': - osp.join(self.data_prefix['img'], img['file_name']), - }) + img_path = osp.join(self.data_prefix['img'], img['file_name']) + if os.path.exists(img_path): + img.update({ + 'img_id': img_id, + 'img_path': img_path, + }) + else: + continue image_list.append(img) ann_ids = self.coco.getAnnIds(imgIds=img_id) @@ -450,25 +453,27 @@ def _load_detection_results(self) -> List[dict]: img = self.coco.loadImgs(det['image_id'])[0] img_path = osp.join(self.data_prefix['img'], img['file_name']) - bbox_xywh = np.array( - det['bbox'][:4], dtype=np.float32).reshape(1, 4) - bbox = bbox_xywh2xyxy(bbox_xywh) - bbox_score = np.array(det['score'], dtype=np.float32).reshape(1) - - # use dummy keypoint location and visibility - keypoints = np.zeros((1, num_keypoints, 2), dtype=np.float32) - keypoints_visible = np.ones((1, num_keypoints), dtype=np.float32) - - data_list.append({ - 'img_id': det['image_id'], - 'img_path': img_path, - 'img_shape': (img['height'], img['width']), - 'bbox': bbox, - 'bbox_score': bbox_score, - 'keypoints': keypoints, - 'keypoints_visible': keypoints_visible, - 'id': id_, - }) + if os.path.exists(img_path): + bbox_xywh = np.array( + det['bbox'][:4], dtype=np.float32).reshape(1, 4) + bbox = bbox_xywh2xyxy(bbox_xywh) + bbox_score = np.array(det['score'], dtype=np.float32).reshape(1) + + # use dummy keypoint location and visibility + keypoints = np.zeros((1, num_keypoints, 2), dtype=np.float32) + keypoints_visible = np.ones((1, num_keypoints), dtype=np.float32) + data_list.append({ + 'img_id': det['image_id'], + 'img_path': img_path, + 'img_shape': (img['height'], img['width']), + 'bbox': bbox, + 'bbox_score': bbox_score, + 'keypoints': keypoints, + 'keypoints_visible': keypoints_visible, + 'id': id_, + }) + else: + continue id_ += 1 diff --git a/mmpose/datasets/datasets/body/coco_dataset.py b/mmpose/datasets/datasets/body/coco_dataset.py index 7cc971f91f..49bedc96a3 100644 --- a/mmpose/datasets/datasets/body/coco_dataset.py +++ b/mmpose/datasets/datasets/body/coco_dataset.py @@ -1,35 +1,45 @@ # Copyright (c) OpenMMLab. All rights reserved. +import os +import random +from typing import Callable, List, Sequence from mmpose.registry import DATASETS from ..base import BaseCocoStyleDataset -@DATASETS.register_module() -class CocoDataset(BaseCocoStyleDataset): - """COCO dataset for pose estimation. +def generate_simple_dataset_info(labels: list) -> dict: + dataset_info = dict( + dataset_name='coco', + paper_info=dict( + author='', + title=f'Auto: {len(labels)} keypoints', + container='', + year='', + homepage='', + ), + keypoint_info={}, + skeleton_info={}, + joint_weights=[1.0] * len(labels), + sigmas=[0.05] * len(labels) + ) - "Microsoft COCO: Common Objects in Context", ECCV'2014. - More details can be found in the `paper - `__ . 
+ # Generate keypoint_info + last_label = labels[-1] + for idx, label in enumerate(labels): + dataset_info['keypoint_info'][idx] = dict( + name=label, + id=idx, + color=[random.randint(0, 255) for _ in range(3)], # Random RGB color + type='upper', + swap=last_label + ) + last_label = label - COCO keypoints:: + return dataset_info - 0: 'nose', - 1: 'left_eye', - 2: 'right_eye', - 3: 'left_ear', - 4: 'right_ear', - 5: 'left_shoulder', - 6: 'right_shoulder', - 7: 'left_elbow', - 8: 'right_elbow', - 9: 'left_wrist', - 10: 'right_wrist', - 11: 'left_hip', - 12: 'right_hip', - 13: 'left_knee', - 14: 'right_knee', - 15: 'left_ankle', - 16: 'right_ankle' + +@DATASETS.register_module() +class CocoDataset(BaseCocoStyleDataset): + """COCO dataset for keypoints estimation. Args: ann_file (str): Annotation file path. Default: ''. @@ -69,4 +79,20 @@ class CocoDataset(BaseCocoStyleDataset): image. Default: 1000. """ - METAINFO: dict = dict(from_file='configs/_base_/datasets/coco.py') + def __init__(self, labels, *args, **kwargs): + self.default_config = 'configs/_base_/datasets/coco.py' + super().__init__(metainfo=self.get_dataset_info(labels), *args, **kwargs) + + def get_dataset_info(self, labels: list) -> dict: + print(f"Building dataset for labels: {labels}") + if not isinstance(labels, list): + print(f"Please specify labels in CocoDataset, used dataset config: {self.default_config}") + return dict(from_file=self.default_config) + + dataset_info_path = f'configs/_base_/datasets/coco_{len(labels)}kp.py' + if os.path.exists(dataset_info_path): + print(f"Found custom dataset config: {dataset_info_path}") + return dict(from_file=dataset_info_path) + + print(f"Dataset config not found, trying to generate automatically...") + return generate_simple_dataset_info(labels) diff --git a/mmpose/datasets/datasets/bottom_up/bottom_up_forklift.py b/mmpose/datasets/datasets/bottom_up/bottom_up_forklift.py new file mode 100644 index 0000000000..5fe3750355 --- /dev/null +++ b/mmpose/datasets/datasets/bottom_up/bottom_up_forklift.py @@ -0,0 +1,352 @@ +import os +from collections import OrderedDict, defaultdict + +import json_tricks as json +import numpy as np +import xtcocotools +from xtcocotools.coco import COCO +from xtcocotools.cocoeval import COCOeval + +from mmpose.datasets.builder import DATASETS +from .bottom_up_base_dataset import BottomUpBaseDataset + + +@DATASETS.register_module() +class BottomUpForkliftDataset(BottomUpBaseDataset): + """COCO dataset for bottom-up pose estimation. + + The dataset loads raw features and apply specified transforms + to return a dict containing the image tensors and other information. + + COCO keypoint indexes:: + + 0: 'nose', + 1: 'left_eye', + 2: 'right_eye', + 3: 'left_ear', + 4: 'right_ear', + 5: 'left_shoulder', + 6: 'right_shoulder', + 7: 'left_elbow', + 8: 'right_elbow', + 9: 'left_wrist', + 10: 'right_wrist', + 11: 'left_hip', + 12: 'right_hip', + 13: 'left_knee', + 14: 'right_knee', + 15: 'left_ankle', + 16: 'right_ankle' + + Args: + ann_file (str): Path to the annotation file. + img_prefix (str): Path to a directory where images are held. + Default: None. + data_cfg (dict): config + pipeline (list[dict | callable]): A sequence of data transforms. + test_mode (bool): Store True when building test or + validation dataset. Default: False. 
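The CocoDataset changes above replace the fixed METAINFO with a labels-driven lookup: an explicit configs/_base_/datasets/coco_{N}kp.py wins if it exists, otherwise generate_simple_dataset_info builds a minimal metainfo on the fly. A sketch of how the dataset might be declared in a config follows; the label names and paths are hypothetical.

```python
# Hypothetical config snippet; label names and paths are placeholders.
labels = ["fork_left_tip", "fork_right_tip", "mast_top"]

train_dataset = dict(
    type="CocoDataset",
    labels=labels,                        # drives get_dataset_info()
    ann_file="annotations/train.json",
    data_prefix=dict(img="images/"),
)

# If configs/_base_/datasets/coco_3kp.py is absent, the generated metainfo has
# one entry per label with a random display colour, uniform joint_weights (1.0)
# and sigmas (0.05), and each keypoint's `swap` set to the previous label
# (the first label wraps around to the last).
```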
+ """ + + def __init__(self, + ann_file, + img_prefix, + data_cfg, + pipeline, + test_mode=False): + super().__init__(ann_file, img_prefix, data_cfg, pipeline, test_mode) + + self.ann_info['flip_index'] = [ + 1, 0, 3, 2, 5, 4 + ] + + self.ann_info['use_different_joint_weights'] = False + self.ann_info['joint_weights'] = np.array( + [ + 1., 1., 1., 1., 1., 1., + ], + dtype=np.float32).reshape((self.ann_info['num_joints'], 1)) + + # 'https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/' + # 'pycocotools/cocoeval.py#L523' + self.sigmas = np.array([1.] * 6) / 10.0 + + self.coco = COCO(ann_file) + + cats = [ + cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds()) + ] + self.classes = ['__background__'] + cats + self.num_classes = len(self.classes) + self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) + self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) + self._coco_ind_to_class_ind = dict( + (self._class_to_coco_ind[cls], self._class_to_ind[cls]) + for cls in self.classes[1:]) + self.img_ids = self.coco.getImgIds() + if not test_mode: + self.img_ids = [ + img_id for img_id in self.img_ids + if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 + ] + self.num_images = len(self.img_ids) + self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs) + self.dataset_name = 'coco' + + print(f'=> num_images: {self.num_images}') + + @staticmethod + def _get_mapping_id_name(imgs): + """ + Args: + imgs (dict): dict of image info. + + Returns: + tuple: Image name & id mapping dicts. + + - id2name (dict): Mapping image id to name. + - name2id (dict): Mapping image name to id. + """ + id2name = {} + name2id = {} + for image_id, image in imgs.items(): + file_name = image['file_name'] + id2name[image_id] = file_name + name2id[file_name] = image_id + + return id2name, name2id + + def _get_single(self, idx): + """Get anno for a single image. 
+ + Args: + idx (int): image idx + + Returns: + dict: info for model training + """ + coco = self.coco + img_id = self.img_ids[idx] + ann_ids = coco.getAnnIds(imgIds=img_id) + anno = coco.loadAnns(ann_ids) + + mask = self._get_mask(anno, idx) + anno = [ + obj for obj in anno + if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0 + ] + + joints = self._get_joints(anno) + mask_list = [mask.copy() for _ in range(self.ann_info['num_scales'])] + joints_list = [ + joints.copy() for _ in range(self.ann_info['num_scales']) + ] + + db_rec = {} + db_rec['dataset'] = self.dataset_name + db_rec['image_file'] = os.path.join(self.img_prefix, + self.id2name[img_id]) + db_rec['mask'] = mask_list + db_rec['joints'] = joints_list + + return db_rec + + def _get_joints(self, anno): + """Get joints for all people in an image.""" + num_people = len(anno) + + if self.ann_info['scale_aware_sigma']: + joints = np.zeros((num_people, self.ann_info['num_joints'], 4), + dtype=np.float32) + else: + joints = np.zeros((num_people, self.ann_info['num_joints'], 3), + dtype=np.float32) + + for i, obj in enumerate(anno): + joints[i, :self.ann_info['num_joints'], :3] = \ + np.array(obj['keypoints']).reshape([-1, 3]) + if self.ann_info['scale_aware_sigma']: + # get person box + box = obj['bbox'] + size = max(box[2], box[3]) + sigma = size / self.base_size * self.base_sigma + if self.int_sigma: + sigma = int(np.ceil(sigma)) + assert sigma > 0, sigma + joints[i, :, 3] = sigma + + return joints + + def _get_mask(self, anno, idx): + """Get ignore masks to mask out losses.""" + coco = self.coco + img_info = coco.loadImgs(self.img_ids[idx])[0] + + m = np.zeros((img_info['height'], img_info['width']), dtype=np.float32) + + for obj in anno: + if 'segmentation' in obj: + if obj['iscrowd']: + rle = xtcocotools.mask.frPyObjects(obj['segmentation'], + img_info['height'], + img_info['width']) + m += xtcocotools.mask.decode(rle) + elif obj['num_keypoints'] == 0: + rles = xtcocotools.mask.frPyObjects( + obj['segmentation'], img_info['height'], + img_info['width']) + for rle in rles: + m += xtcocotools.mask.decode(rle) + + return m < 0.5 + + def evaluate(self, outputs, res_folder, metric='mAP', **kwargs): + """Evaluate coco keypoint results. The pose prediction results will be + saved in `${res_folder}/result_keypoints.json`. + + Note: + num_people: P + num_keypoints: K + + Args: + outputs (list(preds, scores, image_path, heatmap)): + + * preds (list[np.ndarray(P, K, 3+tag_num)]): + Pose predictions for all people in images. + * scores (list[P]): + * image_path (list[str]): For example, ['coco/images/ + val2017/000000397133.jpg'] + * heatmap (np.ndarray[N, K, H, W]): model outputs. + + res_folder (str): Path of directory to save the results. + metric (str | list[str]): Metric to be performed. Defaults: 'mAP'. + + Returns: + dict: Evaluation results for evaluation metric. 
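For reference, a minimal sketch of the `outputs` structure that the evaluate() method above consumes; shapes follow the docstring, all values are dummies, and the image basename must match a file_name in the annotation file so that the name2id lookup succeeds.

```python
import numpy as np

num_people, num_keypoints = 2, 6          # 6 keypoints for this forklift dataset
fake_output = dict(
    preds=np.zeros((num_people, num_keypoints, 4), dtype=np.float32),  # x, y, score, tag
    scores=np.ones(num_people, dtype=np.float32),                      # one score per person
    image_paths=["data/forklift/images/000001.jpg"],                   # basename must be a known file_name
)
# dataset.evaluate([fake_output], res_folder="/tmp/forklift_eval", metric="mAP")
```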
+ """ + metrics = metric if isinstance(metric, list) else [metric] + allowed_metrics = ['mAP'] + for metric in metrics: + if metric not in allowed_metrics: + raise KeyError(f'metric {metric} is not supported') + + res_file = os.path.join(res_folder, 'result_keypoints.json') + + preds = [] + scores = [] + image_paths = [] + + for output in outputs: + preds.append(output['preds']) + scores.append(output['scores']) + image_paths.append(output['image_paths'][0]) + + kpts = defaultdict(list) + # iterate over images + for idx, _preds in enumerate(preds): + str_image_path = image_paths[idx] + image_id = self.name2id[os.path.basename(str_image_path)] + # iterate over people + for idx_person, kpt in enumerate(_preds): + # use bbox area + area = (np.max(kpt[:, 0]) - np.min(kpt[:, 0])) * ( + np.max(kpt[:, 1]) - np.min(kpt[:, 1])) + + kpts[image_id].append({ + 'keypoints': kpt[:, 0:3], + 'score': scores[idx][idx_person], + 'tags': kpt[:, 3], + 'image_id': image_id, + 'area': area, + }) + + oks_nmsed_kpts = [] + for img in kpts.keys(): + img_kpts = kpts[img] + keep = [] + if len(keep) == 0: + oks_nmsed_kpts.append(img_kpts) + else: + oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep]) + + self._write_coco_keypoint_results(oks_nmsed_kpts, res_file) + + info_str = self._do_python_keypoint_eval(res_file) + name_value = OrderedDict(info_str) + return name_value + + def _write_coco_keypoint_results(self, keypoints, res_file): + """Write results into a json file.""" + data_pack = [{ + 'cat_id': self._class_to_coco_ind[cls], + 'cls_ind': cls_ind, + 'cls': cls, + 'ann_type': 'keypoints', + 'keypoints': keypoints + } for cls_ind, cls in enumerate(self.classes) + if not cls == '__background__'] + + results = self._coco_keypoint_results_one_category_kernel(data_pack[0]) + + with open(res_file, 'w') as f: + json.dump(results, f, sort_keys=True, indent=4) + + def _coco_keypoint_results_one_category_kernel(self, data_pack): + """Get coco keypoint results.""" + cat_id = data_pack['cat_id'] + keypoints = data_pack['keypoints'] + cat_results = [] + + for img_kpts in keypoints: + if len(img_kpts) == 0: + continue + + _key_points = np.array( + [img_kpt['keypoints'] for img_kpt in img_kpts]) + key_points = _key_points.reshape(-1, + self.ann_info['num_joints'] * 3) + + for img_kpt, key_point in zip(img_kpts, key_points): + kpt = key_point.reshape((self.ann_info['num_joints'], 3)) + left_top = np.amin(kpt, axis=0) + right_bottom = np.amax(kpt, axis=0) + + w = right_bottom[0] - left_top[0] + h = right_bottom[1] - left_top[1] + + cat_results.append({ + 'image_id': img_kpt['image_id'], + 'category_id': cat_id, + 'keypoints': key_point.tolist(), + 'score': img_kpt['score'], + 'bbox': [left_top[0], left_top[1], w, h] + }) + + return cat_results + + def _do_python_keypoint_eval(self, res_file): + """Keypoint evaluation using COCOAPI.""" + + stats_names = [ + 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', + 'AR .75', 'AR (M)', 'AR (L)' + ] + + with open(res_file, 'r') as file: + res_json = json.load(file) + if not res_json: + info_str = list(zip(stats_names, [ + 0, + ] * len(stats_names))) + return info_str + + coco_det = self.coco.loadRes(res_file) + coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas) + coco_eval.params.useSegm = None + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + info_str = list(zip(stats_names, coco_eval.stats)) + + return info_str diff --git a/mmpose/datasets/datasets/bottom_up/bottom_up_forklift4kp.py 
b/mmpose/datasets/datasets/bottom_up/bottom_up_forklift4kp.py new file mode 100644 index 0000000000..93f9a23674 --- /dev/null +++ b/mmpose/datasets/datasets/bottom_up/bottom_up_forklift4kp.py @@ -0,0 +1,352 @@ +import os +from collections import OrderedDict, defaultdict + +import json_tricks as json +import numpy as np +import xtcocotools +from xtcocotools.coco import COCO +from xtcocotools.cocoeval import COCOeval + +from mmpose.datasets.builder import DATASETS +from .bottom_up_base_dataset import BottomUpBaseDataset + + +@DATASETS.register_module() +class BottomUpForkliftDataset4KP(BottomUpBaseDataset): + """COCO dataset for bottom-up pose estimation. + + The dataset loads raw features and apply specified transforms + to return a dict containing the image tensors and other information. + + COCO keypoint indexes:: + + 0: 'nose', + 1: 'left_eye', + 2: 'right_eye', + 3: 'left_ear', + 4: 'right_ear', + 5: 'left_shoulder', + 6: 'right_shoulder', + 7: 'left_elbow', + 8: 'right_elbow', + 9: 'left_wrist', + 10: 'right_wrist', + 11: 'left_hip', + 12: 'right_hip', + 13: 'left_knee', + 14: 'right_knee', + 15: 'left_ankle', + 16: 'right_ankle' + + Args: + ann_file (str): Path to the annotation file. + img_prefix (str): Path to a directory where images are held. + Default: None. + data_cfg (dict): config + pipeline (list[dict | callable]): A sequence of data transforms. + test_mode (bool): Store True when building test or + validation dataset. Default: False. + """ + + def __init__(self, + ann_file, + img_prefix, + data_cfg, + pipeline, + test_mode=False): + super().__init__(ann_file, img_prefix, data_cfg, pipeline, test_mode) + + self.ann_info['flip_index'] = [ + 1, 0, 3, 2, + ] + + self.ann_info['use_different_joint_weights'] = False + self.ann_info['joint_weights'] = np.array( + [ + 1., 1., 1., 1., + ], + dtype=np.float32).reshape((self.ann_info['num_joints'], 1)) + + # 'https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/' + # 'pycocotools/cocoeval.py#L523' + self.sigmas = np.array([1.] * 4) / 10.0 + + self.coco = COCO(ann_file) + + cats = [ + cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds()) + ] + self.classes = ['__background__'] + cats + self.num_classes = len(self.classes) + self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) + self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) + self._coco_ind_to_class_ind = dict( + (self._class_to_coco_ind[cls], self._class_to_ind[cls]) + for cls in self.classes[1:]) + self.img_ids = self.coco.getImgIds() + if not test_mode: + self.img_ids = [ + img_id for img_id in self.img_ids + if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 + ] + self.num_images = len(self.img_ids) + self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs) + self.dataset_name = 'coco' + + print(f'=> num_images: {self.num_images}') + + @staticmethod + def _get_mapping_id_name(imgs): + """ + Args: + imgs (dict): dict of image info. + + Returns: + tuple: Image name & id mapping dicts. + + - id2name (dict): Mapping image id to name. + - name2id (dict): Mapping image name to id. + """ + id2name = {} + name2id = {} + for image_id, image in imgs.items(): + file_name = image['file_name'] + id2name[image_id] = file_name + name2id[file_name] = image_id + + return id2name, name2id + + def _get_single(self, idx): + """Get anno for a single image. 
+ + Args: + idx (int): image idx + + Returns: + dict: info for model training + """ + coco = self.coco + img_id = self.img_ids[idx] + ann_ids = coco.getAnnIds(imgIds=img_id) + anno = coco.loadAnns(ann_ids) + + mask = self._get_mask(anno, idx) + anno = [ + obj for obj in anno + if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0 + ] + + joints = self._get_joints(anno) + mask_list = [mask.copy() for _ in range(self.ann_info['num_scales'])] + joints_list = [ + joints.copy() for _ in range(self.ann_info['num_scales']) + ] + + db_rec = {} + db_rec['dataset'] = self.dataset_name + db_rec['image_file'] = os.path.join(self.img_prefix, + self.id2name[img_id]) + db_rec['mask'] = mask_list + db_rec['joints'] = joints_list + + return db_rec + + def _get_joints(self, anno): + """Get joints for all people in an image.""" + num_people = len(anno) + + if self.ann_info['scale_aware_sigma']: + joints = np.zeros((num_people, self.ann_info['num_joints'], 4), + dtype=np.float32) + else: + joints = np.zeros((num_people, self.ann_info['num_joints'], 3), + dtype=np.float32) + + for i, obj in enumerate(anno): + joints[i, :self.ann_info['num_joints'], :3] = \ + np.array(obj['keypoints']).reshape([-1, 3]) + if self.ann_info['scale_aware_sigma']: + # get person box + box = obj['bbox'] + size = max(box[2], box[3]) + sigma = size / self.base_size * self.base_sigma + if self.int_sigma: + sigma = int(np.ceil(sigma)) + assert sigma > 0, sigma + joints[i, :, 3] = sigma + + return joints + + def _get_mask(self, anno, idx): + """Get ignore masks to mask out losses.""" + coco = self.coco + img_info = coco.loadImgs(self.img_ids[idx])[0] + + m = np.zeros((img_info['height'], img_info['width']), dtype=np.float32) + + for obj in anno: + if 'segmentation' in obj: + if obj['iscrowd']: + rle = xtcocotools.mask.frPyObjects(obj['segmentation'], + img_info['height'], + img_info['width']) + m += xtcocotools.mask.decode(rle) + elif obj['num_keypoints'] == 0: + rles = xtcocotools.mask.frPyObjects( + obj['segmentation'], img_info['height'], + img_info['width']) + for rle in rles: + m += xtcocotools.mask.decode(rle) + + return m < 0.5 + + def evaluate(self, outputs, res_folder, metric='mAP', **kwargs): + """Evaluate coco keypoint results. The pose prediction results will be + saved in `${res_folder}/result_keypoints.json`. + + Note: + num_people: P + num_keypoints: K + + Args: + outputs (list(preds, scores, image_path, heatmap)): + + * preds (list[np.ndarray(P, K, 3+tag_num)]): + Pose predictions for all people in images. + * scores (list[P]): + * image_path (list[str]): For example, ['coco/images/ + val2017/000000397133.jpg'] + * heatmap (np.ndarray[N, K, H, W]): model outputs. + + res_folder (str): Path of directory to save the results. + metric (str | list[str]): Metric to be performed. Defaults: 'mAP'. + + Returns: + dict: Evaluation results for evaluation metric. 
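BottomUpForkliftDataset4KP above is a copy of BottomUpForkliftDataset with the per-keypoint constants reduced from six joints to four; for quick comparison, the differing values from the two __init__ methods are collected below (this dict is illustrative only and not used anywhere in the code).

```python
# Differences between the two bottom-up forklift variants (from their __init__):
BOTTOM_UP_VARIANTS = {
    "BottomUpForkliftDataset": dict(
        num_joints=6, flip_index=[1, 0, 3, 2, 5, 4], sigma_per_joint=0.1),
    "BottomUpForkliftDataset4KP": dict(
        num_joints=4, flip_index=[1, 0, 3, 2], sigma_per_joint=0.1),
}
```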
+ """ + metrics = metric if isinstance(metric, list) else [metric] + allowed_metrics = ['mAP'] + for metric in metrics: + if metric not in allowed_metrics: + raise KeyError(f'metric {metric} is not supported') + + res_file = os.path.join(res_folder, 'result_keypoints.json') + + preds = [] + scores = [] + image_paths = [] + + for output in outputs: + preds.append(output['preds']) + scores.append(output['scores']) + image_paths.append(output['image_paths'][0]) + + kpts = defaultdict(list) + # iterate over images + for idx, _preds in enumerate(preds): + str_image_path = image_paths[idx] + image_id = self.name2id[os.path.basename(str_image_path)] + # iterate over people + for idx_person, kpt in enumerate(_preds): + # use bbox area + area = (np.max(kpt[:, 0]) - np.min(kpt[:, 0])) * ( + np.max(kpt[:, 1]) - np.min(kpt[:, 1])) + + kpts[image_id].append({ + 'keypoints': kpt[:, 0:3], + 'score': scores[idx][idx_person], + 'tags': kpt[:, 3], + 'image_id': image_id, + 'area': area, + }) + + oks_nmsed_kpts = [] + for img in kpts.keys(): + img_kpts = kpts[img] + keep = [] + if len(keep) == 0: + oks_nmsed_kpts.append(img_kpts) + else: + oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep]) + + self._write_coco_keypoint_results(oks_nmsed_kpts, res_file) + + info_str = self._do_python_keypoint_eval(res_file) + name_value = OrderedDict(info_str) + return name_value + + def _write_coco_keypoint_results(self, keypoints, res_file): + """Write results into a json file.""" + data_pack = [{ + 'cat_id': self._class_to_coco_ind[cls], + 'cls_ind': cls_ind, + 'cls': cls, + 'ann_type': 'keypoints', + 'keypoints': keypoints + } for cls_ind, cls in enumerate(self.classes) + if not cls == '__background__'] + + results = self._coco_keypoint_results_one_category_kernel(data_pack[0]) + + with open(res_file, 'w') as f: + json.dump(results, f, sort_keys=True, indent=4) + + def _coco_keypoint_results_one_category_kernel(self, data_pack): + """Get coco keypoint results.""" + cat_id = data_pack['cat_id'] + keypoints = data_pack['keypoints'] + cat_results = [] + + for img_kpts in keypoints: + if len(img_kpts) == 0: + continue + + _key_points = np.array( + [img_kpt['keypoints'] for img_kpt in img_kpts]) + key_points = _key_points.reshape(-1, + self.ann_info['num_joints'] * 3) + + for img_kpt, key_point in zip(img_kpts, key_points): + kpt = key_point.reshape((self.ann_info['num_joints'], 3)) + left_top = np.amin(kpt, axis=0) + right_bottom = np.amax(kpt, axis=0) + + w = right_bottom[0] - left_top[0] + h = right_bottom[1] - left_top[1] + + cat_results.append({ + 'image_id': img_kpt['image_id'], + 'category_id': cat_id, + 'keypoints': key_point.tolist(), + 'score': img_kpt['score'], + 'bbox': [left_top[0], left_top[1], w, h] + }) + + return cat_results + + def _do_python_keypoint_eval(self, res_file): + """Keypoint evaluation using COCOAPI.""" + + stats_names = [ + 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', + 'AR .75', 'AR (M)', 'AR (L)' + ] + + with open(res_file, 'r') as file: + res_json = json.load(file) + if not res_json: + info_str = list(zip(stats_names, [ + 0, + ] * len(stats_names))) + return info_str + + coco_det = self.coco.loadRes(res_file) + coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas) + coco_eval.params.useSegm = None + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + info_str = list(zip(stats_names, coco_eval.stats)) + + return info_str diff --git a/mmpose/datasets/datasets/top_down/topdown_forklift_dataset.py 
b/mmpose/datasets/datasets/top_down/topdown_forklift_dataset.py new file mode 100644 index 0000000000..b8bba65f70 --- /dev/null +++ b/mmpose/datasets/datasets/top_down/topdown_forklift_dataset.py @@ -0,0 +1,464 @@ +import os +import warnings +from collections import OrderedDict, defaultdict + +import json_tricks as json +import numpy as np +from xtcocotools.coco import COCO +from xtcocotools.cocoeval import COCOeval + +from ....core.post_processing import oks_nms, soft_oks_nms +from ...registry import DATASETS +from .topdown_base_dataset import TopDownBaseDataset + + +@DATASETS.register_module() +class TopDownForkliftDataset(TopDownBaseDataset): + """CocoDataset dataset for top-down pose estimation. + + `Microsoft COCO: Common Objects in Context' ECCV'2014 + More details can be found in the `paper + `__ . + + The dataset loads raw features and apply specified transforms + to return a dict containing the image tensors and other information. + + COCO keypoint indexes:: + + 0: 'nose', + 1: 'left_eye', + 2: 'right_eye', + 3: 'left_ear', + 4: 'right_ear', + 5: 'left_shoulder', + 6: 'right_shoulder', + 7: 'left_elbow', + 8: 'right_elbow', + 9: 'left_wrist', + 10: 'right_wrist', + 11: 'left_hip', + 12: 'right_hip', + 13: 'left_knee', + 14: 'right_knee', + 15: 'left_ankle', + 16: 'right_ankle' + + Args: + ann_file (str): Path to the annotation file. + img_prefix (str): Path to a directory where images are held. + Default: None. + data_cfg (dict): config + pipeline (list[dict | callable]): A sequence of data transforms. + test_mode (bool): Store True when building test or + validation dataset. Default: False. + """ + + def __init__(self, + ann_file, + img_prefix, + data_cfg, + pipeline, + test_mode=False): + super().__init__( + ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode) + + self.use_gt_bbox = data_cfg['use_gt_bbox'] + self.bbox_file = data_cfg['bbox_file'] + self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0) + if 'image_thr' in data_cfg: + warnings.warn( + 'image_thr is deprecated, ' + 'please use det_bbox_thr instead', DeprecationWarning) + self.det_bbox_thr = data_cfg['image_thr'] + self.use_nms = data_cfg.get('use_nms', True) + self.soft_nms = data_cfg['soft_nms'] + self.nms_thr = data_cfg['nms_thr'] + self.oks_thr = data_cfg['oks_thr'] + self.vis_thr = data_cfg['vis_thr'] + + self.ann_info['flip_pairs'] = [[0, 1], [2, 3], [4, 5]] + + self.ann_info['upper_body_ids'] = (0, 1, 2, 3, 4, 5) + self.ann_info['lower_body_ids'] = () + + self.ann_info['use_different_joint_weights'] = False + self.ann_info['joint_weights'] = np.array( + [ + 1., 1., 1., 1., 1., 1., + ], + dtype=np.float32).reshape((self.ann_info['num_joints'], 1)) + + # 'https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/' + # 'pycocotools/cocoeval.py#L523' + self.sigmas = np.array([ + .1, .1, .1, .1, .1, .1 + ]) / 1.0 + + self.coco = COCO(ann_file) + + cats = [ + cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds()) + ] + self.classes = ['__background__'] + cats + self.num_classes = len(self.classes) + self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) + self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) + self._coco_ind_to_class_ind = dict( + (self._class_to_coco_ind[cls], self._class_to_ind[cls]) + for cls in self.classes[1:]) + self.img_ids = self.coco.getImgIds() + self.num_images = len(self.img_ids) + self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs) + self.dataset_name = 'coco' + + self.db = self._get_db() + + print(f'=> num_images: 
{self.num_images}') + print(f'=> load {len(self.db)} samples') + + @staticmethod + def _get_mapping_id_name(imgs): + """ + Args: + imgs (dict): dict of image info. + + Returns: + tuple: Image name & id mapping dicts. + + - id2name (dict): Mapping image id to name. + - name2id (dict): Mapping image name to id. + """ + id2name = {} + name2id = {} + for image_id, image in imgs.items(): + file_name = image['file_name'] + id2name[image_id] = file_name + name2id[file_name] = image_id + + return id2name, name2id + + def _get_db(self): + """Load dataset.""" + if (not self.test_mode) or self.use_gt_bbox: + # use ground truth bbox + gt_db = self._load_coco_keypoint_annotations() + else: + # use bbox from detection + gt_db = self._load_coco_person_detection_results() + return gt_db + + def _load_coco_keypoint_annotations(self): + """Ground truth bbox and keypoints.""" + gt_db = [] + for img_id in self.img_ids: + gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id)) + return gt_db + + def _load_coco_keypoint_annotation_kernel(self, img_id): + """load annotation from COCOAPI. + + Note: + bbox:[x1, y1, w, h] + Args: + img_id: coco image id + Returns: + dict: db entry + """ + img_ann = self.coco.loadImgs(img_id)[0] + width = img_ann['width'] + height = img_ann['height'] + num_joints = self.ann_info['num_joints'] + + ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False) + objs = self.coco.loadAnns(ann_ids) + + # sanitize bboxes + valid_objs = [] + for obj in objs: + if 'bbox' not in obj: + continue + x, y, w, h = obj['bbox'] + x1 = max(0, x) + y1 = max(0, y) + x2 = min(width - 1, x1 + max(0, w - 1)) + y2 = min(height - 1, y1 + max(0, h - 1)) + if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1: + obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1] + valid_objs.append(obj) + objs = valid_objs + + bbox_id = 0 + rec = [] + for obj in objs: + if 'keypoints' not in obj: + continue + if max(obj['keypoints']) == 0: + continue + if 'num_keypoints' in obj and obj['num_keypoints'] == 0: + continue + joints_3d = np.zeros((num_joints, 3), dtype=np.float32) + joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32) + + keypoints = np.array(obj['keypoints']).reshape(-1, 3) + joints_3d[:, :2] = keypoints[:, :2] + joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3]) + + center, scale = self._xywh2cs(*obj['clean_bbox'][:4]) + + image_file = os.path.join(self.img_prefix, self.id2name[img_id]) + rec.append({ + 'image_file': image_file, + 'center': center, + 'scale': scale, + 'bbox': obj['clean_bbox'][:4], + 'rotation': 0, + 'joints_3d': joints_3d, + 'joints_3d_visible': joints_3d_visible, + 'dataset': self.dataset_name, + 'bbox_score': 1, + 'bbox_id': bbox_id + }) + bbox_id = bbox_id + 1 + + return rec + + def _xywh2cs(self, x, y, w, h): + """This encodes bbox(x,y,w,w) into (center, scale) + + Args: + x, y, w, h + + Returns: + tuple: A tuple containing center and scale. + + - center (np.ndarray[float32](2,)): center of the bbox (x, y). + - scale (np.ndarray[float32](2,)): scale of the bbox w & h. 
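To make the bbox sanitisation in _load_coco_keypoint_annotation_kernel above concrete, here is a standalone sketch of the clipping it applies before storing obj['clean_bbox'] (the original additionally requires obj['area'] > 0 when an area field is present; test values are arbitrary).

```python
def clean_bbox(x, y, w, h, width, height):
    """Mirror of the clipping applied before storing obj['clean_bbox']."""
    x1, y1 = max(0, x), max(0, y)
    x2 = min(width - 1, x1 + max(0, w - 1))
    y2 = min(height - 1, y1 + max(0, h - 1))
    if x2 > x1 and y2 > y1:
        return [x1, y1, x2 - x1, y2 - y1]
    return None  # degenerate boxes are dropped

print(clean_bbox(600, 400, 100, 120, width=640, height=480))  # -> [600, 400, 39, 79]
```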
+ """ + aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[ + 'image_size'][1] + center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32) + + if (not self.test_mode) and np.random.rand() < 0.3: + center += 0.4 * (np.random.rand(2) - 0.5) * [w, h] + + if w > aspect_ratio * h: + h = w * 1.0 / aspect_ratio + elif w < aspect_ratio * h: + w = h * aspect_ratio + + # pixel std is 200.0 + scale = np.array([w / 200.0, h / 200.0], dtype=np.float32) + # padding to include proper amount of context + scale = scale * 1.25 + + return center, scale + + def _load_coco_person_detection_results(self): + """Load coco person detection results.""" + num_joints = self.ann_info['num_joints'] + all_boxes = None + with open(self.bbox_file, 'r') as f: + all_boxes = json.load(f) + + if not all_boxes: + raise ValueError('=> Load %s fail!' % self.bbox_file) + + print(f'=> Total boxes: {len(all_boxes)}') + + kpt_db = [] + bbox_id = 0 + for det_res in all_boxes: + if det_res['category_id'] != 1: + continue + + image_file = os.path.join(self.img_prefix, + self.id2name[det_res['image_id']]) + box = det_res['bbox'] + score = det_res['score'] + + if score < self.det_bbox_thr: + continue + + center, scale = self._xywh2cs(*box[:4]) + joints_3d = np.zeros((num_joints, 3), dtype=np.float32) + joints_3d_visible = np.ones((num_joints, 3), dtype=np.float32) + kpt_db.append({ + 'image_file': image_file, + 'center': center, + 'scale': scale, + 'rotation': 0, + 'bbox': box[:4], + 'bbox_score': score, + 'dataset': self.dataset_name, + 'joints_3d': joints_3d, + 'joints_3d_visible': joints_3d_visible, + 'bbox_id': bbox_id + }) + bbox_id = bbox_id + 1 + print(f'=> Total boxes after filter ' + f'low score@{self.det_bbox_thr}: {bbox_id}') + return kpt_db + + def evaluate(self, outputs, res_folder, metric='mAP', **kwargs): + """Evaluate coco keypoint results. The pose prediction results will be + saved in `${res_folder}/result_keypoints.json`. + + Note: + batch_size: N + num_keypoints: K + heatmap height: H + heatmap width: W + + Args: + outputs (list(dict)) + :preds (np.ndarray[N,K,3]): The first two dimensions are + coordinates, score is the third dimension of the array. + :boxes (np.ndarray[N,6]): [center[0], center[1], scale[0] + , scale[1],area, score] + :image_paths (list[str]): For example, ['data/coco/val2017 + /000000393226.jpg'] + :heatmap (np.ndarray[N, K, H, W]): model output heatmap + :bbox_id (list(int)). + res_folder (str): Path of directory to save the results. + metric (str | list[str]): Metric to be performed. Defaults: 'mAP'. + + Returns: + dict: Evaluation results for evaluation metric. 
+ """ + metrics = metric if isinstance(metric, list) else [metric] + allowed_metrics = ['mAP'] + for metric in metrics: + if metric not in allowed_metrics: + raise KeyError(f'metric {metric} is not supported') + + res_file = os.path.join(res_folder, 'result_keypoints.json') + + kpts = defaultdict(list) + + for output in outputs: + preds = output['preds'] + boxes = output['boxes'] + image_paths = output['image_paths'] + bbox_ids = output['bbox_ids'] + + batch_size = len(image_paths) + for i in range(batch_size): + image_id = self.name2id[image_paths[i][len(self.img_prefix):]] + kpts[image_id].append({ + 'keypoints': preds[i], + 'center': boxes[i][0:2], + 'scale': boxes[i][2:4], + 'area': boxes[i][4], + 'score': boxes[i][5], + 'image_id': image_id, + 'bbox_id': bbox_ids[i] + }) + kpts = self._sort_and_unique_bboxes(kpts) + + # rescoring and oks nms + num_joints = self.ann_info['num_joints'] + vis_thr = self.vis_thr + oks_thr = self.oks_thr + valid_kpts = [] + for image_id in kpts.keys(): + img_kpts = kpts[image_id] + for n_p in img_kpts: + box_score = n_p['score'] + kpt_score = 0 + valid_num = 0 + for n_jt in range(0, num_joints): + t_s = n_p['keypoints'][n_jt][2] + if t_s > vis_thr: + kpt_score = kpt_score + t_s + valid_num = valid_num + 1 + if valid_num != 0: + kpt_score = kpt_score / valid_num + # rescoring + n_p['score'] = kpt_score * box_score + + if self.use_nms: + nms = soft_oks_nms if self.soft_nms else oks_nms + keep = nms(list(img_kpts), oks_thr, sigmas=self.sigmas) + valid_kpts.append([img_kpts[_keep] for _keep in keep]) + else: + valid_kpts.append(img_kpts) + + self._write_coco_keypoint_results(valid_kpts, res_file) + + info_str = self._do_python_keypoint_eval(res_file) + name_value = OrderedDict(info_str) + + return name_value + + def _write_coco_keypoint_results(self, keypoints, res_file): + """Write results into a json file.""" + data_pack = [{ + 'cat_id': self._class_to_coco_ind[cls], + 'cls_ind': cls_ind, + 'cls': cls, + 'ann_type': 'keypoints', + 'keypoints': keypoints + } for cls_ind, cls in enumerate(self.classes) + if not cls == '__background__'] + + results = self._coco_keypoint_results_one_category_kernel(data_pack[0]) + + with open(res_file, 'w') as f: + json.dump(results, f, sort_keys=True, indent=4) + + def _coco_keypoint_results_one_category_kernel(self, data_pack): + """Get coco keypoint results.""" + cat_id = data_pack['cat_id'] + keypoints = data_pack['keypoints'] + cat_results = [] + + for img_kpts in keypoints: + if len(img_kpts) == 0: + continue + + _key_points = np.array( + [img_kpt['keypoints'] for img_kpt in img_kpts]) + key_points = _key_points.reshape(-1, + self.ann_info['num_joints'] * 3) + + result = [{ + 'image_id': img_kpt['image_id'], + 'category_id': cat_id, + 'keypoints': key_point.tolist(), + 'score': float(img_kpt['score']), + 'center': img_kpt['center'].tolist(), + 'scale': img_kpt['scale'].tolist() + } for img_kpt, key_point in zip(img_kpts, key_points)] + + cat_results.extend(result) + + return cat_results + + def _do_python_keypoint_eval(self, res_file): + """Keypoint evaluation using COCOAPI.""" + coco_det = self.coco.loadRes(res_file) + coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas) + coco_eval.params.useSegm = None + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + stats_names = [ + 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', + 'AR .75', 'AR (M)', 'AR (L)' + ] + + info_str = list(zip(stats_names, coco_eval.stats)) + + return info_str + + def _sort_and_unique_bboxes(self, kpts, 
key='bbox_id'): + """sort kpts and remove the repeated ones.""" + for img_id, persons in kpts.items(): + num = len(persons) + kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key]) + for i in range(num - 1, 0, -1): + if kpts[img_id][i][key] == kpts[img_id][i - 1][key]: + del kpts[img_id][i] + + return kpts diff --git a/mmpose/datasets/datasets/top_down/topdown_forklift_dataset4kp.py b/mmpose/datasets/datasets/top_down/topdown_forklift_dataset4kp.py new file mode 100644 index 0000000000..7e7715f195 --- /dev/null +++ b/mmpose/datasets/datasets/top_down/topdown_forklift_dataset4kp.py @@ -0,0 +1,464 @@ +import os +import warnings +from collections import OrderedDict, defaultdict + +import json_tricks as json +import numpy as np +from xtcocotools.coco import COCO +from xtcocotools.cocoeval import COCOeval + +from ....core.post_processing import oks_nms, soft_oks_nms +from ...registry import DATASETS +from .topdown_base_dataset import TopDownBaseDataset + + +@DATASETS.register_module() +class TopDownForkliftDataset4KP(TopDownBaseDataset): + """CocoDataset dataset for top-down pose estimation. + + `Microsoft COCO: Common Objects in Context' ECCV'2014 + More details can be found in the `paper + `__ . + + The dataset loads raw features and apply specified transforms + to return a dict containing the image tensors and other information. + + COCO keypoint indexes:: + + 0: 'nose', + 1: 'left_eye', + 2: 'right_eye', + 3: 'left_ear', + 4: 'right_ear', + 5: 'left_shoulder', + 6: 'right_shoulder', + 7: 'left_elbow', + 8: 'right_elbow', + 9: 'left_wrist', + 10: 'right_wrist', + 11: 'left_hip', + 12: 'right_hip', + 13: 'left_knee', + 14: 'right_knee', + 15: 'left_ankle', + 16: 'right_ankle' + + Args: + ann_file (str): Path to the annotation file. + img_prefix (str): Path to a directory where images are held. + Default: None. + data_cfg (dict): config + pipeline (list[dict | callable]): A sequence of data transforms. + test_mode (bool): Store True when building test or + validation dataset. Default: False. 
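All three top-down forklift datasets in this PR read the same set of keys from data_cfg in __init__. The sketch below lists them with placeholder values; image_size and num_joints are assumed to be consumed by the TopDownBaseDataset base class, and the remaining keys appear verbatim in the __init__ bodies above and below.

```python
# Placeholder data_cfg; values are illustrative, not recommendations.
data_cfg = dict(
    image_size=[192, 256],   # assumed to be copied into ann_info by the base class
    num_joints=4,            # 6 / 4 / 3 depending on the dataset variant
    use_gt_bbox=True,
    bbox_file='',            # detection results, only used when use_gt_bbox=False
    det_bbox_thr=0.0,        # optional (default 0.0); 'image_thr' is deprecated
    use_nms=True,            # optional (default True)
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
)
```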
+ """ + + def __init__(self, + ann_file, + img_prefix, + data_cfg, + pipeline, + test_mode=False): + super().__init__( + ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode) + + self.use_gt_bbox = data_cfg['use_gt_bbox'] + self.bbox_file = data_cfg['bbox_file'] + self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0) + if 'image_thr' in data_cfg: + warnings.warn( + 'image_thr is deprecated, ' + 'please use det_bbox_thr instead', DeprecationWarning) + self.det_bbox_thr = data_cfg['image_thr'] + self.use_nms = data_cfg.get('use_nms', True) + self.soft_nms = data_cfg['soft_nms'] + self.nms_thr = data_cfg['nms_thr'] + self.oks_thr = data_cfg['oks_thr'] + self.vis_thr = data_cfg['vis_thr'] + + self.ann_info['flip_pairs'] = [[0, 1], [2, 3]] + + self.ann_info['upper_body_ids'] = (0, 1, 2, 3) + self.ann_info['lower_body_ids'] = () + + self.ann_info['use_different_joint_weights'] = False + self.ann_info['joint_weights'] = np.array( + [ + 1., 1., 1., 1., + ], + dtype=np.float32).reshape((self.ann_info['num_joints'], 1)) + + # 'https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/' + # 'pycocotools/cocoeval.py#L523' + self.sigmas = np.array([ + .1, .1, .1, .1, + ]) / 1.0 + + self.coco = COCO(ann_file) + + cats = [ + cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds()) + ] + self.classes = ['__background__'] + cats + self.num_classes = len(self.classes) + self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) + self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) + self._coco_ind_to_class_ind = dict( + (self._class_to_coco_ind[cls], self._class_to_ind[cls]) + for cls in self.classes[1:]) + self.img_ids = self.coco.getImgIds() + self.num_images = len(self.img_ids) + self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs) + self.dataset_name = 'coco' + + self.db = self._get_db() + + print(f'=> num_images: {self.num_images}') + print(f'=> load {len(self.db)} samples') + + @staticmethod + def _get_mapping_id_name(imgs): + """ + Args: + imgs (dict): dict of image info. + + Returns: + tuple: Image name & id mapping dicts. + + - id2name (dict): Mapping image id to name. + - name2id (dict): Mapping image name to id. + """ + id2name = {} + name2id = {} + for image_id, image in imgs.items(): + file_name = image['file_name'] + id2name[image_id] = file_name + name2id[file_name] = image_id + + return id2name, name2id + + def _get_db(self): + """Load dataset.""" + if (not self.test_mode) or self.use_gt_bbox: + # use ground truth bbox + gt_db = self._load_coco_keypoint_annotations() + else: + # use bbox from detection + gt_db = self._load_coco_person_detection_results() + return gt_db + + def _load_coco_keypoint_annotations(self): + """Ground truth bbox and keypoints.""" + gt_db = [] + for img_id in self.img_ids: + gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id)) + return gt_db + + def _load_coco_keypoint_annotation_kernel(self, img_id): + """load annotation from COCOAPI. 
+ + Note: + bbox:[x1, y1, w, h] + Args: + img_id: coco image id + Returns: + dict: db entry + """ + img_ann = self.coco.loadImgs(img_id)[0] + width = img_ann['width'] + height = img_ann['height'] + num_joints = self.ann_info['num_joints'] + + ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False) + objs = self.coco.loadAnns(ann_ids) + + # sanitize bboxes + valid_objs = [] + for obj in objs: + if 'bbox' not in obj: + continue + x, y, w, h = obj['bbox'] + x1 = max(0, x) + y1 = max(0, y) + x2 = min(width - 1, x1 + max(0, w - 1)) + y2 = min(height - 1, y1 + max(0, h - 1)) + if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1: + obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1] + valid_objs.append(obj) + objs = valid_objs + + bbox_id = 0 + rec = [] + for obj in objs: + if 'keypoints' not in obj: + continue + if max(obj['keypoints']) == 0: + continue + if 'num_keypoints' in obj and obj['num_keypoints'] == 0: + continue + joints_3d = np.zeros((num_joints, 3), dtype=np.float32) + joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32) + + keypoints = np.array(obj['keypoints']).reshape(-1, 3) + joints_3d[:, :2] = keypoints[:, :2] + joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3]) + + center, scale = self._xywh2cs(*obj['clean_bbox'][:4]) + + image_file = os.path.join(self.img_prefix, self.id2name[img_id]) + rec.append({ + 'image_file': image_file, + 'center': center, + 'scale': scale, + 'bbox': obj['clean_bbox'][:4], + 'rotation': 0, + 'joints_3d': joints_3d, + 'joints_3d_visible': joints_3d_visible, + 'dataset': self.dataset_name, + 'bbox_score': 1, + 'bbox_id': bbox_id + }) + bbox_id = bbox_id + 1 + + return rec + + def _xywh2cs(self, x, y, w, h): + """This encodes bbox(x,y,w,w) into (center, scale) + + Args: + x, y, w, h + + Returns: + tuple: A tuple containing center and scale. + + - center (np.ndarray[float32](2,)): center of the bbox (x, y). + - scale (np.ndarray[float32](2,)): scale of the bbox w & h. + """ + aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[ + 'image_size'][1] + center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32) + + if (not self.test_mode) and np.random.rand() < 0.3: + center += 0.4 * (np.random.rand(2) - 0.5) * [w, h] + + if w > aspect_ratio * h: + h = w * 1.0 / aspect_ratio + elif w < aspect_ratio * h: + w = h * aspect_ratio + + # pixel std is 200.0 + scale = np.array([w / 200.0, h / 200.0], dtype=np.float32) + # padding to include proper amount of context + scale = scale * 1.25 + + return center, scale + + def _load_coco_person_detection_results(self): + """Load coco person detection results.""" + num_joints = self.ann_info['num_joints'] + all_boxes = None + with open(self.bbox_file, 'r') as f: + all_boxes = json.load(f) + + if not all_boxes: + raise ValueError('=> Load %s fail!' 
% self.bbox_file) + + print(f'=> Total boxes: {len(all_boxes)}') + + kpt_db = [] + bbox_id = 0 + for det_res in all_boxes: + if det_res['category_id'] != 1: + continue + + image_file = os.path.join(self.img_prefix, + self.id2name[det_res['image_id']]) + box = det_res['bbox'] + score = det_res['score'] + + if score < self.det_bbox_thr: + continue + + center, scale = self._xywh2cs(*box[:4]) + joints_3d = np.zeros((num_joints, 3), dtype=np.float32) + joints_3d_visible = np.ones((num_joints, 3), dtype=np.float32) + kpt_db.append({ + 'image_file': image_file, + 'center': center, + 'scale': scale, + 'rotation': 0, + 'bbox': box[:4], + 'bbox_score': score, + 'dataset': self.dataset_name, + 'joints_3d': joints_3d, + 'joints_3d_visible': joints_3d_visible, + 'bbox_id': bbox_id + }) + bbox_id = bbox_id + 1 + print(f'=> Total boxes after filter ' + f'low score@{self.det_bbox_thr}: {bbox_id}') + return kpt_db + + def evaluate(self, outputs, res_folder, metric='mAP', **kwargs): + """Evaluate coco keypoint results. The pose prediction results will be + saved in `${res_folder}/result_keypoints.json`. + + Note: + batch_size: N + num_keypoints: K + heatmap height: H + heatmap width: W + + Args: + outputs (list(dict)) + :preds (np.ndarray[N,K,3]): The first two dimensions are + coordinates, score is the third dimension of the array. + :boxes (np.ndarray[N,6]): [center[0], center[1], scale[0] + , scale[1],area, score] + :image_paths (list[str]): For example, ['data/coco/val2017 + /000000393226.jpg'] + :heatmap (np.ndarray[N, K, H, W]): model output heatmap + :bbox_id (list(int)). + res_folder (str): Path of directory to save the results. + metric (str | list[str]): Metric to be performed. Defaults: 'mAP'. + + Returns: + dict: Evaluation results for evaluation metric. 
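The bbox_file consumed by _load_coco_person_detection_results above is expected to hold COCO-style detection results; a single placeholder entry for illustration:

```python
# One placeholder entry of the detection-results JSON read via json_tricks.load:
det_results = [
    dict(
        image_id=12345,                    # must exist in the annotation file (id2name lookup)
        category_id=1,                     # any other category_id is skipped
        bbox=[100.0, 80.0, 50.0, 120.0],   # x, y, w, h
        score=0.87,                        # dropped when below det_bbox_thr
    ),
]
```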
+ """ + metrics = metric if isinstance(metric, list) else [metric] + allowed_metrics = ['mAP'] + for metric in metrics: + if metric not in allowed_metrics: + raise KeyError(f'metric {metric} is not supported') + + res_file = os.path.join(res_folder, 'result_keypoints.json') + + kpts = defaultdict(list) + + for output in outputs: + preds = output['preds'] + boxes = output['boxes'] + image_paths = output['image_paths'] + bbox_ids = output['bbox_ids'] + + batch_size = len(image_paths) + for i in range(batch_size): + image_id = self.name2id[image_paths[i][len(self.img_prefix):]] + kpts[image_id].append({ + 'keypoints': preds[i], + 'center': boxes[i][0:2], + 'scale': boxes[i][2:4], + 'area': boxes[i][4], + 'score': boxes[i][5], + 'image_id': image_id, + 'bbox_id': bbox_ids[i] + }) + kpts = self._sort_and_unique_bboxes(kpts) + + # rescoring and oks nms + num_joints = self.ann_info['num_joints'] + vis_thr = self.vis_thr + oks_thr = self.oks_thr + valid_kpts = [] + for image_id in kpts.keys(): + img_kpts = kpts[image_id] + for n_p in img_kpts: + box_score = n_p['score'] + kpt_score = 0 + valid_num = 0 + for n_jt in range(0, num_joints): + t_s = n_p['keypoints'][n_jt][2] + if t_s > vis_thr: + kpt_score = kpt_score + t_s + valid_num = valid_num + 1 + if valid_num != 0: + kpt_score = kpt_score / valid_num + # rescoring + n_p['score'] = kpt_score * box_score + + if self.use_nms: + nms = soft_oks_nms if self.soft_nms else oks_nms + keep = nms(list(img_kpts), oks_thr, sigmas=self.sigmas) + valid_kpts.append([img_kpts[_keep] for _keep in keep]) + else: + valid_kpts.append(img_kpts) + + self._write_coco_keypoint_results(valid_kpts, res_file) + + info_str = self._do_python_keypoint_eval(res_file) + name_value = OrderedDict(info_str) + + return name_value + + def _write_coco_keypoint_results(self, keypoints, res_file): + """Write results into a json file.""" + data_pack = [{ + 'cat_id': self._class_to_coco_ind[cls], + 'cls_ind': cls_ind, + 'cls': cls, + 'ann_type': 'keypoints', + 'keypoints': keypoints + } for cls_ind, cls in enumerate(self.classes) + if not cls == '__background__'] + + results = self._coco_keypoint_results_one_category_kernel(data_pack[0]) + + with open(res_file, 'w') as f: + json.dump(results, f, sort_keys=True, indent=4) + + def _coco_keypoint_results_one_category_kernel(self, data_pack): + """Get coco keypoint results.""" + cat_id = data_pack['cat_id'] + keypoints = data_pack['keypoints'] + cat_results = [] + + for img_kpts in keypoints: + if len(img_kpts) == 0: + continue + + _key_points = np.array( + [img_kpt['keypoints'] for img_kpt in img_kpts]) + key_points = _key_points.reshape(-1, + self.ann_info['num_joints'] * 3) + + result = [{ + 'image_id': img_kpt['image_id'], + 'category_id': cat_id, + 'keypoints': key_point.tolist(), + 'score': float(img_kpt['score']), + 'center': img_kpt['center'].tolist(), + 'scale': img_kpt['scale'].tolist() + } for img_kpt, key_point in zip(img_kpts, key_points)] + + cat_results.extend(result) + + return cat_results + + def _do_python_keypoint_eval(self, res_file): + """Keypoint evaluation using COCOAPI.""" + coco_det = self.coco.loadRes(res_file) + coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas) + coco_eval.params.useSegm = None + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + stats_names = [ + 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', + 'AR .75', 'AR (M)', 'AR (L)' + ] + + info_str = list(zip(stats_names, coco_eval.stats)) + + return info_str + + def _sort_and_unique_bboxes(self, kpts, 
key='bbox_id'): + """sort kpts and remove the repeated ones.""" + for img_id, persons in kpts.items(): + num = len(persons) + kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key]) + for i in range(num - 1, 0, -1): + if kpts[img_id][i][key] == kpts[img_id][i - 1][key]: + del kpts[img_id][i] + + return kpts diff --git a/mmpose/datasets/datasets/top_down/topdown_lifted_fork_dataset_3kp.py b/mmpose/datasets/datasets/top_down/topdown_lifted_fork_dataset_3kp.py new file mode 100644 index 0000000000..4bb60ebc57 --- /dev/null +++ b/mmpose/datasets/datasets/top_down/topdown_lifted_fork_dataset_3kp.py @@ -0,0 +1,465 @@ +import os +import pdb +import warnings +from collections import OrderedDict, defaultdict + +import json_tricks as json +import numpy as np +from xtcocotools.coco import COCO +from xtcocotools.cocoeval import COCOeval + +from ....core.post_processing import oks_nms, soft_oks_nms +from ...registry import DATASETS +from .topdown_base_dataset import TopDownBaseDataset + + +@DATASETS.register_module() +class LiftedForkDataset3KP(TopDownBaseDataset): + """CocoDataset dataset for top-down pose estimation. + + `Microsoft COCO: Common Objects in Context' ECCV'2014 + More details can be found in the `paper + `__ . + + The dataset loads raw features and apply specified transforms + to return a dict containing the image tensors and other information. + + COCO keypoint indexes:: + + 0: 'nose', + 1: 'left_eye', + 2: 'right_eye', + 3: 'left_ear', + 4: 'right_ear', + 5: 'left_shoulder', + 6: 'right_shoulder', + 7: 'left_elbow', + 8: 'right_elbow', + 9: 'left_wrist', + 10: 'right_wrist', + 11: 'left_hip', + 12: 'right_hip', + 13: 'left_knee', + 14: 'right_knee', + 15: 'left_ankle', + 16: 'right_ankle' + + Args: + ann_file (str): Path to the annotation file. + img_prefix (str): Path to a directory where images are held. + Default: None. + data_cfg (dict): config + pipeline (list[dict | callable]): A sequence of data transforms. + test_mode (bool): Store True when building test or + validation dataset. Default: False. 
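The evaluate() implementation shared by these top-down datasets rescores every prediction before the optional OKS-NMS: the detection score is multiplied by the mean confidence of the keypoints above vis_thr, and collapses to zero when none pass. A standalone sketch of that step:

```python
import numpy as np

def rescore(keypoints: np.ndarray, box_score: float, vis_thr: float = 0.2) -> float:
    """Mirror of the rescoring loop in evaluate(); keypoints has shape (K, 3)."""
    conf = keypoints[:, 2]
    valid = conf[conf > vis_thr]
    if valid.size == 0:
        return 0.0                      # no keypoint above vis_thr
    return float(valid.mean() * box_score)
```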
+ """ + + def __init__(self, + ann_file, + img_prefix, + data_cfg, + pipeline, + test_mode=False): + super().__init__( + ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode) + + self.use_gt_bbox = data_cfg['use_gt_bbox'] + self.bbox_file = data_cfg['bbox_file'] + self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0) + if 'image_thr' in data_cfg: + warnings.warn( + 'image_thr is deprecated, ' + 'please use det_bbox_thr instead', DeprecationWarning) + self.det_bbox_thr = data_cfg['image_thr'] + self.use_nms = data_cfg.get('use_nms', True) + self.soft_nms = data_cfg['soft_nms'] + self.nms_thr = data_cfg['nms_thr'] + self.oks_thr = data_cfg['oks_thr'] + self.vis_thr = data_cfg['vis_thr'] + + self.ann_info['flip_pairs'] = [[0, 1]] + + self.ann_info['upper_body_ids'] = (0, 1, 2) + self.ann_info['lower_body_ids'] = () + + self.ann_info['use_different_joint_weights'] = False + self.ann_info['joint_weights'] = np.array( + [ + 1., 1., 1., + ], + dtype=np.float32).reshape((self.ann_info['num_joints'], 1)) + + # 'https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/' + # 'pycocotools/cocoeval.py#L523' + self.sigmas = np.array([ + .1, .1, .1, + ]) / 1.0 + + self.coco = COCO(ann_file) + + cats = [ + cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds()) + ] + self.classes = ['__background__'] + cats + self.num_classes = len(self.classes) + self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) + self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) + self._coco_ind_to_class_ind = dict( + (self._class_to_coco_ind[cls], self._class_to_ind[cls]) + for cls in self.classes[1:]) + self.img_ids = self.coco.getImgIds() + self.num_images = len(self.img_ids) + self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs) + self.dataset_name = 'coco' + + self.db = self._get_db() + + print(f'=> num_images: {self.num_images}') + print(f'=> load {len(self.db)} samples') + + @staticmethod + def _get_mapping_id_name(imgs): + """ + Args: + imgs (dict): dict of image info. + + Returns: + tuple: Image name & id mapping dicts. + + - id2name (dict): Mapping image id to name. + - name2id (dict): Mapping image name to id. + """ + id2name = {} + name2id = {} + for image_id, image in imgs.items(): + file_name = image['file_name'] + id2name[image_id] = file_name + name2id[file_name] = image_id + + return id2name, name2id + + def _get_db(self): + """Load dataset.""" + if (not self.test_mode) or self.use_gt_bbox: + # use ground truth bbox + gt_db = self._load_coco_keypoint_annotations() + else: + # use bbox from detection + gt_db = self._load_coco_person_detection_results() + return gt_db + + def _load_coco_keypoint_annotations(self): + """Ground truth bbox and keypoints.""" + gt_db = [] + for img_id in self.img_ids: + gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id)) + return gt_db + + def _load_coco_keypoint_annotation_kernel(self, img_id): + """load annotation from COCOAPI. 
+ + Note: + bbox:[x1, y1, w, h] + Args: + img_id: coco image id + Returns: + dict: db entry + """ + img_ann = self.coco.loadImgs(img_id)[0] + width = img_ann['width'] + height = img_ann['height'] + num_joints = self.ann_info['num_joints'] + + ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False) + objs = self.coco.loadAnns(ann_ids) + + # sanitize bboxes + valid_objs = [] + for obj in objs: + if 'bbox' not in obj: + continue + x, y, w, h = obj['bbox'] + x1 = max(0, x) + y1 = max(0, y) + x2 = min(width - 1, x1 + max(0, w - 1)) + y2 = min(height - 1, y1 + max(0, h - 1)) + if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1: + obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1] + valid_objs.append(obj) + objs = valid_objs + + bbox_id = 0 + rec = [] + for obj in objs: + if 'keypoints' not in obj: + continue + if max(obj['keypoints']) == 0: + continue + if 'num_keypoints' in obj and obj['num_keypoints'] == 0: + continue + joints_3d = np.zeros((num_joints, 3), dtype=np.float32) + joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32) + + keypoints = np.array(obj['keypoints']).reshape(-1, 3) + joints_3d[:, :2] = keypoints[:, :2] + joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3]) + + center, scale = self._xywh2cs(*obj['clean_bbox'][:4]) + + image_file = os.path.join(self.img_prefix, self.id2name[img_id]) + rec.append({ + 'image_file': image_file, + 'center': center, + 'scale': scale, + 'bbox': obj['clean_bbox'][:4], + 'rotation': 0, + 'joints_3d': joints_3d, + 'joints_3d_visible': joints_3d_visible, + 'dataset': self.dataset_name, + 'bbox_score': 1, + 'bbox_id': bbox_id + }) + bbox_id = bbox_id + 1 + + return rec + + def _xywh2cs(self, x, y, w, h): + """This encodes bbox(x,y,w,w) into (center, scale) + + Args: + x, y, w, h + + Returns: + tuple: A tuple containing center and scale. + + - center (np.ndarray[float32](2,)): center of the bbox (x, y). + - scale (np.ndarray[float32](2,)): scale of the bbox w & h. + """ + aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[ + 'image_size'][1] + center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32) + + if (not self.test_mode) and np.random.rand() < 0.3: + center += 0.4 * (np.random.rand(2) - 0.5) * [w, h] + + if w > aspect_ratio * h: + h = w * 1.0 / aspect_ratio + elif w < aspect_ratio * h: + w = h * aspect_ratio + + # pixel std is 200.0 + scale = np.array([w / 200.0, h / 200.0], dtype=np.float32) + # padding to include proper amount of context + scale = scale * 1.25 + + return center, scale + + def _load_coco_person_detection_results(self): + """Load coco person detection results.""" + num_joints = self.ann_info['num_joints'] + all_boxes = None + with open(self.bbox_file, 'r') as f: + all_boxes = json.load(f) + + if not all_boxes: + raise ValueError('=> Load %s fail!' 
% self.bbox_file) + + print(f'=> Total boxes: {len(all_boxes)}') + + kpt_db = [] + bbox_id = 0 + for det_res in all_boxes: + if det_res['category_id'] != 1: + continue + + image_file = os.path.join(self.img_prefix, + self.id2name[det_res['image_id']]) + box = det_res['bbox'] + score = det_res['score'] + + if score < self.det_bbox_thr: + continue + + center, scale = self._xywh2cs(*box[:4]) + joints_3d = np.zeros((num_joints, 3), dtype=np.float32) + joints_3d_visible = np.ones((num_joints, 3), dtype=np.float32) + kpt_db.append({ + 'image_file': image_file, + 'center': center, + 'scale': scale, + 'rotation': 0, + 'bbox': box[:4], + 'bbox_score': score, + 'dataset': self.dataset_name, + 'joints_3d': joints_3d, + 'joints_3d_visible': joints_3d_visible, + 'bbox_id': bbox_id + }) + bbox_id = bbox_id + 1 + print(f'=> Total boxes after filter ' + f'low score@{self.det_bbox_thr}: {bbox_id}') + return kpt_db + + def evaluate(self, outputs, res_folder, metric='mAP', **kwargs): + """Evaluate coco keypoint results. The pose prediction results will be + saved in `${res_folder}/result_keypoints.json`. + + Note: + batch_size: N + num_keypoints: K + heatmap height: H + heatmap width: W + + Args: + outputs (list(dict)) + :preds (np.ndarray[N,K,3]): The first two dimensions are + coordinates, score is the third dimension of the array. + :boxes (np.ndarray[N,6]): [center[0], center[1], scale[0] + , scale[1],area, score] + :image_paths (list[str]): For example, ['data/coco/val2017 + /000000393226.jpg'] + :heatmap (np.ndarray[N, K, H, W]): model output heatmap + :bbox_id (list(int)). + res_folder (str): Path of directory to save the results. + metric (str | list[str]): Metric to be performed. Defaults: 'mAP'. + + Returns: + dict: Evaluation results for evaluation metric. 
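+
+        Example (illustrative only; shows the key order of the returned
+        OrderedDict, no real numbers):
+            >>> name_value = dataset.evaluate(outputs, res_folder='tmp')
+            >>> list(name_value)[:3]
+            ['AP', 'AP .5', 'AP .75']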
+ """ + metrics = metric if isinstance(metric, list) else [metric] + allowed_metrics = ['mAP'] + for metric in metrics: + if metric not in allowed_metrics: + raise KeyError(f'metric {metric} is not supported') + + res_file = os.path.join(res_folder, 'result_keypoints.json') + + kpts = defaultdict(list) + + for output in outputs: + preds = output['preds'] + boxes = output['boxes'] + image_paths = output['image_paths'] + bbox_ids = output['bbox_ids'] + + batch_size = len(image_paths) + for i in range(batch_size): + image_id = self.name2id[image_paths[i][len(self.img_prefix):]] + kpts[image_id].append({ + 'keypoints': preds[i], + 'center': boxes[i][0:2], + 'scale': boxes[i][2:4], + 'area': boxes[i][4], + 'score': boxes[i][5], + 'image_id': image_id, + 'bbox_id': bbox_ids[i] + }) + kpts = self._sort_and_unique_bboxes(kpts) + + # rescoring and oks nms + num_joints = self.ann_info['num_joints'] + vis_thr = self.vis_thr + oks_thr = self.oks_thr + valid_kpts = [] + for image_id in kpts.keys(): + img_kpts = kpts[image_id] + for n_p in img_kpts: + box_score = n_p['score'] + kpt_score = 0 + valid_num = 0 + for n_jt in range(0, num_joints): + t_s = n_p['keypoints'][n_jt][2] + if t_s > vis_thr: + kpt_score = kpt_score + t_s + valid_num = valid_num + 1 + if valid_num != 0: + kpt_score = kpt_score / valid_num + # rescoring + n_p['score'] = kpt_score * box_score + + if self.use_nms: + nms = soft_oks_nms if self.soft_nms else oks_nms + keep = nms(list(img_kpts), oks_thr, sigmas=self.sigmas) + valid_kpts.append([img_kpts[_keep] for _keep in keep]) + else: + valid_kpts.append(img_kpts) + + self._write_coco_keypoint_results(valid_kpts, res_file) + + info_str = self._do_python_keypoint_eval(res_file) + name_value = OrderedDict(info_str) + + return name_value + + def _write_coco_keypoint_results(self, keypoints, res_file): + """Write results into a json file.""" + data_pack = [{ + 'cat_id': self._class_to_coco_ind[cls], + 'cls_ind': cls_ind, + 'cls': cls, + 'ann_type': 'keypoints', + 'keypoints': keypoints + } for cls_ind, cls in enumerate(self.classes) + if not cls == '__background__'] + + results = self._coco_keypoint_results_one_category_kernel(data_pack[0]) + + with open(res_file, 'w') as f: + json.dump(results, f, sort_keys=True, indent=4) + + def _coco_keypoint_results_one_category_kernel(self, data_pack): + """Get coco keypoint results.""" + cat_id = data_pack['cat_id'] + keypoints = data_pack['keypoints'] + cat_results = [] + + for img_kpts in keypoints: + if len(img_kpts) == 0: + continue + + _key_points = np.array( + [img_kpt['keypoints'] for img_kpt in img_kpts]) + key_points = _key_points.reshape(-1, + self.ann_info['num_joints'] * 3) + + result = [{ + 'image_id': img_kpt['image_id'], + 'category_id': cat_id, + 'keypoints': key_point.tolist(), + 'score': float(img_kpt['score']), + 'center': img_kpt['center'].tolist(), + 'scale': img_kpt['scale'].tolist() + } for img_kpt, key_point in zip(img_kpts, key_points)] + + cat_results.extend(result) + + return cat_results + + def _do_python_keypoint_eval(self, res_file): + """Keypoint evaluation using COCOAPI.""" + coco_det = self.coco.loadRes(res_file) + coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas) + coco_eval.params.useSegm = None + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + stats_names = [ + 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', + 'AR .75', 'AR (M)', 'AR (L)' + ] + + info_str = list(zip(stats_names, coco_eval.stats)) + + return info_str + + def _sort_and_unique_bboxes(self, kpts, 
key='bbox_id'):
+        """Sort kpts and remove the repeated ones."""
+        for img_id, persons in kpts.items():
+            num = len(persons)
+            kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key])
+            for i in range(num - 1, 0, -1):
+                if kpts[img_id][i][key] == kpts[img_id][i - 1][key]:
+                    del kpts[img_id][i]
+
+        return kpts
diff --git a/mmpose/datasets/datasets/top_down/topdown_lifted_fork_dataset_7kp.py b/mmpose/datasets/datasets/top_down/topdown_lifted_fork_dataset_7kp.py
new file mode 100644
index 0000000000..0ae5e59560
--- /dev/null
+++ b/mmpose/datasets/datasets/top_down/topdown_lifted_fork_dataset_7kp.py
@@ -0,0 +1,609 @@
+import os
+import warnings
+from collections import OrderedDict, defaultdict
+
+import json_tricks as json
+import numpy as np
+from xtcocotools.coco import COCO
+from xtcocotools.cocoeval import COCOeval
+
+from ....core.post_processing import oks_nms, soft_oks_nms
+from ...registry import DATASETS
+from .topdown_base_dataset import TopDownBaseDataset
+
+from mmpose.core.evaluation.top_down_eval import (
+    keypoint_pck_accuracy, keypoint_auc, keypoint_epe, keypoint_nme
+)
+
+# Number of keypoints in this dataset.
+n = 7
+
+
+@DATASETS.register_module()
+class LiftedForkDataset7KP(TopDownBaseDataset):
+    """Lifted-fork dataset with 7 keypoints for top-down pose estimation.
+
+    Annotations follow the `Microsoft COCO <https://arxiv.org/abs/1405.0312>`__
+    keypoint format and are loaded via xtcocotools.
+
+    The dataset loads raw features and applies the specified transforms
+    to return a dict containing the image tensors and other information.
+
+    Keypoint indexes (as assumed by the per-keypoint metrics in this file)::
+
+        0: 'rear_left',
+        1: 'rear_right',
+        2: 'front_left',
+        3: 'front_right',
+        4: 'L_Fork',
+        5: 'R_Fork',
+        6: 'C_Fork'
+
+    Args:
+        ann_file (str): Path to the annotation file.
+        img_prefix (str): Path to a directory where images are held.
+            Default: None.
+        data_cfg (dict): config
+        pipeline (list[dict | callable]): A sequence of data transforms.
+        test_mode (bool): Store True when building test or
+            validation dataset. Default: False.
+    """
+
+    def __init__(self,
+                 ann_file,
+                 img_prefix,
+                 data_cfg,
+                 pipeline,
+                 test_mode=False):
+        super().__init__(
+            ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)
+
+        self.use_gt_bbox = data_cfg['use_gt_bbox']
+        self.bbox_file = data_cfg['bbox_file']
+        self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
+        if 'image_thr' in data_cfg:
+            warnings.warn(
+                'image_thr is deprecated, '
+                'please use det_bbox_thr instead', DeprecationWarning)
+            self.det_bbox_thr = data_cfg['image_thr']
+        self.use_nms = data_cfg.get('use_nms', True)
+        self.soft_nms = data_cfg['soft_nms']
+        self.nms_thr = data_cfg['nms_thr']
+        self.oks_thr = data_cfg['oks_thr']
+        self.vis_thr = data_cfg['vis_thr']
+
+        self.ann_info['flip_pairs'] = [[0, 1], [2, 3], [4, 5]]
+
+        self.ann_info['upper_body_ids'] = tuple(range(n))
+        self.ann_info['lower_body_ids'] = ()
+
+        self.ann_info['use_different_joint_weights'] = False
+        self.ann_info['joint_weights'] = np.array(
+            [1. for _ in range(n)],
+            dtype=np.float32).reshape((self.ann_info['num_joints'], 1))
+
+        # OKS sigmas, cf.
+        # 'https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/'
+        # 'pycocotools/cocoeval.py#L523'
+        self.sigmas = np.array([1.
for _ in range(n)]) / 1.0 + + self.coco = COCO(ann_file) + + cats = [ + cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds()) + ] + self.classes = ['__background__'] + cats + self.num_classes = len(self.classes) + self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) + self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) + self._coco_ind_to_class_ind = dict( + (self._class_to_coco_ind[cls], self._class_to_ind[cls]) + for cls in self.classes[1:]) + self.img_ids = self.coco.getImgIds() + self.num_images = len(self.img_ids) + self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs) + self.dataset_name = 'coco' + + self.db = self._get_db() + + print(f'=> num_images: {self.num_images}') + print(f'=> load {len(self.db)} samples') + + @staticmethod + def _get_mapping_id_name(imgs): + """ + Args: + imgs (dict): dict of image info. + + Returns: + tuple: Image name & id mapping dicts. + + - id2name (dict): Mapping image id to name. + - name2id (dict): Mapping image name to id. + """ + id2name = {} + name2id = {} + for image_id, image in imgs.items(): + file_name = image['file_name'] + id2name[image_id] = file_name + name2id[file_name] = image_id + + return id2name, name2id + + def _get_db(self): + """Load dataset.""" + if (not self.test_mode) or self.use_gt_bbox: + # use ground truth bbox + gt_db = self._load_coco_keypoint_annotations() + else: + # use bbox from detection + gt_db = self._load_coco_person_detection_results() + return gt_db + + def _load_coco_keypoint_annotations(self): + """Ground truth bbox and keypoints.""" + gt_db = [] + for img_id in self.img_ids: + gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id)) + return gt_db + + def _load_coco_keypoint_annotation_kernel(self, img_id): + """load annotation from COCOAPI. 
+ + Note: + bbox:[x1, y1, w, h] + Args: + img_id: coco image id + Returns: + dict: db entry + """ + img_ann = self.coco.loadImgs(img_id)[0] + width = img_ann['width'] + height = img_ann['height'] + num_joints = self.ann_info['num_joints'] + + ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False) + objs = self.coco.loadAnns(ann_ids) + + # sanitize bboxes + valid_objs = [] + for obj in objs: + if 'bbox' not in obj: + continue + x, y, w, h = obj['bbox'] + x1 = max(0, x) + y1 = max(0, y) + x2 = min(width - 1, x1 + max(0, w - 1)) + y2 = min(height - 1, y1 + max(0, h - 1)) + if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1: + obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1] + valid_objs.append(obj) + objs = valid_objs + + bbox_id = 0 + rec = [] + for obj in objs: + if 'keypoints' not in obj: + continue + if max(obj['keypoints']) == 0: + continue + if 'num_keypoints' in obj and obj['num_keypoints'] == 0: + continue + joints_3d = np.zeros((num_joints, 3), dtype=np.float32) + joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32) + + keypoints = np.array(obj['keypoints']).reshape(-1, 3) + joints_3d[:, :2] = keypoints[:, :2] + joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3]) + + center, scale = self._xywh2cs(*obj['clean_bbox'][:4]) + + image_file = os.path.join(self.img_prefix, self.id2name[img_id]) + rec.append({ + 'image_file': image_file, + 'center': center, + 'scale': scale, + 'bbox': obj['clean_bbox'][:4], + 'rotation': 0, + 'joints_3d': joints_3d, + 'joints_3d_visible': joints_3d_visible, + 'dataset': self.dataset_name, + 'bbox_score': 1, + 'bbox_id': bbox_id + }) + bbox_id = bbox_id + 1 + + return rec + + def _xywh2cs(self, x, y, w, h): + """This encodes bbox(x,y,w,w) into (center, scale) + + Args: + x, y, w, h + + Returns: + tuple: A tuple containing center and scale. + + - center (np.ndarray[float32](2,)): center of the bbox (x, y). + - scale (np.ndarray[float32](2,)): scale of the bbox w & h. + """ + aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[ + 'image_size'][1] + center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32) + + if (not self.test_mode) and np.random.rand() < 0.3: + center += 0.4 * (np.random.rand(2) - 0.5) * [w, h] + + if w > aspect_ratio * h: + h = w * 1.0 / aspect_ratio + elif w < aspect_ratio * h: + w = h * aspect_ratio + + # pixel std is 200.0 + scale = np.array([w / 200.0, h / 200.0], dtype=np.float32) + # padding to include proper amount of context + scale = scale * 1.25 + + return center, scale + + def _load_coco_person_detection_results(self): + """Load coco person detection results.""" + num_joints = self.ann_info['num_joints'] + all_boxes = None + with open(self.bbox_file, 'r') as f: + all_boxes = json.load(f) + + if not all_boxes: + raise ValueError('=> Load %s fail!' 
% self.bbox_file) + + print(f'=> Total boxes: {len(all_boxes)}') + + kpt_db = [] + bbox_id = 0 + for det_res in all_boxes: + if det_res['category_id'] != 1: + continue + + image_file = os.path.join(self.img_prefix, + self.id2name[det_res['image_id']]) + box = det_res['bbox'] + score = det_res['score'] + + if score < self.det_bbox_thr: + continue + + center, scale = self._xywh2cs(*box[:4]) + joints_3d = np.zeros((num_joints, 3), dtype=np.float32) + joints_3d_visible = np.ones((num_joints, 3), dtype=np.float32) + kpt_db.append({ + 'image_file': image_file, + 'center': center, + 'scale': scale, + 'rotation': 0, + 'bbox': box[:4], + 'bbox_score': score, + 'dataset': self.dataset_name, + 'joints_3d': joints_3d, + 'joints_3d_visible': joints_3d_visible, + 'bbox_id': bbox_id + }) + bbox_id = bbox_id + 1 + print(f'=> Total boxes after filter ' + f'low score@{self.det_bbox_thr}: {bbox_id}') + return kpt_db + + def evaluate(self, outputs, res_folder, metric='mAP', **kwargs): + """Evaluate coco keypoint results. The pose prediction results will be + saved in `${res_folder}/result_keypoints.json`. + + Note: + batch_size: N + num_keypoints: K + heatmap height: H + heatmap width: W + + Args: + outputs (list(dict)) + :preds (np.ndarray[N,K,3]): The first two dimensions are + coordinates, score is the third dimension of the array. + :boxes (np.ndarray[N,6]): [center[0], center[1], scale[0] + , scale[1],area, score] + :image_paths (list[str]): For example, ['data/coco/val2017 + /000000393226.jpg'] + :heatmap (np.ndarray[N, K, H, W]): model output heatmap + :bbox_id (list(int)). + res_folder (str): Path of directory to save the results. + metric (str | list[str]): Metric to be performed. Defaults: 'mAP'. + + Returns: + dict: Evaluation results for evaluation metric. 
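+
+        Note:
+            Besides 'mAP', this dataset also accepts 'PCK', 'AUC', 'EPE' and
+            'NME'. The returned dict additionally contains per-keypoint
+            AP/AR and EPE entries (e.g. 'rear_left_AP', 'EPE_C_Fork').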
+ """ + metrics = metric if isinstance(metric, list) else [metric] + allowed_metrics = ['mAP', 'PCK', 'AUC', 'EPE', 'NME'] + for metric in metrics: + if metric not in allowed_metrics: + raise KeyError(f'metric {metric} is not supported') + + res_file = os.path.join(res_folder, 'result_keypoints.json') + + kpts = defaultdict(list) + kpts2 = [] + + for output in outputs: + preds = output['preds'] + boxes = output['boxes'] + image_paths = output['image_paths'] + bbox_ids = output['bbox_ids'] + + batch_size = len(image_paths) + for i in range(batch_size): + image_id = self.name2id[image_paths[i][len(self.img_prefix):]] + kpts[image_id].append({ + 'keypoints': preds[i], + 'center': boxes[i][0:2], + 'scale': boxes[i][2:4], + 'area': boxes[i][4], + 'score': boxes[i][5], + 'image_id': image_id, + 'bbox_id': bbox_ids[i] + }) + kpts2.append({ + 'keypoints': preds[i].tolist(), + 'center': boxes[i][0:2].tolist(), + 'scale': boxes[i][2:4].tolist(), + 'area': float(boxes[i][4]), + 'score': float(boxes[i][5]), + 'image_id': image_id, + 'bbox_id': bbox_ids[i] + }) + + kpts2 = self._base_sort_and_unique_bboxes(kpts2) + res_file2 = os.path.join(res_folder, 'result_keyptoints2.json') + self._base_write_keypoint_results(kpts2, res_file2) + additional_info = self._report_metric(res_file2, metrics) + + kpts = self._sort_and_unique_bboxes(kpts) + + # rescoring and oks nms + num_joints = self.ann_info['num_joints'] + vis_thr = self.vis_thr + oks_thr = self.oks_thr + valid_kpts = [] + for image_id in kpts.keys(): + img_kpts = kpts[image_id] + for n_p in img_kpts: + box_score = n_p['score'] + kpt_score = 0 + valid_num = 0 + for n_jt in range(0, num_joints): + t_s = n_p['keypoints'][n_jt][2] + if t_s > vis_thr: + kpt_score = kpt_score + t_s + valid_num = valid_num + 1 + if valid_num != 0: + kpt_score = kpt_score / valid_num + # rescoring + n_p['score'] = kpt_score * box_score + + if self.use_nms: + nms = soft_oks_nms if self.soft_nms else oks_nms + keep = nms(list(img_kpts), oks_thr, sigmas=self.sigmas) + valid_kpts.append([img_kpts[_keep] for _keep in keep]) + else: + valid_kpts.append(img_kpts) + + self._write_coco_keypoint_results(valid_kpts, res_file) + + info_str_general = self._do_python_keypoint_eval(res_file) + info_str_per_keypoint = self._do_python_keypoint_eval_per_keypoint(res_file) + + info_str = info_str_general + info_str_per_keypoint + info_str.extend(additional_info) + name_value = OrderedDict(info_str) + + return name_value + + def _report_metric(self, res_file, metrics, pck_thr=0.05, auc_nor=30): + """Keypoint evaluation. + + Args: + res_file (str): Json file stored prediction results. + metrics (str | list[str]): Metric to be performed. + Options: 'PCK', 'AUC', 'EPE', 'mAP', 'NME'. + pck_thr (float): PCK threshold, default as 0.05. + auc_nor (float): AUC normalization factor, default as 30 pixel. + + Returns: + list: Evaluation results for evaluation metric. + """ + info_str = [] + + with open(res_file, 'r') as fin: + preds = json.load(fin) + assert len(preds) == len(self.db), f"--- no match. 
preds {len(preds)} db {len(self.db)}"
+
+        outputs = []
+        gts = []
+        masks = []
+        threshold_bbox = []
+
+        for pred, item in zip(preds, self.db):
+            outputs.append(np.array(pred['keypoints'])[:, :-1])
+            gts.append(np.array(item['joints_3d'])[:, :-1])
+            masks.append((np.array(item['joints_3d_visible'])[:, 0]) > 0)
+            if 'PCK' in metrics:
+                bbox = np.array(item['bbox'])
+                bbox_thr = np.max(bbox[2:])
+                threshold_bbox.append(np.array([bbox_thr, bbox_thr]))
+
+        outputs = np.array(outputs)
+        gts = np.array(gts)
+        masks = np.array(masks)
+        threshold_bbox = np.array(threshold_bbox)
+
+        if 'PCK' in metrics:
+            _, pck, _ = keypoint_pck_accuracy(outputs, gts, masks, pck_thr, threshold_bbox)
+            info_str.append(('PCK', pck))
+
+        if 'AUC' in metrics:
+            info_str.append(('AUC', keypoint_auc(outputs, gts, masks, auc_nor)))
+
+        if 'EPE' in metrics:
+            info_str.append(('EPE', keypoint_epe(outputs, gts, masks)))
+
+        keypoint_names = [
+            'rear_left', 'rear_right', 'front_left', 'front_right',
+            'L_Fork', 'R_Fork', 'C_Fork'
+        ]
+
+        # Per-keypoint EPE, reported as 'EPE_<keypoint_name>'.
+        try:
+            for kp_idx in range(outputs.shape[1]):
+                kp_name = keypoint_names[kp_idx]
+                kp_epe = np.mean(np.sqrt(np.sum((outputs[:, kp_idx, :] - gts[:, kp_idx, :])**2, axis=1)))
+                info_str.append((f'EPE_{kp_name}', kp_epe))
+        except Exception:
+            print('Per-keypoint EPE skipped, unexpected outputs shape:', outputs.shape)
+
+        if 'NME' in metrics:
+            # The normalization factor below is a best-effort choice for this
+            # dataset; see _get_normalize_factor.
+            normalize_factor = self._get_normalize_factor(gts)
+            info_str.append(('NME', keypoint_nme(outputs, gts, masks, normalize_factor)))
+
+        return info_str
+
+    def _base_write_keypoint_results(self, keypoints, res_file):
+        """Write results into a json file."""
+        with open(res_file, 'w') as f:
+            json.dump(keypoints, f, sort_keys=True, indent=4)
+
+    def _base_sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+        """Sort kpts by ``key``.
+
+        Duplicate removal is intentionally skipped here so that predictions
+        stay aligned one-to-one with ``self.db`` (see the length assert in
+        ``_report_metric``).
+        """
+        kpts = sorted(kpts, key=lambda x: x[key])
+        return kpts
+
+    def _get_normalize_factor(self, gts):
+        """Get the distance between keypoints 0 and 2 as the normalization
+        factor (the analogue of the inter-ocular distance used for face
+        datasets).
+
+        Args:
+            gts (np.ndarray[N, K, 2]): Groundtruth keypoint location.
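+
+        Note:
+            For the 7-keypoint forklift layout assumed in this file, indices
+            0 and 2 are 'rear_left' and 'front_left' (see ``keypoint_names``
+            in ``_report_metric``), so this is the rear-left/front-left
+            distance.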
+ + Return: + np.ndarray[N, 2]: normalized factor + """ + + interocular = np.linalg.norm( + gts[:, 0, :] - gts[:, 2, :], axis=1, keepdims=True) + return np.tile(interocular, [1, 2]) + + def _write_coco_keypoint_results(self, keypoints, res_file): + """Write results into a json file.""" + data_pack = [{ + 'cat_id': self._class_to_coco_ind[cls], + 'cls_ind': cls_ind, + 'cls': cls, + 'ann_type': 'keypoints', + 'keypoints': keypoints + } for cls_ind, cls in enumerate(self.classes) + if not cls == '__background__'] + + results = self._coco_keypoint_results_one_category_kernel(data_pack[0]) + + with open(res_file, 'w') as f: + json.dump(results, f, sort_keys=True, indent=4) + + def _coco_keypoint_results_one_category_kernel(self, data_pack): + """Get coco keypoint results.""" + cat_id = data_pack['cat_id'] + keypoints = data_pack['keypoints'] + cat_results = [] + + for img_kpts in keypoints: + if len(img_kpts) == 0: + continue + + _key_points = np.array( + [img_kpt['keypoints'] for img_kpt in img_kpts]) + key_points = _key_points.reshape(-1, + self.ann_info['num_joints'] * 3) + + result = [{ + 'image_id': img_kpt['image_id'], + 'category_id': cat_id, + 'keypoints': key_point.tolist(), + 'score': float(img_kpt['score']), + 'center': img_kpt['center'].tolist(), + 'scale': img_kpt['scale'].tolist() + } for img_kpt, key_point in zip(img_kpts, key_points)] + + cat_results.extend(result) + + return cat_results + + def _do_python_keypoint_eval(self, res_file): + """Keypoint evaluation using COCOAPI.""" + coco_det = self.coco.loadRes(res_file) + coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas) + coco_eval.params.useSegm = None + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + stats_names = [ + 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', + 'AR .75', 'AR (M)', 'AR (L)' + ] + + info_str = list(zip(stats_names, coco_eval.stats)) + + return info_str + + def _sort_and_unique_bboxes(self, kpts, key='bbox_id'): + """sort kpts and remove the repeated ones.""" + for img_id, persons in kpts.items(): + num = len(persons) + kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key]) + for i in range(num - 1, 0, -1): + if kpts[img_id][i][key] == kpts[img_id][i - 1][key]: + del kpts[img_id][i] + + return kpts + + def _do_python_keypoint_eval_per_keypoint(self, res_file): + """Keypoint evaluation using COCOAPI for each keypoint.""" + keypoint_names = [ + 'rear_left', 'rear_right', 'front_left', 'front_right', + 'L_Fork', 'R_Fork', 'C_Fork' + ] + info_str = [] + + for kp_idx, kp_name in enumerate(keypoint_names): + coco_det = self.coco.loadRes(res_file) + coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas) + coco_eval.params.kpt_oks_sigmas = np.zeros_like(self.sigmas) + coco_eval.params.kpt_oks_sigmas[kp_idx] = self.sigmas[kp_idx] + coco_eval.params.useSegm = None + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + stats_names = [ + f'{kp_name}_AP', f'{kp_name}_AR' + ] + info_str.extend(list(zip(stats_names, coco_eval.stats))) + + return info_str diff --git a/mmpose/datasets/transforms/__init__.py b/mmpose/datasets/transforms/__init__.py index 54ad7f3159..e15c391997 100644 --- a/mmpose/datasets/transforms/__init__.py +++ b/mmpose/datasets/transforms/__init__.py @@ -5,7 +5,7 @@ from .common_transforms import (Albumentation, FilterAnnotations, GenerateTarget, GetBBoxCenterScale, PhotometricDistortion, RandomBBoxTransform, - RandomFlip, RandomHalfBody, YOLOXHSVRandomAug) + RandomFlip, RandomHalfBody, YOLOXHSVRandomAug, 
RandomBottomHalf, TorchVisionWrapper) from .converting import KeypointConverter, SingleHandConverter from .formatting import PackPoseInputs from .hand_transforms import HandRandomFlip @@ -22,5 +22,5 @@ 'GenerateTarget', 'KeypointConverter', 'RandomFlipAroundRoot', 'FilterAnnotations', 'YOLOXHSVRandomAug', 'YOLOXMixUp', 'Mosaic', 'BottomupRandomCrop', 'BottomupRandomChoiceResize', 'HandRandomFlip', - 'SingleHandConverter' + 'SingleHandConverter', 'RandomBottomHalf', 'TorchVisionWrapper' ] diff --git a/mmpose/datasets/transforms/common_transforms.py b/mmpose/datasets/transforms/common_transforms.py index b29417f045..422b0c0b66 100644 --- a/mmpose/datasets/transforms/common_transforms.py +++ b/mmpose/datasets/transforms/common_transforms.py @@ -19,7 +19,14 @@ from mmpose.structures.bbox import bbox_xyxy2cs, flip_bbox from mmpose.structures.keypoint import flip_keypoints from mmpose.utils.typing import MultiConfig +from PIL import Image +try: + import torch + from torchvision import transforms as T +except ImportError: + T = None + try: import albumentations except ImportError: @@ -28,6 +35,127 @@ Number = Union[int, float] + +class CustomTrivialAugment(T.TrivialAugmentWide): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + + def _augmentation_space(self, num_bins: int) -> Dict[str, Tuple[torch.Tensor, bool]]: + return { + # op_name: (magnitudes, signed) + "Identity": (torch.tensor(0.0), False), + # "ShearX": (torch.linspace(0.0, 0.99, num_bins), True), # Remove augmentation that + # "ShearY": (torch.linspace(0.0, 0.99, num_bins), True), # can change keypoints position + # "TranslateX": (torch.linspace(0.0, 32.0, num_bins), True), + # "TranslateY": (torch.linspace(0.0, 32.0, num_bins), True), + # "Rotate": (torch.linspace(0.0, 135.0, num_bins), True), + "Brightness": (torch.linspace(0.0, 0.99, num_bins), True), + "Color": (torch.linspace(0.0, 0.99, num_bins), True), + "Contrast": (torch.linspace(0.0, 0.99, num_bins), True), + "Sharpness": (torch.linspace(0.0, 0.99, num_bins), True), + "Posterize": (8 - (torch.arange(num_bins) / ((num_bins - 1) / 6)).round().int(), False), + # "Solarize": (torch.linspace(256.0, 0.0, num_bins), False), # didn`t work with tensors + "AutoContrast": (torch.tensor(0.0), False), + "Equalize": (torch.tensor(0.0), False), + } + + +T.TrivialAugmentWide = CustomTrivialAugment + + +def tensor_to_image(tensor: torch.Tensor) -> np.ndarray: + """ + Convert a PyTorch tensor to an image array. + + Args: + tensor (torch.Tensor): The input tensor. + Returns: + np.ndarray: The converted image array. 
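+
+    Example (illustrative):
+        >>> img = tensor_to_image(torch.rand(3, 256, 192))
+        >>> img.shape, img.dtype
+        ((256, 192, 3), dtype('uint8'))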
+    """
+
+    assert isinstance(tensor, torch.Tensor), "Input must be a PyTorch tensor"
+    if tensor.ndim == 4:  # If batch (B, C, H, W)
+        return np.stack([tensor_to_image(t) for t in tensor])
+    assert tensor.ndim == 3, "Expected tensor dimensions (C, H, W) or (B, C, H, W)"
+
+    tensor = tensor.detach().cpu().permute(1, 2, 0).numpy()  # (C, H, W) -> (H, W, C)
+
+    if tensor.dtype == np.float32 or tensor.dtype == np.float64:
+        tensor = (tensor * 255).clip(0, 255).astype(np.uint8)  # Clip and convert to uint8
+
+    return tensor
+
+
+@TRANSFORMS.register_module()
+class TorchVisionWrapper(BaseTransform):
+    """Apply a sequence of torchvision transforms to ``results['inputs']``.
+
+    Args:
+        transforms (list): Config dicts of torchvision transforms, built via
+            :meth:`torchvision_builder`.
+        save (bool): If ``True``, dump the first 100 transformed images to
+            disk for visual inspection. Defaults to ``False``.
+    """
+
+    def __init__(self, transforms: list = [], save: bool = False) -> None:
+        super().__init__()
+        self.transforms_dict = transforms
+        self.transforms = T.Compose(
+            [self.torchvision_builder(transform) for transform in self.transforms_dict])
+        self.idx = 0
+        self.save = save
+
+    def torchvision_builder(self, cfg: dict):
+        """
+        Build a TorchVision transformation object from a configuration dictionary.
+
+        Args:
+            cfg (dict): A configuration dictionary containing transformation parameters.
+                It must include a 'type' key. Optionally, it may contain a 'transforms'
+                key, which should be a list of configurations for nested transformations.
+
+        Raises:
+            RuntimeError: If torchvision is not installed and a string type is provided.
+            TypeError: If the 'type' key in the configuration is neither a string nor a class.
+        """
+        args = cfg.copy()
+
+        obj_type = args.pop('type')
+        if mmengine.is_str(obj_type):
+            if T is None:
+                raise RuntimeError('torchvision is not installed')
+            obj_cls = getattr(T, obj_type)
+        elif isinstance(obj_type, type):
+            obj_cls = obj_type
+        else:
+            raise TypeError(f'type must be a str, but got {type(obj_type)}')
+
+        if 'transforms' in args:
+            args['transforms'] = [
+                self.torchvision_builder(transform)
+                for transform in args['transforms']
+            ]
+
+        return obj_cls(**args)
+
+    def transform(self, results: Dict) -> dict:
+        """
+        Apply the TorchVision transformations to the input data.
+
+        Args:
+            results (Dict): A dictionary containing the input data.
+
+        Returns:
+            dict: The transformed input data.
+        """
+        results['inputs'] = self.transforms(results['inputs'])
+
+        if self.save and self.idx < 100:
+            import cv2  # local import so cv2 stays an optional dependency here
+            res = tensor_to_image(results['inputs'])
+            cv2.imwrite(f"/mmpose/test/image{self.idx}.jpg", res)
+            self.idx += 1
+
+        return results
+
+
 @TRANSFORMS.register_module()
 class GetBBoxCenterScale(BaseTransform):
     """Convert bboxes from [x, y, w, h] to center and scale.
@@ -259,6 +387,57 @@ def __repr__(self) -> str:
         repr_str += f'direction={self.direction})'
         return repr_str
 
+@TRANSFORMS.register_module()
+class RotationCorrection(BaseTransform):
+    """Placeholder for a rotation-correction transform.
+
+    TODO: not implemented yet; currently a no-op that returns the input
+    results unchanged.
+    """
+
+    def transform(self, result: dict) -> dict:
+        return result
+
+
+@TRANSFORMS.register_module()
+class RandomBottomHalf(BaseTransform):
+    """Randomly mask out the bottom part of the image.
+
+    Args:
+        threshold (float): Fraction of the image height (from the bottom)
+            used to decide whether all keypoints lie in the bottom region.
+            Defaults to 0.4.
+        p (float): Probability of applying the masking. Defaults to 0.5.
+    """
+
+    def __init__(self, threshold: float = 0.4, p: float = 0.5) -> None:
+        self.threshold = threshold
+        self.prob = p
+
+    def transform(self, results: dict) -> dict:
+        """The transform function.
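+
+        If every keypoint of the instance lies within the bottom ``threshold``
+        fraction of the image, the bottom 40% of the image (hard-coded) is
+        filled with black with probability ``p``; the keypoint annotations
+        themselves are left unchanged.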
""" + img = results['img'].copy() + keypoints = results['transformed_keypoints'][0] + + # Line + y = int(img.shape[0] - self.threshold * img.shape[0]) + + under_line = np.array([kp for kp in keypoints if kp[1] > y]) + + if len(under_line) == results['num_keypoints']: + height, width, _ = img.shape + bottom_height = int(height * 0.4) + + if np.random.rand() < self.prob: + cv2.rectangle(img, (0, height - bottom_height), (width, height), (0, 0, 0), -1) + + results['img'] = img + return results + + def __repr__(self) -> str: + """print the basic information of the transform. + + Returns: + str: Formatted string. + """ + repr_str = self.__class__.__name__ + repr_str += f'(prob={self.prob}, ' + repr_str += f'threshold={self.threshold})' + return repr_str + @TRANSFORMS.register_module() class RandomHalfBody(BaseTransform): @@ -1064,7 +1243,7 @@ def transform(self, results: Dict) -> Optional[dict]: w = w * results['dataset_keypoint_weights'] else: encoded['keypoint_weights'] = encoded[ - 'keypoint_weights'] * results['dataset_keypoint_weights'] + 'keypoint_weights'] * results['dataset_keypoint_weights'] results.update(encoded) diff --git a/mmpose/datasets/transforms/topdown_transforms.py b/mmpose/datasets/transforms/topdown_transforms.py index c76d45e46a..ece0504814 100644 --- a/mmpose/datasets/transforms/topdown_transforms.py +++ b/mmpose/datasets/transforms/topdown_transforms.py @@ -5,6 +5,7 @@ import numpy as np from mmcv.transforms import BaseTransform from mmengine import is_seq_of +import os.path as osp from mmpose.registry import TRANSFORMS from mmpose.structures.bbox import get_udp_warp_matrix, get_warp_matrix @@ -51,6 +52,7 @@ def __init__(self, self.input_size = input_size self.use_udp = use_udp + self.idx = 0 @staticmethod def _fix_aspect_ratio(bbox_scale: np.ndarray, aspect_ratio: float): @@ -134,6 +136,23 @@ def transform(self, results: Dict) -> Optional[dict]: results['input_center'] = center results['input_scale'] = scale + # output_dir = 'augmented_images' + # img_filename = f'aug_{self.idx}.jpg' + # self.idx += 1 + + # img = results['img'] + # keypoints = results['transformed_keypoints'] + + # for obj in keypoints: + # for kp in obj: + # print(kp) + # x, y = kp + # v = 1 + # if v > 0: # Visibility flag + # cv2.circle(img, (int(x), int(y)), 3, (0, 255, 0), -1) + + # cv2.imwrite(osp.join(output_dir, img_filename), img) + # print(f'========== {self.idx}') return results def __repr__(self) -> str: diff --git a/mmpose/engine/hooks/custom_pck.py b/mmpose/engine/hooks/custom_pck.py new file mode 100644 index 0000000000..7a09437301 --- /dev/null +++ b/mmpose/engine/hooks/custom_pck.py @@ -0,0 +1,42 @@ +import torch +from mmengine.hooks import Hook +from mmpose.evaluation.functional import keypoint_pck_accuracy +from mmpose.structures import merge_data_samples, PoseDataSample + +from mmpose.registry import HOOKS + +@HOOKS.register_module() +class PCKAccuracyTrainHook(Hook): + """A custom hook to calculate and log PCK accuracy during training. + + Args: + interval (int): The interval (in iterations) at which to log PCK accuracy. + thr (float): The threshold for PCK calculation. 
+ """ + + def __init__(self, interval=10, thr=0.05): + self.interval = interval + self.thr = thr + + def after_train_iter(self, runner, batch_idx, data_batch=None, outputs=None): + """Calculates PCK accuracy after each training iteration at a given interval.""" + if batch_idx % self.interval == 0: + model = runner.model + + # Forward pass to get predictions + with torch.no_grad(): + preds = model(**data_batch) + + # Ground truth keypoints + gts = data_batch['keypoints'] # Assuming keypoints are in data_batch + + # Ensure that predictions and ground truth are properly shaped + if preds is None or gts is None: + runner.logger.warning("Predictions or ground truth keypoints are missing.") + return + + # Calculate the PCK accuracy + acc, _ = keypoint_pck_accuracy(preds, gts, thr=self.thr) + + # Log the PCK accuracy + runner.logger.info(f'Training PCK accuracy @ {self.thr}: {acc:.4f}') diff --git a/mmpose/evaluation/functional/keypoint_eval.py b/mmpose/evaluation/functional/keypoint_eval.py index 847faaf6d8..decc48e790 100644 --- a/mmpose/evaluation/functional/keypoint_eval.py +++ b/mmpose/evaluation/functional/keypoint_eval.py @@ -5,6 +5,7 @@ from mmpose.codecs.utils import get_heatmap_maximum, get_simcc_maximum from .mesh_eval import compute_similarity_transform +from typing import List def _calc_distances(preds: np.ndarray, gts: np.ndarray, mask: np.ndarray, @@ -65,7 +66,7 @@ def _distance_acc(distances: np.ndarray, thr: float = 0.5) -> float: def keypoint_pck_accuracy(pred: np.ndarray, gt: np.ndarray, mask: np.ndarray, - thr: np.ndarray, norm_factor: np.ndarray) -> tuple: + thr: np.ndarray, norm_factor: np.ndarray, symmetry_indices:List[List[int]]=None) -> tuple: """Calculate the pose accuracy of PCK for each individual keypoint and the averaged accuracy across all keypoints for coordinates. @@ -95,7 +96,26 @@ def keypoint_pck_accuracy(pred: np.ndarray, gt: np.ndarray, mask: np.ndarray, - avg_acc (float): Averaged accuracy across all keypoints. - cnt (int): Number of valid keypoints. """ - distances = _calc_distances(pred, gt, mask, norm_factor) + if not symmetry_indices: + # (N, K, 2) + distances = _calc_distances(pred, gt, mask, norm_factor) + # (K, N) + else: + + distances = np.stack([ + _calc_distances(pred[:, indices,:], gt, mask, norm_factor).transpose() + for indices in symmetry_indices + ], axis=1, + ) + N, S, K = distances.shape + _distances = distances.copy() + _distances[distances == -1] = 0 + _distances = _distances.sum(-1) + min_indices = np.argmin(_distances, axis=-1) + #with np.printoptions(suppress=True, precision=2): + # print(distances[min_indices == 1]) + distances = distances[np.arange(N), min_indices, :] + acc = np.array([_distance_acc(d, thr) for d in distances]) valid_acc = acc[acc >= 0] cnt = len(valid_acc) @@ -103,6 +123,7 @@ def keypoint_pck_accuracy(pred: np.ndarray, gt: np.ndarray, mask: np.ndarray, return acc, avg_acc, cnt + def keypoint_auc(pred: np.ndarray, gt: np.ndarray, mask: np.ndarray, @@ -192,7 +213,9 @@ def pose_pck_accuracy(output: np.ndarray, target: np.ndarray, mask: np.ndarray, thr: float = 0.05, - normalize: Optional[np.ndarray] = None) -> tuple: + normalize: Optional[np.ndarray] = None, + symmetry_indices: Optional[List[List[int]]] = None + ) -> tuple: """Calculate the pose accuracy of PCK for each individual keypoint and the averaged accuracy across all keypoints from heatmaps. 
@@ -232,7 +255,7 @@ def pose_pck_accuracy(output: np.ndarray, pred, _ = get_heatmap_maximum(output) gt, _ = get_heatmap_maximum(target) - return keypoint_pck_accuracy(pred, gt, mask, thr, normalize) + return keypoint_pck_accuracy(pred, gt, mask, thr, normalize, symmetry_indices) def simcc_pck_accuracy(output: Tuple[np.ndarray, np.ndarray], diff --git a/mmpose/evaluation/metrics/keypoint_2d_metrics.py b/mmpose/evaluation/metrics/keypoint_2d_metrics.py index c0be4b398f..365657456a 100644 --- a/mmpose/evaluation/metrics/keypoint_2d_metrics.py +++ b/mmpose/evaluation/metrics/keypoint_2d_metrics.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. import warnings -from typing import Dict, Optional, Sequence, Union +from typing import Dict, Optional, Sequence, Union, List import numpy as np from mmengine.evaluator import BaseMetric @@ -71,12 +71,24 @@ def __init__(self, thr: float = 0.05, norm_item: Union[str, Sequence[str]] = 'bbox', collect_device: str = 'cpu', - prefix: Optional[str] = None) -> None: + prefix: Optional[str] = None, + labels:List[str] = None, + symmetries:List[Dict[str, str]] = None, + ) -> None: super().__init__(collect_device=collect_device, prefix=prefix) self.thr = thr self.norm_item = norm_item if isinstance(norm_item, (tuple, list)) else [norm_item] + + self.symmetry_indieces = None + if labels and symmetries: + label_to_index = {l: i for i, l in enumerate(labels)} + self.symmetry_indieces = [[label_to_index[l] for l in labels]] + for symmetry in symmetries: + symmetry_indices = [label_to_index[symmetry[l]] for l in labels ] + self.symmetry_indieces.append(symmetry_indices) + allow_normalized_items = ['bbox', 'head', 'torso'] for item in self.norm_item: if item not in allow_normalized_items: @@ -180,8 +192,8 @@ def compute_metrics(self, results: list) -> Dict[str, float]: f'(normalized by ``"bbox_size"``)...') _, pck, _ = keypoint_pck_accuracy(pred_coords, gt_coords, mask, - self.thr, norm_size_bbox) - metrics['PCK'] = pck + self.thr, norm_size_bbox, self.symmetry_indieces) + metrics[f'PCK@{self.thr}'] = pck if 'head' in self.norm_item: norm_size_head = np.concatenate( diff --git a/mmpose/models/backbones/resnet.py b/mmpose/models/backbones/resnet.py index a04853f60d..7bb42e9eff 100644 --- a/mmpose/models/backbones/resnet.py +++ b/mmpose/models/backbones/resnet.py @@ -491,6 +491,7 @@ class ResNet(BaseBackbone): """ arch_settings = { + 9: (BasicBlock, (1, 1, 1, 1)), 18: (BasicBlock, (2, 2, 2, 2)), 34: (BasicBlock, (3, 4, 6, 3)), 50: (Bottleneck, (3, 4, 6, 3)), diff --git a/mmpose/models/heads/heatmap_heads/heatmap_head.py b/mmpose/models/heads/heatmap_heads/heatmap_head.py index ccb10fcf54..26f6217012 100644 --- a/mmpose/models/heads/heatmap_heads/heatmap_head.py +++ b/mmpose/models/heads/heatmap_heads/heatmap_head.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from typing import Optional, Sequence, Tuple, Union +from typing import Optional, Sequence, Tuple, Union, List, Dict import torch from mmcv.cnn import build_conv_layer, build_upsample_layer @@ -65,13 +65,24 @@ def __init__(self, loss: ConfigType = dict( type='KeypointMSELoss', use_target_weight=True), decoder: OptConfigType = None, - init_cfg: OptConfigType = None): + init_cfg: OptConfigType = None, + labels: Optional[List[str]] = None, + symmetries:Optional[List[Dict[str, str]]]= None , + ): if init_cfg is None: init_cfg = self.default_init_cfg super().__init__(init_cfg) + self.symmetry_indices = None + if labels and symmetries: + label_to_index = {l: i for i, l in enumerate(labels)} + self.symmetry_indices = [[label_to_index[l] for l in labels]] + for symmetry in symmetries: + symmetry_indices = [label_to_index[symmetry[l]] for l in labels ] + self.symmetry_indices.append(symmetry_indices) + self.in_channels = in_channels self.out_channels = out_channels self.loss_module = MODELS.build(loss) @@ -309,7 +320,10 @@ def loss(self, _, avg_acc, _ = pose_pck_accuracy( output=to_numpy(pred_fields), target=to_numpy(gt_heatmaps), - mask=to_numpy(keypoint_weights) > 0) + mask=to_numpy(keypoint_weights) > 0, + symmetry_indices=self.symmetry_indices + ) + acc_pose = torch.tensor(avg_acc, device=gt_heatmaps.device) losses.update(acc_pose=acc_pose) diff --git a/mmpose/models/losses/__init__.py b/mmpose/models/losses/__init__.py index bec6c06846..581e5a3e4b 100644 --- a/mmpose/models/losses/__init__.py +++ b/mmpose/models/losses/__init__.py @@ -5,7 +5,7 @@ VariFocalLoss) from .fea_dis_loss import FeaLoss from .heatmap_loss import (AdaptiveWingLoss, KeypointMSELoss, - KeypointOHKMMSELoss, MLECCLoss) + KeypointOHKMMSELoss, MLECCLoss, OutputSymmetryLoss) from .logit_dis_loss import KDLoss from .loss_wrappers import CombinedLoss, MultipleLossWrapper from .regression_loss import (BoneLoss, L1Loss, MPJPELoss, @@ -20,5 +20,5 @@ 'KLDiscretLoss', 'MultipleLossWrapper', 'JSDiscretLoss', 'CombinedLoss', 'AssociativeEmbeddingLoss', 'SoftWeightSmoothL1Loss', 'MPJPEVelocityJointLoss', 'FeaLoss', 'KDLoss', 'OKSLoss', 'IoULoss', - 'VariFocalLoss', 'MLECCLoss' + 'VariFocalLoss', 'MLECCLoss', 'OutputSymmetryLoss' ] diff --git a/mmpose/models/losses/heatmap_loss.py b/mmpose/models/losses/heatmap_loss.py index 4618e69ed6..04f0ad5d73 100644 --- a/mmpose/models/losses/heatmap_loss.py +++ b/mmpose/models/losses/heatmap_loss.py @@ -1,10 +1,11 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Optional +from typing import Optional, Dict, List import torch import torch.nn as nn import torch.nn.functional as F from torch import Tensor +from mmpose.utils.typing import ConfigType from mmpose.registry import MODELS @@ -59,7 +60,6 @@ def forward(self, Returns: Tensor: The calculated loss. """ - _mask = self._get_mask(target, target_weights, mask) if _mask is None: loss = F.mse_loss(output, target) @@ -117,6 +117,76 @@ def _get_mask(self, target: Tensor, target_weights: Optional[Tensor], return mask +@MODELS.register_module() +class OutputSymmetryLoss(nn.Module): + """MSE loss for heatmaps. + + Args: + use_target_weight (bool): Option to use weighted MSE loss. + Different joint types may have different target weights. + Defaults to ``False`` + skip_empty_channel (bool): If ``True``, heatmap channels with no + non-zero value (which means no visible ground-truth keypoint + in the image) will not be used to calculate the loss. Defaults to + ``False`` + loss_weight (float): Weight of the loss. 
Defaults to 1.0 + """ + + def __init__(self, + labels:List[str], + symmetries:List[Dict[str, str]], + ): + super().__init__() + + label_to_index = {l: i for i, l in enumerate(labels)} + self.loss_indices = [[label_to_index[l] for l in labels]] + for symmetry in symmetries: + symmetry_indices = [label_to_index[symmetry[l]] for l in labels ] + self.loss_indices.append(symmetry_indices) + self.base_loss : KeypointMSELoss = MODELS.build(dict(type='KeypointMSELoss', use_target_weight=True)) + + + def forward(self, + output: Tensor, + target: Tensor, + target_weights: Optional[Tensor] = None, + mask: Optional[Tensor] = None) -> Tensor: + """Forward function of loss. + + Note: + - batch_size: B + - num_keypoints: K + - heatmaps height: H + - heatmaps weight: W + + Args: + output (Tensor): The output heatmaps with shape [B, K, H, W] + target (Tensor): The target heatmaps with shape [B, K, H, W] + target_weights (Tensor, optional): The target weights of differet + keypoints, with shape [B, K] (keypoint-wise) or + [B, K, H, W] (pixel-wise). + mask (Tensor, optional): The masks of valid heatmap pixels in + shape [B, K, H, W] or [B, 1, H, W]. If ``None``, no mask will + be applied. Defaults to ``None`` + + Returns: + Tensor: The calculated loss. + """ + B, K, H, W = output.shape + + losses = [] + _mask = self.base_loss._get_mask(target, target_weights, mask) + for indieces in self.loss_indices: + _loss = F.mse_loss(output[:, indieces, : , :], target, reduction='none') + if _mask is not None: + _loss = _loss * _mask + _loss = _loss.reshape((B, -1)).sum(dim=-1) + losses.append(_loss) + + losses = torch.stack(losses, dim=-1) + min_losses, _ = losses.min(dim=-1) + loss = min_losses.mean() / (H * W * K) + return loss @MODELS.register_module() class CombinedTargetMSELoss(nn.Module): diff --git a/run_docker.sh b/run_docker.sh new file mode 100644 index 0000000000..8ce1ef2d5a --- /dev/null +++ b/run_docker.sh @@ -0,0 +1,4 @@ +docker run --network host -w /data/mmpose \ + -v /data:/data \ + --gpus all --shm-size=8g -it mmpose \ + /bin/bash -c "python setup.py develop && /bin/bash" diff --git a/run_docker_gpu0.sh b/run_docker_gpu0.sh new file mode 100644 index 0000000000..8122d4b1b6 --- /dev/null +++ b/run_docker_gpu0.sh @@ -0,0 +1,8 @@ +docker run --network host -w /mmpose \ + -dit \ + --name mmpose \ + -v /code/mmpose:/mmpose \ + -v /data:/data \ + --gpus '"device=0"' --shm-size=8g -it \ + mmpose:latest + diff --git a/run_docker_gpu1.sh b/run_docker_gpu1.sh new file mode 100644 index 0000000000..409f92552e --- /dev/null +++ b/run_docker_gpu1.sh @@ -0,0 +1,3 @@ +docker run --network host -w /mmpose \ + -v /data/mmpose:/mmpose \ + --gpus '"device=1"' --shm-size=8g -it mmpose:1.3.2 diff --git a/run_make_nvinfer.sh b/run_make_nvinfer.sh new file mode 100644 index 0000000000..8593fc1094 --- /dev/null +++ b/run_make_nvinfer.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +NVINFER_FILE="" +ONNX_FILENAME="" +OPERATE_ON_CLASS_NAMES=() +CLASSES=() +RES=() + +while [[ $# -gt 0 ]]; do + case "$1" in + --nvinfer-file) + NVINFER_FILE="$2" + shift 2 + ;; + --onnx-filename) + ONNX_FILENAME="$2" + shift 2 + ;; + --operate-on-class-names) + shift + while [[ $# -gt 0 ]] && [[ ! "$1" =~ ^-- ]]; do + OPERATE_ON_CLASS_NAMES+=("$1") + shift + done + ;; + --classes) + shift + while [[ $# -gt 0 ]] && [[ ! "$1" =~ ^-- ]]; do + CLASSES+=("$1") + shift + done + ;; + --res) + shift + while [[ $# -gt 0 ]] && [[ ! 
"$1" =~ ^-- ]]; do + RES+=("$1") + shift + done + ;; + *) + echo "Unknown parameter: $1" + exit 1 + ;; + esac +done + +OPERATE_ON_CLASS_NAMES=$(IFS=';' ; echo "${OPERATE_ON_CLASS_NAMES[*]}") +CLASSES=$(IFS=';' ; echo "${CLASSES[*]}") +RES=$(IFS=';' ; echo "${RES[*]}") + +echo "[property] + +# model loading. +onnx-file=$ONNX_FILENAME + +# model config +infer-dims=3;$RES + +[custom] +min-kp-score=0.0 +operate-on-class-names=$OPERATE_ON_CLASS_NAMES +kp-names=$CLASSES +" > "$NVINFER_FILE" diff --git a/tools/export_model.py b/tools/export_model.py new file mode 100644 index 0000000000..f84be85415 --- /dev/null +++ b/tools/export_model.py @@ -0,0 +1,96 @@ +import argparse + +import mmcv +from mmcv.runner import load_checkpoint +from mmpose.models import build_posenet +from tools.export_specs import export_for_lv +from tools.pytorch2onnx import pytorch2onnx, _convert_batchnorm + +try: + import onnx + import onnxruntime as rt +except ImportError as e: + raise ImportError(f'Please install onnx and onnxruntime first. {e}') + +try: + from mmcv.onnx.symbolic import register_extra_symbolics +except ModuleNotFoundError: + raise NotImplementedError('please update mmcv to version>=1.0.4') + + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert MMPose models to ONNX') + parser.add_argument('config', help='test config file path') + parser.add_argument('checkpoint', help='checkpoint file') + parser.add_argument('--show', action='store_true', help='show onnx graph') + parser.add_argument('--output-file', type=str, default='tmp.onnx') + parser.add_argument('--opset-version', type=int, default=11) + parser.add_argument( + '--verify', + action='store_true', + help='verify the onnx model output against pytorch output') + parser.add_argument( + '--add-normalization', + action='store_true', + help='add normalization layer to the exported onnx model') + parser.add_argument( + '--shape', + type=int, + nargs='+', + default=[1, 3, 256, 192], + help='input size') + parser.add_argument( + '--project_name', + type=str, + help='name of the project for lv usage, should match the folder name structure' + 'in the ml_models repo: e.g. \"brummer\"', + required=True + ) + parser.add_argument( + '--author', + type=str, + help='full name of the Author of this training: e.g. \"Christian Holland\"', + required=True + ) + parser.add_argument( + '--jira_task', + type=str, + help='shortened name of the Jira task for this training: e.g. 
\"OR-1926\"', + required=True + ) + args = parser.parse_args() + return args + + + +if __name__ == '__main__': + args = parse_args() + + assert args.opset_version == 11, 'MMPose only supports opset 11 now' + + cfg = mmcv.Config.fromfile(args.config) + # build the model + model = build_posenet(cfg.model) + model = _convert_batchnorm(model) + + # onnx.export does not support kwargs + if hasattr(model, 'forward_dummy'): + model.forward = model.forward_dummy + else: + raise NotImplementedError( + 'Please implement the forward method for exporting.') + + checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu') + model_output_path = export_for_lv(args) + + pytorch2onnx( + model, + args.shape, + opset_version=args.opset_version, + show=args.show, + output_file=model_output_path, + verify=args.verify, + add_normalization=args.add_normalization) + print(f"Model exported successfully to: {model_output_path}") \ No newline at end of file diff --git a/tools/export_specs.py b/tools/export_specs.py new file mode 100644 index 0000000000..43395f17a2 --- /dev/null +++ b/tools/export_specs.py @@ -0,0 +1,201 @@ +import argparse +import copy +import glob +import json +import os +import os.path as osp +import shutil +import time +from typing import Dict + +from mmcv import Config, DictAction + +YAML_DEFAULT_MIN_KP_SCORE = -99999.0 +YAML_DEFAULT_OPT_BATCH_SIZE = 4 +YAML_DEFAULT_MAX_BATCH_SIZE = 16 + +def parse_args(): + parser = argparse.ArgumentParser(description='Train a pose model') + parser.add_argument('config', help='train config file path') + parser.add_argument('--work-dir', help='the dir to save logs and models') + parser.add_argument( + '--project_name', + type=str, + help='name of the project for lv usage, should match the folder name structure' + 'in the ml_models repo: e.g. \"brummer\"', + required=True) + parser.add_argument( + '--author', + type=str, + help='full name of the Author of this training: e.g. \"Christian Holland\"', + required=True) + parser.add_argument( + '--jira_task', + type=str, + help='shortened name of the Jira task for this training: e.g. 
\"OR-1926\"', + required=True) + args = parser.parse_args() + + + return args + +def read_json(path: str): + return json.loads( + open(path, + "rb").read()) + +def write_string_as_file(path: str, string: str) -> None: + f = open(path, "w") + f.write(string) + f.close() + +def recreate_dir(dir_name: str): + if os.path.exists(dir_name): + shutil.rmtree(dir_name) + os.makedirs(dir_name) + + +def write_detector_yaml(cfg: Config, coco_file: Dict, write_dir: str, name: str) -> None: + detector_name = "KeypointTrtDetector" + heatmap_shape = cfg["data_cfg"]["heatmap_size"] + input_shape = cfg["data_cfg"]["image_size"] + export_name = f"{name}.onnx" + onnx_file_path = f"/data/ml_models/models/keypoint_trt/{cfg.project_name}/{export_name}" + kp_names = coco_file["categories"][0]["keypoints"] + yaml_contents = f"- !<{detector_name}>\n" \ + f" name: {name}\n" \ + f" heatmap_shape: {heatmap_shape}\n" \ + f" input_shape: {input_shape}\n" \ + f" onnx_file_path: {onnx_file_path}\n" \ + f" kp_names: {kp_names}\n" \ + f" min_kp_score: {YAML_DEFAULT_MIN_KP_SCORE}\n" \ + f" opt_batch_size: {YAML_DEFAULT_OPT_BATCH_SIZE}\n" \ + f" max_batch_size: {YAML_DEFAULT_MAX_BATCH_SIZE}\n" + write_path = os.path.join(write_dir, "detectors.yaml") + write_string_as_file(write_path, yaml_contents) + +def write_deepstream_config(cfg: Config, coco_file: Dict, write_dir: str, name: str): + export_name = f"{name}.onnx" + onnx_file_path = f"/data/ml_models/models/keypoint_trt/{cfg.project_name}/{export_name}" + input_shape = cfg["data_cfg"]["image_size"] + kp_names = coco_file["categories"][0]["keypoints"] + + ds_config_contents = f"[property]\n" \ + f"gpu-id=0\n" \ + f"\n" \ + f"# preprocessing parameters.\n" \ + f"net-scale-factor=0.01742919389\n" \ + f"offsets=123.675;116.128;103.53\n" \ + f"model-color-format=0\n" \ + f"scaling-filter=1 # 0=Nearest, 1=Bilinear\n" \ + f"\n" \ + f"# model loading.\n" \ + f"onnx-file={onnx_file_path}\n" \ + f"model-engine-file={onnx_file_path}_b8_gpu0_fp16.engine\n" \ + f"\n" \ + f"# model config\n" \ + f"infer-dims=3;{input_shape[1]};{input_shape[0]}\n" \ + f"batch-size=8\n" \ + f"network-mode=2 # 0=FP32, 1=INT8, 2=FP16\n" \ + f"network-type=100 # >3 disables post-processing\n" \ + f"cluster-mode=4 # 1=DBSCAN 4=No Clustering\n" \ + f"gie-unique-id=2\n" \ + f"process-mode=2 # 1=Primary, 2=Secondary\n" \ + f"output-tensor-meta=1\n" \ + f"operate-on-class-ids=<<<>>>\n" \ + f"\n" \ + f"[custom]\n" \ + f"min-kp-score=0.0\n" \ + f"kp-names={';'.join(kp_names)}\n" + write_path = os.path.join(write_dir, "keypoint-config.txt") + write_string_as_file(write_path, ds_config_contents) + + + +def write_info_file(cfg: Config, write_dir: str) -> None: + result_file = os.path.join(cfg.work_dir, "best.json") + result_contents = read_json(result_file) + mAP = result_contents["best_score"] + train_img_folder = cfg["data"]["train"]["img_prefix"] + val_img_folder = cfg["data"]["val"]["img_prefix"] + num_tain_img = len(glob.glob(f"{train_img_folder}/*")) + num_val_img = len(glob.glob(f"{val_img_folder}/*")) + date_trained = time.strftime("%m.%d.%Y %H:%M:%S") + info_file = f" -- Keypoint training info -- \n" \ + f"mAP score: {mAP}\n" \ + f"Num train images: {num_tain_img}\n" \ + f"Num val images: {num_val_img}\n" \ + f"Date trained: {date_trained}\n" \ + f"Jira task: {cfg.jira_task}\n" \ + f"Author: {cfg.author}\n" + write_path = os.path.join(write_dir, "model_info.txt") + write_string_as_file(write_path, info_file) + +def write_gstreamer_config(cfg: Config, write_dir: str, model_name: str) -> None: + input_shape = 
cfg["data_cfg"]["image_size"] + gstreamer_config = f"[property]\n" \ + f"gpu-id=0\n" \ + f"\n" \ + f"# preprocessing parameters.\n" \ + f"net-scale-factor=0.01742919389\n" \ + f"offsets=123.675;116.128;103.53\n" \ + f"model-color-format=0\n" \ + f"scaling-filter=1 # 0=Nearest, 1=Bilinear\n" \ + f"\n" \ + f"# model loading.\n" \ + f"# if the engine file does not exist onnx-file is used to create the engine\n" \ + f"model-engine-file={model_name}.onnx_b8_gpu0_fp16.engine\n" \ + f"onnx-file={model_name}.onnx\n" \ + f"\n" \ + f"# model config\n" \ + f"infer-dims={3};{input_shape[1]};{input_shape[0]}\n" \ + f"batch-size=8\n" \ + f"network-mode=2 # 0=FP32, 1=INT8, 2=FP16\n" \ + f"network-type=100 # >3 disables post-processing\n" \ + f"cluster-mode=4 # 1=DBSCAN 4=No Clustering\n" \ + f"gie-unique-id=2\n" \ + f"process-mode=2 # 1=Primary, 2=Secondary\n" \ + f"output-tensor-meta=1\n" \ + f"\n" \ + f"operate-on-class-ids=0;1;2;3;\n" \ + f"\n" \ + f"[custom]\n" \ + f"min-kp-score=0.0\n" + write_path = os.path.join(write_dir, "gstreamer_config.txt") + write_string_as_file(write_path, gstreamer_config) + + +def copy_training_specs(cfg, write_dir): + shutil.copy(cfg.filename, os.path.join(write_dir, "config.txt")) + + + +def export_for_lv(args): + cfg = Config.fromfile(args.config) + + cfg.work_dir = osp.join('./work_dirs', + osp.splitext(osp.basename(args.config))[0]) + if args.project_name: + cfg.project_name = args.project_name + else: + cfg.project_name = cfg["data"]["train"]["ann_file"].split("/")[1] + cfg.jira_task = args.jira_task + cfg.author = args.author + export_folder = os.path.join(cfg.work_dir, "export") + recreate_dir(export_folder) + coco_file = read_json(cfg["data"]["train"]["ann_file"]) + model_name = f"keypoint_detector_{cfg.project_name}_{time.strftime('%y%m%d')}" + write_detector_yaml(cfg=cfg, coco_file=coco_file, write_dir=export_folder, name=model_name) + write_deepstream_config(cfg=cfg, coco_file=coco_file, write_dir=export_folder, name=model_name) + write_info_file(cfg=cfg, write_dir=export_folder) + write_gstreamer_config(cfg=cfg, write_dir=export_folder, model_name=model_name) + copy_training_specs(cfg=cfg, write_dir=export_folder) + print(f"Training info exported successfully to: {export_folder}") + model_output_path = os.path.join(export_folder, f"{model_name}.onnx") + return model_output_path + + + +if __name__ == '__main__': + args = parse_args() + export_for_lv(args) diff --git a/tools/inference.txt b/tools/inference.txt new file mode 100644 index 0000000000..c67518f2fa --- /dev/null +++ b/tools/inference.txt @@ -0,0 +1,14 @@ +python tools/inference_custom.py \ + /data/new_mmpose/mmpose/work_dirs/ls_1704_res18/td-hm_res50_8xb64-210e_coco-256x192.py \ + /data/new_mmpose/mmpose/work_dirs/ls_1704_res18/epoch_300.pth \ + --img-dir /data/new_mmpose/mmpose/data/1704_split_exported_data_project_id_422/val2017 \ + --out-dir /data/new_mmpose/mmpose/work_dirs/ls_1704_res18/out \ + --bbox-json /data/new_mmpose/mmpose/data/1704_split_exported_data_project_id_422/annotations/forklift_keypoints_val2017.json \ + --output-file /data/new_mmpose/mmpose/work_dirs/ls_1704_res18/results.json + +python tools/inference_custom.py \ + /data/new_mmpose/mmpose/work_dirs/ls_1704_res18/td-hm_res50_8xb64-210e_coco-256x192.py \ + /data/new_mmpose/mmpose/work_dirs/ls_1704_res18/epoch_300.pth \ + --img-dir /data/new_mmpose/mmpose/data/1704_split_exported_data_project_id_422/val2017 \ + --bbox-json 
diff --git a/tools/inference.txt b/tools/inference.txt
new file mode 100644
index 0000000000..c67518f2fa
--- /dev/null
+++ b/tools/inference.txt
@@ -0,0 +1,14 @@
+python tools/inference_custom.py \
+    /data/new_mmpose/mmpose/work_dirs/ls_1704_res18/td-hm_res50_8xb64-210e_coco-256x192.py \
+    /data/new_mmpose/mmpose/work_dirs/ls_1704_res18/epoch_300.pth \
+    --img-dir /data/new_mmpose/mmpose/data/1704_split_exported_data_project_id_422/val2017 \
+    --out-dir /data/new_mmpose/mmpose/work_dirs/ls_1704_res18/out \
+    --bbox-json /data/new_mmpose/mmpose/data/1704_split_exported_data_project_id_422/annotations/forklift_keypoints_val2017.json \
+    --output-file /data/new_mmpose/mmpose/work_dirs/ls_1704_res18/results.json
+
+python tools/inference_custom.py \
+    /data/new_mmpose/mmpose/work_dirs/ls_1704_res18/td-hm_res50_8xb64-210e_coco-256x192.py \
+    /data/new_mmpose/mmpose/work_dirs/ls_1704_res18/epoch_300.pth \
+    --img-dir /data/new_mmpose/mmpose/data/1704_split_exported_data_project_id_422/val2017 \
+    --bbox-json /data/new_mmpose/mmpose/data/1704_split_exported_data_project_id_422/annotations/forklift_keypoints_val2017.json \
+    --output-file /data/new_mmpose/mmpose/work_dirs/ls_1704_res18/results.json
\ No newline at end of file
diff --git a/tools/jsonnn.py b/tools/jsonnn.py
new file mode 100644
index 0000000000..ddf5677db7
--- /dev/null
+++ b/tools/jsonnn.py
@@ -0,0 +1,20 @@
+import json
+import sys
+
+def format_json(input_file, output_file):
+    with open(input_file, 'r') as infile:
+        data = json.load(infile)
+    with open(output_file, 'w') as outfile:
+        json.dump(data, outfile, indent=4)
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: python tools/jsonnn.py <input_file> <output_file>")
+    else:
+        input_file = sys.argv[1]
+        output_file = sys.argv[2]
+        try:
+            format_json(input_file, output_file)
+            print(f"Formatted JSON saved to {output_file}")
+        except Exception as e:
+            print(f"Error: {e}")
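
Note (not part of the patch): typical invocation of the formatter above, with illustrative paths:

    python tools/jsonnn.py work_dirs/ls_1704_res18/results.json work_dirs/ls_1704_res18/results_pretty.json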
diff --git a/tools/test.py b/tools/test.py
index 12fd6b4423..e7381c9a3a 100644
--- a/tools/test.py
+++ b/tools/test.py
@@ -71,6 +71,11 @@ def merge_args(cfg, args):
     cfg.launcher = args.launcher
     cfg.load_from = args.checkpoint
 
+    args.show, args.badcase, args.show_dir, args.dump = False, False, None, None
+    cfg.show, cfg.badcase, cfg.show_dir, cfg.dump = False, False, None, None
+    cfg.default_hooks.badcase.enable = False
+    cfg.default_hooks.visualization.enable = False
+
     # -------------------- work directory --------------------
     # work_dir is determined in this priority: CLI > segment in file > filename
     if args.work_dir is not None:
@@ -83,6 +88,7 @@ def merge_args(cfg, args):
 
     # -------------------- visualization --------------------
     if (args.show and not args.badcase) or (args.show_dir is not None):
+        print(f"---------------------- visualization {args.show} {args.show_dir}")
         assert 'visualization' in cfg.default_hooks, \
             'PoseVisualizationHook is not set in the ' \
             '`default_hooks` field of config. Please set ' \
@@ -98,6 +104,7 @@ def merge_args(cfg, args):
 
     # -------------------- badcase analyze --------------------
     if args.badcase:
+        print(f"---------------------- badcase {args.badcase}")
         assert 'badcase' in cfg.default_hooks, \
             'BadcaseAnalyzeHook is not set in the ' \
             '`default_hooks` field of config. Please set ' \
@@ -123,6 +130,7 @@ def merge_args(cfg, args):
 
     # -------------------- Dump predictions --------------------
     if args.dump is not None:
+        print(f"---------------------- dump {args.dump}")
         assert args.dump.endswith(('.pkl', '.pickle')), \
             'The dump file must be a pkl file.'
         dump_metric = dict(type='DumpResults', out_file_path=args.dump)
@@ -135,6 +143,13 @@ def merge_args(cfg, args):
     if args.cfg_options is not None:
         cfg.merge_from_dict(args.cfg_options)
 
+    args.show, args.badcase, args.show_dir, args.dump = False, False, None, None
+    cfg.show, cfg.badcase, cfg.show_dir, cfg.dump = False, False, None, None
+    cfg.default_hooks.badcase.enable = False
+    cfg.default_hooks.visualization.enable = False
+    cfg.vis_backends = None
+    del cfg.visualizer.vis_backends[1]
+
     return cfg
diff --git a/tools/train.py b/tools/train.py
index 84eec2d577..55fd3c9be4 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -19,6 +19,11 @@ def parse_args():
         help='If specify checkpint path, resume from it, while if not '
         'specify, try to auto resume from the latest checkpoint '
         'in the work directory.')
+    parser.add_argument(
+        '--data-root',
+        type=str,
+        help='Root directory for the dataset. Overrides data_root in the config file.'
+    )
     parser.add_argument(
         '--amp',
         action='store_true',
@@ -94,6 +99,17 @@ def merge_args(cfg, args):
         cfg.work_dir = osp.join('./work_dirs',
                                 osp.splitext(osp.basename(args.config))[0])
 
+    # Update data_root
+    if args.data_root is not None:
+        cfg.train_dataloader.dataset.data_root = args.data_root
+        cfg.val_dataloader.dataset.data_root = args.data_root
+        cfg.test_dataloader.dataset.data_root = args.data_root
+
+        # Update evaluator paths if necessary
+        for evaluator in cfg.val_evaluator:
+            if 'ann_file' in evaluator:
+                evaluator['ann_file'] = osp.join(args.data_root, 'annotations/forklift_keypoints_val2017.json')
+
     # enable automatic-mixed-precision training
     if args.amp is True:
         from mmengine.optim import AmpOptimWrapper, OptimWrapper
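
Note (not part of the patch): with the new --data-root flag, the dataset location from the config can be overridden at launch time. The data path below is borrowed from tools/inference.txt; the config placeholder is illustrative:

    python tools/train.py <your_config>.py \
        --data-root /data/new_mmpose/mmpose/data/1704_split_exported_data_project_id_422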
diff --git a/tools/train_grid.py b/tools/train_grid.py
new file mode 100644
index 0000000000..7d0f7333cf
--- /dev/null
+++ b/tools/train_grid.py
@@ -0,0 +1,243 @@
+import argparse
+import os
+import os.path as osp
+import csv
+import shutil
+import itertools
+import subprocess
+import time
+
+from mmengine.config import Config, DictAction
+from mmengine.runner import Runner
+from mmengine.utils import mkdir_or_exist
+
+csv_file = 'training_results.csv'
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Train a pose model')
+    parser.add_argument('config', help='train config file path')
+    parser.add_argument('--work-dir', help='the dir to save logs and models')
+    parser.add_argument(
+        '--resume',
+        nargs='?',
+        type=str,
+        const='auto',
+        help='If specify checkpoint path, resume from it, while if not '
+        'specify, try to auto resume from the latest checkpoint '
+        'in the work directory.')
+    parser.add_argument(
+        '--amp',
+        action='store_true',
+        default=False,
+        help='enable automatic-mixed-precision training')
+    parser.add_argument(
+        '--no-validate',
+        action='store_true',
+        help='whether not to evaluate the checkpoint during training')
+    parser.add_argument(
+        '--auto-scale-lr',
+        action='store_true',
+        help='whether to auto scale the learning rate according to the '
+        'actual batch size and the original batch size.')
+    parser.add_argument(
+        '--show-dir',
+        help='directory where the visualization images will be saved.')
+    parser.add_argument(
+        '--show',
+        action='store_true',
+        help='whether to display the prediction results in a window.')
+    parser.add_argument(
+        '--interval',
+        type=int,
+        default=1,
+        help='visualize per interval samples.')
+    parser.add_argument(
+        '--wait-time',
+        type=float,
+        default=1,
+        help='display time of every window. (second)')
+    parser.add_argument(
+        '--cfg-options',
+        nargs='+',
+        action=DictAction,
+        help='override some settings in the used config, the key-value pair '
+        'in xxx=yyy format will be merged into config file. If the value to '
+        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
+        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
+        'Note that the quotation marks are necessary and that no white space '
+        'is allowed.')
+    parser.add_argument(
+        '--launcher',
+        choices=['none', 'pytorch', 'slurm', 'mpi'],
+        default='none',
+        help='job launcher')
+    # When using PyTorch version >= 2.0.0, the `torch.distributed.launch`
+    # will pass the `--local-rank` parameter to `tools/train.py` instead
+    # of `--local_rank`.
+    parser.add_argument('--local_rank', '--local-rank', type=int, default=0)
+    args = parser.parse_args()
+    if 'LOCAL_RANK' not in os.environ:
+        os.environ['LOCAL_RANK'] = str(args.local_rank)
+
+    return args
+
+
+def check_existing_run(work_dir):
+    """Return True if this run is already marked as completed in the CSV."""
+    if not os.path.exists(csv_file):
+        return False
+    with open(csv_file, mode='r') as file:
+        reader = csv.DictReader(file)
+        for row in reader:
+            if row['run_name'] == work_dir and row['status'] == "completed":
+                return True
+    return False
+
+
+def save_run_result(run_name, status, epe, ap, ar):
+    """Append one row per finished (or failed) run to the results CSV."""
+    file_exists = os.path.isfile(csv_file)
+    with open(csv_file, mode='a', newline='') as file:
+        writer = csv.DictWriter(file, fieldnames=['run_name', 'status', 'EPE', 'AP', 'AR'])
+        if not file_exists:
+            writer.writeheader()
+        writer.writerow({
+            'run_name': run_name,
+            'status': status,
+            'EPE': epe if epe is not None else "N/A",
+            'AP': ap if ap is not None else "N/A",
+            'AR': ar if ar is not None else "N/A"
+        })
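+
+# The resulting training_results.csv contains one row per grid point, e.g.
+# (values illustrative):
+#
+#   run_name,status,EPE,AP,AR
+#   work_dirs/grid_lr_0.0001_epochs_5_sigma_1_rot_40_optim_Adam_batch_32/,completed,5.21,0.71,0.77
+#   work_dirs/grid_lr_0.001_epochs_5_sigma_1_rot_40_optim_SGD_batch_64/,failed,N/A,N/A,N/A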
+
+
+def run_test(work_dir, config, checkpoint):
+    """Run tools/test.py on the given checkpoint and parse EPE/AP/AR from its stdout."""
+    command = f"python tools/test.py {config} {checkpoint} --work-dir {work_dir}"
+    result = subprocess.run(command, shell=True, capture_output=True, text=True)
+
+    if result.returncode == 0:
+        lines = result.stdout.splitlines()
+        epe, ap, ar = None, None, None
+        for line in lines:
+            if 'EPE' in line:
+                epe = float(line.split(': ')[-1])
+            elif 'AP' in line:
+                ap = float(line.split(': ')[-1])
+            elif 'AR' in line:
+                ar = float(line.split(': ')[-1])
+        return epe, ap, ar
+    else:
+        print(f"Test failed: {result.stderr}")
+        return None, None, None
+
+
+def merge_args(cfg, args):
+    """Merge CLI arguments to config."""
+    if args.no_validate:
+        cfg.val_cfg = None
+        cfg.val_dataloader = None
+        cfg.val_evaluator = None
+
+    cfg.launcher = args.launcher
+
+    # work_dir is determined in this priority: CLI > config > default
+    if args.work_dir is not None:
+        cfg.work_dir = args.work_dir
+    elif cfg.get('work_dir', None) is None:
+        cfg.work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(args.config))[0])
+
+    if args.amp is True:
+        from mmengine.optim import AmpOptimWrapper
+        cfg.optim_wrapper.type = 'AmpOptimWrapper'
+        cfg.optim_wrapper.setdefault('loss_scale', 'dynamic')
+
+    if args.resume == 'auto':
+        cfg.resume = True
+        cfg.load_from = None
+    elif args.resume is not None:
+        cfg.resume = True
+        cfg.load_from = args.resume
+
+    if args.auto_scale_lr:
+        cfg.auto_scale_lr.enable = True
+
+    if args.cfg_options is not None:
+        cfg.merge_from_dict(args.cfg_options)
+
+    return cfg
+
+
+def main():
+    args = parse_args()
+
+    hyperparam_grid = {
+        'lr': [1e-5, 1e-4, 1e-3],
+        'epochs': [5],  # TODO: set valid
+        'sigma': [1, 1.5, 2],
+        'rot_factor': [40, 60],
+        'optimizer': ['Adam', 'SGD'],
+        'batch_size': [32, 64, 128],
+    }
+
+    # Scheduler milestones keyed by the total number of epochs.
+    step_dict = {
+        5: [2, 3],
+        10: [7, 9],
+        100: [70, 90],
+        200: [140, 180],
+        300: [210, 270],
+    }
+
+    hyperparam_combinations = list(itertools.product(
+        hyperparam_grid['lr'],
+        hyperparam_grid['epochs'],
+        hyperparam_grid['sigma'],
+        hyperparam_grid['rot_factor'],
+        hyperparam_grid['optimizer'],
+        hyperparam_grid['batch_size']
+    ))
+
+    print(f"Total combinations: {len(hyperparam_combinations)}")
+
+    for lr, epochs, sigma, rot_factor, optimizer, batch_size in hyperparam_combinations:
+        cfg = Config.fromfile(args.config)
+        cfg = merge_args(cfg, args)
+
+        work_dir = f'work_dirs/grid_lr_{lr}_epochs_{epochs}_sigma_{sigma}_rot_{rot_factor}_optim_{optimizer}_batch_{batch_size}/'
+        if check_existing_run(work_dir):
+            print(f"Skipping already completed run: {work_dir}")
+            continue
+
+        # Update the config with the hyperparameters
+        cfg.optim_wrapper.optimizer = dict(type=optimizer, lr=lr)
+        cfg.train_cfg.max_epochs = epochs
+        cfg.codec.sigma = sigma  # Modify sigma in loss
+        cfg.train_dataloader.batch_size = batch_size
+        cfg.param_scheduler[1].milestones = step_dict[epochs]
+
+        # Assign the work directory
+        cfg.work_dir = work_dir
+
+        # Create work directory
+        mkdir_or_exist(osp.abspath(cfg.work_dir))
+
+        # Dump the updated config to the work directory
+        config_dst = osp.join(cfg.work_dir, 'modified_config.py')
+        cfg.dump(config_dst)
+
+        # Set up Runner and train
+        runner = Runner.from_cfg(cfg)
+
+        try:
+            runner.train()
+
+            # Run test and evaluate results
+            checkpoint_path = osp.join(work_dir, "latest.pth")
+            epe, ap, ar = run_test(work_dir, config_dst, checkpoint_path)
+            status = "completed" if epe is not None and ap is not None and ar is not None else "failed"
+
+        except Exception as e:
+            print(f"Error during training: {e}")
+            epe, ap, ar = None, None, None
+            status = "failed"
+
+        save_run_result(work_dir, status, epe, ap, ar)
+
+
+if __name__ == '__main__':
+    main()
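
Note (not part of the patch): the grid search uses the same CLI as tools/train.py, e.g.

    python tools/train_grid.py <your_config>.py

(config path illustrative). Each grid point trains in its own work_dirs/grid_* directory and appends one row to training_results.csv; runs already marked "completed" there are skipped on restart.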